├── .azure_pipelines ├── build-doc.yaml ├── dockerfiles │ ├── linux-cpu.dockerfile │ └── linux-gpu.dockerfile ├── job_templates │ ├── build-docker-image-template.yaml │ ├── huggingface-login-template.yaml │ ├── olive-build-doc-template.yaml │ ├── olive-example-cpu-template.yaml │ ├── olive-example-linux-gpu-template.yaml │ ├── olive-setup-template.yaml │ ├── olive-test-cpu-template.yaml │ └── olive-test-linux-gpu-template.yaml ├── olive-aml-ci.yaml ├── olive-ci.yaml ├── olive-examples.yaml ├── olive-ort-nightly.yaml ├── package_publish.yaml └── scripts │ ├── client_patch.py │ ├── find_failed_commit.py │ ├── requirements.txt │ └── run_test.sh ├── .coveragerc ├── .editorconfig ├── .flake8 ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.yaml ├── pull_request_template.md └── workflows │ ├── codeql.yml │ └── lint.yml ├── .gitignore ├── .lintrunner.toml ├── .pre-commit-config.yaml ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── NEWS.md ├── NOTICE.txt ├── README.md ├── SECURITY.md ├── assets └── cost_models │ ├── Llama-2-13B.csv │ ├── Llama-2-7B.csv │ ├── Llama-3.1-8B.csv │ ├── Phi-3-mini.csv │ └── Phi-3.5-mini.csv ├── docs ├── Makefile ├── README.md ├── architecture.md ├── make.bat ├── requirements.txt └── source │ ├── _static │ ├── css │ │ └── header.css │ └── js │ │ └── custom_version.js │ ├── conf.py │ ├── dump_schema.py │ ├── examples.md │ ├── extending │ ├── custom-model-evaluator.md │ ├── custom-scripts.md │ ├── design.md │ ├── how-to-add-optimization-pass.md │ ├── index.rst │ └── python_interface.md │ ├── exts │ ├── auto_config_doc │ │ └── __init__.py │ └── gallery_directive.py │ ├── features │ ├── auto-opt.md │ ├── azure-ai │ │ ├── azure-ai.md │ │ ├── azure-arc.md │ │ ├── azure-script.md │ │ ├── index.rst │ │ ├── remote-workflow.md │ │ └── shared-model-cache.md │ ├── huggingface-integration.md │ ├── ihv-integration │ │ ├── index.rst │ │ ├── openvino.md │ │ ├── qnn.md │ │ └── snpe.md │ ├── index.rst │ ├── model-compression.md │ ├── model-conversion │ │ ├── convert-onnx.md │ │ ├── convert-pytorch.md │ │ └── index.rst │ ├── model-splitting.md │ ├── onnx-transformations.md │ ├── peft-adapters.md │ └── quantization.md │ ├── getting-started │ └── getting-started.md │ ├── how-to │ ├── cli │ │ ├── cli-auto-opt.md │ │ ├── cli-finetune.md │ │ ├── cli-quantize.md │ │ └── cli-run.md │ ├── configure-workflows │ │ ├── build-workflow.md │ │ ├── engine-configuration.md │ │ ├── how-to-configure-data.md │ │ ├── how-to-configure-model.md │ │ ├── metrics-configuration.md │ │ ├── model-packaging.md │ │ ├── pass-configuration.md │ │ └── systems.md │ ├── index.rst │ └── installation.md │ ├── images │ ├── auto_opt │ │ └── pass_flows.png │ ├── azure_arc │ │ ├── add-infra.png │ │ ├── add-kub-detail.png │ │ ├── add-kub-to-arc.png │ │ ├── add-kub.png │ │ ├── attach-kub.png │ │ ├── attach-suc.png │ │ └── new-compute.png │ ├── datacontainer_example.png │ ├── dataset-flow.png │ ├── model_splitting │ │ ├── cost_model.png │ │ └── num_splits.png │ ├── multi-lora-diagram.png │ ├── olive-black-text.png │ ├── olive-design.png │ ├── olive-flow.png │ └── olive-white-text.png │ ├── index.md │ ├── reference │ ├── cli.rst │ ├── index.rst │ ├── options.md │ └── pass.rst │ └── why-olive.md ├── examples ├── README.md ├── __init__.py ├── adetailer │ ├── README.md │ ├── face_yolo_qnn.json │ ├── requirements.txt │ └── user_script.py ├── ast │ ├── README.md │ ├── ast.json │ └── requirements.txt ├── bert │ ├── .gitignore │ ├── README.md │ ├── bert.py │ ├── bert_cuda_gpu.template.json │ ├── 
bert_inc_dynamic_ptq_cpu.json │ ├── bert_inc_ptq_cpu.json │ ├── bert_inc_smoothquant_ptq_cpu.json │ ├── bert_inc_static_ptq_cpu.json │ ├── bert_ptq_cpu.json │ ├── bert_ptq_cpu_aml.json │ ├── bert_ptq_qdq.json │ ├── bert_ptq_qdq_vitis_ai.json │ ├── bert_qat_customized_train_loop_cpu.json │ ├── bert_trt_gpu.json │ ├── bert_trtrtx_gpu.json │ ├── conda.yaml │ ├── conda_gpu.yaml │ ├── docker │ │ └── Dockerfile │ ├── google_bert_qdq.json │ ├── google_bert_qdq_vitis_ai.json │ ├── google_bert_trtrtx.json │ ├── notebook │ │ ├── bert_auto_opt_gpu.json │ │ └── multi_ep_search.ipynb │ ├── openvino │ │ ├── README.md │ │ ├── bert_base_multilingual_cased │ │ │ ├── README.md │ │ │ ├── bert-base-multilingual-cased_context_ov_static.json │ │ │ └── user_script.py │ │ └── bert_base_uncased_mrpc │ │ │ ├── README.md │ │ │ ├── bert-base-uncased-mrpc_context_ov_static.json │ │ │ └── user_script.py │ ├── qnn │ │ ├── README.md │ │ ├── bert_common.py │ │ ├── google_bert_qnn.py │ │ ├── google_bert_qnn_fp32.json │ │ ├── google_bert_qnn_fp32_ctx.json │ │ ├── google_bert_qnn_qdq.json │ │ ├── google_bert_qnn_qdq_ctx.json │ │ ├── intel_bert_qnn_fp32.json │ │ ├── intel_bert_qnn_fp32_cxt.json │ │ ├── intel_bert_qnn_qdq.json │ │ ├── intel_bert_qnn_qdq_cxt.json │ │ ├── requirements.txt │ │ └── wikitext.py │ ├── requirements.txt │ ├── snpe │ │ ├── README.md │ │ ├── bert_snpe.json │ │ └── user_script.py │ └── user_script.py ├── bge │ ├── bge-small-en-v1.5_ptq_qnn.json │ ├── readme.md │ ├── requirements.txt │ └── user_script.py ├── clip │ ├── README.md │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qdq.json │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qdq_vitis_ai.json │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_trtrtx.json │ ├── openai_clip-vit-base-patch16_ptq_qdq.json │ ├── openai_clip-vit-base-patch16_ptq_qdq_vitis_ai.json │ ├── openai_clip-vit-base-patch16_trtrtx.json │ ├── openai_clip-vit-base-patch32_ptq_qdq.json │ ├── openai_clip-vit-base-patch32_ptq_qdq_vitis_ai.json │ ├── openai_clip-vit-base-patch32_trtrtx.json │ ├── openvino │ │ ├── README.md │ │ ├── clip_vit_b32_laion2b_s34B_b79k_context_ov_static.json │ │ ├── clip_vit_base_patch16_context_ov_static.json │ │ ├── clip_vit_base_patch32_context_ov_static.json │ │ └── user_script.py │ ├── qnn │ │ ├── README.md │ │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn_fp32.json │ │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn_fp32_ctx.json │ │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn_qdq.json │ │ ├── laion_CLIP-ViT-B-32-laion2B-s34B-b79K_ptq_qnn_qdq_ctx.json │ │ ├── openai_clip-vit-base-patch16_ptq_qnn_fp32.json │ │ ├── openai_clip-vit-base-patch16_ptq_qnn_fp32_ctx.json │ │ ├── openai_clip-vit-base-patch16_ptq_qnn_qdq.json │ │ ├── openai_clip-vit-base-patch16_ptq_qnn_qdq_ctx.json │ │ ├── openai_clip-vit-base-patch32_ptq_qnn_fp32.json │ │ ├── openai_clip-vit-base-patch32_ptq_qnn_fp32_ctx.json │ │ ├── openai_clip-vit-base-patch32_ptq_qnn_qdq.json │ │ ├── openai_clip-vit-base-patch32_ptq_qnn_qdq_ctx.json │ │ ├── requirements.txt │ │ └── user_script.py │ ├── requirements.txt │ └── user_script.py ├── deberta │ ├── README.md │ ├── deberta.json │ └── requirements.txt ├── deepseek │ ├── README.md │ └── openvino │ │ ├── DeepSeek-R1-Distill-Qwen-1.5B_context_ov_dynamic_sym_gs128_bkp_int8_sym_r1.json │ │ └── README.md ├── directml │ ├── README.md │ ├── llm │ │ ├── .gitignore │ │ ├── README.md │ │ ├── chat_app │ │ │ ├── __init__.py │ │ │ ├── app.py │ │ │ ├── app_modules │ │ │ │ ├── overwrites.py │ │ │ │ ├── presets.py │ │ │ │ └── utils.py │ │ │ ├── assets │ │ │ │ ├── 
custom.css │ │ │ │ └── custom.js │ │ │ └── interface │ │ │ │ ├── base_interface.py │ │ │ │ └── hddr_llm_onnx_dml_interface.py │ │ ├── chat_templates.py │ │ ├── config.py │ │ ├── config_llm.json │ │ ├── decoder_model.py │ │ ├── falcon.py │ │ ├── llava_model.py │ │ ├── llm.py │ │ ├── model_type_mapping.py │ │ ├── phi.py │ │ ├── phi3.py │ │ ├── placeholder.png │ │ ├── requirements.txt │ │ ├── run_llm_batched_io_binding.py │ │ ├── run_llm_io_binding.py │ │ ├── run_vision_llm_io_binding.py │ │ └── user_script.py │ ├── squeezenet │ │ ├── README.md │ │ ├── squeezenet_config.json │ │ └── user_script.py │ ├── stable_diffusion │ │ ├── README.md │ │ └── readme │ │ │ └── pipeline.png │ └── stable_diffusion_xl │ │ ├── README.md │ │ └── readme │ │ ├── pipeline.png │ │ └── sdxl_flow.png ├── falcon │ ├── README.md │ ├── config.json │ └── requirements.txt ├── getting_started │ ├── README.md │ ├── olive-awq-ft-llama.ipynb │ ├── olive-deepseek-finetune.ipynb │ ├── olive_quickstart.ipynb │ └── text-gen-optimized-slms.ipynb ├── gptj │ ├── README.md │ ├── gptj_inc_dynamic_ptq_cpu.json │ ├── gptj_inc_static_ptq_cpu.json │ ├── requirements.txt │ └── user_script.py ├── gte │ ├── README.md │ ├── config.json │ └── user_script.py ├── llama2 │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── USE-POLICY-META-LLAMA-2.md │ ├── conda_gpu.yaml │ ├── llama2.py │ ├── llama2_generate.json │ ├── llama2_lmeval.json │ ├── llama2_lmeval_onnx.json │ ├── llama2_model_builder.py │ ├── llama2_model_builder_template.json │ ├── llama2_multilora.ipynb │ ├── llama2_qlora.json │ ├── llama2_split.json │ ├── llama2_template.json │ ├── llama2_tensor_parallel.json │ ├── notebook │ │ ├── llama2 │ │ │ ├── conda.yaml │ │ │ ├── config.json │ │ │ └── notebook.ipynb │ │ └── llama2_multiep │ │ │ ├── config_cpu.template.json │ │ │ ├── config_gpu.template.json │ │ │ ├── config_multi_ep.template.json │ │ │ ├── llama2.py │ │ │ ├── multiple_ep_requirements.txt │ │ │ └── notebook.ipynb │ ├── requirements-gptq.txt │ ├── requirements-pipeline.txt │ ├── requirements-qlora.txt │ ├── requirements.txt │ ├── tensor_parallel_generate.py │ └── tensor_parallel_inference.py ├── llama3 │ ├── README.md │ └── openvino │ │ ├── Llama-3.2-1B-Instruct_context_ov_dynamic_sym_bkp_int8_sym.json │ │ └── README.md ├── mistral │ ├── README.md │ ├── mistral.py │ ├── mistral_fp16.json │ ├── mistral_int4.json │ ├── requirements.txt │ └── user_script.py ├── mobilenet │ ├── .gitignore │ ├── onnx │ │ ├── README.md │ │ ├── config.json │ │ ├── imagenet.py │ │ ├── requirements.txt │ │ └── user_script.py │ └── qnn │ │ ├── README.md │ │ ├── download_files.py │ │ ├── mobilenet_qnn_ep.json │ │ ├── requirements.txt │ │ └── user_script.py ├── open_llama │ ├── README.md │ ├── conda.yaml │ ├── open_llama_arc.json │ ├── open_llama_config.json │ ├── open_llama_inc_woq.json │ ├── open_llama_sparsegpt_gpu.json │ ├── requirements-arc.txt │ ├── requirements-sparsegpt.txt │ ├── requirements-woq.txt │ ├── requirements.txt │ └── user_script.py ├── opt_125m │ ├── README.md │ ├── awq.json │ ├── awq_onnx.json │ ├── gptq.json │ ├── gptq_onnx.json │ ├── requirements-awq.txt │ ├── requirements-gptq.txt │ └── requirements.txt ├── phi2 │ ├── .gitignore │ ├── README.md │ ├── generate.py │ ├── phi2.py │ ├── phi2_genai.json │ ├── phi2_optimize_template.json │ ├── requirements-lora.txt │ ├── requirements-pipeline.txt │ ├── requirements-slicegpt.txt │ └── requirements.txt ├── phi3 │ ├── .gitignore │ ├── README.md │ ├── README_VISION.md │ ├── phi3.py │ ├── phi3_nvmo_ptq.json │ ├── phi3_template.json │ ├── 
phi3_vision.py │ ├── requirements-awq.txt │ ├── requirements-nvmo-awq.txt │ ├── requirements-quarot.txt │ ├── requirements-vision.txt │ ├── requirements.txt │ └── vision │ │ ├── config_templates │ │ ├── text_config.json │ │ ├── text_embedding_config.json │ │ └── vision_config.json │ │ └── scripts │ │ ├── prepare_phi3_vision_for_olive.sh │ │ └── user_script.py ├── phi3_5 │ ├── README.md │ ├── app.py │ ├── openvino │ │ ├── Phi-3.5-mini-instruct_context_ov_dynamic_sym_gs128_bkp_int8_sym.json │ │ └── README.md │ ├── qdq_config.json │ ├── qdq_config_vitis_ai.json │ ├── qnn_config.json │ └── requirements.txt ├── phi4 │ ├── README.md │ └── openvino │ │ ├── README.md │ │ ├── phi_4_mini_reasoning │ │ ├── Phi-4-mini-reasoning_context_ov_dynamic_sym_gs128_bkp_int8_sym.json │ │ └── README.md │ │ ├── phi_4_reasoning │ │ ├── Phi-4-reasoning_context_ov_dynamic_sym_gs128_bkp_int8_sym.json │ │ └── README.md │ │ └── phi_4_reasoning_plus │ │ ├── Phi-4-reasoning-plus_context_ov_dynamic_sym_gs128_bkp_int8_sym.json │ │ └── README.md ├── qwen2_5 │ ├── README.md │ └── openvino │ │ ├── Qwen2.5-1.5B-instruct_context_ov_dynamic_sym_bkp_int8_sym_r1.json │ │ └── README.md ├── red_pajama │ ├── README.md │ ├── config.json │ ├── requirements.txt │ └── user_script.py ├── resnet │ ├── README.md │ ├── conda.yaml │ ├── imagenet.py │ ├── multiple_ep_requirements.txt │ ├── openvino │ │ ├── README.md │ │ ├── imagenet.py │ │ ├── requirements.txt │ │ └── resnet_context_ov_static.json │ ├── prepare_model_data.py │ ├── qnn │ │ ├── README.md │ │ ├── imagenet.py │ │ ├── requirements.txt │ │ ├── resnet_ptq_qnn_fp32.json │ │ ├── resnet_ptq_qnn_fp32_ctx.json │ │ ├── resnet_ptq_qnn_qdq.json │ │ └── resnet_ptq_qnn_qdq_ctx.json │ ├── requirements.txt │ ├── resnet_dynamic_ptq_cpu.json │ ├── resnet_multiple_ep.json │ ├── resnet_ptq_cpu.json │ ├── resnet_ptq_cpu_aml_dataset.json │ ├── resnet_ptq_qdq.json │ ├── resnet_ptq_qdq_vitis_ai.json │ ├── resnet_qat_default_train_loop_cpu.json │ ├── resnet_qat_lightning_module_cpu.json │ ├── resnet_static_ptq_cpu.json │ ├── resnet_trtrtx.json │ ├── resnet_vitis_ai_ptq_cpu.json │ └── user_script.py ├── sentence_transformers │ ├── eval_stsb.py │ ├── readme.md │ └── sentence_transformer_config.json ├── stable_diffusion │ ├── .gitignore │ ├── README.md │ ├── assets │ │ └── dog.png │ ├── config_safety_checker.json │ ├── config_text_encoder.json │ ├── config_unet.json │ ├── config_vae_decoder.json │ ├── config_vae_encoder.json │ ├── evaluation.py │ ├── notebook │ │ ├── .gitignore │ │ ├── image │ │ │ ├── result_pen.png │ │ │ ├── result_pen_merge.png │ │ │ ├── result_wolf.png │ │ │ └── result_wolf_merge.png │ │ ├── sd_multilora.ipynb │ │ ├── text_encoder.py │ │ ├── text_encoder2.py │ │ ├── unet_pen_sketch.py │ │ ├── unet_wolf_plushie.py │ │ ├── vae_decoder.py │ │ └── vae_encoder.py │ ├── requirements-common.txt │ ├── requirements-ov.txt │ ├── requirements.txt │ ├── sd_utils │ │ ├── config.py │ │ ├── ort.py │ │ ├── ov.py │ │ └── qdq.py │ ├── stable_diffusion.py │ └── user_script.py ├── stable_diffusion_xl │ ├── .gitignore │ ├── README.md │ ├── config.py │ ├── config_text_encoder.json │ ├── config_text_encoder_2.json │ ├── config_unet.json │ ├── config_vae_decoder.json │ ├── config_vae_encoder.json │ ├── requirements-common.txt │ ├── requirements.txt │ ├── stable_diffusion_xl.py │ └── user_script.py ├── super_resolution │ ├── README.md │ ├── config.json │ ├── loader.py │ └── requirements.txt ├── table_transformer_detection │ ├── README.md │ ├── prepare_datasets.py │ ├── ttd.py │ └── ttd_config.json ├── test │ ├── 
__init__.py │ ├── azureml │ │ ├── __init__.py │ │ ├── test_bert_ptq_cpu_aml.py │ │ ├── test_llama2.py │ │ ├── test_resnet_ptq_cpu_aml.py │ │ └── test_resnet_vitis_ai_ptq_cpu_aml.py │ ├── local │ │ ├── __init__.py │ │ ├── test_ast.py │ │ ├── test_bert_cuda_gpu.py │ │ ├── test_bert_inc.py │ │ ├── test_bert_ptq_cpu.py │ │ ├── test_bert_ptq_cpu_docker.py │ │ ├── test_deberta.py │ │ ├── test_llama2.py │ │ ├── test_mistral_fp16.py │ │ ├── test_mobilenet.py │ │ ├── test_mobilenet_qnn_ep.py │ │ ├── test_phi2.py │ │ ├── test_resnet_ptq_cpu.py │ │ ├── test_resnet_qat.py │ │ ├── test_resnet_vitis_ai_ptq_cpu.py │ │ ├── test_stable_diffusion_cuda_gpu.py │ │ └── test_super_resolution.py │ └── utils.py ├── utils │ ├── generator.py │ └── kv_cache_utils.py ├── vgg │ ├── .gitignore │ ├── README.md │ ├── download_files.py │ ├── prepare_config.py │ ├── requirements.txt │ └── vgg_config.json └── vit │ ├── README.md │ ├── imagenet.py │ ├── openvino │ ├── README.md │ ├── imagenet.py │ ├── requirements.txt │ └── vit_base_patch16_224_context_ov_static.json │ ├── qnn │ ├── README.md │ ├── imagenet.py │ ├── requirements.txt │ ├── val_tiny_imagenet │ │ ├── val_tiny_imagenet.py │ │ └── vit_id2label.json │ ├── vit_qnn_fp32.json │ ├── vit_qnn_fp32_ctx.json │ ├── vit_qnn_qdq.json │ └── vit_qnn_qdq_ctx.json │ ├── vit_qdq.json │ ├── vit_qdq_vitis_ai.json │ └── vit_trtrtx.json ├── olive ├── __init__.py ├── __main__.py ├── auto_optimizer │ ├── __init__.py │ ├── config_template │ │ ├── opt_level_passes.yaml │ │ └── pass_capability.yaml │ ├── regulate_mixins.py │ └── template_mapping.py ├── azureml │ ├── __init__.py │ └── azureml_client.py ├── cache.py ├── cli │ ├── __init__.py │ ├── auto_opt.py │ ├── base.py │ ├── capture_onnx.py │ ├── configure_qualcomm_sdk.py │ ├── constants.py │ ├── convert_adapters.py │ ├── extract_adapters.py │ ├── finetune.py │ ├── generate_adapter.py │ ├── generate_cost_model.py │ ├── launcher.py │ ├── manage_aml_compute.py │ ├── quantize.py │ ├── run.py │ ├── session_params_tuning.py │ └── shared_cache.py ├── common │ ├── __init__.py │ ├── auto_config.py │ ├── config_utils.py │ ├── constants.py │ ├── container_client_factory.py │ ├── hf │ │ ├── __init__.py │ │ ├── login.py │ │ ├── mappings.py │ │ ├── mlflow.py │ │ ├── model_io.py │ │ ├── peft.py │ │ ├── quant.py │ │ ├── utils.py │ │ └── wrapper.py │ ├── import_lib.py │ ├── ort_inference.py │ ├── pydantic_v1.py │ ├── user_module_loader.py │ └── utils.py ├── constants.py ├── data │ ├── __init__.py │ ├── component │ │ ├── __init__.py │ │ ├── dataloader.py │ │ ├── dataset.py │ │ ├── load_dataset.py │ │ ├── post_process_data.py │ │ ├── pre_process_data.py │ │ └── text_generation.py │ ├── config.py │ ├── constants.py │ ├── container │ │ ├── __init__.py │ │ ├── data_container.py │ │ ├── dummy_data_container.py │ │ ├── huggingface_container.py │ │ └── raw_data_container.py │ ├── registry.py │ └── template.py ├── engine │ ├── __init__.py │ ├── config.py │ ├── engine.py │ ├── footprint.py │ ├── output.py │ └── packaging │ │ ├── Dockerfile.base │ │ ├── __init__.py │ │ ├── packaging_config.py │ │ └── packaging_generator.py ├── evaluator │ ├── __init__.py │ ├── accuracy.py │ ├── lmeval_onnx_model.py │ ├── metric.py │ ├── metric_backend.py │ ├── metric_config.py │ ├── metric_result.py │ ├── olive_evaluator.py │ └── registry.py ├── exception │ └── __init__.py ├── hardware │ ├── __init__.py │ ├── accelerator.py │ └── constants.py ├── logging.py ├── model │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ ├── hf_config.py │ │ ├── io_config.py │ │ ├── 
kv_cache_config.py │ │ ├── model_config.py │ │ └── registry.py │ ├── handler │ │ ├── __init__.py │ │ ├── base.py │ │ ├── composite.py │ │ ├── hf.py │ │ ├── mixin │ │ │ ├── __init__.py │ │ │ ├── dummy_inputs.py │ │ │ ├── hf.py │ │ │ ├── io_config.py │ │ │ ├── json.py │ │ │ ├── kv_cache.py │ │ │ ├── mlflow.py │ │ │ ├── onnx_ep.py │ │ │ └── resource.py │ │ ├── onnx.py │ │ ├── openvino.py │ │ ├── pytorch.py │ │ ├── qnn.py │ │ ├── snpe.py │ │ └── tensorflow.py │ └── utils │ │ ├── __init__.py │ │ ├── onnx_utils.py │ │ └── path_utils.py ├── olive_config.json ├── package_config.py ├── passes │ ├── __init__.py │ ├── olive_pass.py │ ├── onnx │ │ ├── __init__.py │ │ ├── append_pre_post_processing_ops.py │ │ ├── bnb_quantization.py │ │ ├── common.py │ │ ├── compose.py │ │ ├── context_binary.py │ │ ├── conversion.py │ │ ├── dynamic_to_fixed_shape.py │ │ ├── extract_adapters.py │ │ ├── float16_conversion.py │ │ ├── graph_surgeries.py │ │ ├── hqq_quantization.py │ │ ├── inc_quantization.py │ │ ├── io_datatype_converter.py │ │ ├── merge_decoders.py │ │ ├── mixed_precision.py │ │ ├── mixed_precision_overrides.py │ │ ├── mnb_to_qdq.py │ │ ├── model_builder.py │ │ ├── moe_experts_distributor.py │ │ ├── nvmo_quantization.py │ │ ├── onnx_dag.py │ │ ├── onnxscript_fusion.py │ │ ├── optimum_conversion.py │ │ ├── optimum_merging.py │ │ ├── peephole_optimizer.py │ │ ├── pipeline │ │ │ ├── __init__.py │ │ │ └── step_utils.py │ │ ├── qnn │ │ │ ├── __init__.py │ │ │ └── qnn_preprocess.py │ │ ├── quantization.py │ │ ├── session_params_tuning.py │ │ ├── split.py │ │ ├── static_llm.py │ │ ├── tensorrt │ │ │ ├── __init__.py │ │ │ └── trt_dla_transforms.py │ │ ├── transformer_optimization.py │ │ ├── vitis_ai │ │ │ ├── __init__.py │ │ │ ├── calibrate.py │ │ │ ├── meta_data.py │ │ │ ├── preprocess.py │ │ │ ├── quant_utils.py │ │ │ ├── quantize.py │ │ │ ├── quantizer.py │ │ │ └── refine.py │ │ └── vitis_ai_quantization.py │ ├── openvino │ │ ├── __init__.py │ │ ├── conversion.py │ │ ├── encapsulation.py │ │ ├── io_update.py │ │ ├── optimum_intel.py │ │ └── quantization.py │ ├── pass_config.py │ ├── pytorch │ │ ├── __init__.py │ │ ├── autoawq.py │ │ ├── capture_split_info.py │ │ ├── cluster.py │ │ ├── common.py │ │ ├── gptq.py │ │ ├── hadamard_utils.py │ │ ├── lora.py │ │ ├── merge_adapter_weights.py │ │ ├── pytorch_lightning_utils.py │ │ ├── qat_utils.py │ │ ├── quantization_aware_training.py │ │ ├── rotate.py │ │ ├── sgdg.py │ │ ├── slicegpt.py │ │ ├── sparsegpt.py │ │ ├── sparsegpt_utils.py │ │ ├── tensor_parallel.py │ │ ├── tensor_parallel_layers.py │ │ ├── tensor_parallel_llama2.py │ │ ├── torch_trt_conversion.py │ │ ├── train_utils.py │ │ └── trt_utils.py │ ├── qnn │ │ ├── __init__.py │ │ ├── context_binary_generator.py │ │ ├── conversion.py │ │ └── model_lib_generator.py │ ├── snpe │ │ ├── __init__.py │ │ ├── conversion.py │ │ ├── quantization.py │ │ └── snpe_to_onnx.py │ └── utils │ │ └── __init__.py ├── platform_sdk │ ├── __init__.py │ └── qualcomm │ │ ├── __init__.py │ │ ├── configure │ │ ├── __init__.py │ │ ├── __main__.py │ │ └── configure.py │ │ ├── constants.py │ │ ├── copy_libcdsprpc.ps1 │ │ ├── create_python_env.ps1 │ │ ├── create_python_env.sh │ │ ├── env.py │ │ ├── qnn │ │ ├── __init__.py │ │ ├── env.py │ │ ├── qnn.py │ │ └── utils │ │ │ └── __init__.py │ │ ├── runner.py │ │ ├── snpe │ │ ├── __init__.py │ │ ├── env.py │ │ ├── snpe.py │ │ ├── tools │ │ │ ├── __init__.py │ │ │ ├── dev.py │ │ │ └── inference.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ └── adb.py │ │ └── utils │ │ ├── __init__.py │ │ ├── 
data_loader.py │ │ └── input_list.py ├── resource_path.py ├── search │ ├── __init__.py │ ├── samplers │ │ ├── __init__.py │ │ ├── optuna_sampler.py │ │ ├── random_sampler.py │ │ ├── search_sampler.py │ │ ├── sequential_sampler.py │ │ └── tpe_sampler.py │ ├── search_parameter.py │ ├── search_point.py │ ├── search_results.py │ ├── search_sample.py │ ├── search_space.py │ ├── search_strategy.py │ └── utils.py ├── systems │ ├── __init__.py │ ├── accelerator_creator.py │ ├── azureml │ │ ├── __init__.py │ │ ├── aml_evaluation_runner.py │ │ ├── aml_pass_runner.py │ │ ├── aml_system.py │ │ └── aml_workflow_runner.py │ ├── common.py │ ├── docker │ │ ├── Dockerfile │ │ ├── Dockerfile.cpu │ │ ├── Dockerfile.gpu │ │ ├── Dockerfile.openvino │ │ ├── __init__.py │ │ ├── docker_system.py │ │ ├── eval.py │ │ ├── runner.py │ │ └── utils.py │ ├── isolated_ort │ │ ├── __init__.py │ │ ├── inference_runner.py │ │ └── isolated_ort_system.py │ ├── local.py │ ├── olive_system.py │ ├── python_environment │ │ ├── __init__.py │ │ ├── common_requirements.txt │ │ ├── evaluation_runner.py │ │ ├── pass_runner.py │ │ └── python_environment_system.py │ ├── system_alias.py │ ├── system_config.py │ └── utils │ │ ├── __init__.py │ │ ├── arg_parser.py │ │ ├── available_providers_runner.py │ │ └── misc.py └── workflows │ ├── __init__.py │ └── run │ ├── __init__.py │ ├── __main__.py │ ├── config.py │ └── run.py ├── pyproject.toml ├── requirements-dev.txt ├── requirements.txt ├── scripts ├── format_json.py ├── generate_cost_model_artifacts.py └── overwrite_version.py ├── setup.py └── test ├── __init__.py ├── integ_test ├── __init__.py ├── aml_model_test │ ├── __init__.py │ ├── conda.yaml │ └── test_aml_model.py ├── aml_resource_path │ ├── __init__.py │ └── test_aml_resource_path.py ├── evaluator │ ├── __init__.py │ ├── azureml_eval │ │ ├── __init__.py │ │ ├── conda.yaml │ │ ├── test_aml_evaluation.py │ │ ├── user_script.py │ │ └── utils.py │ ├── docker_eval │ │ ├── __init__.py │ │ ├── dockerfile │ │ │ └── Dockerfile │ │ ├── test_docker_evaluation.py │ │ ├── user_script.py │ │ └── utils.py │ └── local_eval │ │ ├── __init__.py │ │ ├── test_local_evaluation.py │ │ ├── user_script.py │ │ └── utils.py ├── pass_runner │ ├── __init__.py │ └── test_docker_system.py └── utils.py ├── multiple_ep ├── __init__.py ├── requirements.txt ├── test_aml_system.py ├── test_docker_system.py ├── test_python_env_system.py ├── user_script.py └── utils.py ├── requirements-test-cpu.txt ├── requirements-test-gpu.txt ├── requirements-test.txt └── unit_test ├── .gitignore ├── __init__.py ├── assets ├── __init__.py └── user_script.py ├── auto_optimizer ├── __init__.py ├── mock_data │ └── available_pass_flows.yaml └── test_auto_optimizer.py ├── cli ├── __init__.py ├── output_model │ └── model_config.json ├── test_base.py └── test_cli.py ├── common ├── __init__.py ├── test_container_client_factory.py ├── test_copy_dir.py ├── test_get_attr.py ├── test_hardlink_copy.py ├── test_hf.py ├── test_hf_wrapper.py ├── test_import_lib.py ├── test_retry.py └── test_save_load_weights.py ├── conftest.py ├── data_container ├── __init__.py ├── test_data_config.py ├── test_data_container.py ├── test_dataloader.py ├── test_dataset.py └── test_template.py ├── engine ├── __init__.py ├── mock_data │ └── footprints.json ├── packaging │ ├── __init__.py │ ├── code │ │ └── score.py │ └── test_packaging_generator.py ├── test_engine.py ├── test_footprint.py └── test_output.py ├── evaluator ├── __init__.py ├── test_accuracy.py ├── test_metric.py ├── test_metric_backend.py └── 
test_olive_evaluator.py ├── hardware ├── __init__.py └── test_accelerator.py ├── model ├── __init__.py ├── test_composite_model.py ├── test_hf_config.py ├── test_hf_model.py ├── test_kv_cache_config.py ├── test_mlflow_model.py ├── test_onnx_model.py ├── test_pytorch_model.py └── user_script.py ├── passes ├── __init__.py ├── common │ ├── __init__.py │ └── test_user_script.py ├── inc │ ├── __init__.py │ └── test_inc_quantization.py ├── onnx │ ├── __init__.py │ ├── pipeline │ │ ├── __init__.py │ │ ├── step_config.json │ │ └── test_step_utils.py │ ├── test_bnb_quantization.py │ ├── test_common.py │ ├── test_compose.py │ ├── test_context_binary.py │ ├── test_conversion.py │ ├── test_dynamic_to_fixed_shape.py │ ├── test_extract_adapters.py │ ├── test_float16_conversion.py │ ├── test_graph_surgeries.py │ ├── test_hqq_quantization.py │ ├── test_io_datatype_converter.py │ ├── test_mixed_precision.py │ ├── test_mnb_to_qdq.py │ ├── test_model_builder.py │ ├── test_nvmo_quantization.py │ ├── test_onnxscript_fusion.py │ ├── test_optimum_conversion.py │ ├── test_peephole_optimizer.py │ ├── test_pre_post_processing_op.py │ ├── test_qnn_mixed_precision_overrides.py │ ├── test_qnn_preprocess.py │ ├── test_quantization.py │ ├── test_session_params_tuning.py │ ├── test_split_model.py │ ├── test_static_llm.py │ ├── test_transformer_optimization.py │ └── test_trt_dla_transforms.py ├── openvino │ ├── __init__.py │ ├── test_openvino_conversion.py │ ├── test_openvino_encapsulation.py │ ├── test_openvino_io_update.py │ ├── test_openvino_optimum_conversion.py │ ├── test_openvino_quantization.py │ └── user_script.py ├── pytorch │ ├── __init__.py │ ├── test_autoawq.py │ ├── test_capture_split_info.py │ ├── test_gptq.py │ ├── test_lora.py │ ├── test_quantization_aware_training.py │ ├── test_rotate.py │ ├── test_slicegpt.py │ ├── test_sparsegpt.py │ └── test_torch_trt_conversion.py ├── qnn │ ├── __init__.py │ ├── test_qnn_context_bin_generator.py │ └── test_qnn_conversion.py ├── test_pass.py ├── test_pass_serialization.py └── vitis_ai │ ├── __init__.py │ ├── test_vitis_ai_add_metadata.py │ └── test_vitis_ai_quantization.py ├── resource_path ├── __init__.py └── test_resource_path.py ├── search ├── samplers │ ├── test_random_sampler.py │ ├── test_sequential_sampler.py │ └── test_tpe_sampler.py ├── test_search_results.py ├── test_search_space.py └── test_search_strategy.py ├── snpe ├── __init__.py └── test_adb_run.py ├── systems ├── __init__.py ├── azureml │ ├── __init__.py │ ├── data_dir │ │ └── datafile.json │ ├── output_metrics │ │ └── pipeline_output │ │ │ └── named-outputs │ │ │ ├── accuracy │ │ │ └── metric_result.json │ │ │ └── latency │ │ │ └── metric_result.json │ ├── script_dir │ │ └── user_script.py │ ├── test_alias_system.py │ └── test_aml_system.py ├── docker │ ├── __init__.py │ ├── output_local_path │ │ └── eval_res.json │ └── test_docker_system.py ├── isolated_ort │ ├── __init__.py │ └── test_isolated_ort_system.py ├── python_environment │ ├── __init__.py │ └── test_python_environment_system.py ├── test_local.py └── test_utils.py ├── test_cache.py ├── test_package_config.py ├── utils.py └── workflows ├── __init__.py ├── mock_data ├── default_engine.json ├── dependency_setup.json ├── readymade_system.json └── user_script.json ├── test_run_config.py ├── test_setup.py └── test_workflow_run.py /.azure_pipelines/dockerfiles/linux-cpu.dockerfile: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # 
Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | ARG BASE_IMAGE 6 | FROM ${BASE_IMAGE} 7 | 8 | ARG PYTHON_VERSION 9 | 10 | RUN apt-get update && \ 11 | apt-get install -y --no-install-recommends \ 12 | python${PYTHON_VERSION} \ 13 | python${PYTHON_VERSION}-dev \ 14 | python${PYTHON_VERSION}-venv \ 15 | python3-pip 16 | RUN ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python 17 | 18 | COPY . /olive 19 | WORKDIR /olive 20 | RUN python -m venv olive-venv 21 | RUN . olive-venv/bin/activate && \ 22 | pip install --upgrade setuptools && \ 23 | pip install -e . 24 | -------------------------------------------------------------------------------- /.azure_pipelines/job_templates/build-docker-image-template.yaml: -------------------------------------------------------------------------------- 1 | # Docker image build template 2 | 3 | parameters: 4 | dockerfile: '' 5 | python_version: '' 6 | docker_image: '' 7 | base_image: '' 8 | trt_version: '' 9 | 10 | steps: 11 | - script: | 12 | docker login -u $(docker-username) -p $(docker-password) 13 | docker build \ 14 | --build-arg BASE_IMAGE=${{ parameters.base_image }} \ 15 | --build-arg TENSORRT_VERSION=${{ parameters.trt_version }} \ 16 | --build-arg PYTHON_VERSION=${{ parameters.python_version }} \ 17 | -t ${{ parameters.docker_image }} \ 18 | -f $(Build.SourcesDirectory)/${{ parameters.dockerfile }} . 19 | displayName: Build Docker Image 20 | 21 | - script: | 22 | docker version 23 | docker image ls 24 | docker system df 25 | df -h 26 | displayName: Check Docker Images 27 | -------------------------------------------------------------------------------- /.azure_pipelines/job_templates/huggingface-login-template.yaml: -------------------------------------------------------------------------------- 1 | parameters: 2 | hf_token: 'huggingface_token' 3 | 4 | steps: 5 | - script: huggingface-cli login --token ${{ parameters.hf_token }} 6 | displayName: 'Hugging Face Login' 7 | -------------------------------------------------------------------------------- /.azure_pipelines/job_templates/olive-setup-template.yaml: -------------------------------------------------------------------------------- 1 | parameters: 2 | python_version: '3.10' 3 | onnxruntime: 'onnxruntime' 4 | onnxruntime_nightly: false 5 | torch: torch 6 | 7 | steps: 8 | - task: UsePythonVersion@0 9 | inputs: 10 | versionSpec: ${{ parameters.python_version }} 11 | displayName: Use Python ${{ parameters.python_version }} 12 | 13 | - script: python -m pip install ${{ parameters.torch }} 14 | displayName: Install torch 15 | 16 | - script: python -m pip install . 
17 | displayName: Install Olive 18 | 19 | - ${{ if eq(parameters.onnxruntime_nightly, true) }}: 20 | - script: | 21 | pip install -r https://raw.githubusercontent.com/microsoft/onnxruntime/refs/heads/main/requirements.txt 22 | pip install ${{ parameters.onnxruntime }} --pre --index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/pypi/simple/ --disable-pip-version-check 23 | displayName: Install ${{ parameters.onnxruntime }} 24 | - ${{ else }}: 25 | - script: | 26 | pip install ${{ parameters.onnxruntime }} 27 | displayName: Install ${{ parameters.onnxruntime }} 28 | -------------------------------------------------------------------------------- /.azure_pipelines/olive-aml-ci.yaml: -------------------------------------------------------------------------------- 1 | trigger: 2 | batch: true 3 | branches: 4 | include: 5 | - main 6 | paths: 7 | exclude: 8 | - docs/* 9 | - examples/README.md 10 | - examples/**/README.md 11 | - README.md 12 | - CONTRIBUTING.md 13 | - LICENSE 14 | pr: none 15 | 16 | jobs: 17 | # Linux examples test 18 | - template: job_templates/olive-example-cpu-template.yaml 19 | parameters: 20 | name: Linux_CI 21 | pool: $(OLIVE_POOL_UBUNTU2004) 22 | subfolder: azureml 23 | examples: 24 | bert_ptq_cpu_aml: 25 | exampleFolder: bert 26 | exampleName: bert_ptq_cpu_aml 27 | resnet_ptq_cpu: 28 | exampleFolder: resnet 29 | exampleName: resnet_ptq_cpu_aml 30 | resnet_vitis_ai_ptq_cpu: 31 | exampleFolder: resnet 32 | exampleName: resnet_vitis_ai_ptq_cpu_aml 33 | llama2: 34 | exampleFolder: llama2 35 | exampleName: llama2 36 | exampleRequirements: requirements-pipeline.txt 37 | 38 | # Windows examples test 39 | - template: job_templates/olive-example-cpu-template.yaml 40 | parameters: 41 | name: Windows_CI 42 | pool: $(OLIVE_POOL_WIN2019) 43 | subfolder: azureml 44 | examples: 45 | bert_ptq_cpu_aml: 46 | exampleFolder: bert 47 | exampleName: bert_ptq_cpu_aml 48 | resnet_ptq_cpu: 49 | exampleFolder: resnet 50 | exampleName: resnet_ptq_cpu_aml 51 | resnet_vitis_ai_ptq_cpu: 52 | exampleFolder: resnet 53 | exampleName: resnet_vitis_ai_ptq_cpu_aml 54 | -------------------------------------------------------------------------------- /.azure_pipelines/package_publish.yaml: -------------------------------------------------------------------------------- 1 | trigger: none 2 | 3 | pool: 4 | name: $(OLIVE_POOL_UBUNTU2004) 5 | 6 | steps: 7 | - task: UsePythonVersion@0 8 | inputs: 9 | versionSpec: '3.9' 10 | 11 | - script: python -m pip install --upgrade pip setuptools wheel twine 12 | displayName: 'Install tools' 13 | 14 | - script: | 15 | python setup.py bdist_wheel 16 | displayName: 'Build package' 17 | 18 | - task: CredScan@3 19 | displayName: 'Run CredScan' 20 | inputs: 21 | debugMode: false 22 | continueOnError: true 23 | 24 | - task: ComponentGovernanceComponentDetection@0 25 | displayName: Component Detection 26 | inputs: 27 | # ignore docs and examples directories. They are not part of the package. 
28 | ignoreDirectories: 29 | $(Build.SourcesDirectory)/docs 30 | $(Build.SourcesDirectory)/examples 31 | 32 | - task: CopyFiles@2 33 | displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)' 34 | inputs: 35 | SourceFolder: '$(Build.SourcesDirectory)/dist' 36 | Contents: '*.whl' 37 | TargetFolder: '$(Build.ArtifactStagingDirectory)' 38 | 39 | - task: PublishBuildArtifacts@1 40 | displayName: 'Publish artifacts' 41 | inputs: 42 | ArtifactName: olive 43 | -------------------------------------------------------------------------------- /.azure_pipelines/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-devops 2 | azureml-pipeline 3 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = 3 | olive/logging.py 4 | olive/passes/pytorch/pytorch_lightning_utils.py 5 | olive/passes/pytorch/qat_utils.py 6 | olive/systems/docker/eval.py 7 | 8 | [report] 9 | exclude_lines = 10 | pragma: no cover 11 | def __repr__ 12 | raise AssertionError 13 | raise NotImplementedError 14 | if __name__ == .__main__.: 15 | class .*\bConfig\): 16 | @(abc\.)?abstractmethod 17 | @(abc\.)?staticmethod 18 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | 5 | trim_trailing_whitespace = true 6 | insert_final_newline = true 7 | indent_style = space 8 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 120 3 | per-file-ignores = 4 | __init__.py:F401 5 | pydantic_v1.py:F401 6 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Windows specific files should retain windows line-endings 5 | *.ps1 text eol=crlf 6 | 7 | # make sure .sh retains Unix line endings, even when checked out on windows. 8 | *.sh text eol=lf 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior. 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **Olive config** 20 | Add Olive configurations here. 21 | 22 | **Olive logs** 23 | Add logs here. 24 | 25 | **Other information** 26 | - OS: [e.g. Windows, Linux] 27 | - Olive version: [e.g. 0.4.0 or main] 28 | - ONNXRuntime package and version: [e.g. onnxruntime-gpu: 1.16.1] 29 | - Transformers package version: [e.g. transformers 4.44.1] 30 | 31 | 32 | **Additional context** 33 | Add any other context about the problem here. 
34 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.yaml: -------------------------------------------------------------------------------- 1 | name: Feature Request 2 | description: File a feature or enhancement proposal 3 | title: "[FR]: " 4 | labels: ["enhancement"] 5 | body: 6 | - type: markdown 7 | attributes: 8 | value: | 9 | Thank you for submitting a feature request. 10 | - type: textarea 11 | id: proposal 12 | attributes: 13 | label: Proposal Summary 14 | description: In a few sentences, provide a clear, high-level description of the feature request 15 | validations: 16 | required: true 17 | - type: checkboxes 18 | attributes: 19 | label: What component(s) does this request affect? 20 | description: Please choose one or more components below. 21 | options: 22 | - label: OliveModels 23 | - label: OliveSystems 24 | - label: OliveEvaluator 25 | - label: Metrics 26 | - label: Engine 27 | - label: Passes 28 | - label: Other 29 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Describe your changes 2 | 3 | ## Checklist before requesting a review 4 | - [ ] Add unit tests for this change. 5 | - [ ] Make sure all tests can pass. 6 | - [ ] Update documents if necessary. 7 | - [ ] Lint and apply fixes to your code by running `lintrunner -a` 8 | - [ ] Is this a user-facing change? If yes, give a description of this change to be included in the release notes. 9 | - [ ] Is this PR including examples changes? If yes, please remember to update [example documentation](https://github.com/microsoft/Olive/blob/main/docs/source/examples.md) in a follow-up PR. 10 | 11 | ## (Optional) Issue link 12 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/pre-commit/pre-commit-hooks 3 | rev: v4.5.0 4 | hooks: 5 | - id: end-of-file-fixer 6 | - id: trailing-whitespace 7 | args: [--markdown-linebreak-ext=md] 8 | - id: check-yaml 9 | - id: requirements-txt-fixer 10 | - repo: https://github.com/MarcoGorelli/absolufy-imports 11 | rev: v0.3.1 12 | hooks: 13 | - id: absolufy-imports 14 | exclude: examples/ 15 | - repo: local 16 | hooks: 17 | - id: format-json 18 | name: Format JSON 19 | language: python 20 | entry: python scripts/format_json.py 21 | files: \.(json)$ 22 | args: [ 23 | '--indent=4', 24 | '--max-line-length=120' 25 | ] 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-exclude *.py[cod] 2 | recursive-include examples * 3 | recursive-include docs * 4 | prune */**/__pycache__ 5 | prune docs/build/**/* 6 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= -a -W 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | SCHEMABUILD = python $(SOURCEDIR)/dump_schema.py 11 | 12 | # Put it first so that "make" without argument is like "make help". 13 | help: 14 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 15 | 16 | schema: 17 | $(SCHEMABUILD) --output $(BUILDDIR)/html/schema.json 18 | 19 | .PHONY: help Makefile schema 20 | 21 | # Catch-all target: route all unknown targets to Sphinx using the new 22 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 23 | %: Makefile 24 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 25 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Generating the documentation 2 | 3 | To generate the documentation, you first have to build it. 4 | 5 | ## Pre-requisites 6 | 7 | Install Olive. At the root of the code repository: 8 | 9 | ```bash 10 | pip install -e . 11 | ``` 12 | 13 | Install pip requirements. At `docs`: 14 | 15 | ```bash 16 | pip install -r requirements.txt 17 | ``` 18 | 19 | ## Building the documentation 20 | 21 | At `docs`: 22 | 23 | ```bash 24 | make html 25 | make linkcheck 26 | ``` 27 | 28 | ## Previewing the documentation 29 | 30 | At `docs/build/html`: 31 | 32 | ```bash 33 | python -m http.server {port-number} 34 | ``` 35 | 36 | The documentation site will be running at `http://localhost:{port-number}` 37 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | set SCHEMABUILD="python %SOURCEDIR%/dump_schema.py" 13 | 14 | %SPHINXBUILD% >NUL 2>NUL 15 | if errorlevel 9009 ( 16 | echo. 17 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 18 | echo.installed, then set the SPHINXBUILD environment variable to point 19 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 20 | echo.may add the Sphinx directory to PATH. 21 | echo.
22 | echo.If you don't have Sphinx installed, grab it from 23 | echo.https://www.sphinx-doc.org/ 24 | exit /b 1 25 | ) 26 | 27 | if "%1" == "" goto help 28 | 29 | if "%1" == "schema" goto schema 30 | 31 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% -W %O% 32 | goto end 33 | 34 | :help 35 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 36 | goto end 37 | 38 | :schema 39 | %SCHEMABUILD% --output %SOURCEDIR%/html/schema.json 40 | 41 | :end 42 | popd 43 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | # we use v1 API so need autodoc_pydantic<2.0.0 2 | # will also install pydantic<2.0.0 3 | autodoc_pydantic<2.0.0 4 | azure-ai-ml>=1.11.1 5 | azure-identity 6 | azureml-fsspec 7 | docker 8 | # latest 3.24.0 will break the pipeline 9 | # TODO(team): 55399 Switch back to the latest version once it's compatible with the pipeline 10 | marshmallow<3.24.0 11 | myst_parser 12 | onnxconverter_common 13 | psutil 14 | pydata_sphinx_theme 15 | pytorch_lightning 16 | sphinx>=6.1.3 17 | sphinx-argparse 18 | sphinx-copybutton 19 | sphinx-tabs 20 | sphinx_design 21 | sphinxcontrib-jquery 22 | sphinxcontrib-mermaid 23 | -------------------------------------------------------------------------------- /docs/source/_static/css/header.css: -------------------------------------------------------------------------------- 1 | h1 { 2 | font-size: 175%; 3 | } 4 | 5 | h2 { 6 | font-size: 150%; 7 | } 8 | 9 | h3 { 10 | font-size: 130%; 11 | } 12 | 13 | h4 { 14 | font-size: 100%; 15 | font-weight: 600; 16 | } 17 | -------------------------------------------------------------------------------- /docs/source/dump_schema.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | 4 | from olive.workflows.run.config import RunConfig 5 | 6 | if __name__ == "__main__": 7 | parser = argparse.ArgumentParser(description="Dump workflow schema") 8 | 9 | parser.add_argument("--output", type=str, default="schema.json", help="Output file") 10 | 11 | args = parser.parse_args() 12 | 13 | output_path = Path(args.output) 14 | output_path.parent.mkdir(parents=True, exist_ok=True) 15 | 16 | with open(output_path, "w") as f: 17 | f.write(RunConfig.schema_json(indent=2)) 18 | -------------------------------------------------------------------------------- /docs/source/features/azure-ai/index.rst: -------------------------------------------------------------------------------- 1 | Azure AI 2 | ======================== 3 | 4 | .. grid:: 2 2 2 3 5 | :class-container: cards 6 | 7 | .. grid-item-card:: 8 | **Azure AI integration** 9 | 10 | :octicon:`arrow-right;1em;sd-text-info` `Azure AI integration <azure-ai.html>`_ 11 | 12 | .. grid-item-card:: 13 | **Connect your own machines to Azure by Azure Arc** 14 | 15 | :octicon:`arrow-right;1em;sd-text-info` `Azure Arc <azure-arc.html>`_ 16 | 17 | .. grid-item-card:: 18 | **Scripts to manage your Azure assets** 19 | 20 | :octicon:`arrow-right;1em;sd-text-info` `Azure scripts <azure-script.html>`_ 21 | 22 | .. grid-item-card:: 23 | **Run Olive workflow on Azure** 24 | 25 | :octicon:`arrow-right;1em;sd-text-info` `Remote workflow <remote-workflow.html>`_ 26 | 27 | .. grid-item-card:: 28 | **Share models cache on Azure** 29 | 30 | :octicon:`arrow-right;1em;sd-text-info` `Shared cache <shared-model-cache.html>`_ 31 | 32 | 33 | ..
toctree:: 34 | :maxdepth: 1 35 | :hidden: 36 | 37 | azure-ai 38 | azure-arc 39 | azure-script 40 | remote-workflow 41 | shared-model-cache 42 | -------------------------------------------------------------------------------- /docs/source/features/ihv-integration/index.rst: -------------------------------------------------------------------------------- 1 | IHV Toolkit Integration 2 | ======================== 3 | 4 | .. grid:: 2 2 2 3 5 | :class-container: cards 6 | 7 | .. grid-item-card:: 8 | **OpenVINO** 9 | 10 | :octicon:`arrow-right;1em;sd-text-info` `OpenVINO <openvino.html>`_ 11 | 12 | .. grid-item-card:: 13 | **QNN** 14 | 15 | :octicon:`arrow-right;1em;sd-text-info` `QNN <qnn.html>`_ 16 | 17 | .. grid-item-card:: 18 | **SNPE** 19 | 20 | :octicon:`arrow-right;1em;sd-text-info` `SNPE <snpe.html>`_ 21 | 22 | .. toctree:: 23 | :maxdepth: 1 24 | :hidden: 25 | 26 | openvino 27 | qnn 28 | snpe 29 | -------------------------------------------------------------------------------- /docs/source/features/model-conversion/convert-pytorch.md: -------------------------------------------------------------------------------- 1 | # PyTorch 2 | 3 | PyTorch is an optimized tensor library for deep learning using GPUs and CPUs. 4 | 5 | ## TorchTRTConversion 6 | `TorchTRTConversion` converts the `torch.nn.Linear` modules in the transformer layers of a Hugging Face PyTorch model to `TRTModules` from `torch_tensorrt` with fp16 precision and sparse weights, if 7 | applicable. `torch_tensorrt` is an extension to `torch` where TensorRT-compiled engines can be used like regular `torch.nn.Module`s. This pass can be used to accelerate inference on transformer models 8 | with sparse weights by taking advantage of the 2:4 structured sparsity pattern supported by TensorRT. 9 | 10 | This pass only supports HfModels. Please refer to [TorchTRTConversion](torch_trt_conversion) for more details on the types of transformers models supported. 11 | 12 | ### Example Configuration 13 | ```json 14 | { 15 | "type": "TorchTRTConversion" 16 | } 17 | ``` 18 | -------------------------------------------------------------------------------- /docs/source/features/model-conversion/index.rst: -------------------------------------------------------------------------------- 1 | Model Conversion 2 | ======================== 3 | 4 | .. grid:: 2 2 2 3 5 | :class-container: cards 6 | 7 | .. grid-item-card:: 8 | **ONNX** 9 | 10 | :octicon:`arrow-right;1em;sd-text-info` `ONNX Conversion <convert-onnx.html>`_ 11 | 12 | .. grid-item-card:: 13 | **PyTorch** 14 | 15 | :octicon:`arrow-right;1em;sd-text-info` `TorchTRT Conversion <convert-pytorch.html>`_ 16 | 17 | .. grid-item-card:: 18 | **SNPE** 19 | 20 | :octicon:`arrow-right;1em;sd-text-info` `SNPE `_ 21 | 22 | ..
toctree:: 23 | :maxdepth: 1 24 | :hidden: 25 | 26 | convert-onnx 27 | convert-pytorch 28 | -------------------------------------------------------------------------------- /docs/source/images/auto_opt/pass_flows.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/auto_opt/pass_flows.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/add-infra.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/add-infra.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/add-kub-detail.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/add-kub-detail.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/add-kub-to-arc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/add-kub-to-arc.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/add-kub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/add-kub.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/attach-kub.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/attach-kub.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/attach-suc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/attach-suc.png -------------------------------------------------------------------------------- /docs/source/images/azure_arc/new-compute.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/azure_arc/new-compute.png -------------------------------------------------------------------------------- /docs/source/images/datacontainer_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/datacontainer_example.png -------------------------------------------------------------------------------- /docs/source/images/dataset-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/dataset-flow.png 
-------------------------------------------------------------------------------- /docs/source/images/model_splitting/cost_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/model_splitting/cost_model.png -------------------------------------------------------------------------------- /docs/source/images/model_splitting/num_splits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/model_splitting/num_splits.png -------------------------------------------------------------------------------- /docs/source/images/multi-lora-diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/multi-lora-diagram.png -------------------------------------------------------------------------------- /docs/source/images/olive-black-text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/olive-black-text.png -------------------------------------------------------------------------------- /docs/source/images/olive-design.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/olive-design.png -------------------------------------------------------------------------------- /docs/source/images/olive-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/olive-flow.png -------------------------------------------------------------------------------- /docs/source/images/olive-white-text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/docs/source/images/olive-white-text.png -------------------------------------------------------------------------------- /docs/source/index.md: -------------------------------------------------------------------------------- 1 | # Olive: The AI Model Optimization Toolkit for the ONNX Runtime 2 |
3 | 4 | ```{gallery-grid} 5 | :grid-columns: 1 2 2 3 6 | 7 | - header: "{octicon}`codescan-checkmark` Overview" 8 | content: "Learn the benefits of using Olive to optimize your models.
{octicon}`arrow-right` [Overview](why-olive.md)" 9 | - header: "{octicon}`zap` Get Started" 10 | content: "Install `olive-ai` with `pip` and get up and running with OLIVE in minutes.
{octicon}`arrow-right` [Get Started](getting-started/getting-started.md)" 11 | - header: "{octicon}`rocket` How To" 12 | content: "Find more details on specific Olive capabilities, such as quantization, running workflows on remote compute, model packaging, conversions, and more!
{octicon}`arrow-right` [How-To](how-to/index)" 13 | - header: "{fas}`code` Reference" 14 | content: "Find reference documentation for the Olive CLI, configuration options, and passes.
{octicon}`arrow-right` [Reference](reference/index)" 15 | - header: "{octicon}`diff-added` Extending Olive" 16 | content: "Learn about the design of Olive and how to extend Olive with your own optimization methods.
{octicon}`arrow-right` [Extend Olive](extending/index)" 17 | ``` 18 | 19 | 20 | ```{toctree} 21 | :maxdepth: 2 22 | :hidden: 23 | 24 | why-olive.md 25 | getting-started/getting-started.md 26 | how-to/index 27 | examples.md 28 | features/index 29 | reference/index 30 | extending/index 31 | ``` 32 | -------------------------------------------------------------------------------- /docs/source/reference/index.rst: -------------------------------------------------------------------------------- 1 | Reference 2 | ============== 3 | 4 | .. grid:: 2 2 2 3 5 | :class-container: cards 6 | 7 | .. grid-item-card:: 8 | **CLI** 9 | 10 | Learn about CLI features and options. 11 | 12 | :octicon:`arrow-right;1em;sd-text-info` `CLI <cli.html>`_ 13 | 14 | .. grid-item-card:: 15 | **Olive options** 16 | 17 | Explore Olive configuration options. 18 | 19 | :octicon:`arrow-right;1em;sd-text-info` `Olive options <options.html>`_ 20 | 21 | .. grid-item-card:: 22 | **Pass** 23 | 24 | Explore Olive passes. 25 | 26 | :octicon:`arrow-right;1em;sd-text-info` `Pass <pass.html>`_ 27 | 28 | .. toctree:: 29 | :maxdepth: 2 30 | :hidden: 31 | 32 | cli 33 | options 34 | pass 35 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /examples/adetailer/README.md: -------------------------------------------------------------------------------- 1 | ## How to run 2 | ### Pip requirements 3 | Install the necessary python packages: 4 | ``` 5 | python -m pip install -r requirements.txt 6 | ``` 7 | 8 | ### Run sample using config 9 | ``` 10 | olive run --config ./face_yolo_qnn.json 11 | ``` 12 | 13 | **Note**: The op_types_to_quantize setting in the face_yolo_qnn.json file is configured to exclude the Mul operation. This is because quantizing the Mul operation significantly increases the latency of this model on the QNN. 14 | 15 | -------------------------------------------------------------------------------- /examples/adetailer/requirements.txt: -------------------------------------------------------------------------------- 1 | pycocotools 2 | ultralytics 3 | -------------------------------------------------------------------------------- /examples/ast/README.md: -------------------------------------------------------------------------------- 1 | # AST Optimization 2 | This folder contains examples of AST (Audio Spectrogram Transformer) optimization using Olive workflows. 3 | 4 | - CPU: *PyTorch Model -> Onnx Model -> Transformers Optimized Onnx Model -> Quantized Onnx Model -> ONNX Runtime performance tuning* 5 | 6 | - Model: https://huggingface.co/MIT/ast-finetuned-speech-commands-v2 7 | - Dataset: https://huggingface.co/datasets/speech_commands 8 | 9 | ### Run example using config 10 | 11 | The `ast.json` config is used for CPU optimization; it quantizes the model and tunes the inference config for better performance. 12 | 13 | First, install required packages according to passes.
14 | ```sh 15 | olive run --config ast.json --setup 16 | ``` 17 | 18 | Then, optimize the model 19 | ```sh 20 | olive run --config ast.json 21 | ``` 22 | 23 | or run simply with python code: 24 | ```python 25 | from olive.workflows import run as olive_run 26 | olive_run("ast.json") 27 | ``` 28 | 29 | After running the above command, the model candidates and corresponding config will be saved in the output directory. 30 | You can then select the best model and config from the candidates and run the model with the selected config. 31 | -------------------------------------------------------------------------------- /examples/ast/requirements.txt: -------------------------------------------------------------------------------- 1 | evaluate 2 | librosa 3 | optimum 4 | psutil 5 | # https://github.com/huggingface/evaluate/issues/655 6 | scikit-learn==1.5.2 7 | soundfile 8 | -------------------------------------------------------------------------------- /examples/bert/.gitignore: -------------------------------------------------------------------------------- 1 | mlruns/ 2 | bert_qat/ 3 | -------------------------------------------------------------------------------- /examples/bert/bert.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import argparse 6 | import json 7 | from pathlib import Path 8 | 9 | if __name__ == "__main__": 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | "--optimize", 13 | action="store_true", 14 | help="If set, run transformers optimization pass", 15 | ) 16 | args = parser.parse_args() 17 | 18 | input_filename = "bert_cuda_gpu.template.json" 19 | with Path(input_filename).open("r") as f: 20 | config = json.load(f) 21 | 22 | if not args.optimize: 23 | del config["passes"]["transformers_optimization"] 24 | 25 | output_filename = input_filename.replace(".template", "") 26 | with Path(output_filename).open("w") as strm: 27 | json.dump(config, fp=strm, indent=4) 28 | -------------------------------------------------------------------------------- /examples/bert/conda.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.10.16 6 | - pip=22.3.1 7 | - pip: 8 | - datasets 9 | - evaluate 10 | - optimum 11 | - psutil 12 | - scipy 13 | - scikit-learn==1.5.2 # https://github.com/huggingface/evaluate/issues/655 14 | - torch 15 | - --extra-index-url https://download.pytorch.org/whl/cpu 16 | - transformers>=4.41.1 17 | - git+https://github.com/microsoft/Olive#egg=olive-ai[cpu] 18 | -------------------------------------------------------------------------------- /examples/bert/conda_gpu.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.10.16 6 | - pip=22.3.1 7 | - pip: 8 | - datasets 9 | - evaluate 10 | - optimum 11 | - psutil 12 | - scipy 13 | - scikit-learn==1.5.2 # https://github.com/huggingface/evaluate/issues/655 14 | - torch 15 | - --extra-index-url https://download.pytorch.org/whl/cu118 16 | - transformers>=4.41.1 17 | - git+https://github.com/microsoft/Olive#egg=olive-ai[gpu] 18 | 
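For the BERT example above, a minimal sketch of how `bert.py` and the generated workflow config might be driven from Python (this assumes it is run from the `examples/bert` directory with `olive-ai` and the packages in `requirements.txt` installed; the `--optimize` flag and the `bert_cuda_gpu.json` output name come from `bert.py`):

```python
# Sketch: render the workflow config from the template, then run it with Olive.
# Omitting --optimize removes the transformers_optimization pass (see bert.py).
import subprocess

from olive.workflows import run as olive_run

subprocess.run(["python", "bert.py", "--optimize"], check=True)

# Candidate models and configs are written to the output directory configured
# in bert_cuda_gpu.json.
olive_run("bert_cuda_gpu.json")
```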
-------------------------------------------------------------------------------- /examples/bert/openvino/README.md: -------------------------------------------------------------------------------- 1 | # BERT Optimization 2 | 3 | This folder contains examples of BERT optimization using different workflows for [google-bert/bert-base-multilingual-cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) and [Intel/bert-base-uncased-mrpc](https://huggingface.co/Intel/bert-base-uncased-mrpc) models. 4 | 5 | - Intel® NPU: [Optimization for BERT base multilingual cased](./bert_base_multilingual_cased/) 6 | - Intel® NPU: [Optimization for BERT base uncased mrpc](./bert_base_uncased_mrpc/) 7 | -------------------------------------------------------------------------------- /examples/bert/openvino/bert_base_multilingual_cased/README.md: -------------------------------------------------------------------------------- 1 | # BERT Base Multilingual Cased Quantization 2 | 3 | This folder contains a sample use case of Olive to optimize a [google-bert/bert-base-multilingual-cased](https://huggingface.co/google-bert/bert-base-multilingual-cased) model using OpenVINO tools. 4 | 5 | - Intel® NPU: [BERT Base Multilingual Cased static shape model](#static-shape-model) 6 | 7 | ## Quantization Workflows 8 | 9 | This workflow performs quantization with OpenVINO NNCF. It performs the optimization pipeline: 10 | 11 | - *HuggingFace Model -> OpenVINO Model -> Quantized OpenVINO model -> Quantized encapsulated ONNX OpenVINO IR model* 12 | 13 | ### Static shape model 14 | 15 | The config file: [bert-base-multilingual-cased_context_ov_static.json](bert-base-multilingual-cased_context_ov_static.json) executes the above workflow producing static shape model. 16 | 17 | ## How to run 18 | 19 | Install the necessary python packages: 20 | 21 | ```bash 22 | python -m pip install olive-ai[openvino] 23 | ``` 24 | 25 | ### Run sample using config 26 | 27 | The optimization techniques to run are specified in the relevant config json file. 28 | 29 | ```bash 30 | olive run --config bert-base-multilingual-cased_context_ov_static.json 31 | ``` 32 | 33 | or run simply with python code: 34 | 35 | ```python 36 | from olive.workflows import run as olive_run 37 | olive_run("bert-base-multilingual-cased_context_ov_static.json") 38 | ``` 39 | 40 | After running the above command, the model candidates and corresponding config will be saved in the output directory. 41 | -------------------------------------------------------------------------------- /examples/bert/openvino/bert_base_uncased_mrpc/README.md: -------------------------------------------------------------------------------- 1 | # BERT Base Uncased MRPC Quantization 2 | 3 | This folder contains a sample use case of Olive to optimize a [Intel/bert-base-uncased-mrpc](https://huggingface.co/Intel/bert-base-uncased-mrpc) model using OpenVINO tools. 4 | 5 | - Intel® NPU: [BERT Base Uncased MRPC static shape model](#static-shape-model) 6 | 7 | ## Quantization Workflows 8 | 9 | This workflow performs quantization with OpenVINO NNCF. It performs the optimization pipeline: 10 | 11 | - *HuggingFace Model -> OpenVINO Model -> Quantized OpenVINO model -> Quantized encapsulated ONNX OpenVINO IR model* 12 | 13 | ### Static shape model 14 | 15 | The config file: [bert-base-uncased-mrpc_context_ov_static.json](bert-base-uncased-mrpc_context_ov_static.json) executes the above workflow producing static shape model. 
16 | 17 | ## How to run 18 | 19 | Install the necessary python packages: 20 | 21 | ```bash 22 | python -m pip install olive-ai[openvino] 23 | ``` 24 | 25 | ### Run sample using config 26 | 27 | The optimization techniques to run are specified in the relevant config json file. 28 | 29 | ```bash 30 | olive run --config bert-base-uncased-mrpc_context_ov_static.json 31 | ``` 32 | 33 | or run simply with python code: 34 | 35 | ```python 36 | from olive.workflows import run as olive_run 37 | olive_run("bert-base-uncased-mrpc_context_ov_static.json") 38 | ``` 39 | 40 | After running the above command, the model candidates and corresponding config will be saved in the output directory. 41 | -------------------------------------------------------------------------------- /examples/bert/qnn/README.md: -------------------------------------------------------------------------------- 1 | ### BERT Optimization with PTQ on Qualcomm NPU using QNN EP 2 | This workflow performs BERT optimization on Qualcomm NPU with ONNX Runtime PTQ. It performs the optimization pipeline: 3 | - *PyTorch Model -> Onnx Model -> Static shaped Onnx Model -> Quantized Onnx Model* 4 | 5 | It requires an x86 Python environment on a Windows ARM machine with `onnxruntime-qnn` installed. 6 | 7 | **NOTE:** The model optimization part of the workflow can also be done on a Linux/Windows machine with a different onnxruntime package installed. Remove the `"evaluators"` and `"evaluator"` sections from the configuration file to skip the evaluation step. 8 | -------------------------------------------------------------------------------- /examples/bert/qnn/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | evaluate 3 | nltk 4 | optimum 5 | pandas 6 | tabulate 7 | -------------------------------------------------------------------------------- /examples/bert/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-ai-ml 2 | azure-identity 3 | # TODO(anyone): load_metrics was removed since 3.0.0. Using evaluate instead 4 | datasets<3.0.0 5 | docker>=7.1.0 6 | evaluate 7 | neural-compressor 8 | optimum 9 | pytorch_lightning 10 | # https://github.com/huggingface/evaluate/issues/655 11 | scikit-learn==1.5.2 12 | scipy 13 | tabulate 14 | transformers 15 | -------------------------------------------------------------------------------- /examples/bert/snpe/README.md: -------------------------------------------------------------------------------- 1 | # BERT model optimization on Qualcomm NPU with SNPE SDK 2 | This folder contains a sample use case of Olive to convert a BERT model to an ONNX model, then to a SNPE DLC model, and to evaluate the accuracy of the DLC model. 3 | 4 | Performs the optimization pipeline: 5 | - *Pytorch Model -> Onnx Model with Dynamic Shape -> Onnx Model with Fixed Shape -> SNPE Model* 6 | 7 | ## Prerequisites 8 | ### Download and unzip SNPE SDK 9 | Download the SNPE SDK zip following [instructions from Qualcomm](https://developer.qualcomm.com/software/qualcomm-neural-processing-sdk). 10 | 11 | We have tested it with SNPE v2.18.0.240101. 12 | 13 | Unzip the file and set the unzipped directory path as environment variable `SNPE_ROOT`. 14 | 15 | ### Configure SNPE 16 | ```sh 17 | olive configure-qualcomm-sdk --py_version 3.8 --sdk snpe 18 | ``` 19 | 20 | ## Run sample 21 | Run the conversion and quantization locally. 22 | ``` 23 | olive run --config bert_snpe.json 24 | ``` 25 | 26 | ## Issues 27 | 28 | 1.
"Module 'qti.aisw.converters' has no attribute 'onnx': 29 | Refer to this: https://developer.qualcomm.com/comment/21810#comment-21810, 30 | change the import statement in `{SNPE_ROOT}/lib/python/qti/aisw/converters/onnx/onnx_to_ir.py:L30` to: 31 | ```python 32 | from qti.aisw.converters.onnx import composable_custom_op_utils as ComposableCustomOp 33 | ``` 34 | -------------------------------------------------------------------------------- /examples/bert/snpe/user_script.py: -------------------------------------------------------------------------------- 1 | from olive.data.registry import Registry 2 | 3 | 4 | @Registry.register_post_process() 5 | def snpe_post_process(output_data, **kwargs): 6 | import torch 7 | 8 | logits = torch.tensor(output_data["logits"]) 9 | _, preds = torch.max(logits, dim=-1) 10 | 11 | return preds 12 | -------------------------------------------------------------------------------- /examples/bge/requirements.txt: -------------------------------------------------------------------------------- 1 | mteb 2 | -------------------------------------------------------------------------------- /examples/clip/qnn/README.md: -------------------------------------------------------------------------------- 1 | # CLIP VIT Optimization with PTQ on Qualcomm NPU using QNN EP 2 | This workflow performs CLIP VIT quantization on Qualcomm NPU with ONNX Runtime PTQ. It performs the pipeline: 3 | - *PyTorch Model -> Onnx Model -> Quantized Onnx Model* 4 | 5 | It requires x86 python environment on a Windows ARM machine with `onnxruntime-qnn` installed. 6 | 7 | **NOTE:** The model quantization part of the workflow can also be done on a Linux/Windows machine with a different onnxruntime package installed. Remove the `"evaluators"` and `"evaluator"` sections from the configuration file to skip the evaluation step. 8 | -------------------------------------------------------------------------------- /examples/clip/qnn/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | evaluate 3 | tabulate 4 | -------------------------------------------------------------------------------- /examples/clip/requirements.txt: -------------------------------------------------------------------------------- 1 | # TODO(anyone): load_metrics was removed since 3.0.0. Using evaluate instead 2 | datasets<3.0.0 3 | evaluate 4 | scikit-learn==1.5.2 5 | transformers 6 | -------------------------------------------------------------------------------- /examples/deberta/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-ai-ml 2 | azure-identity 3 | datasets 4 | optimum 5 | -------------------------------------------------------------------------------- /examples/deepseek/README.md: -------------------------------------------------------------------------------- 1 | # Deepseek R1 Distill optimization 2 | 3 | Sample use cases of Olive to optimize a [DeepSeek R1 Distill](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B) using Olive. 4 | 5 | - [Finetune and Optimize for CPU/CUDA](../getting_started/olive-deepseek-finetune.ipynb) 6 | - [QDQ Model with 4-bit Weights & 16-bit Activations](../phi3_5/README.md): 7 | - Run the workflow with `olive run --config qdq_config.json -m deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B -o models/deepseek-r1-qdq`. 
8 | - [AMD NPU: Optimization and Quantization for VitisAI](../phi3_5/README.md): 9 | - Run the workflow with `olive run --config qdq_config_vitis_ai.json -m deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B -o models/deepseek-r1-vai`. 10 | - [PTQ + AOT Compilation for Qualcomm NPUs using QNN EP](../phi3_5/README.md): 11 | - Run the workflow with `olive run --config qnn_config.json -m deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B -o models/deepseek-r1-qnn`. 12 | - Run the inference with `python app.py -m models/deepseek-r1-qnn -c "<|User|>{input}<|Assistant|>"`. 13 | - [PTQ + AWQ ONNX OVIR Encapsulated 4-bit weight compression using Optimum OpenVINO](./openvino/) 14 | -------------------------------------------------------------------------------- /examples/directml/README.md: -------------------------------------------------------------------------------- 1 | # DirectML 2 | 3 | Keep `llm`, `squeezenet`, `stable_diffusion` and `stable_diffusion_xl` here for the DirectML team and the DirectML EP. 4 | -------------------------------------------------------------------------------- /examples/directml/llm/.gitignore: -------------------------------------------------------------------------------- 1 | /raw_model_data/ 2 | /footprints/ 3 | -------------------------------------------------------------------------------- /examples/directml/llm/chat_app/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) 5 | -------------------------------------------------------------------------------- /examples/directml/llm/chat_app/app_modules/overwrites.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | # pylint: disable=relative-beyond-top-level 4 | from .presets import gr 5 | from .utils import convert_asis, convert_mdtext, detect_converted_mark 6 | 7 | 8 | def postprocess(self, y: list[tuple[str | None, str | None]]) -> list[tuple[str | None, str | None]]: 9 | """Each message and response should be a string, which may be in Markdown format. 10 | 11 | Returns: 12 | List of tuples representing the message and response. 13 | Each message and response will be a string of HTML.
14 | 15 | """ 16 | if y is None or y == []: 17 | return [] 18 | temp = [] 19 | for x in y: 20 | user, bot = x 21 | if not detect_converted_mark(user): 22 | user = convert_asis(user) 23 | if not detect_converted_mark(bot): 24 | bot = convert_mdtext(bot) 25 | temp.append((user, bot)) 26 | return temp 27 | 28 | 29 | GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse 30 | -------------------------------------------------------------------------------- /examples/directml/llm/chat_app/assets/custom.js: -------------------------------------------------------------------------------- 1 | // custom javascript here 2 | -------------------------------------------------------------------------------- /examples/directml/llm/chat_app/interface/base_interface.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=disallowed-name 2 | 3 | 4 | class BaseLLMInterface: 5 | def __init__(self): 6 | pass 7 | 8 | def foo(self): 9 | pass 10 | -------------------------------------------------------------------------------- /examples/directml/llm/config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | decoder_model = None 7 | normalization_type = "rms" 8 | state_dict = {} 9 | strict_weights_loading = True 10 | hidden_size = 4096 11 | head_dim = 128 12 | intermediate_size = 11008 13 | num_heads = 32 14 | num_key_value_heads = 32 15 | num_layers = 32 16 | vocab_size = 32000 17 | epsilon = 1e-5 18 | model_type = "llama" 19 | apply_residual_connection_post_layernorm = True 20 | model_id = "meta-llama/Llama-2-7b-chat-hf" 21 | partial_rotary_factor = 1.0 22 | max_position_embeddings = 4096 23 | use_bias = False 24 | hidden_act = "silu" 25 | has_up_proj = True 26 | has_input_layernorm_bias = True 27 | has_norm_bias = True 28 | has_lm_head_bias = False 29 | use_split_sigmoid = False 30 | -------------------------------------------------------------------------------- /examples/directml/llm/placeholder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/directml/llm/placeholder.png -------------------------------------------------------------------------------- /examples/directml/llm/requirements.txt: -------------------------------------------------------------------------------- 1 | huggingface-hub 2 | markdown 3 | mdtex2html 4 | optimum 5 | Pygments 6 | sentencepiece 7 | tabulate 8 | torch 9 | -------------------------------------------------------------------------------- /examples/directml/squeezenet/README.md: -------------------------------------------------------------------------------- 1 | # SqueezeNet Latency Optimization with DirectML 2 | This folder contains a sample use case of Olive to optimize the [SqueezeNet](https://pytorch.org/hub/pytorch_vision_squeezenet/) model using ONNX conversion, conversion to FLOAT16, and general ONNX performance tuning. 3 | 4 | Performs optimization pipeline: 5 | 6 | PyTorch Model -> [Convert to ONNX] -> [FP16 Conversion] -> [Tune performance] -> Optimized FP16 ONNX Model 7 | 8 | Outputs the best metrics, model, and corresponding Olive config. 
9 | 10 | ## Optimize SqueezeNet 11 | First, install required packages according to passes. 12 | ``` 13 | olive run --config squeezenet_config.json --setup 14 | ``` 15 | Then, optimize the model 16 | ``` 17 | olive run --config squeezenet_config.json 18 | ``` 19 | 20 | or run simply with python code: 21 | 22 | ```python 23 | from olive.workflows import run as olive_run 24 | olive_run("squeezenet_config.json") 25 | ``` 26 | -------------------------------------------------------------------------------- /examples/directml/squeezenet/squeezenet_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "PyTorchModel", 4 | "model_loader": "load_pytorch_origin_model", 5 | "model_script": "user_script.py", 6 | "io_config": { 7 | "input_names": [ "input_image" ], 8 | "input_shapes": [ [ 1, 3, 224, 224 ] ], 9 | "output_names": [ "output" ] 10 | } 11 | }, 12 | "systems": { "local_system": { "type": "LocalSystem", "accelerators": [ { "device": "gpu" } ] } }, 13 | "evaluators": { 14 | "common_evaluator": { 15 | "metrics": [ 16 | { 17 | "name": "latency", 18 | "type": "latency", 19 | "sub_types": [ { "name": "avg", "priority": 1 }, { "name": "max" }, { "name": "min" } ] 20 | } 21 | ] 22 | } 23 | }, 24 | "passes": { 25 | "torch_to_onnx": { "type": "OnnxConversion", "target_opset": 13 }, 26 | "float16_conversion": { "type": "OnnxFloatToFloat16" }, 27 | "session_params_tuning": { 28 | "type": "OrtSessionParamsTuning", 29 | "device": "gpu", 30 | "execution_mode_list": [ "ORT_SEQUENTIAL" ], 31 | "providers_list": [ "DmlExecutionProvider" ] 32 | } 33 | }, 34 | "log_severity_level": 0, 35 | "evaluator": "common_evaluator", 36 | "evaluate_input_model": false, 37 | "host": "local_system", 38 | "target": "local_system", 39 | "clean_cache": true, 40 | "cache_dir": "cache" 41 | } 42 | -------------------------------------------------------------------------------- /examples/directml/squeezenet/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import torch 6 | 7 | 8 | def load_pytorch_origin_model(torch_hub_model_path): 9 | return torch.hub.load("pytorch/vision:v0.10.0", "squeezenet1_1", pretrained=True) 10 | 11 | 12 | class DataLoader: 13 | def __init__(self, batch_size): 14 | self.batch_size = batch_size 15 | 16 | def __getitem__(self, idx): 17 | input_data = torch.rand((self.batch_size, 3, 224, 224), dtype=torch.float16) 18 | label = None 19 | return input_data, label 20 | 21 | 22 | def create_dataloader(data_dir, batch_size, *args, **kwargs): 23 | return DataLoader(batch_size) 24 | -------------------------------------------------------------------------------- /examples/directml/stable_diffusion/readme/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/directml/stable_diffusion/readme/pipeline.png -------------------------------------------------------------------------------- /examples/directml/stable_diffusion_xl/readme/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/directml/stable_diffusion_xl/readme/pipeline.png -------------------------------------------------------------------------------- /examples/directml/stable_diffusion_xl/readme/sdxl_flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/directml/stable_diffusion_xl/readme/sdxl_flow.png -------------------------------------------------------------------------------- /examples/falcon/README.md: -------------------------------------------------------------------------------- 1 | # Falcon Optimization 2 | This folder contains a sample use case of Olive to optimize a [falcon-7b](https://huggingface.co/tiiuae/falcon-7b) model using ONNXRuntime tools. 3 | 4 | ## Optimization Workflows 5 | This workflow performs Falcon optimization on CPU with ONNX Runtime. It performs the optimization pipeline: 6 | - *PyTorch Model -> Onnx Model -> Transformers Optimized Onnx Model fp16* 7 | 8 | Config file: [config.json](config.json) 9 | 10 | ## How to run 11 | ### Pip requirements 12 | Install the necessary python packages: 13 | ``` 14 | python -m pip install -r requirements.txt 15 | ``` 16 | 17 | ### Run sample using config 18 | 19 | The optimization techniques to run are specified in the relevant config json file. 20 | 21 | First, install required packages according to passes. 22 | ``` 23 | olive run --config config.json --setup 24 | ``` 25 | 26 | Then, optimize the model 27 | ``` 28 | olive run --config config.json 29 | ``` 30 | 31 | or run simply with python code: 32 | ```python 33 | from olive.workflows import run as olive_run 34 | olive_run("config.json") 35 | ``` 36 | 37 | After running the above command, the model candidates and corresponding config will be saved in the output directory. 38 | You can then select the best model and config from the candidates and run the model with the selected config. 
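As a rough sketch of that last step for the Falcon example, one way to inspect a selected ONNX candidate before wiring it into an inference pipeline is to load it with ONNX Runtime. The model path below is hypothetical; use the candidate path reported in the Olive output directory, and note that which execution providers are available depends on the installed onnxruntime package.

```python
# Sketch: load one optimized candidate with ONNX Runtime and list its inputs.
# "models/falcon/model.onnx" is a placeholder for the chosen candidate path.
import onnxruntime as ort

session = ort.InferenceSession(
    "models/falcon/model.onnx",  # hypothetical candidate path
    providers=ort.get_available_providers(),
)
for inp in session.get_inputs():
    print(inp.name, inp.shape, inp.type)
```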
39 | -------------------------------------------------------------------------------- /examples/falcon/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | -------------------------------------------------------------------------------- /examples/gptj/gptj_inc_dynamic_ptq_cpu.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "EleutherAI/gpt-j-6B" }, 3 | "data_configs": [ 4 | { 5 | "name": "latency_data_config", 6 | "user_script": "user_script.py", 7 | "load_dataset_config": { "type": "simple_dataset" }, 8 | "dataloader_config": { "type": "gptj_dataloader", "batch_size": 1 } 9 | } 10 | ], 11 | "evaluators": { 12 | "common_evaluator": { 13 | "metrics": [ 14 | { 15 | "name": "latency", 16 | "type": "latency", 17 | "sub_types": [ { "name": "avg", "priority": 1 } ], 18 | "data_config": "latency_data_config" 19 | } 20 | ] 21 | } 22 | }, 23 | "passes": { 24 | "conversion": { 25 | "type": "OnnxConversion", 26 | "target_opset": 13, 27 | "save_as_external_data": true, 28 | "all_tensors_to_one_file": true 29 | }, 30 | "quantization": { 31 | "type": "IncDynamicQuantization", 32 | "save_as_external_data": true, 33 | "all_tensors_to_one_file": true 34 | } 35 | }, 36 | "log_severity_level": 0, 37 | "evaluator": "common_evaluator", 38 | "cache_dir": "cache", 39 | "output_dir": "models/gptj_inc_dynamic_ptq_cpu" 40 | } 41 | -------------------------------------------------------------------------------- /examples/gptj/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | neural-compressor 3 | onnxruntime 4 | -------------------------------------------------------------------------------- /examples/gte/README.md: -------------------------------------------------------------------------------- 1 | # GTE-Large-v1.5 Optimization 2 | This folder contains a sample use case of Olive to optimize a [Alibaba-NLP/gte-large-en-v1.5](https://huggingface.co/Alibaba-NLP/gte-large-en-v1.5) model. 3 | 4 | ## Optimization Workflows 5 | This workflow performs optimization on CPU with ONNX Runtime. It performs the optimization pipeline: 6 | - *PyTorch Model -> Onnx Model -> Quantized Onnx Model* 7 | 8 | Config file: [config.json](config.json) 9 | 10 | ## How to run 11 | ### Run sample using config 12 | 13 | The optimization techniques to run are specified in the relevant config json file. 14 | 15 | First, install required packages according to passes. 16 | ``` 17 | olive run --config config.json --setup 18 | ``` 19 | 20 | Then, optimize the model 21 | ``` 22 | olive run --config config.json 23 | ``` 24 | 25 | or run simply with python code: 26 | ```python 27 | from olive.workflows import run as olive_run 28 | olive_run("config.json") 29 | ``` 30 | 31 | After running the above command, the model candidates and corresponding config will be saved in the output directory. 32 | You can then select the best model and config from the candidates and run the model with the selected config. 
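As a rough sketch of running a selected candidate for the GTE example, the quantized ONNX model can be fed tokenized text through ONNX Runtime. The model path is a placeholder for the candidate in the output directory, and the input names match the ones produced by the user script below (input_ids, token_type_ids, attention_mask).

```python
# Sketch: embed a sentence with the quantized GTE ONNX candidate on CPU.
# "model.onnx" is a placeholder for the candidate path in the output directory.
import onnxruntime as ort
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-large-en-v1.5")
session = ort.InferenceSession("model.onnx", providers=["CPUExecutionProvider"])

encoded = tokenizer("Olive optimizes ONNX models.", return_tensors="np")
feed = {
    name: encoded[name].astype("int64")
    for name in ("input_ids", "token_type_ids", "attention_mask")
}
embeddings = session.run(None, feed)[0]
print(embeddings.shape)
```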
33 | -------------------------------------------------------------------------------- /examples/gte/user_script.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data import Dataset 3 | 4 | from olive.data.registry import Registry 5 | 6 | 7 | class TextDataset(Dataset): 8 | def __init__(self, text): 9 | self.text = text 10 | 11 | def __len__(self): 12 | return len(self.text) 13 | 14 | def __getitem__(self, idx): 15 | sample = self.text[idx] 16 | input_ids = torch.tensor(sample["input_ids"], dtype=torch.int64) 17 | token_type_ids = torch.tensor(sample["token_type_ids"], dtype=torch.int64) 18 | attention_mask = torch.tensor(sample["attention_mask"], dtype=torch.int64) 19 | 20 | return {"input_ids": input_ids, "token_type_ids": token_type_ids, "attention_mask": attention_mask}, idx 21 | 22 | 23 | @Registry.register_pre_process() 24 | def dataset_pre_process(dataset, **kwargs): 25 | from transformers import AutoTokenizer 26 | 27 | max_samples = kwargs.get("max_samples", 128) 28 | model_name = kwargs.get("model_name") 29 | texts = [] 30 | for i, sample in enumerate(dataset): 31 | if i >= max_samples: 32 | break 33 | tokenizer = AutoTokenizer.from_pretrained(model_name) 34 | batch_dict = tokenizer(sample["text"], max_length=8192, padding=True, truncation=True) 35 | texts.append( 36 | { 37 | "input_ids": batch_dict["input_ids"], 38 | "token_type_ids": batch_dict["token_type_ids"], 39 | "attention_mask": batch_dict["attention_mask"], 40 | } 41 | ) 42 | return TextDataset(texts) 43 | -------------------------------------------------------------------------------- /examples/llama2/.gitignore: -------------------------------------------------------------------------------- 1 | llama2_cpu* 2 | llama2_gpu* 3 | llama2_model_builder.json 4 | -------------------------------------------------------------------------------- /examples/llama2/conda_gpu.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.9.21 6 | - pip=22.3.1 7 | - pip: 8 | - accelerate 9 | - bitsandbytes 10 | - peft 11 | - sentencepiece 12 | - datasets 13 | - evaluate 14 | - psutil 15 | - optimum 16 | - scipy 17 | - scikit-learn 18 | - onnxruntime-genai 19 | - torch 20 | - --extra-index-url https://download.pytorch.org/whl/cu118 21 | - git+https://github.com/microsoft/Olive#egg=olive-ai[gpu,azureml] 22 | -------------------------------------------------------------------------------- /examples/llama2/llama2_generate.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "meta-llama/Llama-2-7b-hf" }, 3 | "data_configs": [ 4 | { 5 | "name": "generation_latency_dummy_data", 6 | "type": "TransformersPromptDummyDataContainer", 7 | "load_dataset_config": { "generative": true } 8 | } 9 | ], 10 | "systems": { 11 | "local_system": { 12 | "type": "LocalSystem", 13 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 14 | } 15 | }, 16 | "evaluators": { 17 | "merged_evaluator": { 18 | "metrics": [ 19 | { 20 | "name": "latency_prompt_processing", 21 | "type": "latency", 22 | "sub_types": [ { "name": "avg", "priority": 1 } ], 23 | "data_config": "generation_latency_dummy_data", 24 | "user_config": { "io_bind": true, "run_kwargs": { "max_new_tokens": 64 } } 25 | } 26 | ] 27 | } 28 | }, 29 | "passes": { }, 30 | 
"auto_optimizer_config": { "disable_auto_optimizer": true }, 31 | "evaluator": "merged_evaluator", 32 | "host": "local_system", 33 | "target": "local_system", 34 | "cache_dir": "cache", 35 | "output_dir": "models/llama2_generate" 36 | } 37 | -------------------------------------------------------------------------------- /examples/llama2/llama2_lmeval.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "HfModel", 4 | "model_path": "meta-llama/Llama-2-7b-hf", 5 | "load_kwargs": { "attn_implementation": "eager" } 6 | }, 7 | "systems": { 8 | "local_system": { 9 | "type": "LocalSystem", 10 | "accelerators": [ { "device": "cpu", "execution_providers": [ "CPUExecutionProvider" ] } ] 11 | } 12 | }, 13 | "evaluators": { 14 | "evaluator": { "type": "LMEvaluator", "tasks": [ "hellaswag" ], "batch_size": 1, "limit": 4, "max_length": 128 } 15 | }, 16 | "evaluator": "evaluator", 17 | "host": "local_system", 18 | "target": "local_system", 19 | "cache_dir": "cache", 20 | "output_dir": "models", 21 | "clean_cache": true 22 | } 23 | -------------------------------------------------------------------------------- /examples/llama2/llama2_lmeval_onnx.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "HfModel", 4 | "model_path": "meta-llama/Llama-2-7b-hf", 5 | "load_kwargs": { "attn_implementation": "eager" } 6 | }, 7 | "systems": { 8 | "local_system": { 9 | "type": "LocalSystem", 10 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 11 | } 12 | }, 13 | "passes": { 14 | "mb": { "type": "ModelBuilder", "precision": "int4", "search": { "max_length": 2048, "min_length": 0 } } 15 | }, 16 | "evaluators": { 17 | "evaluator": { "type": "LMEvaluator", "tasks": [ "hellaswag" ], "batch_size": 1, "limit": 4, "max_length": 128 } 18 | }, 19 | "evaluator": "evaluator", 20 | "host": "local_system", 21 | "target": "local_system", 22 | "cache_dir": "cache", 23 | "output_dir": "models", 24 | "clean_cache": true 25 | } 26 | -------------------------------------------------------------------------------- /examples/llama2/llama2_model_builder_template.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "data_configs": [ { "name": "transformer_token_dummy_data", "type": "TransformersTokenDummyDataContainer" } ], 10 | "passes": { 11 | "conversion": { 12 | "type": "OnnxConversion", 13 | "target_opset": 16, 14 | "save_as_external_data": true, 15 | "all_tensors_to_one_file": true, 16 | "save_metadata_for_token_generation": true 17 | }, 18 | "builder": { "type": "ModelBuilder", "precision": "int4", "search": { "max_length": 2048, "min_length": 0 } }, 19 | "metadata": { 20 | "type": "ModelBuilder", 21 | "precision": "int4", 22 | "metadata_only": true, 23 | "search": { "max_length": 2048, "min_length": 0 } 24 | }, 25 | "session_params_tuning": { 26 | "type": "OrtSessionParamsTuning", 27 | "data_config": "transformer_token_dummy_data", 28 | "io_bind": true 29 | } 30 | }, 31 | "packaging_config": [ { "type": "Zipfile", "name": "OutputModel" } ], 32 | "log_severity_level": 0, 33 | "host": "local_system", 34 | "target": "local_system", 35 | "cache_dir": "cache", 36 | 
"output_dir": null 37 | } 38 | -------------------------------------------------------------------------------- /examples/llama2/llama2_split.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "HfModel", 4 | "load_kwargs": { "attn_implementation": "eager" }, 5 | "model_path": "meta-llama/Llama-2-7b-hf" 6 | }, 7 | "systems": { 8 | "local_system": { 9 | "type": "LocalSystem", 10 | "accelerators": [ { "device": "cpu", "execution_providers": [ "CPUExecutionProvider" ] } ] 11 | } 12 | }, 13 | "passes": { 14 | "s": { "type": "CaptureSplitInfo", "num_splits": 3 }, 15 | "c": { "type": "OnnxConversion", "target_opset": 17, "torch_dtype": "float32" }, 16 | "sm": { "type": "SplitModel" } 17 | }, 18 | "host": "local_system", 19 | "target": "local_system", 20 | "output_dir": "models/llama2_split" 21 | } 22 | -------------------------------------------------------------------------------- /examples/llama2/llama2_tensor_parallel.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "meta-llama/Llama-2-7b-hf" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "passes": { 10 | "tensor_parallel": { 11 | "type": "PyTorchTensorParallel", 12 | "user_script": "llama2_tensor_parallel.py", 13 | "class_name": "LlamaPyTorchTensorParallel", 14 | "world_size": 4 15 | }, 16 | "conversion": { 17 | "type": "OnnxConversion", 18 | "target_opset": 17, 19 | "save_as_external_data": true, 20 | "all_tensors_to_one_file": true 21 | }, 22 | "transformers_optimization_fp16": { 23 | "type": "OrtTransformersOptimization", 24 | "save_as_external_data": true, 25 | "all_tensors_to_one_file": true, 26 | "model_type": "gpt2", 27 | "opt_level": 0, 28 | "only_onnxruntime": false, 29 | "keep_io_types": false, 30 | "float16": true, 31 | "use_gqa": true 32 | } 33 | }, 34 | "host": "local_system", 35 | "target": "local_system", 36 | "cache_dir": "cache", 37 | "output_dir": "models/tensor_parallel" 38 | } 39 | -------------------------------------------------------------------------------- /examples/llama2/notebook/llama2/conda.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.9.21 6 | - pip=22.3.1 7 | - pip: 8 | - accelerate 9 | - azure-keyvault-secrets 10 | - azure-identity 11 | - bitsandbytes 12 | - datasets 13 | - huggingface_hub 14 | - optimum 15 | - peft 16 | - scipy 17 | - sentencepiece 18 | - torch==2.0.1 19 | - transformers>=4.33.2,<= 4.37.2 20 | - git+https://github.com/microsoft/Olive#egg=olive-ai[gpu] 21 | -------------------------------------------------------------------------------- /examples/llama2/notebook/llama2_multiep/llama2.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import argparse 6 | import json 7 | from pathlib import Path 8 | 9 | if __name__ == "__main__": 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument( 12 | "--device", 13 | choices=["cpu", "gpu", "multi_ep"], 14 | help="Device to use", 15 | ) 16 | parser.add_argument( 17 | "--quantize", 18 | action="store_true", 19 | help="If set, run transformers optimization pass", 20 | ) 21 | args = parser.parse_args() 22 | 23 | input_filename = f"config_{args.device}.template.json" 24 | with Path(input_filename).open("r") as f: 25 | config = json.load(f) 26 | 27 | if not args.quantize: 28 | del config["passes"]["blockwise_quant_int4"] 29 | 30 | output_filename = input_filename.replace(".template", "") 31 | with Path(output_filename).open("w") as strm: 32 | json.dump(config, fp=strm, indent=4) 33 | -------------------------------------------------------------------------------- /examples/llama2/notebook/llama2_multiep/multiple_ep_requirements.txt: -------------------------------------------------------------------------------- 1 | tabulate 2 | -------------------------------------------------------------------------------- /examples/llama2/requirements-gptq.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | auto-gptq 3 | -------------------------------------------------------------------------------- /examples/llama2/requirements-pipeline.txt: -------------------------------------------------------------------------------- 1 | -r requirements-qlora.txt 2 | azure-ai-ml 3 | azure-identity 4 | azure-keyvault-secrets 5 | azureml-fsspec 6 | huggingface_hub 7 | -------------------------------------------------------------------------------- /examples/llama2/requirements-qlora.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | # transformers>=4.33.2,<= 4.37.2 not compatible with latest accelerate 3 | accelerate<1.0.0 4 | bitsandbytes==0.43.3 5 | onnxruntime_genai 6 | peft 7 | scikit-learn 8 | sentencepiece 9 | -------------------------------------------------------------------------------- /examples/llama2/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets>=2.8.0 2 | onnx>=1.14.0 3 | optimum>=1.17.0 4 | torch 5 | # transformers optimizer fusions don't match in newer versions 6 | transformers>=4.33.2,<= 4.37.2 7 | -------------------------------------------------------------------------------- /examples/mistral/mistral_fp16.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "mistralai/Mistral-7B-v0.1" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "data_configs": [ { "name": "transformer_token_dummy_data", "type": "TransformersTokenDummyDataContainer" } ], 10 | "evaluators": { 11 | "common_evaluator": { 12 | "metrics": [ 13 | { 14 | "name": "latency", 15 | "type": "latency", 16 | "sub_types": [ { "name": "avg", "priority": 1 } ], 17 | "data_config": "transformer_token_dummy_data", 18 | "user_config": { "io_bind": true } 19 | } 20 | ] 21 | } 22 | }, 23 | "passes": { 24 | "convert": { "type": "ModelBuilder", "precision": "fp16" }, 25 | "session_params_tuning": { 26 | "type": "OrtSessionParamsTuning", 27 | 
"data_config": "transformer_token_dummy_data", 28 | "io_bind": true, 29 | "enable_profiling": false 30 | } 31 | }, 32 | "evaluate_input_model": false, 33 | "evaluator": "common_evaluator", 34 | "host": "local_system", 35 | "target": "local_system", 36 | "cache_dir": "cache", 37 | "output_dir": "models/mistral_fp16" 38 | } 39 | -------------------------------------------------------------------------------- /examples/mistral/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | neural-compressor>=2.4.1 3 | onnxruntime-genai-cuda 4 | onnxruntime-gpu 5 | onnxruntime_extensions 6 | # optimum 1.17.0 for fp16 inference 7 | optimum>=1.17.0 8 | tabulate 9 | transformers>=4.34.99 10 | -------------------------------------------------------------------------------- /examples/mobilenet/.gitignore: -------------------------------------------------------------------------------- 1 | output/ 2 | tmp/ 3 | 4 | mobilenet_*eval.json 5 | raw_qnn_sdk_config.json 6 | -------------------------------------------------------------------------------- /examples/mobilenet/onnx/README.md: -------------------------------------------------------------------------------- 1 | # TIMM Model Optimization (Quantization & QDQ) 2 | This folder contains examples of **TIMM (PyTorch Image Models) optimization** using **Olive workflows**, focusing on **ONNX conversion, quantization, and QDQ transformation**. 3 | 4 | ## **Optimization Workflow** 5 | This example optimizes `timm/mobilenetv3_small_100.lamb_in1k` for **CPU execution** by: 6 | - *Converting PyTorch model to ONNX* 7 | - *Applying ONNX quantization* 8 | - *Applying QDQ (Quantize-DeQuantize) transformation* 9 | 10 | - **Model**: [timm/mobilenetv3_small_100.lamb_in1k](https://huggingface.co/timm/mobilenetv3_small_100.lamb_in1k) 11 | - **Dataset**: [ImageNet-1K](https://huggingface.co/datasets/imagenet-1k) 12 | 13 | --- 14 | 15 | ## **Running the Optimization** 16 | ### **Running with Config File** 17 | The provided `config.json` configuration performs **ONNX conversion, quantization, and QDQ transformation**. 18 | 19 | **Install Required Dependencies** 20 | ```sh 21 | pip install -r requirements.txt 22 | olive run --config config.json --setup 23 | ``` 24 | **Run Model Optimization** 25 | ```sh 26 | olive run --config config.json 27 | ``` 28 | 29 | After running the above command, the model candidates and corresponding config will be saved in the output directory. 30 | You can then select the best model and config from the candidates and run the model with the selected config. 31 | 32 | -------------------------------------------------------------------------------- /examples/mobilenet/onnx/requirements.txt: -------------------------------------------------------------------------------- 1 | evaluate 2 | scikit-learn 3 | timm 4 | -------------------------------------------------------------------------------- /examples/mobilenet/onnx/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import timm 6 | 7 | 8 | def load_timm(model_name: str): 9 | model = timm.create_model(model_name, pretrained=True) 10 | return model.eval() 11 | -------------------------------------------------------------------------------- /examples/mobilenet/qnn/README.md: -------------------------------------------------------------------------------- 1 | # MobileNet optimization with QDQ Quantization on Qualcomm NPU 2 | This folder contains a sample use case of Olive to optimize a MobileNet model for Qualcomm NPU (QNN Execution Provider) using static QDQ quantization. 3 | 4 | This example requires an x86 python environment on a Windows ARM machine. 5 | 6 | 7 | ## Prerequisites 8 | ### Clone the repository and install Olive 9 | 10 | Refer to the instructions in the [examples README](../README.md) to clone the repository and install Olive. 11 | 12 | ### Install onnxruntime-qnn 13 | ```bash 14 | python -m pip install onnxruntime-qnn 15 | ``` 16 | 17 | ### Pip requirements 18 | Install the necessary python packages: 19 | ``` 20 | python -m pip install -r requirements.txt 21 | ``` 22 | 23 | ### Download data and model 24 | To download the necessary data and model files: 25 | ``` 26 | python download_files.py 27 | ``` 28 | 29 | ## Run the sample 30 | Run the following command to quantize the model and evaluate it on the NPU: 31 | ```bash 32 | olive run --config mobilenet_qnn_ep.json 33 | ``` 34 | 35 | **NOTE:** The model optimization part of the workflow can also be done on a Linux/Windows machine with a different onnxruntime package installed. Remove the `"evaluators"` and `"evaluator"` sections from the `mobilenet_qnn_ep.json` configuration file to skip the evaluation step. 36 | -------------------------------------------------------------------------------- /examples/mobilenet/qnn/requirements.txt: -------------------------------------------------------------------------------- 1 | packaging 2 | pillow 3 | scipy 4 | torchvision 5 | -------------------------------------------------------------------------------- /examples/open_llama/conda.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.9.21 6 | - pip=22.3.1 7 | - pip: 8 | - datasets 9 | - optimum 10 | - sentencepiece 11 | - transformers 12 | - git+https://github.com/microsoft/Olive#egg=olive-ai[gpu] 13 | -------------------------------------------------------------------------------- /examples/open_llama/requirements-arc.txt: -------------------------------------------------------------------------------- 1 | azure-ai-ml>=1.11.1 2 | azure-identity 3 | azureml-fsspec 4 | -------------------------------------------------------------------------------- /examples/open_llama/requirements-sparsegpt.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | torch-tensorrt 3 | -------------------------------------------------------------------------------- /examples/open_llama/requirements-woq.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | intel-extension-for-transformers 3 | lm-eval==0.4.2 4 | neural-compressor>=2.3 5 | onnxruntime 6 | optimum 7 | sentencepiece 8 | transformers 9 | -------------------------------------------------------------------------------- /examples/open_llama/requirements.txt: 
-------------------------------------------------------------------------------- 1 | datasets 2 | optimum 3 | sentencepiece 4 | -------------------------------------------------------------------------------- /examples/opt_125m/awq.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "facebook/opt-125m" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "passes": { "4bit_awq_quantizer": { "type": "AutoAWQQuantizer" } }, 10 | "host": "local_system", 11 | "target": "local_system", 12 | "cache_dir": "cache", 13 | "output_dir": "models/awq" 14 | } 15 | -------------------------------------------------------------------------------- /examples/opt_125m/awq_onnx.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "facebook/opt-125m" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "passes": { 10 | "4bit_awq_quantizer": { "type": "AutoAWQQuantizer" }, 11 | "conversion_merged": { "type": "OnnxConversion", "device": "cuda", "torch_dtype": "float32" }, 12 | "transformers_optimization_fp16": { 13 | "type": "OrtTransformersOptimization", 14 | "model_type": "gpt2", 15 | "opt_level": 0, 16 | "keep_io_types": false, 17 | "float16": true 18 | } 19 | }, 20 | "host": "local_system", 21 | "target": "local_system", 22 | "cache_dir": "cache", 23 | "output_dir": "models/awq_onnx" 24 | } 25 | -------------------------------------------------------------------------------- /examples/opt_125m/gptq.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "facebook/opt-125m" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "data_configs": [ 10 | { 11 | "name": "wikitext2_train", 12 | "type": "HuggingfaceContainer", 13 | "load_dataset_config": { "data_name": "wikitext", "subset": "wikitext-2-raw-v1", "split": "train" }, 14 | "pre_process_data_config": { "add_special_tokens": false, "max_samples": 128 } 15 | } 16 | ], 17 | "passes": { "gptq_quant_int4": { "type": "GptqQuantizer", "data_config": "wikitext2_train" } }, 18 | "host": "local_system", 19 | "target": "local_system", 20 | "cache_dir": "cache", 21 | "output_dir": "models/gptq" 22 | } 23 | -------------------------------------------------------------------------------- /examples/opt_125m/gptq_onnx.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "facebook/opt-125m" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ] 7 | } 8 | }, 9 | "data_configs": [ 10 | { 11 | "name": "wikitext2_train", 12 | "type": "HuggingfaceContainer", 13 | "load_dataset_config": { "data_name": "wikitext", "subset": "wikitext-2-raw-v1", "split": "train" }, 14 | "pre_process_data_config": { "add_special_tokens": false, "max_samples": 128 } 15 | } 16 | ], 17 | "passes": { 18 | "gptq_quant_int4": { "type": "GptqQuantizer", 
"data_config": "wikitext2_train" }, 19 | "conversion_merged": { "type": "OnnxConversion", "device": "cuda", "torch_dtype": "float32" }, 20 | "transformers_optimization_fp16": { 21 | "type": "OrtTransformersOptimization", 22 | "model_type": "gpt2", 23 | "opt_level": 0, 24 | "keep_io_types": false, 25 | "float16": true 26 | } 27 | }, 28 | "host": "local_system", 29 | "target": "local_system", 30 | "cache_dir": "cache", 31 | "output_dir": "models/gptq_onnx" 32 | } 33 | -------------------------------------------------------------------------------- /examples/opt_125m/requirements-awq.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | autoawq 3 | -------------------------------------------------------------------------------- /examples/opt_125m/requirements-gptq.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | auto-gptq 3 | -------------------------------------------------------------------------------- /examples/opt_125m/requirements.txt: -------------------------------------------------------------------------------- 1 | optimum 2 | transformers 3 | -------------------------------------------------------------------------------- /examples/phi2/.gitignore: -------------------------------------------------------------------------------- 1 | phi2/* 2 | phi2_optimize.json 3 | -------------------------------------------------------------------------------- /examples/phi2/phi2_genai.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "HfModel", "model_path": "microsoft/phi-2" }, 3 | "systems": { 4 | "local_system": { 5 | "type": "LocalSystem", 6 | "accelerators": [ 7 | { "device": "GPU", "execution_providers": [ "CPUExecutionProvider", "CUDAExecutionProvider" ] } 8 | ] 9 | } 10 | }, 11 | "passes": { "builder": { "type": "ModelBuilder", "precision": "int4" } }, 12 | "host": "local_system", 13 | "target": "local_system", 14 | "cache_dir": "cache", 15 | "output_dir": "models/model_builder" 16 | } 17 | -------------------------------------------------------------------------------- /examples/phi2/requirements-lora.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | accelerate 3 | bitsandbytes 4 | peft 5 | scikit-learn 6 | -------------------------------------------------------------------------------- /examples/phi2/requirements-pipeline.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | einops 3 | onnx>=1.15.0 4 | onnxruntime-genai 5 | onnxscript>=0.1.0.dev20240126 6 | scikit-learn 7 | torch>=2.2.0 8 | # onnxruntime-genai 0.5.2 is not compatible with 4.48.0 9 | # need to wait it pick up commit 10 | # https://github.com/microsoft/onnxruntime-genai/commit/c61aaa6b2349b39ca63509914b4c02105b462a4a 11 | transformers>=4.36.2, <4.48.0 12 | -------------------------------------------------------------------------------- /examples/phi2/requirements-slicegpt.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | git+https://github.com/microsoft/TransformerCompression.git 3 | -------------------------------------------------------------------------------- /examples/phi2/requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | onnx>=1.15.0 3 | onnxscript>=0.1.0.dev20240126 4 | torch>=2.2.0 
5 | transformers>=4.36.2 6 | -------------------------------------------------------------------------------- /examples/phi3/.gitignore: -------------------------------------------------------------------------------- 1 | phi3_run_*.json 2 | -------------------------------------------------------------------------------- /examples/phi3/phi3_nvmo_ptq.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "HfModel", 4 | "model_path": "microsoft/Phi-3-mini-4k-instruct", 5 | "task": "text-classification" 6 | }, 7 | "systems": { 8 | "local_system": { 9 | "type": "LocalSystem", 10 | "accelerators": [ { "device": "gpu", "execution_providers": [ "DmlExecutionProvider" ] } ] 11 | } 12 | }, 13 | "engine": { "target": "local_system" }, 14 | "passes": { 15 | "builder": { "type": "ModelBuilder", "precision": "fp16" }, 16 | "quantization": { 17 | "type": "NVModelOptQuantization", 18 | "algorithm": "awq", 19 | "tokenizer_dir": "microsoft/Phi-3-mini-4k-instruct", 20 | "calibration": "awq_lite" 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /examples/phi3/requirements-awq.txt: -------------------------------------------------------------------------------- 1 | autoawq 2 | onnxruntime-genai 3 | transformers 4 | -------------------------------------------------------------------------------- /examples/phi3/requirements-nvmo-awq.txt: -------------------------------------------------------------------------------- 1 | cppimport==22.8.2 2 | cupy-cuda12x 3 | datasets>=2.14.4 4 | torch 5 | transformers 6 | -------------------------------------------------------------------------------- /examples/phi3/requirements-quarot.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | git+https://github.com/microsoft/TransformerCompression.git@main 3 | -------------------------------------------------------------------------------- /examples/phi3/requirements-vision.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | huggingface_hub[cli] 3 | pillow 4 | requests 5 | -------------------------------------------------------------------------------- /examples/phi3/requirements.txt: -------------------------------------------------------------------------------- 1 | einops 2 | olive-ai>=0.6.0 3 | onnx>=1.15.0 4 | onnxruntime>=1.18.0 5 | onnxruntime-genai>=0.2.0 6 | onnxscript>=0.1.0.dev20240126 7 | torch>=2.2.0 8 | transformers>=4.36.2 9 | -------------------------------------------------------------------------------- /examples/phi3/vision/config_templates/text_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "HfModel", 4 | "model_path": "<>", 5 | "load_kwargs": { "trust_remote_code": true } 6 | }, 7 | "passes": { "builder": { "type": "ModelBuilder", "precision": "int4", "exclude_embeds": true } }, 8 | "systems": { 9 | "local_system": { 10 | "type": "LocalSystem", 11 | "accelerators": [ { "device": "CPU", "execution_providers": [ "CPUExecutionProvider" ] } ] 12 | } 13 | }, 14 | "host": "local_system", 15 | "target": "local_system" 16 | } 17 | -------------------------------------------------------------------------------- /examples/phi3/vision/config_templates/text_embedding_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | 
"type": "PyTorchModel", 4 | "model_path": "<>", 5 | "model_loader": "text_embedding_loader", 6 | "model_script": "vision/scripts/user_script.py", 7 | "io_config": { 8 | "input_names": [ "input_ids" ], 9 | "input_types": [ "int64" ], 10 | "input_shapes": [ [ 1, 1 ] ], 11 | "output_names": [ "inputs_embeds" ], 12 | "dynamic_axes": { 13 | "input_ids": { "0": "batch_size", "1": "sequence_length" }, 14 | "inputs_embeds": { "0": "batch_size", "1": "sequence_length" } 15 | } 16 | } 17 | }, 18 | "systems": { 19 | "local_system": { 20 | "type": "LocalSystem", 21 | "accelerators": [ { "device": "cpu", "execution_providers": [ "CPUExecutionProvider" ] } ] 22 | } 23 | }, 24 | "passes": { 25 | "convert": { 26 | "type": "OnnxConversion", 27 | "save_as_external_data": true, 28 | "all_tensors_to_one_file": true, 29 | "convert_attribute": false, 30 | "size_threshold": 0, 31 | "target_opset": 14, 32 | "torch_dtype": "<>" 33 | } 34 | }, 35 | "host": "local_system", 36 | "target": "local_system" 37 | } 38 | -------------------------------------------------------------------------------- /examples/phi3/vision/scripts/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | import requests 7 | from PIL import Image 8 | from transformers import AutoModelForCausalLM, AutoProcessor 9 | 10 | 11 | def vision_embed_tokens_loader(model_name): 12 | model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True) 13 | return model.model.vision_embed_tokens 14 | 15 | 16 | def get_dummy_inputs(model=None): 17 | processor = AutoProcessor.from_pretrained(model.model_path, trust_remote_code=True) 18 | user_prompt = "<|user|>\n" 19 | assistant_prompt = "<|assistant|>\n" 20 | prompt_suffix = "<|end|>\n" 21 | prompt = f"{user_prompt}<|image_1|>\nWhat is shown in this image?{prompt_suffix}{assistant_prompt}" 22 | url = "https://www.ilankelman.org/stopsigns/australia.jpg" 23 | image = Image.open(requests.get(url, stream=True, timeout=10).raw) 24 | inputs = processor(prompt, image, return_tensors="pt") 25 | return ( 26 | inputs["pixel_values"], 27 | inputs["image_sizes"], 28 | ) 29 | 30 | 31 | def text_embedding_loader(model_name): 32 | model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True) 33 | return model.model.embed_tokens 34 | -------------------------------------------------------------------------------- /examples/phi3_5/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | optimum 3 | -------------------------------------------------------------------------------- /examples/phi4/README.md: -------------------------------------------------------------------------------- 1 | # Phi-4 Model Optimization 2 | 3 | This repository demonstrates the optimization of the [Microsoft Phi-4-reasoning](https://huggingface.co/microsoft/Phi-4-reasoning), [Microsoft Phi-4-reasoning-plus](https://huggingface.co/microsoft/Phi-4-reasoning-plus) and [Microsoft Phi-4-mini-reasoning](https://huggingface.co/microsoft/Phi-4-mini-reasoning) models using **post-training quantization (PTQ)** techniques. 
4 | 5 | ## **PTQ + AOT Compilation for Intel® NPUs using Optimum Intel®** 6 | 7 | - [**Intel® NPU**](./openvino/): Instructions for optimization with Optimum Intel® on Intel® NPU to generate an ONNX OpenVINO IR Encapsulated Model are in the [openvino](./openvino/) folder. 8 | -------------------------------------------------------------------------------- /examples/phi4/openvino/README.md: -------------------------------------------------------------------------------- 1 | # Phi-4 Model Optimization 2 | 3 | This folder contains examples of optimization of the [Microsoft Phi-4-reasoning](https://huggingface.co/microsoft/Phi-4-reasoning), [Microsoft Phi-4-reasoning-plus](https://huggingface.co/microsoft/Phi-4-reasoning-plus) and [Microsoft Phi-4-mini-reasoning](https://huggingface.co/microsoft/Phi-4-mini-reasoning) models. 4 | 5 | Instructions for optimization with Optimum Intel® on Intel® NPU to generate an ONNX OpenVINO IR Encapsulated Model are in the following folders: 6 | 7 | - Intel® NPU: [Optimization for Microsoft Phi-4-reasoning](./phi_4_reasoning/) 8 | - Intel® NPU: [Optimization for Microsoft Phi-4-reasoning-plus](./phi_4_reasoning_plus/) 9 | - Intel® NPU: [Optimization for Microsoft Phi-4-mini-reasoning](./phi_4_mini_reasoning/) 10 | -------------------------------------------------------------------------------- /examples/qwen2_5/README.md: -------------------------------------------------------------------------------- 1 | # Qwen 2.5 Optimization 2 | 3 | Sample use cases of using Olive to optimize a [Qwen/Qwen 2.5 1.5B Instruct](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct) model. 4 | 5 | - [QDQ Model with 4-bit Weights & 16-bit Activations](../phi3_5/README.md): 6 | - Run the workflow with `olive run --config qdq_config.json -m Qwen/Qwen2.5-1.5B-Instruct -o models/qwen2_5-qdq`. 7 | - [AMD NPU: Optimization and Quantization for VitisAI](../phi3_5/README.md): 8 | - Run the workflow with `olive run --config qdq_config_vitis_ai.json -m Qwen/Qwen2.5-1.5B-Instruct -o models/qwen2_5-vai`. 9 | - [PTQ + AOT Compilation for Qualcomm NPUs using QNN EP](../phi3_5/README.md): 10 | - Run the workflow with `olive run --config qnn_config.json -m Qwen/Qwen2.5-1.5B-Instruct -o models/qwen2_5-qnn`. 11 | - Run inference with `python app.py -m models/qwen2_5-qnn -c "<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n"`. 12 | - [PTQ + AWQ ONNX OVIR Encapsulated 4-bit weight compression using Intel® Optimum OpenVINO](./openvino/) 13 | -------------------------------------------------------------------------------- /examples/red_pajama/requirements.txt: -------------------------------------------------------------------------------- 1 | onnxruntime-gpu>=1.15.1 2 | optimum>=1.11.0 3 | torch>=2.0.0 4 | transformers>=4.31.0 5 | -------------------------------------------------------------------------------- /examples/red_pajama/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import transformers 6 | 7 | MIN_TRANSFORMERS_VERSION = "4.30.2" 8 | 9 | # check transformers version 10 | assert transformers.__version__ >= MIN_TRANSFORMERS_VERSION, ( 11 | f"Please upgrade transformers to version {MIN_TRANSFORMERS_VERSION} or higher."
12 | ) 13 | -------------------------------------------------------------------------------- /examples/resnet/conda.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.9.21 6 | - pip=20.2 7 | - pip: 8 | - onnxruntime 9 | - pytorch-lightning 10 | - psutil 11 | - scipy 12 | - tabulate 13 | - torchvision 14 | - git+https://github.com/microsoft/Olive#egg=olive-ai[cpu] 15 | -------------------------------------------------------------------------------- /examples/resnet/multiple_ep_requirements.txt: -------------------------------------------------------------------------------- 1 | azure-ai-ml 2 | azure-identity 3 | azureml-fsspec 4 | pytorch-lightning 5 | scipy 6 | tabulate 7 | torchvision 8 | -------------------------------------------------------------------------------- /examples/resnet/openvino/requirements.txt: -------------------------------------------------------------------------------- 1 | olive-ai[openvino] 2 | torchvision 3 | -------------------------------------------------------------------------------- /examples/resnet/qnn/README.md: -------------------------------------------------------------------------------- 1 | # ResNet Optimization with PTQ on Qualcomm NPU using QNN EP 2 | This example performs ResNet optimization on Qualcomm NPU with ONNX Runtime PTQ. It performs the optimization pipeline: 3 | - *PyTorch Model -> Onnx Model -> Quantized Onnx Model* 4 | 5 | It requires an x86 Python environment on a Windows ARM machine with `onnxruntime-qnn` installed. 6 | 7 | **NOTE:** The model quantization part of the workflow can also be done on a Linux/Windows machine with a different onnxruntime package installed. Remove the `"evaluators"` and `"evaluator"` sections from the configuration file to skip the evaluation step, as sketched below.
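As a minimal sketch of the note above, the evaluator sections can also be dropped programmatically before running the workflow from Python. The config file name below is a placeholder for the QNN config JSON shipped with this example, not a guaranteed name:

```python
# Hedged sketch: run the quantization workflow without evaluation on a machine
# that does not have onnxruntime-qnn / the Qualcomm NPU available.
import json

from olive.workflows import run as olive_run

# placeholder file name -- substitute the actual QNN config from this example
with open("resnet_ptq_qnn.json") as f:
    config = json.load(f)

# drop the evaluation-related sections so only the optimization passes run
config.pop("evaluators", None)
config.pop("evaluator", None)

olive_run(config)
```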
8 | -------------------------------------------------------------------------------- /examples/resnet/qnn/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | tabulate 3 | torchvision 4 | -------------------------------------------------------------------------------- /examples/resnet/requirements.txt: -------------------------------------------------------------------------------- 1 | azure-ai-ml 2 | azure-identity 3 | azureml-fsspec 4 | datasets 5 | psutil 6 | pytorch-lightning 7 | scipy 8 | tabulate 9 | torchvision 10 | -------------------------------------------------------------------------------- /examples/stable_diffusion/.gitignore: -------------------------------------------------------------------------------- 1 | /footprints/ 2 | /result_*.png 3 | /quantize_data*/ 4 | -------------------------------------------------------------------------------- /examples/stable_diffusion/assets/dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/stable_diffusion/assets/dog.png -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/.gitignore: -------------------------------------------------------------------------------- 1 | adapters 2 | onnx_model 3 | -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/image/result_pen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/stable_diffusion/notebook/image/result_pen.png -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/image/result_pen_merge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/stable_diffusion/notebook/image/result_pen_merge.png -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/image/result_wolf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/stable_diffusion/notebook/image/result_wolf.png -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/image/result_wolf_merge.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/Olive/76aee46daacc09f98854e4452490a5aaebb857eb/examples/stable_diffusion/notebook/image/result_wolf_merge.png -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/vae_decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from diffusers import AutoencoderKL 3 | 4 | 5 | def vae_decoder_inputs(batch_size, torch_dtype): 6 | return { 7 | "latent_sample": torch.rand((batch_size, 4, 128, 128), dtype=torch_dtype), 8 | "return_dict": False, 9 | } 10 | 11 | 12 | def _dummy_inputs(model=None): 13 | return tuple(vae_decoder_inputs(1, torch.float32).values()) 14 | 15 | 16 | def _model_loader(model_name): 17 | model = 
AutoencoderKL.from_pretrained(model_name, subfolder="vae") 18 | model.forward = model.decode 19 | return model 20 | 21 | 22 | def _io_config(model): 23 | return { 24 | "input_names": ["latent_sample", "return_dict"], 25 | "output_names": ["sample"], 26 | "dynamic_axes": {"latent_sample": {"0": "batch", "1": "channels", "2": "height", "3": "width"}}, 27 | } 28 | -------------------------------------------------------------------------------- /examples/stable_diffusion/notebook/vae_encoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from diffusers import AutoencoderKL 3 | 4 | 5 | def vae_encoder_inputs(batch_size, torch_dtype): 6 | return { 7 | "sample": torch.rand((batch_size, 3, 1024, 1024), dtype=torch_dtype), 8 | "return_dict": False, 9 | } 10 | 11 | 12 | def _dummy_inputs(model=None): 13 | return tuple(vae_encoder_inputs(1, torch.float32).values()) 14 | 15 | 16 | def _model_loader(model_name): 17 | model = AutoencoderKL.from_pretrained(model_name, subfolder="vae") 18 | model.forward = lambda sample, return_dict: model.encode(sample, return_dict)[0].sample() 19 | return model 20 | 21 | 22 | def _io_config(model): 23 | return { 24 | "input_names": ["latent_sample", "return_dict"], 25 | "output_names": ["sample"], 26 | "dynamic_axes": { 27 | "latent_sample": {"0": "batch_size", "1": "num_channels_latent", "2": "height_latent", "3": "width_latent"}, 28 | "sample": {"0": "batch_size", "1": "num_channels", "2": "height", "3": "width"}, 29 | }, 30 | } 31 | -------------------------------------------------------------------------------- /examples/stable_diffusion/requirements-common.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | # breaking changes in diffusers lora implementation 3 | diffusers<0.30.0 4 | onnx 5 | pillow 6 | tabulate 7 | torch 8 | # StableDiffusionSafetyChecker vision_model ignores attn_implementation 9 | transformers<4.43.0 10 | -------------------------------------------------------------------------------- /examples/stable_diffusion/requirements-ov.txt: -------------------------------------------------------------------------------- 1 | diffusers 2 | opencv-python 3 | pillow 4 | -------------------------------------------------------------------------------- /examples/stable_diffusion/requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements-common.txt 2 | onnxruntime-directml>=1.16.0 3 | -------------------------------------------------------------------------------- /examples/stable_diffusion/sd_utils/config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | 6 | vae_sample_size = 512 7 | unet_sample_size = 64 8 | cross_attention_dim = 768 9 | only_conversion = False 10 | data_dir = "quantize_data" 11 | -------------------------------------------------------------------------------- /examples/stable_diffusion_xl/.gitignore: -------------------------------------------------------------------------------- 1 | /footprints/ 2 | /result_*.png 3 | -------------------------------------------------------------------------------- /examples/stable_diffusion_xl/config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | vae_sample_size = 1024 7 | unet_sample_size = 128 8 | cross_attention_dim = 2048 9 | time_ids_size = 6 10 | -------------------------------------------------------------------------------- /examples/stable_diffusion_xl/requirements-common.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | diffusers 3 | invisible-watermark 4 | onnx 5 | optimum 6 | pillow 7 | torch 8 | # StableDiffusionSafetyChecker vision_model ignores attn_implementation 9 | transformers<4.43.0 10 | -------------------------------------------------------------------------------- /examples/stable_diffusion_xl/requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements-common.txt 2 | onnxruntime-directml>=1.16.2 3 | -------------------------------------------------------------------------------- /examples/super_resolution/README.md: -------------------------------------------------------------------------------- 1 | # Super Resolution Optimization with OnnxRuntime extension 2 | This folder demonstrates an example of using the OnnxRuntime extension to optimize Super Resolution. 3 | Visit [OnnxRuntime Extension](https://github.com/microsoft/onnxruntime-extensions) for installation and 4 | usage instructions. 5 | Visit [Super Resolution with OnnxRuntime](https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html) 6 | for detailed information. 7 | 8 | ## Using OnnxRuntime extension with Olive 9 | Olive includes a specific pass, `AppendPrePostProcessingOps`, to append pre- and post-processing operations to the exported 10 | ONNX model. 11 | 12 | ```json 13 | "passes": { 14 | "prepost": { 15 | "type": "AppendPrePostProcessingOps", 16 | "tool_command": "superresolution", 17 | "tool_command_args": { 18 | "output_format": "png" 19 | } 20 | } 21 | } 22 | ``` 23 | 24 | ## How to run 25 | ### Pip requirements 26 | Install the necessary Python packages: 27 | ```sh 28 | python -m pip install -r requirements.txt 29 | ``` 30 | 31 | ### Run sample using config 32 | ```sh 33 | olive run --config config.json 34 | ``` 35 | 36 | or simply run it with Python code: 37 | ```python 38 | from olive.workflows import run as olive_run 39 | olive_run("config.json") 40 | ``` 41 | 42 | After running the above command, the model and corresponding config will be saved in the output directory.
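Because the appended pre- and post-processing operations are custom operators from onnxruntime-extensions, the extensions library must be registered with the ONNX Runtime session to run the optimized model. The following is a hedged sketch, not part of the example: the model path under the output directory, the raw-image-bytes input, and the single PNG-bytes output are assumptions based on the `superresolution` tool command and `"output_format": "png"` above, not guaranteed names.

```python
# Hedged sketch: run the pre/post-processed super-resolution model with the
# onnxruntime-extensions custom ops registered on the session.
import numpy as np
import onnxruntime as ort
from onnxruntime_extensions import get_library_path

so = ort.SessionOptions()
# registers the custom image (de)coding ops used by the appended steps
so.register_custom_ops_library(get_library_path())

# placeholder path -- use the ONNX file actually written to the output directory
sess = ort.InferenceSession("models/model.onnx", sess_options=so)

# the appended pre-processing step is assumed to consume the encoded image bytes
with open("input.png", "rb") as f:
    image_bytes = np.frombuffer(f.read(), dtype=np.uint8)

input_name = sess.get_inputs()[0].name
result = sess.run(None, {input_name: image_bytes})[0]

# the appended post-processing step is assumed to emit encoded PNG bytes
with open("upscaled.png", "wb") as f:
    f.write(result.tobytes())
```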
43 | -------------------------------------------------------------------------------- /examples/super_resolution/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { 3 | "type": "PyTorchModel", 4 | "model_loader": "load_pytorch_model", 5 | "model_script": "loader.py", 6 | "io_config": { 7 | "input_names": [ "input" ], 8 | "input_shapes": [ [ 1, 1, 224, 224 ] ], 9 | "input_types": [ "float32" ], 10 | "output_names": [ "output" ] 11 | } 12 | }, 13 | "passes": { 14 | "exporter": { "type": "OnnxConversion", "target_opset": 15 }, 15 | "prepost": { 16 | "type": "AppendPrePostProcessingOps", 17 | "tool_command": "superresolution", 18 | "tool_command_args": { "output_format": "png" } 19 | } 20 | }, 21 | "log_severity_level": 0, 22 | "clean_cache": true, 23 | "cache_dir": "cache", 24 | "output_dir": "models" 25 | } 26 | -------------------------------------------------------------------------------- /examples/super_resolution/requirements.txt: -------------------------------------------------------------------------------- 1 | onnxruntime_extensions 2 | -------------------------------------------------------------------------------- /examples/test/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /examples/test/azureml/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /examples/test/azureml/test_resnet_vitis_ai_ptq_cpu_aml.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import os 6 | 7 | import pytest 8 | 9 | from ..utils import check_output, get_example_dir, patch_config 10 | 11 | 12 | @pytest.fixture(scope="module", autouse=True) 13 | def setup(): 14 | """Setups any state specific to the execution of the given module.""" 15 | os.chdir(get_example_dir("resnet")) 16 | 17 | 18 | @pytest.mark.parametrize("system", ["aml_system"]) 19 | @pytest.mark.parametrize("olive_json", ["resnet_vitis_ai_ptq_cpu.json"]) 20 | def test_resnet(system, olive_json): 21 | from olive.workflows import run as olive_run 22 | 23 | olive_config = patch_config(olive_json, None, None, system) 24 | 25 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 26 | check_output(workflow_output) 27 | -------------------------------------------------------------------------------- /examples/test/local/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /examples/test/local/test_ast.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from ..utils import check_output, get_example_dir 11 | 12 | 13 | @pytest.fixture(scope="module", autouse=True) 14 | def setup(): 15 | """Setups any state specific to the execution of the given module.""" 16 | os.chdir(get_example_dir("ast")) 17 | 18 | 19 | def test_ast(): 20 | from olive.workflows import run as olive_run 21 | 22 | with open("ast.json") as f: 23 | olive_config = json.load(f) 24 | 25 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 26 | check_output(workflow_output) 27 | -------------------------------------------------------------------------------- /examples/test/local/test_bert_inc.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from ..utils import check_output, get_example_dir 11 | 12 | 13 | @pytest.fixture(scope="module", autouse=True) 14 | def setup(): 15 | """Setups any state specific to the execution of the given module.""" 16 | os.chdir(get_example_dir("bert")) 17 | 18 | 19 | @pytest.mark.parametrize("olive_json", ["bert_inc_dynamic_ptq_cpu.json", "bert_inc_ptq_cpu.json"]) 20 | def test_bert(olive_json): 21 | from olive.workflows import run as olive_run 22 | 23 | with open(olive_json) as f: 24 | olive_config = json.load(f) 25 | 26 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 27 | check_output(workflow_output) 28 | -------------------------------------------------------------------------------- /examples/test/local/test_bert_ptq_cpu.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import os 6 | 7 | import pytest 8 | 9 | from ..utils import check_output, get_example_dir, patch_config 10 | 11 | 12 | @pytest.fixture(scope="module", autouse=True) 13 | def setup(): 14 | """Setups any state specific to the execution of the given module.""" 15 | os.chdir(get_example_dir("bert")) 16 | 17 | 18 | @pytest.mark.parametrize("sampler", ["tpe"]) 19 | @pytest.mark.parametrize("execution_order", ["joint"]) 20 | @pytest.mark.parametrize("system", ["local_system"]) 21 | @pytest.mark.parametrize("olive_json", ["bert_ptq_cpu.json"]) 22 | def test_bert(sampler, execution_order, system, olive_json): 23 | from olive.workflows import run as olive_run 24 | 25 | olive_config = patch_config(olive_json, sampler, execution_order, system) 26 | # remove the latency goal since it is flaky on CI 27 | metrics = olive_config["evaluators"]["common_evaluator"]["metrics"] 28 | del metrics[1]["sub_types"][0]["goal"] 29 | 30 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 31 | check_output(workflow_output) 32 | -------------------------------------------------------------------------------- /examples/test/local/test_bert_ptq_cpu_docker.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import os 6 | import platform 7 | 8 | import pytest 9 | 10 | from olive.common.constants import OS 11 | 12 | from ..utils import check_output, get_example_dir, patch_config 13 | 14 | 15 | @pytest.fixture(scope="module", autouse=True) 16 | def setup(): 17 | """Setups any state specific to the execution of the given module.""" 18 | os.chdir(get_example_dir("bert")) 19 | 20 | 21 | @pytest.mark.parametrize("sampler", ["tpe"]) 22 | @pytest.mark.parametrize("execution_order", ["joint"]) 23 | @pytest.mark.parametrize("system", ["docker_system"]) 24 | @pytest.mark.parametrize("olive_json", ["bert_ptq_cpu.json"]) 25 | def test_bert(sampler, execution_order, system, olive_json): 26 | if system == "docker_system" and platform.system() == OS.WINDOWS: 27 | pytest.skip("Skip Linux containers on Windows host test case.") 28 | 29 | from olive.workflows import run as olive_run 30 | 31 | olive_config = patch_config(olive_json, sampler, execution_order, system) 32 | 33 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 34 | check_output(workflow_output) 35 | -------------------------------------------------------------------------------- /examples/test/local/test_deberta.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from ..utils import check_output, get_example_dir 11 | 12 | 13 | @pytest.fixture(scope="module", autouse=True) 14 | def setup(): 15 | """Setups any state specific to the execution of the given module.""" 16 | os.chdir(get_example_dir("deberta")) 17 | 18 | 19 | def test_deberta(): 20 | from olive.workflows import run as olive_run 21 | 22 | with open("deberta.json") as f: 23 | olive_config = json.load(f) 24 | 25 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 26 | check_output(workflow_output) 27 | -------------------------------------------------------------------------------- /examples/test/local/test_mistral_fp16.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from olive.common.hf.login import huggingface_login 11 | 12 | from ..utils import check_output, get_example_dir 13 | 14 | 15 | @pytest.fixture(scope="module", autouse=True) 16 | def setup(): 17 | """Setups any state specific to the execution of the given module.""" 18 | os.chdir(get_example_dir("mistral")) 19 | 20 | 21 | @pytest.mark.parametrize("olive_json", ["mistral_fp16.json"]) 22 | def test_mistral(olive_json): 23 | from olive.workflows import run as olive_run 24 | 25 | hf_token = os.environ.get("HF_TOKEN") 26 | huggingface_login(hf_token) 27 | 28 | with open(olive_json) as f: 29 | olive_config = json.load(f) 30 | 31 | footprint = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 32 | check_output(footprint) 33 | -------------------------------------------------------------------------------- /examples/test/local/test_mobilenet.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import os 6 | 7 | import pytest 8 | 9 | from olive.common.hf.login import huggingface_login 10 | 11 | from ..utils import check_output, get_example_dir, patch_config 12 | 13 | 14 | @pytest.fixture(scope="module", autouse=True) 15 | def setup(): 16 | """Setups any state specific to the execution of the given module.""" 17 | os.chdir(get_example_dir("mobilenet/onnx")) 18 | 19 | 20 | def test_mobilenet(): 21 | from olive.workflows import run as olive_run 22 | 23 | hf_token = os.environ.get("HF_TOKEN") 24 | huggingface_login(hf_token) 25 | 26 | olive_config = patch_config("config.json") 27 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 28 | check_output(workflow_output) 29 | -------------------------------------------------------------------------------- /examples/test/local/test_mobilenet_qnn_ep.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from olive.common.utils import retry_func, run_subprocess 11 | 12 | from ..utils import get_example_dir 13 | 14 | 15 | @pytest.fixture(scope="module", autouse=True) 16 | def setup(): 17 | """Setups any state specific to the execution of the given module.""" 18 | os.chdir(get_example_dir("mobilenet/qnn")) 19 | 20 | retry_func(run_subprocess, kwargs={"cmd": "python download_files.py", "check": True}) 21 | 22 | 23 | def test_mobilenet_qnn_ep(): 24 | from olive.workflows import run as olive_run 25 | 26 | with open("mobilenet_qnn_ep.json") as f: 27 | config = json.load(f) 28 | 29 | # only run optimization here, needs qnn-ep to run evaluation 30 | del config["evaluators"], config["evaluator"] 31 | 32 | # need to pass [] since the parser reads from sys.argv 33 | workflow_output = olive_run(config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 34 | 35 | # make sure it only ran for npu-qnn 36 | assert len(workflow_output.get_available_devices()) == 1 37 | assert workflow_output["npu"] is not None 38 | assert workflow_output["npu"]["QNNExecutionProvider"] is not None 39 | -------------------------------------------------------------------------------- /examples/test/local/test_phi2.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from olive.common.hf.login import huggingface_login 11 | 12 | from ..utils import assert_nodes, get_example_dir 13 | 14 | 15 | @pytest.fixture(scope="module", autouse=True) 16 | def setup(): 17 | """Setups any state specific to the execution of the given module.""" 18 | os.chdir(get_example_dir("phi2")) 19 | 20 | 21 | def test_phi2_genai(): 22 | from olive.workflows import run as olive_run 23 | 24 | hf_token = os.environ.get("HF_TOKEN") 25 | huggingface_login(hf_token) 26 | 27 | with open("phi2_genai.json") as f: 28 | olive_config = json.load(f) 29 | 30 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 31 | assert_nodes(workflow_output) 32 | -------------------------------------------------------------------------------- /examples/test/local/test_resnet_qat.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import os 6 | 7 | import pytest 8 | 9 | from olive.common.utils import retry_func, run_subprocess 10 | 11 | from ..utils import check_output, get_example_dir, patch_config 12 | 13 | 14 | @pytest.fixture(scope="module", autouse=True) 15 | def setup(): 16 | """Setups any state specific to the execution of the given module.""" 17 | os.chdir(get_example_dir("resnet")) 18 | 19 | # prepare model and data 20 | # retry since it fails randomly 21 | retry_func(run_subprocess, kwargs={"cmd": "python prepare_model_data.py", "check": True}) 22 | 23 | 24 | @pytest.mark.parametrize("sampler", ["random"]) 25 | @pytest.mark.parametrize("execution_order", ["pass-by-pass"]) 26 | @pytest.mark.parametrize("system", ["local_system"]) 27 | @pytest.mark.parametrize( 28 | "olive_json", ["resnet_qat_default_train_loop_cpu.json", "resnet_qat_lightning_module_cpu.json"] 29 | ) 30 | def test_resnet(sampler, execution_order, system, olive_json): 31 | from olive.workflows import run as olive_run 32 | 33 | olive_config = patch_config(olive_json, sampler, execution_order, system) 34 | 35 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 36 | check_output(workflow_output) 37 | -------------------------------------------------------------------------------- /examples/test/local/test_resnet_vitis_ai_ptq_cpu.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import os 6 | 7 | import pytest 8 | 9 | from ..utils import check_output, get_example_dir, patch_config 10 | 11 | 12 | @pytest.fixture(scope="module", autouse=True) 13 | def setup(): 14 | """Setups any state specific to the execution of the given module.""" 15 | os.chdir(get_example_dir("resnet")) 16 | 17 | 18 | @pytest.mark.skip(reason="Disable failing tests") 19 | @pytest.mark.parametrize("system", ["local_system"]) 20 | @pytest.mark.parametrize("olive_json", ["resnet_vitis_ai_ptq_cpu.json"]) 21 | def test_resnet(system, olive_json): 22 | from olive.workflows import run as olive_run 23 | 24 | olive_config = patch_config(olive_json, None, None, system) 25 | 26 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 27 | check_output(workflow_output) 28 | -------------------------------------------------------------------------------- /examples/test/local/test_super_resolution.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import json 6 | import os 7 | 8 | import pytest 9 | 10 | from ..utils import assert_nodes, get_example_dir 11 | 12 | 13 | @pytest.fixture(scope="module", autouse=True) 14 | def setup(): 15 | """Setups any state specific to the execution of the given module.""" 16 | os.chdir(get_example_dir("super_resolution")) 17 | 18 | 19 | def test_super_resolution(): 20 | from olive.workflows import run as olive_run 21 | 22 | with open("config.json") as f: 23 | olive_config = json.load(f) 24 | 25 | workflow_output = olive_run(olive_config, tempdir=os.environ.get("OLIVE_TEMPDIR", None)) 26 | assert_nodes(workflow_output) 27 | -------------------------------------------------------------------------------- /examples/vgg/.gitignore: -------------------------------------------------------------------------------- 1 | outputs/ 2 | -------------------------------------------------------------------------------- /examples/vgg/prepare_config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import json 6 | import platform 7 | from pathlib import Path 8 | 9 | from olive.common.constants import OS 10 | 11 | 12 | def resolve_windows_config(): 13 | with Path("vgg_config.json").open() as f: 14 | snpe_windows_config = json.load(f) 15 | 16 | del snpe_windows_config["passes"]["snpe_quantization"] 17 | with Path("vgg_config.json").open("w") as f: 18 | json.dump(snpe_windows_config, f, indent=4) 19 | 20 | 21 | if __name__ == "__main__": 22 | if platform.system() == OS.WINDOWS: 23 | resolve_windows_config() 24 | -------------------------------------------------------------------------------- /examples/vgg/requirements.txt: -------------------------------------------------------------------------------- 1 | onnxruntime 2 | pillow 3 | torchvision 4 | -------------------------------------------------------------------------------- /examples/vgg/vgg_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "input_model": { "type": "ONNXModel", "model_path": "models/vgg.onnx" }, 3 | "data_configs": [ 4 | { 5 | "name": "raw_data", 6 | "type": "RawDataContainer", 7 | "load_dataset_config": { 8 | "data_dir": "data", 9 | "input_names": [ "data" ], 10 | "input_shapes": [ [ 1, 3, 224, 224 ] ], 11 | "input_dirs": [ "." 
], 12 | "input_suffix": ".raw", 13 | "input_order_file": "input_order.txt" 14 | } 15 | } 16 | ], 17 | "passes": { 18 | "snpe_conversion": { 19 | "type": "SNPEConversion", 20 | "input_names": [ "data" ], 21 | "input_shapes": [ [ 1, 3, 224, 224 ] ], 22 | "output_names": [ "vgg0_dense2_fwd" ] 23 | }, 24 | "snpe_quantization": { "type": "SNPEQuantization", "enable_htp": true, "data_config": "raw_data" } 25 | }, 26 | "log_severity_level": 0, 27 | "clean_cache": true, 28 | "cache_dir": "cache", 29 | "output_dir": "outputs" 30 | } 31 | -------------------------------------------------------------------------------- /examples/vit/openvino/requirements.txt: -------------------------------------------------------------------------------- 1 | olive-ai[openvino] 2 | torchvision 3 | -------------------------------------------------------------------------------- /examples/vit/qnn/README.md: -------------------------------------------------------------------------------- 1 | # Vision Transformer (ViT) Optimization with PTQ on Qualcomm NPU using QNN EP 2 | This example performs ViT optimization on Qualcomm NPU with ONNX Runtime PTQ. It performs the optimization pipeline: 3 | - *PyTorch Model -> Onnx Model -> Quantized Onnx Model* 4 | 5 | It requires an x86 Python environment on a Windows ARM machine with `onnxruntime-qnn` installed. 6 | 7 | **NOTE:** The model quantization part of the workflow can also be done on a Linux/Windows machine with a different onnxruntime package installed. Remove the `"evaluators"` and `"evaluator"` sections from the configuration file to skip the evaluation step. 8 | 9 | ## Test with Tiny-ImageNet-200 10 | Tiny-ImageNet-200 is a smaller subset of the ImageNet dataset containing 200 classes, commonly used for benchmarking deep learning models. 11 | 12 | You can test the output model with the provided scripts. They also serve as an example of how to run inference with the model. 13 | - Download the dataset from http://cs231n.stanford.edu/tiny-imagenet-200.zip and extract it. 14 | - Go to the subfolder *val_tiny_imagenet*. In *val_tiny_imagenet.py*, update *path_to_tiny_imagenet* with the Tiny-ImageNet-200 root path and set *path_to_model*. Modify *limit* to control how many samples are used in the test. 15 | - Run 16 | ``` 17 | python .\val_tiny_imagenet.py 18 | ``` 19 | -------------------------------------------------------------------------------- /examples/vit/qnn/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | tabulate 3 | torchvision 4 | -------------------------------------------------------------------------------- /olive/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License.
4 | # -------------------------------------------------------------------------- 5 | import logging 6 | import sys 7 | 8 | _logger = logging.getLogger(__name__) 9 | _logger.setLevel(logging.INFO) 10 | 11 | _sc = logging.StreamHandler(stream=sys.stdout) 12 | _formatter = logging.Formatter("[%(asctime)s] [%(levelname)s] [%(filename)s:%(lineno)d:%(funcName)s] %(message)s") 13 | _sc.setFormatter(_formatter) 14 | _logger.addHandler(_sc) 15 | _logger.propagate = False 16 | 17 | __version__ = "0.10.0.dev0" 18 | 19 | # pylint: disable=C0413 20 | 21 | from olive.engine.output import DeviceOutput, ModelOutput, WorkflowOutput # noqa: E402 22 | 23 | __all__ = ["DeviceOutput", "ModelOutput", "WorkflowOutput"] 24 | -------------------------------------------------------------------------------- /olive/__main__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | # This is to support running Olive CLI as a module in case olive command 6 | # is not available in the PATH. 7 | # Example: python -m olive 8 | if __name__ == "__main__": 9 | from olive.cli.launcher import main 10 | 11 | main(called_as_console_script=False) 12 | -------------------------------------------------------------------------------- /olive/auto_optimizer/config_template/opt_level_passes.yaml: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | "0": 7 | # 1. The first dim of the list is the pass order 8 | # 2. The second dim of the list is the pass list which can be selected to run one by one 9 | # take this opt_level 0 an example, olive will run the passes in the following order: 10 | # [OnnxConversion] -> [OrtTransformersOptimization] -> [OrtMixedPrecision, OnnxQuantization, IncQuantization, VitisAIQuantization, OnnxMatMul4Quantizer] -> [OrtSessionParamsTuning] 11 | # and run bfs to generate available pass flows(path), like: 12 | # OnnxConversion -> OrtTransformersOptimization -> OrtMixedPrecision -> OrtSessionParamsTuning 13 | # OnnxConversion -> OrtTransformersOptimization -> OnnxQuantization -> OrtSessionParamsTuning 14 | # OnnxConversion -> OrtTransformersOptimization -> IncQuantization -> OrtSessionParamsTuning 15 | # and etc. 16 | 17 | - [OnnxConversion, ModelBuilder] 18 | - [OrtTransformersOptimization] 19 | - [OnnxQuantization, IncQuantization, VitisAIQuantization, OnnxMatMul4Quantizer, OrtMixedPrecision] 20 | - [OrtSessionParamsTuning] 21 | -------------------------------------------------------------------------------- /olive/azureml/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/cli/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/cli/configure_qualcomm_sdk.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from argparse import ArgumentParser 6 | 7 | from olive.cli.base import BaseOliveCLICommand 8 | 9 | 10 | class ConfigureQualcommSDKCommand(BaseOliveCLICommand): 11 | @staticmethod 12 | def register_subcommand(parser: ArgumentParser): 13 | sub_parser = parser.add_parser( 14 | "configure-qualcomm-sdk", 15 | help="Configure Qualcomm SDK for Olive", 16 | ) 17 | sub_parser.add_argument( 18 | "--py_version", 19 | type=str, 20 | help="Python version: Use 3.6 for tensorflow 1.15 and 3.8 otherwise", 21 | required=True, 22 | choices=["3.6", "3.8"], 23 | ) 24 | sub_parser.add_argument( 25 | "--sdk", 26 | type=str, 27 | help="Qualcomm SDK: snpe or qnn", 28 | required=True, 29 | choices=["snpe", "qnn"], 30 | ) 31 | 32 | sub_parser.set_defaults(func=ConfigureQualcommSDKCommand) 33 | 34 | def run(self): 35 | from olive.platform_sdk.qualcomm.configure.configure import configure 36 | 37 | configure(self.args.py_version, self.args.sdk) 38 | -------------------------------------------------------------------------------- /olive/cli/constants.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | CONDA_CONFIG = { 6 | "name": "olive_finetune", 7 | "channels": ["defaults"], 8 | "dependencies": [ 9 | "python=3.9.21", 10 | "pip=22.3.1", 11 | { 12 | "pip": [ 13 | "accelerate", 14 | "bitsandbytes", 15 | "peft", 16 | "sentencepiece", 17 | "datasets", 18 | "evaluate", 19 | "psutil", 20 | "optimum", 21 | "scipy", 22 | "scikit-learn", 23 | "torch", 24 | "onnxruntime-genai", 25 | "--extra-index-url https://download.pytorch.org/whl/cu118", 26 | "transformers>=4.41.1", 27 | "git+https://github.com/microsoft/Olive#egg=olive-ai[gpu,azureml]", 28 | ] 29 | }, 30 | ], 31 | } 32 | -------------------------------------------------------------------------------- /olive/common/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/common/constants.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.common.utils import StrEnumBase 6 | 7 | 8 | class OS(StrEnumBase): 9 | WINDOWS = "Windows" 10 | LINUX = "Linux" 11 | 12 | 13 | ##### AzureML system ##### 14 | 15 | WORKFLOW_CONFIG = "workflow_config" 16 | WORKFLOW_ARTIFACTS = "workflow_artifacts" 17 | HF_LOGIN = "HF_LOGIN" 18 | KEYVAULT_NAME = "KEYVAULT_NAME" 19 | 20 | 21 | ############# Engine ############# 22 | 23 | DEFAULT_WORKFLOW_ID = "default_workflow" 24 | DEFAULT_CACHE_DIR = ".olive-cache" 25 | 26 | 27 | ############# Packaging ############# 28 | 29 | BASE_IMAGE = "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04" 30 | 31 | ############# HF ############# 32 | 33 | DEFAULT_HF_TASK = "text-generation-with-past" 34 | 35 | 36 | ########### Model ########### 37 | 38 | LOCAL_INPUT_MODEL_ID = "local_input_model" 39 | 40 | 41 | ########### Cache ########### 42 | 43 | ACCOUNT_URL_TEMPLATE = "https://{account_name}.blob.core.windows.net" 44 | -------------------------------------------------------------------------------- /olive/common/hf/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/common/hf/login.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import logging 6 | import os 7 | 8 | logger = logging.getLogger(__name__) 9 | 10 | 11 | def huggingface_login(token: str): 12 | from huggingface_hub import login 13 | 14 | login(token=token) 15 | 16 | 17 | def aml_runner_hf_login(): 18 | hf_login = os.environ.get("HF_LOGIN") 19 | if hf_login: 20 | from azure.identity import DefaultAzureCredential 21 | from azure.keyvault.secrets import SecretClient 22 | 23 | keyvault_name = os.environ.get("KEYVAULT_NAME") 24 | logger.debug("Getting token from keyvault %s", keyvault_name) 25 | 26 | credential = DefaultAzureCredential() 27 | secret_client = SecretClient(vault_url=f"https://{keyvault_name}.vault.azure.net/", credential=credential) 28 | token = secret_client.get_secret("hf-token").value 29 | huggingface_login(token) 30 | -------------------------------------------------------------------------------- /olive/common/hf/mappings.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | 6 | # mapping from task to peft task type 7 | # refer to peft.utils.peft_types.TaskType for all possible values 8 | TASK_TO_PEFT_TASK_TYPE = { 9 | "text-classification": "SEQ_CLS", 10 | "text-generation": "CAUSAL_LM", 11 | # TODO(jambayk): see if we need more task types 12 | } 13 | 14 | MODEL_TYPE_MAPPING = { 15 | "camembert": "bert", 16 | "deberta": "bert", 17 | "deberta-v2": "bert", 18 | "distilbert": "bert", 19 | "gpt_neox": "gpt2", 20 | "gpt-j": "gpt2", 21 | "llama": "gpt2", 22 | "roberta": "bert", 23 | "phi3": "phi", 24 | } 25 | 26 | MODELS_TO_LORA_TARGET_MODULES_MAPPING = {"phi3": ["o_proj", "qkv_proj"]} 27 | -------------------------------------------------------------------------------- /olive/common/pydantic_v1.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | """Pydantic v1 compatibility module. 7 | 8 | Pydantic v2 has breaking changes that are not compatible with the current version of Olive. 9 | Migration Guide: https://docs.pydantic.dev/latest/migration/. 10 | 11 | In order to support both versions of Pydantic, we use this module to access pydantic's v1 API. 12 | """ 13 | 14 | # pylint: disable=redefined-builtin, wildcard-import, unused-wildcard-import 15 | 16 | try: 17 | # pydantic v2 18 | from pydantic.v1 import * # noqa: F403 19 | except ImportError: 20 | # pydantic v1 21 | from pydantic import * # noqa: F403 22 | -------------------------------------------------------------------------------- /olive/data/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.data.component import * # noqa: F403 6 | from olive.data.container import * # noqa: F403 7 | -------------------------------------------------------------------------------- /olive/data/component/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.data.component import dataloader, load_dataset, post_process_data, pre_process_data 6 | 7 | __all__ = [ 8 | "dataloader", 9 | "load_dataset", 10 | "post_process_data", 11 | "pre_process_data", 12 | ] 13 | -------------------------------------------------------------------------------- /olive/data/container/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | from olive.data.container import data_container, dummy_data_container, huggingface_container, raw_data_container 6 | 7 | __all__ = [ 8 | "data_container", 9 | "dummy_data_container", 10 | "huggingface_container", 11 | "raw_data_container", 12 | ] 13 | -------------------------------------------------------------------------------- /olive/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.engine.config import EngineConfig 6 | from olive.engine.engine import Engine 7 | from olive.engine.footprint import Footprint 8 | 9 | __all__ = [ 10 | "Engine", 11 | "EngineConfig", 12 | "Footprint", 13 | ] 14 | -------------------------------------------------------------------------------- /olive/engine/packaging/Dockerfile.base: -------------------------------------------------------------------------------- 1 | # DisableDockerDetector "Prevent warnings on 1ES builds" 2 | FROM 3 | 4 | ENV DEBIAN_FRONTEND=noninteractive 5 | 6 | RUN apt-get -y update && ACCEPT_EULA=Y apt-get -y upgrade 7 | RUN apt-get install -y --no-install-recommends wget gnupg 8 | 9 | RUN pip install --no-cache-dir pandas plotly psutil datasets transformers 10 | 11 | WORKDIR /olive 12 | 13 | ADD /olive 14 | 15 | RUN pip install -r requirements.txt 16 | 17 | -------------------------------------------------------------------------------- /olive/engine/packaging/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | from olive.evaluator.metric import Metric, SubMetric 7 | from olive.evaluator.metric_result import MetricResult, SubMetricResult, flatten_metric_result 8 | from olive.evaluator.olive_evaluator import OliveEvaluator 9 | 10 | __all__ = [ 11 | "Metric", 12 | "MetricResult", 13 | "OliveEvaluator", 14 | "SubMetric", 15 | "SubMetricResult", 16 | "flatten_metric_result", 17 | ] 18 | -------------------------------------------------------------------------------- /olive/exception/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
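# A minimal sketch of the exception hierarchy defined below: both OlivePassError and
# OliveEvaluationError derive from OliveError, so a single handler can catch any of them:
#
#   try:
#       raise OlivePassError("pass failed")
#   except OliveError as err:   # also catches OliveEvaluationError
#       print(err)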
4 | # -------------------------------------------------------------------------- 5 | class OliveError(Exception): 6 | """Base class for Olive exceptions.""" 7 | 8 | 9 | class OlivePassError(OliveError): 10 | """Base class for Olive pass exceptions.""" 11 | 12 | 13 | class OliveEvaluationError(OliveError): 14 | """Base class for Olive evaluation exceptions.""" 15 | 16 | 17 | EXCEPTIONS_TO_RAISE = (AssertionError, AttributeError, ImportError, TypeError, ValueError) 18 | -------------------------------------------------------------------------------- /olive/hardware/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.hardware.accelerator import ( 6 | DEFAULT_CPU_ACCELERATOR, 7 | DEFAULT_GPU_CUDA_ACCELERATOR, 8 | DEFAULT_GPU_TRT_ACCELERATOR, 9 | AcceleratorLookup, 10 | AcceleratorSpec, 11 | Device, 12 | ) 13 | 14 | __all__ = [ 15 | "DEFAULT_CPU_ACCELERATOR", 16 | "DEFAULT_GPU_CUDA_ACCELERATOR", 17 | "DEFAULT_GPU_TRT_ACCELERATOR", 18 | "AcceleratorLookup", 19 | "AcceleratorSpec", 20 | "Device", 21 | ] 22 | -------------------------------------------------------------------------------- /olive/model/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.model.config import ModelConfig 6 | from olive.model.handler import * # noqa: F403 7 | 8 | __all__ = ["ModelConfig"] 9 | -------------------------------------------------------------------------------- /olive/model/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.model.config.hf_config import HfLoadKwargs 6 | from olive.model.config.io_config import ( 7 | IoConfig, 8 | complete_kv_cache_with_model_attributes, 9 | extend_io_config_with_kv_cache, 10 | ) 11 | from olive.model.config.kv_cache_config import KVCacheConfig 12 | from olive.model.config.model_config import ModelConfig 13 | 14 | __all__ = [ 15 | "HfLoadKwargs", 16 | "IoConfig", 17 | "KVCacheConfig", 18 | "ModelConfig", 19 | "complete_kv_cache_with_model_attributes", 20 | "extend_io_config_with_kv_cache", 21 | ] 22 | -------------------------------------------------------------------------------- /olive/model/config/registry.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | REGISTRY = {} 6 | 7 | 8 | def model_handler_registry(model_type): 9 | """Decorate and register all OliveModelHandler subclasses. 
10 | 11 | Args: 12 | model_type (str): The model type registration name. Matching is case-insensitive; the name is stored in lowercase. 13 | 14 | Returns: 15 | The decorator that registers the class and returns it unchanged. 16 | 17 | """ 18 | model_type = model_type.lower() 19 | 20 | def decorator_model_class(cls): 21 | if model_type in REGISTRY: 22 | raise ValueError("Cannot have two model handlers with the same name") 23 | 24 | REGISTRY[model_type] = cls 25 | cls.model_type = model_type 26 | return cls 27 | 28 | return decorator_model_class 29 | 30 | 31 | def get_model_handler(model_type): 32 | if not is_valid_model_type(model_type): 33 | raise ValueError(f"Unknown model type {model_type}") 34 | return REGISTRY[model_type.lower()] 35 | 36 | 37 | def is_valid_model_type(model_type): 38 | return model_type.lower() in REGISTRY 39 | -------------------------------------------------------------------------------- /olive/model/handler/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.model.handler.base import OliveModelHandler 6 | from olive.model.handler.composite import CompositeModelHandler 7 | from olive.model.handler.hf import DistributedHfModelHandler, HfModelHandler 8 | from olive.model.handler.onnx import DistributedOnnxModelHandler, ONNXModelHandler 9 | from olive.model.handler.openvino import OpenVINOModelHandler 10 | from olive.model.handler.pytorch import PyTorchModelHandler 11 | from olive.model.handler.qnn import QNNModelHandler 12 | from olive.model.handler.snpe import SNPEModelHandler 13 | from olive.model.handler.tensorflow import TensorFlowModelHandler 14 | 15 | __all__ = [ 16 | "CompositeModelHandler", 17 | "DistributedHfModelHandler", 18 | "DistributedOnnxModelHandler", 19 | "HfModelHandler", 20 | "ONNXModelHandler", 21 | "OliveModelHandler", 22 | "OpenVINOModelHandler", 23 | "PyTorchModelHandler", 24 | "QNNModelHandler", 25 | "SNPEModelHandler", 26 | "TensorFlowModelHandler", 27 | ] 28 | -------------------------------------------------------------------------------- /olive/model/handler/mixin/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License.
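# A minimal sketch of the model handler registry defined in olive/model/config/registry.py
# above; "MyModel" and MyModelHandler are hypothetical names, not part of the codebase:
#
#   @model_handler_registry("MyModel")
#   class MyModelHandler(OliveModelHandler): ...
#
#   get_model_handler("mymodel")        # returns MyModelHandler; keys are stored lowercase
#   is_valid_model_type("MYMODEL")      # True, matching is case-insensitive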
4 | # -------------------------------------------------------------------------- 5 | from olive.model.handler.mixin.dummy_inputs import DummyInputsMixin 6 | from olive.model.handler.mixin.hf import HfMixin 7 | from olive.model.handler.mixin.io_config import IoConfigMixin 8 | from olive.model.handler.mixin.json import JsonMixin 9 | from olive.model.handler.mixin.kv_cache import PytorchKvCacheMixin 10 | from olive.model.handler.mixin.mlflow import MLFlowTransformersMixin 11 | from olive.model.handler.mixin.onnx_ep import OnnxEpValidateMixin 12 | from olive.model.handler.mixin.resource import ResourceMixin 13 | 14 | __all__ = [ 15 | "DummyInputsMixin", 16 | "HfMixin", 17 | "IoConfigMixin", 18 | "JsonMixin", 19 | "MLFlowTransformersMixin", 20 | "OnnxEpValidateMixin", 21 | "PytorchKvCacheMixin", 22 | "ResourceMixin", 23 | ] 24 | -------------------------------------------------------------------------------- /olive/model/handler/mixin/io_config.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from typing import Any 6 | 7 | 8 | class IoConfigMixin: 9 | """Provide access to the model's IO configuration. 10 | 11 | Each model handler may override this behavior; for example, 12 | both the PyTorch and ONNX model handlers override the default implementation. 13 | """ 14 | 15 | @property 16 | def io_config(self) -> dict[str, Any]: 17 | return self._io_config 18 | -------------------------------------------------------------------------------- /olive/model/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.model.utils.onnx_utils import resolve_onnx_path 6 | from olive.model.utils.path_utils import normalize_path_suffix 7 | 8 | __all__ = [ 9 | "normalize_path_suffix", 10 | "resolve_onnx_path", 11 | ] 12 | -------------------------------------------------------------------------------- /olive/passes/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.passes.olive_pass import FullPassConfig, Pass 6 | from olive.passes.pass_config import AbstractPassConfig, PassModuleConfig, PassParamDefault 7 | 8 | REGISTRY = Pass.registry 9 | 10 | __all__ = [ 11 | "REGISTRY", 12 | "AbstractPassConfig", 13 | "FullPassConfig", 14 | "Pass", 15 | "PassModuleConfig", 16 | "PassParamDefault", 17 | ] 18 | -------------------------------------------------------------------------------- /olive/passes/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/onnx/qnn/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/onnx/tensorrt/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/onnx/vitis_ai/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2023, Advanced Micro Devices, Inc. All rights reserved. 3 | # SPDX-License-Identifier: MIT 4 | # 5 | from onnxruntime.quantization.calibrate import CalibrationDataReader 6 | from onnxruntime.quantization.quant_utils import QuantFormat, QuantType 7 | 8 | from olive.passes.onnx.vitis_ai.quant_utils import PowerOfTwoMethod 9 | from olive.passes.onnx.vitis_ai.quantize import quantize_static 10 | 11 | __all__ = [ 12 | "CalibrationDataReader", 13 | "PowerOfTwoMethod", 14 | "QuantFormat", 15 | "QuantType", 16 | "quantize_static", 17 | ] 18 | -------------------------------------------------------------------------------- /olive/passes/openvino/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/pytorch/pytorch_lightning_utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
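# A minimal sketch of calling the helpers defined below, assuming pytorch_lightning is
# installed (exact Trainer keyword support depends on the installed version):
#
#   trainer = create_trainer(logger=None, max_epochs=1)   # extra kwargs go to pl.Trainer
#
# create_ddp_strategy(cluster, accelerator) wraps DDPStrategy with
# find_unused_parameters=True and the given cluster environment and accelerator.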
4 | # -------------------------------------------------------------------------- 5 | import pytorch_lightning as pl 6 | from pytorch_lightning.strategies import DDPStrategy 7 | 8 | 9 | def create_ddp_strategy(cluster, accelerator): 10 | return DDPStrategy(find_unused_parameters=True, cluster_environment=cluster, accelerator=accelerator) 11 | 12 | 13 | def create_trainer( 14 | logger, 15 | callbacks=None, 16 | max_epochs=None, 17 | max_steps=None, 18 | val_check_interval=None, 19 | log_every_n_steps=50, 20 | precision=32, 21 | default_root_dir=None, 22 | **kwargs, 23 | ): 24 | return pl.Trainer( 25 | logger=logger, 26 | callbacks=callbacks, 27 | max_epochs=max_epochs, 28 | max_steps=max_steps, 29 | val_check_interval=val_check_interval, 30 | log_every_n_steps=log_every_n_steps, 31 | precision=precision, 32 | default_root_dir=default_root_dir, 33 | **kwargs, 34 | ) 35 | -------------------------------------------------------------------------------- /olive/passes/qnn/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/snpe/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/passes/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/platform_sdk/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | 6 | from olive.platform_sdk.qualcomm.snpe.env import SNPESDKEnv 7 | 8 | __all__ = [ 9 | "SNPESDKEnv", 10 | ] 11 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/configure/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | # TODO(anyone): change this sub-module back to a file when `olive.platform_sdk.qualcomm.configure` command 7 | # is removed from Olive. 8 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/configure/__main__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | # A separate __main__.py is implemented since CodeQL complains about circular imports otherwise. 6 | if __name__ == "__main__": 7 | import sys 8 | 9 | from olive.cli.launcher import legacy_call 10 | 11 | legacy_call("olive.platform_sdk.qualcomm.configure", "configure-qualcomm-sdk", *sys.argv[1:]) 12 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/copy_libcdsprpc.ps1: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | 6 | # copies the libcdsprpc.dll from driver location to given location 7 | Set-PSDebug -Trace 2 8 | if ( $args.count -eq 0 ) { 9 | echo "Please specify the output location of libcdsprpc.dll" 10 | exit 1 11 | } 12 | $loc = [string](driverquery /v /fo csv | findstr qcadsprpc) 13 | if ( $loc -eq $null ) { 14 | driverquery /v /fo csv 15 | echo "Cannot locate FastRPC driver" 16 | exit 1 17 | } 18 | $lll2 = $loc.Split(",")[15] 19 | if ( $lll2 -eq $null ) { 20 | echo "Cannot locate path from FastRPC driver query" 21 | exit 1 22 | } 23 | $lll = $lll2.Split('"')[1] 24 | if ( $lll -eq $null ) { 25 | echo "Cannot locate path from FastRPC driver query" 26 | exit 1 27 | } 28 | echo Driver location is: $lll 29 | $dir = Split-Path $lll 30 | # $dir = [System.IO.Path]::GetDirectoryName($lll) 31 | $f = Join-Path $dir -ChildPath libcdsprpc.dll 32 | echo Copying $f to $args[0] 33 | Copy-Item -Path $f -Destination $args[0] 34 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/qnn/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/qnn/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/snpe/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.platform_sdk.qualcomm.snpe.snpe import SNPEInferenceSession, SNPESessionOptions 6 | 7 | __all__ = ["SNPEInferenceSession", "SNPESessionOptions"] 8 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/snpe/tools/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/snpe/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/platform_sdk/qualcomm/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/search/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/search/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | from olive.search.samplers.random_sampler import RandomSampler 6 | from olive.search.samplers.search_sampler import SearchSampler 7 | from olive.search.samplers.sequential_sampler import SequentialSampler 8 | from olive.search.samplers.tpe_sampler import TPESampler 9 | 10 | REGISTRY = SearchSampler.registry 11 | 12 | __all__ = ["REGISTRY", "RandomSampler", "SearchSampler", "SequentialSampler", "TPESampler"] 13 | -------------------------------------------------------------------------------- /olive/systems/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/systems/azureml/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.systems.azureml.aml_system import AzureMLSystem 6 | from olive.systems.common import AzureMLDockerConfig 7 | 8 | __all__ = ["AzureMLDockerConfig", "AzureMLSystem"] 9 | -------------------------------------------------------------------------------- /olive/systems/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04 6 | 7 | RUN apt-get -y update && ACCEPT_EULA=Y apt-get -y upgrade 8 | RUN pip install azure-ai-ml \ 9 | azure-identity \ 10 | azureml-dataprep \ 11 | onnxruntime \ 12 | openvino \ 13 | openvino-dev[tensorflow,onnx] \ 14 | tensorflow \ 15 | onnxconverter_common \ 16 | olive-ai 17 | 18 | ADD requirements.txt requirements.txt 19 | RUN pip install -r requirements.txt 20 | 21 | WORKDIR /olive 22 | -------------------------------------------------------------------------------- /olive/systems/docker/Dockerfile.cpu: -------------------------------------------------------------------------------- 1 | # mcr image https://github.com/microsoft/mcr 2 | # tag list https://mcr.microsoft.com/v2/azureml/openmpi4.1.0-ubuntu20.04/tags/list 3 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04 4 | 5 | RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 6 | RUN pip install --no-cache-dir pandas plotly psutil datasets transformers onnxruntime olive-ai 7 | 8 | ADD requirements.txt requirements.txt 9 | RUN pip install -r requirements.txt 10 | -------------------------------------------------------------------------------- /olive/systems/docker/Dockerfile.gpu: -------------------------------------------------------------------------------- 1 | # mcr image https://github.com/microsoft/mcr 2 | # tag list https://mcr.microsoft.com/v2/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04/tags/list 3 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.8-cudnn8-ubuntu22.04 4 | 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | 7 | RUN apt-get -y update && ACCEPT_EULA=Y apt-get -y upgrade 8 | RUN apt-get install -y --no-install-recommends wget gnupg 9 | 10 | # Install TensorRT 11 | RUN v="8.4.1-1+cuda11.6" &&\ 12 | apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub &&\ 13 | apt-get update &&\ 14 | apt-get install -y libnvinfer8=${v} libnvonnxparsers8=${v} libnvparsers8=${v} libnvinfer-plugin8=${v} \ 15 | libnvinfer-dev=${v} libnvonnxparsers-dev=${v} libnvparsers-dev=${v} libnvinfer-plugin-dev=${v} \ 16 | python3-libnvinfer=${v} libnvinfer-samples=${v} 17 | 18 | RUN pip install --no-cache-dir pandas plotly psutil datasets transformers onnxruntime-gpu olive-ai 19 | 20 | ADD requirements.txt requirements.txt 21 | RUN pip install -r requirements.txt 22 | -------------------------------------------------------------------------------- /olive/systems/docker/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.systems.common import LocalDockerConfig 6 | from olive.systems.docker.docker_system import DockerSystem 7 | 8 | __all__ = ["DockerSystem", "LocalDockerConfig"] 9 | -------------------------------------------------------------------------------- /olive/systems/isolated_ort/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | from olive.systems.isolated_ort.isolated_ort_system import IsolatedORTSystem 6 | 7 | __all__ = ["IsolatedORTSystem"] 8 | -------------------------------------------------------------------------------- /olive/systems/python_environment/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.systems.python_environment.python_environment_system import PythonEnvironmentSystem 6 | 7 | __all__ = ["PythonEnvironmentSystem"] 8 | -------------------------------------------------------------------------------- /olive/systems/python_environment/common_requirements.txt: -------------------------------------------------------------------------------- 1 | numpy<2.0 2 | protobuf 3 | psutil 4 | pydantic 5 | -------------------------------------------------------------------------------- /olive/systems/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.systems.utils.arg_parser import get_common_args, parse_config 6 | from olive.systems.utils.misc import ( 7 | create_managed_system, 8 | create_managed_system_with_cache, 9 | create_new_environ, 10 | get_package_name_from_ep, 11 | run_available_providers_runner, 12 | ) 13 | 14 | __all__ = [ 15 | "create_managed_system", 16 | "create_managed_system_with_cache", 17 | "create_new_environ", 18 | "get_common_args", 19 | "get_package_name_from_ep", 20 | "parse_config", 21 | "run_available_providers_runner", 22 | ] 23 | -------------------------------------------------------------------------------- /olive/systems/utils/available_providers_runner.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | # NOTE: Only onnxruntime and its dependencies can be imported in this file. 
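# A minimal usage sketch, assuming the script is executed directly in the target
# environment (output file name is illustrative):
#
#   python available_providers_runner.py --output_path eps.json
#
# writes the JSON-encoded list returned by onnxruntime.get_available_providers() to
# eps.json; equivalently, main(["--output_path", "eps.json"]) can be called in-process.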
6 | import argparse 7 | import json 8 | from pathlib import Path 9 | 10 | import onnxruntime as ort 11 | 12 | 13 | def get_args(raw_args): 14 | parser = argparse.ArgumentParser(description="Get available execution providers") 15 | parser.add_argument("--output_path", type=str, required=True) 16 | 17 | return parser.parse_args(raw_args) 18 | 19 | 20 | def main(raw_args=None): 21 | args = get_args(raw_args) 22 | 23 | # get available execution providers 24 | available_eps = ort.get_available_providers() 25 | 26 | # save to json 27 | with Path(args.output_path).open("w") as f: 28 | json.dump(available_eps, f) 29 | 30 | 31 | if __name__ == "__main__": 32 | main() 33 | -------------------------------------------------------------------------------- /olive/workflows/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.workflows.run.run import run 6 | 7 | __all__ = ["run"] 8 | -------------------------------------------------------------------------------- /olive/workflows/run/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /olive/workflows/run/__main__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
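# A minimal usage sketch: running this entry point as a module, assuming the olive
# package is installed, forwards all command-line arguments to the `run` command of
# the Olive CLI launcher via legacy_call:
#
#   python -m olive.workflows.run <args>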
4 | # -------------------------------------------------------------------------- 5 | if __name__ == "__main__": 6 | import sys 7 | 8 | from olive.cli.launcher import legacy_call 9 | 10 | legacy_call("olive.workflows.run", "run", *sys.argv[1:]) 11 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | editorconfig-checker 3 | lintrunner 4 | lintrunner-adapters 5 | pylint==3.3.6 6 | ruff==0.11.4 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | onnx 3 | onnxscript>=0.2.5 4 | optuna 5 | pandas 6 | pydantic 7 | pyyaml 8 | torch 9 | torchmetrics>=1.0.0 10 | transformers 11 | -------------------------------------------------------------------------------- /scripts/generate_cost_model_artifacts.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | from olive.common.utils import run_subprocess 4 | 5 | models = { 6 | "Llama-2-7B": "meta-llama/Llama-2-7b-hf", 7 | "Llama-2-13B": "meta-llama/Llama-2-13b-hf", 8 | "Llama-3.1-8B": "meta-llama/Llama-3.1-8B", 9 | "Phi-3-mini": "microsoft/Phi-3-mini-4k-instruct", 10 | "Phi-3.5-mini": "microsoft/Phi-3.5-mini-instruct", 11 | } 12 | 13 | 14 | def main(): 15 | asset_dir = Path(__file__).parent.parent / "assets" / "cost_models" 16 | asset_dir.mkdir(parents=True, exist_ok=True) 17 | 18 | for model_name, model_id in models.items(): 19 | run_subprocess( 20 | ["olive", "generate-cost-model", "-m", model_id, "-o", str(asset_dir / f"{model_name}.csv")], check=True 21 | ) 22 | 23 | 24 | if __name__ == "__main__": 25 | main() 26 | -------------------------------------------------------------------------------- /scripts/overwrite_version.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import argparse 6 | from pathlib import Path 7 | 8 | 9 | def get_args(): 10 | parser = argparse.ArgumentParser(description="Overwrite package version in __init__.py") 11 | parser.add_argument("--version", type=str, required=True, help="Version to overwrite with") 12 | return parser.parse_args() 13 | 14 | 15 | def main(): 16 | args = get_args() 17 | version = args.version 18 | 19 | init_path = Path(__file__).parents[1].resolve() / "olive" / "__init__.py" 20 | with open(init_path) as f: 21 | lines = f.readlines() 22 | for i, line in enumerate(lines): 23 | if line.startswith("__version__"): 24 | lines[i] = f'__version__ = "{version}"\n' 25 | break 26 | 27 | with open(init_path, "w") as f: 28 | f.writelines(lines) 29 | 30 | 31 | if __name__ == "__main__": 32 | main() 33 | -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/aml_model_test/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/aml_model_test/conda.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.10.16 6 | - pip=22.3.1 7 | - pip: 8 | - azure-ai-ml 9 | - azure-identity 10 | - azureml-dataprep!=4.12.0 11 | - onnxruntime 12 | - datasets 13 | - scipy 14 | - transformers==4.31.0 # TODO(team): 55036 Fixed error and update to latest version 15 | - onnxconverter_common 16 | - git+https://github.com/microsoft/Olive.git 17 | -------------------------------------------------------------------------------- /test/integ_test/aml_resource_path/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/azureml_eval/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/azureml_eval/conda.yaml: -------------------------------------------------------------------------------- 1 | name: project_environment 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.9.21 6 | - pip=22.3.1 7 | - pip: 8 | - azureml-dataprep!=4.12.0 9 | - onnxruntime 10 | - datasets 11 | - transformers 12 | - torchvision 13 | - onnxconverter_common 14 | - git+https://github.com/microsoft/Olive.git 15 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/azureml_eval/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from torchvision import datasets 6 | from torchvision.transforms import ToTensor 7 | 8 | from olive.data.registry import Registry 9 | 10 | 11 | @Registry.register_post_process() 12 | def mnist_post_process_for_azureml_eval(res): 13 | return res.argmax(1) 14 | 15 | 16 | @Registry.register_dataset() 17 | def mnist_dataset_for_azureml_eval(data_dir): 18 | return datasets.MNIST(data_dir, download=True, transform=ToTensor()) 19 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/docker_eval/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/docker_eval/dockerfile/Dockerfile: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04 6 | 7 | RUN apt-get -y update && ACCEPT_EULA=Y apt-get -y upgrade 8 | RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu --no-cache-dir 9 | RUN pip install onnxruntime \ 10 | datasets \ 11 | git+https://github.com/microsoft/Olive.git \ 12 | onnxconverter_common \ 13 | openvino \ 14 | openvino-dev \ 15 | pandas \ 16 | plotly \ 17 | psutil \ 18 | transformers \ 19 | --no-cache-dir 20 | 21 | WORKDIR /olive 22 | -------------------------------------------------------------------------------- /test/integ_test/evaluator/local_eval/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/integ_test/pass_runner/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/multiple_ep/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/multiple_ep/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets 2 | olive-ai 3 | onnxconverter_common 4 | torchvision 5 | transformers 6 | -------------------------------------------------------------------------------- /test/multiple_ep/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from torchvision import datasets 6 | from torchvision.transforms import ToTensor 7 | 8 | from olive.data.registry import Registry 9 | 10 | 11 | @Registry.register_post_process() 12 | def mnist_post_process_for_multiple_ep(res): 13 | return res.argmax(1) 14 | 15 | 16 | @Registry.register_dataset() 17 | def mnist_dataset_for_multiple_ep(data_dir, *args, **kwargs): 18 | return datasets.MNIST(data_dir, transform=ToTensor()) 19 | -------------------------------------------------------------------------------- /test/requirements-test-cpu.txt: -------------------------------------------------------------------------------- 1 | -r requirements-test.txt 2 | onnxruntime-genai 3 | 4 | -------------------------------------------------------------------------------- /test/requirements-test-gpu.txt: -------------------------------------------------------------------------------- 1 | -r requirements-test.txt 2 | auto-gptq==0.7.1 3 | autoawq==0.2.8 4 | bitsandbytes 5 | onnxruntime-genai-cuda 6 | triton 7 | -------------------------------------------------------------------------------- /test/requirements-test.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | azure-ai-ml 3 | azure-identity 4 | azure-storage-blob 5 | # azureml.evaluate.mlflow.hftransformers is deprecated in 0.0.66 and above 6 | azureml-evaluate-mlflow>=0.0.60, <0.0.66 7 | azureml-fsspec 8 | # Pin azureml-metrics[all] greater than 0.0.26 to avoid breaking change in azureml-evaluate-mlflow 9 | azureml-metrics[all]>=0.0.26 10 | coverage 11 | cppimport 12 | datasets 13 | docker>=7.1.0 14 | evaluate 15 | marshmallow<3.24.0 16 | mlflow>=2.4.0, <2.20.0 17 | neural-compressor<2.4 18 | nncf>=2.16.0 19 | numpy<2.0.0 20 | nvidia-modelopt 21 | onnx-graphsurgeon 22 | 
onnxconverter_common 23 | onnxmltools 24 | onnxoptimizer 25 | onnxruntime_extensions 26 | onnxscript>=0.2.4 27 | openvino>=2025.1.0 28 | optimum[openvino]>=1.17.0, <=1.24 29 | optuna 30 | pandas 31 | peft 32 | plotly 33 | psutil 34 | pytest 35 | pytorch_lightning 36 | scipy 37 | sentencepiece 38 | soundfile 39 | tabulate 40 | torchvision 41 | -------------------------------------------------------------------------------- /test/unit_test/.gitignore: -------------------------------------------------------------------------------- 1 | dummy_model.onnx 2 | -------------------------------------------------------------------------------- /test/unit_test/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/unit_test/assets/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/unit_test/assets/user_script.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | from olive.model import OliveModelHandler 6 | 7 | 8 | def eval_func(model: OliveModelHandler, device, execution_providers): 9 | return 0.382715310 10 | 11 | 12 | def metric_func(inference_output, actuals): 13 | return 0.382715311 14 | -------------------------------------------------------------------------------- /test/unit_test/auto_optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/unit_test/cli/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/unit_test/cli/output_model/model_config.json: -------------------------------------------------------------------------------- 1 | { "type": "PyTorchModel", "model_path": "model_path" } 2 | -------------------------------------------------------------------------------- /test/unit_test/common/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/unit_test/common/test_get_attr.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import pytest 6 | 7 | from olive.common.utils import get_attr 8 | 9 | 10 | def test_attr_exists(): 11 | class A: 12 | def __init__(self, b): 13 | self.b = b 14 | 15 | class B: 16 | def __init__(self, c): 17 | self.c = c 18 | 19 | class C: 20 | def __init__(self): 21 | self.d = "hi" 22 | 23 | c = C() 24 | b = B(c) 25 | a = A(b) 26 | 27 | attrs = ["", "b", "b.c", "b.c.d"] 28 | expected = [a, b, c, "hi"] 29 | for attr, exp in zip(attrs, expected): 30 | assert get_attr(a, attr) == exp 31 | 32 | 33 | def test_attr_no_exists(): 34 | a = "hi" 35 | 36 | assert get_attr(a, "b") is None 37 | 38 | 39 | def test_attr_no_exists_raise(): 40 | a = "hi" 41 | 42 | with pytest.raises(AttributeError): 43 | get_attr(a, "b", fail_on_not_found=True) 44 | -------------------------------------------------------------------------------- /test/unit_test/common/test_retry.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # -------------------------------------------------------------------------- 5 | import pytest 6 | 7 | from olive.common.utils import retry_func 8 | 9 | # pylint: disable=global-statement 10 | 11 | 12 | num_tries = 0 13 | 14 | 15 | def fail_with_key_error(): 16 | global num_tries 17 | if num_tries == 0: 18 | num_tries += 1 19 | raise KeyError("This is a key error") 20 | else: 21 | return True 22 | 23 | 24 | def return_args(*args, **kwargs): 25 | return args, kwargs 26 | 27 | 28 | @pytest.mark.parametrize("exceptions", [KeyError, (KeyError, ValueError), Exception]) 29 | def test_success(exceptions): 30 | global num_tries 31 | num_tries = 0 32 | assert retry_func(fail_with_key_error, max_tries=2, delay=1, exceptions=exceptions) 33 | assert num_tries == 1 34 | 35 | 36 | def test_failure(): 37 | global num_tries 38 | num_tries = 0 39 | with pytest.raises(KeyError): 40 | retry_func(fail_with_key_error, max_tries=1, delay=1) 41 | 42 | 43 | def test_args(): 44 | assert retry_func(return_args, [1, 2, 3], {"a": 4, "b": 5}) == ((1, 2, 3), {"a": 4, "b": 5}) 45 | 46 | 47 | def test_different_exceptions(): 48 | global num_tries 49 | num_tries = 0 50 | with pytest.raises(KeyError): 51 | retry_func(fail_with_key_error, max_tries=2, delay=1, exceptions=ValueError) 52 | -------------------------------------------------------------------------------- /test/unit_test/conftest.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | import shutil 6 | 7 | import pytest 8 | 9 | from test.unit_test.utils import create_onnx_model_file, delete_onnx_model_files 10 | 11 | 12 | @pytest.fixture(scope="session", autouse=True) 13 | def setup_onnx_model(request, tmp_path_factory): 14 | cache_path = tmp_path_factory.mktemp("transformers_cache") 15 | import transformers 16 | 17 | # we cannot use os.environ["TRANSFORMERS_CACHE"] = str(cache_path) 18 | # because the TRANSFORMERS_CACHE is loaded when importing transformers 19 | transformers.utils.hub.TRANSFORMERS_CACHE = str(cache_path) 20 | 21 | from datasets import disable_caching 22 | 23 | disable_caching() 24 | create_onnx_model_file() 25 | yield 26 | delete_onnx_model_files() 27 | shutil.rmtree(cache_path, ignore_errors=True) 28 | -------------------------------------------------------------------------------- /test/unit_test/data_container/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 4 | # -------------------------------------------------------------------------- 5 | -------------------------------------------------------------------------------- /test/unit_test/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft Corporation. All rights reserved. 3 | # Licensed under the MIT License. 
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/engine/packaging/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/engine/packaging/code/score.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/evaluator/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/hardware/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/model/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/model/user_script.py:
--------------------------------------------------------------------------------
1 | from test.unit_test.utils import get_pytorch_model
2 | 
3 | 
4 | def load_decoder_model(model_path):
5 |     return get_pytorch_model().load_model()
6 | 
7 | 
8 | def load_decoder_with_past_model(model_path):
9 |     return get_pytorch_model().load_model()
10 | 
11 | 
12 | def decoder_with_past_inputs(model):
13 |     pass
14 | 
15 | 
16 | def decoder_inputs(model):
17 |     pass
18 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/common/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/common/test_user_script.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | from olive.hardware import DEFAULT_CPU_ACCELERATOR
6 | from olive.passes.onnx.session_params_tuning import OrtSessionParamsTuning
7 | 
8 | 
9 | class TestUserScriptConfig:
10 |     def test_no_config(self):
11 |         config = OrtSessionParamsTuning.generate_config(DEFAULT_CPU_ACCELERATOR, disable_search=True)
12 |         assert config
13 |         assert OrtSessionParamsTuning.validate_config(config, DEFAULT_CPU_ACCELERATOR)
14 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/inc/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/onnx/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/onnx/pipeline/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/onnx/test_float16_conversion.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | import onnx
6 | import pytest
7 | 
8 | from olive.passes.olive_pass import create_pass_from_dict
9 | from olive.passes.onnx.float16_conversion import OnnxFloatToFloat16
10 | from test.unit_test.utils import get_onnx_model
11 | 
12 | 
13 | @pytest.mark.parametrize("keep_io_types", [True, False])
14 | def test_onnxfloattofloat16(keep_io_types, tmp_path):
15 |     # setup
16 |     # this is a simple model with a single Gemm node
17 |     input_model = get_onnx_model()
18 |     p = create_pass_from_dict(OnnxFloatToFloat16, {"keep_io_types": keep_io_types}, disable_search=True)
19 |     output_folder = str(tmp_path / "onnx")
20 | 
21 |     # execute
22 |     output_model = p.run(input_model, output_folder)
23 | 
24 |     # assert
25 |     # check that the input and output types are as expected
26 |     io_config = output_model.io_config
27 |     for io_type in [*io_config["input_types"], *io_config["output_types"]]:
28 |         assert io_type == ("float32" if keep_io_types else "float16")
29 | 
30 |     # check that the model initializer types are float16
31 |     for initializer in output_model.load_model().graph.initializer:
32 |         assert initializer.data_type == onnx.TensorProto.FLOAT16
33 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/onnx/test_mixed_precision.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
6 | from olive.passes.olive_pass import create_pass_from_dict
7 | from olive.passes.onnx.mixed_precision import OrtMixedPrecision
8 | from test.unit_test.utils import get_onnx_model
9 | 
10 | 
11 | def test_ort_mixed_precision_pass(tmp_path):
12 |     # setup
13 |     input_model = get_onnx_model()
14 |     p = create_pass_from_dict(OrtMixedPrecision, {}, disable_search=True)
15 |     output_folder = str(tmp_path / "onnx")
16 | 
17 |     # execute
18 |     p.run(input_model, output_folder)
19 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/onnx/test_model_builder.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | from pathlib import Path
6 | 
7 | import pytest
8 | 
9 | from olive.model import ONNXModelHandler
10 | from olive.passes.olive_pass import create_pass_from_dict
11 | from olive.passes.onnx.model_builder import ModelBuilder
12 | from test.unit_test.utils import make_local_tiny_llama
13 | 
14 | 
15 | @pytest.mark.parametrize("metadata_only", [True, False])
16 | def test_model_builder(tmp_path, metadata_only):
17 |     input_model = make_local_tiny_llama(tmp_path / "input_model", "onnx" if metadata_only else "hf")
18 | 
19 |     p = create_pass_from_dict(ModelBuilder, {"precision": "fp32", "metadata_only": metadata_only}, disable_search=True)
20 |     output_folder = tmp_path / "output_model"
21 | 
22 |     # execute the pass
23 |     output_model = p.run(input_model, output_folder)
24 | 
25 |     # assert
26 |     assert isinstance(output_model, ONNXModelHandler)
27 |     assert Path(output_model.model_path).exists()
28 |     assert Path(output_folder / "genai_config.json").exists()
29 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/onnx/test_qnn_mixed_precision_overrides.py:
--------------------------------------------------------------------------------
1 | from olive.passes.olive_pass import create_pass_from_dict
2 | from olive.passes.onnx.mixed_precision_overrides import MixedPrecisionOverrides
3 | from test.unit_test.utils import get_onnx_model
4 | 
5 | 
6 | def test_qnn_mixed_precision_overrides(tmp_path):
7 |     input_model = get_onnx_model()
8 |     p = create_pass_from_dict(
9 |         MixedPrecisionOverrides,
10 |         {
11 |             "overrides_config": {
12 |                 "/fc1/Gemm_output_0": "QUInt16",
13 |             }
14 |         },
15 |         disable_search=True,
16 |     )
17 |     out = p.run(input_model, tmp_path)
18 |     assert out == input_model
19 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/openvino/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/openvino/user_script.py:
--------------------------------------------------------------------------------
1 | from test.unit_test.utils import get_pytorch_model, get_pytorch_model_dummy_input
2 | 
3 | 
4 | def get_dummy_input():
5 |     input_model = get_pytorch_model()
6 |     return get_pytorch_model_dummy_input(input_model)
7 | 
8 | 
9 | def get_input():
10 |     return [[1, 1]]
11 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/pytorch/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/pytorch/test_autoawq.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | from pathlib import Path
6 | 
7 | import pytest
8 | import torch
9 | 
10 | from olive.hardware.accelerator import AcceleratorSpec, Device
11 | from olive.model import HfModelHandler
12 | from olive.passes.olive_pass import create_pass_from_dict
13 | from olive.passes.pytorch.autoawq import AutoAWQQuantizer
14 | 
15 | 
16 | @pytest.mark.skipif(
17 |     not torch.cuda.is_available(),
18 |     reason="awq requires GPU.",
19 | )
20 | def test_awq(tmp_path: Path):
21 |     # setup
22 |     input_model = HfModelHandler(model_path="facebook/opt-125m", load_kwargs={"use_safetensors": False})
23 | 
24 |     p = create_pass_from_dict(
25 |         AutoAWQQuantizer,
26 |         disable_search=True,
27 |         accelerator_spec=AcceleratorSpec(accelerator_type=Device.GPU, execution_provider="CUDAExecutionProvider"),
28 |     )
29 |     awq_out_folder = str(tmp_path / "awq")
30 | 
31 |     # execute
32 |     out = p.run(input_model, awq_out_folder)
33 | 
34 |     # assert
35 |     assert isinstance(out, HfModelHandler)
36 | 
37 |     from transformers import OPTForCausalLM
38 | 
39 |     assert isinstance(out.load_model(), OPTForCausalLM)
40 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/qnn/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/test_pass_serialization.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | import pytest
6 | 
7 | from olive.hardware import DEFAULT_CPU_ACCELERATOR
8 | from olive.passes.olive_pass import FullPassConfig
9 | from olive.passes.onnx.conversion import OnnxConversion
10 | 
11 | 
12 | @pytest.mark.parametrize("host_device", [None, "cpu", "gpu"])
13 | def test_pass_serialization(host_device):
14 |     config = OnnxConversion.generate_config(DEFAULT_CPU_ACCELERATOR)
15 |     onnx_conversion = OnnxConversion(DEFAULT_CPU_ACCELERATOR, config, host_device=host_device)
16 |     json = onnx_conversion.to_json(True)
17 | 
18 |     cfg = FullPassConfig.from_json(json)
19 |     p = cfg.create_pass()
20 |     assert isinstance(p, OnnxConversion)
21 |     assert p.accelerator_spec == DEFAULT_CPU_ACCELERATOR
22 |     assert p.config == config
23 |     assert p.host_device == host_device
24 | 
--------------------------------------------------------------------------------
/test/unit_test/passes/vitis_ai/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/resource_path/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/snpe/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/azureml/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/azureml/data_dir/datafile.json:
--------------------------------------------------------------------------------
1 | { }
2 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/azureml/output_metrics/pipeline_output/named-outputs/accuracy/metric_result.json:
--------------------------------------------------------------------------------
1 | {
2 |     "accuracy-accuracy_score": { "value": 0.99618, "priority": 1, "higher_is_better": true },
3 |     "accuracy-f1_score": { "value": 0.99618, "priority": -1, "higher_is_better": true },
4 |     "accuracy-precision": { "value": 0.99618, "priority": -1, "higher_is_better": true },
5 |     "accuracy-recall": { "value": 0.99618, "priority": -1, "higher_is_better": true },
6 |     "accuracy-auroc": { "value": 0.99618, "priority": -1, "higher_is_better": true }
7 | }
8 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/azureml/output_metrics/pipeline_output/named-outputs/latency/metric_result.json:
--------------------------------------------------------------------------------
1 | {
2 |     "latency-avg": { "value": 0.031415, "priority": 1, "higher_is_better": false },
3 |     "latency-max": { "value": 0.031415, "priority": -1, "higher_is_better": false },
4 |     "latency-min": { "value": 0.031415, "priority": -1, "higher_is_better": false },
5 |     "latency-p50": { "value": 0.031415, "priority": -1, "higher_is_better": false },
6 |     "latency-p75": { "value": 0.031415, "priority": -1, "higher_is_better": false },
7 |     "latency-p90": { "value": 0.031415, "priority": -1, "higher_is_better": false },
8 |     "latency-p95": { "value": 0.031415, "priority": -1, "higher_is_better": false },
9 |     "latency-p99": { "value": 0.031415, "priority": -1, "higher_is_better": false },
10 |     "latency-p999": { "value": 0.031415, "priority": -1, "higher_is_better": false }
11 | }
12 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/azureml/script_dir/user_script.py:
--------------------------------------------------------------------------------
1 | # Test file for test__create_data_script_inputs_and_args
2 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/docker/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/docker/output_local_path/eval_res.json:
--------------------------------------------------------------------------------
1 | {
2 |     "accuracy-accuracy_score": { "value": 0.99618, "priority": 1, "higher_is_better": true },
3 |     "accuracy-f1_score": { "value": 0.99618, "priority": 1, "higher_is_better": true },
4 |     "accuracy-precision": { "value": 0.99618, "priority": 1, "higher_is_better": true },
5 |     "accuracy-recall": { "value": 0.99618, "priority": 1, "higher_is_better": true },
6 |     "accuracy-auroc": { "value": 0.99618, "priority": 1, "higher_is_better": true }
7 | }
8 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/isolated_ort/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/python_environment/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/systems/test_utils.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | import json
6 | from unittest.mock import patch
7 | 
8 | from olive.systems.utils.available_providers_runner import main as available_providers_main
9 | 
10 | 
11 | @patch("onnxruntime.get_available_providers")
12 | def test_available_providers_runner(mock_get_providers, tmp_path):
13 |     mock_get_providers.return_value = ["DummyExecutionProvider"]
14 |     output_path = tmp_path / "available_eps.json"
15 | 
16 |     # command
17 |     args = ["--output_path", str(output_path)]
18 | 
19 |     # execute
20 |     available_providers_main(args)
21 | 
22 |     # assert
23 |     assert output_path.exists()
24 |     mock_get_providers.assert_called_once()
25 |     with output_path.open("r") as f:
26 |         assert json.load(f) == ["DummyExecutionProvider"]
27 | 
--------------------------------------------------------------------------------
/test/unit_test/test_package_config.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
6 | from olive.package_config import OlivePackageConfig
7 | 
8 | 
9 | class TestPackageConfig:
10 |     def test_passes_configuration(self):
11 |         package_config = OlivePackageConfig.load_default_config()
12 |         for pass_module_name, pass_module_config in package_config.passes.items():
13 |             assert pass_module_config.module_path
14 |             assert pass_module_config.module_path[-len(pass_module_name) :].lower() == pass_module_name
15 |             package_config.import_pass_module(pass_module_name)
16 | 
--------------------------------------------------------------------------------
/test/unit_test/workflows/__init__.py:
--------------------------------------------------------------------------------
1 | # -------------------------------------------------------------------------
2 | # Copyright (c) Microsoft Corporation. All rights reserved.
3 | # Licensed under the MIT License.
4 | # --------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/test/unit_test/workflows/mock_data/default_engine.json:
--------------------------------------------------------------------------------
1 | {
2 |     "input_model": {
3 |         "type": "HfModel",
4 |         "model_path": "hf-internal-testing/tiny-random-BertForSequenceClassification",
5 |         "task": "text-classification"
6 |     },
7 |     "passes": { "conversion": { "type": "OnnxConversion" } }
8 | }
9 | 
--------------------------------------------------------------------------------
/test/unit_test/workflows/mock_data/dependency_setup.json:
--------------------------------------------------------------------------------
1 | {
2 |     "input_model": {
3 |         "type": "PyTorchModel",
4 |         "model_path": "dummy_model.pt",
5 |         "io_config": { "input_names": [ "x" ], "input_shapes": [ [ 1, 2, 3 ] ], "output_names": [ "y" ] }
6 |     },
7 |     "systems": { "local_system": { "type": "LocalSystem", "accelerators": [ { "device": "gpu" } ] } },
8 |     "passes": {
9 |         "onnx_conversion": { "type": "OnnxConversion" },
10 |         "session_params_tuning": { "type": "OrtSessionParamsTuning" }
11 |     },
12 |     "host": "local_system",
13 |     "target": "local_system"
14 | }
15 | 
--------------------------------------------------------------------------------
/test/unit_test/workflows/mock_data/readymade_system.json:
--------------------------------------------------------------------------------
1 | {
2 |     "azureml_client": {
3 |         "subscription_id": "my_subscription_id",
4 |         "resource_group": "my_resource_group",
5 |         "workspace_name": "my_workspace"
6 |     },
7 |     "input_model": { "type": "PyTorchModel", "model_path": "models/resnet_trained_for_cifar10.pt" },
8 |     "systems": {
9 |         "azureml_system": {
10 |             "type": "AzureNDV2System",
11 |             "config": {
12 |                 "accelerators": [ { "device": "gpu", "execution_providers": [ "CUDAExecutionProvider" ] } ],
13 |                 "aml_compute": "gpu-cluster",
14 |                 "aml_docker_config": {
15 |                     "base_image": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu22.04",
16 |                     "conda_file_path": "conda.yaml"
17 |                 },
18 |                 "is_dev": true
19 |             }
20 |         }
21 |     },
22 |     "passes": { "onnx_conversion": { "type": "OnnxConversion" } },
23 |     "engine": { "host": "azureml_system", "target": "azureml_system" }
24 | }
25 | 
--------------------------------------------------------------------------------