├── tests ├── __init__.py ├── exporters │ ├── __init__.py │ ├── onnx │ │ └── __init__.py │ ├── tflite │ │ └── __init__.py │ └── Dockerfile_exporters_gpu ├── assets │ ├── hub │ │ └── config.json │ └── onnx │ │ ├── model.onnx │ │ └── config.json ├── run_doctest.sh ├── README.md ├── gptq │ └── Dockerfile_quantization_gpu ├── bettertransformer │ └── Dockerfile_bettertransformer_gpu ├── onnxruntime │ ├── docker │ │ └── Dockerfile_onnxruntime_gpu │ ├── ds_configs │ │ ├── ds_config_zero_stage_inifinity.json │ │ ├── ds_config_zero_stage_1.json │ │ ├── ds_config_zero_stage_2.json │ │ └── ds_config_zero_stage_3.json │ └── test_utils.py ├── cli │ └── cli_with_custom_command.py ├── test_modeling_base.py ├── onnx │ └── test_onnx_export_custom_module.py └── benchmark │ └── memory_tracker.py ├── optimum ├── onnxruntime │ ├── models │ │ ├── __init__.py │ │ └── bloom.py │ ├── runs │ │ └── utils.py │ ├── io_binding │ │ └── __init__.py │ ├── preprocessors │ │ ├── __init__.py │ │ ├── passes │ │ │ ├── __init__.py │ │ │ ├── fully_connected.py │ │ │ ├── gelu.py │ │ │ └── layernorm.py │ │ └── quantization.py │ ├── constants.py │ ├── graph.py │ └── training_args_seq2seq.py ├── utils │ ├── dummy_bettertransformer_objects.py │ ├── constant.py │ ├── preprocessing │ │ ├── __init__.py │ │ └── task_processors_manager.py │ ├── modeling_utils.py │ ├── doc.py │ ├── dummy_diffusers_objects.py │ └── __init__.py ├── commands │ ├── register │ │ ├── __init__.py │ │ └── README.md │ ├── export │ │ ├── __init__.py │ │ └── base.py │ ├── onnxruntime │ │ ├── __init__.py │ │ └── base.py │ ├── __init__.py │ └── env.py ├── version.py ├── gptq │ ├── __init__.py │ └── constants.py ├── fx │ ├── __init__.py │ ├── quantization │ │ └── __init__.py │ ├── optimization │ │ └── __init__.py │ └── utils.py ├── exporters │ ├── __init__.py │ ├── base.py │ ├── error_utils.py │ ├── onnx │ │ ├── constants.py │ │ └── __init__.py │ └── tflite │ │ ├── __init__.py │ │ └── config.py ├── bettertransformer │ └── __init__.py ├── pipelines │ ├── __init__.py │ └── diffusers │ │ └── watermark.py ├── quantization_base.py ├── onnx │ └── __init__.py └── conftest.py ├── docs ├── source │ ├── notebooks.md │ ├── nvidia_overview.mdx │ ├── torch_fx │ │ ├── concept_guides │ │ │ └── symbolic_tracer.mdx │ │ ├── package_reference │ │ │ └── optimization.mdx │ │ └── overview.mdx │ ├── onnxruntime │ │ ├── package_reference │ │ │ ├── optimization.mdx │ │ │ ├── quantization.mdx │ │ │ ├── trainer.mdx │ │ │ └── configuration.mdx │ │ ├── concept_guides │ │ │ └── onnx.mdx │ │ └── overview.mdx │ ├── exporters │ │ ├── overview.mdx │ │ ├── tflite │ │ │ ├── package_reference │ │ │ │ ├── export.mdx │ │ │ │ └── configuration.mdx │ │ │ ├── usage_guides │ │ │ │ └── contribute.mdx │ │ │ └── overview.mdx │ │ └── onnx │ │ │ └── package_reference │ │ │ ├── export.mdx │ │ │ └── configuration.mdx │ ├── _redirects.yml │ └── utils │ │ ├── dummy_input_generators.mdx │ │ └── normalized_config.mdx ├── Dockerfile └── conftest.py ├── examples └── onnxruntime │ ├── quantization │ ├── image-classification │ │ ├── requirements.txt │ │ └── README.md │ ├── question-answering │ │ ├── requirements.txt │ │ └── README.md │ ├── token-classification │ │ ├── requirements.txt │ │ └── README.md │ ├── multiple-choice │ │ ├── requirements.txt │ │ └── README.md │ └── text-classification │ │ ├── requirements.txt │ │ └── README.md │ ├── optimization │ ├── question-answering │ │ ├── requirements.txt │ │ └── README.md │ ├── token-classification │ │ ├── requirements.txt │ │ └── README.md │ ├── multiple-choice │ │ ├── 
requirements.txt │ │ └── README.md │ └── text-classification │ │ ├── requirements.txt │ │ └── README.md │ └── training │ ├── text-classification │ ├── requirements.txt │ └── zero_stage_2.json │ ├── translation │ └── requirements.txt │ ├── question-answering │ ├── requirements.txt │ └── README.md │ ├── stable-diffusion │ └── text-to-image │ │ └── requirements.txt │ ├── image-classification │ ├── requirements.txt │ └── README.md │ ├── token-classification │ ├── requirements.txt │ └── README.md │ ├── summarization │ ├── requirements.txt │ └── README.md │ ├── language-modeling │ ├── requirements.txt │ └── README.md │ └── docker │ └── Dockerfile-ort-nightly-rocm57 ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── feature-request.yml │ └── bug-report.yml ├── workflows │ ├── upload_pr_documentation.yml │ ├── test_bettertransformer_gpu.yml │ ├── doctests.yml │ ├── test_exporters_common.yml │ ├── test_onnx.yml │ ├── test_onnxruntime_slow.yml │ ├── test_export_onnx_timm.yml │ ├── test_onnxruntime_train.yml │ ├── test_fx.yml │ ├── test_export_onnx_cli_timm.yml │ ├── test_export_onnx_cli.yml │ ├── test_exporters_gpu.yml │ ├── test_gptq.yml │ ├── test_export_tflite.yml │ ├── test_onnxruntime_gpu.yml │ ├── test_export_tflite_cli.yml │ ├── dev_test_exporters.yml │ ├── test_export_tflite_cli_quantization_fp16.yml │ ├── test_benckmark.yml │ ├── dev_test_onnx.yml │ ├── test_export_tflite_cli_dynamic_quantization_int8.yml │ ├── test_dummy_inputs.yml │ ├── test_export_tflite_cli_quantization_int8x16.yml │ ├── dev_test_benckmark.yml │ ├── test_cli.yml │ ├── test_export_tflite_cli_quantization_full_int8.yml │ ├── dev_test_dummy_inputs.yml │ ├── test_export_tflite_cli_quantization_int8_custom_dataset.yml │ ├── test_export_tflite_cli_quantization_int8_default_dataset.yml │ ├── dev_test_fx.yml │ ├── test_exporters_slow.yml │ ├── dev_test_bettertransformer.yml │ ├── dev_test_onnxruntime.yml │ ├── test_export_onnx.yml │ ├── check_code_quality.yml │ ├── test_bettertransformer.yml │ ├── test_optimum_common.yml │ ├── test_offline.yml │ └── dev_test_optimum_common.yml ├── PULL_REQUEST_TEMPLATE.md └── generate_dev_tests.py ├── setup.cfg ├── MANIFEST.in ├── pyproject.toml ├── Makefile └── .gitignore /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/exporters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/exporters/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/exporters/tflite/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /optimum/onnxruntime/models/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/assets/hub/config.json: -------------------------------------------------------------------------------- 1 | {"from_local":true} -------------------------------------------------------------------------------- /docs/source/notebooks.md: -------------------------------------------------------------------------------- 1 | 
../../notebooks/README.md -------------------------------------------------------------------------------- /tests/assets/onnx/model.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/merveenoyan/optimum/main/tests/assets/onnx/model.onnx -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/image-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.5.0 2 | torchvision>=0.6.0 3 | datasets>=1.17.0 4 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | torch >= 1.9.0 3 | onnx 4 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | torch >= 1.9.0 3 | onnx 4 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | seqeval 2 | datasets >= 1.18.0 3 | torch >= 1.9 4 | onnx 5 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | seqeval 2 | datasets >= 1.8.0 3 | torch >= 1.9 4 | onnx 5 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/training/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | -------------------------------------------------------------------------------- /docs/source/nvidia_overview.mdx: -------------------------------------------------------------------------------- 1 | # 🤗 Optimum Nvidia 2 | 3 | Find more information about 🤗 Optimum Nvidia [here](https://github.com/huggingface/optimum-nvidia). 4 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/translation/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.18.0 2 | sentencepiece != 0.1.92 3 | protobuf 4 | sacrebleu >= 1.4.12 5 | py7zr 6 | torch >= 1.8 -------------------------------------------------------------------------------- /docs/source/torch_fx/concept_guides/symbolic_tracer.mdx: -------------------------------------------------------------------------------- 1 | # Symbolic tracer 2 | 3 | In Torch FX, the symbolic tracer feeds dummy values through the code to record the underlying operations. 
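A minimal sketch of that idea with plain `torch.fx` (the tiny module below is illustrative, not taken from Optimum):

```python
import torch
from torch import fx


class TinyBlock(torch.nn.Module):
    def forward(self, x):
        # during tracing, `x` is a Proxy (a dummy value), not a real tensor
        return torch.relu(x) + 1.0


# symbolic_trace() sends the dummy value through forward() and records
# every operation it encounters into a Graph wrapped in a GraphModule.
traced = fx.symbolic_trace(TinyBlock())
print(traced.graph)  # the recorded operations
print(traced.code)   # Python source regenerated from that graph
```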
-------------------------------------------------------------------------------- /examples/onnxruntime/training/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9.0 7 | torch-ort 8 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/multiple-choice/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9 7 | onnx 8 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/multiple-choice/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9 7 | onnx 8 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9 7 | onnx 8 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9 7 | onnx 8 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/training/stable-diffusion/text-to-image/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate>=0.16.0 2 | transformers>=4.25.1 3 | datasets 4 | git+https://github.com/huggingface/diffusers 5 | ftfy 6 | tensorboard 7 | Jinja2 8 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/image-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate>=0.12.0 2 | torch>=1.5.0 3 | torchvision>=0.6.0 4 | datasets>=1.17.0 5 | evaluate 6 | onnx>=1.9.0 7 | onnxruntime-training>=1.9.0 8 | torch-ort 9 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.18.3 2 | scipy 3 | scikit-learn 4 | sentencepiece != 0.1.92 5 | seqeval 6 | torch >= 1.8.1 7 | seqeval 8 | sentencepiece != 0.1.92 9 | torch >= 1.9 10 | torch-ort 11 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | evaluate 3 | datasets >= 1.8.0 4 | sentencepiece != 0.1.92 5 | scipy 6 | scikit-learn 7 | protobuf 8 | rouge-score 9 | nltk 10 | py7zr 11 | torch >= 1.9.0 12 | torch-ort 13 | 
-------------------------------------------------------------------------------- /examples/onnxruntime/training/language-modeling/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf == 3.20.2 6 | torch >= 1.9.0 7 | transformers>=4.16.0 8 | onnx>=1.9.0 9 | onnxruntime-training>=1.9.0 10 | torch-ort 11 | -------------------------------------------------------------------------------- /optimum/utils/dummy_bettertransformer_objects.py: -------------------------------------------------------------------------------- 1 | from .import_utils import DummyObject, requires_backends 2 | 3 | 4 | class BarkSelfAttention(metaclass=DummyObject): 5 | _backends = ["transformers_431"] 6 | 7 | def __init__(self, *args, **kwargs): 8 | requires_backends(self, ["transformers_431"]) 9 | -------------------------------------------------------------------------------- /tests/run_doctest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip install accelerate 3 | pip install .[benchmark] 4 | touch optimum/__init__.py 5 | python tests/utils/prepare_for_doc_test.py optimum docs 6 | pytest --verbose -s --doctest-modules $(cat tests/utils/documentation_tests.txt) --doctest-continue-on-failure --doctest-glob='*.mdx' 7 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | version: 2.1 3 | contact_links: 4 | - name: Website Related 5 | url: https://github.com/huggingface/hub-docs/issues 6 | about: Feature requests and bug reports related to the website 7 | - name: Forum 8 | url: https://discuss.huggingface.co/ 9 | about: General usage questions and community discussions -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = optimum 7 | line_length = 119 8 | lines_after_imports = 2 9 | multi_line_output = 3 10 | use_parentheses = True 11 | 12 | [flake8] 13 | ignore = E203, E501, E741, W503, W605 14 | max-line-length = 119 15 | 16 | [tool:pytest] 17 | doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS 18 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/text-classification/zero_stage_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "steps_per_print": 100, 3 | "zero_optimization": { 4 | "stage": 2 5 | }, 6 | "zero_allow_untested_optimizer": true, 7 | "fp16": { 8 | "enabled": true, 9 | "initial_scale_power": 12 10 | }, 11 | "tensorboard":{ 12 | "enabled": false 13 | }, 14 | "train_micro_batch_size_per_gpu": "auto", 15 | "gradient_accumulation_steps": "auto" 16 | } -------------------------------------------------------------------------------- /docs/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nikolaik/python-nodejs:python3.8-nodejs18 2 | 3 | ARG commit_sha 4 | ARG clone_url 5 | 6 | RUN apt -y update 7 | RUN python3 -m pip install --no-cache-dir --upgrade pip 8 | RUN python3 -m pip install --no-cache-dir 
git+https://github.com/huggingface/doc-builder.git 9 | 10 | RUN git clone $clone_url && cd optimum && git checkout $commit_sha 11 | RUN python3 -m pip install --no-cache-dir ./optimum[onnxruntime,benchmark,quality,exporters-tf,doc-build,diffusers] 12 | -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Upload PR Documentation 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Build PR documentation"] 6 | types: 7 | - completed 8 | 9 | jobs: 10 | build: 11 | uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main 12 | with: 13 | package_name: optimum 14 | secrets: 15 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} 16 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} 17 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Helpful tips for testing & debugging optimum 2 | 3 | ## VSCODE 4 | 5 | If you are using vscode, you might have a hard time getting the "Testing" menu to discover the tests so that you can run or debug them individually. You can copy the snippet below into `.vscode/settings.json`. 6 | 7 | ```json 8 | { 9 | "python.testing.pytestArgs": [ 10 | "tests/onnxruntime", 11 | "tests/test_*" 12 | ], 13 | "python.testing.unittestEnabled": false, 14 | "python.testing.pytestEnabled": true 15 | } 16 | ``` 17 | 18 | This snippet will discover all base tests and the tests inside the `tests/onnxruntime` folder. 19 | -------------------------------------------------------------------------------- /optimum/commands/register/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | include README.md 16 | include LICENSE 17 | -------------------------------------------------------------------------------- /optimum/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | __version__ = "1.20.0.dev0" 16 | -------------------------------------------------------------------------------- /optimum/onnxruntime/runs/utils.py: -------------------------------------------------------------------------------- 1 | from ..modeling_decoder import ORTModelForCausalLM 2 | from ..modeling_ort import ( 3 | ORTModelForFeatureExtraction, 4 | ORTModelForImageClassification, 5 | ORTModelForQuestionAnswering, 6 | ORTModelForSequenceClassification, 7 | ORTModelForTokenClassification, 8 | ) 9 | 10 | 11 | task_ortmodel_map = { 12 | "text-generation": ORTModelForCausalLM, 13 | "feature-extraction": ORTModelForFeatureExtraction, 14 | "image-classification": ORTModelForImageClassification, 15 | "question-answering": ORTModelForQuestionAnswering, 16 | "text-classification": ORTModelForSequenceClassification, 17 | "token-classification": ORTModelForTokenClassification, 18 | } 19 | -------------------------------------------------------------------------------- /optimum/gptq/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from .quantizer import GPTQQuantizer, load_quantized_model 16 | -------------------------------------------------------------------------------- /optimum/fx/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from . import optimization, quantization 16 | -------------------------------------------------------------------------------- /optimum/fx/quantization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .functions import fuse_fx, prepare_fx, prepare_qat_fx 15 | -------------------------------------------------------------------------------- /optimum/onnxruntime/io_binding/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .io_binding_helper import IOBindingHelper, TypeHelper 15 | -------------------------------------------------------------------------------- /optimum/exporters/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from . import onnx # noqa 16 | from .tasks import TasksManager # noqa 17 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .quantization import PreprocessorPass, QuantizationPreprocessor 16 | -------------------------------------------------------------------------------- /.github/workflows/test_bettertransformer_gpu.yml: -------------------------------------------------------------------------------- 1 | name: BetterTransformer GPU / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 1 */3 * * # at 1am every 3 days 7 | 8 | jobs: 9 | do-the-job: 10 | name: Start self-hosted EC2 runner 11 | runs-on: [single-gpu, nvidia-gpu, t4, ci] 12 | env: 13 | AWS_REGION: us-east-1 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v2 17 | - name: Build image 18 | run: | 19 | docker build -f tests/bettertransformer/Dockerfile_bettertransformer_gpu -t bettertransformer-gpu . 20 | - name: Test with unittest within docker container 21 | run: | 22 | docker run --rm --gpus all --workdir=/workspace/optimum/tests bettertransformer-gpu:latest 23 | -------------------------------------------------------------------------------- /optimum/bettertransformer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace and Meta Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .models import BetterTransformerManager 15 | from .transformation import BetterTransformer 16 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/package_reference/optimization.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Optimization 14 | 15 | ## ORTOptimizer 16 | 17 | [[autodoc]] onnxruntime.optimization.ORTOptimizer 18 | - all -------------------------------------------------------------------------------- /docs/source/onnxruntime/package_reference/quantization.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Quantization 14 | 15 | ## ORTQuantizer 16 | 17 | [[autodoc]] onnxruntime.quantization.ORTQuantizer 18 | - all 19 | -------------------------------------------------------------------------------- /optimum/exporters/base.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Base exporters config.""" 16 | 17 | from abc import ABC 18 | 19 | 20 | class ExportConfig(ABC): 21 | pass 22 | -------------------------------------------------------------------------------- /optimum/commands/export/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from .base import ExportCommand 17 | from .onnx import ONNXExportCommand 18 | from .tflite import TFLiteExportCommand 19 | -------------------------------------------------------------------------------- /.github/workflows/doctests.yml: -------------------------------------------------------------------------------- 1 | name: Optimum common / Doctest 2 | # Note: this test uses transformers stable and optimum dev 3 | 4 | on: 5 | workflow_dispatch: 6 | schedule: 7 | - cron: 0 1 * * 0 # every sunday at 1am 8 | 9 | jobs: 10 | do-the-job: 11 | name: Start self-hosted EC2 runner 12 | runs-on: [single-gpu, nvidia-gpu, t4, ci] 13 | env: 14 | AWS_REGION: us-east-1 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v2 18 | - name: Build image 19 | run: | 20 | docker build -f tests/onnxruntime/Dockerfile_onnxruntime_gpu -t onnxruntime-gpu . 21 | - name: Test with unittest within docker container 22 | run: | 23 | docker run --rm --gpus all --workdir=/workspace/optimum/ onnxruntime-gpu:latest /bin/bash tests/run_doctest.sh 24 | -------------------------------------------------------------------------------- /optimum/commands/onnxruntime/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .base import ONNXRuntimeCommand 17 | from .optimize import ONNXRuntimeOptimizeCommand 18 | from .quantize import ONNXRuntimeQuantizeCommand 19 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/passes/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .excluders import ExcludeNodeAfter, ExcludeNodeFollowedBy 16 | from .gelu import ExcludeGeLUNodes 17 | from .layernorm import ExcludeLayerNormNodes -------------------------------------------------------------------------------- /docs/source/exporters/overview.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Overview 14 | 15 | 🤗 Optimum enables exporting models from PyTorch or TensorFlow to different formats through its `exporters` module. For now, two export formats are supported: ONNX and TFLite (TensorFlow Lite). 16 | -------------------------------------------------------------------------------- /optimum/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .pipelines_base import ( 17 | MAPPING_LOADING_FUNC, 18 | ORT_SUPPORTED_TASKS, 19 | load_bettertransformer, 20 | load_ort_pipeline, 21 | pipeline, 22 | ) 23 | -------------------------------------------------------------------------------- /docs/source/exporters/tflite/package_reference/export.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Export functions 14 | 15 | ## Main functions 16 | 17 | [[autodoc]] exporters.tflite.convert.export 18 | 19 | ## Utility functions 20 | 21 | [[autodoc]] exporters.tflite.convert.validate_model_outputs 22 | -------------------------------------------------------------------------------- /tests/assets/onnx/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "tiny-distilbert-classification", 3 | "activation": "gelu", 4 | "architectures": [ 5 | "DistilBertForSequenceClassification" 6 | ], 7 | "attention_dropout": 0.1, 8 | "dim": 2, 9 | "dropout": 0.1, 10 | "finetuning_task": "sst-2", 11 | "hidden_dim": 2, 12 | "id2label": { 13 | "0": "NEGATIVE", 14 | "1": "POSITIVE" 15 | }, 16 | "initializer_range": 0.02, 17 | "label2id": { 18 | "NEGATIVE": 0, 19 | "POSITIVE": 1 20 | }, 21 | "max_position_embeddings": 512, 22 | "model_type": "distilbert", 23 | "n_heads": 2, 24 | "n_layers": 2, 25 | "output_past": true, 26 | "pad_token_id": 0, 27 | "qa_dropout": 0.1, 28 | "seq_classif_dropout": 0.2, 29 | "sinusoidal_pos_embds": false, 30 | "tie_weights_": true, 31 | "torch_dtype": "float32", 32 | "transformers_version": "4.10.0.dev0", 33 | "vocab_size": 30522 34 | } 35 | -------------------------------------------------------------------------------- /optimum/gptq/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | SEQLEN_KEYS_TRANFORMERS = ["max_position_embeddings", "seq_length", "n_positions"] 16 | BLOCK_PATTERNS = [ 17 | "transformer.h", 18 | "model.decoder.layers", 19 | "gpt_neox.layers", 20 | "model.layers", 21 | ] 22 | 23 | GPTQ_CONFIG = "quantize_config.json" 24 | -------------------------------------------------------------------------------- /.github/workflows/test_exporters_common.yml: -------------------------------------------------------------------------------- 1 | name: Exporters Common / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9] 19 | os: [ubuntu-20.04] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | pip install .[tests,exporters-tf] 31 | - name: Test with unittest 32 | working-directory: tests 33 | run: | 34 | pytest exporters/common/ -s --durations=0 35 | -------------------------------------------------------------------------------- /.github/workflows/test_onnx.yml: -------------------------------------------------------------------------------- 1 | name: ONNX / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9] 19 | os: [ubuntu-20.04, macos-13] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | pip install .[tests,onnxruntime,exporters-tf] 31 | - name: Test with unittest 32 | working-directory: tests 33 | run: | 34 | python -m unittest discover -s onnx -p 'test_*.py' 35 | -------------------------------------------------------------------------------- /optimum/fx/optimization/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | from .transformations import ( # noqa 16 | ChangeTrueDivToMulByInverse, 17 | FuseBatchNorm1dInLinear, 18 | FuseBatchNorm2dInConv2d, 19 | FuseBiasInLinear, 20 | MergeLinears, 21 | ReversibleTransformation, 22 | Transformation, 23 | compose, 24 | ) 25 | -------------------------------------------------------------------------------- /optimum/onnxruntime/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ENCODER_ONNX_FILE_PATTERN = r"(.*)?encoder(.*)?\.onnx" 16 | DECODER_ONNX_FILE_PATTERN = r"(.*)?decoder((?!(with_past|merged)).)*?\.onnx" 17 | DECODER_WITH_PAST_ONNX_FILE_PATTERN = r"(.*)?decoder(.*)?with_past(.*)?\.onnx" 18 | DECODER_MERGED_ONNX_FILE_PATTERN = r"(.*)?decoder(.*)?merged(.*)?\.onnx" 19 | -------------------------------------------------------------------------------- /.github/workflows/test_onnxruntime_slow.yml: -------------------------------------------------------------------------------- 1 | name: ONNX Runtime slow / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 7 * * * # every day at 7am 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | build: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: [3.8, 3.9] 18 | os: [ubuntu-20.04] 19 | 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies for export 28 | run: | 29 | pip install .[tests,onnxruntime] 30 | - name: Test with unittest 31 | working-directory: tests 32 | run: | 33 | RUN_SLOW=1 pytest onnxruntime -s -m "run_slow" --durations=0 34 | -------------------------------------------------------------------------------- /tests/gptq/Dockerfile_quantization_gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 2 | CMD nvidia-smi 3 | 4 | # Ignore interactive questions during `docker build` 5 | ENV DEBIAN_FRONTEND noninteractive 6 | 7 | # Install and update tools to minimize security vulnerabilities 8 | RUN apt-get update 9 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 10 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev python3-pip && \ 11 | apt-get clean 12 | RUN unattended-upgrade 13 | RUN apt-get autoremove -y 14 | 15 | RUN python3 -m pip install -U pip 16 | 17 | RUN pip install torch torchvision torchaudio 18 | RUN pip install transformers accelerate auto-gptq datasets 19 | 20 | # Install Optimum 21 | COPY . 
/workspace/optimum 22 | RUN pip install /workspace/optimum[tests] 23 | 24 | ENV RUN_SLOW=1 25 | WORKDIR /workspace/optimum/tests/ 26 | CMD pytest gptq/test_*.py --durations=0 -s -vvvvv 27 | -------------------------------------------------------------------------------- /.github/workflows/test_export_onnx_timm.yml: -------------------------------------------------------------------------------- 1 | name: Exporters ONNX Timm / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 7 * * * # every day at 7am 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | build: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: [3.8, 3.9] 18 | os: [ubuntu-20.04] 19 | 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies for pytorch export 28 | run: | 29 | pip install .[tests,exporters] 30 | - name: Test with unittest 31 | working-directory: tests 32 | run: | 33 | RUN_SLOW=1 pytest exporters/onnx/ -s -n auto -k "timm" --durations=0 34 | 35 | -------------------------------------------------------------------------------- /optimum/commands/register/README.md: -------------------------------------------------------------------------------- 1 | # Register commands in the Optimum CLI from a subpackage 2 | 3 | It is possible to register a command in the Optimum CLI, either as a command or a subcommand of an already existing command. 4 | 5 | Steps to follow: 6 | 7 | 1. Create a command as a subclass of `optimum.commands.BaseOptimumCLICommand`. 8 | 2. Create a Python file under `optimum/commands/register/`, and define a `REGISTER_COMMANDS` list variable there. 9 | 3. Fill the `REGISTER_COMMANDS` as follows: 10 | 11 | ```python 12 | # CustomCommand1 and CustomCommand2 could also be defined in this file actually. 13 | from ..my_custom_commands import CustomCommand1, CustomCommand2 14 | from ..export import ExportCommand 15 | 16 | REGISTER_COMMANDS = [ 17 | # CustomCommand1 will be registered as a subcommand of the root Optimum CLI. 18 | CustomCommand1, 19 | # CustomCommand2 will be registered as a subcommand of the `optimum-cli export` command. 20 | (CustomCommand2, ExportCommand) # CustomCommand2 will be registered 21 | ] 22 | ``` 23 | -------------------------------------------------------------------------------- /tests/bettertransformer/Dockerfile_bettertransformer_gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 2 | CMD nvidia-smi 3 | 4 | # Ignore interactive questions during `docker build` 5 | ENV DEBIAN_FRONTEND noninteractive 6 | 7 | # Install and update tools to minimize security vulnerabilities 8 | RUN apt-get update 9 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 10 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev python3-pip && \ 11 | apt-get clean 12 | RUN unattended-upgrade 13 | RUN apt-get autoremove -y 14 | 15 | RUN python3 -m pip install -U pip 16 | 17 | RUN pip install torch torchvision torchaudio 18 | RUN pip install transformers accelerate datasets 19 | 20 | # Install Optimum 21 | COPY . 
/workspace/optimum 22 | RUN pip install /workspace/optimum[tests] 23 | 24 | ENV RUN_SLOW=1 25 | WORKDIR /workspace/optimum/tests/ 26 | CMD pytest bettertransformer/test_*.py -s --durations=0 -m gpu_test 27 | -------------------------------------------------------------------------------- /optimum/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseOptimumCLICommand, CommandInfo, RootOptimumCLICommand 16 | from .env import EnvironmentCommand 17 | from .export import ExportCommand, ONNXExportCommand, TFLiteExportCommand 18 | from .onnxruntime import ONNXRuntimeCommand, ONNXRuntimeOptimizeCommand, ONNXRuntimeQuantizeCommand 19 | from .optimum_cli import register_optimum_cli_subcommand 20 | -------------------------------------------------------------------------------- /optimum/utils/constant.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | CONFIG_NAME = "config.json" 17 | DIFFUSION_MODEL_UNET_SUBFOLDER = "unet" 18 | DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER = "text_encoder" 19 | DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER = "vae_decoder" 20 | DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER = "vae_encoder" 21 | DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER = "text_encoder_2" 22 | ONNX_WEIGHTS_NAME = "model.onnx" 23 | -------------------------------------------------------------------------------- /.github/workflows/test_onnxruntime_train.yml: -------------------------------------------------------------------------------- 1 | name: ONNX Runtime / Test ORTTrainer 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 1 */3 * * # at 1am every 3 days 7 | pull_request: 8 | types: [opened, synchronize, reopened, labeled] 9 | 10 | jobs: 11 | do-the-job: 12 | if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'training')}} 13 | name: Run ORTTrainer test 14 | runs-on: [single-gpu, nvidia-gpu, t4, ci] 15 | env: 16 | AWS_REGION: us-east-1 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v2 20 | - name: Build image 21 | run: | 22 | docker build -f tests/onnxruntime/docker/Dockerfile_onnxruntime_trainer -t onnxruntime/train . 23 | - name: Run test within docker container 24 | run: | 25 | docker run --rm --gpus all -v /mnt/cache/.cache/huggingface:/root/.cache/huggingface --workdir=/workspace/optimum/tests onnxruntime/train:latest -------------------------------------------------------------------------------- /docs/source/exporters/tflite/package_reference/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration classes for TFLite export 14 | 15 | ## Base classes 16 | 17 | [[autodoc]] exporters.tflite.TFLiteConfig 18 | - inputs 19 | - outputs 20 | - generate_dummy_inputs 21 | 22 | ## Middle-end classes 23 | 24 | [[autodoc]] exporters.tflite.config.TextEncoderTFliteConfig 25 | 26 | [[autodoc]] exporters.tflite.config.VisionTFLiteConfig 27 | -------------------------------------------------------------------------------- /.github/workflows/test_fx.yml: -------------------------------------------------------------------------------- 1 | name: FX / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9] 19 | os: [ubuntu-20.04, macos-13] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | pip install .[tests] 31 | pip install git+https://github.com/huggingface/transformers.git 32 | - name: Test with unittest 33 | working-directory: tests 34 | run: | 35 | python -m pytest fx/optimization/test_transformations.py --exitfirst 36 | -------------------------------------------------------------------------------- /.github/workflows/test_export_onnx_cli_timm.yml: -------------------------------------------------------------------------------- 1 | name: Exporters ONNX CLI Timm (scheduled) / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 7 * * * # 
every day at 7am 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | build: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: [3.8, 3.9] 18 | os: [ubuntu-20.04] 19 | 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies for pytorch export 28 | run: | 29 | pip install .[tests,exporters] 30 | - name: Test with unittest 31 | working-directory: tests 32 | run: | 33 | RUN_SLOW=1 pytest exporters/onnx/test_exporters_onnx_cli.py -n auto -k "timm" -s --durations=0 34 | -------------------------------------------------------------------------------- /optimum/onnxruntime/graph.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import List 15 | 16 | from onnxruntime.transformers.onnx_model import OnnxModel 17 | 18 | 19 | def find_fully_connected_layers_nodes(model: OnnxModel) -> List[List[str]]: 20 | adds = model.get_nodes_by_op_type("Add") 21 | fc = list(filter(lambda graph: graph[1] is not None, ((add, model.match_parent(add, "MatMul")) for add in adds))) 22 | 23 | return fc 24 | -------------------------------------------------------------------------------- /.github/workflows/test_export_onnx_cli.yml: -------------------------------------------------------------------------------- 1 | name: Exporters ONNX CLI / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9] 19 | os: [ubuntu-20.04] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies for pytorch export 29 | run: | 30 | pip install .[tests,exporters] 31 | - name: Test with unittest 32 | working-directory: tests 33 | run: | 34 | pytest exporters/onnx/test_exporters_onnx_cli.py -n auto -m "not tensorflow_test and not timm_test" -s --durations=0 35 | -------------------------------------------------------------------------------- /optimum/quantization_base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import ABC, abstractmethod 3 | from pathlib import Path 4 | from typing import Optional, Union 5 | 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class 
OptimumQuantizer(ABC): 11 | @classmethod 12 | def from_pretrained( 13 | cls, 14 | model_or_path: Union[str, Path], 15 | file_name: Optional[str] = None, 16 | ): 17 | """Overwrite this method in subclass to define how to load your model from pretrained""" 18 | raise NotImplementedError( 19 | "Overwrite this method in subclass to define how to load your model from pretrained for quantization" 20 | ) 21 | 22 | @abstractmethod 23 | def quantize(self, save_dir: Union[str, Path], file_prefix: Optional[str] = None, **kwargs): 24 | """Overwrite this method in subclass to define how to quantize your model for quantization""" 25 | raise NotImplementedError( 26 | "Overwrite this method in subclass to define how to quantize your model for quantization" 27 | ) 28 | -------------------------------------------------------------------------------- /optimum/utils/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .base import Preprocessor, TaskProcessor 17 | from .image_classification import ImageClassificationProcessing 18 | from .question_answering import QuestionAnsweringProcessing 19 | from .task_processors_manager import TaskProcessorsManager 20 | from .text_classification import TextClassificationProcessing 21 | from .token_classification import TokenClassificationProcessing 22 | -------------------------------------------------------------------------------- /tests/onnxruntime/docker/Dockerfile_onnxruntime_gpu: -------------------------------------------------------------------------------- 1 | # use version with CUDA 11.8 and TensorRT 8.5.1.7 to match ORT 1.14 requirements 2 | FROM nvcr.io/nvidia/tensorrt:22.12-py3 3 | CMD nvidia-smi 4 | 5 | # Ignore interactive questions during `docker build` 6 | ENV DEBIAN_FRONTEND noninteractive 7 | 8 | # Install and update tools to minimize security vulnerabilities 9 | RUN apt-get update 10 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 11 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \ 12 | apt-get clean 13 | RUN unattended-upgrade 14 | RUN apt-get autoremove -y 15 | 16 | RUN python -m pip install -U pip 17 | 18 | RUN pip install transformers torch onnxruntime-gpu 19 | RUN pip install datasets evaluate diffusers scipy 20 | 21 | # Install Optimum 22 | COPY . 
/workspace/optimum 23 | RUN pip install /workspace/optimum[onnxruntime-gpu,tests] 24 | 25 | ENV TEST_LEVEL=1 26 | CMD pytest onnxruntime/test_*.py --durations=0 -s -vvvvv -m cuda_ep_test -m trt_ep_test 27 | -------------------------------------------------------------------------------- /.github/workflows/test_exporters_gpu.yml: -------------------------------------------------------------------------------- 1 | name: Exporters / Test GPU 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 1 */3 * * # at 1am every 3 days 7 | pull_request: 8 | types: [labeled] 9 | # uncomment to enable on PR merge on main branch: 10 | #push: 11 | # branches: 12 | # - main 13 | 14 | jobs: 15 | do-the-job: 16 | if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }} 17 | name: Start self-hosted EC2 runner 18 | runs-on: [single-gpu, nvidia-gpu, t4, ci] 19 | env: 20 | AWS_REGION: us-east-1 21 | steps: 22 | - name: Checkout 23 | uses: actions/checkout@v2 24 | - name: Build image 25 | run: | 26 | docker build -f tests/exporters/Dockerfile_exporters_gpu -t exporters-gpu . 27 | - name: Test with unittest within docker container 28 | run: | 29 | docker run --rm --gpus all -v /mnt/cache/.cache/huggingface:/root/.cache/huggingface --workdir=/workspace/optimum/tests exporters-gpu:latest 30 | -------------------------------------------------------------------------------- /.github/workflows/test_gptq.yml: -------------------------------------------------------------------------------- 1 | name: GPTQ Quantization / Test GPU 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 1 */3 * * # at 1am every 3 days 7 | pull_request: 8 | types: [opened, synchronize, reopened, labeled] 9 | # uncomment to enable on PR merge on main branch: 10 | #push: 11 | # branches: 12 | # - main 13 | 14 | jobs: 15 | do-the-job: 16 | if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }} 17 | name: Start self-hosted EC2 runner 18 | runs-on: [single-gpu, nvidia-gpu, t4, ci] 19 | env: 20 | AWS_REGION: us-east-1 21 | steps: 22 | - name: Checkout 23 | uses: actions/checkout@v2 24 | - name: Build image 25 | run: | 26 | docker build -f tests/gptq/Dockerfile_quantization_gpu -t gptq-gpu . 
27 | - name: Test with unittest within docker container 28 | run: | 29 | docker run --rm --gpus all -v $(pwd)/hf_cache:/root/.cache/huggingface --workdir=/workspace/optimum/tests gptq-gpu:latest 30 | -------------------------------------------------------------------------------- /tests/exporters/Dockerfile_exporters_gpu: -------------------------------------------------------------------------------- 1 | # use version with cudnn 8.5 to match torch==1.13.1 that uses 8.5.0.96 2 | # has Python 3.8.10 3 | FROM nvcr.io/nvidia/tensorrt:22.08-py3 4 | CMD nvidia-smi 5 | 6 | # Ignore interactive questions during `docker build` 7 | ENV DEBIAN_FRONTEND noninteractive 8 | 9 | # Install and update tools to minimize security vulnerabilities 10 | RUN apt-get update 11 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 12 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev && \ 13 | apt-get clean 14 | RUN unattended-upgrade 15 | RUN apt-get autoremove -y 16 | 17 | RUN python -m pip install -U pip 18 | 19 | RUN pip install torch scipy datasets evaluate diffusers 20 | 21 | RUN pip install transformers 22 | RUN pip install onnxruntime-gpu 23 | 24 | # Install Optimum 25 | COPY . /workspace/optimum 26 | RUN pip install /workspace/optimum[onnxruntime-gpu,tests,exporters-gpu] 27 | 28 | ENV TEST_LEVEL=1 29 | ENV RUN_SLOW=1 30 | CMD pytest exporters --durations=0 -s -vvvvv -m gpu_test 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F680 Feature request" 2 | description: Submit a proposal/request for a new optimum feature 3 | labels: [ "feature" ] 4 | body: 5 | - type: textarea 6 | id: feature-request 7 | validations: 8 | required: true 9 | attributes: 10 | label: Feature request 11 | description: | 12 | A clear and concise description of the feature proposal. Please provide a link to the paper and code in case they exist. 13 | 14 | - type: textarea 15 | id: motivation 16 | validations: 17 | required: true 18 | attributes: 19 | label: Motivation 20 | description: | 21 | Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too. 22 | 23 | 24 | - type: textarea 25 | id: contribution 26 | validations: 27 | required: true 28 | attributes: 29 | label: Your contribution 30 | description: | 31 | Is there any way that you could help, e.g. by submitting a PR? 32 | -------------------------------------------------------------------------------- /docs/source/exporters/tflite/usage_guides/contribute.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Adding support for an unsupported architecture 14 | 15 | If you wish to export a model whose architecture is not already supported by the library, the PR [#813 Adds support for ResNet](https://github.com/huggingface/optimum/pull/813 ) can be used as a reference. 
16 | 17 | You can make sure tests pass for the new `my_new_modeltype` model type by running: 18 | 19 | ```bash 20 | pytest tests/exporters/tflite/test_*.py -k "my_new_modeltype" -s --exitfirst 21 | ``` 22 | -------------------------------------------------------------------------------- /optimum/exporters/error_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Utilities related to error handling.""" 16 | 17 | 18 | class ShapeError(ValueError): 19 | pass 20 | 21 | 22 | class AtolError(ValueError): 23 | pass 24 | 25 | 26 | class OutputMatchError(ValueError): 27 | pass 28 | 29 | 30 | class NumberOfInputsMatchError(ValueError): 31 | pass 32 | 33 | 34 | class NumberOfOutputsMatchError(ValueError): 35 | pass 36 | 37 | 38 | class MinimumVersionError(ValueError): 39 | pass 40 | -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_tflite_*.py -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/test_onnxruntime_gpu.yml: -------------------------------------------------------------------------------- 1 | name: ONNX Runtime / Test GPU 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 1 */3 * * # at 1am every 3 days 7 | pull_request: 8 | types: [opened, synchronize, reopened, labeled] 9 | # uncomment to enable on PR merge on main branch: 10 | #push: 11 | # branches: 12 | # - main 13 | 14 | jobs: 15 | do-the-job: 16 | if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }} 17 | name: Start self-hosted EC2 runner 18 | runs-on: [single-gpu, nvidia-gpu, t4, ci] 19 | env: 20 | AWS_REGION: us-east-1 21 | steps: 22 | - name: Checkout 
23 | uses: actions/checkout@v2 24 | - name: Build image 25 | run: | 26 | docker build -f tests/onnxruntime/docker/Dockerfile_onnxruntime_gpu -t onnxruntime-gpu . 27 | - name: Test with unittest within docker container 28 | run: | 29 | docker run --rm --gpus all -v /mnt/cache/.cache/huggingface:/root/.cache/huggingface --workdir=/workspace/optimum/tests onnxruntime-gpu:latest 30 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/package_reference/trainer.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Trainer 14 | 15 | ## ORTTrainer 16 | 17 | [[autodoc]] onnxruntime.trainer.ORTTrainer 18 | - all 19 | 20 | ## ORTSeq2SeqTrainer 21 | 22 | [[autodoc]] onnxruntime.trainer_seq2seq.ORTSeq2SeqTrainer 23 | - evaluate 24 | - predict 25 | 26 | ## ORTTrainingArguments 27 | 28 | [[autodoc]] onnxruntime.training_args.ORTTrainingArguments 29 | - all 30 | 31 | ## ORTSeq2SeqTrainingArguments 32 | 33 | [[autodoc]] onnxruntime.training_args_seq2seq.ORTSeq2SeqTrainingArguments 34 | - all -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -m "not quantization" -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_exporters.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 
2 | 3 | name: dev_Exporters / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | - 3.9 18 | os: 19 | - ubuntu-20.04 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | pip install .[tests,exporters-tf] 30 | pip install -U git+https://github.com/huggingface/evaluate 31 | pip install -U git+https://github.com/huggingface/diffusers 32 | pip install -U git+https://github.com/huggingface/transformers 33 | - name: Test with unittest 34 | working-directory: tests 35 | run: | 36 | pytest exporters -s --durations=0 -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli_quantization_fp16.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI Quantization FP16 / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -k "float16_quantization" -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/test_benckmark.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | name: Benchmark suite / Python - Test 4 | 5 | on: 6 | push: 7 | branches: [ main ] 8 | pull_request: 9 | branches: [ main ] 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | build: 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | python-version: [3.8, 3.9] 21 | os: [ubuntu-20.04] 22 | 23 | runs-on: ${{ matrix.os }} 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: Setup Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v2 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | - name: Install dependencies 31 | run: | 32 | pip install wheel 33 | pip install .[tests,onnxruntime,benchmark] 34 | - name: Test with unittest 35 | run: | 36 | python -m unittest discover --start-directory tests/benchmark --pattern 'test_*.py' 
37 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_onnx.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 2 | 3 | name: dev_ONNX / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | - 3.9 18 | os: 19 | - ubuntu-20.04 20 | - macos-13 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | pip install .[tests,onnxruntime] tensorflow tf2onnx 31 | pip install -U git+https://github.com/huggingface/evaluate 32 | pip install -U git+https://github.com/huggingface/diffusers 33 | pip install -U git+https://github.com/huggingface/transformers 34 | - name: Test with unittest 35 | working-directory: tests 36 | run: | 37 | python -m unittest discover -s onnx -p test_*.py 38 | -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli_dynamic_quantization_int8.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI Dynamic Quantization INT8 / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -k "int8_dynamic_quantization" -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/test_dummy_inputs.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | name: Dummy inputs / Python - Test 4 | 5 | on: 6 | push: 7 | branches: [ main ] 8 | pull_request: 9 | branches: [ main ] 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | build: 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | python-version: [3.8, 3.9] 21 | os: [ubuntu-20.04, macos-13] 22 | 23 | runs-on: ${{ matrix.os }} 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: Setup Python ${{ matrix.python-version }} 27 | uses: 
actions/setup-python@v2 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install .[tests] 34 | - name: Test with unittest 35 | working-directory: tests 36 | run: | 37 | python -m unittest discover -s utils -p 'test_*.py' 38 | -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli_quantization_int8x16.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI Quantization INT8x16 / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -k "tflite_int8x16_quantization_with_default_dataset" -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_benckmark.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 
2 | 3 | name: dev_Benchmark suite / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | - 3.9 18 | os: 19 | - ubuntu-20.04 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | pip install wheel 30 | pip install .[tests,onnxruntime,benchmark] 31 | pip install -U git+https://github.com/huggingface/evaluate 32 | pip install -U git+https://github.com/huggingface/diffusers 33 | pip install -U git+https://github.com/huggingface/transformers 34 | - name: Test with unittest 35 | run: | 36 | python -m unittest discover --start-directory tests/benchmark --pattern 37 | test_*.py -------------------------------------------------------------------------------- /.github/workflows/test_cli.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | name: Optimum CLI / Python - Test 4 | 5 | on: 6 | push: 7 | branches: [ main ] 8 | pull_request: 9 | branches: [ main ] 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | build: 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | python-version: [3.8, 3.9] 21 | os: [ubuntu-20.04, macos-13] 22 | 23 | runs-on: ${{ matrix.os }} 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: Setup Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v2 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install .[tests,exporters,exporters-tf] 34 | - name: Test with unittest 35 | working-directory: tests 36 | run: | 37 | python -m unittest discover -s cli -p 'test_*.py' 38 | 39 | -------------------------------------------------------------------------------- /tests/onnxruntime/ds_configs/ds_config_zero_stage_inifinity.json: -------------------------------------------------------------------------------- 1 | { 2 | "zero_optimization": { 3 | "stage": 3, 4 | "offload_optimizer": { 5 | "device": "nvme", 6 | "nvme_path": "/dev/nvme1n1", 7 | "pin_memory": true, 8 | "buffer_count": 4, 9 | "fast_init": false 10 | }, 11 | "offload_param": { 12 | "device": "nvme", 13 | "nvme_path": "/dev/nvme1n1", 14 | "pin_memory": true, 15 | "buffer_count": 5, 16 | "buffer_size": 1e8, 17 | "max_in_cpu": 1e9 18 | }, 19 | "aio": { 20 | "block_size": 262144, 21 | "queue_depth": 32, 22 | "thread_count": 1, 23 | "single_submit": false, 24 | "overlap_events": true 25 | }, 26 | "overlap_comm": true, 27 | "contiguous_gradients": true, 28 | "sub_group_size": 1e9, 29 | "reduce_bucket_size": "auto", 30 | "stage3_prefetch_bucket_size": "auto", 31 | "stage3_param_persistence_threshold": "auto", 32 | "stage3_max_live_parameters": 1e9, 33 | "stage3_max_reuse_distance": 1e9, 34 | "stage3_gather_16bit_weights_on_model_save": true 35 | } 36 | } 
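The ZeRO-Infinity configuration above is consumed by the `ORTTrainer` GPU tests. As a rough, hedged sketch of how such a file is typically wired into a training run — the checkpoint name, toy dataset, and batch size below are illustrative placeholders rather than values taken from this repository — the arguments object simply points to the JSON through its `deepspeed` field, and the script is launched with a distributed launcher such as `deepspeed` or `torchrun`:

```python
# Hedged sketch only: hook a DeepSpeed ZeRO config (like the one above) into ORTTrainer.
from datasets import Dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from optimum.onnxruntime import ORTTrainer, ORTTrainingArguments

model_name = "bert-base-uncased"  # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Tiny toy dataset, just to keep the example self-contained.
raw = Dataset.from_dict({"text": ["great movie", "terrible movie"], "label": [1, 0]})
train_dataset = raw.map(
    lambda batch: tokenizer(batch["text"], truncation=True, padding="max_length", max_length=32),
    batched=True,
)

args = ORTTrainingArguments(
    output_dir="ort_zero_output",
    per_device_train_batch_size=8,
    fp16=True,
    # Path to a ZeRO config such as the ZeRO-Infinity file shown above.
    deepspeed="tests/onnxruntime/ds_configs/ds_config_zero_stage_inifinity.json",
)

trainer = ORTTrainer(model=model, args=args, train_dataset=train_dataset, tokenizer=tokenizer)
trainer.train()  # run under `torchrun`/`deepspeed` so that DeepSpeed is actually initialized
```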
-------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli_quantization_full_int8.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI Quantization Full INT8 Default Dataset / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -k "full_int8_quantization_with_default_dataset" -s --durations=0 40 | -------------------------------------------------------------------------------- /optimum/pipelines/diffusers/watermark.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from imwatermark import WatermarkEncoder 3 | 4 | 5 | WATERMARK_MESSAGE = 0b101100111110110010010000011110111011000110011110 6 | WATERMARK_BITS = [int(bit) for bit in bin(WATERMARK_MESSAGE)[2:]] 7 | 8 | 9 | # Adapted from https://github.com/huggingface/diffusers/blob/v0.18.1/src/diffusers/pipelines/stable_diffusion_xl/watermark.py#L12 10 | class StableDiffusionXLWatermarker: 11 | def __init__(self): 12 | self.watermark = WATERMARK_BITS 13 | self.encoder = WatermarkEncoder() 14 | self.encoder.set_watermark("bits", self.watermark) 15 | 16 | def apply_watermark(self, images: np.array): 17 | # can't encode images that are smaller than 256 18 | if images.shape[-1] < 256: 19 | return images 20 | 21 | # cv2 doesn't support float16 22 | if images.dtype == np.float16: 23 | images = images.astype(np.float32) 24 | 25 | images = (255 * (images / 2 + 0.5)).transpose((0, 2, 3, 1)) 26 | 27 | images = np.array([self.encoder.encode(image, "dwtDct") for image in images]).transpose((0, 3, 1, 2)) 28 | 29 | np.clip(2 * (images / 255 - 0.5), -1.0, 1.0, out=images) 30 | 31 | return images 32 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_dummy_inputs.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 
2 | 3 | name: dev_Dummy inputs / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | - 3.9 18 | os: 19 | - ubuntu-20.04 20 | - macos-13 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install .[tests] 32 | pip install -U git+https://github.com/huggingface/evaluate 33 | pip install -U git+https://github.com/huggingface/diffusers 34 | pip install -U git+https://github.com/huggingface/transformers 35 | - name: Test with unittest 36 | working-directory: tests 37 | run: | 38 | python -m unittest discover -s utils -p test_*.py 39 | -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli_quantization_int8_custom_dataset.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI Quantization INT8 Custom Dataset / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -k "tflite_int8_quantization_with_custom_dataset" -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli_quantization_int8_default_dataset.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI Quantization INT8 Default Dataset / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 
38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -k "tflite_int8_quantization_with_default_dataset" -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_fx.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 2 | 3 | name: dev_FX / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | - 3.9 18 | os: 19 | - ubuntu-20.04 20 | - macos-13 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | pip install .[tests] 31 | pip install git+https://github.com/huggingface/transformers.git 32 | pip install -U git+https://github.com/huggingface/evaluate 33 | pip install -U git+https://github.com/huggingface/diffusers 34 | pip install -U git+https://github.com/huggingface/transformers 35 | - name: Test with unittest 36 | working-directory: tests 37 | run: | 38 | python -m pytest fx/optimization/test_transformations.py --exitfirst 39 | -------------------------------------------------------------------------------- /.github/workflows/test_exporters_slow.yml: -------------------------------------------------------------------------------- 1 | name: Exporters slow / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 7 * * * # every day at 7am 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | build: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: [3.8, 3.9] 18 | os: [ubuntu-20.04] 19 | 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies for pytorch export 28 | run: | 29 | pip install .[tests,exporters] 30 | - name: Test with unittest 31 | working-directory: tests 32 | run: | 33 | RUN_SLOW=1 pytest exporters -s -m "not tensorflow_test and run_slow" --durations=0 34 | - name: Install dependencies for tensorflow export 35 | run: | 36 | pip install .[tests,exporters-tf] 37 | - name: Test with unittest 38 | working-directory: tests 39 | run: | 40 | RUN_SLOW=1 pytest exporters -s -m "tensorflow_test and run_slow" --durations=0 41 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_bettertransformer.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 
2 | 3 | name: dev_BetterTransformer / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | os: 18 | - ubuntu-20.04 19 | - macos-13 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | pip install .[tests] 30 | pip3 install --upgrade torch torchvision torchaudio 31 | pip install accelerate 32 | pip install -U git+https://github.com/huggingface/evaluate 33 | pip install -U git+https://github.com/huggingface/diffusers 34 | pip install -U git+https://github.com/huggingface/transformers 35 | - name: Test with unittest 36 | working-directory: tests 37 | run: | 38 | python -m unittest discover -s bettertransformer -p test_*.py 39 | -------------------------------------------------------------------------------- /optimum/exporters/onnx/constants.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # 2 GB 17 | EXTERNAL_DATA_FORMAT_SIZE_LIMIT = 2 * 1024 * 1024 * 1024 18 | 19 | ONNX_ENCODER_NAME = "encoder_model" 20 | ONNX_DECODER_NAME = "decoder_model" 21 | ONNX_DECODER_WITH_PAST_NAME = "decoder_with_past_model" 22 | ONNX_DECODER_MERGED_NAME = "decoder_model_merged" 23 | 24 | UNPICKABLE_ARCHS = [ 25 | "encodec", 26 | "hubert", 27 | "sew", 28 | "sew-d", 29 | "speecht5", 30 | "unispeech", 31 | "unispeech-sat", 32 | "wav2vec2", 33 | "wav2vec2-conformer", 34 | "wavlm", 35 | ] 36 | 37 | SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED = [ 38 | "bart", 39 | "musicgen", 40 | "whisper", 41 | ] 42 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_onnxruntime.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 
2 | 3 | name: dev_ONNX Runtime / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | - 3.9 18 | os: 19 | - ubuntu-20.04 20 | - windows-2019 21 | - macos-13 22 | runs-on: ${{ matrix.os }} 23 | steps: 24 | - uses: actions/checkout@v2 25 | - name: Setup Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v2 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | - name: Install dependencies 30 | run: | 31 | pip install .[tests,onnxruntime] 32 | pip install -U git+https://github.com/huggingface/evaluate 33 | pip install -U git+https://github.com/huggingface/diffusers 34 | pip install -U git+https://github.com/huggingface/transformers 35 | - name: Test with pytest 36 | working-directory: tests 37 | run: | 38 | python -m pytest -n auto -m "not run_in_series" onnxruntime 39 | python -m pytest -m "run_in_series" onnxruntime 40 | -------------------------------------------------------------------------------- /tests/cli/cli_with_custom_command.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import os 17 | 18 | from optimum.commands import BaseOptimumCLICommand, CommandInfo, ExportCommand 19 | 20 | 21 | class MyCustomCommand(BaseOptimumCLICommand): 22 | COMMAND = CommandInfo(name="blablabla", help="Says something.") 23 | 24 | def run(self): 25 | print("If the CI can read this, it means it worked!") 26 | 27 | 28 | parent_command_cls = os.environ.get("TEST_REGISTER_COMMAND_WITH_SUBCOMMAND", None) 29 | 30 | if parent_command_cls == "true": 31 | REGISTER_COMMANDS = [ 32 | (MyCustomCommand, ExportCommand), 33 | ] 34 | else: 35 | REGISTER_COMMANDS = [ 36 | MyCustomCommand, 37 | ] 38 | -------------------------------------------------------------------------------- /.github/workflows/test_export_onnx.yml: -------------------------------------------------------------------------------- 1 | name: Exporters ONNX / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9] 19 | os: [ubuntu-20.04] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies for pytorch export 29 | run: | 30 | pip install .[tests,exporters] 31 | - name: Test with unittest 32 | working-directory: tests 33 | run: | 34 | pytest exporters/onnx/test_onnx_*.py -s -n auto -m "not tensorflow_test and not timm_test" --durations=0 35 | - name: Install dependencies for tensorflow export 36 | run: | 37 | pip install .[tests,exporters-tf] 38 | - name: Test with unittest 39 | working-directory: tests 40 | run: | 41 | pytest exporters/onnx/test_onnx_*.py -n auto -m "tensorflow_test" -s --durations=0 42 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/concept_guides/onnx.mdx: -------------------------------------------------------------------------------- 1 | # ONNX 🤝 ONNX Runtime 2 | 3 | ONNX is an open standard that defines a common set of operators and a common file format to represent deep learning models in a wide variety of frameworks, including PyTorch and TensorFlow. When a model is exported to the ONNX format, these operators are used to construct a computational graph (often called an _intermediate representation_) that represents the flow of data through the neural network. 4 | 5 | 6 | 7 | You can use [Netron](https://netron.app/) to visualize any ONNX file on the Hugging Face Hub. Simply append the file's URL to `http://netron.app?url=` as in [this example](https://netron.app/?url=https://huggingface.co/cmarkea/distilcamembert-base-ner/blob/main/model.onnx). 8 | 9 | 10 | 11 | By exposing a graph with standardized operators and data types, ONNX makes it easy to switch between frameworks. For example, a model trained in PyTorch can be exported to ONNX format and then imported in TensorFlow (and vice versa). 12 | 13 | Where ONNX really shines is when it is coupled with a dedicated accelerator like ONNX Runtime, or ORT for short. ORT provides tools to optimize the ONNX graph through techniques like operator fusion and constant folding, and defines an interface to execution providers that allow you to run the model on different types of hardware.
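To make the ONNX → ONNX Runtime hand-off above concrete, here is a brief, hedged sketch using 🤗 Optimum's `ORTModel` classes. The checkpoint is only an example, and depending on the installed Optimum version the export flag may be `from_transformers=True` instead of `export=True`:

```python
# Illustrative sketch: export a Transformers checkpoint to ONNX on the fly and run it with ONNX Runtime.
from transformers import AutoTokenizer

from optimum.onnxruntime import ORTModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"  # example checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)

# export=True converts the PyTorch weights into an ONNX graph before loading it in ONNX Runtime.
ort_model = ORTModelForSequenceClassification.from_pretrained(model_id, export=True)

inputs = tokenizer("ONNX Runtime makes inference fast!", return_tensors="pt")
logits = ort_model(**inputs).logits
print(logits.argmax(dim=-1))

# The exported graph can be saved and reloaded later, optionally with a GPU execution provider.
ort_model.save_pretrained("onnx_sst2")
```

The same export can also be produced from the command line with `optimum-cli export onnx --model <model_id> <output_dir>`.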
-------------------------------------------------------------------------------- /docs/source/torch_fx/package_reference/optimization.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Optimization 14 | 15 | ## Transformation 16 | 17 | [[autodoc]] fx.optimization.Transformation 18 | - all 19 | - __call__ 20 | 21 | ## Reversible transformation 22 | 23 | [[autodoc]] fx.optimization.ReversibleTransformation 24 | - all 25 | - __call__ 26 | 27 | [[autodoc]] fx.optimization.compose 28 | 29 | ### Transformations 30 | 31 | [[autodoc]] fx.optimization.MergeLinears 32 | - all 33 | 34 | [[autodoc]] fx.optimization.FuseBiasInLinear 35 | - all 36 | 37 | [[autodoc]] fx.optimization.ChangeTrueDivToMulByInverse 38 | - all 39 | 40 | [[autodoc]] fx.optimization.FuseBatchNorm2dInConv2d 41 | - all 42 | 43 | [[autodoc]] fx.optimization.FuseBatchNorm1dInLinear 44 | - all -------------------------------------------------------------------------------- /optimum/exporters/tflite/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from typing import TYPE_CHECKING 17 | 18 | from transformers.utils import _LazyModule 19 | 20 | 21 | _import_structure = { 22 | "base": ["QuantizationApproach", "TFLiteQuantizationConfig", "TFLiteConfig"], 23 | "convert": ["export", "validate_model_outputs"], 24 | } 25 | 26 | if TYPE_CHECKING: 27 | from .base import QuantizationApproach, TFLiteQuantizationConfig, TFLiteConfig # noqa 28 | from .convert import export, validate_model_outputs # noqa 29 | else: 30 | import sys 31 | 32 | sys.modules[__name__] = _LazyModule( 33 | __name__, 34 | globals()["__file__"], 35 | _import_structure, 36 | module_spec=__spec__, 37 | ) 38 | -------------------------------------------------------------------------------- /docs/source/_redirects.yml: -------------------------------------------------------------------------------- 1 | # Optimum Graphcore 2 | graphcore_index: graphcore/index 3 | graphcore_quickstart: graphcore/quickstart 4 | graphcore_ipu_config: graphcore/ipu_config 5 | graphcore_trainer: graphcore/trainer 6 | graphcore_add_support_for_new_model: graphcore/add_support_for_new_model 7 | 8 | # Optimum Habana 9 | habana_index: habana/index 10 | habana_quickstart: habana/quickstart 11 | habana_single_hpu: habana/tutorials/single_hpu 12 | habana_distributed: habana/tutorials/distributed 13 | habana_deepspeed: habana/usage_guides/deepspeed 14 | habana_accelerate_training: habana/usage_guides/accelerate_training 15 | habana_trainer: habana/package_reference/trainer 16 | habana_gaudi_config: habana/package_reference/gaudi_config 17 | habana/usage_guides/stable_diffusion: habana/tutorials/stable_diffusion 18 | habana/tutorials/pretraining: habana/usage_guides/pretraining 19 | 20 | # Optimum Intel 21 | intel_index: intel/index 22 | intel_quickstart: intel/optimization_inc 23 | intel_configuration: intel/reference_inc 24 | intel_optimization: intel/optimization_inc 25 | intel_quantization: intel/optimization_inc 26 | intel_pruning: intel/optimization_inc 27 | intel_trainer: intel/reference_inc 28 | 29 | # Optimum Neuron 30 | docs/optimum-neuron/index: /docs/optimum-neuron/index 31 | 32 | # Optimum TPU 33 | docs/optimum-tpu/index: /docs/optimum-tpu/index 34 | tpu/index: /docs/optimum-tpu/index 35 | -------------------------------------------------------------------------------- /.github/workflows/check_code_quality.yml: -------------------------------------------------------------------------------- 1 | name: check_code_quality 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths: 7 | - "optimum/**.py" 8 | - "tests/**.py" 9 | - "examples/**.py" 10 | 11 | pull_request: 12 | branches: [ main ] 13 | paths: 14 | - "optimum/**.py" 15 | - "tests/**.py" 16 | - "examples/**.py" 17 | 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | build: 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | python-version: [3.8] 28 | os: [ubuntu-20.04] 29 | 30 | runs-on: ${{ matrix.os }} 31 | steps: 32 | - uses: actions/checkout@v2 33 | - name: Setup Python ${{ matrix.python-version }} 34 | uses: actions/setup-python@v2 35 | with: 36 | python-version: ${{ matrix.python-version }} 37 | - name: Create and start a virtual environment 38 | run: | 39 | python -m venv venv 40 | source venv/bin/activate 41 | - name: Install dependencies 42 | run: | 43 | source venv/bin/activate 44 | pip install --upgrade pip 45 | pip install .[quality] 46 | - name: Check style with black 47 | run: | 48 | source venv/bin/activate 49 | black --check 
. 50 | - name: Check style with ruff 51 | run: | 52 | source venv/bin/activate 53 | ruff . 54 | -------------------------------------------------------------------------------- /tests/onnxruntime/ds_configs/ds_config_zero_stage_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "hysteresis": 2, 7 | "min_loss_scale": 1 8 | }, 9 | 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | 14 | "zero_optimization": { 15 | "stage": 1, 16 | "allgather_partitions": true, 17 | "allgather_bucket_size": 2e8, 18 | "overlap_comm": true, 19 | "reduce_scatter": true, 20 | "reduce_bucket_size": 2e8, 21 | "contiguous_gradients": false, 22 | "cpu_offload": false 23 | }, 24 | 25 | "zero_allow_untested_optimizer": true, 26 | 27 | "optimizer": { 28 | "type": "AdamW", 29 | "params": { 30 | "lr": "auto", 31 | "betas": "auto", 32 | "eps": "auto", 33 | "weight_decay": "auto" 34 | } 35 | }, 36 | 37 | "scheduler": { 38 | "type": "WarmupLR", 39 | "params": { 40 | "warmup_min_lr": "auto", 41 | "warmup_max_lr": "auto", 42 | "warmup_num_steps": "auto" 43 | } 44 | }, 45 | 46 | "steps_per_print": 2000, 47 | "train_batch_size": "auto", 48 | "train_micro_batch_size_per_gpu": "auto", 49 | "wall_clock_breakdown": false 50 | } -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [tool.black] 16 | line-length = 119 17 | target-version = ['py37'] 18 | 19 | [tool.ruff] 20 | # Never enforce `E501` (line length violations). 21 | ignore = ["C901", "E501", "E741", "W605"] 22 | select = ["C", "E", "F", "I", "W"] 23 | line-length = 119 24 | 25 | # Ignore import violations in all `__init__.py` files. 
26 | [tool.ruff.per-file-ignores] 27 | "__init__.py" = ["E402", "F401", "F403", "F811"] 28 | 29 | [tool.ruff.isort] 30 | lines-after-imports = 2 31 | known-first-party = ["optimum"] 32 | 33 | [tool.pytest.ini_options] 34 | markers = [ 35 | "gpu_test", 36 | "cuda_ep_test", 37 | "trt_ep_test", 38 | "rocm_ep_test", 39 | "tensorflow_test", 40 | "timm_test", 41 | "run_in_series", 42 | "run_slow", 43 | "accelerate_test", 44 | "fp16", 45 | "quantization", 46 | ] 47 | -------------------------------------------------------------------------------- /.github/workflows/test_bettertransformer.yml: -------------------------------------------------------------------------------- 1 | name: BetterTransformer / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9] 19 | os: [ubuntu-20.04, macos-13] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | pip install .[tests] 31 | pip install --no-cache-dir --upgrade torch torchvision torchaudio 32 | pip install accelerate 33 | - name: Test on pytorch stable 34 | working-directory: tests 35 | run: | 36 | pytest bettertransformer/test_*.py -s -vvvvv 37 | - name: Install dependencies 2 38 | run: | 39 | pip uninstall -y torch torchvision torchaudio 40 | pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu 41 | - name: Test on pytorch nightly 42 | working-directory: tests 43 | run: | 44 | pytest bettertransformer/test_*.py -s -vvvvv 45 | 46 | -------------------------------------------------------------------------------- /.github/workflows/test_optimum_common.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | name: Optimum common / Python - Test 4 | 5 | on: 6 | push: 7 | branches: [ main ] 8 | pull_request: 9 | branches: [ main ] 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | build: 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | python-version: [3.8, 3.9] 21 | os: [ubuntu-20.04, windows-2019, macos-13] 22 | 23 | runs-on: ${{ matrix.os }} 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: Setup Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v2 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install .[tests] 34 | ls -l optimum/ 35 | - name: Test with unittest 36 | shell: bash 37 | run: | 38 | # Setting HUGGINGFACE_CO_STAGING to true for only one job of the matrix as the staging tests cannot run in parallel. 
39 | export HUGGINGFACE_CO_STAGING=${{ matrix.python-version == '3.8' && matrix.os == 'ubuntu-20.04' }} 40 | pytest tests/test_*.py 41 | 42 | -------------------------------------------------------------------------------- /optimum/commands/export/base.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """optimum.exporters command-line interface base classes.""" 16 | 17 | from .. import BaseOptimumCLICommand, CommandInfo 18 | from .onnx import ONNXExportCommand 19 | from .tflite import TFLiteExportCommand 20 | 21 | 22 | class ExportCommand(BaseOptimumCLICommand): 23 | COMMAND = CommandInfo( 24 | name="export", 25 | help="Export PyTorch and TensorFlow models to several formats.", 26 | ) 27 | SUBCOMMANDS = ( 28 | CommandInfo( 29 | name="onnx", 30 | help="Export PyTorch and TensorFlow to ONNX.", 31 | subcommand_class=ONNXExportCommand, 32 | ), 33 | CommandInfo( 34 | name="tflite", 35 | help="Export TensorFlow to TensorFlow Lite.", 36 | subcommand_class=TFLiteExportCommand, 37 | ), 38 | ) 39 | -------------------------------------------------------------------------------- /docs/source/exporters/tflite/overview.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Overview 14 | 15 | 🤗 Optimum handles the export of TensorFlow models to TFLite in the `exporters.tflite` module. In addition, models hosted on the Hugging Face Hub with PyTorch weights but having a TensorFlow implementation are also supported in the export thanks to Transformers' [TFPreTrainedModel.from_pretrained()](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.TFPreTrainedModel.from_pretrained) auto-conversion to TensorFlow. 16 | 17 | The TFLite export support provides classes, functions and a command line interface to export a model easily. 18 | 19 | Supported architectures: 20 | 21 | - Albert 22 | - BERT 23 | - Camembert 24 | - ConvBert 25 | - Deberta 26 | - Deberta V2 27 | - DistilBert 28 | - Electra 29 | - Flaubert 30 | - MobileBert 31 | - MPNet 32 | - ResNet 33 | - Roberta 34 | - RoFormer 35 | - XLM 36 | - XLMRoberta 37 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/multiple-choice/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Multiple choice 18 | 19 | The script [`run_swag.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/multiple-choice/run_swag.py) allows us to apply graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for multiple choice tasks. 20 | 21 | The following example applies graph optimizations on a BERT fine-tuned on the SWAG dataset.
Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node eliminations and constant folding. Higher optimization level will result in hardware dependent optimized graph. 22 | 23 | ```bash 24 | python run_swag.py \ 25 | --model_name_or_path ehdwns1516/bert-base-uncased_SWAG \ 26 | --optimization_level 1 \ 27 | --do_eval \ 28 | --output_dir /tmp/optimized_bert_swag 29 | ``` 30 | -------------------------------------------------------------------------------- /optimum/onnxruntime/training_args_seq2seq.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from dataclasses import dataclass, field 16 | from typing import Optional 17 | 18 | from transformers import Seq2SeqTrainingArguments 19 | 20 | from .training_args import ORTTrainingArguments 21 | 22 | 23 | @dataclass 24 | class ORTSeq2SeqTrainingArguments(Seq2SeqTrainingArguments, ORTTrainingArguments): 25 | """ 26 | Parameters: 27 | optim (`str` or [`training_args.ORTOptimizerNames`] or [`transformers.training_args.OptimizerNames`], *optional*, defaults to `"adamw_hf"`): 28 | The optimizer to use, including optimizers in Transformers: adamw_hf, adamw_torch, adamw_apex_fused, or adafactor. And optimizers implemented by ONNX Runtime: adamw_ort_fused. 29 | """ 30 | 31 | optim: Optional[str] = field( 32 | default="adamw_hf", 33 | metadata={"help": "The optimizer to use."}, 34 | ) 35 | -------------------------------------------------------------------------------- /optimum/commands/onnxruntime/base.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """optimum.onnxruntime command-line interface base classes.""" 16 | 17 | from .. 
import BaseOptimumCLICommand, CommandInfo 18 | from .optimize import ONNXRuntimeOptimizeCommand 19 | from .quantize import ONNXRuntimeQuantizeCommand 20 | 21 | 22 | class ONNXRuntimeCommand(BaseOptimumCLICommand): 23 | COMMAND = CommandInfo( 24 | name="onnxruntime", 25 | help="ONNX Runtime optimize and quantize utilities.", 26 | ) 27 | SUBCOMMANDS = ( 28 | CommandInfo( 29 | name="optimize", 30 | help="Optimize ONNX models.", 31 | subcommand_class=ONNXRuntimeOptimizeCommand, 32 | ), 33 | CommandInfo( 34 | name="quantize", 35 | help="Dynammic quantization for ONNX models.", 36 | subcommand_class=ONNXRuntimeQuantizeCommand, 37 | ), 38 | ) 39 | -------------------------------------------------------------------------------- /tests/onnxruntime/ds_configs/ds_config_zero_stage_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "bf16": { 3 | "enabled": "auto" 4 | }, 5 | 6 | "fp16": { 7 | "enabled": "auto", 8 | "loss_scale": 0, 9 | "loss_scale_window": 1000, 10 | "initial_scale_power": 16, 11 | "hysteresis": 2, 12 | "min_loss_scale": 1 13 | }, 14 | 15 | "optimizer": { 16 | "type": "AdamW", 17 | "params": { 18 | "lr": "auto", 19 | "betas": "auto", 20 | "eps": "auto", 21 | "weight_decay": "auto" 22 | } 23 | }, 24 | 25 | "scheduler": { 26 | "type": "WarmupLR", 27 | "params": { 28 | "warmup_min_lr": "auto", 29 | "warmup_max_lr": "auto", 30 | "warmup_num_steps": "auto" 31 | } 32 | }, 33 | 34 | "zero_optimization": { 35 | "stage": 2, 36 | "offload_optimizer": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "allgather_partitions": true, 41 | "allgather_bucket_size": 2e8, 42 | "overlap_comm": true, 43 | "reduce_scatter": true, 44 | "reduce_bucket_size": 2e8, 45 | "contiguous_gradients": true 46 | }, 47 | 48 | "gradient_accumulation_steps": "auto", 49 | "gradient_clipping": "auto", 50 | "steps_per_print": 2000, 51 | "train_batch_size": "auto", 52 | "train_micro_batch_size_per_gpu": "auto", 53 | "wall_clock_breakdown": false 54 | } 55 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/multiple-choice/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Multiple choice 18 | 19 | The script [`run_swag.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/multiple-choice/run_swag.py) allows us to apply different quantization approaches (such as dynamic and static quantization) using the [ONNX Runtime](https://github.com/microsoft/onnxruntime) quantization tool for multiple choice tasks. 20 | 21 | The following example applies post-training dynamic quantization on a BERT fine-tuned on the SWAG dataset. 22 | 23 | ```bash 24 | python run_swag.py \ 25 | --model_name_or_path ehdwns1516/bert-base-uncased_SWAG \ 26 | --quantization_approach dynamic \ 27 | --do_eval \ 28 | --output_dir /tmp/quantized_bert_swag 29 | ``` 30 | 31 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 32 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/passes/fully_connected.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Set, Tuple 15 | 16 | from onnx import ModelProto 17 | 18 | from onnxruntime.transformers.onnx_model import OnnxModel 19 | 20 | from .. import PreprocessorPass 21 | 22 | 23 | class IncludeFullyConnectedNodes(PreprocessorPass): 24 | def __init__(self): 25 | super().__init__() 26 | 27 | def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]: 28 | fc_subgraphs = [] 29 | for add_node in model.get_nodes_by_op_type("Add"): 30 | fc_components = model.match_parent_path(add_node, ["MatMul"], [1]) 31 | if fc_components is not None: 32 | fc_components.append(add_node) 33 | fc_subgraphs.append(fc_components) 34 | fc_components = {node.name for fc in fc_subgraphs for node in fc} 35 | return fc_components, set() 36 | -------------------------------------------------------------------------------- /optimum/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import TYPE_CHECKING 15 | 16 | from transformers.utils import _LazyModule 17 | 18 | 19 | _import_structure = { 20 | "graph_transformations": [ 21 | "cast_slice_nodes_inputs_to_int32", 22 | "merge_decoders", 23 | "remove_duplicate_weights", 24 | "replace_atenops_to_gather", 25 | "remove_duplicate_weights_from_tied_info", 26 | ], 27 | } 28 | 29 | if TYPE_CHECKING: 30 | from .graph_transformations import ( 31 | cast_slice_nodes_inputs_to_int32, 32 | merge_decoders, 33 | remove_duplicate_weights, 34 | remove_duplicate_weights_from_tied_info, 35 | replace_atenops_to_gather, 36 | ) 37 | else: 38 | import sys 39 | 40 | sys.modules[__name__] = _LazyModule( 41 | __name__, 42 | globals()["__file__"], 43 | _import_structure, 44 | module_spec=__spec__, 45 | ) 46 | -------------------------------------------------------------------------------- /optimum/exporters/tflite/config.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ 16 | Common TensorFlow Lite configuration classes that handle most of the features for building model specific 17 | configurations. 18 | """ 19 | 20 | from ...utils import DummyTextInputGenerator, DummyVisionInputGenerator, logging 21 | from .base import TFLiteConfig 22 | 23 | 24 | logger = logging.get_logger(__name__) 25 | 26 | 27 | class TextEncoderTFliteConfig(TFLiteConfig): 28 | """ 29 | Handles encoder-based text architectures. 30 | """ 31 | 32 | DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator,) 33 | MANDATORY_AXES = ("batch_size", "sequence_length", ("multiple-choice", "num_choices")) 34 | 35 | 36 | class VisionTFLiteConfig(TFLiteConfig): 37 | """ 38 | Handles vision architectures. 39 | """ 40 | 41 | DUMMY_INPUT_GENERATOR_CLASSES = (DummyVisionInputGenerator,) 42 | MANDATORY_AXES = ("batch_size", "num_channels", "width", "height") 43 | -------------------------------------------------------------------------------- /.github/workflows/test_offline.yml: -------------------------------------------------------------------------------- 1 | name: Offline usage / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.9] 19 | os: [ubuntu-20.04] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies for pytorch export 29 | run: | 30 | pip install .[tests,exporters,onnxruntime] 31 | - name: Test with unittest 32 | run: | 33 | HF_HOME=/tmp/ huggingface-cli download hf-internal-testing/tiny-random-gpt2 34 | 35 | HF_HOME=/tmp/ HF_HUB_OFFLINE=1 optimum-cli export onnx --model hf-internal-testing/tiny-random-gpt2 gpt2_onnx --task text-generation 36 | 37 | huggingface-cli download hf-internal-testing/tiny-random-gpt2 38 | 39 | HF_HUB_OFFLINE=1 optimum-cli export onnx --model hf-internal-testing/tiny-random-gpt2 gpt2_onnx --task text-generation 40 | 41 | pytest tests/onnxruntime/test_modeling.py -k "test_load_model_from_hub and not from_hub_onnx" -s -vvvvv 42 | 43 | HF_HUB_OFFLINE=1 pytest tests/onnxruntime/test_modeling.py -k "test_load_model_from_hub and not from_hub_onnx" -s -vvvvv -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/passes/gelu.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Set, Tuple 15 | 16 | from onnx import ModelProto 17 | 18 | from onnxruntime.transformers.onnx_model import OnnxModel 19 | 20 | from .. import PreprocessorPass 21 | 22 | 23 | class ExcludeGeLUNodes(PreprocessorPass): 24 | def __init__(self): 25 | super().__init__() 26 | 27 | def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]: 28 | gelu_subgraphs = [] 29 | for mul_node in model.get_nodes_by_op_type("Mul"): 30 | gelu_components = model.match_parent_path(mul_node, ["Mul", "Add", "Erf", "Div"], [0, 1, 0, 0]) 31 | 32 | if gelu_components is not None: 33 | gelu_components.append(mul_node) 34 | gelu_subgraphs.append(gelu_components) 35 | 36 | gl_components = (node.name for gl in gelu_subgraphs for node in gl) 37 | return set(), set(gl_components) 38 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_optimum_common.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 2 | 3 | name: dev_Optimum common / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.7 17 | - 3.8 18 | - 3.9 19 | os: 20 | - ubuntu-20.04 21 | - windows-2019 22 | - macos-13 23 | runs-on: ${{ matrix.os }} 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: Setup Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v2 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install .[tests] 34 | ls -l optimum/ 35 | pip install -U git+https://github.com/huggingface/evaluate 36 | pip install -U git+https://github.com/huggingface/diffusers 37 | pip install -U git+https://github.com/huggingface/transformers 38 | - name: Test with unittest 39 | shell: bash 40 | run: | 41 | # Setting HUGGINGFACE_CO_STAGING to true for only one job of the matrix 42 | as the staging tests cannot run in parallel. 43 | export HUGGINGFACE_CO_STAGING=${{ matrix.python-version == 3.8 && matrix.os 44 | == ubuntu-20.04 }} 45 | python -m unittest discover -s tests -p test_*.py 46 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # What does this PR do? 2 | 3 | 12 | 13 | 14 | 15 | Fixes # (issue) 16 | 17 | 18 | ## Before submitting 19 | - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). 20 | - [ ] Did you make sure to update the documentation with your changes? 21 | - [ ] Did you write any new necessary tests? 22 | 23 | ## Who can review? 
24 | 25 | 33 | -------------------------------------------------------------------------------- /docs/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import doctest 19 | import sys 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(__file__), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | # Doctest custom flag to ignore output. 29 | IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT") 30 | 31 | OutputChecker = doctest.OutputChecker 32 | 33 | 34 | class CustomOutputChecker(OutputChecker): 35 | def check_output(self, want, got, optionflags): 36 | if IGNORE_RESULT & optionflags: 37 | return True 38 | return OutputChecker.check_output(self, want, got, optionflags) 39 | 40 | 41 | doctest.OutputChecker = CustomOutputChecker 42 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/token-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Token classification 18 | 19 | The script [`run_ner.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/token-classification/run_ner.py) 20 | allows us to apply graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for token classification tasks. 21 | 22 | The following example applies graph optimizations on a DistilBERT fine-tuned on the CoNLL-2003 task. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node eliminations and constant folding. Higher optimization level will result in hardware dependent optimized graph. 23 | 24 | ```bash 25 | python run_ner.py \ 26 | --model_name_or_path elastic/distilbert-base-uncased-finetuned-conll03-english \ 27 | --dataset_name conll2003 \ 28 | --optimization_level 1 \ 29 | --do_eval \ 30 | --output_dir /tmp/optimized_distilbert_conll2003 31 | ``` 32 | 33 | -------------------------------------------------------------------------------- /optimum/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import doctest 19 | import sys 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(__file__), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | # Doctest custom flag to ignore output. 29 | IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT") 30 | 31 | OutputChecker = doctest.OutputChecker 32 | 33 | 34 | class CustomOutputChecker(OutputChecker): 35 | def check_output(self, want, got, optionflags): 36 | if IGNORE_RESULT & optionflags: 37 | return True 38 | return OutputChecker.check_output(self, want, got, optionflags) 39 | 40 | 41 | doctest.OutputChecker = CustomOutputChecker 42 | -------------------------------------------------------------------------------- /optimum/fx/utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from functools import wraps 16 | 17 | import transformers 18 | from packaging import version 19 | 20 | 21 | _TRANSFORMERS_MIN_VERSION = version.parse("4.20.0.dev0") 22 | 23 | transformers_version = version.parse(transformers.__version__) 24 | _fx_features_available = (_TRANSFORMERS_MIN_VERSION.major, _TRANSFORMERS_MIN_VERSION.minor) <= ( 25 | transformers_version.major, 26 | transformers_version.minor, 27 | ) 28 | 29 | 30 | def are_fx_features_available(): 31 | return _fx_features_available 32 | 33 | 34 | def check_if_available(func): 35 | @wraps(func) 36 | def wrapper(*args, **kwargs): 37 | if not are_fx_features_available(): 38 | raise ImportError( 39 | f"Found an incompatible version of transformers. Found version {transformers_version}, but only {_TRANSFORMERS_MIN_VERSION} and above are supported." 
40 | ) 41 | return func(*args, **kwargs) 42 | 43 | return wrapper 44 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/image-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Image classification 18 | 19 | The script [`run_image_classification.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/image_classification/run_image_classification.py) allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for image classification tasks. 20 | 21 | The following example applies dynamic quantization on a ViT model fine-tuned on the beans classification dataset. 22 | 23 | ```bash 24 | python run_image_classification.py \ 25 | --model_name_or_path nateraw/vit-base-beans \ 26 | --dataset_name beans \ 27 | --quantization_approach dynamic \ 28 | --do_eval \ 29 | --output_dir /tmp/image_classification_vit_beans 30 | ``` 31 | 32 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 33 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/token-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Token classification 18 | 19 | 20 | The script [`run_ner.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/token-classification/run_ner.py) 21 | allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph 22 | optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for token classification tasks. 23 | 24 | The following example applies post-training dynamic quantization on a DistilBERT fine-tuned on the CoNLL-2003 task 25 | 26 | ```bash 27 | python run_ner.py \ 28 | --model_name_or_path elastic/distilbert-base-uncased-finetuned-conll03-english \ 29 | --dataset_name conll2003 \ 30 | --quantization_approach dynamic \ 31 | --do_eval \ 32 | --output_dir /tmp/quantized_distilbert_conll2003 33 | ``` 34 | 35 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 36 | -------------------------------------------------------------------------------- /docs/source/utils/dummy_input_generators.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Dummy Input Generators 14 | 15 | It is very common to have to generate dummy inputs to perform a task (tracing, exporting a model to some backend, 16 | testing model outputs, etc). The goal of [`~optimum.utils.input_generators.DummyInputGenerator`] classes is to make this 17 | generation easy and re-usable. 
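For a concrete feel of the API, the snippet below is a minimal sketch (not part of the original documentation page) showing how a `DummyTextInputGenerator` could be instantiated from a normalized config to produce a dummy `input_ids` tensor. The checkpoint name and shapes are arbitrary, and the exact constructor arguments may differ slightly between Optimum versions.

```python
from transformers import AutoConfig

from optimum.utils import DummyTextInputGenerator, NormalizedTextConfig

# Wrap a regular Transformers config so that attribute names are normalized.
config = AutoConfig.from_pretrained("bert-base-uncased")  # arbitrary checkpoint
normalized_config = NormalizedTextConfig(config)

# Build a generator for text inputs and produce a dummy `input_ids` tensor.
generator = DummyTextInputGenerator(
    task="text-classification",
    normalized_config=normalized_config,
    batch_size=2,          # arbitrary shapes, for illustration only
    sequence_length=16,
)
dummy_input_ids = generator.generate("input_ids", framework="pt")
print(dummy_input_ids.shape)  # expected: torch.Size([2, 16])
```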
18 | 19 | 20 | ## Base class 21 | 22 | [[autodoc]] optimum.utils.input_generators.DummyInputGenerator 23 | 24 | 25 | ## Existing dummy input generators 26 | 27 | [[autodoc]] optimum.utils.input_generators.DummyTextInputGenerator 28 | 29 | [[autodoc]] optimum.utils.input_generators.DummyDecoderTextInputGenerator 30 | 31 | [[autodoc]] optimum.utils.input_generators.DummyPastKeyValuesGenerator 32 | 33 | [[autodoc]] optimum.utils.input_generators.DummySeq2SeqPastKeyValuesGenerator 34 | 35 | [[autodoc]] optimum.utils.input_generators.DummyBboxInputGenerator 36 | 37 | [[autodoc]] optimum.utils.input_generators.DummyVisionInputGenerator 38 | 39 | [[autodoc]] optimum.utils.input_generators.DummyAudioInputGenerator 40 | -------------------------------------------------------------------------------- /optimum/utils/modeling_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import functools 16 | 17 | 18 | MODEL_TO_PATCH_FOR_PAST = { 19 | "bart", 20 | "blenderbot", 21 | "blenderbot-small", 22 | "bloom", 23 | "llama", 24 | "mistral", 25 | "mpt", 26 | "opt", 27 | "pegasus", 28 | } 29 | 30 | 31 | def recurse_getattr(obj, attr: str): 32 | """ 33 | Recursive `getattr`. 34 | 35 | Args: 36 | obj: 37 | A class instance holding the attribute. 38 | attr (`str`): 39 | The attribute that is to be retrieved, e.g. 'attribute1.attribute2'. 40 | """ 41 | 42 | def _getattr(obj, attr): 43 | return getattr(obj, attr) 44 | 45 | return functools.reduce(_getattr, [obj] + attr.split(".")) 46 | 47 | 48 | def recurse_setattr(module, name, value): 49 | """A function to recursively set attributes to a module.""" 50 | if "." not in name: 51 | setattr(module, name, value) 52 | else: 53 | name, rest = name.split(".", 1) 54 | recurse_setattr(getattr(module, name), rest, value) 55 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/text-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Text classification 18 | 19 | ## GLUE tasks 20 | 21 | The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/text-classification/run_glue.py) 22 | allows us to apply graph optimizations and fusion using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as 23 | the ones from the [GLUE benchmark](https://gluebenchmark.com/). 24 | 25 | The following example applies graph optimization on a DistilBERT fine-tuned on the sst-2 task. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node eliminations and constant folding. Higher optimization level will result in hardware dependent optimized graph. 
26 | 27 | ```bash 28 | python run_glue.py \ 29 | --model_name_or_path distilbert-base-uncased-finetuned-sst-2-english \ 30 | --task_name sst2 \ 31 | --optimization_level 1 \ 32 | --do_eval \ 33 | --output_dir /tmp/optimized_distilbert_sst2 34 | ``` 35 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/docker/Dockerfile-ort-nightly-rocm57: -------------------------------------------------------------------------------- 1 | # Use rocm image 2 | FROM rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1 3 | CMD rocm-smi 4 | 5 | # Ignore interactive questions during `docker build` 6 | ENV DEBIAN_FRONTEND noninteractive 7 | 8 | # Versions 9 | # available options 3.10 10 | ARG PYTHON_VERSION=3.10 11 | 12 | # Bash shell 13 | RUN chsh -s /bin/bash 14 | SHELL ["/bin/bash", "-c"] 15 | 16 | # Install and update tools to minimize security vulnerabilities 17 | RUN apt-get update 18 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 19 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \ 20 | apt-get clean 21 | RUN apt-get autoremove -y 22 | 23 | ARG PYTHON_EXE=/opt/conda/envs/py_$PYTHON_VERSION/bin/python 24 | 25 | # (Optional) Intall test dependencies 26 | RUN $PYTHON_EXE -m pip install -U pip 27 | RUN $PYTHON_EXE -m pip install git+https://github.com/huggingface/transformers 28 | RUN $PYTHON_EXE -m pip install datasets accelerate evaluate coloredlogs absl-py rouge_score seqeval scipy sacrebleu nltk scikit-learn parameterized sentencepiece --no-cache-dir 29 | RUN $PYTHON_EXE -m pip install deepspeed --no-cache-dir 30 | RUN conda install -y mpi4py 31 | 32 | # PyTorch 33 | RUN $PYTHON_EXE -m pip install onnx ninja 34 | 35 | # ORT Module 36 | RUN $PYTHON_EXE -m pip install --pre onnxruntime-training -f https://download.onnxruntime.ai/onnxruntime_nightly_rocm57.html 37 | RUN $PYTHON_EXE -m pip install torch-ort 38 | RUN $PYTHON_EXE -m pip install --upgrade protobuf==3.20.2 39 | RUN $PYTHON_EXE -m torch_ort.configure 40 | 41 | WORKDIR . 
42 | 43 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /tests/onnxruntime/ds_configs/ds_config_zero_stage_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | 11 | "optimizer": { 12 | "type": "AdamW", 13 | "params": { 14 | "lr": "auto", 15 | "betas": "auto", 16 | "eps": "auto", 17 | "weight_decay": "auto" 18 | } 19 | }, 20 | 21 | "scheduler": { 22 | "type": "WarmupLR", 23 | "params": { 24 | "warmup_min_lr": "auto", 25 | "warmup_max_lr": "auto", 26 | "warmup_num_steps": "auto" 27 | } 28 | }, 29 | 30 | "zero_optimization": { 31 | "stage": 3, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "offload_param": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "overlap_comm": true, 41 | "contiguous_gradients": true, 42 | "sub_group_size": 1e9, 43 | "reduce_bucket_size": "auto", 44 | "stage3_prefetch_bucket_size": "auto", 45 | "stage3_param_persistence_threshold": "auto", 46 | "stage3_max_live_parameters": 1e9, 47 | "stage3_max_reuse_distance": 1e9, 48 | "stage3_gather_16bit_weights_on_model_save": true 49 | }, 50 | 51 | "gradient_accumulation_steps": "auto", 52 | "gradient_clipping": "auto", 53 | "steps_per_print": 2000, 54 | "train_batch_size": "auto", 55 | "train_micro_batch_size_per_gpu": "auto", 56 | "wall_clock_breakdown": false 57 | } -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/text-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Text classification 18 | 19 | ## GLUE tasks 20 | 21 | The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/text-classification/run_glue.py) 22 | allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph 23 | optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as 24 | the ones from the [GLUE benchmark](https://gluebenchmark.com/). 25 | 26 | The following example applies post-training dynamic quantization on a DistilBERT fine-tuned on the sst-2 task. 27 | 28 | ```bash 29 | python run_glue.py \ 30 | --model_name_or_path distilbert-base-uncased-finetuned-sst-2-english \ 31 | --task_name sst2 \ 32 | --quantization_approach dynamic \ 33 | --do_eval \ 34 | --output_dir /tmp/quantized_distilbert_sst2 35 | ``` 36 | 37 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 38 | -------------------------------------------------------------------------------- /docs/source/exporters/onnx/package_reference/export.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Export functions 14 | 15 | You can export models to ONNX from two frameworks in 🤗 Optimum: PyTorch and TensorFlow. 
There is an export function for each of these frameworks, [`~optimum.exporters.onnx.convert.export_pytorch`] and [`~optimum.exporters.onnx.convert.export_tensorflow`], but the recommended way of using those is via the main export function [`~optimum.exporters.main_export`], which will take care of using the proper exporting function according to the available framework, check that the exported model is valid, and provide extended options to run optimizations on the exported model. 16 | 17 | ## Main functions 18 | 19 | [[autodoc]] exporters.onnx.main_export 20 | 21 | [[autodoc]] exporters.onnx.onnx_export_from_model 22 | 23 | [[autodoc]] exporters.onnx.convert.export 24 | 25 | [[autodoc]] exporters.onnx.convert.export_pytorch 26 | 27 | [[autodoc]] exporters.onnx.convert.export_tensorflow 28 | 29 | 30 | ## Utility functions 31 | 32 | [[autodoc]] exporters.onnx.convert.check_dummy_inputs_are_allowed 33 | 34 | [[autodoc]] exporters.onnx.convert.validate_model_outputs 35 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/overview.mdx: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | 🤗 Optimum provides an integration with ONNX Runtime, a cross-platform, high performance engine for Open Neural Network Exchange (ONNX) models. 4 | 5 |
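As a quick, hedged illustration of what this integration looks like in practice (a minimal sketch that is not part of the original page; `export=True` is the argument used in recent releases, while older ones used `from_transformers=True`), a Transformers checkpoint can be exported to ONNX and run with ONNX Runtime through the familiar pipeline API:

```python
from transformers import AutoTokenizer, pipeline

from optimum.onnxruntime import ORTModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"

# Export the PyTorch checkpoint to ONNX on the fly and load it with ONNX Runtime.
model = ORTModelForSequenceClassification.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# The ORT model is a drop-in replacement in the usual pipeline API.
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("ONNX Runtime makes inference faster."))
```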
8 | How-to guides: Practical guides to help you achieve a specific goal. Take a look at these guides to learn how to use 🤗 Optimum to solve real-world problems.
12 | Conceptual guides: High-level explanations for building a better understanding about important topics such as quantization and graph optimization.
16 | Reference: Technical descriptions of how the ONNX Runtime classes and methods of 🤗 Optimum work.
21 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/passes/layernorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Set, Tuple 15 | 16 | from onnx import ModelProto 17 | 18 | from onnxruntime.transformers.onnx_model import OnnxModel 19 | 20 | from .. import PreprocessorPass 21 | 22 | 23 | class ExcludeLayerNormNodes(PreprocessorPass): 24 | def __init__(self): 25 | super().__init__() 26 | 27 | def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]: 28 | layer_norm_subgraphs = [] 29 | for add_node in model.get_nodes_by_op_type("Add"): 30 | layer_norm_components = model.match_parent_path( 31 | add_node, 32 | ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow", "Sub", "ReduceMean"], 33 | [0, 0, 1, 0, 0, 0, 0, 1], 34 | ) 35 | 36 | if layer_norm_components is not None: 37 | layer_norm_components.append(add_node) 38 | layer_norm_subgraphs.append(layer_norm_components) 39 | 40 | ln_components = (node.name for ln in layer_norm_subgraphs for node in ln) 41 | return set(), set(ln_components) 42 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/question-answering/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Question answering 18 | 19 | 20 | The script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/question-answering/run_qa.py) 21 | allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph 22 | optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for question answering tasks. 23 | 24 | Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along 25 | the flag `--version_2_with_negative`. 26 | 27 | The following example applies post-training dynamic quantization on a DistilBERT fine-tuned on the SQuAD1.0 dataset. 28 | 29 | ```bash 30 | python run_qa.py \ 31 | --model_name_or_path distilbert-base-uncased-distilled-squad \ 32 | --dataset_name squad \ 33 | --quantization_approach dynamic \ 34 | --do_eval \ 35 | --output_dir /tmp/quantized_distilbert_squad 36 | ``` 37 | 38 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 
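The same dynamic quantization can also be applied programmatically. The sketch below is illustrative only (it is not part of the example script, and the AVX512-VNNI configuration is an arbitrary choice); it relies on the `ORTQuantizer` and `AutoQuantizationConfig` classes used elsewhere in this repository:

```python
from optimum.onnxruntime import ORTModelForQuestionAnswering, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

# Export the fine-tuned checkpoint to ONNX, then quantize the resulting graph dynamically.
model = ORTModelForQuestionAnswering.from_pretrained(
    "distilbert-base-uncased-distilled-squad", export=True
)
quantizer = ORTQuantizer.from_pretrained(model)
qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)
quantizer.quantize(save_dir="/tmp/quantized_distilbert_squad", quantization_config=qconfig)
```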
39 | -------------------------------------------------------------------------------- /tests/onnxruntime/test_utils.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import unittest 3 | 4 | import onnxruntime as ort 5 | import torch 6 | 7 | from optimum.onnxruntime.configuration import AutoQuantizationConfig, OptimizationConfig, ORTConfig 8 | from optimum.onnxruntime.utils import get_device_for_provider, get_provider_for_device 9 | 10 | 11 | class ProviderAndDeviceGettersTest(unittest.TestCase): 12 | def test_get_device_for_provider(self): 13 | self.assertEqual(get_device_for_provider("CPUExecutionProvider", provider_options={}), torch.device("cpu")) 14 | self.assertEqual( 15 | get_device_for_provider("CUDAExecutionProvider", provider_options={"device_id": 1}), torch.device("cuda:1") 16 | ) 17 | 18 | def test_get_provider_for_device(self): 19 | self.assertEqual(get_provider_for_device(torch.device("cpu")), "CPUExecutionProvider") 20 | 21 | if "ROCMExecutionProvider" in ort.get_available_providers(): 22 | self.assertEqual(get_provider_for_device(torch.device("cuda")), "ROCMExecutionProvider") 23 | else: 24 | self.assertEqual(get_provider_for_device(torch.device("cuda")), "CUDAExecutionProvider") 25 | 26 | 27 | class ORTConfigTest(unittest.TestCase): 28 | def test_save_and_load(self): 29 | with tempfile.TemporaryDirectory() as tmp_dir: 30 | quantization_config = AutoQuantizationConfig.arm64(is_static=False, per_channel=False) 31 | optimization_config = OptimizationConfig(optimization_level=2) 32 | ort_config = ORTConfig(opset=11, quantization=quantization_config, optimization=optimization_config) 33 | ort_config.save_pretrained(tmp_dir) 34 | loaded_ort_config = ORTConfig.from_pretrained(tmp_dir) 35 | self.assertEqual(ort_config.to_dict(), loaded_ort_config.to_dict()) 36 | -------------------------------------------------------------------------------- /docs/source/torch_fx/overview.mdx: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | 🤗 Optimum provides an integration with Torch FX, a library for PyTorch that allows developers to implement custom transformations of their models that can be optimized for performance. 4 | 5 |
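As a hedged illustration (a minimal sketch, not part of the original page; the transformation names are taken from `optimum.fx.optimization` and may vary across versions), a traced model can be rewritten by composing the provided graph transformations:

```python
from transformers import AutoModel
from transformers.utils.fx import symbolic_trace

from optimum.fx.optimization import ChangeTrueDivToMulByInverse, MergeLinears, compose

# Trace the model into a torch.fx GraphModule so its graph can be rewritten.
model = AutoModel.from_pretrained("bert-base-uncased")  # arbitrary checkpoint
traced = symbolic_trace(model, input_names=["input_ids", "attention_mask", "token_type_ids"])

# Compose two of the provided transformations and apply them to the traced model.
transformation = compose(MergeLinears(), ChangeTrueDivToMulByInverse())
optimized = transformation(traced)
```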
8 | How-to guides: Practical guides to help you achieve a specific goal. Take a look at these guides to learn how to use 🤗 Optimum to solve real-world problems.
12 | Conceptual guides: High-level explanations for building a better understanding about important topics such as quantization and graph optimization.
16 | Reference: Technical descriptions of how the Torch FX classes and methods of 🤗 Optimum work.
21 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/package_reference/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration 14 | 15 | The configuration classes are the way to specify how a task should be done. There are two tasks supported with the ONNX Runtime package: 16 | 17 | 1. Optimization: Performed by the [`~onnxruntime.ORTOptimizer`], this task can be tweaked using an [`~onnxruntime.configuration.OptimizationConfig`]. 18 | 19 | 2. Quantization: Performed by the [`~onnxruntime.ORTQuantizer`], quantization can be set using a [`~onnxruntime.configuration.QuantizationConfig`]. A calibration step is required in some cases (post training static quantization), which can be specified using a [`~onnxruntime.configuration.CalibrationConfig`]. 20 | 21 | ## OptimizationConfig 22 | 23 | [[autodoc]] onnxruntime.configuration.OptimizationConfig 24 | 25 | [[autodoc]] onnxruntime.configuration.AutoOptimizationConfig 26 | 27 | ## QuantizationConfig 28 | 29 | [[autodoc]] onnxruntime.configuration.QuantizationConfig 30 | 31 | ## AutoQuantizationConfig 32 | 33 | [[autodoc]] onnxruntime.configuration.AutoQuantizationConfig 34 | - all 35 | 36 | ### CalibrationConfig 37 | 38 | [[autodoc]] onnxruntime.configuration.CalibrationConfig 39 | 40 | ## ORTConfig 41 | 42 | [[autodoc]] onnxruntime.configuration.ORTConfig 43 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/question-answering/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Question answering 18 | 19 | The script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/question-answering/run_qa.py) 20 | allows us to apply graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for question answering tasks. 21 | 22 | Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along 23 | the flag `--version_2_with_negative`. 24 | 25 | The following example applies graph optimizations on a DistilBERT fine-tuned on the SQuAD1.0 dataset. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node eliminations and constant folding. Higher optimization level will result in hardware dependent optimized graph. 26 | 27 | ```bash 28 | python run_qa.py \ 29 | --model_name_or_path distilbert-base-uncased-distilled-squad \ 30 | --dataset_name squad \ 31 | --optimization_level 1 \ 32 | --do_eval \ 33 | --output_dir /tmp/optimized_distilbert_squad 34 | ``` 35 | 36 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 37 | -------------------------------------------------------------------------------- /docs/source/utils/normalized_config.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Normalized Configurations 14 | 15 | Model configuration classes in 🤗 Transformers are not standardized. Although Transformers implements an `attribute_map` attribute that mitigates the issue to some extent, it does not make it easy to reason on common configuration attributes in the code. 
16 | [`~optimum.utils.normalized_config.NormalizedConfig`] classes try to fix that by allowing access to the configuration 17 | attribute they wrap in a standardized way. 18 | 19 | 20 | ## Base class 21 | 22 | 23 | 24 | While it is possible to create `NormalizedConfig` subclasses for common use-cases, it is also possible to overwrite 25 | the `original attribute name -> normalized attribute name` mapping directly using the 26 | [`~optimum.utils.normalized_config.NormalizedConfig.with_args`] class method. 27 | 28 | 29 | 30 | [[autodoc]] optimum.utils.normalized_config.NormalizedConfig 31 | 32 | 33 | ## Existing normalized configurations 34 | 35 | [[autodoc]] optimum.utils.normalized_config.NormalizedTextConfig 36 | 37 | [[autodoc]] optimum.utils.normalized_config.NormalizedSeq2SeqConfig 38 | 39 | [[autodoc]] optimum.utils.normalized_config.NormalizedVisionConfig 40 | 41 | [[autodoc]] optimum.utils.normalized_config.NormalizedTextAndVisionConfig 42 | -------------------------------------------------------------------------------- /.github/generate_dev_tests.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import yaml 4 | 5 | 6 | tests = [ 7 | "test_exporters.yml", 8 | "test_dummy_inputs.yml", 9 | "test_bettertransformer.yml", 10 | "test_onnx.yml", 11 | "test_fx.yml", 12 | "test_onnxruntime.yml", 13 | "test_benckmark.yml", 14 | "test_optimum_common.yml", 15 | ] 16 | 17 | for test_name in tests: 18 | new_name = "dev_" + test_name 19 | 20 | with open(Path("workflows", test_name), "r") as file: 21 | workflox_yml = yaml.load(file, yaml.BaseLoader) 22 | 23 | workflox_yml["name"] = "dev_" + workflox_yml["name"] 24 | workflox_yml["on"] = {"schedule": [{"cron": "0 7 * * *"}]} 25 | 26 | for i, step in enumerate(workflox_yml["jobs"]["build"]["steps"]): 27 | if "name" in step and step["name"] == "Install dependencies": 28 | workflox_yml["jobs"]["build"]["steps"][i][ 29 | "run" 30 | ] += "pip install -U git+https://github.com/huggingface/evaluate\npip install -U git+https://github.com/huggingface/diffusers\npip install -U git+https://github.com/huggingface/transformers\n" 31 | 32 | with open(Path("workflows", new_name), "w") as outfile: 33 | yaml.dump( 34 | workflox_yml, 35 | outfile, 36 | default_flow_style=False, 37 | allow_unicode=True, 38 | width=float("inf"), 39 | sort_keys=False, 40 | ) 41 | 42 | with open(Path("workflows", new_name), "r+") as outfile: 43 | workflox_yml = outfile.read() 44 | workflox_yml = "# This yml file is autogenerated. Do not edit.\n\n" + workflox_yml 45 | 46 | workflox_yml = workflox_yml.replace("'", "") 47 | workflox_yml = workflox_yml.replace("run:", "run: |\n ") 48 | 49 | workflox_yml = "\n".join([ll.rstrip() for ll in workflox_yml.splitlines() if ll.strip()]) 50 | 51 | outfile.seek(0) 52 | outfile.write(workflox_yml) 53 | outfile.truncate() 54 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | SHELL := /bin/bash 16 | CURRENT_DIR = $(shell pwd) 17 | DEFAULT_CLONE_URL := https://github.com/huggingface/optimum.git 18 | # If CLONE_URL is empty, revert to DEFAULT_CLONE_URL 19 | REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL)) 20 | 21 | .PHONY: style test 22 | 23 | # Run code quality checks 24 | style_check: 25 | black --check . 26 | ruff . 27 | 28 | style: 29 | black . 30 | ruff . --fix 31 | 32 | # Run tests for the library 33 | test: 34 | python -m pytest tests 35 | 36 | # Utilities to release to PyPi 37 | build_dist_install_tools: 38 | pip install build 39 | pip install twine 40 | 41 | build_dist: 42 | rm -fr build 43 | rm -fr dist 44 | python -m build 45 | 46 | pypi_upload: build_dist 47 | python -m twine upload dist/* 48 | 49 | build_doc_docker_image: 50 | docker build -t doc_maker --build-arg commit_sha=$(COMMIT_SHA_OPTIMUM) --build-arg clone_url=$(REAL_CLONE_URL) ./docs 51 | 52 | doc: build_doc_docker_image 53 | @test -n "$(BUILD_DIR)" || (echo "BUILD_DIR is empty." ; exit 1) 54 | @test -n "$(VERSION)" || (echo "VERSION is empty." ; exit 1) 55 | docker run -v $(CURRENT_DIR):/doc_folder --workdir=/doc_folder doc_maker \ 56 | doc-builder build optimum /optimum/docs/source/ \ 57 | --build_dir $(BUILD_DIR) \ 58 | --version $(VERSION) \ 59 | --version_tag_suffix "" \ 60 | --html \ 61 | --clean 62 | -------------------------------------------------------------------------------- /optimum/onnxruntime/models/bloom.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Tuple 2 | 3 | 4 | if TYPE_CHECKING: 5 | import torch 6 | 7 | 8 | def bloom_convert_to_standard_cache( 9 | past_key_value: Tuple[Tuple["torch.Tensor", "torch.Tensor"]], batch_size: int 10 | ) -> Tuple[Tuple["torch.Tensor", "torch.Tensor"]]: 11 | """ 12 | Standardizes the format of the cache so as to match most implementations, i.e. to tuple(tuple([batch_size, 13 | num_heads, ...])) 14 | """ 15 | batch_size_times_num_heads, head_dim, seq_length = past_key_value[0][0].shape 16 | num_heads = batch_size_times_num_heads // batch_size 17 | # key: [batch_size * num_heads, head_dim, seq_length] -> [batch_size, num_heads, head_dim, seq_length] 18 | # value: [batch_size * num_heads, seq_length, head_dim] -> [batch_size, num_heads, seq_length, head_dim] 19 | return tuple( 20 | ( 21 | layer_past[0].view(batch_size, num_heads, head_dim, seq_length), 22 | layer_past[1].view(batch_size, num_heads, seq_length, head_dim), 23 | ) 24 | for layer_past in past_key_value 25 | ) 26 | 27 | 28 | def bloom_convert_to_bloom_cache( 29 | past_key_value: Tuple[Tuple["torch.Tensor", "torch.Tensor"]] 30 | ) -> Tuple[Tuple["torch.Tensor", "torch.Tensor"]]: 31 | """ 32 | Converts the cache to the format expected by Bloom, i.e. 
to tuple(tuple([batch_size * num_heads, ...])) 33 | """ 34 | batch_size, num_heads, head_dim, seq_length = past_key_value[0][0].shape 35 | batch_size_times_num_heads = batch_size * num_heads 36 | # key: [batch_size, num_heads, head_dim, seq_length] -> [batch_size * num_heads, head_dim, seq_length] 37 | # value: [batch_size, num_heads, seq_length, head_dim] -> [batch_size * num_heads, seq_length, head_dim] 38 | return tuple( 39 | ( 40 | layer_past[0].view(batch_size_times_num_heads, head_dim, seq_length), 41 | layer_past[1].view(batch_size_times_num_heads, seq_length, head_dim), 42 | ) 43 | for layer_past in past_key_value 44 | ) 45 | -------------------------------------------------------------------------------- /tests/test_modeling_base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import tempfile 4 | import unittest 5 | 6 | import requests as r 7 | import torch 8 | from transformers.configuration_utils import PretrainedConfig 9 | 10 | from optimum.modeling_base import OptimizedModel 11 | from optimum.utils.testing_utils import require_hf_token 12 | 13 | 14 | TEST_HUB_PATH = "philschmid/unit_test_model" 15 | TEST_LOCAL_PATH = "tests/assets/hub" 16 | 17 | 18 | class DummyModel(OptimizedModel): 19 | def _save_pretrained(self, save_directory, **kwargs): 20 | return 21 | 22 | @classmethod 23 | def _from_pretrained(cls, **kwargs): 24 | config = PretrainedConfig.from_dict(kwargs["config"]) 25 | model = cls(model=torch.nn.Module, config=config) 26 | return model 27 | 28 | def forward(self, *args, **kwargs): 29 | pass 30 | 31 | 32 | class TestOptimizedModel(unittest.TestCase): 33 | def test_load_model_from_hub(self): 34 | # TODO: figure out how to create repos and push stuff to staging 35 | if os.getenv("HUGGINGFACE_CO_STAGING", False): 36 | self.skipTest("Skip test on staging") 37 | 38 | dummy_model = DummyModel.from_pretrained(TEST_HUB_PATH) 39 | self.assertTrue(dummy_model.config.remote) 40 | 41 | @require_hf_token 42 | def test_push_to_hub(self): 43 | with tempfile.TemporaryDirectory() as tmpdirname: 44 | model = DummyModel.from_pretrained(TEST_LOCAL_PATH) 45 | # create remote hash to check if file was updated. 46 | remote_hash = random.getrandbits(128) 47 | model.config.from_local = remote_hash 48 | 49 | model.save_pretrained( 50 | tmpdirname, 51 | use_auth_token=os.environ.get("HF_AUTH_TOKEN", None), 52 | push_to_hub=True, 53 | repository_id="unit_test_save_model", 54 | ) 55 | # folder contains all config files and pytorch_model.bin 56 | url = "https://huggingface.co/philschmid/unit_test_save_model/raw/main/config.json" 57 | response = r.get(url) 58 | self.assertEqual(remote_hash, response.json()["from_local"]) 59 | -------------------------------------------------------------------------------- /optimum/utils/doc.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from dataclasses import fields 17 | 18 | 19 | def generate_doc_dataclass(cls) -> str: 20 | """Class decorator for generate the documentation for dataclass.""" 21 | doc = "\f\nAttributes:\n" 22 | for attribute in fields(cls): 23 | doc += f" {attribute.name}" # attribute name 24 | 25 | # whether optional 26 | attribute_type = str(attribute.type) 27 | if attribute_type.startswith("typing.Optional"): 28 | optional = True 29 | type_display = attribute_type[attribute_type.find("[") + 1 : -1] 30 | type_display = type_display.split(".")[-1] 31 | else: 32 | optional = False 33 | 34 | if attribute_type.startswith("typing"): 35 | type_display = attribute_type.split(".")[-1] 36 | else: 37 | type_display = attribute.type.__name__ 38 | 39 | if optional: 40 | doc += f" (`{type_display}`, *optional*): " 41 | else: 42 | doc += f" (`{type_display}`): " 43 | 44 | doc += f"{attribute.metadata['description']}\n" # argument description 45 | cls.__doc__ = (cls.__doc__ if cls.__doc__ is not None else "") + "\n\n" + "".join(doc) 46 | return cls 47 | 48 | 49 | def add_dynamic_docstring( 50 | *docstr, 51 | text, 52 | dynamic_elements, 53 | ): 54 | def docstring_decorator(fn): 55 | func_doc = (fn.__doc__ or "") + "".join(docstr) 56 | fn.__doc__ = func_doc + text.format(**dynamic_elements) 57 | return fn 58 | 59 | return docstring_decorator 60 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/language-modeling/README.md: -------------------------------------------------------------------------------- 1 | 13 | 14 | # Language Modeling 15 | 16 | ## Language Modeling Training 17 | 18 | By running the scripts [`run_clm.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/language-modeling/run_clm.py) 19 | and [`run_mlm.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/language-modeling/run_mlm.py), 20 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to train the language models from the 21 | [HuggingFace hub](https://huggingface.co/models). 22 | 23 | 24 | __The following example applies the acceleration features powered by ONNX Runtime.__ 25 | 26 | 27 | ### ONNX Runtime Training 28 | 29 | The following example trains GPT2 on wikitext-2 with mixed precision (fp16). 30 | 31 | ```bash 32 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_clm.py \ 33 | --model_name_or_path gpt2 \ 34 | --dataset_name wikitext \ 35 | --dataset_config_name wikitext-2-raw-v1 \ 36 | --do_train \ 37 | --output_dir /tmp/test-clm \ 38 | --fp16 39 | ``` 40 | 41 | 42 | __Note__ 43 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. 
Install the dependencies either with our prepared* 44 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 45 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 46 | 47 | > *The inference will use PyTorch by default; if you want to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.* 48 | --- 49 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/question-answering/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Question answering 18 | 19 | ## SQuAD Tasks 20 | 21 | By running the script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/question-answering/run_qa.py), 22 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) to fine-tune the models from the 23 | [HuggingFace hub](https://huggingface.co/models) for question answering tasks such as SQuAD. 24 | 25 | Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along 26 | the flag `--version_2_with_negative`. 27 | 28 | __The following example applies the acceleration features powered by ONNX Runtime.__ 29 | 30 | 31 | ### ONNX Runtime Training 32 | 33 | The following example fine-tunes a BERT model on the SQuAD 1.0 dataset. 34 | 35 | ```bash 36 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_qa.py \ 37 | --model_name_or_path bert-base-uncased \ 38 | --dataset_name squad \ 39 | --do_train \ 40 | --do_eval \ 41 | --output_dir /tmp/ort_bert_squad/ 42 | ``` 43 | 44 | __Note__ 45 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. Install the dependencies either with our prepared* 46 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 47 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 48 | 49 | > *The inference will use PyTorch by default; if you want to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.* 50 | --- -------------------------------------------------------------------------------- /examples/onnxruntime/training/image-classification/README.md: -------------------------------------------------------------------------------- 1 | 13 | 14 | # Image Classification 15 | 16 | By running the script [`run_image_classification.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/image-classification/run_image_classification.py), we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to train image classification models from the 17 | [HuggingFace hub](https://huggingface.co/models). 18 | 19 | 20 | __The following example applies the acceleration features powered by ONNX Runtime.__ 21 | 22 | 23 | ### ONNX Runtime Training 24 | 25 | The following example trains ViT on the beans dataset with mixed precision (fp16).
26 | 27 | ```bash 28 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_image_classification.py \ 29 | --model_name_or_path google/vit-base-patch16-224-in21k \ 30 | --dataset_name beans \ 31 | --output_dir ./beans_outputs/ \ 32 | --remove_unused_columns False \ 33 | --label_column_name labels \ 34 | --do_train \ 35 | --do_eval \ 36 | --learning_rate 2e-5 \ 37 | --num_train_epochs 10 \ 38 | --per_device_train_batch_size 32 \ 39 | --per_device_eval_batch_size 32 \ 40 | --logging_strategy steps \ 41 | --logging_steps 10 \ 42 | --evaluation_strategy epoch \ 43 | --seed 1337 44 | ``` 45 | 46 | 47 | __Note__ 48 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. Install the dependencies either with our prepared* 49 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 50 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 51 | --- 52 | -------------------------------------------------------------------------------- /tests/onnx/test_onnx_export_custom_module.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import os 16 | from unittest import TestCase 17 | 18 | import pytest 19 | from transformers import is_torch_available 20 | from transformers.testing_utils import require_torch 21 | 22 | 23 | if is_torch_available(): 24 | import torch 25 | from transformers.models.deberta import modeling_deberta 26 | 27 | 28 | class StableDropoutTestCase(TestCase): 29 | """Tests export of StableDropout module.""" 30 | 31 | @require_torch 32 | @pytest.mark.filterwarnings("ignore:.*Dropout.*:UserWarning:torch.onnx.*") # torch.onnx is spammy. 
33 | def test_training(self): 34 | """Tests export of StableDropout in training mode.""" 35 | devnull = open(os.devnull, "wb") 36 | # drop_prob must be > 0 for the test to be meaningful 37 | sd = modeling_deberta.StableDropout(0.1) 38 | # Avoid warnings in training mode 39 | do_constant_folding = False 40 | # Dropout is a no-op in inference mode 41 | training = torch.onnx.TrainingMode.PRESERVE 42 | input = (torch.randn(2, 2),) 43 | 44 | torch.onnx.export( 45 | sd, 46 | input, 47 | devnull, 48 | opset_version=12, # Minimum supported 49 | do_constant_folding=do_constant_folding, 50 | training=training, 51 | ) 52 | 53 | # Expected to fail with opset_version < 12 54 | with self.assertRaises(Exception): 55 | torch.onnx.export( 56 | sd, 57 | input, 58 | devnull, 59 | opset_version=11, 60 | do_constant_folding=do_constant_folding, 61 | training=training, 62 | ) 63 | -------------------------------------------------------------------------------- /optimum/utils/preprocessing/task_processors_manager.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Dataset processing factory.""" 16 | 17 | from typing import TYPE_CHECKING, Any, Type 18 | 19 | from optimum.utils.preprocessing.image_classification import ImageClassificationProcessing 20 | from optimum.utils.preprocessing.question_answering import QuestionAnsweringProcessing 21 | from optimum.utils.preprocessing.text_classification import TextClassificationProcessing 22 | from optimum.utils.preprocessing.token_classification import TokenClassificationProcessing 23 | 24 | 25 | if TYPE_CHECKING: 26 | from .base import DatasetProcessing 27 | 28 | 29 | class TaskProcessorsManager: 30 | _TASK_TO_DATASET_PROCESSING_CLASS = { 31 | "text-classification": TextClassificationProcessing, 32 | "token-classification": TokenClassificationProcessing, 33 | "question-answering": QuestionAnsweringProcessing, 34 | "image-classification": ImageClassificationProcessing, 35 | } 36 | 37 | @classmethod 38 | def get_task_processor_class_for_task(cls, task: str) -> Type: 39 | if task not in cls._TASK_TO_DATASET_PROCESSING_CLASS: 40 | supported_tasks = ", ".join(cls._TASK_TO_DATASET_PROCESSING_CLASS.keys()) 41 | raise KeyError( 42 | f"Could not find a `TaskProcessor` class for the task called {task}, supported tasks: " 43 | f"{supported_tasks}." 
44 | ) 45 | return cls._TASK_TO_DATASET_PROCESSING_CLASS[task] 46 | 47 | @classmethod 48 | def for_task(cls, task: str, *dataset_processing_args, **dataset_processing_kwargs: Any) -> "DatasetProcessing": 49 | return cls.get_task_processor_class_for_task(task)(*dataset_processing_args, **dataset_processing_kwargs) 50 | -------------------------------------------------------------------------------- /tests/benchmark/memory_tracker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | from contextlib import contextmanager 4 | from multiprocessing import Pipe, Process 5 | from multiprocessing.connection import Connection 6 | 7 | 8 | # Adapted from optimum-benchmark; I don't trust PyTorch peak memory info when external libs are used. 9 | class MemoryTracker: 10 | def __init__(self): 11 | self.peak_memory: int = 0 12 | self.device_index = int(os.environ["CUDA_VISIBLE_DEVICES"].split(",")[0]) 13 | 14 | @contextmanager 15 | def track(self, interval: float = 0.1): 16 | print(f"Tracking memory for device {self.device_index}") 17 | yield from self._track_peak_memory(interval) 18 | 19 | def _track_peak_memory(self, interval: float): 20 | child_connection, parent_connection = Pipe() 21 | # instantiate process 22 | mem_process: Process = PeakMemoryMeasureProcess(self.device_index, child_connection, interval) 23 | mem_process.start() 24 | # wait until the measurement process signals that tracking has started 25 | parent_connection.recv() 26 | yield 27 | # signal the measurement process to stop tracking 28 | parent_connection.send(0) 29 | # receive peak memory 30 | self.peak_memory = parent_connection.recv() 31 | 32 | 33 | class PeakMemoryMeasureProcess(Process): 34 | def __init__(self, device_index: int, child_connection: Connection, interval: float): 35 | super().__init__() 36 | self.device_index = device_index 37 | self.interval = interval 38 | self.connection = child_connection 39 | self.mem_usage = 0 40 | 41 | def run(self): 42 | self.connection.send(0) 43 | stop = False 44 | 45 | command = f"nvidia-smi --query-gpu=memory.used --format=csv --id={self.device_index}" 46 | 47 | while True: 48 | # py3nvml is broken since it outputs only the reserved memory, and nvidia-smi has only the MiB precision. 49 | gpu_mem_mb = subprocess.check_output(command.split()).decode("ascii").split("\n")[1].split()[0] 50 | gpu_mem_mb = int(gpu_mem_mb) * 1.048576 51 | self.mem_usage = max(self.mem_usage, gpu_mem_mb) 52 | 53 | if stop: 54 | break 55 | stop = self.connection.poll(self.interval) 56 | 57 | # send results to parent pipe 58 | self.connection.send(self.mem_usage) 59 | self.connection.close() 60 | -------------------------------------------------------------------------------- /optimum/exporters/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | from typing import TYPE_CHECKING 17 | 18 | from transformers.utils import _LazyModule 19 | 20 | 21 | _import_structure = { 22 | "base": ["OnnxConfig", "OnnxConfigWithLoss", "OnnxConfigWithPast", "OnnxSeq2SeqConfigWithPast"], 23 | "config": ["TextDecoderOnnxConfig", "TextEncoderOnnxConfig", "TextSeq2SeqOnnxConfig"], 24 | "convert": [ 25 | "export", 26 | "export_models", 27 | "validate_model_outputs", 28 | "validate_models_outputs", 29 | "onnx_export_from_model", 30 | ], 31 | "utils": [ 32 | "get_decoder_models_for_export", 33 | "get_encoder_decoder_models_for_export", 34 | "get_stable_diffusion_models_for_export", 35 | "MODEL_TYPES_REQUIRING_POSITION_IDS", 36 | ], 37 | "__main__": ["main_export"], 38 | } 39 | 40 | if TYPE_CHECKING: 41 | from .base import OnnxConfig, OnnxConfigWithLoss, OnnxConfigWithPast, OnnxSeq2SeqConfigWithPast # noqa 42 | from .config import TextDecoderOnnxConfig, TextEncoderOnnxConfig, TextSeq2SeqOnnxConfig # noqa 43 | from .convert import ( 44 | export, 45 | export_models, 46 | validate_model_outputs, 47 | validate_models_outputs, 48 | onnx_export_from_model, 49 | ) # noqa 50 | from .utils import ( 51 | get_decoder_models_for_export, 52 | get_encoder_decoder_models_for_export, 53 | get_stable_diffusion_models_for_export, 54 | MODEL_TYPES_REQUIRING_POSITION_IDS, 55 | ) 56 | from .__main__ import main_export 57 | else: 58 | import sys 59 | 60 | sys.modules[__name__] = _LazyModule( 61 | __name__, 62 | globals()["__file__"], 63 | _import_structure, 64 | module_spec=__spec__, 65 | ) 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.DS_Store 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 
93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | # Models 133 | *.onnx 134 | # include small test model for tests 135 | !tests/assets/onnx/model.onnx 136 | 137 | .vscode -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/quantization.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from abc import ABC, abstractmethod 15 | from logging import getLogger 16 | from os import PathLike 17 | from pathlib import Path 18 | from typing import Optional, Set, Tuple, Union 19 | 20 | from onnx import ModelProto, load_model 21 | 22 | from onnxruntime.transformers.onnx_model import OnnxModel 23 | 24 | 25 | LOGGER = getLogger("GraphWalker") 26 | 27 | 28 | class PreprocessorPass(ABC): 29 | def __init__(self): 30 | self._logger = LOGGER 31 | 32 | @abstractmethod 33 | def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Optional[Set[str]], Optional[Set[str]]]: 34 | raise NotImplementedError() 35 | 36 | 37 | class QuantizationPreprocessor: 38 | __slots__ = ("_passes",) 39 | 40 | def __init__(self): 41 | self._passes = [] 42 | 43 | def from_config(self, config): 44 | pass 45 | 46 | def register_pass(self, target: PreprocessorPass): 47 | if target not in self._passes: 48 | self._passes.append(target) 49 | 50 | def collect(self, model_or_path: Union[str, PathLike, Path, bytes]) -> Tuple[Set[str], Set[str]]: 51 | global_nodes_to_quantize, global_nodes_to_exclude = set(), set() 52 | graph = load_model(model_or_path.as_posix() if isinstance(model_or_path, Path) else model_or_path) 53 | model = OnnxModel(graph) 54 | 55 | for walking_pass in self._passes: 56 | nodes_to_quantize, nodes_to_exclude = walking_pass(graph, model) 57 | 58 | if nodes_to_quantize is not None: 59 | global_nodes_to_quantize.update(nodes_to_quantize) 60 | 61 | if nodes_to_exclude is not None: 62 | global_nodes_to_exclude.update(nodes_to_exclude) 63 | 64 | # Exclude the nodes from quantization when present in both sets 65 | global_nodes_to_quantize = global_nodes_to_quantize - global_nodes_to_exclude 66 | 67 | return global_nodes_to_quantize, global_nodes_to_exclude 68 | -------------------------------------------------------------------------------- /optimum/commands/env.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import platform 16 | 17 | import huggingface_hub 18 | from transformers import __version__ as transformers_version 19 | from transformers.utils import is_tf_available, is_torch_available 20 | 21 | from ..version import __version__ as version 22 | from . import BaseOptimumCLICommand, CommandInfo 23 | 24 | 25 | class EnvironmentCommand(BaseOptimumCLICommand): 26 | COMMAND = CommandInfo(name="env", help="Get information about the environment used.") 27 | 28 | @staticmethod 29 | def format_dict(d): 30 | return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n" 31 | 32 | def run(self): 33 | pt_version = "not installed" 34 | pt_cuda_available = "NA" 35 | if is_torch_available(): 36 | import torch 37 | 38 | pt_version = torch.__version__ 39 | pt_cuda_available = torch.cuda.is_available() 40 | 41 | tf_version = "not installed" 42 | tf_cuda_available = "NA" 43 | if is_tf_available(): 44 | import tensorflow as tf 45 | 46 | tf_version = tf.__version__ 47 | try: 48 | # deprecated in v2.1 49 | tf_cuda_available = tf.test.is_gpu_available() 50 | except AttributeError: 51 | # returns list of devices, convert to bool 52 | tf_cuda_available = bool(tf.config.list_physical_devices("GPU")) 53 | 54 | info = { 55 | "`optimum` version": version, 56 | "`transformers` version": transformers_version, 57 | "Platform": platform.platform(), 58 | "Python version": platform.python_version(), 59 | "Huggingface_hub version": huggingface_hub.__version__, 60 | "PyTorch version (GPU?)": f"{pt_version} (cuda available: {pt_cuda_available})", 61 | "Tensorflow version (GPU?)": f"{tf_version} (cuda available: {tf_cuda_available})", 62 | } 63 | 64 | print("\nCopy-and-paste the text below in your GitHub issue:\n") 65 | print(self.format_dict(info)) 66 | 67 | return info 68 | -------------------------------------------------------------------------------- /optimum/utils/dummy_diffusers_objects.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from .import_utils import DummyObject, requires_backends 16 | 17 | 18 | class ORTStableDiffusionPipeline(metaclass=DummyObject): 19 | _backends = ["diffusers"] 20 | 21 | def __init__(self, *args, **kwargs): 22 | requires_backends(self, ["diffusers"]) 23 | 24 | @classmethod 25 | def from_pretrained(cls, *args, **kwargs): 26 | requires_backends(cls, ["diffusers"]) 27 | 28 | 29 | class ORTStableDiffusionImg2ImgPipeline(metaclass=DummyObject): 30 | _backends = ["diffusers"] 31 | 32 | def __init__(self, *args, **kwargs): 33 | requires_backends(self, ["diffusers"]) 34 | 35 | @classmethod 36 | def from_pretrained(cls, *args, **kwargs): 37 | requires_backends(cls, ["diffusers"]) 38 | 39 | 40 | class ORTStableDiffusionInpaintPipeline(metaclass=DummyObject): 41 | _backends = ["diffusers"] 42 | 43 | def __init__(self, *args, **kwargs): 44 | requires_backends(self, ["diffusers"]) 45 | 46 | @classmethod 47 | def from_pretrained(cls, *args, **kwargs): 48 | requires_backends(cls, ["diffusers"]) 49 | 50 | 51 | class ORTStableDiffusionXLPipeline(metaclass=DummyObject): 52 | _backends = ["diffusers"] 53 | 54 | def __init__(self, *args, **kwargs): 55 | requires_backends(self, ["diffusers"]) 56 | 57 | @classmethod 58 | def from_pretrained(cls, *args, **kwargs): 59 | requires_backends(cls, ["diffusers"]) 60 | 61 | 62 | class ORTStableDiffusionXLImg2ImgPipeline(metaclass=DummyObject): 63 | _backends = ["diffusers"] 64 | 65 | def __init__(self, *args, **kwargs): 66 | requires_backends(self, ["diffusers"]) 67 | 68 | @classmethod 69 | def from_pretrained(cls, *args, **kwargs): 70 | requires_backends(cls, ["diffusers"]) 71 | 72 | 73 | class ORTLatentConsistencyModelPipeline(metaclass=DummyObject): 74 | _backends = ["diffusers"] 75 | 76 | def __init__(self, *args, **kwargs): 77 | requires_backends(self, ["diffusers"]) 78 | 79 | @classmethod 80 | def from_pretrained(cls, *args, **kwargs): 81 | requires_backends(cls, ["diffusers"]) 82 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/summarization/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | ## Summarization 18 | 19 | By running the script [`run_summarization.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/summarization/run_summarization.py), 20 | you will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to fine-tune and evaluate models from the 21 | [HuggingFace hub](https://huggingface.co/models) on summarization tasks. 22 | 23 | ### Supported models 24 | 25 | In principle, all sequence-to-sequence models with [ONNXConfig](https://github.com/huggingface/transformers/blob/main/src/transformers/onnx/features.py) support in Transformers should work. Here are the models that the Optimum team has tested and validated: 26 | 27 | * `Bart` 28 | * `T5` 29 | 30 | `run_summarization.py` is a lightweight example of how to download and preprocess a dataset from the 🤗 Datasets library or use your own files (jsonlines or csv), then fine-tune one of the architectures above on it. 31 | 32 | 33 | __The following example applies the acceleration features powered by ONNX Runtime.__ 34 | 35 | 36 | ### ONNX Runtime Training 37 | 38 | The following example fine-tunes a T5 model on the CNN/DailyMail dataset.
39 | 40 | ```bash 41 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_summarization.py \ 42 | --model_name_or_path t5-small \ 43 | --dataset_name cnn_dailymail \ 44 | --dataset_config "3.0.0" \ 45 | --source_prefix "summarize: " \ 46 | --do_train \ 47 | --do_eval \ 48 | --per_device_train_batch_size=4 \ 49 | --per_device_eval_batch_size=4 \ 50 | --output_dir /tmp/ort_summarization_t5/ \ 51 | --overwrite_output_dir \ 52 | --predict_with_generate 53 | ``` 54 | 55 | __Note__ 56 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. Install the dependencies either with our prepared* 57 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 58 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 59 | 60 | > *The inference will use PyTorch by default; if you want to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.* 61 | --- -------------------------------------------------------------------------------- /docs/source/exporters/onnx/package_reference/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration classes for ONNX exports 14 | 15 | Exporting a model to ONNX involves specifying: 16 | 1. The input names. 17 | 2. The output names. 18 | 3. The dynamic axes. These refer to the input dimensions that can be changed dynamically at runtime (e.g. a batch size or sequence length). 19 | All other axes will be treated as static, and hence fixed at runtime. 20 | 4. Dummy inputs to trace the model. This is needed in PyTorch to record the computational graph and convert it to ONNX. 21 | 22 | Since this data depends on the choice of model and task, we represent it in terms of _configuration classes_. Each configuration class is associated with 23 | a specific model architecture, and follows the naming convention `ArchitectureNameOnnxConfig`. For instance, the configuration which specifies the ONNX 24 | export of BERT models is `BertOnnxConfig`. 25 | 26 | Since many architectures share similar properties for their ONNX configuration, 🤗 Optimum adopts a 3-level class hierarchy: 27 | 1. Abstract and generic base classes. These handle all the fundamental features, while being agnostic to the modality (text, image, audio, etc). 28 | 2. Middle-end classes. These are aware of the modality, but multiple classes can exist for the same modality depending on the inputs they support. 29 | They specify which input generators should be used for the dummy inputs, but remain model-agnostic. 30 | 3. Model-specific classes like the `BertOnnxConfig` mentioned above. These are the ones actually used to export models; a simplified sketch of such a class is shown below.
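For illustration, here is a minimal sketch of what a model-specific configuration can look like. The class name is hypothetical and the body is simplified to the bare pattern: subclass a middle-end class, attach a normalized config, and declare the inputs together with their dynamic axes.

```python
from typing import Dict

from optimum.exporters.onnx.config import TextEncoderOnnxConfig
from optimum.utils import NormalizedTextConfig


# Hypothetical example for a BERT-like encoder, shown only to illustrate the pattern.
# Real classes such as `BertOnnxConfig` may define additional attributes (default opset,
# task-specific axes, ...).
class MyBertLikeOnnxConfig(TextEncoderOnnxConfig):
    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig

    @property
    def inputs(self) -> Dict[str, Dict[int, str]]:
        # Every axis mapped to a name here is dynamic; unnamed axes stay static in the exported graph.
        dynamic_axis = {0: "batch_size", 1: "sequence_length"}
        return {
            "input_ids": dynamic_axis,
            "attention_mask": dynamic_axis,
            "token_type_ids": dynamic_axis,
        }
```

The keys of `inputs` become the ONNX input names, and the integer-to-name mappings mark which axes are dynamic; the middle-end parent class takes care of generating the matching dummy inputs.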
31 | 32 | 33 | ## Base classes 34 | 35 | [[autodoc]] exporters.onnx.OnnxConfig 36 | - inputs 37 | - outputs 38 | - generate_dummy_inputs 39 | 40 | [[autodoc]] exporters.onnx.OnnxConfigWithPast 41 | - add_past_key_values 42 | 43 | [[autodoc]] exporters.onnx.OnnxSeq2SeqConfigWithPast 44 | 45 | ## Middle-end classes 46 | 47 | ### Text 48 | 49 | [[autodoc]] exporters.onnx.config.TextEncoderOnnxConfig 50 | 51 | [[autodoc]] exporters.onnx.config.TextDecoderOnnxConfig 52 | 53 | [[autodoc]] exporters.onnx.config.TextSeq2SeqOnnxConfig 54 | 55 | 56 | ### Vision 57 | 58 | [[autodoc]] exporters.onnx.config.VisionOnnxConfig 59 | 60 | 61 | ### Multi-modal 62 | 63 | [[autodoc]] exporters.onnx.config.TextAndVisionOnnxConfig 64 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/token-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Token classification 18 | 19 | ## NER Tasks 20 | 21 | By running the script [`run_ner.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/token-classification/run_ner.py), 22 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to fine-tune the models from the 23 | [HuggingFace hub](https://huggingface.co/models) for token classification tasks such as Named Entity Recognition (NER). 24 | 25 | 26 | __The following example applies the acceleration features powered by ONNX Runtime.__ 27 | 28 | 29 | ### ONNX Runtime Training 30 | 31 | The following example fine-tunes a BERT model on the CoNLL-2003 NER dataset. 32 | 33 | ```bash 34 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_ner.py \ 35 | --model_name_or_path bert-base-cased \ 36 | --dataset_name conll2003 \ 37 | --do_train \ 38 | --do_eval \ 39 | --output_dir /tmp/ort_bert_conll2003/ 40 | ``` 41 | 42 | ### Performance 43 | 44 | We get the following results for the [bert-large-cased](https://huggingface.co/bert-large-cased) model with mixed precision training (fp16) on the previous 45 | task under PyTorch and ONNX Runtime backends. A single Nvidia A100 card was used to run the experiment for 7 epochs: 46 | 47 | | Model | Backend | Runtime (s) | Train samples (/s) | 48 | | ---------------- | ------------ | ---------- | ----------------- | 49 | | bert-large-cased | PyTorch | 711.5 | 138.1 | 50 | | bert-large-cased | ONNX Runtime | 637.2 | 154.3 | 51 | 52 | We observe the gain of ONNX Runtime compared to PyTorch as follows: 53 | 54 | | | Latency | Throughput | 55 | | ----- | ------- | ---------- | 56 | | Gain | 10.45% | 11.67% | 57 | 58 | 59 | __Note__ 60 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. Install the dependencies either with our prepared* 61 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 62 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 63 | 64 | > *The inference will use PyTorch by default; if you want to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.* 65 | --- 66 | -------------------------------------------------------------------------------- /optimum/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from .constant import ( 17 | CONFIG_NAME, 18 | DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, 19 | DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER, 20 | DIFFUSION_MODEL_UNET_SUBFOLDER, 21 | DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER, 22 | DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, 23 | ONNX_WEIGHTS_NAME, 24 | ) 25 | from .import_utils import ( 26 | DIFFUSERS_MINIMUM_VERSION, 27 | ORT_QUANTIZE_MINIMUM_VERSION, 28 | TORCH_MINIMUM_VERSION, 29 | TRANSFORMERS_MINIMUM_VERSION, 30 | check_if_diffusers_greater, 31 | check_if_pytorch_greater, 32 | check_if_transformers_greater, 33 | is_accelerate_available, 34 | is_auto_gptq_available, 35 | is_diffusers_available, 36 | is_onnx_available, 37 | is_onnxruntime_available, 38 | is_pydantic_available, 39 | is_sentence_transformers_available, 40 | is_timm_available, 41 | is_torch_onnx_support_available, 42 | require_numpy_strictly_lower, 43 | torch_version, 44 | ) 45 | from .input_generators import ( 46 | DEFAULT_DUMMY_SHAPES, 47 | DTYPE_MAPPER, 48 | BloomDummyPastKeyValuesGenerator, 49 | DummyAudioInputGenerator, 50 | DummyBboxInputGenerator, 51 | DummyCodegenDecoderTextInputGenerator, 52 | DummyDecoderTextInputGenerator, 53 | DummyEncodecInputGenerator, 54 | DummyInputGenerator, 55 | DummyIntGenerator, 56 | DummyLabelsGenerator, 57 | DummyPastKeyValuesGenerator, 58 | DummyPix2StructInputGenerator, 59 | DummyPointsGenerator, 60 | DummySeq2SeqDecoderTextInputGenerator, 61 | DummySeq2SeqPastKeyValuesGenerator, 62 | DummySpeechT5InputGenerator, 63 | DummyTextInputGenerator, 64 | DummyTimestepInputGenerator, 65 | DummyVisionEmbeddingsGenerator, 66 | DummyVisionEncoderDecoderPastKeyValuesGenerator, 67 | DummyVisionInputGenerator, 68 | DummyXPathSeqInputGenerator, 69 | FalconDummyPastKeyValuesGenerator, 70 | GemmaDummyPastKeyValuesGenerator, 71 | GPTBigCodeDummyPastKeyValuesGenerator, 72 | MistralDummyPastKeyValuesGenerator, 73 | MultiQueryPastKeyValuesGenerator, 74 | ) 75 | from .modeling_utils import recurse_getattr, recurse_setattr 76 | from .normalized_config import ( 77 | NormalizedConfig, 78 | NormalizedConfigManager, 79 | NormalizedEncoderDecoderConfig, 80 | NormalizedSeq2SeqConfig, 81 | NormalizedTextAndVisionConfig, 82 | NormalizedTextConfig, 83 | NormalizedVisionConfig, 84 | ) 85 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F41B Bug Report" 2 | description: Submit a bug report to help us improve Optimum 3 | labels: [ "bug" ] 4 | body: 5 | - type: textarea 6 | id: system-info 7 | attributes: 8 | label: System Info 9 | description: Please share your system info with us. 10 | render: shell 11 | placeholder: optimum version, platform, python version, ... 12 | validations: 13 | required: true 14 | 15 | - type: textarea 16 | id: who-can-help 17 | attributes: 18 | label: Who can help? 
19 | description: | 20 | Your issue will be replied to more quickly if you can figure out the right person to tag with @ 21 | If you know how to use git blame, that is the easiest way; otherwise, here is a rough guide of **who to tag**. 22 | Please tag fewer than 3 people. 23 | 24 | - Pipelines: `@philschmid` 25 | - Export of transformers model to ONNX/TFLite: `@michaelbenayoun` 26 | - ONNX Runtime: `@JingyaHuang`, `@echarlaix` 27 | - Intel Neural Compressor: `@echarlaix` 28 | - Habana: `@regisss` 29 | 30 | placeholder: "@Username ..." 31 | 32 | - type: checkboxes 33 | id: information-scripts-examples 34 | attributes: 35 | label: Information 36 | description: 'The problem arises when using:' 37 | options: 38 | - label: "The official example scripts" 39 | - label: "My own modified scripts" 40 | 41 | - type: checkboxes 42 | id: information-tasks 43 | attributes: 44 | label: Tasks 45 | description: "The tasks I am working on are:" 46 | options: 47 | - label: "An officially supported task in the `examples` folder (such as GLUE/SQuAD, ...)" 48 | - label: "My own task or dataset (give details below)" 49 | 50 | - type: textarea 51 | id: reproduction 52 | validations: 53 | required: true 54 | attributes: 55 | label: Reproduction (minimal, reproducible, runnable) 56 | description: | 57 | Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet. 58 | If you have code snippets, error messages, or stack traces, please provide them here as well. 59 | Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting 60 | Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code. 61 | Providing a **minimal**, **reproducible** example using a **publicly available model** significantly increases the chances of a fix in a timely manner. 62 | 63 | placeholder: | 64 | Providing a minimal, reproducible example using a publicly available model significantly increases the chances of a fix in a timely manner. 65 | 66 | 67 | - type: textarea 68 | id: expected-behavior 69 | validations: 70 | required: true 71 | attributes: 72 | label: Expected behavior 73 | description: "A clear and concise description of what you would expect to happen." 74 | --------------------------------------------------------------------------------