├── tests ├── __init__.py ├── exporters │ ├── __init__.py │ ├── onnx │ │ └── __init__.py │ ├── tflite │ │ └── __init__.py │ └── Dockerfile_exporters_gpu ├── assets │ ├── hub │ │ └── config.json │ └── onnx │ │ ├── model.onnx │ │ └── config.json ├── run_doctest.sh ├── README.md ├── gptq │ └── Dockerfile_quantization_gpu ├── bettertransformer │ └── Dockerfile_bettertransformer_gpu ├── onnxruntime │ ├── docker │ │ └── Dockerfile_onnxruntime_gpu │ ├── ds_configs │ │ ├── ds_config_zero_stage_inifinity.json │ │ ├── ds_config_zero_stage_1.json │ │ ├── ds_config_zero_stage_2.json │ │ └── ds_config_zero_stage_3.json │ └── test_utils.py ├── cli │ └── cli_with_custom_command.py ├── test_modeling_base.py ├── onnx │ └── test_onnx_export_custom_module.py └── benchmark │ └── memory_tracker.py ├── optimum ├── onnxruntime │ ├── models │ │ ├── __init__.py │ │ └── bloom.py │ ├── runs │ │ └── utils.py │ ├── io_binding │ │ └── __init__.py │ ├── preprocessors │ │ ├── __init__.py │ │ ├── passes │ │ │ ├── __init__.py │ │ │ ├── fully_connected.py │ │ │ ├── gelu.py │ │ │ └── layernorm.py │ │ └── quantization.py │ ├── constants.py │ ├── graph.py │ └── training_args_seq2seq.py ├── utils │ ├── dummy_bettertransformer_objects.py │ ├── constant.py │ ├── preprocessing │ │ ├── __init__.py │ │ └── task_processors_manager.py │ ├── modeling_utils.py │ ├── doc.py │ ├── dummy_diffusers_objects.py │ └── __init__.py ├── commands │ ├── register │ │ ├── __init__.py │ │ └── README.md │ ├── export │ │ ├── __init__.py │ │ └── base.py │ ├── onnxruntime │ │ ├── __init__.py │ │ └── base.py │ ├── __init__.py │ └── env.py ├── version.py ├── gptq │ ├── __init__.py │ └── constants.py ├── fx │ ├── __init__.py │ ├── quantization │ │ └── __init__.py │ ├── optimization │ │ └── __init__.py │ └── utils.py ├── exporters │ ├── __init__.py │ ├── base.py │ ├── error_utils.py │ ├── onnx │ │ ├── constants.py │ │ └── __init__.py │ └── tflite │ │ ├── __init__.py │ │ └── config.py ├── bettertransformer │ └── __init__.py ├── pipelines │ ├── __init__.py │ └── diffusers │ │ └── watermark.py ├── quantization_base.py ├── onnx │ └── __init__.py └── conftest.py ├── docs ├── source │ ├── notebooks.md │ ├── nvidia_overview.mdx │ ├── torch_fx │ │ ├── concept_guides │ │ │ └── symbolic_tracer.mdx │ │ ├── package_reference │ │ │ └── optimization.mdx │ │ └── overview.mdx │ ├── onnxruntime │ │ ├── package_reference │ │ │ ├── optimization.mdx │ │ │ ├── quantization.mdx │ │ │ ├── trainer.mdx │ │ │ └── configuration.mdx │ │ ├── concept_guides │ │ │ └── onnx.mdx │ │ └── overview.mdx │ ├── exporters │ │ ├── overview.mdx │ │ ├── tflite │ │ │ ├── package_reference │ │ │ │ ├── export.mdx │ │ │ │ └── configuration.mdx │ │ │ ├── usage_guides │ │ │ │ └── contribute.mdx │ │ │ └── overview.mdx │ │ └── onnx │ │ │ └── package_reference │ │ │ ├── export.mdx │ │ │ └── configuration.mdx │ ├── _redirects.yml │ └── utils │ │ ├── dummy_input_generators.mdx │ │ └── normalized_config.mdx ├── Dockerfile └── conftest.py ├── examples └── onnxruntime │ ├── quantization │ ├── image-classification │ │ ├── requirements.txt │ │ └── README.md │ ├── question-answering │ │ ├── requirements.txt │ │ └── README.md │ ├── token-classification │ │ ├── requirements.txt │ │ └── README.md │ ├── multiple-choice │ │ ├── requirements.txt │ │ └── README.md │ └── text-classification │ │ ├── requirements.txt │ │ └── README.md │ ├── optimization │ ├── question-answering │ │ ├── requirements.txt │ │ └── README.md │ ├── token-classification │ │ ├── requirements.txt │ │ └── README.md │ ├── multiple-choice │ │ ├── 
requirements.txt │ │ └── README.md │ └── text-classification │ │ ├── requirements.txt │ │ └── README.md │ └── training │ ├── text-classification │ ├── requirements.txt │ └── zero_stage_2.json │ ├── translation │ └── requirements.txt │ ├── question-answering │ ├── requirements.txt │ └── README.md │ ├── stable-diffusion │ └── text-to-image │ │ └── requirements.txt │ ├── image-classification │ ├── requirements.txt │ └── README.md │ ├── token-classification │ ├── requirements.txt │ └── README.md │ ├── summarization │ ├── requirements.txt │ └── README.md │ ├── language-modeling │ ├── requirements.txt │ └── README.md │ └── docker │ └── Dockerfile-ort-nightly-rocm57 ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── feature-request.yml │ └── bug-report.yml ├── workflows │ ├── upload_pr_documentation.yml │ ├── test_bettertransformer_gpu.yml │ ├── doctests.yml │ ├── test_exporters_common.yml │ ├── test_onnx.yml │ ├── test_onnxruntime_slow.yml │ ├── test_export_onnx_timm.yml │ ├── test_onnxruntime_train.yml │ ├── test_fx.yml │ ├── test_export_onnx_cli_timm.yml │ ├── test_export_onnx_cli.yml │ ├── test_exporters_gpu.yml │ ├── test_gptq.yml │ ├── test_export_tflite.yml │ ├── test_onnxruntime_gpu.yml │ ├── test_export_tflite_cli.yml │ ├── dev_test_exporters.yml │ ├── test_export_tflite_cli_quantization_fp16.yml │ ├── test_benckmark.yml │ ├── dev_test_onnx.yml │ ├── test_export_tflite_cli_dynamic_quantization_int8.yml │ ├── test_dummy_inputs.yml │ ├── test_export_tflite_cli_quantization_int8x16.yml │ ├── dev_test_benckmark.yml │ ├── test_cli.yml │ ├── test_export_tflite_cli_quantization_full_int8.yml │ ├── dev_test_dummy_inputs.yml │ ├── test_export_tflite_cli_quantization_int8_custom_dataset.yml │ ├── test_export_tflite_cli_quantization_int8_default_dataset.yml │ ├── dev_test_fx.yml │ ├── test_exporters_slow.yml │ ├── dev_test_bettertransformer.yml │ ├── dev_test_onnxruntime.yml │ ├── test_export_onnx.yml │ ├── check_code_quality.yml │ ├── test_bettertransformer.yml │ ├── test_optimum_common.yml │ ├── test_offline.yml │ └── dev_test_optimum_common.yml ├── PULL_REQUEST_TEMPLATE.md └── generate_dev_tests.py ├── setup.cfg ├── MANIFEST.in ├── pyproject.toml ├── Makefile └── .gitignore /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/exporters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/exporters/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/exporters/tflite/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /optimum/onnxruntime/models/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tests/assets/hub/config.json: -------------------------------------------------------------------------------- 1 | {"from_local":true} -------------------------------------------------------------------------------- /docs/source/notebooks.md: -------------------------------------------------------------------------------- 1 | 
../../notebooks/README.md -------------------------------------------------------------------------------- /tests/assets/onnx/model.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/merveenoyan/optimum/main/tests/assets/onnx/model.onnx -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/image-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.5.0 2 | torchvision>=0.6.0 3 | datasets>=1.17.0 4 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | torch >= 1.9.0 3 | onnx 4 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | torch >= 1.9.0 3 | onnx 4 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | seqeval 2 | datasets >= 1.18.0 3 | torch >= 1.9 4 | onnx 5 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | seqeval 2 | datasets >= 1.8.0 3 | torch >= 1.9 4 | onnx 5 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/training/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | -------------------------------------------------------------------------------- /docs/source/nvidia_overview.mdx: -------------------------------------------------------------------------------- 1 | # 🤗 Optimum Nvidia 2 | 3 | Find more information about 🤗 Optimum Nvidia [here](https://github.com/huggingface/optimum-nvidia). 4 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/translation/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.18.0 2 | sentencepiece != 0.1.92 3 | protobuf 4 | sacrebleu >= 1.4.12 5 | py7zr 6 | torch >= 1.8 -------------------------------------------------------------------------------- /docs/source/torch_fx/concept_guides/symbolic_tracer.mdx: -------------------------------------------------------------------------------- 1 | # Symbolic tracer 2 | 3 | In Torch FX, the symbolic tracer feeds dummy values through the code to record the underlying operations. 
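A minimal sketch of that idea with plain `torch.fx` (the tiny module below is illustrative, not taken from Optimum):

```python
import torch
from torch import fx


class TinyBlock(torch.nn.Module):
    def forward(self, x):
        # during tracing, `x` is a Proxy (a dummy value), not a real tensor
        return torch.relu(x) + 1.0


# symbolic_trace() sends the dummy value through forward() and records
# every operation it encounters into a Graph wrapped in a GraphModule.
traced = fx.symbolic_trace(TinyBlock())
print(traced.graph)  # the recorded operations
print(traced.code)   # Python source regenerated from that graph
```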
-------------------------------------------------------------------------------- /examples/onnxruntime/training/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9.0 7 | torch-ort 8 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/multiple-choice/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9 7 | onnx 8 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/multiple-choice/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9 7 | onnx 8 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9 7 | onnx 8 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9 7 | onnx 8 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/training/stable-diffusion/text-to-image/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate>=0.16.0 2 | transformers>=4.25.1 3 | datasets 4 | git+https://github.com/huggingface/diffusers 5 | ftfy 6 | tensorboard 7 | Jinja2 8 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/image-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate>=0.12.0 2 | torch>=1.5.0 3 | torchvision>=0.6.0 4 | datasets>=1.17.0 5 | evaluate 6 | onnx>=1.9.0 7 | onnxruntime-training>=1.9.0 8 | torch-ort 9 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.18.3 2 | scipy 3 | scikit-learn 4 | sentencepiece != 0.1.92 5 | seqeval 6 | torch >= 1.8.1 7 | seqeval 8 | sentencepiece != 0.1.92 9 | torch >= 1.9 10 | torch-ort 11 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | evaluate 3 | datasets >= 1.8.0 4 | sentencepiece != 0.1.92 5 | scipy 6 | scikit-learn 7 | protobuf 8 | rouge-score 9 | nltk 10 | py7zr 11 | torch >= 1.9.0 12 | torch-ort 13 | 
-------------------------------------------------------------------------------- /examples/onnxruntime/training/language-modeling/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf == 3.20.2 6 | torch >= 1.9.0 7 | transformers>=4.16.0 8 | onnx>=1.9.0 9 | onnxruntime-training>=1.9.0 10 | torch-ort 11 | -------------------------------------------------------------------------------- /optimum/utils/dummy_bettertransformer_objects.py: -------------------------------------------------------------------------------- 1 | from .import_utils import DummyObject, requires_backends 2 | 3 | 4 | class BarkSelfAttention(metaclass=DummyObject): 5 | _backends = ["transformers_431"] 6 | 7 | def __init__(self, *args, **kwargs): 8 | requires_backends(self, ["transformers_431"]) 9 | -------------------------------------------------------------------------------- /tests/run_doctest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pip install accelerate 3 | pip install .[benchmark] 4 | touch optimum/__init__.py 5 | python tests/utils/prepare_for_doc_test.py optimum docs 6 | pytest --verbose -s --doctest-modules $(cat tests/utils/documentation_tests.txt) --doctest-continue-on-failure --doctest-glob='*.mdx' 7 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | version: 2.1 3 | contact_links: 4 | - name: Website Related 5 | url: https://github.com/huggingface/hub-docs/issues 6 | about: Feature requests and bug reports related to the website 7 | - name: Forum 8 | url: https://discuss.huggingface.co/ 9 | about: General usage questions and community discussions -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = optimum 7 | line_length = 119 8 | lines_after_imports = 2 9 | multi_line_output = 3 10 | use_parentheses = True 11 | 12 | [flake8] 13 | ignore = E203, E501, E741, W503, W605 14 | max-line-length = 119 15 | 16 | [tool:pytest] 17 | doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS 18 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/text-classification/zero_stage_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "steps_per_print": 100, 3 | "zero_optimization": { 4 | "stage": 2 5 | }, 6 | "zero_allow_untested_optimizer": true, 7 | "fp16": { 8 | "enabled": true, 9 | "initial_scale_power": 12 10 | }, 11 | "tensorboard":{ 12 | "enabled": false 13 | }, 14 | "train_micro_batch_size_per_gpu": "auto", 15 | "gradient_accumulation_steps": "auto" 16 | } -------------------------------------------------------------------------------- /docs/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nikolaik/python-nodejs:python3.8-nodejs18 2 | 3 | ARG commit_sha 4 | ARG clone_url 5 | 6 | RUN apt -y update 7 | RUN python3 -m pip install --no-cache-dir --upgrade pip 8 | RUN python3 -m pip install --no-cache-dir 
git+https://github.com/huggingface/doc-builder.git 9 | 10 | RUN git clone $clone_url && cd optimum && git checkout $commit_sha 11 | RUN python3 -m pip install --no-cache-dir ./optimum[onnxruntime,benchmark,quality,exporters-tf,doc-build,diffusers] 12 | -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Upload PR Documentation 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Build PR documentation"] 6 | types: 7 | - completed 8 | 9 | jobs: 10 | build: 11 | uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main 12 | with: 13 | package_name: optimum 14 | secrets: 15 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} 16 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} 17 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Helpful tips for testing & debugging optimum 2 | 3 | ## VSCODE 4 | 5 | If you are using vscode, you might have a hard time getting the "Testing" menu to discover the tests so that you can run or debug them individually. You can copy the snippet below into `.vscode/settings.json`. 6 | 7 | ```json 8 | { 9 | "python.testing.pytestArgs": [ 10 | "tests/onnxruntime", 11 | "tests/test_*" 12 | ], 13 | "python.testing.unittestEnabled": false, 14 | "python.testing.pytestEnabled": true 15 | } 16 | ``` 17 | 18 | This snippet will discover all base tests and the tests inside the `tests/onnxruntime` folder. 19 | -------------------------------------------------------------------------------- /optimum/commands/register/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | include README.md 16 | include LICENSE 17 | -------------------------------------------------------------------------------- /optimum/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | __version__ = "1.20.0.dev0" 16 | -------------------------------------------------------------------------------- /optimum/onnxruntime/runs/utils.py: -------------------------------------------------------------------------------- 1 | from ..modeling_decoder import ORTModelForCausalLM 2 | from ..modeling_ort import ( 3 | ORTModelForFeatureExtraction, 4 | ORTModelForImageClassification, 5 | ORTModelForQuestionAnswering, 6 | ORTModelForSequenceClassification, 7 | ORTModelForTokenClassification, 8 | ) 9 | 10 | 11 | task_ortmodel_map = { 12 | "text-generation": ORTModelForCausalLM, 13 | "feature-extraction": ORTModelForFeatureExtraction, 14 | "image-classification": ORTModelForImageClassification, 15 | "question-answering": ORTModelForQuestionAnswering, 16 | "text-classification": ORTModelForSequenceClassification, 17 | "token-classification": ORTModelForTokenClassification, 18 | } 19 | -------------------------------------------------------------------------------- /optimum/gptq/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from .quantizer import GPTQQuantizer, load_quantized_model 16 | -------------------------------------------------------------------------------- /optimum/fx/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from . import optimization, quantization 16 | -------------------------------------------------------------------------------- /optimum/fx/quantization/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .functions import fuse_fx, prepare_fx, prepare_qat_fx 15 | -------------------------------------------------------------------------------- /optimum/onnxruntime/io_binding/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .io_binding_helper import IOBindingHelper, TypeHelper 15 | -------------------------------------------------------------------------------- /optimum/exporters/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from . import onnx # noqa 16 | from .tasks import TasksManager # noqa 17 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .quantization import PreprocessorPass, QuantizationPreprocessor 16 | -------------------------------------------------------------------------------- /.github/workflows/test_bettertransformer_gpu.yml: -------------------------------------------------------------------------------- 1 | name: BetterTransformer GPU / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 1 */3 * * # at 1am every 3 days 7 | 8 | jobs: 9 | do-the-job: 10 | name: Start self-hosted EC2 runner 11 | runs-on: [single-gpu, nvidia-gpu, t4, ci] 12 | env: 13 | AWS_REGION: us-east-1 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v2 17 | - name: Build image 18 | run: | 19 | docker build -f tests/bettertransformer/Dockerfile_bettertransformer_gpu -t bettertransformer-gpu . 20 | - name: Test with unittest within docker container 21 | run: | 22 | docker run --rm --gpus all --workdir=/workspace/optimum/tests bettertransformer-gpu:latest 23 | -------------------------------------------------------------------------------- /optimum/bettertransformer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace and Meta Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from .models import BetterTransformerManager 15 | from .transformation import BetterTransformer 16 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/package_reference/optimization.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Optimization 14 | 15 | ## ORTOptimizer 16 | 17 | [[autodoc]] onnxruntime.optimization.ORTOptimizer 18 | - all -------------------------------------------------------------------------------- /docs/source/onnxruntime/package_reference/quantization.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Quantization 14 | 15 | ## ORTQuantizer 16 | 17 | [[autodoc]] onnxruntime.quantization.ORTQuantizer 18 | - all 19 | -------------------------------------------------------------------------------- /optimum/exporters/base.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Base exporters config.""" 16 | 17 | from abc import ABC 18 | 19 | 20 | class ExportConfig(ABC): 21 | pass 22 | -------------------------------------------------------------------------------- /optimum/commands/export/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from .base import ExportCommand 17 | from .onnx import ONNXExportCommand 18 | from .tflite import TFLiteExportCommand 19 | -------------------------------------------------------------------------------- /.github/workflows/doctests.yml: -------------------------------------------------------------------------------- 1 | name: Optimum common / Doctest 2 | # Note: this test uses transformers stable and optimum dev 3 | 4 | on: 5 | workflow_dispatch: 6 | schedule: 7 | - cron: 0 1 * * 0 # every sunday at 1am 8 | 9 | jobs: 10 | do-the-job: 11 | name: Start self-hosted EC2 runner 12 | runs-on: [single-gpu, nvidia-gpu, t4, ci] 13 | env: 14 | AWS_REGION: us-east-1 15 | steps: 16 | - name: Checkout 17 | uses: actions/checkout@v2 18 | - name: Build image 19 | run: | 20 | docker build -f tests/onnxruntime/Dockerfile_onnxruntime_gpu -t onnxruntime-gpu . 21 | - name: Test with unittest within docker container 22 | run: | 23 | docker run --rm --gpus all --workdir=/workspace/optimum/ onnxruntime-gpu:latest /bin/bash tests/run_doctest.sh 24 | -------------------------------------------------------------------------------- /optimum/commands/onnxruntime/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .base import ONNXRuntimeCommand 17 | from .optimize import ONNXRuntimeOptimizeCommand 18 | from .quantize import ONNXRuntimeQuantizeCommand 19 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/passes/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .excluders import ExcludeNodeAfter, ExcludeNodeFollowedBy 16 | from .gelu import ExcludeGeLUNodes 17 | from .layernorm import ExcludeLayerNormNodes -------------------------------------------------------------------------------- /docs/source/exporters/overview.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Overview 14 | 15 | 🤗 Optimum enables exporting models from PyTorch or TensorFlow to different formats through its `exporters` module. For now, two export formats are supported: ONNX and TFLite (TensorFlow Lite). 16 | -------------------------------------------------------------------------------- /optimum/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from .pipelines_base import ( 17 | MAPPING_LOADING_FUNC, 18 | ORT_SUPPORTED_TASKS, 19 | load_bettertransformer, 20 | load_ort_pipeline, 21 | pipeline, 22 | ) 23 | -------------------------------------------------------------------------------- /docs/source/exporters/tflite/package_reference/export.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Export functions 14 | 15 | ## Main functions 16 | 17 | [[autodoc]] exporters.tflite.convert.export 18 | 19 | ## Utility functions 20 | 21 | [[autodoc]] exporters.tflite.convert.validate_model_outputs 22 | -------------------------------------------------------------------------------- /tests/assets/onnx/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "tiny-distilbert-classification", 3 | "activation": "gelu", 4 | "architectures": [ 5 | "DistilBertForSequenceClassification" 6 | ], 7 | "attention_dropout": 0.1, 8 | "dim": 2, 9 | "dropout": 0.1, 10 | "finetuning_task": "sst-2", 11 | "hidden_dim": 2, 12 | "id2label": { 13 | "0": "NEGATIVE", 14 | "1": "POSITIVE" 15 | }, 16 | "initializer_range": 0.02, 17 | "label2id": { 18 | "NEGATIVE": 0, 19 | "POSITIVE": 1 20 | }, 21 | "max_position_embeddings": 512, 22 | "model_type": "distilbert", 23 | "n_heads": 2, 24 | "n_layers": 2, 25 | "output_past": true, 26 | "pad_token_id": 0, 27 | "qa_dropout": 0.1, 28 | "seq_classif_dropout": 0.2, 29 | "sinusoidal_pos_embds": false, 30 | "tie_weights_": true, 31 | "torch_dtype": "float32", 32 | "transformers_version": "4.10.0.dev0", 33 | "vocab_size": 30522 34 | } 35 | -------------------------------------------------------------------------------- /optimum/gptq/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | SEQLEN_KEYS_TRANFORMERS = ["max_position_embeddings", "seq_length", "n_positions"] 16 | BLOCK_PATTERNS = [ 17 | "transformer.h", 18 | "model.decoder.layers", 19 | "gpt_neox.layers", 20 | "model.layers", 21 | ] 22 | 23 | GPTQ_CONFIG = "quantize_config.json" 24 | -------------------------------------------------------------------------------- /.github/workflows/test_exporters_common.yml: -------------------------------------------------------------------------------- 1 | name: Exporters Common / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9] 19 | os: [ubuntu-20.04] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | pip install .[tests,exporters-tf] 31 | - name: Test with unittest 32 | working-directory: tests 33 | run: | 34 | pytest exporters/common/ -s --durations=0 35 | -------------------------------------------------------------------------------- /.github/workflows/test_onnx.yml: -------------------------------------------------------------------------------- 1 | name: ONNX / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9] 19 | os: [ubuntu-20.04, macos-13] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | pip install .[tests,onnxruntime,exporters-tf] 31 | - name: Test with unittest 32 | working-directory: tests 33 | run: | 34 | python -m unittest discover -s onnx -p 'test_*.py' 35 | -------------------------------------------------------------------------------- /optimum/fx/optimization/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | from .transformations import ( # noqa 16 | ChangeTrueDivToMulByInverse, 17 | FuseBatchNorm1dInLinear, 18 | FuseBatchNorm2dInConv2d, 19 | FuseBiasInLinear, 20 | MergeLinears, 21 | ReversibleTransformation, 22 | Transformation, 23 | compose, 24 | ) 25 | -------------------------------------------------------------------------------- /optimum/onnxruntime/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ENCODER_ONNX_FILE_PATTERN = r"(.*)?encoder(.*)?\.onnx" 16 | DECODER_ONNX_FILE_PATTERN = r"(.*)?decoder((?!(with_past|merged)).)*?\.onnx" 17 | DECODER_WITH_PAST_ONNX_FILE_PATTERN = r"(.*)?decoder(.*)?with_past(.*)?\.onnx" 18 | DECODER_MERGED_ONNX_FILE_PATTERN = r"(.*)?decoder(.*)?merged(.*)?\.onnx" 19 | -------------------------------------------------------------------------------- /.github/workflows/test_onnxruntime_slow.yml: -------------------------------------------------------------------------------- 1 | name: ONNX Runtime slow / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 7 * * * # every day at 7am 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | build: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: [3.8, 3.9] 18 | os: [ubuntu-20.04] 19 | 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies for export 28 | run: | 29 | pip install .[tests,onnxruntime] 30 | - name: Test with unittest 31 | working-directory: tests 32 | run: | 33 | RUN_SLOW=1 pytest onnxruntime -s -m "run_slow" --durations=0 34 | -------------------------------------------------------------------------------- /tests/gptq/Dockerfile_quantization_gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 2 | CMD nvidia-smi 3 | 4 | # Ignore interactive questions during `docker build` 5 | ENV DEBIAN_FRONTEND noninteractive 6 | 7 | # Install and update tools to minimize security vulnerabilities 8 | RUN apt-get update 9 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 10 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev python3-pip && \ 11 | apt-get clean 12 | RUN unattended-upgrade 13 | RUN apt-get autoremove -y 14 | 15 | RUN python3 -m pip install -U pip 16 | 17 | RUN pip install torch torchvision torchaudio 18 | RUN pip install transformers accelerate auto-gptq datasets 19 | 20 | # Install Optimum 21 | COPY . 
/workspace/optimum 22 | RUN pip install /workspace/optimum[tests] 23 | 24 | ENV RUN_SLOW=1 25 | WORKDIR /workspace/optimum/tests/ 26 | CMD pytest gptq/test_*.py --durations=0 -s -vvvvv 27 | -------------------------------------------------------------------------------- /.github/workflows/test_export_onnx_timm.yml: -------------------------------------------------------------------------------- 1 | name: Exporters ONNX Timm / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 7 * * * # every day at 7am 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | build: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: [3.8, 3.9] 18 | os: [ubuntu-20.04] 19 | 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies for pytorch export 28 | run: | 29 | pip install .[tests,exporters] 30 | - name: Test with unittest 31 | working-directory: tests 32 | run: | 33 | RUN_SLOW=1 pytest exporters/onnx/ -s -n auto -k "timm" --durations=0 34 | 35 | -------------------------------------------------------------------------------- /optimum/commands/register/README.md: -------------------------------------------------------------------------------- 1 | # Register commands in the Optimum CLI from a subpackage 2 | 3 | It is possible to register a command in the Optimum CLI, either as a command or a subcommand of an already existing command. 4 | 5 | Steps to follow: 6 | 7 | 1. Create a command as a subclass of `optimum.commands.BaseOptimumCLICommand`. 8 | 2. Create a Python file under `optimum/commands/register/`, and define a `REGISTER_COMMANDS` list variable there. 9 | 3. Fill the `REGISTER_COMMANDS` as follows: 10 | 11 | ```python 12 | # CustomCommand1 and CustomCommand2 could also be defined in this file actually. 13 | from ..my_custom_commands import CustomCommand1, CustomCommand2 14 | from ..export import ExportCommand 15 | 16 | REGISTER_COMMANDS = [ 17 | # CustomCommand1 will be registered as a subcommand of the root Optimum CLI. 18 | CustomCommand1, 19 | # CustomCommand2 will be registered as a subcommand of the `optimum-cli export` command. 20 | (CustomCommand2, ExportCommand) # CustomCommand2 will be registered 21 | ] 22 | ``` 23 | -------------------------------------------------------------------------------- /tests/bettertransformer/Dockerfile_bettertransformer_gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 2 | CMD nvidia-smi 3 | 4 | # Ignore interactive questions during `docker build` 5 | ENV DEBIAN_FRONTEND noninteractive 6 | 7 | # Install and update tools to minimize security vulnerabilities 8 | RUN apt-get update 9 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 10 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev python3-pip && \ 11 | apt-get clean 12 | RUN unattended-upgrade 13 | RUN apt-get autoremove -y 14 | 15 | RUN python3 -m pip install -U pip 16 | 17 | RUN pip install torch torchvision torchaudio 18 | RUN pip install transformers accelerate datasets 19 | 20 | # Install Optimum 21 | COPY . 
/workspace/optimum 22 | RUN pip install /workspace/optimum[tests] 23 | 24 | ENV RUN_SLOW=1 25 | WORKDIR /workspace/optimum/tests/ 26 | CMD pytest bettertransformer/test_*.py -s --durations=0 -m gpu_test 27 | -------------------------------------------------------------------------------- /optimum/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .base import BaseOptimumCLICommand, CommandInfo, RootOptimumCLICommand 16 | from .env import EnvironmentCommand 17 | from .export import ExportCommand, ONNXExportCommand, TFLiteExportCommand 18 | from .onnxruntime import ONNXRuntimeCommand, ONNXRuntimeOptimizeCommand, ONNXRuntimeQuantizeCommand 19 | from .optimum_cli import register_optimum_cli_subcommand 20 | -------------------------------------------------------------------------------- /optimum/utils/constant.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | 16 | CONFIG_NAME = "config.json" 17 | DIFFUSION_MODEL_UNET_SUBFOLDER = "unet" 18 | DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER = "text_encoder" 19 | DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER = "vae_decoder" 20 | DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER = "vae_encoder" 21 | DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER = "text_encoder_2" 22 | ONNX_WEIGHTS_NAME = "model.onnx" 23 | -------------------------------------------------------------------------------- /.github/workflows/test_onnxruntime_train.yml: -------------------------------------------------------------------------------- 1 | name: ONNX Runtime / Test ORTTrainer 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 1 */3 * * # at 1am every 3 days 7 | pull_request: 8 | types: [opened, synchronize, reopened, labeled] 9 | 10 | jobs: 11 | do-the-job: 12 | if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'training')}} 13 | name: Run ORTTrainer test 14 | runs-on: [single-gpu, nvidia-gpu, t4, ci] 15 | env: 16 | AWS_REGION: us-east-1 17 | steps: 18 | - name: Checkout 19 | uses: actions/checkout@v2 20 | - name: Build image 21 | run: | 22 | docker build -f tests/onnxruntime/docker/Dockerfile_onnxruntime_trainer -t onnxruntime/train . 23 | - name: Run test within docker container 24 | run: | 25 | docker run --rm --gpus all -v /mnt/cache/.cache/huggingface:/root/.cache/huggingface --workdir=/workspace/optimum/tests onnxruntime/train:latest -------------------------------------------------------------------------------- /docs/source/exporters/tflite/package_reference/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration classes for TFLite export 14 | 15 | ## Base classes 16 | 17 | [[autodoc]] exporters.tflite.TFLiteConfig 18 | - inputs 19 | - outputs 20 | - generate_dummy_inputs 21 | 22 | ## Middle-end classes 23 | 24 | [[autodoc]] exporters.tflite.config.TextEncoderTFliteConfig 25 | 26 | [[autodoc]] exporters.tflite.config.VisionTFLiteConfig 27 | -------------------------------------------------------------------------------- /.github/workflows/test_fx.yml: -------------------------------------------------------------------------------- 1 | name: FX / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9] 19 | os: [ubuntu-20.04, macos-13] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | pip install .[tests] 31 | pip install git+https://github.com/huggingface/transformers.git 32 | - name: Test with unittest 33 | working-directory: tests 34 | run: | 35 | python -m pytest fx/optimization/test_transformations.py --exitfirst 36 | -------------------------------------------------------------------------------- /.github/workflows/test_export_onnx_cli_timm.yml: -------------------------------------------------------------------------------- 1 | name: Exporters ONNX CLI Timm (scheduled) / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 7 * * * # 
every day at 7am 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | build: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: [3.8, 3.9] 18 | os: [ubuntu-20.04] 19 | 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies for pytorch export 28 | run: | 29 | pip install .[tests,exporters] 30 | - name: Test with unittest 31 | working-directory: tests 32 | run: | 33 | RUN_SLOW=1 pytest exporters/onnx/test_exporters_onnx_cli.py -n auto -k "timm" -s --durations=0 34 | -------------------------------------------------------------------------------- /optimum/onnxruntime/graph.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import List 15 | 16 | from onnxruntime.transformers.onnx_model import OnnxModel 17 | 18 | 19 | def find_fully_connected_layers_nodes(model: OnnxModel) -> List[List[str]]: 20 | adds = model.get_nodes_by_op_type("Add") 21 | fc = list(filter(lambda graph: graph[1] is not None, ((add, model.match_parent(add, "MatMul")) for add in adds))) 22 | 23 | return fc 24 | -------------------------------------------------------------------------------- /.github/workflows/test_export_onnx_cli.yml: -------------------------------------------------------------------------------- 1 | name: Exporters ONNX CLI / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9] 19 | os: [ubuntu-20.04] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies for pytorch export 29 | run: | 30 | pip install .[tests,exporters] 31 | - name: Test with unittest 32 | working-directory: tests 33 | run: | 34 | pytest exporters/onnx/test_exporters_onnx_cli.py -n auto -m "not tensorflow_test and not timm_test" -s --durations=0 35 | -------------------------------------------------------------------------------- /optimum/quantization_base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import ABC, abstractmethod 3 | from pathlib import Path 4 | from typing import Optional, Union 5 | 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class 
OptimumQuantizer(ABC): 11 | @classmethod 12 | def from_pretrained( 13 | cls, 14 | model_or_path: Union[str, Path], 15 | file_name: Optional[str] = None, 16 | ): 17 | """Overwrite this method in subclass to define how to load your model from pretrained""" 18 | raise NotImplementedError( 19 | "Overwrite this method in subclass to define how to load your model from pretrained for quantization" 20 | ) 21 | 22 | @abstractmethod 23 | def quantize(self, save_dir: Union[str, Path], file_prefix: Optional[str] = None, **kwargs): 24 | """Overwrite this method in subclass to define how to quantize your model for quantization""" 25 | raise NotImplementedError( 26 | "Overwrite this method in subclass to define how to quantize your model for quantization" 27 | ) 28 | -------------------------------------------------------------------------------- /optimum/utils/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .base import Preprocessor, TaskProcessor 17 | from .image_classification import ImageClassificationProcessing 18 | from .question_answering import QuestionAnsweringProcessing 19 | from .task_processors_manager import TaskProcessorsManager 20 | from .text_classification import TextClassificationProcessing 21 | from .token_classification import TokenClassificationProcessing 22 | -------------------------------------------------------------------------------- /tests/onnxruntime/docker/Dockerfile_onnxruntime_gpu: -------------------------------------------------------------------------------- 1 | # use version with CUDA 11.8 and TensorRT 8.5.1.7 to match ORT 1.14 requirements 2 | FROM nvcr.io/nvidia/tensorrt:22.12-py3 3 | CMD nvidia-smi 4 | 5 | # Ignore interactive questions during `docker build` 6 | ENV DEBIAN_FRONTEND noninteractive 7 | 8 | # Install and update tools to minimize security vulnerabilities 9 | RUN apt-get update 10 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 11 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \ 12 | apt-get clean 13 | RUN unattended-upgrade 14 | RUN apt-get autoremove -y 15 | 16 | RUN python -m pip install -U pip 17 | 18 | RUN pip install transformers torch onnxruntime-gpu 19 | RUN pip install datasets evaluate diffusers scipy 20 | 21 | # Install Optimum 22 | COPY . 
/workspace/optimum 23 | RUN pip install /workspace/optimum[onnxruntime-gpu,tests] 24 | 25 | ENV TEST_LEVEL=1 26 | CMD pytest onnxruntime/test_*.py --durations=0 -s -vvvvv -m cuda_ep_test -m trt_ep_test 27 | -------------------------------------------------------------------------------- /.github/workflows/test_exporters_gpu.yml: -------------------------------------------------------------------------------- 1 | name: Exporters / Test GPU 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 1 */3 * * # at 1am every 3 days 7 | pull_request: 8 | types: [labeled] 9 | # uncomment to enable on PR merge on main branch: 10 | #push: 11 | # branches: 12 | # - main 13 | 14 | jobs: 15 | do-the-job: 16 | if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }} 17 | name: Start self-hosted EC2 runner 18 | runs-on: [single-gpu, nvidia-gpu, t4, ci] 19 | env: 20 | AWS_REGION: us-east-1 21 | steps: 22 | - name: Checkout 23 | uses: actions/checkout@v2 24 | - name: Build image 25 | run: | 26 | docker build -f tests/exporters/Dockerfile_exporters_gpu -t exporters-gpu . 27 | - name: Test with unittest within docker container 28 | run: | 29 | docker run --rm --gpus all -v /mnt/cache/.cache/huggingface:/root/.cache/huggingface --workdir=/workspace/optimum/tests exporters-gpu:latest 30 | -------------------------------------------------------------------------------- /.github/workflows/test_gptq.yml: -------------------------------------------------------------------------------- 1 | name: GPTQ Quantization / Test GPU 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 1 */3 * * # at 1am every 3 days 7 | pull_request: 8 | types: [opened, synchronize, reopened, labeled] 9 | # uncomment to enable on PR merge on main branch: 10 | #push: 11 | # branches: 12 | # - main 13 | 14 | jobs: 15 | do-the-job: 16 | if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }} 17 | name: Start self-hosted EC2 runner 18 | runs-on: [single-gpu, nvidia-gpu, t4, ci] 19 | env: 20 | AWS_REGION: us-east-1 21 | steps: 22 | - name: Checkout 23 | uses: actions/checkout@v2 24 | - name: Build image 25 | run: | 26 | docker build -f tests/gptq/Dockerfile_quantization_gpu -t gptq-gpu . 
27 | - name: Test with unittest within docker container 28 | run: | 29 | docker run --rm --gpus all -v $(pwd)/hf_cache:/root/.cache/huggingface --workdir=/workspace/optimum/tests gptq-gpu:latest 30 | -------------------------------------------------------------------------------- /tests/exporters/Dockerfile_exporters_gpu: -------------------------------------------------------------------------------- 1 | # use version with cudnn 8.5 to match torch==1.13.1 that uses 8.5.0.96 2 | # has Python 3.8.10 3 | FROM nvcr.io/nvidia/tensorrt:22.08-py3 4 | CMD nvidia-smi 5 | 6 | # Ignore interactive questions during `docker build` 7 | ENV DEBIAN_FRONTEND noninteractive 8 | 9 | # Install and update tools to minimize security vulnerabilities 10 | RUN apt-get update 11 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 12 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev && \ 13 | apt-get clean 14 | RUN unattended-upgrade 15 | RUN apt-get autoremove -y 16 | 17 | RUN python -m pip install -U pip 18 | 19 | RUN pip install torch scipy datasets evaluate diffusers 20 | 21 | RUN pip install transformers 22 | RUN pip install onnxruntime-gpu 23 | 24 | # Install Optimum 25 | COPY . /workspace/optimum 26 | RUN pip install /workspace/optimum[onnxruntime-gpu,tests,exporters-gpu] 27 | 28 | ENV TEST_LEVEL=1 29 | ENV RUN_SLOW=1 30 | CMD pytest exporters --durations=0 -s -vvvvv -m gpu_test 31 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F680 Feature request" 2 | description: Submit a proposal/request for a new optimum feature 3 | labels: [ "feature" ] 4 | body: 5 | - type: textarea 6 | id: feature-request 7 | validations: 8 | required: true 9 | attributes: 10 | label: Feature request 11 | description: | 12 | A clear and concise description of the feature proposal. Please provide a link to the paper and code in case they exist. 13 | 14 | - type: textarea 15 | id: motivation 16 | validations: 17 | required: true 18 | attributes: 19 | label: Motivation 20 | description: | 21 | Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too. 22 | 23 | 24 | - type: textarea 25 | id: contribution 26 | validations: 27 | required: true 28 | attributes: 29 | label: Your contribution 30 | description: | 31 | Is there any way that you could help, e.g. by submitting a PR? 32 | -------------------------------------------------------------------------------- /docs/source/exporters/tflite/usage_guides/contribute.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Adding support for an unsupported architecture 14 | 15 | If you wish to export a model whose architecture is not already supported by the library, the PR [#813 Adds support for ResNet](https://github.com/huggingface/optimum/pull/813 ) can be used as a reference. 
16 | 17 | You can make sure tests pass for the new `my_new_modeltype` model type by running: 18 | 19 | ```bash 20 | pytest tests/exporters/tflite/test_*.py -k "my_new_modeltype" -s --exitfirst 21 | ``` 22 | -------------------------------------------------------------------------------- /optimum/exporters/error_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Utilities related to error handling.""" 16 | 17 | 18 | class ShapeError(ValueError): 19 | pass 20 | 21 | 22 | class AtolError(ValueError): 23 | pass 24 | 25 | 26 | class OutputMatchError(ValueError): 27 | pass 28 | 29 | 30 | class NumberOfInputsMatchError(ValueError): 31 | pass 32 | 33 | 34 | class NumberOfOutputsMatchError(ValueError): 35 | pass 36 | 37 | 38 | class MinimumVersionError(ValueError): 39 | pass 40 | -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_tflite_*.py -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/test_onnxruntime_gpu.yml: -------------------------------------------------------------------------------- 1 | name: ONNX Runtime / Test GPU 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 1 */3 * * # at 1am every 3 days 7 | pull_request: 8 | types: [opened, synchronize, reopened, labeled] 9 | # uncomment to enable on PR merge on main branch: 10 | #push: 11 | # branches: 12 | # - main 13 | 14 | jobs: 15 | do-the-job: 16 | if: ${{ (github.event_name == 'workflow_dispatch') || (github.event_name == 'schedule') || contains( github.event.pull_request.labels.*.name, 'gpu-test') }} 17 | name: Start self-hosted EC2 runner 18 | runs-on: [single-gpu, nvidia-gpu, t4, ci] 19 | env: 20 | AWS_REGION: us-east-1 21 | steps: 22 | - name: Checkout 
23 | uses: actions/checkout@v2 24 | - name: Build image 25 | run: | 26 | docker build -f tests/onnxruntime/docker/Dockerfile_onnxruntime_gpu -t onnxruntime-gpu . 27 | - name: Test with unittest within docker container 28 | run: | 29 | docker run --rm --gpus all -v /mnt/cache/.cache/huggingface:/root/.cache/huggingface --workdir=/workspace/optimum/tests onnxruntime-gpu:latest 30 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/package_reference/trainer.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Trainer 14 | 15 | ## ORTTrainer 16 | 17 | [[autodoc]] onnxruntime.trainer.ORTTrainer 18 | - all 19 | 20 | ## ORTSeq2SeqTrainer 21 | 22 | [[autodoc]] onnxruntime.trainer_seq2seq.ORTSeq2SeqTrainer 23 | - evaluate 24 | - predict 25 | 26 | ## ORTTrainingArguments 27 | 28 | [[autodoc]] onnxruntime.training_args.ORTTrainingArguments 29 | - all 30 | 31 | ## ORTSeq2SeqTrainingArguments 32 | 33 | [[autodoc]] onnxruntime.training_args_seq2seq.ORTSeq2SeqTrainingArguments 34 | - all -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -m "not quantization" -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_exporters.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 
2 | 3 | name: dev_Exporters / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | - 3.9 18 | os: 19 | - ubuntu-20.04 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | pip install .[tests,exporters-tf] 30 | pip install -U git+https://github.com/huggingface/evaluate 31 | pip install -U git+https://github.com/huggingface/diffusers 32 | pip install -U git+https://github.com/huggingface/transformers 33 | - name: Test with unittest 34 | working-directory: tests 35 | run: | 36 | pytest exporters -s --durations=0 -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli_quantization_fp16.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI Quantization FP16 / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -k "float16_quantization" -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/test_benckmark.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | name: Benchmark suite / Python - Test 4 | 5 | on: 6 | push: 7 | branches: [ main ] 8 | pull_request: 9 | branches: [ main ] 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | build: 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | python-version: [3.8, 3.9] 21 | os: [ubuntu-20.04] 22 | 23 | runs-on: ${{ matrix.os }} 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: Setup Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v2 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | - name: Install dependencies 31 | run: | 32 | pip install wheel 33 | pip install .[tests,onnxruntime,benchmark] 34 | - name: Test with unittest 35 | run: | 36 | python -m unittest discover --start-directory tests/benchmark --pattern 'test_*.py' 
37 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_onnx.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 2 | 3 | name: dev_ONNX / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | - 3.9 18 | os: 19 | - ubuntu-20.04 20 | - macos-13 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | pip install .[tests,onnxruntime] tensorflow tf2onnx 31 | pip install -U git+https://github.com/huggingface/evaluate 32 | pip install -U git+https://github.com/huggingface/diffusers 33 | pip install -U git+https://github.com/huggingface/transformers 34 | - name: Test with unittest 35 | working-directory: tests 36 | run: | 37 | python -m unittest discover -s onnx -p test_*.py 38 | -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli_dynamic_quantization_int8.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI Dynamic Quantization INT8 / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -k "int8_dynamic_quantization" -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/test_dummy_inputs.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | name: Dummy inputs / Python - Test 4 | 5 | on: 6 | push: 7 | branches: [ main ] 8 | pull_request: 9 | branches: [ main ] 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | build: 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | python-version: [3.8, 3.9] 21 | os: [ubuntu-20.04, macos-13] 22 | 23 | runs-on: ${{ matrix.os }} 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: Setup Python ${{ matrix.python-version }} 27 | uses: 
actions/setup-python@v2 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install .[tests] 34 | - name: Test with unittest 35 | working-directory: tests 36 | run: | 37 | python -m unittest discover -s utils -p 'test_*.py' 38 | -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli_quantization_int8x16.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI Quantization INT8x16 / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -k "tflite_int8x16_quantization_with_default_dataset" -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_benckmark.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 
2 | 3 | name: dev_Benchmark suite / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | - 3.9 18 | os: 19 | - ubuntu-20.04 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | pip install wheel 30 | pip install .[tests,onnxruntime,benchmark] 31 | pip install -U git+https://github.com/huggingface/evaluate 32 | pip install -U git+https://github.com/huggingface/diffusers 33 | pip install -U git+https://github.com/huggingface/transformers 34 | - name: Test with unittest 35 | run: | 36 | python -m unittest discover --start-directory tests/benchmark --pattern 37 | test_*.py -------------------------------------------------------------------------------- /.github/workflows/test_cli.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | name: Optimum CLI / Python - Test 4 | 5 | on: 6 | push: 7 | branches: [ main ] 8 | pull_request: 9 | branches: [ main ] 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | build: 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | python-version: [3.8, 3.9] 21 | os: [ubuntu-20.04, macos-13] 22 | 23 | runs-on: ${{ matrix.os }} 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: Setup Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v2 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install .[tests,exporters,exporters-tf] 34 | - name: Test with unittest 35 | working-directory: tests 36 | run: | 37 | python -m unittest discover -s cli -p 'test_*.py' 38 | 39 | -------------------------------------------------------------------------------- /tests/onnxruntime/ds_configs/ds_config_zero_stage_inifinity.json: -------------------------------------------------------------------------------- 1 | { 2 | "zero_optimization": { 3 | "stage": 3, 4 | "offload_optimizer": { 5 | "device": "nvme", 6 | "nvme_path": "/dev/nvme1n1", 7 | "pin_memory": true, 8 | "buffer_count": 4, 9 | "fast_init": false 10 | }, 11 | "offload_param": { 12 | "device": "nvme", 13 | "nvme_path": "/dev/nvme1n1", 14 | "pin_memory": true, 15 | "buffer_count": 5, 16 | "buffer_size": 1e8, 17 | "max_in_cpu": 1e9 18 | }, 19 | "aio": { 20 | "block_size": 262144, 21 | "queue_depth": 32, 22 | "thread_count": 1, 23 | "single_submit": false, 24 | "overlap_events": true 25 | }, 26 | "overlap_comm": true, 27 | "contiguous_gradients": true, 28 | "sub_group_size": 1e9, 29 | "reduce_bucket_size": "auto", 30 | "stage3_prefetch_bucket_size": "auto", 31 | "stage3_param_persistence_threshold": "auto", 32 | "stage3_max_live_parameters": 1e9, 33 | "stage3_max_reuse_distance": 1e9, 34 | "stage3_gather_16bit_weights_on_model_save": true 35 | } 36 | } 
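The ZeRO-Infinity configuration above is consumed by the `ORTTrainer` GPU tests. As a rough, hedged sketch of how such a file is typically wired into a training run — the checkpoint name, toy dataset, and batch size below are illustrative placeholders rather than values taken from this repository — the arguments object simply points to the JSON through its `deepspeed` field, and the script is launched with a distributed launcher such as `deepspeed` or `torchrun`:

```python
# Hedged sketch only: hook a DeepSpeed ZeRO config (like the one above) into ORTTrainer.
from datasets import Dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from optimum.onnxruntime import ORTTrainer, ORTTrainingArguments

model_name = "bert-base-uncased"  # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Tiny toy dataset, just to keep the example self-contained.
raw = Dataset.from_dict({"text": ["great movie", "terrible movie"], "label": [1, 0]})
train_dataset = raw.map(
    lambda batch: tokenizer(batch["text"], truncation=True, padding="max_length", max_length=32),
    batched=True,
)

args = ORTTrainingArguments(
    output_dir="ort_zero_output",
    per_device_train_batch_size=8,
    fp16=True,
    # Path to a ZeRO config such as the ZeRO-Infinity file shown above.
    deepspeed="tests/onnxruntime/ds_configs/ds_config_zero_stage_inifinity.json",
)

trainer = ORTTrainer(model=model, args=args, train_dataset=train_dataset, tokenizer=tokenizer)
trainer.train()  # run under `torchrun`/`deepspeed` so that DeepSpeed is actually initialized
```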
-------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli_quantization_full_int8.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI Quantization Full INT8 Default Dataset / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -k "full_int8_quantization_with_default_dataset" -s --durations=0 40 | -------------------------------------------------------------------------------- /optimum/pipelines/diffusers/watermark.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from imwatermark import WatermarkEncoder 3 | 4 | 5 | WATERMARK_MESSAGE = 0b101100111110110010010000011110111011000110011110 6 | WATERMARK_BITS = [int(bit) for bit in bin(WATERMARK_MESSAGE)[2:]] 7 | 8 | 9 | # Adapted from https://github.com/huggingface/diffusers/blob/v0.18.1/src/diffusers/pipelines/stable_diffusion_xl/watermark.py#L12 10 | class StableDiffusionXLWatermarker: 11 | def __init__(self): 12 | self.watermark = WATERMARK_BITS 13 | self.encoder = WatermarkEncoder() 14 | self.encoder.set_watermark("bits", self.watermark) 15 | 16 | def apply_watermark(self, images: np.array): 17 | # can't encode images that are smaller than 256 18 | if images.shape[-1] < 256: 19 | return images 20 | 21 | # cv2 doesn't support float16 22 | if images.dtype == np.float16: 23 | images = images.astype(np.float32) 24 | 25 | images = (255 * (images / 2 + 0.5)).transpose((0, 2, 3, 1)) 26 | 27 | images = np.array([self.encoder.encode(image, "dwtDct") for image in images]).transpose((0, 3, 1, 2)) 28 | 29 | np.clip(2 * (images / 255 - 0.5), -1.0, 1.0, out=images) 30 | 31 | return images 32 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_dummy_inputs.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 
2 | 3 | name: dev_Dummy inputs / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | - 3.9 18 | os: 19 | - ubuntu-20.04 20 | - macos-13 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | python -m pip install --upgrade pip 31 | pip install .[tests] 32 | pip install -U git+https://github.com/huggingface/evaluate 33 | pip install -U git+https://github.com/huggingface/diffusers 34 | pip install -U git+https://github.com/huggingface/transformers 35 | - name: Test with unittest 36 | working-directory: tests 37 | run: | 38 | python -m unittest discover -s utils -p test_*.py 39 | -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli_quantization_int8_custom_dataset.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI Quantization INT8 Custom Dataset / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -k "tflite_int8_quantization_with_custom_dataset" -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/test_export_tflite_cli_quantization_int8_default_dataset.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI Quantization INT8 Default Dataset / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | paths: 9 | - "optimum/exporters/tasks.py" 10 | - "optimum/exporters/tflite/**.py" 11 | - "tests/exporters/**.py" 12 | - "setup.py" 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | build: 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | python-version: [3.8, 3.9] 24 | os: [ubuntu-20.04] 25 | 26 | runs-on: ${{ matrix.os }} 27 | steps: 28 | - uses: actions/checkout@v2 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v2 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | - name: Install dependencies 34 | run: | 35 | pip install .[tests,exporters-tf] 36 | - name: Test with unittest 37 | working-directory: tests 
38 | run: | 39 | pytest exporters/tflite/test_exporters_tflite_cli.py -k "tflite_int8_quantization_with_default_dataset" -s --durations=0 40 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_fx.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 2 | 3 | name: dev_FX / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | - 3.9 18 | os: 19 | - ubuntu-20.04 20 | - macos-13 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | pip install .[tests] 31 | pip install git+https://github.com/huggingface/transformers.git 32 | pip install -U git+https://github.com/huggingface/evaluate 33 | pip install -U git+https://github.com/huggingface/diffusers 34 | pip install -U git+https://github.com/huggingface/transformers 35 | - name: Test with unittest 36 | working-directory: tests 37 | run: | 38 | python -m pytest fx/optimization/test_transformations.py --exitfirst 39 | -------------------------------------------------------------------------------- /.github/workflows/test_exporters_slow.yml: -------------------------------------------------------------------------------- 1 | name: Exporters slow / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 7 * * * # every day at 7am 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | build: 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: [3.8, 3.9] 18 | os: [ubuntu-20.04] 19 | 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies for pytorch export 28 | run: | 29 | pip install .[tests,exporters] 30 | - name: Test with unittest 31 | working-directory: tests 32 | run: | 33 | RUN_SLOW=1 pytest exporters -s -m "not tensorflow_test and run_slow" --durations=0 34 | - name: Install dependencies for tensorflow export 35 | run: | 36 | pip install .[tests,exporters-tf] 37 | - name: Test with unittest 38 | working-directory: tests 39 | run: | 40 | RUN_SLOW=1 pytest exporters -s -m "tensorflow_test and run_slow" --durations=0 41 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_bettertransformer.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 
2 | 3 | name: dev_BetterTransformer / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | os: 18 | - ubuntu-20.04 19 | - macos-13 20 | runs-on: ${{ matrix.os }} 21 | steps: 22 | - uses: actions/checkout@v2 23 | - name: Setup Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v2 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | pip install .[tests] 30 | pip3 install --upgrade torch torchvision torchaudio 31 | pip install accelerate 32 | pip install -U git+https://github.com/huggingface/evaluate 33 | pip install -U git+https://github.com/huggingface/diffusers 34 | pip install -U git+https://github.com/huggingface/transformers 35 | - name: Test with unittest 36 | working-directory: tests 37 | run: | 38 | python -m unittest discover -s bettertransformer -p test_*.py 39 | -------------------------------------------------------------------------------- /optimum/exporters/onnx/constants.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # 2 GB 17 | EXTERNAL_DATA_FORMAT_SIZE_LIMIT = 2 * 1024 * 1024 * 1024 18 | 19 | ONNX_ENCODER_NAME = "encoder_model" 20 | ONNX_DECODER_NAME = "decoder_model" 21 | ONNX_DECODER_WITH_PAST_NAME = "decoder_with_past_model" 22 | ONNX_DECODER_MERGED_NAME = "decoder_model_merged" 23 | 24 | UNPICKABLE_ARCHS = [ 25 | "encodec", 26 | "hubert", 27 | "sew", 28 | "sew-d", 29 | "speecht5", 30 | "unispeech", 31 | "unispeech-sat", 32 | "wav2vec2", 33 | "wav2vec2-conformer", 34 | "wavlm", 35 | ] 36 | 37 | SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED = [ 38 | "bart", 39 | "musicgen", 40 | "whisper", 41 | ] 42 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_onnxruntime.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 
2 | 3 | name: dev_ONNX Runtime / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.8 17 | - 3.9 18 | os: 19 | - ubuntu-20.04 20 | - windows-2019 21 | - macos-13 22 | runs-on: ${{ matrix.os }} 23 | steps: 24 | - uses: actions/checkout@v2 25 | - name: Setup Python ${{ matrix.python-version }} 26 | uses: actions/setup-python@v2 27 | with: 28 | python-version: ${{ matrix.python-version }} 29 | - name: Install dependencies 30 | run: | 31 | pip install .[tests,onnxruntime] 32 | pip install -U git+https://github.com/huggingface/evaluate 33 | pip install -U git+https://github.com/huggingface/diffusers 34 | pip install -U git+https://github.com/huggingface/transformers 35 | - name: Test with pytest 36 | working-directory: tests 37 | run: | 38 | python -m pytest -n auto -m "not run_in_series" onnxruntime 39 | python -m pytest -m "run_in_series" onnxruntime 40 | -------------------------------------------------------------------------------- /tests/cli/cli_with_custom_command.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import os 17 | 18 | from optimum.commands import BaseOptimumCLICommand, CommandInfo, ExportCommand 19 | 20 | 21 | class MyCustomCommand(BaseOptimumCLICommand): 22 | COMMAND = CommandInfo(name="blablabla", help="Says something.") 23 | 24 | def run(self): 25 | print("If the CI can read this, it means it worked!") 26 | 27 | 28 | parent_command_cls = os.environ.get("TEST_REGISTER_COMMAND_WITH_SUBCOMMAND", None) 29 | 30 | if parent_command_cls == "true": 31 | REGISTER_COMMANDS = [ 32 | (MyCustomCommand, ExportCommand), 33 | ] 34 | else: 35 | REGISTER_COMMANDS = [ 36 | MyCustomCommand, 37 | ] 38 | -------------------------------------------------------------------------------- /.github/workflows/test_export_onnx.yml: -------------------------------------------------------------------------------- 1 | name: Exporters ONNX / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9] 19 | os: [ubuntu-20.04] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies for pytorch export 29 | run: | 30 | pip install .[tests,exporters] 31 | - name: Test with unittest 32 | working-directory: tests 33 | run: | 34 | pytest exporters/onnx/test_onnx_*.py -s -n auto -m "not tensorflow_test and not timm_test" --durations=0 35 | - name: Install dependencies for tensorflow export 36 | run: | 37 | pip install .[tests,exporters-tf] 38 | - name: Test with unittest 39 | working-directory: tests 40 | run: | 41 | pytest exporters/onnx/test_onnx_*.py -n auto -m "tensorflow_test" -s --durations=0 42 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/concept_guides/onnx.mdx: -------------------------------------------------------------------------------- 1 | # ONNX 🤝 ONNX Runtime 2 | 3 | ONNX is an open standard that defines a common set of operators and a common file format to represent deep learning models in a wide variety of frameworks, including PyTorch and TensorFlow. When a model is exported to the ONNX format, these operators are used to construct a computational graph (often called an _intermediate representation_) that represents the flow of data through the neural network. 4 | 5 | 6 | 7 | You can use [Netron](https://netron.app/) to visualize any ONNX file on the Hugging Face Hub. Simply append the file's URL to `http://netron.app?url=` as in [this example](https://netron.app/?url=https://huggingface.co/cmarkea/distilcamembert-base-ner/blob/main/model.onnx). 8 | 9 | 10 | 11 | By exposing a graph with standardized operators and data types, ONNX makes it easy to switch between frameworks. For example, a model trained in PyTorch can be exported to ONNX format and then imported in TensorFlow (and vice versa). 12 | 13 | Where ONNX really shines is when it is coupled with a dedicated accelerator like ONNX Runtime, or ORT for short. ORT provides tools to optimize the ONNX graph through techniques like operator fusion and constant folding, and defines an interface to execution providers that allow you to run the model on different types of hardware.
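To make the ONNX → ONNX Runtime hand-off above concrete, here is a brief, hedged sketch using 🤗 Optimum's `ORTModel` classes. The checkpoint is only an example, and depending on the installed Optimum version the export flag may be `from_transformers=True` instead of `export=True`:

```python
# Illustrative sketch: export a Transformers checkpoint to ONNX on the fly and run it with ONNX Runtime.
from transformers import AutoTokenizer

from optimum.onnxruntime import ORTModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"  # example checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)

# export=True converts the PyTorch weights into an ONNX graph before loading it in ONNX Runtime.
ort_model = ORTModelForSequenceClassification.from_pretrained(model_id, export=True)

inputs = tokenizer("ONNX Runtime makes inference fast!", return_tensors="pt")
logits = ort_model(**inputs).logits
print(logits.argmax(dim=-1))

# The exported graph can be saved and reloaded later, optionally with a GPU execution provider.
ort_model.save_pretrained("onnx_sst2")
```

The same export can also be produced from the command line with `optimum-cli export onnx --model <model_id> <output_dir>`.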
-------------------------------------------------------------------------------- /docs/source/torch_fx/package_reference/optimization.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Optimization 14 | 15 | ## Transformation 16 | 17 | [[autodoc]] fx.optimization.Transformation 18 | - all 19 | - __call__ 20 | 21 | ## Reversible transformation 22 | 23 | [[autodoc]] fx.optimization.ReversibleTransformation 24 | - all 25 | - __call__ 26 | 27 | [[autodoc]] fx.optimization.compose 28 | 29 | ### Transformations 30 | 31 | [[autodoc]] fx.optimization.MergeLinears 32 | - all 33 | 34 | [[autodoc]] fx.optimization.FuseBiasInLinear 35 | - all 36 | 37 | [[autodoc]] fx.optimization.ChangeTrueDivToMulByInverse 38 | - all 39 | 40 | [[autodoc]] fx.optimization.FuseBatchNorm2dInConv2d 41 | - all 42 | 43 | [[autodoc]] fx.optimization.FuseBatchNorm1dInLinear 44 | - all -------------------------------------------------------------------------------- /optimum/exporters/tflite/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | from typing import TYPE_CHECKING 17 | 18 | from transformers.utils import _LazyModule 19 | 20 | 21 | _import_structure = { 22 | "base": ["QuantizationApproach", "TFLiteQuantizationConfig", "TFLiteConfig"], 23 | "convert": ["export", "validate_model_outputs"], 24 | } 25 | 26 | if TYPE_CHECKING: 27 | from .base import QuantizationApproach, TFLiteQuantizationConfig, TFLiteConfig # noqa 28 | from .convert import export, validate_model_outputs # noqa 29 | else: 30 | import sys 31 | 32 | sys.modules[__name__] = _LazyModule( 33 | __name__, 34 | globals()["__file__"], 35 | _import_structure, 36 | module_spec=__spec__, 37 | ) 38 | -------------------------------------------------------------------------------- /docs/source/_redirects.yml: -------------------------------------------------------------------------------- 1 | # Optimum Graphcore 2 | graphcore_index: graphcore/index 3 | graphcore_quickstart: graphcore/quickstart 4 | graphcore_ipu_config: graphcore/ipu_config 5 | graphcore_trainer: graphcore/trainer 6 | graphcore_add_support_for_new_model: graphcore/add_support_for_new_model 7 | 8 | # Optimum Habana 9 | habana_index: habana/index 10 | habana_quickstart: habana/quickstart 11 | habana_single_hpu: habana/tutorials/single_hpu 12 | habana_distributed: habana/tutorials/distributed 13 | habana_deepspeed: habana/usage_guides/deepspeed 14 | habana_accelerate_training: habana/usage_guides/accelerate_training 15 | habana_trainer: habana/package_reference/trainer 16 | habana_gaudi_config: habana/package_reference/gaudi_config 17 | habana/usage_guides/stable_diffusion: habana/tutorials/stable_diffusion 18 | habana/tutorials/pretraining: habana/usage_guides/pretraining 19 | 20 | # Optimum Intel 21 | intel_index: intel/index 22 | intel_quickstart: intel/optimization_inc 23 | intel_configuration: intel/reference_inc 24 | intel_optimization: intel/optimization_inc 25 | intel_quantization: intel/optimization_inc 26 | intel_pruning: intel/optimization_inc 27 | intel_trainer: intel/reference_inc 28 | 29 | # Optimum Neuron 30 | docs/optimum-neuron/index: /docs/optimum-neuron/index 31 | 32 | # Optimum TPU 33 | docs/optimum-tpu/index: /docs/optimum-tpu/index 34 | tpu/index: /docs/optimum-tpu/index 35 | -------------------------------------------------------------------------------- /.github/workflows/check_code_quality.yml: -------------------------------------------------------------------------------- 1 | name: check_code_quality 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | paths: 7 | - "optimum/**.py" 8 | - "tests/**.py" 9 | - "examples/**.py" 10 | 11 | pull_request: 12 | branches: [ main ] 13 | paths: 14 | - "optimum/**.py" 15 | - "tests/**.py" 16 | - "examples/**.py" 17 | 18 | concurrency: 19 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 20 | cancel-in-progress: true 21 | 22 | jobs: 23 | build: 24 | strategy: 25 | fail-fast: false 26 | matrix: 27 | python-version: [3.8] 28 | os: [ubuntu-20.04] 29 | 30 | runs-on: ${{ matrix.os }} 31 | steps: 32 | - uses: actions/checkout@v2 33 | - name: Setup Python ${{ matrix.python-version }} 34 | uses: actions/setup-python@v2 35 | with: 36 | python-version: ${{ matrix.python-version }} 37 | - name: Create and start a virtual environment 38 | run: | 39 | python -m venv venv 40 | source venv/bin/activate 41 | - name: Install dependencies 42 | run: | 43 | source venv/bin/activate 44 | pip install --upgrade pip 45 | pip install .[quality] 46 | - name: Check style with black 47 | run: | 48 | source venv/bin/activate 49 | black --check 
. 50 | - name: Check style with ruff 51 | run: | 52 | source venv/bin/activate 53 | ruff . 54 | -------------------------------------------------------------------------------- /tests/onnxruntime/ds_configs/ds_config_zero_stage_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "hysteresis": 2, 7 | "min_loss_scale": 1 8 | }, 9 | 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | 14 | "zero_optimization": { 15 | "stage": 1, 16 | "allgather_partitions": true, 17 | "allgather_bucket_size": 2e8, 18 | "overlap_comm": true, 19 | "reduce_scatter": true, 20 | "reduce_bucket_size": 2e8, 21 | "contiguous_gradients": false, 22 | "cpu_offload": false 23 | }, 24 | 25 | "zero_allow_untested_optimizer": true, 26 | 27 | "optimizer": { 28 | "type": "AdamW", 29 | "params": { 30 | "lr": "auto", 31 | "betas": "auto", 32 | "eps": "auto", 33 | "weight_decay": "auto" 34 | } 35 | }, 36 | 37 | "scheduler": { 38 | "type": "WarmupLR", 39 | "params": { 40 | "warmup_min_lr": "auto", 41 | "warmup_max_lr": "auto", 42 | "warmup_num_steps": "auto" 43 | } 44 | }, 45 | 46 | "steps_per_print": 2000, 47 | "train_batch_size": "auto", 48 | "train_micro_batch_size_per_gpu": "auto", 49 | "wall_clock_breakdown": false 50 | } -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [tool.black] 16 | line-length = 119 17 | target-version = ['py37'] 18 | 19 | [tool.ruff] 20 | # Never enforce `E501` (line length violations). 21 | ignore = ["C901", "E501", "E741", "W605"] 22 | select = ["C", "E", "F", "I", "W"] 23 | line-length = 119 24 | 25 | # Ignore import violations in all `__init__.py` files. 
26 | [tool.ruff.per-file-ignores] 27 | "__init__.py" = ["E402", "F401", "F403", "F811"] 28 | 29 | [tool.ruff.isort] 30 | lines-after-imports = 2 31 | known-first-party = ["optimum"] 32 | 33 | [tool.pytest.ini_options] 34 | markers = [ 35 | "gpu_test", 36 | "cuda_ep_test", 37 | "trt_ep_test", 38 | "rocm_ep_test", 39 | "tensorflow_test", 40 | "timm_test", 41 | "run_in_series", 42 | "run_slow", 43 | "accelerate_test", 44 | "fp16", 45 | "quantization", 46 | ] 47 | -------------------------------------------------------------------------------- /.github/workflows/test_bettertransformer.yml: -------------------------------------------------------------------------------- 1 | name: BetterTransformer / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.8, 3.9] 19 | os: [ubuntu-20.04, macos-13] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies 29 | run: | 30 | pip install .[tests] 31 | pip install --no-cache-dir --upgrade torch torchvision torchaudio 32 | pip install accelerate 33 | - name: Test on pytorch stable 34 | working-directory: tests 35 | run: | 36 | pytest bettertransformer/test_*.py -s -vvvvv 37 | - name: Install dependencies 2 38 | run: | 39 | pip uninstall -y torch torchvision torchaudio 40 | pip install --no-cache-dir --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu 41 | - name: Test on pytorch nightly 42 | working-directory: tests 43 | run: | 44 | pytest bettertransformer/test_*.py -s -vvvvv 45 | 46 | -------------------------------------------------------------------------------- /.github/workflows/test_optimum_common.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 3 | name: Optimum common / Python - Test 4 | 5 | on: 6 | push: 7 | branches: [ main ] 8 | pull_request: 9 | branches: [ main ] 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | build: 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | python-version: [3.8, 3.9] 21 | os: [ubuntu-20.04, windows-2019, macos-13] 22 | 23 | runs-on: ${{ matrix.os }} 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: Setup Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v2 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install .[tests] 34 | ls -l optimum/ 35 | - name: Test with unittest 36 | shell: bash 37 | run: | 38 | # Setting HUGGINGFACE_CO_STAGING to true for only one job of the matrix as the staging tests cannot run in parallel. 
39 | export HUGGINGFACE_CO_STAGING=${{ matrix.python-version == '3.8' && matrix.os == 'ubuntu-20.04' }} 40 | pytest tests/test_*.py 41 | 42 | -------------------------------------------------------------------------------- /optimum/commands/export/base.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """optimum.exporters command-line interface base classes.""" 16 | 17 | from .. import BaseOptimumCLICommand, CommandInfo 18 | from .onnx import ONNXExportCommand 19 | from .tflite import TFLiteExportCommand 20 | 21 | 22 | class ExportCommand(BaseOptimumCLICommand): 23 | COMMAND = CommandInfo( 24 | name="export", 25 | help="Export PyTorch and TensorFlow models to several formats.", 26 | ) 27 | SUBCOMMANDS = ( 28 | CommandInfo( 29 | name="onnx", 30 | help="Export PyTorch and TensorFlow to ONNX.", 31 | subcommand_class=ONNXExportCommand, 32 | ), 33 | CommandInfo( 34 | name="tflite", 35 | help="Export TensorFlow to TensorFlow Lite.", 36 | subcommand_class=TFLiteExportCommand, 37 | ), 38 | ) 39 | -------------------------------------------------------------------------------- /docs/source/exporters/tflite/overview.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Overview 14 | 15 | 🤗 Optimum handles the export of TensorFlow models to TFLite in the `exporters.tflite` module. In addition, models hosted on the Hugging Face Hub with PyTorch weights but having a TensorFlow implementation are also supported in the export thanks to Transformers' [TFPreTrainedModel.from_pretrained()](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.TFPreTrainedModel.from_pretrained) auto-conversion to TensorFlow. 16 | 17 | The TFLite export support provides classes, functions and a command line interface to export a model easily. 18 | 19 | Supported architectures: 20 | 21 | - Albert 22 | - BERT 23 | - Camembert 24 | - ConvBert 25 | - Deberta 26 | - Deberta V2 27 | - DistilBert 28 | - Electra 29 | - Flaubert 30 | - MobileBert 31 | - MPNet 32 | - ResNet 33 | - Roberta 34 | - RoFormer 35 | - XLM 36 | - XLMRoberta 37 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/multiple-choice/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Multiple choice 18 | 19 | The script [`run_swag.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/multiple-choice/run_swag.py) allows us to apply graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for multiple choice tasks. 20 | 21 | The following example applies graph optimizations on a BERT fine-tuned on the SWAG dataset.
Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node eliminations and constant folding. Higher optimization level will result in hardware dependent optimized graph. 22 | 23 | ```bash 24 | python run_swag.py \ 25 | --model_name_or_path ehdwns1516/bert-base-uncased_SWAG \ 26 | --optimization_level 1 \ 27 | --do_eval \ 28 | --output_dir /tmp/optimized_bert_swag 29 | ``` 30 | -------------------------------------------------------------------------------- /optimum/onnxruntime/training_args_seq2seq.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from dataclasses import dataclass, field 16 | from typing import Optional 17 | 18 | from transformers import Seq2SeqTrainingArguments 19 | 20 | from .training_args import ORTTrainingArguments 21 | 22 | 23 | @dataclass 24 | class ORTSeq2SeqTrainingArguments(Seq2SeqTrainingArguments, ORTTrainingArguments): 25 | """ 26 | Parameters: 27 | optim (`str` or [`training_args.ORTOptimizerNames`] or [`transformers.training_args.OptimizerNames`], *optional*, defaults to `"adamw_hf"`): 28 | The optimizer to use, including optimizers in Transformers: adamw_hf, adamw_torch, adamw_apex_fused, or adafactor. And optimizers implemented by ONNX Runtime: adamw_ort_fused. 29 | """ 30 | 31 | optim: Optional[str] = field( 32 | default="adamw_hf", 33 | metadata={"help": "The optimizer to use."}, 34 | ) 35 | -------------------------------------------------------------------------------- /optimum/commands/onnxruntime/base.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """optimum.onnxruntime command-line interface base classes.""" 16 | 17 | from .. 
import BaseOptimumCLICommand, CommandInfo 18 | from .optimize import ONNXRuntimeOptimizeCommand 19 | from .quantize import ONNXRuntimeQuantizeCommand 20 | 21 | 22 | class ONNXRuntimeCommand(BaseOptimumCLICommand): 23 | COMMAND = CommandInfo( 24 | name="onnxruntime", 25 | help="ONNX Runtime optimize and quantize utilities.", 26 | ) 27 | SUBCOMMANDS = ( 28 | CommandInfo( 29 | name="optimize", 30 | help="Optimize ONNX models.", 31 | subcommand_class=ONNXRuntimeOptimizeCommand, 32 | ), 33 | CommandInfo( 34 | name="quantize", 35 | help="Dynammic quantization for ONNX models.", 36 | subcommand_class=ONNXRuntimeQuantizeCommand, 37 | ), 38 | ) 39 | -------------------------------------------------------------------------------- /tests/onnxruntime/ds_configs/ds_config_zero_stage_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "bf16": { 3 | "enabled": "auto" 4 | }, 5 | 6 | "fp16": { 7 | "enabled": "auto", 8 | "loss_scale": 0, 9 | "loss_scale_window": 1000, 10 | "initial_scale_power": 16, 11 | "hysteresis": 2, 12 | "min_loss_scale": 1 13 | }, 14 | 15 | "optimizer": { 16 | "type": "AdamW", 17 | "params": { 18 | "lr": "auto", 19 | "betas": "auto", 20 | "eps": "auto", 21 | "weight_decay": "auto" 22 | } 23 | }, 24 | 25 | "scheduler": { 26 | "type": "WarmupLR", 27 | "params": { 28 | "warmup_min_lr": "auto", 29 | "warmup_max_lr": "auto", 30 | "warmup_num_steps": "auto" 31 | } 32 | }, 33 | 34 | "zero_optimization": { 35 | "stage": 2, 36 | "offload_optimizer": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "allgather_partitions": true, 41 | "allgather_bucket_size": 2e8, 42 | "overlap_comm": true, 43 | "reduce_scatter": true, 44 | "reduce_bucket_size": 2e8, 45 | "contiguous_gradients": true 46 | }, 47 | 48 | "gradient_accumulation_steps": "auto", 49 | "gradient_clipping": "auto", 50 | "steps_per_print": 2000, 51 | "train_batch_size": "auto", 52 | "train_micro_batch_size_per_gpu": "auto", 53 | "wall_clock_breakdown": false 54 | } 55 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/multiple-choice/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Multiple choice 18 | 19 | The script [`run_swag.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/multiple-choice/run_swag.py) allows us to apply different quantization approaches (such as dynamic and static quantization) using the [ONNX Runtime](https://github.com/microsoft/onnxruntime) quantization tool for multiple choice tasks. 20 | 21 | The following example applies post-training dynamic quantization on a BERT fine-tuned on the SWAG dataset. 22 | 23 | ```bash 24 | python run_swag.py \ 25 | --model_name_or_path ehdwns1516/bert-base-uncased_SWAG \ 26 | --quantization_approach dynamic \ 27 | --do_eval \ 28 | --output_dir /tmp/quantized_bert_swag 29 | ``` 30 | 31 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 32 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/passes/fully_connected.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Set, Tuple 15 | 16 | from onnx import ModelProto 17 | 18 | from onnxruntime.transformers.onnx_model import OnnxModel 19 | 20 | from .. import PreprocessorPass 21 | 22 | 23 | class IncludeFullyConnectedNodes(PreprocessorPass): 24 | def __init__(self): 25 | super().__init__() 26 | 27 | def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]: 28 | fc_subgraphs = [] 29 | for add_node in model.get_nodes_by_op_type("Add"): 30 | fc_components = model.match_parent_path(add_node, ["MatMul"], [1]) 31 | if fc_components is not None: 32 | fc_components.append(add_node) 33 | fc_subgraphs.append(fc_components) 34 | fc_components = {node.name for fc in fc_subgraphs for node in fc} 35 | return fc_components, set() 36 | -------------------------------------------------------------------------------- /optimum/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import TYPE_CHECKING 15 | 16 | from transformers.utils import _LazyModule 17 | 18 | 19 | _import_structure = { 20 | "graph_transformations": [ 21 | "cast_slice_nodes_inputs_to_int32", 22 | "merge_decoders", 23 | "remove_duplicate_weights", 24 | "replace_atenops_to_gather", 25 | "remove_duplicate_weights_from_tied_info", 26 | ], 27 | } 28 | 29 | if TYPE_CHECKING: 30 | from .graph_transformations import ( 31 | cast_slice_nodes_inputs_to_int32, 32 | merge_decoders, 33 | remove_duplicate_weights, 34 | remove_duplicate_weights_from_tied_info, 35 | replace_atenops_to_gather, 36 | ) 37 | else: 38 | import sys 39 | 40 | sys.modules[__name__] = _LazyModule( 41 | __name__, 42 | globals()["__file__"], 43 | _import_structure, 44 | module_spec=__spec__, 45 | ) 46 | -------------------------------------------------------------------------------- /optimum/exporters/tflite/config.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ 16 | Common TensorFlow Lite configuration classes that handle most of the features for building model specific 17 | configurations. 18 | """ 19 | 20 | from ...utils import DummyTextInputGenerator, DummyVisionInputGenerator, logging 21 | from .base import TFLiteConfig 22 | 23 | 24 | logger = logging.get_logger(__name__) 25 | 26 | 27 | class TextEncoderTFliteConfig(TFLiteConfig): 28 | """ 29 | Handles encoder-based text architectures. 30 | """ 31 | 32 | DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator,) 33 | MANDATORY_AXES = ("batch_size", "sequence_length", ("multiple-choice", "num_choices")) 34 | 35 | 36 | class VisionTFLiteConfig(TFLiteConfig): 37 | """ 38 | Handles vision architectures. 39 | """ 40 | 41 | DUMMY_INPUT_GENERATOR_CLASSES = (DummyVisionInputGenerator,) 42 | MANDATORY_AXES = ("batch_size", "num_channels", "width", "height") 43 | -------------------------------------------------------------------------------- /.github/workflows/test_offline.yml: -------------------------------------------------------------------------------- 1 | name: Offline usage / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [ main ] 6 | pull_request: 7 | branches: [ main ] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.9] 19 | os: [ubuntu-20.04] 20 | 21 | runs-on: ${{ matrix.os }} 22 | steps: 23 | - uses: actions/checkout@v2 24 | - name: Setup Python ${{ matrix.python-version }} 25 | uses: actions/setup-python@v2 26 | with: 27 | python-version: ${{ matrix.python-version }} 28 | - name: Install dependencies for pytorch export 29 | run: | 30 | pip install .[tests,exporters,onnxruntime] 31 | - name: Test with unittest 32 | run: | 33 | HF_HOME=/tmp/ huggingface-cli download hf-internal-testing/tiny-random-gpt2 34 | 35 | HF_HOME=/tmp/ HF_HUB_OFFLINE=1 optimum-cli export onnx --model hf-internal-testing/tiny-random-gpt2 gpt2_onnx --task text-generation 36 | 37 | huggingface-cli download hf-internal-testing/tiny-random-gpt2 38 | 39 | HF_HUB_OFFLINE=1 optimum-cli export onnx --model hf-internal-testing/tiny-random-gpt2 gpt2_onnx --task text-generation 40 | 41 | pytest tests/onnxruntime/test_modeling.py -k "test_load_model_from_hub and not from_hub_onnx" -s -vvvvv 42 | 43 | HF_HUB_OFFLINE=1 pytest tests/onnxruntime/test_modeling.py -k "test_load_model_from_hub and not from_hub_onnx" -s -vvvvv -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/passes/gelu.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Set, Tuple 15 | 16 | from onnx import ModelProto 17 | 18 | from onnxruntime.transformers.onnx_model import OnnxModel 19 | 20 | from .. import PreprocessorPass 21 | 22 | 23 | class ExcludeGeLUNodes(PreprocessorPass): 24 | def __init__(self): 25 | super().__init__() 26 | 27 | def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]: 28 | gelu_subgraphs = [] 29 | for mul_node in model.get_nodes_by_op_type("Mul"): 30 | gelu_components = model.match_parent_path(mul_node, ["Mul", "Add", "Erf", "Div"], [0, 1, 0, 0]) 31 | 32 | if gelu_components is not None: 33 | gelu_components.append(mul_node) 34 | gelu_subgraphs.append(gelu_components) 35 | 36 | gl_components = (node.name for gl in gelu_subgraphs for node in gl) 37 | return set(), set(gl_components) 38 | -------------------------------------------------------------------------------- /.github/workflows/dev_test_optimum_common.yml: -------------------------------------------------------------------------------- 1 | # This yml file is autogenerated. Do not edit. 2 | 3 | name: dev_Optimum common / Python - Test 4 | on: 5 | schedule: 6 | - cron: 0 7 * * * 7 | concurrency: 8 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 9 | cancel-in-progress: true 10 | jobs: 11 | build: 12 | strategy: 13 | fail-fast: false 14 | matrix: 15 | python-version: 16 | - 3.7 17 | - 3.8 18 | - 3.9 19 | os: 20 | - ubuntu-20.04 21 | - windows-2019 22 | - macos-13 23 | runs-on: ${{ matrix.os }} 24 | steps: 25 | - uses: actions/checkout@v2 26 | - name: Setup Python ${{ matrix.python-version }} 27 | uses: actions/setup-python@v2 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install .[tests] 34 | ls -l optimum/ 35 | pip install -U git+https://github.com/huggingface/evaluate 36 | pip install -U git+https://github.com/huggingface/diffusers 37 | pip install -U git+https://github.com/huggingface/transformers 38 | - name: Test with unittest 39 | shell: bash 40 | run: | 41 | # Setting HUGGINGFACE_CO_STAGING to true for only one job of the matrix 42 | as the staging tests cannot run in parallel. 43 | export HUGGINGFACE_CO_STAGING=${{ matrix.python-version == 3.8 && matrix.os 44 | == ubuntu-20.04 }} 45 | python -m unittest discover -s tests -p test_*.py 46 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # What does this PR do? 2 | 3 | 12 | 13 | 14 | 15 | Fixes # (issue) 16 | 17 | 18 | ## Before submitting 19 | - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). 20 | - [ ] Did you make sure to update the documentation with your changes? 21 | - [ ] Did you write any new necessary tests? 22 | 23 | ## Who can review? 
24 | 25 | 33 | -------------------------------------------------------------------------------- /docs/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import doctest 19 | import sys 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(__file__), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | # Doctest custom flag to ignore output. 29 | IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT") 30 | 31 | OutputChecker = doctest.OutputChecker 32 | 33 | 34 | class CustomOutputChecker(OutputChecker): 35 | def check_output(self, want, got, optionflags): 36 | if IGNORE_RESULT & optionflags: 37 | return True 38 | return OutputChecker.check_output(self, want, got, optionflags) 39 | 40 | 41 | doctest.OutputChecker = CustomOutputChecker 42 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/token-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Token classification 18 | 19 | The script [`run_ner.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/token-classification/run_ner.py) 20 | allows us to apply graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for token classification tasks. 21 | 22 | The following example applies graph optimizations on a DistilBERT fine-tuned on the CoNLL-2003 task. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node eliminations and constant folding. Higher optimization level will result in hardware dependent optimized graph. 23 | 24 | ```bash 25 | python run_ner.py \ 26 | --model_name_or_path elastic/distilbert-base-uncased-finetuned-conll03-english \ 27 | --dataset_name conll2003 \ 28 | --optimization_level 1 \ 29 | --do_eval \ 30 | --output_dir /tmp/optimized_distilbert_conll2003 31 | ``` 32 | 33 | -------------------------------------------------------------------------------- /optimum/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import doctest 19 | import sys 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(__file__), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | # Doctest custom flag to ignore output. 29 | IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT") 30 | 31 | OutputChecker = doctest.OutputChecker 32 | 33 | 34 | class CustomOutputChecker(OutputChecker): 35 | def check_output(self, want, got, optionflags): 36 | if IGNORE_RESULT & optionflags: 37 | return True 38 | return OutputChecker.check_output(self, want, got, optionflags) 39 | 40 | 41 | doctest.OutputChecker = CustomOutputChecker 42 | -------------------------------------------------------------------------------- /optimum/fx/utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from functools import wraps 16 | 17 | import transformers 18 | from packaging import version 19 | 20 | 21 | _TRANSFORMERS_MIN_VERSION = version.parse("4.20.0.dev0") 22 | 23 | transformers_version = version.parse(transformers.__version__) 24 | _fx_features_available = (_TRANSFORMERS_MIN_VERSION.major, _TRANSFORMERS_MIN_VERSION.minor) <= ( 25 | transformers_version.major, 26 | transformers_version.minor, 27 | ) 28 | 29 | 30 | def are_fx_features_available(): 31 | return _fx_features_available 32 | 33 | 34 | def check_if_available(func): 35 | @wraps(func) 36 | def wrapper(*args, **kwargs): 37 | if not are_fx_features_available(): 38 | raise ImportError( 39 | f"Found an incompatible version of transformers. Found version {transformers_version}, but only {_TRANSFORMERS_MIN_VERSION} and above are supported." 
40 | ) 41 | return func(*args, **kwargs) 42 | 43 | return wrapper 44 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/image-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Image classification 18 | 19 | The script [`run_image_classification.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/image_classification/run_image_classification.py) allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for image classification tasks. 20 | 21 | The following example applies dynamic quantization on a ViT model fine-tuned on the beans classification dataset. 22 | 23 | ```bash 24 | python run_image_classification.py \ 25 | --model_name_or_path nateraw/vit-base-beans \ 26 | --dataset_name beans \ 27 | --quantization_approach dynamic \ 28 | --do_eval \ 29 | --output_dir /tmp/image_classification_vit_beans 30 | ``` 31 | 32 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 33 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/token-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Token classification 18 | 19 | 20 | The script [`run_ner.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/token-classification/run_ner.py) 21 | allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph 22 | optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for token classification tasks. 23 | 24 | The following example applies post-training dynamic quantization on a DistilBERT fine-tuned on the CoNLL-2003 task 25 | 26 | ```bash 27 | python run_ner.py \ 28 | --model_name_or_path elastic/distilbert-base-uncased-finetuned-conll03-english \ 29 | --dataset_name conll2003 \ 30 | --quantization_approach dynamic \ 31 | --do_eval \ 32 | --output_dir /tmp/quantized_distilbert_conll2003 33 | ``` 34 | 35 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 36 | -------------------------------------------------------------------------------- /docs/source/utils/dummy_input_generators.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Dummy Input Generators 14 | 15 | It is very common to have to generate dummy inputs to perform a task (tracing, exporting a model to some backend, 16 | testing model outputs, etc). The goal of [`~optimum.utils.input_generators.DummyInputGenerator`] classes is to make this 17 | generation easy and re-usable. 
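For a concrete feel of the API, the snippet below is a minimal sketch (not part of the original documentation page) showing how a `DummyTextInputGenerator` could be instantiated from a normalized config to produce a dummy `input_ids` tensor. The checkpoint name and shapes are arbitrary, and the exact constructor arguments may differ slightly between Optimum versions.

```python
from transformers import AutoConfig

from optimum.utils import DummyTextInputGenerator, NormalizedTextConfig

# Wrap a regular Transformers config so that attribute names are normalized.
config = AutoConfig.from_pretrained("bert-base-uncased")  # arbitrary checkpoint
normalized_config = NormalizedTextConfig(config)

# Build a generator for text inputs and produce a dummy `input_ids` tensor.
generator = DummyTextInputGenerator(
    task="text-classification",
    normalized_config=normalized_config,
    batch_size=2,          # arbitrary shapes, for illustration only
    sequence_length=16,
)
dummy_input_ids = generator.generate("input_ids", framework="pt")
print(dummy_input_ids.shape)  # expected: torch.Size([2, 16])
```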
18 | 19 | 20 | ## Base class 21 | 22 | [[autodoc]] optimum.utils.input_generators.DummyInputGenerator 23 | 24 | 25 | ## Existing dummy input generators 26 | 27 | [[autodoc]] optimum.utils.input_generators.DummyTextInputGenerator 28 | 29 | [[autodoc]] optimum.utils.input_generators.DummyDecoderTextInputGenerator 30 | 31 | [[autodoc]] optimum.utils.input_generators.DummyPastKeyValuesGenerator 32 | 33 | [[autodoc]] optimum.utils.input_generators.DummySeq2SeqPastKeyValuesGenerator 34 | 35 | [[autodoc]] optimum.utils.input_generators.DummyBboxInputGenerator 36 | 37 | [[autodoc]] optimum.utils.input_generators.DummyVisionInputGenerator 38 | 39 | [[autodoc]] optimum.utils.input_generators.DummyAudioInputGenerator 40 | -------------------------------------------------------------------------------- /optimum/utils/modeling_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import functools 16 | 17 | 18 | MODEL_TO_PATCH_FOR_PAST = { 19 | "bart", 20 | "blenderbot", 21 | "blenderbot-small", 22 | "bloom", 23 | "llama", 24 | "mistral", 25 | "mpt", 26 | "opt", 27 | "pegasus", 28 | } 29 | 30 | 31 | def recurse_getattr(obj, attr: str): 32 | """ 33 | Recursive `getattr`. 34 | 35 | Args: 36 | obj: 37 | A class instance holding the attribute. 38 | attr (`str`): 39 | The attribute that is to be retrieved, e.g. 'attribute1.attribute2'. 40 | """ 41 | 42 | def _getattr(obj, attr): 43 | return getattr(obj, attr) 44 | 45 | return functools.reduce(_getattr, [obj] + attr.split(".")) 46 | 47 | 48 | def recurse_setattr(module, name, value): 49 | """A function to recursively set attributes to a module.""" 50 | if "." not in name: 51 | setattr(module, name, value) 52 | else: 53 | name, rest = name.split(".", 1) 54 | recurse_setattr(getattr(module, name), rest, value) 55 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/text-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Text classification 18 | 19 | ## GLUE tasks 20 | 21 | The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/text-classification/run_glue.py) 22 | allows us to apply graph optimizations and fusion using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as 23 | the ones from the [GLUE benchmark](https://gluebenchmark.com/). 24 | 25 | The following example applies graph optimization on a DistilBERT fine-tuned on the sst-2 task. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node eliminations and constant folding. Higher optimization level will result in hardware dependent optimized graph. 
26 | 27 | ```bash 28 | python run_glue.py \ 29 | --model_name_or_path distilbert-base-uncased-finetuned-sst-2-english \ 30 | --task_name sst2 \ 31 | --optimization_level 1 \ 32 | --do_eval \ 33 | --output_dir /tmp/optimized_distilbert_sst2 34 | ``` 35 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/docker/Dockerfile-ort-nightly-rocm57: -------------------------------------------------------------------------------- 1 | # Use rocm image 2 | FROM rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1 3 | CMD rocm-smi 4 | 5 | # Ignore interactive questions during `docker build` 6 | ENV DEBIAN_FRONTEND noninteractive 7 | 8 | # Versions 9 | # available options 3.10 10 | ARG PYTHON_VERSION=3.10 11 | 12 | # Bash shell 13 | RUN chsh -s /bin/bash 14 | SHELL ["/bin/bash", "-c"] 15 | 16 | # Install and update tools to minimize security vulnerabilities 17 | RUN apt-get update 18 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 19 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \ 20 | apt-get clean 21 | RUN apt-get autoremove -y 22 | 23 | ARG PYTHON_EXE=/opt/conda/envs/py_$PYTHON_VERSION/bin/python 24 | 25 | # (Optional) Intall test dependencies 26 | RUN $PYTHON_EXE -m pip install -U pip 27 | RUN $PYTHON_EXE -m pip install git+https://github.com/huggingface/transformers 28 | RUN $PYTHON_EXE -m pip install datasets accelerate evaluate coloredlogs absl-py rouge_score seqeval scipy sacrebleu nltk scikit-learn parameterized sentencepiece --no-cache-dir 29 | RUN $PYTHON_EXE -m pip install deepspeed --no-cache-dir 30 | RUN conda install -y mpi4py 31 | 32 | # PyTorch 33 | RUN $PYTHON_EXE -m pip install onnx ninja 34 | 35 | # ORT Module 36 | RUN $PYTHON_EXE -m pip install --pre onnxruntime-training -f https://download.onnxruntime.ai/onnxruntime_nightly_rocm57.html 37 | RUN $PYTHON_EXE -m pip install torch-ort 38 | RUN $PYTHON_EXE -m pip install --upgrade protobuf==3.20.2 39 | RUN $PYTHON_EXE -m torch_ort.configure 40 | 41 | WORKDIR . 
42 | 43 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /tests/onnxruntime/ds_configs/ds_config_zero_stage_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | 11 | "optimizer": { 12 | "type": "AdamW", 13 | "params": { 14 | "lr": "auto", 15 | "betas": "auto", 16 | "eps": "auto", 17 | "weight_decay": "auto" 18 | } 19 | }, 20 | 21 | "scheduler": { 22 | "type": "WarmupLR", 23 | "params": { 24 | "warmup_min_lr": "auto", 25 | "warmup_max_lr": "auto", 26 | "warmup_num_steps": "auto" 27 | } 28 | }, 29 | 30 | "zero_optimization": { 31 | "stage": 3, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "offload_param": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "overlap_comm": true, 41 | "contiguous_gradients": true, 42 | "sub_group_size": 1e9, 43 | "reduce_bucket_size": "auto", 44 | "stage3_prefetch_bucket_size": "auto", 45 | "stage3_param_persistence_threshold": "auto", 46 | "stage3_max_live_parameters": 1e9, 47 | "stage3_max_reuse_distance": 1e9, 48 | "stage3_gather_16bit_weights_on_model_save": true 49 | }, 50 | 51 | "gradient_accumulation_steps": "auto", 52 | "gradient_clipping": "auto", 53 | "steps_per_print": 2000, 54 | "train_batch_size": "auto", 55 | "train_micro_batch_size_per_gpu": "auto", 56 | "wall_clock_breakdown": false 57 | } -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/text-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Text classification 18 | 19 | ## GLUE tasks 20 | 21 | The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/text-classification/run_glue.py) 22 | allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph 23 | optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as 24 | the ones from the [GLUE benchmark](https://gluebenchmark.com/). 25 | 26 | The following example applies post-training dynamic quantization on a DistilBERT fine-tuned on the sst-2 task. 27 | 28 | ```bash 29 | python run_glue.py \ 30 | --model_name_or_path distilbert-base-uncased-finetuned-sst-2-english \ 31 | --task_name sst2 \ 32 | --quantization_approach dynamic \ 33 | --do_eval \ 34 | --output_dir /tmp/quantized_distilbert_sst2 35 | ``` 36 | 37 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 38 | -------------------------------------------------------------------------------- /docs/source/exporters/onnx/package_reference/export.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Export functions 14 | 15 | You can export models to ONNX from two frameworks in 🤗 Optimum: PyTorch and TensorFlow. 
There is an export function for each of these frameworks, [`~optimum.exporters.onnx.convert.export_pytorch`] and [`~optimum.exporters.onnx.convert.export_tensorflow`], but the recommended way of using those is via the main export function [`~optimum.exporters.main_export`], which will take care of using the proper exporting function according to the available framework, check that the exported model is valid, and provide extended options to run optimizations on the exported model. 16 | 17 | ## Main functions 18 | 19 | [[autodoc]] exporters.onnx.main_export 20 | 21 | [[autodoc]] exporters.onnx.onnx_export_from_model 22 | 23 | [[autodoc]] exporters.onnx.convert.export 24 | 25 | [[autodoc]] exporters.onnx.convert.export_pytorch 26 | 27 | [[autodoc]] exporters.onnx.convert.export_tensorflow 28 | 29 | 30 | ## Utility functions 31 | 32 | [[autodoc]] exporters.onnx.convert.check_dummy_inputs_are_allowed 33 | 34 | [[autodoc]] exporters.onnx.convert.validate_model_outputs 35 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/overview.mdx: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | 🤗 Optimum provides an integration with ONNX Runtime, a cross-platform, high performance engine for Open Neural Network Exchange (ONNX) models. 4 | 5 |
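As a quick, hedged illustration of what this integration looks like in practice (a minimal sketch that is not part of the original page; `export=True` is the argument used in recent releases, while older ones used `from_transformers=True`), a Transformers checkpoint can be exported to ONNX and run with ONNX Runtime through the familiar pipeline API:

```python
from transformers import AutoTokenizer, pipeline

from optimum.onnxruntime import ORTModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"

# Export the PyTorch checkpoint to ONNX on the fly and load it with ONNX Runtime.
model = ORTModelForSequenceClassification.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# The ORT model is a drop-in replacement in the usual pipeline API.
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("ONNX Runtime makes inference faster."))
```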
8 | How-to guides: Practical guides to help you achieve a specific goal. Take a look at these guides to learn how to use 🤗 Optimum to solve real-world problems.
12 | Conceptual guides: High-level explanations for building a better understanding about important topics such as quantization and graph optimization.
16 | Reference: Technical descriptions of how the ONNX Runtime classes and methods of 🤗 Optimum work.
21 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/passes/layernorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Set, Tuple 15 | 16 | from onnx import ModelProto 17 | 18 | from onnxruntime.transformers.onnx_model import OnnxModel 19 | 20 | from .. import PreprocessorPass 21 | 22 | 23 | class ExcludeLayerNormNodes(PreprocessorPass): 24 | def __init__(self): 25 | super().__init__() 26 | 27 | def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]: 28 | layer_norm_subgraphs = [] 29 | for add_node in model.get_nodes_by_op_type("Add"): 30 | layer_norm_components = model.match_parent_path( 31 | add_node, 32 | ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow", "Sub", "ReduceMean"], 33 | [0, 0, 1, 0, 0, 0, 0, 1], 34 | ) 35 | 36 | if layer_norm_components is not None: 37 | layer_norm_components.append(add_node) 38 | layer_norm_subgraphs.append(layer_norm_components) 39 | 40 | ln_components = (node.name for ln in layer_norm_subgraphs for node in ln) 41 | return set(), set(ln_components) 42 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/question-answering/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Question answering 18 | 19 | 20 | The script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/question-answering/run_qa.py) 21 | allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph 22 | optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for question answering tasks. 23 | 24 | Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along 25 | the flag `--version_2_with_negative`. 26 | 27 | The following example applies post-training dynamic quantization on a DistilBERT fine-tuned on the SQuAD1.0 dataset. 28 | 29 | ```bash 30 | python run_qa.py \ 31 | --model_name_or_path distilbert-base-uncased-distilled-squad \ 32 | --dataset_name squad \ 33 | --quantization_approach dynamic \ 34 | --do_eval \ 35 | --output_dir /tmp/quantized_distilbert_squad 36 | ``` 37 | 38 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 
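The same dynamic quantization can also be applied programmatically. The sketch below is illustrative only (it is not part of the example script, and the AVX512-VNNI configuration is an arbitrary choice); it relies on the `ORTQuantizer` and `AutoQuantizationConfig` classes used elsewhere in this repository:

```python
from optimum.onnxruntime import ORTModelForQuestionAnswering, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

# Export the fine-tuned checkpoint to ONNX, then quantize the resulting graph dynamically.
model = ORTModelForQuestionAnswering.from_pretrained(
    "distilbert-base-uncased-distilled-squad", export=True
)
quantizer = ORTQuantizer.from_pretrained(model)
qconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)
quantizer.quantize(save_dir="/tmp/quantized_distilbert_squad", quantization_config=qconfig)
```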
39 | -------------------------------------------------------------------------------- /tests/onnxruntime/test_utils.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import unittest 3 | 4 | import onnxruntime as ort 5 | import torch 6 | 7 | from optimum.onnxruntime.configuration import AutoQuantizationConfig, OptimizationConfig, ORTConfig 8 | from optimum.onnxruntime.utils import get_device_for_provider, get_provider_for_device 9 | 10 | 11 | class ProviderAndDeviceGettersTest(unittest.TestCase): 12 | def test_get_device_for_provider(self): 13 | self.assertEqual(get_device_for_provider("CPUExecutionProvider", provider_options={}), torch.device("cpu")) 14 | self.assertEqual( 15 | get_device_for_provider("CUDAExecutionProvider", provider_options={"device_id": 1}), torch.device("cuda:1") 16 | ) 17 | 18 | def test_get_provider_for_device(self): 19 | self.assertEqual(get_provider_for_device(torch.device("cpu")), "CPUExecutionProvider") 20 | 21 | if "ROCMExecutionProvider" in ort.get_available_providers(): 22 | self.assertEqual(get_provider_for_device(torch.device("cuda")), "ROCMExecutionProvider") 23 | else: 24 | self.assertEqual(get_provider_for_device(torch.device("cuda")), "CUDAExecutionProvider") 25 | 26 | 27 | class ORTConfigTest(unittest.TestCase): 28 | def test_save_and_load(self): 29 | with tempfile.TemporaryDirectory() as tmp_dir: 30 | quantization_config = AutoQuantizationConfig.arm64(is_static=False, per_channel=False) 31 | optimization_config = OptimizationConfig(optimization_level=2) 32 | ort_config = ORTConfig(opset=11, quantization=quantization_config, optimization=optimization_config) 33 | ort_config.save_pretrained(tmp_dir) 34 | loaded_ort_config = ORTConfig.from_pretrained(tmp_dir) 35 | self.assertEqual(ort_config.to_dict(), loaded_ort_config.to_dict()) 36 | -------------------------------------------------------------------------------- /docs/source/torch_fx/overview.mdx: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | 🤗 Optimum provides an integration with Torch FX, a library for PyTorch that allows developers to implement custom transformations of their models that can be optimized for performance. 4 | 5 |
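As a hedged illustration (a minimal sketch, not part of the original page; the transformation names are taken from `optimum.fx.optimization` and may vary across versions), a traced model can be rewritten by composing the provided graph transformations:

```python
from transformers import AutoModel
from transformers.utils.fx import symbolic_trace

from optimum.fx.optimization import ChangeTrueDivToMulByInverse, MergeLinears, compose

# Trace the model into a torch.fx GraphModule so its graph can be rewritten.
model = AutoModel.from_pretrained("bert-base-uncased")  # arbitrary checkpoint
traced = symbolic_trace(model, input_names=["input_ids", "attention_mask", "token_type_ids"])

# Compose two of the provided transformations and apply them to the traced model.
transformation = compose(MergeLinears(), ChangeTrueDivToMulByInverse())
optimized = transformation(traced)
```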
8 | How-to guides: Practical guides to help you achieve a specific goal. Take a look at these guides to learn how to use 🤗 Optimum to solve real-world problems.
12 | Conceptual guides: High-level explanations for building a better understanding about important topics such as quantization and graph optimization.
16 | Reference: Technical descriptions of how the Torch FX classes and methods of 🤗 Optimum work.
21 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/package_reference/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration 14 | 15 | The configuration classes are the way to specify how a task should be done. There are two tasks supported with the ONNX Runtime package: 16 | 17 | 1. Optimization: Performed by the [`~onnxruntime.ORTOptimizer`], this task can be tweaked using an [`~onnxruntime.configuration.OptimizationConfig`]. 18 | 19 | 2. Quantization: Performed by the [`~onnxruntime.ORTQuantizer`], quantization can be set using a [`~onnxruntime.configuration.QuantizationConfig`]. A calibration step is required in some cases (post training static quantization), which can be specified using a [`~onnxruntime.configuration.CalibrationConfig`]. 20 | 21 | ## OptimizationConfig 22 | 23 | [[autodoc]] onnxruntime.configuration.OptimizationConfig 24 | 25 | [[autodoc]] onnxruntime.configuration.AutoOptimizationConfig 26 | 27 | ## QuantizationConfig 28 | 29 | [[autodoc]] onnxruntime.configuration.QuantizationConfig 30 | 31 | ## AutoQuantizationConfig 32 | 33 | [[autodoc]] onnxruntime.configuration.AutoQuantizationConfig 34 | - all 35 | 36 | ### CalibrationConfig 37 | 38 | [[autodoc]] onnxruntime.configuration.CalibrationConfig 39 | 40 | ## ORTConfig 41 | 42 | [[autodoc]] onnxruntime.configuration.ORTConfig 43 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/question-answering/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Question answering 18 | 19 | The script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/question-answering/run_qa.py) 20 | allows us to apply graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for question answering tasks. 21 | 22 | Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along 23 | the flag `--version_2_with_negative`. 24 | 25 | The following example applies graph optimizations on a DistilBERT fine-tuned on the SQuAD1.0 dataset. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node eliminations and constant folding. Higher optimization level will result in hardware dependent optimized graph. 26 | 27 | ```bash 28 | python run_qa.py \ 29 | --model_name_or_path distilbert-base-uncased-distilled-squad \ 30 | --dataset_name squad \ 31 | --optimization_level 1 \ 32 | --do_eval \ 33 | --output_dir /tmp/optimized_distilbert_squad 34 | ``` 35 | 36 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 37 | -------------------------------------------------------------------------------- /docs/source/utils/normalized_config.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Normalized Configurations 14 | 15 | Model configuration classes in 🤗 Transformers are not standardized. Although Transformers implements an `attribute_map` attribute that mitigates the issue to some extent, it does not make it easy to reason on common configuration attributes in the code. 
16 | [`~optimum.utils.normalized_config.NormalizedConfig`] classes try to fix that by allowing access to the configuration 17 | attribute they wrap in a standardized way. 18 | 19 | 20 | ## Base class 21 | 22 | 23 | 24 | While it is possible to create `NormalizedConfig` subclasses for common use-cases, it is also possible to overwrite 25 | the `original attribute name -> normalized attribute name` mapping directly using the 26 | [`~optimum.utils.normalized_config.NormalizedConfig.with_args`] class method. 27 | 28 | 29 | 30 | [[autodoc]] optimum.utils.normalized_config.NormalizedConfig 31 | 32 | 33 | ## Existing normalized configurations 34 | 35 | [[autodoc]] optimum.utils.normalized_config.NormalizedTextConfig 36 | 37 | [[autodoc]] optimum.utils.normalized_config.NormalizedSeq2SeqConfig 38 | 39 | [[autodoc]] optimum.utils.normalized_config.NormalizedVisionConfig 40 | 41 | [[autodoc]] optimum.utils.normalized_config.NormalizedTextAndVisionConfig 42 | -------------------------------------------------------------------------------- /.github/generate_dev_tests.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import yaml 4 | 5 | 6 | tests = [ 7 | "test_exporters.yml", 8 | "test_dummy_inputs.yml", 9 | "test_bettertransformer.yml", 10 | "test_onnx.yml", 11 | "test_fx.yml", 12 | "test_onnxruntime.yml", 13 | "test_benckmark.yml", 14 | "test_optimum_common.yml", 15 | ] 16 | 17 | for test_name in tests: 18 | new_name = "dev_" + test_name 19 | 20 | with open(Path("workflows", test_name), "r") as file: 21 | workflox_yml = yaml.load(file, yaml.BaseLoader) 22 | 23 | workflox_yml["name"] = "dev_" + workflox_yml["name"] 24 | workflox_yml["on"] = {"schedule": [{"cron": "0 7 * * *"}]} 25 | 26 | for i, step in enumerate(workflox_yml["jobs"]["build"]["steps"]): 27 | if "name" in step and step["name"] == "Install dependencies": 28 | workflox_yml["jobs"]["build"]["steps"][i][ 29 | "run" 30 | ] += "pip install -U git+https://github.com/huggingface/evaluate\npip install -U git+https://github.com/huggingface/diffusers\npip install -U git+https://github.com/huggingface/transformers\n" 31 | 32 | with open(Path("workflows", new_name), "w") as outfile: 33 | yaml.dump( 34 | workflox_yml, 35 | outfile, 36 | default_flow_style=False, 37 | allow_unicode=True, 38 | width=float("inf"), 39 | sort_keys=False, 40 | ) 41 | 42 | with open(Path("workflows", new_name), "r+") as outfile: 43 | workflox_yml = outfile.read() 44 | workflox_yml = "# This yml file is autogenerated. Do not edit.\n\n" + workflox_yml 45 | 46 | workflox_yml = workflox_yml.replace("'", "") 47 | workflox_yml = workflox_yml.replace("run:", "run: |\n ") 48 | 49 | workflox_yml = "\n".join([ll.rstrip() for ll in workflox_yml.splitlines() if ll.strip()]) 50 | 51 | outfile.seek(0) 52 | outfile.write(workflox_yml) 53 | outfile.truncate() 54 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | SHELL := /bin/bash 16 | CURRENT_DIR = $(shell pwd) 17 | DEFAULT_CLONE_URL := https://github.com/huggingface/optimum.git 18 | # If CLONE_URL is empty, revert to DEFAULT_CLONE_URL 19 | REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL)) 20 | 21 | .PHONY: style test 22 | 23 | # Run code quality checks 24 | style_check: 25 | black --check . 26 | ruff . 27 | 28 | style: 29 | black . 30 | ruff . --fix 31 | 32 | # Run tests for the library 33 | test: 34 | python -m pytest tests 35 | 36 | # Utilities to release to PyPi 37 | build_dist_install_tools: 38 | pip install build 39 | pip install twine 40 | 41 | build_dist: 42 | rm -fr build 43 | rm -fr dist 44 | python -m build 45 | 46 | pypi_upload: build_dist 47 | python -m twine upload dist/* 48 | 49 | build_doc_docker_image: 50 | docker build -t doc_maker --build-arg commit_sha=$(COMMIT_SHA_OPTIMUM) --build-arg clone_url=$(REAL_CLONE_URL) ./docs 51 | 52 | doc: build_doc_docker_image 53 | @test -n "$(BUILD_DIR)" || (echo "BUILD_DIR is empty." ; exit 1) 54 | @test -n "$(VERSION)" || (echo "VERSION is empty." ; exit 1) 55 | docker run -v $(CURRENT_DIR):/doc_folder --workdir=/doc_folder doc_maker \ 56 | doc-builder build optimum /optimum/docs/source/ \ 57 | --build_dir $(BUILD_DIR) \ 58 | --version $(VERSION) \ 59 | --version_tag_suffix "" \ 60 | --html \ 61 | --clean 62 | -------------------------------------------------------------------------------- /optimum/onnxruntime/models/bloom.py: -------------------------------------------------------------------------------- 1 | from typing import TYPE_CHECKING, Tuple 2 | 3 | 4 | if TYPE_CHECKING: 5 | import torch 6 | 7 | 8 | def bloom_convert_to_standard_cache( 9 | past_key_value: Tuple[Tuple["torch.Tensor", "torch.Tensor"]], batch_size: int 10 | ) -> Tuple[Tuple["torch.Tensor", "torch.Tensor"]]: 11 | """ 12 | Standardizes the format of the cache so as to match most implementations, i.e. to tuple(tuple([batch_size, 13 | num_heads, ...])) 14 | """ 15 | batch_size_times_num_heads, head_dim, seq_length = past_key_value[0][0].shape 16 | num_heads = batch_size_times_num_heads // batch_size 17 | # key: [batch_size * num_heads, head_dim, seq_length] -> [batch_size, num_heads, head_dim, seq_length] 18 | # value: [batch_size * num_heads, seq_length, head_dim] -> [batch_size, num_heads, seq_length, head_dim] 19 | return tuple( 20 | ( 21 | layer_past[0].view(batch_size, num_heads, head_dim, seq_length), 22 | layer_past[1].view(batch_size, num_heads, seq_length, head_dim), 23 | ) 24 | for layer_past in past_key_value 25 | ) 26 | 27 | 28 | def bloom_convert_to_bloom_cache( 29 | past_key_value: Tuple[Tuple["torch.Tensor", "torch.Tensor"]] 30 | ) -> Tuple[Tuple["torch.Tensor", "torch.Tensor"]]: 31 | """ 32 | Converts the cache to the format expected by Bloom, i.e. 
to tuple(tuple([batch_size * num_heads, ...])) 33 | """ 34 | batch_size, num_heads, head_dim, seq_length = past_key_value[0][0].shape 35 | batch_size_times_num_heads = batch_size * num_heads 36 | # key: [batch_size, num_heads, head_dim, seq_length] -> [batch_size * num_heads, head_dim, seq_length] 37 | # value: [batch_size, num_heads, seq_length, head_dim] -> [batch_size * num_heads, seq_length, head_dim] 38 | return tuple( 39 | ( 40 | layer_past[0].view(batch_size_times_num_heads, head_dim, seq_length), 41 | layer_past[1].view(batch_size_times_num_heads, seq_length, head_dim), 42 | ) 43 | for layer_past in past_key_value 44 | ) 45 | -------------------------------------------------------------------------------- /tests/test_modeling_base.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | import tempfile 4 | import unittest 5 | 6 | import requests as r 7 | import torch 8 | from transformers.configuration_utils import PretrainedConfig 9 | 10 | from optimum.modeling_base import OptimizedModel 11 | from optimum.utils.testing_utils import require_hf_token 12 | 13 | 14 | TEST_HUB_PATH = "philschmid/unit_test_model" 15 | TEST_LOCAL_PATH = "tests/assets/hub" 16 | 17 | 18 | class DummyModel(OptimizedModel): 19 | def _save_pretrained(self, save_directory, **kwargs): 20 | return 21 | 22 | @classmethod 23 | def _from_pretrained(cls, **kwargs): 24 | config = PretrainedConfig.from_dict(kwargs["config"]) 25 | model = cls(model=torch.nn.Module, config=config) 26 | return model 27 | 28 | def forward(self, *args, **kwargs): 29 | pass 30 | 31 | 32 | class TestOptimizedModel(unittest.TestCase): 33 | def test_load_model_from_hub(self): 34 | # TODO: figure out how to create repos and push stuff to staging 35 | if os.getenv("HUGGINGFACE_CO_STAGING", False): 36 | self.skipTest("Skip test on staging") 37 | 38 | dummy_model = DummyModel.from_pretrained(TEST_HUB_PATH) 39 | self.assertTrue(dummy_model.config.remote) 40 | 41 | @require_hf_token 42 | def test_push_to_hub(self): 43 | with tempfile.TemporaryDirectory() as tmpdirname: 44 | model = DummyModel.from_pretrained(TEST_LOCAL_PATH) 45 | # create remote hash to check if file was updated. 46 | remote_hash = random.getrandbits(128) 47 | model.config.from_local = remote_hash 48 | 49 | model.save_pretrained( 50 | tmpdirname, 51 | use_auth_token=os.environ.get("HF_AUTH_TOKEN", None), 52 | push_to_hub=True, 53 | repository_id="unit_test_save_model", 54 | ) 55 | # folder contains all config files and pytorch_model.bin 56 | url = "https://huggingface.co/philschmid/unit_test_save_model/raw/main/config.json" 57 | response = r.get(url) 58 | self.assertEqual(remote_hash, response.json()["from_local"]) 59 | -------------------------------------------------------------------------------- /optimum/utils/doc.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from dataclasses import fields 17 | 18 | 19 | def generate_doc_dataclass(cls) -> str: 20 | """Class decorator for generate the documentation for dataclass.""" 21 | doc = "\f\nAttributes:\n" 22 | for attribute in fields(cls): 23 | doc += f" {attribute.name}" # attribute name 24 | 25 | # whether optional 26 | attribute_type = str(attribute.type) 27 | if attribute_type.startswith("typing.Optional"): 28 | optional = True 29 | type_display = attribute_type[attribute_type.find("[") + 1 : -1] 30 | type_display = type_display.split(".")[-1] 31 | else: 32 | optional = False 33 | 34 | if attribute_type.startswith("typing"): 35 | type_display = attribute_type.split(".")[-1] 36 | else: 37 | type_display = attribute.type.__name__ 38 | 39 | if optional: 40 | doc += f" (`{type_display}`, *optional*): " 41 | else: 42 | doc += f" (`{type_display}`): " 43 | 44 | doc += f"{attribute.metadata['description']}\n" # argument description 45 | cls.__doc__ = (cls.__doc__ if cls.__doc__ is not None else "") + "\n\n" + "".join(doc) 46 | return cls 47 | 48 | 49 | def add_dynamic_docstring( 50 | *docstr, 51 | text, 52 | dynamic_elements, 53 | ): 54 | def docstring_decorator(fn): 55 | func_doc = (fn.__doc__ or "") + "".join(docstr) 56 | fn.__doc__ = func_doc + text.format(**dynamic_elements) 57 | return fn 58 | 59 | return docstring_decorator 60 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/language-modeling/README.md: -------------------------------------------------------------------------------- 1 | 13 | 14 | # Language Modeling 15 | 16 | ## Language Modeling Training 17 | 18 | By running the scripts [`run_clm.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/language-modeling/run_clm.py) 19 | and [`run_mlm.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/language-modeling/run_mlm.py), 20 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to train the language models from the 21 | [HuggingFace hub](https://huggingface.co/models). 22 | 23 | 24 | __The following example applies the acceleration features powered by ONNX Runtime.__ 25 | 26 | 27 | ### ONNX Runtime Training 28 | 29 | The following example trains GPT2 on wikitext-2 with mixed precision (fp16). 30 | 31 | ```bash 32 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_clm.py \ 33 | --model_name_or_path gpt2 \ 34 | --dataset_name wikitext \ 35 | --dataset_config_name wikitext-2-raw-v1 \ 36 | --do_train \ 37 | --output_dir /tmp/test-clm \ 38 | --fp16 39 | ``` 40 | 41 | 42 | __Note__ 43 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. 
Install the dependencies either with our prepared* 44 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 45 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 46 | 47 | > *The inference will use PyTorch by default; if you want to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.* 48 | --- 49 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/question-answering/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Question answering 18 | 19 | ## SQuAD Tasks 20 | 21 | By running the script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/question-answering/run_qa.py), 22 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) to fine-tune the models from the 23 | [HuggingFace hub](https://huggingface.co/models) for question answering tasks such as SQuAD. 24 | 25 | Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along 26 | the flag `--version_2_with_negative`. 27 | 28 | __The following example applies the acceleration features powered by ONNX Runtime.__ 29 | 30 | 31 | ### ONNX Runtime Training 32 | 33 | The following example fine-tunes a BERT model on the SQuAD 1.0 dataset. 34 | 35 | ```bash 36 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_qa.py \ 37 | --model_name_or_path bert-base-uncased \ 38 | --dataset_name squad \ 39 | --do_train \ 40 | --do_eval \ 41 | --output_dir /tmp/ort_bert_squad/ 42 | ``` 43 | 44 | __Note__ 45 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. Install the dependencies either with our prepared* 46 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 47 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 48 | 49 | > *The inference will use PyTorch by default; if you want to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.* 50 | --- -------------------------------------------------------------------------------- /examples/onnxruntime/training/image-classification/README.md: -------------------------------------------------------------------------------- 1 | 13 | 14 | # Image Classification 15 | 16 | By running the script [`run_image_classification.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/image-classification/run_image_classification.py), we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to train image classification models from the 17 | [HuggingFace hub](https://huggingface.co/models). 18 | 19 | 20 | __The following example applies the acceleration features powered by ONNX Runtime.__ 21 | 22 | 23 | ### ONNX Runtime Training 24 | 25 | The following example trains ViT on the beans dataset with mixed precision (fp16).
26 | 27 | ```bash 28 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_image_classification.py \ 29 | --model_name_or_path google/vit-base-patch16-224-in21k \ 30 | --dataset_name beans \ 31 | --output_dir ./beans_outputs/ \ 32 | --remove_unused_columns False \ 33 | --label_column_name labels \ 34 | --do_train \ 35 | --do_eval \ 36 | --learning_rate 2e-5 \ 37 | --num_train_epochs 10 \ 38 | --per_device_train_batch_size 32 \ 39 | --per_device_eval_batch_size 32 \ 40 | --logging_strategy steps \ 41 | --logging_steps 10 \ 42 | --evaluation_strategy epoch \ 43 | --seed 1337 44 | ``` 45 | 46 | 47 | __Note__ 48 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. Install the dependencies either with our prepared* 49 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 50 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 51 | --- 52 | -------------------------------------------------------------------------------- /tests/onnx/test_onnx_export_custom_module.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import os 16 | from unittest import TestCase 17 | 18 | import pytest 19 | from transformers import is_torch_available 20 | from transformers.testing_utils import require_torch 21 | 22 | 23 | if is_torch_available(): 24 | import torch 25 | from transformers.models.deberta import modeling_deberta 26 | 27 | 28 | class StableDropoutTestCase(TestCase): 29 | """Tests export of StableDropout module.""" 30 | 31 | @require_torch 32 | @pytest.mark.filterwarnings("ignore:.*Dropout.*:UserWarning:torch.onnx.*") # torch.onnx is spammy. 
33 | def test_training(self): 34 | """Tests export of StableDropout in training mode.""" 35 | devnull = open(os.devnull, "wb") 36 | # drop_prob must be > 0 for the test to be meaningful 37 | sd = modeling_deberta.StableDropout(0.1) 38 | # Avoid warnings in training mode 39 | do_constant_folding = False 40 | # Dropout is a no-op in inference mode 41 | training = torch.onnx.TrainingMode.PRESERVE 42 | input = (torch.randn(2, 2),) 43 | 44 | torch.onnx.export( 45 | sd, 46 | input, 47 | devnull, 48 | opset_version=12, # Minimum supported 49 | do_constant_folding=do_constant_folding, 50 | training=training, 51 | ) 52 | 53 | # Expected to fail with opset_version < 12 54 | with self.assertRaises(Exception): 55 | torch.onnx.export( 56 | sd, 57 | input, 58 | devnull, 59 | opset_version=11, 60 | do_constant_folding=do_constant_folding, 61 | training=training, 62 | ) 63 | -------------------------------------------------------------------------------- /optimum/utils/preprocessing/task_processors_manager.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Dataset processing factory.""" 16 | 17 | from typing import TYPE_CHECKING, Any, Type 18 | 19 | from optimum.utils.preprocessing.image_classification import ImageClassificationProcessing 20 | from optimum.utils.preprocessing.question_answering import QuestionAnsweringProcessing 21 | from optimum.utils.preprocessing.text_classification import TextClassificationProcessing 22 | from optimum.utils.preprocessing.token_classification import TokenClassificationProcessing 23 | 24 | 25 | if TYPE_CHECKING: 26 | from .base import DatasetProcessing 27 | 28 | 29 | class TaskProcessorsManager: 30 | _TASK_TO_DATASET_PROCESSING_CLASS = { 31 | "text-classification": TextClassificationProcessing, 32 | "token-classification": TokenClassificationProcessing, 33 | "question-answering": QuestionAnsweringProcessing, 34 | "image-classification": ImageClassificationProcessing, 35 | } 36 | 37 | @classmethod 38 | def get_task_processor_class_for_task(cls, task: str) -> Type: 39 | if task not in cls._TASK_TO_DATASET_PROCESSING_CLASS: 40 | supported_tasks = ", ".join(cls._TASK_TO_DATASET_PROCESSING_CLASS.keys()) 41 | raise KeyError( 42 | f"Could not find a `TaskProcessor` class for the task called {task}, supported tasks: " 43 | f"{supported_tasks}." 
44 | ) 45 | return cls._TASK_TO_DATASET_PROCESSING_CLASS[task] 46 | 47 | @classmethod 48 | def for_task(cls, task: str, *dataset_processing_args, **dataset_processing_kwargs: Any) -> "DatasetProcessing": 49 | return cls.get_task_processor_class_for_task(task)(*dataset_processing_args, **dataset_processing_kwargs) 50 | -------------------------------------------------------------------------------- /tests/benchmark/memory_tracker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | from contextlib import contextmanager 4 | from multiprocessing import Pipe, Process 5 | from multiprocessing.connection import Connection 6 | 7 | 8 | # Adapted from optimum-benchmark; I don't trust PyTorch peak memory info when external libs are used. 9 | class MemoryTracker: 10 | def __init__(self): 11 | self.peak_memory: int = 0 12 | self.device_index = int(os.environ["CUDA_VISIBLE_DEVICES"].split(",")[0]) 13 | 14 | @contextmanager 15 | def track(self, interval: float = 0.1): 16 | print(f"Tracking memory for device {self.device_index}") 17 | yield from self._track_peak_memory(interval) 18 | 19 | def _track_peak_memory(self, interval: float): 20 | child_connection, parent_connection = Pipe() 21 | # instantiate process 22 | mem_process: Process = PeakMemoryMeasureProcess(self.device_index, child_connection, interval) 23 | mem_process.start() 24 | # wait until the measurement process signals that tracking has started 25 | parent_connection.recv() 26 | yield 27 | # signal the measurement process to stop tracking 28 | parent_connection.send(0) 29 | # receive peak memory 30 | self.peak_memory = parent_connection.recv() 31 | 32 | 33 | class PeakMemoryMeasureProcess(Process): 34 | def __init__(self, device_index: int, child_connection: Connection, interval: float): 35 | super().__init__() 36 | self.device_index = device_index 37 | self.interval = interval 38 | self.connection = child_connection 39 | self.mem_usage = 0 40 | 41 | def run(self): 42 | self.connection.send(0) 43 | stop = False 44 | 45 | command = f"nvidia-smi --query-gpu=memory.used --format=csv --id={self.device_index}" 46 | 47 | while True: 48 | # py3nvml is broken since it outputs only the reserved memory, and nvidia-smi has only the MiB precision. 49 | gpu_mem_mb = subprocess.check_output(command.split()).decode("ascii").split("\n")[1].split()[0] 50 | gpu_mem_mb = int(gpu_mem_mb) * 1.048576 51 | self.mem_usage = max(self.mem_usage, gpu_mem_mb) 52 | 53 | if stop: 54 | break 55 | stop = self.connection.poll(self.interval) 56 | 57 | # send results to parent pipe 58 | self.connection.send(self.mem_usage) 59 | self.connection.close() 60 | -------------------------------------------------------------------------------- /optimum/exporters/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | from typing import TYPE_CHECKING 17 | 18 | from transformers.utils import _LazyModule 19 | 20 | 21 | _import_structure = { 22 | "base": ["OnnxConfig", "OnnxConfigWithLoss", "OnnxConfigWithPast", "OnnxSeq2SeqConfigWithPast"], 23 | "config": ["TextDecoderOnnxConfig", "TextEncoderOnnxConfig", "TextSeq2SeqOnnxConfig"], 24 | "convert": [ 25 | "export", 26 | "export_models", 27 | "validate_model_outputs", 28 | "validate_models_outputs", 29 | "onnx_export_from_model", 30 | ], 31 | "utils": [ 32 | "get_decoder_models_for_export", 33 | "get_encoder_decoder_models_for_export", 34 | "get_stable_diffusion_models_for_export", 35 | "MODEL_TYPES_REQUIRING_POSITION_IDS", 36 | ], 37 | "__main__": ["main_export"], 38 | } 39 | 40 | if TYPE_CHECKING: 41 | from .base import OnnxConfig, OnnxConfigWithLoss, OnnxConfigWithPast, OnnxSeq2SeqConfigWithPast # noqa 42 | from .config import TextDecoderOnnxConfig, TextEncoderOnnxConfig, TextSeq2SeqOnnxConfig # noqa 43 | from .convert import ( 44 | export, 45 | export_models, 46 | validate_model_outputs, 47 | validate_models_outputs, 48 | onnx_export_from_model, 49 | ) # noqa 50 | from .utils import ( 51 | get_decoder_models_for_export, 52 | get_encoder_decoder_models_for_export, 53 | get_stable_diffusion_models_for_export, 54 | MODEL_TYPES_REQUIRING_POSITION_IDS, 55 | ) 56 | from .__main__ import main_export 57 | else: 58 | import sys 59 | 60 | sys.modules[__name__] = _LazyModule( 61 | __name__, 62 | globals()["__file__"], 63 | _import_structure, 64 | module_spec=__spec__, 65 | ) 66 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.DS_Store 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 
93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | # Models 133 | *.onnx 134 | # include small test model for tests 135 | !tests/assets/onnx/model.onnx 136 | 137 | .vscode -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/quantization.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from abc import ABC, abstractmethod 15 | from logging import getLogger 16 | from os import PathLike 17 | from pathlib import Path 18 | from typing import Optional, Set, Tuple, Union 19 | 20 | from onnx import ModelProto, load_model 21 | 22 | from onnxruntime.transformers.onnx_model import OnnxModel 23 | 24 | 25 | LOGGER = getLogger("GraphWalker") 26 | 27 | 28 | class PreprocessorPass(ABC): 29 | def __init__(self): 30 | self._logger = LOGGER 31 | 32 | @abstractmethod 33 | def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Optional[Set[str]], Optional[Set[str]]]: 34 | raise NotImplementedError() 35 | 36 | 37 | class QuantizationPreprocessor: 38 | __slots__ = ("_passes",) 39 | 40 | def __init__(self): 41 | self._passes = [] 42 | 43 | def from_config(self, config): 44 | pass 45 | 46 | def register_pass(self, target: PreprocessorPass): 47 | if target not in self._passes: 48 | self._passes.append(target) 49 | 50 | def collect(self, model_or_path: Union[str, PathLike, Path, bytes]) -> Tuple[Set[str], Set[str]]: 51 | global_nodes_to_quantize, global_nodes_to_exclude = set(), set() 52 | graph = load_model(model_or_path.as_posix() if isinstance(model_or_path, Path) else model_or_path) 53 | model = OnnxModel(graph) 54 | 55 | for walking_pass in self._passes: 56 | nodes_to_quantize, nodes_to_exclude = walking_pass(graph, model) 57 | 58 | if nodes_to_quantize is not None: 59 | global_nodes_to_quantize.update(nodes_to_quantize) 60 | 61 | if nodes_to_exclude is not None: 62 | global_nodes_to_exclude.update(nodes_to_exclude) 63 | 64 | # Exclude the nodes from quantization when present in both sets 65 | global_nodes_to_quantize = global_nodes_to_quantize - global_nodes_to_exclude 66 | 67 | return global_nodes_to_quantize, global_nodes_to_exclude 68 | -------------------------------------------------------------------------------- /optimum/commands/env.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import platform 16 | 17 | import huggingface_hub 18 | from transformers import __version__ as transformers_version 19 | from transformers.utils import is_tf_available, is_torch_available 20 | 21 | from ..version import __version__ as version 22 | from . import BaseOptimumCLICommand, CommandInfo 23 | 24 | 25 | class EnvironmentCommand(BaseOptimumCLICommand): 26 | COMMAND = CommandInfo(name="env", help="Get information about the environment used.") 27 | 28 | @staticmethod 29 | def format_dict(d): 30 | return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n" 31 | 32 | def run(self): 33 | pt_version = "not installed" 34 | pt_cuda_available = "NA" 35 | if is_torch_available(): 36 | import torch 37 | 38 | pt_version = torch.__version__ 39 | pt_cuda_available = torch.cuda.is_available() 40 | 41 | tf_version = "not installed" 42 | tf_cuda_available = "NA" 43 | if is_tf_available(): 44 | import tensorflow as tf 45 | 46 | tf_version = tf.__version__ 47 | try: 48 | # deprecated in v2.1 49 | tf_cuda_available = tf.test.is_gpu_available() 50 | except AttributeError: 51 | # returns list of devices, convert to bool 52 | tf_cuda_available = bool(tf.config.list_physical_devices("GPU")) 53 | 54 | info = { 55 | "`optimum` version": version, 56 | "`transformers` version": transformers_version, 57 | "Platform": platform.platform(), 58 | "Python version": platform.python_version(), 59 | "Huggingface_hub version": huggingface_hub.__version__, 60 | "PyTorch version (GPU?)": f"{pt_version} (cuda available: {pt_cuda_available})", 61 | "Tensorflow version (GPU?)": f"{tf_version} (cuda available: {tf_cuda_available})", 62 | } 63 | 64 | print("\nCopy-and-paste the text below in your GitHub issue:\n") 65 | print(self.format_dict(info)) 66 | 67 | return info 68 | -------------------------------------------------------------------------------- /optimum/utils/dummy_diffusers_objects.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from .import_utils import DummyObject, requires_backends 16 | 17 | 18 | class ORTStableDiffusionPipeline(metaclass=DummyObject): 19 | _backends = ["diffusers"] 20 | 21 | def __init__(self, *args, **kwargs): 22 | requires_backends(self, ["diffusers"]) 23 | 24 | @classmethod 25 | def from_pretrained(cls, *args, **kwargs): 26 | requires_backends(cls, ["diffusers"]) 27 | 28 | 29 | class ORTStableDiffusionImg2ImgPipeline(metaclass=DummyObject): 30 | _backends = ["diffusers"] 31 | 32 | def __init__(self, *args, **kwargs): 33 | requires_backends(self, ["diffusers"]) 34 | 35 | @classmethod 36 | def from_pretrained(cls, *args, **kwargs): 37 | requires_backends(cls, ["diffusers"]) 38 | 39 | 40 | class ORTStableDiffusionInpaintPipeline(metaclass=DummyObject): 41 | _backends = ["diffusers"] 42 | 43 | def __init__(self, *args, **kwargs): 44 | requires_backends(self, ["diffusers"]) 45 | 46 | @classmethod 47 | def from_pretrained(cls, *args, **kwargs): 48 | requires_backends(cls, ["diffusers"]) 49 | 50 | 51 | class ORTStableDiffusionXLPipeline(metaclass=DummyObject): 52 | _backends = ["diffusers"] 53 | 54 | def __init__(self, *args, **kwargs): 55 | requires_backends(self, ["diffusers"]) 56 | 57 | @classmethod 58 | def from_pretrained(cls, *args, **kwargs): 59 | requires_backends(cls, ["diffusers"]) 60 | 61 | 62 | class ORTStableDiffusionXLImg2ImgPipeline(metaclass=DummyObject): 63 | _backends = ["diffusers"] 64 | 65 | def __init__(self, *args, **kwargs): 66 | requires_backends(self, ["diffusers"]) 67 | 68 | @classmethod 69 | def from_pretrained(cls, *args, **kwargs): 70 | requires_backends(cls, ["diffusers"]) 71 | 72 | 73 | class ORTLatentConsistencyModelPipeline(metaclass=DummyObject): 74 | _backends = ["diffusers"] 75 | 76 | def __init__(self, *args, **kwargs): 77 | requires_backends(self, ["diffusers"]) 78 | 79 | @classmethod 80 | def from_pretrained(cls, *args, **kwargs): 81 | requires_backends(cls, ["diffusers"]) 82 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/summarization/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | ## Summarization 18 | 19 | By running the script [`run_summarization.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/summarization/run_summarization.py), 20 | you will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to fine-tune and evaluate models from the 21 | [HuggingFace hub](https://huggingface.co/models) on summarization tasks. 22 | 23 | ### Supported models 24 | 25 | In principle, all sequence-to-sequence models with [ONNXConfig](https://github.com/huggingface/transformers/blob/main/src/transformers/onnx/features.py) support in Transformers should work. Here are the models that the Optimum team has tested and validated: 26 | 27 | * `Bart` 28 | * `T5` 29 | 30 | `run_summarization.py` is a lightweight example of how to download and preprocess a dataset from the 🤗 Datasets library or use your own files (jsonlines or csv), then fine-tune one of the architectures above on it. 31 | 32 | 33 | __The following example applies the acceleration features powered by ONNX Runtime.__ 34 | 35 | 36 | ### ONNX Runtime Training 37 | 38 | The following example fine-tunes a T5 model on the CNN/DailyMail dataset.
39 | 40 | ```bash 41 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_summarization.py \ 42 | --model_name_or_path t5-small \ 43 | --dataset_name cnn_dailymail \ 44 | --dataset_config "3.0.0" \ 45 | --source_prefix "summarize: " \ 46 | --do_train \ 47 | --do_eval \ 48 | --per_device_train_batch_size=4 \ 49 | --per_device_eval_batch_size=4 \ 50 | --output_dir /tmp/ort_summarization_t5/ \ 51 | --overwrite_output_dir \ 52 | --predict_with_generate 53 | ``` 54 | 55 | __Note__ 56 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. Install the dependencies either with our prepared* 57 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 58 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 59 | 60 | > *The inference will use PyTorch by default; if you want to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.* 61 | --- -------------------------------------------------------------------------------- /docs/source/exporters/onnx/package_reference/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration classes for ONNX exports 14 | 15 | Exporting a model to ONNX involves specifying: 16 | 1. The input names. 17 | 2. The output names. 18 | 3. The dynamic axes. These refer to the input dimensions that can be changed dynamically at runtime (e.g. a batch size or sequence length). 19 | All other axes will be treated as static, and hence fixed at runtime. 20 | 4. Dummy inputs to trace the model. This is needed in PyTorch to record the computational graph and convert it to ONNX. 21 | 22 | Since this data depends on the choice of model and task, we represent it in terms of _configuration classes_. Each configuration class is associated with 23 | a specific model architecture, and follows the naming convention `ArchitectureNameOnnxConfig`. For instance, the configuration which specifies the ONNX 24 | export of BERT models is `BertOnnxConfig`. 25 | 26 | Since many architectures share similar properties for their ONNX configuration, 🤗 Optimum adopts a 3-level class hierarchy: 27 | 1. Abstract and generic base classes. These handle all the fundamental features, while being agnostic to the modality (text, image, audio, etc). 28 | 2. Middle-end classes. These are aware of the modality, but multiple classes can exist for the same modality depending on the inputs they support. 29 | They specify which input generators should be used for the dummy inputs, but remain model-agnostic. 30 | 3. Model-specific classes like the `BertOnnxConfig` mentioned above. These are the ones actually used to export models; a simplified sketch of such a class is shown below.
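For illustration, here is a minimal sketch of what a model-specific configuration can look like. The class name is hypothetical and the body is simplified to the bare pattern: subclass a middle-end class, attach a normalized config, and declare the inputs together with their dynamic axes.

```python
from typing import Dict

from optimum.exporters.onnx.config import TextEncoderOnnxConfig
from optimum.utils import NormalizedTextConfig


# Hypothetical example for a BERT-like encoder, shown only to illustrate the pattern.
# Real classes such as `BertOnnxConfig` may define additional attributes (default opset,
# task-specific axes, ...).
class MyBertLikeOnnxConfig(TextEncoderOnnxConfig):
    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig

    @property
    def inputs(self) -> Dict[str, Dict[int, str]]:
        # Every axis mapped to a name here is dynamic; unnamed axes stay static in the exported graph.
        dynamic_axis = {0: "batch_size", 1: "sequence_length"}
        return {
            "input_ids": dynamic_axis,
            "attention_mask": dynamic_axis,
            "token_type_ids": dynamic_axis,
        }
```

The keys of `inputs` become the ONNX input names, and the integer-to-name mappings mark which axes are dynamic; the middle-end parent class takes care of generating the matching dummy inputs.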
31 | 32 | 33 | ## Base classes 34 | 35 | [[autodoc]] exporters.onnx.OnnxConfig 36 | - inputs 37 | - outputs 38 | - generate_dummy_inputs 39 | 40 | [[autodoc]] exporters.onnx.OnnxConfigWithPast 41 | - add_past_key_values 42 | 43 | [[autodoc]] exporters.onnx.OnnxSeq2SeqConfigWithPast 44 | 45 | ## Middle-end classes 46 | 47 | ### Text 48 | 49 | [[autodoc]] exporters.onnx.config.TextEncoderOnnxConfig 50 | 51 | [[autodoc]] exporters.onnx.config.TextDecoderOnnxConfig 52 | 53 | [[autodoc]] exporters.onnx.config.TextSeq2SeqOnnxConfig 54 | 55 | 56 | ### Vision 57 | 58 | [[autodoc]] exporters.onnx.config.VisionOnnxConfig 59 | 60 | 61 | ### Multi-modal 62 | 63 | [[autodoc]] exporters.onnx.config.TextAndVisionOnnxConfig 64 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/token-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Token classification 18 | 19 | ## NER Tasks 20 | 21 | By running the script [`run_ner.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/token-classification/run_ner.py), 22 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to fine-tune the models from the 23 | [HuggingFace hub](https://huggingface.co/models) for token classification tasks such as Named Entity Recognition (NER). 24 | 25 | 26 | __The following example applies the acceleration features powered by ONNX Runtime.__ 27 | 28 | 29 | ### ONNX Runtime Training 30 | 31 | The following example fine-tunes a BERT model on the CoNLL-2003 NER dataset. 32 | 33 | ```bash 34 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_ner.py \ 35 | --model_name_or_path bert-base-cased \ 36 | --dataset_name conll2003 \ 37 | --do_train \ 38 | --do_eval \ 39 | --output_dir /tmp/ort_bert_conll2003/ 40 | ``` 41 | 42 | ### Performance 43 | 44 | We get the following results for the [bert-large-cased](https://huggingface.co/bert-large-cased) model with mixed precision training (fp16) on the previous 45 | task under PyTorch and ONNX Runtime backends. A single Nvidia A100 card was used to run the experiment for 7 epochs: 46 | 47 | | Model | Backend | Runtime (s) | Train samples (/s) | 48 | | ---------------- | ------------ | ---------- | ----------------- | 49 | | bert-large-cased | PyTorch | 711.5 | 138.1 | 50 | | bert-large-cased | ONNX Runtime | 637.2 | 154.3 | 51 | 52 | We observe the gain of ONNX Runtime compared to PyTorch as follows: 53 | 54 | | | Latency | Throughput | 55 | | ----- | ------- | ---------- | 56 | | Gain | 10.45% | 11.67% | 57 | 58 | 59 | __Note__ 60 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. Install the dependencies either with our prepared* 61 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 62 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 63 | 64 | > *The inference will use PyTorch by default; if you want to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.* 65 | --- 66 | -------------------------------------------------------------------------------- /optimum/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from .constant import ( 17 | CONFIG_NAME, 18 | DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, 19 | DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER, 20 | DIFFUSION_MODEL_UNET_SUBFOLDER, 21 | DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER, 22 | DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, 23 | ONNX_WEIGHTS_NAME, 24 | ) 25 | from .import_utils import ( 26 | DIFFUSERS_MINIMUM_VERSION, 27 | ORT_QUANTIZE_MINIMUM_VERSION, 28 | TORCH_MINIMUM_VERSION, 29 | TRANSFORMERS_MINIMUM_VERSION, 30 | check_if_diffusers_greater, 31 | check_if_pytorch_greater, 32 | check_if_transformers_greater, 33 | is_accelerate_available, 34 | is_auto_gptq_available, 35 | is_diffusers_available, 36 | is_onnx_available, 37 | is_onnxruntime_available, 38 | is_pydantic_available, 39 | is_sentence_transformers_available, 40 | is_timm_available, 41 | is_torch_onnx_support_available, 42 | require_numpy_strictly_lower, 43 | torch_version, 44 | ) 45 | from .input_generators import ( 46 | DEFAULT_DUMMY_SHAPES, 47 | DTYPE_MAPPER, 48 | BloomDummyPastKeyValuesGenerator, 49 | DummyAudioInputGenerator, 50 | DummyBboxInputGenerator, 51 | DummyCodegenDecoderTextInputGenerator, 52 | DummyDecoderTextInputGenerator, 53 | DummyEncodecInputGenerator, 54 | DummyInputGenerator, 55 | DummyIntGenerator, 56 | DummyLabelsGenerator, 57 | DummyPastKeyValuesGenerator, 58 | DummyPix2StructInputGenerator, 59 | DummyPointsGenerator, 60 | DummySeq2SeqDecoderTextInputGenerator, 61 | DummySeq2SeqPastKeyValuesGenerator, 62 | DummySpeechT5InputGenerator, 63 | DummyTextInputGenerator, 64 | DummyTimestepInputGenerator, 65 | DummyVisionEmbeddingsGenerator, 66 | DummyVisionEncoderDecoderPastKeyValuesGenerator, 67 | DummyVisionInputGenerator, 68 | DummyXPathSeqInputGenerator, 69 | FalconDummyPastKeyValuesGenerator, 70 | GemmaDummyPastKeyValuesGenerator, 71 | GPTBigCodeDummyPastKeyValuesGenerator, 72 | MistralDummyPastKeyValuesGenerator, 73 | MultiQueryPastKeyValuesGenerator, 74 | ) 75 | from .modeling_utils import recurse_getattr, recurse_setattr 76 | from .normalized_config import ( 77 | NormalizedConfig, 78 | NormalizedConfigManager, 79 | NormalizedEncoderDecoderConfig, 80 | NormalizedSeq2SeqConfig, 81 | NormalizedTextAndVisionConfig, 82 | NormalizedTextConfig, 83 | NormalizedVisionConfig, 84 | ) 85 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F41B Bug Report" 2 | description: Submit a bug report to help us improve Optimum 3 | labels: [ "bug" ] 4 | body: 5 | - type: textarea 6 | id: system-info 7 | attributes: 8 | label: System Info 9 | description: Please share your system info with us. 10 | render: shell 11 | placeholder: optimum version, platform, python version, ... 12 | validations: 13 | required: true 14 | 15 | - type: textarea 16 | id: who-can-help 17 | attributes: 18 | label: Who can help? 
19 | description: | 20 | Your issue will be replied to more quickly if you can figure out the right person to tag with @ 21 | If you know how to use git blame, that is the easiest way; otherwise, here is a rough guide of **who to tag**. 22 | Please tag fewer than 3 people. 23 | 24 | - Pipelines: `@philschmid` 25 | - Export of transformers model to ONNX/TFLite: `@michaelbenayoun` 26 | - ONNX Runtime: `@JingyaHuang`, `@echarlaix` 27 | - Intel Neural Compressor: `@echarlaix` 28 | - Habana: `@regisss` 29 | 30 | placeholder: "@Username ..." 31 | 32 | - type: checkboxes 33 | id: information-scripts-examples 34 | attributes: 35 | label: Information 36 | description: 'The problem arises when using:' 37 | options: 38 | - label: "The official example scripts" 39 | - label: "My own modified scripts" 40 | 41 | - type: checkboxes 42 | id: information-tasks 43 | attributes: 44 | label: Tasks 45 | description: "The tasks I am working on are:" 46 | options: 47 | - label: "An officially supported task in the `examples` folder (such as GLUE/SQuAD, ...)" 48 | - label: "My own task or dataset (give details below)" 49 | 50 | - type: textarea 51 | id: reproduction 52 | validations: 53 | required: true 54 | attributes: 55 | label: Reproduction (minimal, reproducible, runnable) 56 | description: | 57 | Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet. 58 | If you have code snippets, error messages, or stack traces, please provide them here as well. 59 | Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting 60 | Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code. 61 | Providing a **minimal**, **reproducible** example using a **publicly available model** significantly increases the chances of a fix in a timely manner. 62 | 63 | placeholder: | 64 | Providing a minimal, reproducible example using a publicly available model significantly increases the chances of a fix in a timely manner. 65 | 66 | 67 | - type: textarea 68 | id: expected-behavior 69 | validations: 70 | required: true 71 | attributes: 72 | label: Expected behavior 73 | description: "A clear and concise description of what you would expect to happen." 74 | --------------------------------------------------------------------------------