├── .github ├── ISSUE_TEMPLATE │ ├── bug-report.yml │ ├── config.yml │ └── feature-request.yml ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── build_main_documentation.yml │ ├── build_pr_documentation.yml │ ├── quality.yml │ ├── stale.yml │ ├── test_bettertransformer.yml │ ├── test_cli.yml │ ├── test_common.yml │ ├── test_exporters_common.yml │ ├── test_exporters_onnx.yml │ ├── test_exporters_onnx_cli.yml │ ├── test_exporters_tflite.yml │ ├── test_exporters_tflite_cli.yml │ ├── test_fx_automatic_parallelism.yml │ ├── test_fx_optimization.yml │ ├── test_gptq.yml │ ├── test_offline.yml │ ├── test_onnx.yml │ ├── test_onnxruntime.yml │ ├── test_onnxruntime_gpu.yml │ ├── test_onnxruntime_slow.yml │ ├── test_onnxruntime_training.yml │ ├── test_utils.yml │ ├── trufflehog.yml │ └── upload_pr_documentation.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── docs ├── Dockerfile ├── README.md ├── combine_docs.py ├── conftest.py └── source │ ├── _redirects.yml │ ├── _toctree.yml │ ├── bettertransformer │ ├── overview.mdx │ └── tutorials │ │ ├── contribute.mdx │ │ └── convert.mdx │ ├── concept_guides │ └── quantization.mdx │ ├── exporters │ ├── onnx │ │ ├── overview.mdx │ │ ├── package_reference │ │ │ ├── configuration.mdx │ │ │ └── export.mdx │ │ └── usage_guides │ │ │ ├── contribute.mdx │ │ │ └── export_a_model.mdx │ ├── overview.mdx │ ├── task_manager.mdx │ └── tflite │ │ ├── overview.mdx │ │ ├── package_reference │ │ ├── configuration.mdx │ │ └── export.mdx │ │ └── usage_guides │ │ ├── contribute.mdx │ │ └── export_a_model.mdx │ ├── furiosa_overview.mdx │ ├── index.mdx │ ├── installation.mdx │ ├── llm_quantization │ └── usage_guides │ │ └── quantization.mdx │ ├── notebooks.md │ ├── nvidia_overview.mdx │ ├── onnxruntime │ ├── concept_guides │ │ └── onnx.mdx │ ├── overview.mdx │ ├── package_reference │ │ ├── configuration.mdx │ │ ├── modeling_ort.mdx │ │ ├── optimization.mdx │ │ ├── quantization.mdx │ │ └── trainer.mdx │ ├── quickstart.mdx │ └── usage_guides │ │ ├── amdgpu.mdx │ │ ├── gpu.mdx │ │ ├── models.mdx │ │ ├── optimization.mdx │ │ ├── pipelines.mdx │ │ ├── quantization.mdx │ │ └── trainer.mdx │ ├── quicktour.mdx │ ├── torch_fx │ ├── concept_guides │ │ └── symbolic_tracer.mdx │ ├── overview.mdx │ ├── package_reference │ │ └── optimization.mdx │ └── usage_guides │ │ └── optimization.mdx │ └── utils │ ├── dummy_input_generators.mdx │ └── normalized_config.mdx ├── examples └── onnxruntime │ ├── optimization │ ├── multiple-choice │ │ ├── README.md │ │ ├── requirements.txt │ │ └── run_swag.py │ ├── question-answering │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_qa.py │ │ ├── trainer_qa.py │ │ └── utils_qa.py │ ├── text-classification │ │ ├── README.md │ │ ├── requirements.txt │ │ └── run_glue.py │ └── token-classification │ │ ├── README.md │ │ ├── requirements.txt │ │ └── run_ner.py │ ├── quantization │ ├── image-classification │ │ ├── README.md │ │ ├── requirements.txt │ │ └── run_image_classification.py │ ├── multiple-choice │ │ ├── README.md │ │ ├── requirements.txt │ │ └── run_swag.py │ ├── question-answering │ │ ├── README.md │ │ ├── requirements.txt │ │ ├── run_qa.py │ │ ├── trainer_qa.py │ │ └── utils_qa.py │ ├── text-classification │ │ ├── README.md │ │ ├── requirements.txt │ │ └── run_glue.py │ └── token-classification │ │ ├── README.md │ │ ├── requirements.txt │ │ └── run_ner.py │ └── training │ ├── docker │ ├── Dockerfile-ort-nightly-cu118 │ ├── Dockerfile-ort-nightly-rocm57 │ └── Dockerfile-ort1.17.1-cu118 
│ ├── image-classification │ ├── README.md │ ├── requirements.txt │ └── run_image_classification.py │ ├── language-modeling │ ├── README.md │ ├── requirements.txt │ ├── run_clm.py │ └── run_mlm.py │ ├── question-answering │ ├── README.md │ ├── requirements.txt │ ├── run_qa.py │ ├── trainer_qa.py │ └── utils_qa.py │ ├── stable-diffusion │ └── text-to-image │ │ ├── README.md │ │ ├── requirements.txt │ │ └── train_text_to_image.py │ ├── summarization │ ├── README.md │ ├── requirements.txt │ └── run_summarization.py │ ├── text-classification │ ├── README.md │ ├── requirements.txt │ ├── run_classification.py │ ├── run_glue.py │ └── zero_stage_2.json │ ├── token-classification │ ├── README.md │ ├── requirements.txt │ └── run_ner.py │ └── translation │ ├── README.md │ ├── requirements.txt │ └── run_translation.py ├── notebooks └── README.md ├── optimum ├── bettertransformer │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ ├── attention.py │ │ ├── base.py │ │ ├── decoder_models.py │ │ └── encoder_models.py │ └── transformation.py ├── commands │ ├── __init__.py │ ├── base.py │ ├── env.py │ ├── export │ │ ├── __init__.py │ │ ├── base.py │ │ ├── onnx.py │ │ └── tflite.py │ ├── optimum_cli.py │ └── register │ │ ├── README.md │ │ └── __init__.py ├── configuration_utils.py ├── conftest.py ├── exporters │ ├── __init__.py │ ├── base.py │ ├── error_utils.py │ ├── onnx │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── _traceable_cache.py │ │ ├── base.py │ │ ├── config.py │ │ ├── constants.py │ │ ├── convert.py │ │ ├── model_configs.py │ │ ├── model_patcher.py │ │ └── utils.py │ ├── tasks.py │ ├── tflite │ │ ├── __init__.py │ │ ├── __main__.py │ │ ├── base.py │ │ ├── config.py │ │ ├── convert.py │ │ └── model_configs.py │ └── utils.py ├── fx │ ├── __init__.py │ ├── optimization │ │ ├── __init__.py │ │ └── transformations.py │ ├── parallelization │ │ ├── __init__.py │ │ ├── api.py │ │ ├── core.py │ │ ├── decomp.py │ │ ├── distributed │ │ │ ├── __init__.py │ │ │ └── dist_ops.py │ │ ├── op_registry │ │ │ ├── __init__.py │ │ │ └── op_handlers.py │ │ ├── parallel_layers │ │ │ ├── __init__.py │ │ │ ├── embedding.py │ │ │ ├── linear.py │ │ │ └── loss.py │ │ ├── passes.py │ │ └── utils.py │ └── utils.py ├── gptq │ ├── __init__.py │ ├── constants.py │ ├── data.py │ ├── eval.py │ ├── quantizer.py │ └── utils.py ├── modeling_base.py ├── onnx │ ├── __init__.py │ ├── configuration.py │ ├── graph_transformations.py │ ├── modeling_seq2seq.py │ ├── transformations_utils.py │ └── utils.py ├── onnxruntime │ ├── __init__.py │ ├── base.py │ ├── configuration.py │ ├── constants.py │ ├── modeling_decoder.py │ ├── modeling_diffusion.py │ ├── modeling_ort.py │ ├── modeling_seq2seq.py │ ├── optimization.py │ ├── preprocessors │ │ ├── __init__.py │ │ ├── passes │ │ │ ├── __init__.py │ │ │ ├── excluders.py │ │ │ ├── fully_connected.py │ │ │ ├── gelu.py │ │ │ └── layernorm.py │ │ └── quantization.py │ ├── quantization.py │ ├── runs │ │ ├── __init__.py │ │ ├── calibrator.py │ │ └── utils.py │ ├── subpackage │ │ ├── __init__.py │ │ └── commands │ │ │ ├── __init__.py │ │ │ ├── base.py │ │ │ ├── optimize.py │ │ │ └── quantize.py │ ├── trainer.py │ ├── trainer_seq2seq.py │ ├── training_args.py │ ├── training_args_seq2seq.py │ └── utils.py ├── pipelines │ ├── __init__.py │ └── pipelines_base.py ├── quantization_base.py ├── runs_base.py ├── subpackages.py ├── utils │ ├── __init__.py │ ├── constant.py │ ├── doc.py │ ├── dummy_bettertransformer_objects.py │ ├── dummy_diffusers_objects.py │ ├── file_utils.py │ ├── import_utils.py │ ├── 
input_generators.py │ ├── logging.py │ ├── modeling_utils.py │ ├── normalized_config.py │ ├── preprocessing │ │ ├── __init__.py │ │ ├── base.py │ │ ├── image_classification.py │ │ ├── question_answering.py │ │ ├── task_processors_manager.py │ │ ├── text_classification.py │ │ └── token_classification.py │ ├── runs.py │ ├── save_utils.py │ └── testing_utils.py └── version.py ├── pyproject.toml ├── setup.cfg ├── setup.py └── tests ├── README.md ├── __init__.py ├── assets └── onnx │ ├── config.json │ └── model.onnx ├── bettertransformer ├── Dockerfile_bettertransformer_gpu ├── test_audio.py ├── test_common.py ├── test_decoder.py ├── test_encoder.py ├── test_encoder_decoder.py ├── test_gpu.py ├── test_vision.py └── testing_utils.py ├── cli ├── cli_with_custom_command.py └── test_cli.py ├── common └── test_configuration_utils.py ├── exporters ├── __init__.py ├── common │ └── test_tasks_manager.py ├── onnx │ ├── __init__.py │ ├── test_export.py │ └── test_export_cli.py ├── tflite │ ├── __init__.py │ ├── test_export.py │ └── test_export_cli.py └── utils.py ├── fx ├── optimization │ └── test_transformations.py └── parallelization │ ├── dist_utils.py │ └── test_tensor_parallel.py ├── gptq └── test_quantization.py ├── onnx ├── test_onnx_export_custom_module.py └── test_onnx_graph_transformations.py ├── onnxruntime-training ├── ds_configs │ ├── ds_config_zero_stage_1.json │ ├── ds_config_zero_stage_2.json │ ├── ds_config_zero_stage_3.json │ └── ds_config_zero_stage_inifinity.json ├── test_examples.py └── test_trainer.py ├── onnxruntime ├── test_decoder.py ├── test_diffusion.py ├── test_modeling.py ├── test_optimization.py ├── test_quantization.py ├── test_timm.py ├── test_utils.py └── testing_utils.py ├── run_doctest.sh └── utils ├── documentation_tests.txt ├── prepare_for_doc_test.py ├── test_dummpy_input_generators.py └── test_task_processors.py /.github/ISSUE_TEMPLATE/bug-report.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F41B Bug Report" 2 | description: Submit a bug report to help us improve Optimum 3 | labels: [ "bug" ] 4 | body: 5 | - type: textarea 6 | id: system-info 7 | attributes: 8 | label: System Info 9 | description: Please share your system info with us. 10 | render: shell 11 | placeholder: optimum version, platform, python version, ... 12 | validations: 13 | required: true 14 | 15 | - type: textarea 16 | id: who-can-help 17 | attributes: 18 | label: Who can help? 19 | description: | 20 | Your issue will be replied to more quickly if you can figure out the right person to tag with @ 21 | If you know how to use git blame, that is the easiest way, otherwise, here is a rough guide of **who to tag**. 22 | Please tag fewer than 3 people. 23 | 24 | - Pipelines: `@philschmid` 25 | - Export of transformers model to ONNX/TFLite: `@michaelbenayoun` 26 | - ONNX Runtime: `@JingyaHuang`, `@echarlaix` 27 | - Intel Neural Compressor: `@echarlaix` 28 | - Habana: `@regisss` 29 | 30 | placeholder: "@Username ..." 
31 | 32 | - type: checkboxes 33 | id: information-scripts-examples 34 | attributes: 35 | label: Information 36 | description: 'The problem arises when using:' 37 | options: 38 | - label: "The official example scripts" 39 | - label: "My own modified scripts" 40 | 41 | - type: checkboxes 42 | id: information-tasks 43 | attributes: 44 | label: Tasks 45 | description: "The tasks I am working on are:" 46 | options: 47 | - label: "An officially supported task in the `examples` folder (such as GLUE/SQuAD, ...)" 48 | - label: "My own task or dataset (give details below)" 49 | 50 | - type: textarea 51 | id: reproduction 52 | validations: 53 | required: true 54 | attributes: 55 | label: Reproduction (minimal, reproducible, runnable) 56 | description: | 57 | Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet. 58 | If you have code snippets, error messages, stack traces please provide them here as well. 59 | Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting 60 | Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code. 61 | Providing a **minimal**, **reproducible** reproduction using a **publicly available model** significantly increase the chances of a fix in a timely manner. 62 | 63 | placeholder: | 64 | Providing a minimal, reproducible reproduction using a publicly available model significantly increase the chances of a fix in a timely manner. 65 | 66 | 67 | - type: textarea 68 | id: expected-behavior 69 | validations: 70 | required: true 71 | attributes: 72 | label: Expected behavior 73 | description: "A clear and concise description of what you would expect to happen." 74 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: true 2 | version: 2.1 3 | contact_links: 4 | - name: Website Related 5 | url: https://github.com/huggingface/hub-docs/issues 6 | about: Feature requests and bug reports related to the website 7 | - name: Forum 8 | url: https://discuss.huggingface.co/ 9 | about: General usage questions and community discussions -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-request.yml: -------------------------------------------------------------------------------- 1 | name: "\U0001F680 Feature request" 2 | description: Submit a proposal/request for a new optimum feature 3 | labels: [ "feature" ] 4 | body: 5 | - type: textarea 6 | id: feature-request 7 | validations: 8 | required: true 9 | attributes: 10 | label: Feature request 11 | description: | 12 | A clear and concise description of the feature proposal. Please provide a link to the paper and code in case they exist. 13 | 14 | - type: textarea 15 | id: motivation 16 | validations: 17 | required: true 18 | attributes: 19 | label: Motivation 20 | description: | 21 | Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too. 
22 | 23 | 24 | - type: textarea 25 | id: contribution 26 | validations: 27 | required: true 28 | attributes: 29 | label: Your contribution 30 | description: | 31 | Is there any way that you could help, e.g. by submitting a PR? 32 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # What does this PR do? 2 | 3 | 12 | 13 | 14 | 15 | Fixes # (issue) 16 | 17 | 18 | ## Before submitting 19 | - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case). 20 | - [ ] Did you make sure to update the documentation with your changes? 21 | - [ ] Did you write any new necessary tests? 22 | 23 | ## Who can review? 24 | 25 | 33 | -------------------------------------------------------------------------------- /.github/workflows/quality.yml: -------------------------------------------------------------------------------- 1 | name: Code Quality 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | build: 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [3.9] 19 | runs-on: [ubuntu-22.04] 20 | 21 | runs-on: ${{ matrix.runs-on }} 22 | 23 | steps: 24 | - name: Checkout code 25 | uses: actions/checkout@v4 26 | 27 | - name: Setup Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v5 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | 32 | - name: Install dependencies 33 | run: | 34 | pip install --upgrade pip 35 | pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 36 | pip install .[quality] 37 | 38 | - name: Check style with black 39 | run: | 40 | black --check . 41 | 42 | - name: Check style with ruff 43 | run: | 44 | ruff . 45 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Close stale issues and PRs 2 | 3 | on: 4 | schedule: 5 | - cron: '30 1 * * *' 6 | 7 | permissions: 8 | issues: write 9 | pull-requests: write 10 | 11 | jobs: 12 | stale: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/stale@v8 16 | with: 17 | stale-issue-message: 'This issue has been marked as stale because it has been open for 30 days with no activity. This thread will be automatically closed in 5 days if no further activity occurs.' 18 | stale-pr-message: 'This PR has been marked as stale because it has been open for 90 days with no activity. This thread will be automatically closed in 30 days if no further activity occurs.' 
19 | exempt-issue-labels: 'bug,exporters,good first issue,onnx,onnxruntime,quantization' 20 | days-before-issue-stale: 30 21 | days-before-issue-close: 5 22 | days-before-pr-stale: 90 23 | days-before-pr-close: 30 24 | exempt-all-pr-assignees: true -------------------------------------------------------------------------------- /.github/workflows/test_bettertransformer.yml: -------------------------------------------------------------------------------- 1 | name: BetterTransformer / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | TRANSFORMERS_IS_CI: true 15 | 16 | jobs: 17 | build: 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | python-version: [3.9] 22 | runs-on: [ubuntu-22.04] 23 | 24 | runs-on: ${{ matrix.runs-on }} 25 | 26 | steps: 27 | - name: Checkout code 28 | uses: actions/checkout@v4 29 | 30 | - name: Setup Python ${{ matrix.python-version }} 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | 35 | - name: Install dependencies (stable pytorch) 36 | run: | 37 | pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 38 | pip install .[tests] transformers==4.48.* 39 | 40 | - name: Test with pytest (stable pytorch) 41 | run: | 42 | pytest tests/bettertransformer -n auto -vvvvv 43 | 44 | - name: Install dependencies (nightly pytorch) 45 | run: | 46 | pip install --pre --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu 47 | 48 | - name: Test with pytest (nightly pytorch) 49 | run: | 50 | pytest tests/bettertransformer -n auto -vvvv 51 | -------------------------------------------------------------------------------- /.github/workflows/test_cli.yml: -------------------------------------------------------------------------------- 1 | name: Optimum CLI / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | TRANSFORMERS_IS_CI: true 15 | 16 | jobs: 17 | build: 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | python-version: [3.9] 22 | runs-on: [ubuntu-22.04, macos-13, windows-2022] 23 | 24 | runs-on: ${{ matrix.runs-on }} 25 | 26 | steps: 27 | - name: Checkout code 28 | uses: actions/checkout@v4 29 | 30 | - name: Setup Python ${{ matrix.python-version }} 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | 35 | - name: Install dependencies 36 | run: | 37 | pip install --upgrade pip 38 | pip install --no-cache-dir torch==2.1.2 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 39 | pip install .[tests,exporters,exporters-tf] 40 | 41 | - name: Test with pytest 42 | run: | 43 | pytest tests/cli -vvvv --durations=0 44 | -------------------------------------------------------------------------------- /.github/workflows/test_common.yml: -------------------------------------------------------------------------------- 1 | name: Common / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | TRANSFORMERS_IS_CI: true 15 | 16 | jobs: 17 
| build: 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | python-version: [3.9] 22 | runs-on: [ubuntu-22.04, windows-2019, macos-14] 23 | 24 | runs-on: ${{ matrix.runs-on }} 25 | 26 | steps: 27 | - name: Checkout code 28 | uses: actions/checkout@v4 29 | 30 | - name: Setup Python ${{ matrix.python-version }} 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | 35 | - name: Install dependencies 36 | run: | 37 | pip install --upgrade pip 38 | pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 39 | pip install .[tests] 40 | 41 | - name: Test with pytest 42 | run: | 43 | pytest tests/common -vvvv --durations=0 44 | env: 45 | HUGGINGFACE_CO_STAGING: ${{ matrix.python-version == '3.9' && matrix.runs-on == 'ubuntu-22.04' }} 46 | -------------------------------------------------------------------------------- /.github/workflows/test_exporters_common.yml: -------------------------------------------------------------------------------- 1 | name: Exporters Common / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | TRANSFORMERS_IS_CI: true 15 | 16 | jobs: 17 | build: 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | runs-on: [ubuntu-22.04] 22 | python-version: [3.9] 23 | 24 | runs-on: ${{ matrix.runs-on }} 25 | 26 | steps: 27 | - name: Checkout code 28 | uses: actions/checkout@v4 29 | 30 | - name: Setup Python ${{ matrix.python-version }} 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | 35 | - name: Install dependencies 36 | run: | 37 | pip install --upgrade pip 38 | pip install --no-cache-dir torch==2.1.2 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 39 | pip install .[tests,exporters,exporters-tf] 40 | 41 | - name: Test with pytest 42 | run: | 43 | pytest tests/exporters/common -vvvv --durations=0 -n auto 44 | -------------------------------------------------------------------------------- /.github/workflows/test_exporters_onnx.yml: -------------------------------------------------------------------------------- 1 | name: Exporters ONNX / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | TRANSFORMERS_IS_CI: true 15 | 16 | jobs: 17 | build: 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | python-version: [3.9] 22 | runs-on: [ubuntu-22.04] 23 | 24 | runs-on: ${{ matrix.runs-on }} 25 | 26 | steps: 27 | - name: Checkout code 28 | uses: actions/checkout@v4 29 | 30 | - name: Setup Python ${{ matrix.python-version }} 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | 35 | - name: Install dependencies 36 | run: | 37 | pip install --upgrade pip 38 | pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 39 | pip install .[tests,exporters] diffusers 40 | 41 | - name: Test with pytest 42 | run: | 43 | pytest tests/exporters/onnx/test_export.py -vvvv --durations=0 -n auto 44 | -------------------------------------------------------------------------------- /.github/workflows/test_exporters_onnx_cli.yml: 
-------------------------------------------------------------------------------- 1 | name: Exporters ONNX CLI / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | TRANSFORMERS_IS_CI: true 15 | 16 | jobs: 17 | build: 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | python-version: [3.9] 22 | os: [ubuntu-22.04] 23 | 24 | runs-on: ${{ matrix.os }} 25 | 26 | steps: 27 | - name: Checkout repository 28 | uses: actions/checkout@v4 29 | 30 | - name: Setup Python ${{ matrix.python-version }} 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | 35 | - name: Install dependencies 36 | run: | 37 | pip install --upgrade pip 38 | pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 39 | pip install .[tests,exporters] diffusers 40 | 41 | - name: Test with pytest 42 | run: | 43 | pytest tests/exporters/onnx/test_export_cli.py -vvvv --durations=0 -n auto 44 | -------------------------------------------------------------------------------- /.github/workflows/test_exporters_tflite.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [main] 7 | pull_request: 8 | branches: [main] 9 | types: [opened, synchronize, reopened, labeled, unlabeled] 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | env: 16 | TRANSFORMERS_IS_CI: true 17 | 18 | jobs: 19 | build: 20 | if: ${{ 21 | (github.event_name == 'push') || 22 | (github.event_name == 'workflow_dispatch') || 23 | contains( github.event.pull_request.labels.*.name, 'tflite' ) 24 | }} 25 | 26 | strategy: 27 | fail-fast: false 28 | matrix: 29 | python-version: [3.9] 30 | runs-on: [ubuntu-22.04] 31 | quantization_schema: 32 | [ 33 | "not quantization", 34 | float16_quantization, 35 | int8_dynamic_quantization, 36 | int8_quantization_with_custom_dataset, 37 | int8_quantization_with_default_dataset, 38 | int8x16_quantization_with_default_dataset, 39 | full_int8_quantization_with_default_dataset, 40 | ] 41 | 42 | runs-on: ${{ matrix.runs-on }} 43 | 44 | steps: 45 | - name: Checkout code 46 | uses: actions/checkout@v4 47 | 48 | - name: Setup Python ${{ matrix.python-version }} 49 | uses: actions/setup-python@v5 50 | with: 51 | python-version: ${{ matrix.python-version }} 52 | 53 | - name: Install dependencies 54 | run: | 55 | pip install --upgrade pip 56 | pip install --no-cache-dir torch==2.1.2 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 57 | pip install .[tests,exporters-tf] 58 | 59 | - name: Test with pytest 60 | run: | 61 | pytest tests/exporters/tflite/test_export.py -k "${{ matrix.quantization_schema }}" -vvvv --durations=0 -n auto 62 | -------------------------------------------------------------------------------- /.github/workflows/test_exporters_tflite_cli.yml: -------------------------------------------------------------------------------- 1 | name: Exporters TFLite CLI / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [main] 7 | pull_request: 8 | branches: [main] 9 | types: [opened, synchronize, reopened, labeled, unlabeled] 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref 
|| github.run_id }} 13 | cancel-in-progress: true 14 | 15 | env: 16 | TRANSFORMERS_IS_CI: true 17 | 18 | jobs: 19 | build: 20 | if: ${{ 21 | (github.event_name == 'push') || 22 | (github.event_name == 'workflow_dispatch') || 23 | contains( github.event.pull_request.labels.*.name, 'tflite' ) 24 | }} 25 | 26 | strategy: 27 | fail-fast: false 28 | matrix: 29 | python-version: [3.9] 30 | runs-on: [ubuntu-22.04] 31 | quantization_schema: 32 | [ 33 | "not quantization", 34 | float16_quantization, 35 | int8_dynamic_quantization, 36 | int8_quantization_with_custom_dataset, 37 | int8_quantization_with_default_dataset, 38 | int8x16_quantization_with_default_dataset, 39 | full_int8_quantization_with_default_dataset, 40 | ] 41 | 42 | runs-on: ${{ matrix.runs-on }} 43 | 44 | steps: 45 | - name: Checkout code 46 | uses: actions/checkout@v4 47 | 48 | - name: Setup Python ${{ matrix.python-version }} 49 | uses: actions/setup-python@v5 50 | with: 51 | python-version: ${{ matrix.python-version }} 52 | 53 | - name: Install dependencies 54 | run: | 55 | pip install --upgrade pip 56 | pip install --no-cache-dir torch==2.1.2 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 57 | pip install .[tests,exporters-tf] 58 | 59 | - name: Test with pytest 60 | run: | 61 | pytest tests/exporters/tflite/test_export_cli.py -k "${{ matrix.quantization_schema }}" -vvvv --durations=0 -n auto 62 | -------------------------------------------------------------------------------- /.github/workflows/test_fx_automatic_parallelism.yml: -------------------------------------------------------------------------------- 1 | name: FX Automatic Parallelism on GPU / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | # TODO: Uncomment when fixed 6 | # push: 7 | # branches: 8 | # - main 9 | # paths: 10 | # - 'optimum/fx/parallelization/**.py' 11 | # pull_request: 12 | # branches: 13 | # - main 14 | # paths: 15 | # - 'optimum/fx/parallelization/**.py' 16 | 17 | concurrency: 18 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 19 | cancel-in-progress: true 20 | 21 | env: 22 | TRANSFORMERS_IS_CI: true 23 | 24 | jobs: 25 | run_gpu_tests: 26 | runs-on: 27 | group: aws-g5-12xlarge-plus 28 | 29 | container: 30 | image: nvidia/cuda:12.4.1-devel-ubuntu22.04 31 | options: --mount type=tmpfs,destination=/tmp --shm-size 64gb --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/ 32 | env: 33 | NCCL_DEBUG: INFO 34 | defaults: 35 | run: 36 | shell: bash 37 | 38 | steps: 39 | - name: Checkout optimum 40 | uses: actions/checkout@v4 41 | with: 42 | fetch-depth: 1 43 | 44 | - uses: actions/setup-python@v5 45 | with: 46 | python-version: "3.10" 47 | 48 | - name: Run nvidia-smi 49 | run: | 50 | nvidia-smi 51 | 52 | - name: Install dependencies 53 | run: | 54 | pip install -U pip 55 | pip install .[tests] 56 | 57 | - name: Run automatic model parallelism tests 58 | run: | 59 | pytest tests/fx/parallelization -s -v -o log_cli=true 60 | -------------------------------------------------------------------------------- /.github/workflows/test_fx_optimization.yml: -------------------------------------------------------------------------------- 1 | name: FX Optimization / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | TRANSFORMERS_IS_CI: true 15 | 16 | jobs: 17 | build: 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | 
python-version: [3.9] 22 | os: [ubuntu-22.04] 23 | 24 | runs-on: ${{ matrix.os }} 25 | steps: 26 | - name: Checkout code 27 | uses: actions/checkout@v4 28 | 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v5 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | 34 | - name: Install dependencies 35 | run: | 36 | pip install --upgrade pip 37 | pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 38 | pip install .[tests] 39 | 40 | - name: Test with pytest 41 | run: | 42 | pytest tests/fx/optimization -n auto -vvvv 43 | -------------------------------------------------------------------------------- /.github/workflows/test_gptq.yml: -------------------------------------------------------------------------------- 1 | name: GPTQ / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [main] 7 | paths: 8 | - tests/gptq/** 9 | - optimum/gptq/** 10 | - .github/workflows/test_gptq.yml 11 | pull_request: 12 | branches: [main] 13 | paths: 14 | - tests/gptq/** 15 | - optimum/gptq/** 16 | - .github/workflows/test_gptq.yml 17 | schedule: 18 | # every day at midnight 19 | - cron: "0 0 * * *" 20 | 21 | jobs: 22 | test_gptq: 23 | runs-on: 24 | group: aws-g6-4xlarge-plus 25 | 26 | steps: 27 | - name: Checkout code 28 | uses: actions/checkout@v4 29 | 30 | - name: Run tests 31 | uses: addnab/docker-run-action@v3 32 | with: 33 | image: pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime 34 | # latest auto-gptq was built with pytorch 2.2 and cuda 12.1 35 | options: | 36 | --rm 37 | --gpus all 38 | --shm-size 16G 39 | --env RUN_SLOW=1 40 | --env HF_HOME=/mnt/cache/ 41 | --volume /mnt/cache/:/mnt/cache/ 42 | --volume ${{ github.workspace }}:/workspace 43 | --workdir /workspace 44 | run: | 45 | pip install auto-gptq 46 | pip install -e .[tests] 47 | pytest tests/gptq -s -vvvv --durations=0 48 | -------------------------------------------------------------------------------- /.github/workflows/test_offline.yml: -------------------------------------------------------------------------------- 1 | name: Offline usage / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | TRANSFORMERS_IS_CI: true 15 | 16 | jobs: 17 | build: 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | python-version: [3.9] 22 | runs-on: [ubuntu-22.04] 23 | 24 | runs-on: ${{ matrix.runs-on }} 25 | 26 | steps: 27 | - name: Checkout code 28 | uses: actions/checkout@v4 29 | 30 | - name: Setup Python ${{ matrix.python-version }} 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | 35 | - name: Install dependencies for pytorch export 36 | run: | 37 | pip install .[tests,exporters,onnxruntime] 38 | 39 | - name: Test with pytest 40 | run: | 41 | HF_HOME=/tmp/ huggingface-cli download hf-internal-testing/tiny-random-gpt2 42 | 43 | HF_HOME=/tmp/ HF_HUB_OFFLINE=1 optimum-cli export onnx --model hf-internal-testing/tiny-random-gpt2 gpt2_onnx --task text-generation 44 | 45 | huggingface-cli download hf-internal-testing/tiny-random-gpt2 46 | 47 | HF_HUB_OFFLINE=1 optimum-cli export onnx --model hf-internal-testing/tiny-random-gpt2 gpt2_onnx --task text-generation 48 | 49 | pytest tests/onnxruntime/test_modeling.py -k "test_load_model_from_hub and not from_hub_onnx" -s -vvvvv 50 | 51 | HF_HUB_OFFLINE=1 pytest 
tests/onnxruntime/test_modeling.py -k "test_load_model_from_hub and not from_hub_onnx" -s -vvvvv 52 | -------------------------------------------------------------------------------- /.github/workflows/test_onnx.yml: -------------------------------------------------------------------------------- 1 | name: ONNX / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | TRANSFORMERS_IS_CI: true 15 | 16 | jobs: 17 | build: 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | python-version: [3.9] 22 | runs-on: [ubuntu-22.04] 23 | 24 | runs-on: ${{ matrix.runs-on }} 25 | 26 | steps: 27 | - name: Checkout code 28 | uses: actions/checkout@v4 29 | 30 | - name: Setup Python ${{ matrix.python-version }} 31 | uses: actions/setup-python@v5 32 | with: 33 | python-version: ${{ matrix.python-version }} 34 | 35 | - name: Install dependencies 36 | run: | 37 | pip install --upgrade pip 38 | pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 39 | pip install .[tests,exporters] diffusers 40 | 41 | - name: Test with pytest 42 | run: | 43 | pytest tests/onnx -n auto -vvvv --durations=0 44 | -------------------------------------------------------------------------------- /.github/workflows/test_onnxruntime.yml: -------------------------------------------------------------------------------- 1 | name: ONNX Runtime / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | push: 6 | branches: [main] 7 | pull_request: 8 | branches: [main] 9 | types: 10 | - opened 11 | - labeled 12 | - reopened 13 | - unlabeled 14 | - synchronize 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 18 | cancel-in-progress: true 19 | 20 | env: 21 | TRANSFORMERS_IS_CI: true 22 | 23 | jobs: 24 | build: 25 | strategy: 26 | fail-fast: false 27 | matrix: 28 | python-version: [3.9] 29 | runs-on: [ubuntu-22.04] 30 | test_file: 31 | [ 32 | test_timm.py, 33 | test_decoder.py, 34 | test_modeling.py, 35 | test_diffusion.py, 36 | test_optimization.py, 37 | test_quantization.py, 38 | test_utils.py, 39 | ] 40 | 41 | runs-on: ${{ matrix.runs-on }} 42 | 43 | steps: 44 | - name: Free Disk Space (Ubuntu) 45 | if: matrix.test_file == 'test_modeling.py' 46 | uses: jlumbroso/free-disk-space@main 47 | 48 | - name: Checkout code 49 | uses: actions/checkout@v4 50 | 51 | - name: Setup Python ${{ matrix.python-version }} 52 | uses: actions/setup-python@v5 53 | with: 54 | python-version: ${{ matrix.python-version }} 55 | 56 | - name: Install dependencies 57 | run: | 58 | pip install --upgrade pip 59 | pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 60 | pip install .[tests,onnxruntime] diffusers 61 | 62 | - name: Test with pytest (in series) 63 | if: matrix.test_file == 'test_modeling.py' 64 | run: | 65 | pytest tests/onnxruntime/test_modeling.py -m "run_in_series" --durations=0 -vvvv 66 | 67 | - name: Test with pytest (in parallel) 68 | run: | 69 | pytest tests/onnxruntime/${{ matrix.test_file }} -m "not run_in_series" --durations=0 -vvvv -n auto 70 | env: 71 | HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} 72 | -------------------------------------------------------------------------------- /.github/workflows/test_onnxruntime_gpu.yml: -------------------------------------------------------------------------------- 1 | 
name: ONNX Runtime GPU / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 7 * * * # every day at 7am UTC 7 | pull_request: 8 | branches: [main] 9 | types: 10 | - opened 11 | - labeled 12 | - reopened 13 | - unlabeled 14 | - synchronize 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 18 | cancel-in-progress: true 19 | 20 | jobs: 21 | build: 22 | if: ${{ 23 | (github.event_name == 'push') || 24 | (github.event_name == 'workflow_dispatch') || 25 | contains(github.event.pull_request.labels.*.name, 'gpu') || 26 | contains(github.event.pull_request.labels.*.name, 'onnxruntime-gpu') 27 | }} 28 | 29 | runs-on: 30 | group: aws-g6-4xlarge-plus 31 | 32 | container: 33 | image: nvcr.io/nvidia/tensorrt:24.12-py3 34 | options: --gpus all 35 | 36 | steps: 37 | - name: Checkout 38 | uses: actions/checkout@v4 39 | 40 | - name: Setup Python 41 | uses: actions/setup-python@v5 42 | with: 43 | python-version: "3.9" 44 | 45 | - name: Install dependencies 46 | run: | 47 | pip install --upgrade pip 48 | pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 49 | pip install .[tests,onnxruntime-gpu] diffusers 50 | 51 | - name: Test with pytest 52 | run: | 53 | pytest tests/onnxruntime -m "cuda_ep_test or trt_ep_test" --durations=0 -vvvv -n auto 54 | -------------------------------------------------------------------------------- /.github/workflows/test_onnxruntime_slow.yml: -------------------------------------------------------------------------------- 1 | name: ONNX Runtime Slow / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 7 * * * # every day at 7am UTC 7 | pull_request: 8 | branches: [main] 9 | types: 10 | - opened 11 | - labeled 12 | - reopened 13 | - unlabeled 14 | - synchronize 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 18 | cancel-in-progress: true 19 | 20 | env: 21 | TRANSFORMERS_IS_CI: true 22 | 23 | jobs: 24 | build: 25 | if: ${{ 26 | (github.event_name == 'push') || 27 | (github.event_name == 'schedule') || 28 | (github.event_name == 'workflow_dispatch') || 29 | contains(github.event.pull_request.labels.*.name, 'slow') || 30 | contains(github.event.pull_request.labels.*.name, 'onnxruntime-slow') 31 | }} 32 | 33 | strategy: 34 | fail-fast: false 35 | matrix: 36 | python-version: [3.9] 37 | transformers-version: [latest] 38 | runs-on: [ubuntu-22.04, windows-2022] 39 | include: 40 | - {python-version: 3.9, transformers-version: 4.36.*, runs-on: ubuntu-22.04} 41 | - {python-version: 3.9, transformers-version: 4.45.*, runs-on: ubuntu-22.04} 42 | 43 | runs-on: ${{ matrix.runs-on }} 44 | 45 | steps: 46 | - name: Free Disk Space (Ubuntu) 47 | if: matrix.runs-on == 'ubuntu-22.04' 48 | uses: jlumbroso/free-disk-space@main 49 | 50 | - name: Free Disk Space (macOS) 51 | if: matrix.runs-on == 'macos-15' 52 | run: | 53 | sudo rm -rf /Library/Developer/Xcode/DerivedData/* 54 | sudo rm -rf ~/Library/Developer/Xcode/Archives/* 55 | sudo rm -rf ~/Library/Caches/com.apple.dt.Xcode/* 56 | sudo rm -rf ~/Library/Caches/com.apple.dt.Xcode.SimulatorKit/* 57 | 58 | - name: Checkout 59 | uses: actions/checkout@v4 60 | 61 | - name: Setup Python ${{ matrix.python-version }} 62 | uses: actions/setup-python@v5 63 | with: 64 | python-version: ${{ matrix.python-version }} 65 | 66 | - name: Install dependencies 67 | run: | 68 | pip install --upgrade pip 69 | pip install --no-cache-dir torch torchvision torchaudio 
--index-url https://download.pytorch.org/whl/cpu 70 | pip install .[tests,onnxruntime] diffusers 71 | 72 | - name: Install transformers ${{ matrix.transformers-version }} 73 | if: ${{ matrix.transformers-version == '4.36.*' }} 74 | run: | 75 | pip install "transformers==${{ matrix.transformers-version }}" "diffusers<0.32.0" 76 | 77 | - name: Install transformers ${{ matrix.transformers-version }} 78 | if: ${{ matrix.transformers-version == '4.45.*' }} 79 | run: | 80 | pip install "transformers==${{ matrix.transformers-version }}" "diffusers<0.33.0" 81 | 82 | - name: Test with pytest (in series) 83 | run: | 84 | pytest tests/onnxruntime -m "run_in_series" --durations=0 -vvvv 85 | env: 86 | RUN_SLOW: 1 87 | 88 | - name: Test with pytest (in parallel) 89 | run: | 90 | pytest tests/onnxruntime -m "not run_in_series" --durations=0 -vvvv -n auto 91 | env: 92 | HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} 93 | RUN_SLOW: 1 94 | -------------------------------------------------------------------------------- /.github/workflows/test_onnxruntime_training.yml: -------------------------------------------------------------------------------- 1 | name: ONNX Runtime Training / Python - Test 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: 0 7 * * * # every day at 7am UTC 7 | pull_request: 8 | branches: [main] 9 | types: 10 | - opened 11 | - labeled 12 | - reopened 13 | - unlabeled 14 | - synchronize 15 | 16 | concurrency: 17 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 18 | cancel-in-progress: true 19 | 20 | jobs: 21 | build: 22 | if: ${{ 23 | (github.event_name == 'push') || 24 | (github.event_name == 'workflow_dispatch') || 25 | contains( github.event.pull_request.labels.*.name, 'training') || 26 | contains( github.event.pull_request.labels.*.name, 'onnxruntime-training') 27 | }} 28 | 29 | runs-on: 30 | group: aws-g6-4xlarge-plus 31 | 32 | container: 33 | image: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 34 | options: --gpus all 35 | 36 | steps: 37 | - name: Checkout 38 | uses: actions/checkout@v4 39 | 40 | - name: Setup Python 41 | uses: actions/setup-python@v5 42 | with: 43 | python-version: "3.9" 44 | 45 | - name: Install dependencies 46 | env: 47 | TORCH_CUDA_ARCH_LIST: "5.0 6.0 7.0 7.5 8.0 8.6 9.0+PTX" 48 | run: | 49 | pip install --upgrade pip 50 | pip install --no-cache-dir "torch<2.6" torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 51 | pip install --no-cache-dir torch-ort onnxruntime-training && python -m torch_ort.configure 52 | pip install --no-cache-dir evaluate absl-py rouge_score seqeval sacrebleu nltk scikit-learn 53 | pip install .[tests,onnxruntime-training] 54 | 55 | - name: Test with pytest (trainer) 56 | run: | 57 | RUN_SLOW=1 pytest tests/onnxruntime-training/test_trainer.py --durations=0 -vvvv 58 | env: 59 | HF_DATASETS_TRUST_REMOTE_CODE: 1 60 | 61 | - name: Test with pytest (examples) 62 | run: | 63 | RUN_SLOW=1 pytest tests/onnxruntime-training/test_examples.py --durations=0 -vvvv 64 | env: 65 | HF_DATASETS_TRUST_REMOTE_CODE: 1 66 | -------------------------------------------------------------------------------- /.github/workflows/test_utils.yml: -------------------------------------------------------------------------------- 1 | name: Utils / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | env: 14 | TRANSFORMERS_IS_CI: true 15 
| 16 | jobs: 17 | build: 18 | strategy: 19 | fail-fast: false 20 | matrix: 21 | os: [ubuntu-22.04, macos-13, windows-2022] 22 | python-version: ["3.9"] 23 | 24 | runs-on: ${{ matrix.os }} 25 | steps: 26 | - name: Checkout code 27 | uses: actions/checkout@v4 28 | 29 | - name: Setup Python ${{ matrix.python-version }} 30 | uses: actions/setup-python@v5 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | 34 | - name: Install dependencies 35 | run: | 36 | pip install --upgrade pip 37 | pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu 38 | pip install .[tests] 39 | 40 | - name: Tests needing datasets 41 | run: | 42 | pytest tests/utils -n auto -vvvv --durations=0 -------------------------------------------------------------------------------- /.github/workflows/trufflehog.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | 4 | name: Secret Leaks 5 | 6 | jobs: 7 | trufflehog: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - name: Checkout code 11 | uses: actions/checkout@v4 12 | with: 13 | fetch-depth: 0 14 | - name: Secret Scanning 15 | uses: trufflesecurity/trufflehog@main 16 | 17 | 18 | -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Upload PR Documentation 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Build PR documentation"] 6 | types: 7 | - completed 8 | 9 | jobs: 10 | build: 11 | uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main 12 | with: 13 | package_name: optimum 14 | secrets: 15 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} 16 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.DS_Store 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | # Models 133 | *.onnx 134 | # include small test model for tests 135 | !tests/assets/onnx/model.onnx 136 | 137 | .vscode -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 2 | # How to contribute to Optimum? 3 | 4 | Optimum is an open source project, so all contributions and suggestions are welcome. 5 | 6 | You can contribute in many different ways: giving ideas, answering questions, reporting bugs, proposing enhancements, improving the documentation, fixing bugs,... 7 | 8 | Many thanks in advance to every contributor. 9 | 10 | ## How to work on an open Issue? 11 | You have the list of open Issues at: https://github.com/huggingface/optimum/issues 12 | 13 | Some of them may have the label `help wanted`: that means that any contributor is welcomed! 14 | 15 | If you would like to work on any of the open Issues: 16 | 17 | 1. Make sure it is not already assigned to someone else. You have the assignee (if any) on the top of the right column of the Issue page. 18 | 19 | 2. You can self-assign it by commenting on the Issue page with one of the keywords: `#take` or `#self-assign`. 20 | 21 | 3. Work on your self-assigned issue and eventually create a Pull Request. 22 | 23 | ## How to create a Pull Request? 24 | 1. Fork the [repository](https://github.com/huggingface/optimum) by clicking on the 'Fork' button on the repository's page. This creates a copy of the code under your GitHub user account. 25 | 26 | 2. 
Clone your fork to your local disk, and add the base repository as a remote: 27 | 28 | ```bash 29 | git clone git@github.com:<your GitHub handle>/optimum.git 30 | cd optimum 31 | git remote add upstream https://github.com/huggingface/optimum.git 32 | ``` 33 | 34 | 3. Create a new branch to hold your development changes: 35 | 36 | ```bash 37 | git checkout -b a-descriptive-name-for-my-changes 38 | ``` 39 | 40 | **Do not** work on the `main` branch. 41 | 42 | 4. Set up a development environment by running the following command in a virtual environment: 43 | 44 | ```bash 45 | pip install -e ".[dev]" 46 | ``` 47 | 48 | (If optimum was already installed in the virtual environment, remove 49 | it with `pip uninstall optimum` before reinstalling it in editable 50 | mode with the `-e` flag.) 51 | 52 | 5. Develop the features on your branch. 53 | 54 | 6. Format your code by running black and ruff with the following command, so that your newly added files are formatted consistently: 55 | 56 | ```bash 57 | make style 58 | ``` 59 | 60 | 7. Once you're happy with your changes, add the changed files using `git add` and make a commit with `git commit` to record your changes locally: 61 | 62 | ```bash 63 | git add modified_file.py 64 | git commit 65 | ``` 66 | 67 | It is a good idea to sync your copy of the code with the original 68 | repository regularly. This way you can quickly account for changes: 69 | 70 | ```bash 71 | git fetch upstream 72 | git rebase upstream/main 73 | ``` 74 | 75 | Push the changes to your account using: 76 | 77 | ```bash 78 | git push -u origin a-descriptive-name-for-my-changes 79 | ``` 80 | 81 | 8. Once you are satisfied, go to the webpage of your fork on GitHub. Click on "Pull request" to send your changes to the project maintainers for review. 82 | 83 | ## Code of conduct 84 | 85 | This project adheres to the HuggingFace [code of conduct](CODE_OF_CONDUCT.md). 86 | By participating, you are expected to uphold this code. 87 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | include README.md 16 | include LICENSE 17 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | SHELL := /bin/bash 16 | CURRENT_DIR = $(shell pwd) 17 | DEFAULT_CLONE_URL := https://github.com/huggingface/optimum.git 18 | # If CLONE_URL is empty, revert to DEFAULT_CLONE_URL 19 | REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL)) 20 | 21 | .PHONY: style test 22 | 23 | # Run code quality checks 24 | style_check: 25 | black --check . 26 | ruff check . 27 | 28 | style: 29 | black . 30 | ruff check . --fix 31 | 32 | # Run tests for the library 33 | test: 34 | python -m pytest tests 35 | 36 | # Utilities to release to PyPi 37 | build_dist_install_tools: 38 | pip install build 39 | pip install twine 40 | 41 | build_dist: 42 | rm -fr build 43 | rm -fr dist 44 | python -m build 45 | 46 | pypi_upload: build_dist 47 | python -m twine upload dist/* 48 | 49 | build_doc_docker_image: 50 | docker build -t doc_maker --build-arg commit_sha=$(COMMIT_SHA_OPTIMUM) --build-arg clone_url=$(REAL_CLONE_URL) ./docs 51 | 52 | doc: build_doc_docker_image 53 | @test -n "$(BUILD_DIR)" || (echo "BUILD_DIR is empty." ; exit 1) 54 | @test -n "$(VERSION)" || (echo "VERSION is empty." ; exit 1) 55 | docker run -v $(CURRENT_DIR):/doc_folder --workdir=/doc_folder doc_maker \ 56 | doc-builder build optimum /optimum/docs/source/ \ 57 | --build_dir $(BUILD_DIR) \ 58 | --version $(VERSION) \ 59 | --version_tag_suffix "" \ 60 | --html \ 61 | --clean 62 | -------------------------------------------------------------------------------- /docs/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nikolaik/python-nodejs:python3.11-nodejs23 2 | 3 | ARG commit_sha 4 | ARG clone_url 5 | 6 | RUN apt -y update 7 | RUN python3 -m pip install --no-cache-dir --upgrade pip 8 | RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/doc-builder.git 9 | 10 | RUN git clone $clone_url && cd optimum && git checkout $commit_sha 11 | RUN python3 -m pip install --no-cache-dir ./optimum[onnxruntime,benchmark,quality,doc-build,diffusers] 12 | -------------------------------------------------------------------------------- /docs/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import doctest 19 | import sys 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(__file__), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | # Doctest custom flag to ignore output. 
29 | IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT") 30 | 31 | OutputChecker = doctest.OutputChecker 32 | 33 | 34 | class CustomOutputChecker(OutputChecker): 35 | def check_output(self, want, got, optionflags): 36 | if IGNORE_RESULT & optionflags: 37 | return True 38 | return OutputChecker.check_output(self, want, got, optionflags) 39 | 40 | 41 | doctest.OutputChecker = CustomOutputChecker 42 | -------------------------------------------------------------------------------- /docs/source/_redirects.yml: -------------------------------------------------------------------------------- 1 | # Optimum Graphcore 2 | graphcore_index: graphcore/index 3 | graphcore_quickstart: graphcore/quickstart 4 | graphcore_ipu_config: graphcore/ipu_config 5 | graphcore_trainer: graphcore/trainer 6 | graphcore_add_support_for_new_model: graphcore/add_support_for_new_model 7 | 8 | # Optimum Habana 9 | habana_index: habana/index 10 | habana_quickstart: habana/quickstart 11 | habana_single_hpu: habana/tutorials/single_hpu 12 | habana_distributed: habana/tutorials/distributed 13 | habana_deepspeed: habana/usage_guides/deepspeed 14 | habana_accelerate_training: habana/usage_guides/accelerate_training 15 | habana_trainer: habana/package_reference/trainer 16 | habana_gaudi_config: habana/package_reference/gaudi_config 17 | habana/usage_guides/stable_diffusion: habana/tutorials/stable_diffusion 18 | habana/tutorials/pretraining: habana/usage_guides/pretraining 19 | 20 | # Optimum Intel 21 | intel_index: intel/index 22 | intel_quickstart: intel/index 23 | intel_configuration: intel/neural_compressor/reference 24 | intel_optimization: intel/neural_compressor/optimization 25 | intel_quantization: intel/neural_compressor/optimization 26 | intel_pruning: intel/neural_compressor/optimization 27 | intel_trainer: intel/neural_compressor/reference 28 | intel/inference: intel/openvino/inference 29 | intel/optimization_ov: intel/openvino/optimization 30 | intel/reference_ov: intel/openvino/reference 31 | intel/optimization_inc: intel/neural_compressor/optimization 32 | intel/distributed_training: intel/neural_compressor/distributed_training 33 | intel/reference_inc: intel/neural_compressor/reference 34 | 35 | # Optimum Neuron 36 | docs/optimum-neuron/index: /docs/optimum-neuron/index 37 | 38 | # Optimum TPU 39 | docs/optimum-tpu/index: /docs/optimum-tpu/index 40 | tpu/index: /docs/optimum-tpu/index 41 | 42 | # Optimum ExecuTorch 43 | docs/optimum-executorch/index: /docs/optimum-executorch/index 44 | -------------------------------------------------------------------------------- /docs/source/exporters/onnx/package_reference/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration classes for ONNX exports 14 | 15 | Exporting a model to ONNX involves specifying: 16 | 1. The input names. 17 | 2. The output names. 18 | 3. The dynamic axes. These refer to the input dimensions that can be changed dynamically at runtime (e.g. a batch size or sequence length). 19 | All other axes will be treated as static, and hence fixed at runtime. 20 | 4. Dummy inputs to trace the model. This is needed in PyTorch to record the computational graph and convert it to ONNX. 21 | 22 | Since this data depends on the choice of model and task, we represent it in terms of _configuration classes_. Each configuration class is associated with 23 | a specific model architecture, and follows the naming convention `ArchitectureNameOnnxConfig`.
For instance, the configuration which specifies the ONNX 24 | export of BERT models is `BertOnnxConfig`. 25 | 26 | Since many architectures share similar properties for their ONNX configuration, 🤗 Optimum adopts a 3-level class hierarchy: 27 | 1. Abstract and generic base classes. These handle all the fundamental features, while being agnostic to the modality (text, image, audio, etc). 28 | 2. Middle-end classes. These are aware of the modality, but multiple can exist for the same modality depending on the inputs they support. 29 | They specify which input generators should be used for the dummy inputs, but remain model-agnostic. 30 | 3. Model-specific classes like the `BertOnnxConfig` mentioned above. These are the ones actually used to export models. 31 | 32 | 33 | ## Base classes 34 | 35 | [[autodoc]] exporters.onnx.OnnxConfig 36 | - inputs 37 | - outputs 38 | - generate_dummy_inputs 39 | 40 | [[autodoc]] exporters.onnx.OnnxConfigWithPast 41 | - add_past_key_values 42 | 43 | [[autodoc]] exporters.onnx.OnnxSeq2SeqConfigWithPast 44 | 45 | ## Middle-end classes 46 | 47 | ### Text 48 | 49 | [[autodoc]] exporters.onnx.config.TextEncoderOnnxConfig 50 | 51 | [[autodoc]] exporters.onnx.config.TextDecoderOnnxConfig 52 | 53 | [[autodoc]] exporters.onnx.config.TextSeq2SeqOnnxConfig 54 | 55 | 56 | ### Vision 57 | 58 | [[autodoc]] exporters.onnx.config.VisionOnnxConfig 59 | 60 | 61 | ### Multi-modal 62 | 63 | [[autodoc]] exporters.onnx.config.TextAndVisionOnnxConfig 64 | -------------------------------------------------------------------------------- /docs/source/exporters/onnx/package_reference/export.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Export functions 14 | 15 | You can export models to ONNX from two frameworks in 🤗 Optimum: PyTorch and TensorFlow. There is an export function for each of these frameworks, [`~optimum.exporters.onnx.convert.export_pytorch`] and [`~optimum.exporters.onnx.convert.export_tensorflow`], but the recommended way of using those is via the main export function [`~optimum.exporters.main_export`], which will take care of using the proper exporting function according to the available framework, check that the exported model is valid, and provide extended options to run optimizations on the exported model. 16 | 17 | ## Main functions 18 | 19 | [[autodoc]] exporters.onnx.main_export 20 | 21 | [[autodoc]] exporters.onnx.onnx_export_from_model 22 | 23 | [[autodoc]] exporters.onnx.convert.export 24 | 25 | [[autodoc]] exporters.onnx.convert.export_pytorch 26 | 27 | [[autodoc]] exporters.onnx.convert.export_tensorflow 28 | 29 | 30 | ## Utility functions 31 | 32 | [[autodoc]] exporters.onnx.convert.check_dummy_inputs_are_allowed 33 | 34 | [[autodoc]] exporters.onnx.convert.validate_model_outputs 35 | -------------------------------------------------------------------------------- /docs/source/exporters/overview.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Overview 14 | 15 | 🤗 Optimum enables exporting models from PyTorch or TensorFlow to different formats through its `exporters` module. For now, two export formats are supported: ONNX and TFLite (TensorFlow Lite).
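As a quick, hedged illustration of the `exporters` module in action, the sketch below calls the ONNX `main_export` entry point from Python. It is a minimal sketch only: the checkpoint name, output directory and task value are placeholders chosen for the example rather than requirements of the API.

```python
# Minimal sketch: export a Transformers checkpoint to ONNX with optimum.exporters.
# The checkpoint id, output path and task below are illustrative placeholders.
from optimum.exporters.onnx import main_export

main_export(
    "distilbert-base-uncased",   # model id on the Hugging Face Hub, or a local path
    output="distilbert_onnx/",   # directory that will receive the exported model
    task="text-classification",  # pass explicitly, or leave it to be inferred
)
```

The `optimum-cli export onnx` command wraps the same function, so the CLI and the Python route should produce equivalent artifacts.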
16 | -------------------------------------------------------------------------------- /docs/source/exporters/tflite/overview.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Overview 14 | 15 | 🤗 Optimum handles the export of TensorFlow models to TFLite in the `exporters.tflite` module. In addition, models hosted on the Hugging Face Hub with PyTorch weights but having a TensorFlow implementation will also be supported in the export thanks to Transformers' [TFPreTrainedModel.from_pretrained()](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.TFPreTrainedModel.from_pretrained) auto-conversion to TensorFlow. 16 | 17 | The TFLite export support provides classes, functions and a command line interface to export a model easily. 18 | 19 | Supported architectures: 20 | 21 | - Albert 22 | - BERT 23 | - Camembert 24 | - ConvBert 25 | - Deberta 26 | - Deberta V2 27 | - DistilBert 28 | - Electra 29 | - Flaubert 30 | - MobileBert 31 | - MPNet 32 | - ResNet 33 | - Roberta 34 | - RoFormer 35 | - XLM 36 | - XLMRoberta 37 | -------------------------------------------------------------------------------- /docs/source/exporters/tflite/package_reference/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration classes for TFLite export 14 | 15 | ## Base classes 16 | 17 | [[autodoc]] exporters.tflite.TFLiteConfig 18 | - inputs 19 | - outputs 20 | - generate_dummy_inputs 21 | 22 | ## Middle-end classes 23 | 24 | [[autodoc]] exporters.tflite.config.TextEncoderTFliteConfig 25 | 26 | [[autodoc]] exporters.tflite.config.VisionTFLiteConfig 27 | -------------------------------------------------------------------------------- /docs/source/exporters/tflite/package_reference/export.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Export functions 14 | 15 | ## Main functions 16 | 17 | [[autodoc]] exporters.tflite.convert.export 18 | 19 | ## Utility functions 20 | 21 | [[autodoc]] exporters.tflite.convert.validate_model_outputs 22 | -------------------------------------------------------------------------------- /docs/source/exporters/tflite/usage_guides/contribute.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Adding support for an unsupported architecture 14 | 15 | If you wish to export a model whose architecture is not already supported by the library, the PR [#813 Adds support for ResNet](https://github.com/huggingface/optimum/pull/813 ) can be used as a reference. 16 | 17 | You can make sure tests pass for the new `my_new_modeltype` model type by running: 18 | 19 | ```bash 20 | pytest tests/exporters/tflite/test_*.py -k "my_new_modeltype" -s --exitfirst 21 | ``` 22 | -------------------------------------------------------------------------------- /docs/source/furiosa_overview.mdx: -------------------------------------------------------------------------------- 1 | # 🤗 Optimum Furiosa 2 | 3 | Find more information about 🤗 Optimum Furiosa [here](https://github.com/huggingface/optimum-furiosa). 
4 | -------------------------------------------------------------------------------- /docs/source/notebooks.md: -------------------------------------------------------------------------------- 1 | ../../notebooks/README.md -------------------------------------------------------------------------------- /docs/source/nvidia_overview.mdx: -------------------------------------------------------------------------------- 1 | # 🤗 Optimum Nvidia 2 | 3 | Find more information about 🤗 Optimum Nvidia [here](https://github.com/huggingface/optimum-nvidia). 4 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/concept_guides/onnx.mdx: -------------------------------------------------------------------------------- 1 | # ONNX 🤝 ONNX Runtime 2 | 3 | ONNX is an open standard that defines a common set of operators and a common file format to represent deep learning models in a wide variety of frameworks, including PyTorch and TensorFlow. When a model is exported to the ONNX format, these operators are used to construct a computational graph (often called an _intermediate representation_) that represents the flow of data through the neural network. 4 | 5 | 6 | 7 | You can use [Netron](https://netron.app/) to visualize any ONNX file on the Hugging Face Hub. Simply append the file's URL to `http://netron.app?url=` as in [this example](https://netron.app/?url=https://huggingface.co/cmarkea/distilcamembert-base-ner/blob/main/model.onnx) 8 | 9 | 10 | 11 | By exposing a graph with standardized operators and data types, ONNX makes it easy to switch between frameworks. For example, a model trained in PyTorch can be exported to ONNX format and then imported in TensorFlow (and vice versa). 12 | 13 | Where ONNX really shines is when it is coupled with a dedicated accelerator like ONNX Runtime, or ORT for short. ORT provides tools to optimize the ONNX graph through techniques like operator fusion and constant folding, and defines an interface to execution providers that allow you to run the model on different types of hardware. -------------------------------------------------------------------------------- /docs/source/onnxruntime/overview.mdx: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | 🤗 Optimum provides an integration with ONNX Runtime, a cross-platform, high performance engine for Open Neural Network Exchange (ONNX) models. 4 | 5 |
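As a minimal sketch of what this integration looks like in practice: an exported model can be loaded through one of the `ORTModelFor*` classes and dropped into a regular Transformers pipeline. The checkpoint below is only an example, and the `export=True` flag is assumed to trigger the on-the-fly conversion of the PyTorch weights to ONNX.

```python
# Sketch only: run an ONNX Runtime-backed model through a Transformers pipeline.
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"  # example checkpoint
model = ORTModelForSequenceClassification.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("ONNX Runtime keeps the same pipeline API."))
```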
6 | 20 |
21 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/package_reference/configuration.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Configuration 14 | 15 | The configuration classes are the way to specify how a task should be done. There are two tasks supported with the ONNX Runtime package: 16 | 17 | 1. Optimization: Performed by the [`~onnxruntime.ORTOptimizer`], this task can be tweaked using an [`~onnxruntime.configuration.OptimizationConfig`]. 18 | 19 | 2. Quantization: Performed by the [`~onnxruntime.ORTQuantizer`], quantization can be set using a [`~onnxruntime.configuration.QuantizationConfig`]. A calibration step is required in some cases (post training static quantization), which can be specified using a [`~onnxruntime.configuration.CalibrationConfig`]. 20 | 21 | ## OptimizationConfig 22 | 23 | [[autodoc]] onnxruntime.configuration.OptimizationConfig 24 | 25 | [[autodoc]] onnxruntime.configuration.AutoOptimizationConfig 26 | 27 | ## QuantizationConfig 28 | 29 | [[autodoc]] onnxruntime.configuration.QuantizationConfig 30 | 31 | ## AutoQuantizationConfig 32 | 33 | [[autodoc]] onnxruntime.configuration.AutoQuantizationConfig 34 | - all 35 | 36 | ### CalibrationConfig 37 | 38 | [[autodoc]] onnxruntime.configuration.CalibrationConfig 39 | 40 | ## ORTConfig 41 | 42 | [[autodoc]] onnxruntime.configuration.ORTConfig 43 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/package_reference/optimization.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Optimization 14 | 15 | ## ORTOptimizer 16 | 17 | [[autodoc]] onnxruntime.optimization.ORTOptimizer 18 | - all -------------------------------------------------------------------------------- /docs/source/onnxruntime/package_reference/quantization.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Quantization 14 | 15 | ## ORTQuantizer 16 | 17 | [[autodoc]] onnxruntime.quantization.ORTQuantizer 18 | - all 19 | -------------------------------------------------------------------------------- /docs/source/onnxruntime/package_reference/trainer.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Trainer 14 | 15 | ## ORTTrainer 16 | 17 | [[autodoc]] onnxruntime.trainer.ORTTrainer 18 | - all 19 | 20 | ## ORTSeq2SeqTrainer 21 | 22 | [[autodoc]] onnxruntime.trainer_seq2seq.ORTSeq2SeqTrainer 23 | - evaluate 24 | - predict 25 | 26 | ## ORTTrainingArguments 27 | 28 | [[autodoc]] onnxruntime.training_args.ORTTrainingArguments 29 | - all 30 | 31 | ## ORTSeq2SeqTrainingArguments 32 | 33 | [[autodoc]] onnxruntime.training_args_seq2seq.ORTSeq2SeqTrainingArguments 34 | - all -------------------------------------------------------------------------------- /docs/source/torch_fx/concept_guides/symbolic_tracer.mdx: -------------------------------------------------------------------------------- 1 | # Symbolic tracer 2 | 3 | In Torch FX, the symbolic tracer feeds dummy values through the code to record the underlying operations. 
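As a short, self-contained sketch of what that recording produces, the snippet below traces a toy module with the stock `torch.fx` tracer (the Transformers-specific tracer used by 🤗 Optimum wraps the same mechanism); the module itself is invented purely for illustration.

```python
import torch
from torch import nn
from torch.fx import symbolic_trace


class ToyBlock(nn.Module):  # toy module, only for illustration
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 4)

    def forward(self, x):
        # During tracing, x is a Proxy rather than a real tensor: every call made
        # on it is recorded as a node of the resulting graph.
        return torch.relu(self.linear(x)) / 2.0


traced = symbolic_trace(ToyBlock())
print(traced.graph)  # the recorded operations (call_module, call_function, ...)
print(traced.code)   # Python code regenerated from the recorded graph
```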
-------------------------------------------------------------------------------- /docs/source/torch_fx/overview.mdx: -------------------------------------------------------------------------------- 1 | # Overview 2 | 3 | 🤗 Optimum provides an integration with Torch FX, a library for PyTorch that allows developers to implement custom transformations of their models that can be optimized for performance. 4 | 5 |
6 | 20 |
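As a hedged sketch of what such a transformation pass can look like with the classes documented in the package reference, the snippet below traces a BERT model with the Transformers tracer and composes two of the bundled transformations. The checkpoint name and the list of input names are assumptions made for the example.

```python
# Sketch: compose two bundled FX transformations and apply them to a traced model.
from transformers import BertModel
from transformers.utils.fx import symbolic_trace
from optimum.fx.optimization import ChangeTrueDivToMulByInverse, MergeLinears, compose

model = BertModel.from_pretrained("bert-base-uncased")  # example checkpoint
traced = symbolic_trace(model, input_names=["input_ids", "attention_mask", "token_type_ids"])

transformation = compose(ChangeTrueDivToMulByInverse(), MergeLinears())
transformed = transformation(traced)  # returns the modified torch.fx.GraphModule
```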
21 | -------------------------------------------------------------------------------- /docs/source/torch_fx/package_reference/optimization.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Optimization 14 | 15 | ## Transformation 16 | 17 | [[autodoc]] fx.optimization.Transformation 18 | - all 19 | - __call__ 20 | 21 | ## Reversible transformation 22 | 23 | [[autodoc]] fx.optimization.ReversibleTransformation 24 | - all 25 | - __call__ 26 | 27 | [[autodoc]] fx.optimization.compose 28 | 29 | ### Transformations 30 | 31 | [[autodoc]] fx.optimization.MergeLinears 32 | - all 33 | 34 | [[autodoc]] fx.optimization.FuseBiasInLinear 35 | - all 36 | 37 | [[autodoc]] fx.optimization.ChangeTrueDivToMulByInverse 38 | - all 39 | 40 | [[autodoc]] fx.optimization.FuseBatchNorm2dInConv2d 41 | - all 42 | 43 | [[autodoc]] fx.optimization.FuseBatchNorm1dInLinear 44 | - all -------------------------------------------------------------------------------- /docs/source/utils/dummy_input_generators.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Dummy Input Generators 14 | 15 | It is very common to have to generate dummy inputs to perform a task (tracing, exporting a model to some backend, 16 | testing model outputs, etc). The goal of [`~optimum.utils.input_generators.DummyInputGenerator`] classes is to make this 17 | generation easy and re-usable. 18 | 19 | 20 | ## Base class 21 | 22 | [[autodoc]] optimum.utils.input_generators.DummyInputGenerator 23 | 24 | 25 | ## Existing dummy input generators 26 | 27 | [[autodoc]] optimum.utils.input_generators.DummyTextInputGenerator 28 | 29 | [[autodoc]] optimum.utils.input_generators.DummyDecoderTextInputGenerator 30 | 31 | [[autodoc]] optimum.utils.input_generators.DummyPastKeyValuesGenerator 32 | 33 | [[autodoc]] optimum.utils.input_generators.DummySeq2SeqPastKeyValuesGenerator 34 | 35 | [[autodoc]] optimum.utils.input_generators.DummyBboxInputGenerator 36 | 37 | [[autodoc]] optimum.utils.input_generators.DummyVisionInputGenerator 38 | 39 | [[autodoc]] optimum.utils.input_generators.DummyAudioInputGenerator 40 | -------------------------------------------------------------------------------- /docs/source/utils/normalized_config.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Normalized Configurations 14 | 15 | Model configuration classes in 🤗 Transformers are not standardized. Although Transformers implements an `attribute_map` attribute that mitigates the issue to some extent, it does not make it easy to reason on common configuration attributes in the code. 16 | [`~optimum.utils.normalized_config.NormalizedConfig`] classes try to fix that by allowing access to the configuration 17 | attribute they wrap in a standardized way. 18 | 19 | 20 | ## Base class 21 | 22 | 23 | 24 | While it is possible to create `NormalizedConfig` subclasses for common use-cases, it is also possible to overwrite 25 | the `original attribute name -> normalized attribute name` mapping directly using the 26 | [`~optimum.utils.normalized_config.NormalizedConfig.with_args`] class method. 
27 | 28 | 29 | 30 | [[autodoc]] optimum.utils.normalized_config.NormalizedConfig 31 | 32 | 33 | ## Existing normalized configurations 34 | 35 | [[autodoc]] optimum.utils.normalized_config.NormalizedTextConfig 36 | 37 | [[autodoc]] optimum.utils.normalized_config.NormalizedSeq2SeqConfig 38 | 39 | [[autodoc]] optimum.utils.normalized_config.NormalizedVisionConfig 40 | 41 | [[autodoc]] optimum.utils.normalized_config.NormalizedTextAndVisionConfig 42 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/multiple-choice/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Multiple choice 18 | 19 | The script [`run_swag.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/multiple-choice/run_swag.py) allows us to apply graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for multiple choice tasks. 20 | 21 | The following example applies graph optimizations on a BERT fine-tuned on the SWAG dataset. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node eliminations and constant folding. Higher optimization level will result in hardware dependent optimized graph. 22 | 23 | ```bash 24 | python run_swag.py \ 25 | --model_name_or_path ehdwns1516/bert-base-uncased_SWAG \ 26 | --optimization_level 1 \ 27 | --do_eval \ 28 | --output_dir /tmp/optimized_bert_swag 29 | ``` 30 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/multiple-choice/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9 7 | onnx 8 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/question-answering/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Question answering 18 | 19 | The script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/question-answering/run_qa.py) 20 | allows us to apply graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for question answering tasks. 21 | 22 | Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along 23 | the flag `--version_2_with_negative`. 24 | 25 | The following example applies graph optimizations on a DistilBERT fine-tuned on the SQuAD1.0 dataset. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node eliminations and constant folding. Higher optimization level will result in hardware dependent optimized graph. 26 | 27 | ```bash 28 | python run_qa.py \ 29 | --model_name_or_path distilbert-base-uncased-distilled-squad \ 30 | --dataset_name squad \ 31 | --optimization_level 1 \ 32 | --do_eval \ 33 | --output_dir /tmp/optimized_distilbert_squad 34 | ``` 35 | 36 |
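For readers who prefer to stay in Python, the following hedged sketch shows roughly what the script does under the hood with `ORTOptimizer`; the checkpoint, the `export=True` flag and the output directory are illustrative choices, and the script itself remains the reference for the exact behaviour.

```python
# Sketch: graph-optimize an ONNX question answering model with ORTOptimizer.
from optimum.onnxruntime import ORTModelForQuestionAnswering, ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig

model = ORTModelForQuestionAnswering.from_pretrained(
    "distilbert-base-uncased-distilled-squad", export=True  # example checkpoint
)
optimizer = ORTOptimizer.from_pretrained(model)

# optimization_level=1 mirrors the basic optimizations used in the command above.
optimization_config = OptimizationConfig(optimization_level=1)
optimizer.optimize(save_dir="/tmp/optimized_distilbert_squad", optimization_config=optimization_config)
```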
37 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | torch >= 1.9.0 3 | onnx 4 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/text-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Text classification 18 | 19 | ## GLUE tasks 20 | 21 | The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/text-classification/run_glue.py) allows us to apply graph optimizations and fusion using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as the ones from the [GLUE benchmark](https://gluebenchmark.com/). 22 | 23 | The following example applies graph optimization on a DistilBERT fine-tuned on the sst-2 task. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node eliminations and constant folding. Higher optimization level will result in hardware dependent optimized graph. 24 | 25 | ```bash 26 | python run_glue.py \ 27 | --model_name_or_path distilbert-base-uncased-finetuned-sst-2-english \ 28 | --task_name sst2 \ 29 | --optimization_level 1 \ 30 | --do_eval \ 31 | --output_dir /tmp/optimized_distilbert_sst2 32 | ``` 33 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9 7 | onnx 8 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/token-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Token classification 18 | 19 | The script [`run_ner.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/token-classification/run_ner.py) 20 | allows us to apply graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for token classification tasks. 21 | 22 | The following example applies graph optimizations on a DistilBERT fine-tuned on the CoNLL-2003 task. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node eliminations and constant folding. Higher optimization level will result in hardware dependent optimized graph. 
23 | 24 | ```bash 25 | python run_ner.py \ 26 | --model_name_or_path elastic/distilbert-base-uncased-finetuned-conll03-english \ 27 | --dataset_name conll2003 \ 28 | --optimization_level 1 \ 29 | --do_eval \ 30 | --output_dir /tmp/optimized_distilbert_conll2003 31 | ``` 32 | 33 | -------------------------------------------------------------------------------- /examples/onnxruntime/optimization/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | seqeval 2 | datasets >= 1.18.0 3 | torch >= 1.9 4 | onnx 5 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/image-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Image classification 18 | 19 | The script [`run_image_classification.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/image_classification/run_image_classification.py) allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for image classification tasks. 20 | 21 | The following example applies dynamic quantization on a ViT model fine-tuned on the beans classification dataset. 22 | 23 | ```bash 24 | python run_image_classification.py \ 25 | --model_name_or_path nateraw/vit-base-beans \ 26 | --dataset_name beans \ 27 | --quantization_approach dynamic \ 28 | --do_eval \ 29 | --output_dir /tmp/image_classification_vit_beans 30 | ``` 31 | 32 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 33 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/image-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=1.5.0 2 | torchvision>=0.6.0 3 | datasets>=1.17.0 4 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/multiple-choice/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Multiple choice 18 | 19 | The script [`run_swag.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/multiple-choice/run_swag.py) allows us to apply different quantization approaches (such as dynamic and static quantization) using the [ONNX Runtime](https://github.com/microsoft/onnxruntime) quantization tool for multiple choice tasks. 20 | 21 | The following example applies post-training dynamic quantization on a BERT fine-tuned on the SWAG dataset. 22 | 23 | ```bash 24 | python run_swag.py \ 25 | --model_name_or_path ehdwns1516/bert-base-uncased_SWAG \ 26 | --quantization_approach dynamic \ 27 | --do_eval \ 28 | --output_dir /tmp/quantized_bert_swag 29 | ``` 30 | 31 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 
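The snippet below is a hedged Python approximation of the dynamic path the script takes with `ORTQuantizer`; the model class, the AVX512-VNNI preset and the save directory are assumptions picked for illustration, not the script's exact internals.

```python
# Sketch: post-training dynamic quantization of an exported ONNX model.
from optimum.onnxruntime import ORTModelForMultipleChoice, ORTQuantizer
from optimum.onnxruntime.configuration import AutoQuantizationConfig

model = ORTModelForMultipleChoice.from_pretrained(
    "ehdwns1516/bert-base-uncased_SWAG", export=True  # example checkpoint
)
quantizer = ORTQuantizer.from_pretrained(model)

# Dynamic quantization: weights are quantized ahead of time and activations at
# runtime, so no calibration dataset is needed (unlike the static approach).
dqconfig = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)
quantizer.quantize(save_dir="/tmp/quantized_bert_swag", quantization_config=dqconfig)
```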
32 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/multiple-choice/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9 7 | onnx 8 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/question-answering/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Question answering 18 | 19 | The script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/question-answering/run_qa.py) allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for question answering tasks. 20 | 21 | Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along the flag `--version_2_with_negative`. 22 | 23 | The following example applies post-training dynamic quantization on a DistilBERT fine-tuned on the SQuAD1.0 dataset. 24 | 25 | ```bash 26 | python run_qa.py \ 27 | --model_name_or_path distilbert-base-uncased-distilled-squad \ 28 | --dataset_name squad \ 29 | --quantization_approach dynamic \ 30 | --do_eval \ 31 | --output_dir /tmp/quantized_distilbert_squad 32 | ``` 33 | 34 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 35 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | torch >= 1.9.0 3 | onnx 4 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/text-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Text classification 18 | 19 | ## GLUE tasks 20 | 21 | The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/text-classification/run_glue.py) allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as the ones from the [GLUE benchmark](https://gluebenchmark.com/). 22 | 23 | The following example applies post-training dynamic quantization on a DistilBERT fine-tuned on the sst-2 task. 24 | 25 | ```bash 26 | python run_glue.py \ 27 | --model_name_or_path distilbert-base-uncased-finetuned-sst-2-english \ 28 | --task_name sst2 \ 29 | --quantization_approach dynamic \ 30 | --do_eval \ 31 | --output_dir /tmp/quantized_distilbert_sst2 32 | ``` 33 | 34 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 
35 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9 7 | onnx 8 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/token-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Token classification 18 | 19 | The script [`run_ner.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/token-classification/run_ner.py) allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for token classification tasks. 20 | 21 | The following example applies post-training dynamic quantization on a DistilBERT fine-tuned on the CoNLL-2003 task 22 | 23 | ```bash 24 | python run_ner.py \ 25 | --model_name_or_path elastic/distilbert-base-uncased-finetuned-conll03-english \ 26 | --dataset_name conll2003 \ 27 | --quantization_approach dynamic \ 28 | --do_eval \ 29 | --output_dir /tmp/quantized_distilbert_conll2003 30 | ``` 31 | 32 | In order to apply dynamic or static quantization, `quantization_approach` must be set to respectively `dynamic` or `static`. 33 | -------------------------------------------------------------------------------- /examples/onnxruntime/quantization/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | seqeval 2 | datasets >= 1.8.0 3 | torch >= 1.9 4 | onnx 5 | onnxruntime >= 1.9.0 -------------------------------------------------------------------------------- /examples/onnxruntime/training/docker/Dockerfile-ort-nightly-cu118: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | # Copyright 2023 The HuggingFace Team All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # Use nvidia/cuda image 18 | FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 19 | CMD nvidia-smi 20 | 21 | # Ignore interactive questions during `docker build` 22 | ENV DEBIAN_FRONTEND noninteractive 23 | 24 | # Versions 25 | # available options 3.8, 3.9, 3.10, 3.11 26 | ARG PYTHON_VERSION=3.9 27 | ARG TORCH_CUDA_VERSION=cu118 28 | ARG TORCH_VERSION=2.0.0 29 | ARG TORCHVISION_VERSION=0.15.1 30 | 31 | # Bash shell 32 | RUN chsh -s /bin/bash 33 | SHELL ["/bin/bash", "-c"] 34 | 35 | # Install and update tools to minimize security vulnerabilities 36 | RUN apt-get update 37 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 38 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \ 39 | apt-get clean 40 | RUN unattended-upgrade 41 | RUN apt-get autoremove -y 42 | 43 | # Install miniconda (comes with python 3.9 default) 44 | ARG BUILD_USER=onnxruntimedev 45 | ARG MINICONDA_PREFIX=/home/$BUILD_USER/miniconda3 46 | RUN apt-get install curl 47 | 48 | ARG CONDA_URL=https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh 49 | RUN curl -fSsL --insecure ${CONDA_URL} -o install-conda.sh && \ 50 | /bin/bash ./install-conda.sh -b -p $MINICONDA_PREFIX && \ 51 | $MINICONDA_PREFIX/bin/conda clean -ya && \ 52 | $MINICONDA_PREFIX/bin/conda install -y python=${PYTHON_VERSION} 53 | 54 | ENV PATH=$MINICONDA_PREFIX/bin:${PATH} 55 | 56 | ARG PYTHON_EXE=$MINICONDA_PREFIX/bin/python 57 | 58 | # (Optional) Intall test dependencies 59 | RUN $PYTHON_EXE -m pip install git+https://github.com/huggingface/transformers 60 | RUN $PYTHON_EXE -m pip install datasets accelerate evaluate coloredlogs absl-py rouge_score seqeval scipy sacrebleu nltk scikit-learn parameterized sentencepiece 61 | RUN $PYTHON_EXE -m pip install deepspeed mpi4py 62 | # RUN $PYTHON_EXE -m pip install optuna ray sigopt wandb 63 | 64 | # PyTorch 65 | RUN $PYTHON_EXE -m pip install onnx ninja 66 | RUN $PYTHON_EXE -m pip install torch==${TORCH_VERSION} torchvision==${TORCHVISION_VERSION} -f https://download.pytorch.org/whl/${TORCH_CUDA_VERSION} 67 | 68 | # ORT Module 69 | RUN $PYTHON_EXE -m pip install --pre onnxruntime-training -f https://download.onnxruntime.ai/onnxruntime_nightly_cu118.html 70 | RUN $PYTHON_EXE -m pip install torch-ort 71 | ENV TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX" 72 | RUN $PYTHON_EXE -m pip install --upgrade protobuf==3.20.2 73 | RUN $PYTHON_EXE -m torch_ort.configure 74 | 75 | WORKDIR . 
76 | 77 | CMD ["/bin/bash"] 78 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/docker/Dockerfile-ort-nightly-rocm57: -------------------------------------------------------------------------------- 1 | # Use rocm image 2 | FROM rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1 3 | CMD rocm-smi 4 | 5 | # Ignore interactive questions during `docker build` 6 | ENV DEBIAN_FRONTEND noninteractive 7 | 8 | # Versions 9 | # available options 3.10 10 | ARG PYTHON_VERSION=3.10 11 | 12 | # Bash shell 13 | RUN chsh -s /bin/bash 14 | SHELL ["/bin/bash", "-c"] 15 | 16 | # Install and update tools to minimize security vulnerabilities 17 | RUN apt-get update 18 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 19 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \ 20 | apt-get clean 21 | RUN apt-get autoremove -y 22 | 23 | ARG PYTHON_EXE=/opt/conda/envs/py_$PYTHON_VERSION/bin/python 24 | 25 | # (Optional) Intall test dependencies 26 | RUN $PYTHON_EXE -m pip install -U pip 27 | RUN $PYTHON_EXE -m pip install git+https://github.com/huggingface/transformers 28 | RUN $PYTHON_EXE -m pip install datasets accelerate evaluate coloredlogs absl-py rouge_score seqeval scipy sacrebleu nltk scikit-learn parameterized sentencepiece --no-cache-dir 29 | RUN $PYTHON_EXE -m pip install deepspeed --no-cache-dir 30 | RUN conda install -y mpi4py 31 | 32 | # PyTorch 33 | RUN $PYTHON_EXE -m pip install onnx ninja 34 | 35 | # ORT Module 36 | RUN $PYTHON_EXE -m pip install --pre onnxruntime-training -f https://download.onnxruntime.ai/onnxruntime_nightly_rocm57.html 37 | RUN $PYTHON_EXE -m pip install torch-ort 38 | RUN $PYTHON_EXE -m pip install --upgrade protobuf==3.20.2 39 | RUN $PYTHON_EXE -m torch_ort.configure 40 | 41 | WORKDIR . 42 | 43 | CMD ["/bin/bash"] -------------------------------------------------------------------------------- /examples/onnxruntime/training/docker/Dockerfile-ort1.17.1-cu118: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | # Copyright 2023 The HuggingFace Team All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # Use nvidia/cuda image 18 | FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 19 | CMD nvidia-smi 20 | 21 | # Ignore interactive questions during `docker build` 22 | ENV DEBIAN_FRONTEND noninteractive 23 | 24 | # Versions 25 | ARG PYTHON_VERSION=3.10 26 | ARG TORCH_CUDA_VERSION=cu118 27 | ARG TORCH_VERSION=2.0.0 28 | ARG TORCHVISION_VERSION=0.15.1 29 | 30 | # Bash shell 31 | RUN chsh -s /bin/bash 32 | SHELL ["/bin/bash", "-c"] 33 | 34 | # Install and update tools to minimize security vulnerabilities 35 | RUN apt-get update 36 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 37 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \ 38 | apt-get clean 39 | RUN unattended-upgrade 40 | RUN apt-get autoremove -y 41 | 42 | # Install miniconda (comes with python 3.9 default) 43 | ARG BUILD_USER=onnxruntimedev 44 | ARG MINICONDA_PREFIX=/home/$BUILD_USER/miniconda3 45 | RUN apt-get install curl 46 | 47 | ARG CONDA_URL=https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh 48 | RUN curl -fSsL --insecure ${CONDA_URL} -o install-conda.sh && \ 49 | /bin/bash ./install-conda.sh -b -p $MINICONDA_PREFIX && \ 50 | $MINICONDA_PREFIX/bin/conda clean -ya && \ 51 | $MINICONDA_PREFIX/bin/conda install -y python=${PYTHON_VERSION} 52 | 53 | ENV PATH=$MINICONDA_PREFIX/bin:${PATH} 54 | 55 | ARG PYTHON_EXE=$MINICONDA_PREFIX/bin/python 56 | 57 | # (Optional) Intall test dependencies 58 | RUN $PYTHON_EXE -m pip install git+https://github.com/huggingface/transformers 59 | RUN $PYTHON_EXE -m pip install datasets accelerate evaluate coloredlogs absl-py rouge_score seqeval scipy sacrebleu nltk scikit-learn parameterized sentencepiece 60 | RUN $PYTHON_EXE -m pip install deepspeed mpi4py 61 | # RUN $PYTHON_EXE -m pip install optuna ray sigopt wandb 62 | 63 | # PyTorch 64 | RUN $PYTHON_EXE -m pip install onnx ninja 65 | RUN $PYTHON_EXE -m pip install torch==${TORCH_VERSION} torchvision==${TORCHVISION_VERSION} -f https://download.pytorch.org/whl/${TORCH_CUDA_VERSION} 66 | 67 | # ORT Module 68 | RUN $PYTHON_EXE -m pip install onnxruntime-training==1.17.1 -f https://download.onnxruntime.ai/onnxruntime_stable_cu118.html 69 | RUN $PYTHON_EXE -m pip install torch-ort 70 | ENV TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX" 71 | RUN $PYTHON_EXE -m pip install --upgrade protobuf==3.20.2 72 | RUN $PYTHON_EXE -m torch_ort.configure 73 | 74 | # https://github.com/vllm-project/vllm/issues/1726 75 | RUN pip uninstall nvidia-nccl-cu12 -y 76 | 77 | WORKDIR . 78 | 79 | CMD ["/bin/bash"] 80 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/image-classification/README.md: -------------------------------------------------------------------------------- 1 | 13 | 14 | # Image Classification 15 | 16 | By running the scripts [`run_image_classification.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/image-classification/run_image_classification.py) we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to train the language models from the 17 | [HuggingFace hub](https://huggingface.co/models). 18 | 19 | 20 | __The following example applies the acceleration features powered by ONNX Runtime.__ 21 | 22 | 23 | ### ONNX Runtime Training 24 | 25 | The following example trains ViT on beans dataset with mixed precision (fp16). 
26 | 27 | ```bash 28 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_image_classification.py \ 29 | --model_name_or_path google/vit-base-patch16-224-in21k \ 30 | --dataset_name beans \ 31 | --output_dir ./beans_outputs/ \ 32 | --remove_unused_columns False \ 33 | --label_column_name labels \ 34 | --do_train \ 35 | --do_eval \ 36 | --learning_rate 2e-5 \ 37 | --num_train_epochs 10 \ 38 | --per_device_train_batch_size 32 \ 39 | --per_device_eval_batch_size 32 \ 40 | --logging_strategy steps \ 41 | --logging_steps 10 \ 42 | --eval_strategy epoch \ 43 | --seed 1337 44 | ``` 45 | 46 | 47 | __Note__ 48 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. Install the dependencies either with our prepared* 49 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 50 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 51 | --- 52 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/image-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate>=0.12.0 2 | torch>=1.5.0 3 | torchvision>=0.6.0 4 | datasets>=1.17.0 5 | evaluate 6 | onnx>=1.9.0 7 | onnxruntime-training>=1.9.0 8 | torch-ort 9 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/language-modeling/README.md: -------------------------------------------------------------------------------- 1 | 13 | 14 | # Language Modeling 15 | 16 | ## Language Modeling Training 17 | 18 | By running the scripts [`run_clm.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/language-modeling/run_clm.py) 19 | and [`run_mlm.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/language-modeling/run_mlm.py), 20 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to train the language models from the 21 | [HuggingFace hub](https://huggingface.co/models). 22 | 23 | 24 | __The following example applies the acceleration features powered by ONNX Runtime.__ 25 | 26 | 27 | ### ONNX Runtime Training 28 | 29 | The following example trains GPT2 on wikitext-2 with mixed precision (fp16). 30 | 31 | ```bash 32 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_clm.py \ 33 | --model_name_or_path gpt2 \ 34 | --dataset_name wikitext \ 35 | --dataset_config_name wikitext-2-raw-v1 \ 36 | --do_train \ 37 | --output_dir /tmp/test-clm \ 38 | --fp16 39 | ``` 40 | 41 | 42 | __Note__ 43 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. 
Install the dependencies either with our prepared* 44 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 45 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 46 | 47 | > *The inference will use PyTorch by default, if you want to use ONNX Runtime backend instead, add the flag `--inference_with_ort`.* 48 | --- 49 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/language-modeling/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf == 3.20.2 6 | torch >= 1.9.0 7 | transformers>=4.16.0 8 | onnx>=1.9.0 9 | onnxruntime-training>=1.9.0 10 | torch-ort 11 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/question-answering/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Question answering 18 | 19 | ## SQuAD Tasks 20 | 21 | By running the script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/question-answering/run_qa.py), 22 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) to fine-tune the models from the 23 | [HuggingFace hub](https://huggingface.co/models) for question answering tasks such as SQuAD. 24 | 25 | Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along 26 | the flag `--version_2_with_negative`. 27 | 28 | __The following example applies the acceleration features powered by ONNX Runtime.__ 29 | 30 | 31 | ### Onnxruntime Training 32 | 33 | The following example fine-tunes a BERT on the SQuAD 1.0 dataset. 34 | 35 | ```bash 36 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_qa.py \ 37 | --model_name_or_path bert-base-uncased \ 38 | --dataset_name squad \ 39 | --do_train \ 40 | --do_eval \ 41 | --output_dir /tmp/ort_bert_squad/ 42 | ``` 43 | 44 | __Note__ 45 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. 
Install the dependencies either with our prepared* 46 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 47 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 48 | 49 | > *The inference will use PyTorch by default, if you want to use ONNX Runtime backend instead, add the flag `--inference_with_ort`.* 50 | --- -------------------------------------------------------------------------------- /examples/onnxruntime/training/question-answering/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | torch >= 1.9.0 7 | torch-ort 8 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/stable-diffusion/text-to-image/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate>=0.16.0 2 | transformers>=4.25.1 3 | datasets 4 | git+https://github.com/huggingface/diffusers 5 | ftfy 6 | tensorboard 7 | Jinja2 8 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/summarization/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | ## Summarization 18 | 19 | By running the script [`run_summarization.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/summarization/run_summarization.py), 20 | you will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to fine-tune and evaluate models from the 21 | [HuggingFace hub](https://huggingface.co/models) on summarization tasks. 22 | 23 | ### Supported models 24 | 25 | Theoretically, all sequence-to-sequence models with [ONNXConfig](https://github.com/huggingface/transformers/blob/main/src/transformers/onnx/features.py) support in Transformers shall work; here are the models that the Optimum team has tested and validated. 26 | 27 | * `Bart` 28 | * `T5` 29 | 30 | `run_summarization.py` is a lightweight example of how to download and preprocess a dataset from the 🤗 Datasets library or use your own files (jsonlines or csv), then fine-tune one of the architectures above on it. 31 | 32 | 33 | __The following example applies the acceleration features powered by ONNX Runtime.__ 34 | 35 | 36 | ### Onnx Runtime Training 37 | 38 | The following example fine-tunes a T5 model on the CNN/DailyMail dataset. 39 | 40 | ```bash 41 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_summarization.py \ 42 | --model_name_or_path t5-small \ 43 | --dataset_name cnn_dailymail \ 44 | --dataset_config "3.0.0" \ 45 | --source_prefix "summarize: " \ 46 | --do_train \ 47 | --do_eval \ 48 | --per_device_train_batch_size=4 \ 49 | --per_device_eval_batch_size=4 \ 50 | --output_dir /tmp/ort_summarization_t5/ \ 51 | --overwrite_output_dir \ 52 | --predict_with_generate 53 | ``` 54 | 55 | __Note__ 56 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU.
Install the dependencies either with our prepared* 57 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 58 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 59 | 60 | > *The inference will use PyTorch by default, if you want to use ONNX Runtime backend instead, add the flag `--inference_with_ort`.* 61 | --- -------------------------------------------------------------------------------- /examples/onnxruntime/training/summarization/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate 2 | evaluate 3 | datasets >= 1.8.0 4 | sentencepiece != 0.1.92 5 | scipy 6 | scikit-learn 7 | protobuf 8 | rouge-score 9 | nltk 10 | py7zr 11 | torch >= 1.9.0 12 | torch-ort 13 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/text-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.8.0 2 | sentencepiece != 0.1.92 3 | scipy 4 | scikit-learn 5 | protobuf 6 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/text-classification/zero_stage_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "steps_per_print": 100, 3 | "zero_optimization": { 4 | "stage": 2 5 | }, 6 | "zero_allow_untested_optimizer": true, 7 | "fp16": { 8 | "enabled": true, 9 | "initial_scale_power": 12 10 | }, 11 | "tensorboard":{ 12 | "enabled": false 13 | }, 14 | "train_micro_batch_size_per_gpu": "auto", 15 | "gradient_accumulation_steps": "auto" 16 | } -------------------------------------------------------------------------------- /examples/onnxruntime/training/token-classification/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Token classification 18 | 19 | ## NER Tasks 20 | 21 | By running the script [`run_ner.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/token-classification/run_ner.py), 22 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to fine-tune the models from the 23 | [HuggingFace hub](https://huggingface.co/models) for token classification tasks such as Named Entity Recognition (NER). 24 | 25 | 26 | __The following example applies the acceleration features powered by ONNX Runtime.__ 27 | 28 | 29 | ### ONNX Runtime Training 30 | 31 | The following example fine-tunes a BERT on the CoNLL-2003 NER task. 32 | 33 | ```bash 34 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_ner.py \ 35 | --model_name_or_path bert-base-cased \ 36 | --dataset_name conll2003 \ 37 | --do_train \ 38 | --do_eval \ 39 | --output_dir /tmp/ort_bert_conll2003/ 40 | ``` 41 | 42 | ### Performance 43 | 44 | We get the following results for [bert-large-cased](https://huggingface.co/bert-large-cased) model mixed precision training (fp16) on the previous 45 | task under PyTorch and ONNX Runtime backends.
A single Nvidia A100 card was used to run the experiment for 7 epochs: 46 | 47 | | Model | Backend | Runtime(s) | Train samples(/s) | 48 | | ---------------- | ------------ | ---------- | ----------------- | 49 | | bert-large-cased | PyTorch | 711.5 | 138.1 | 50 | | bert-large-cased | ONNX Runtime | 637.2 | 154.3 | 51 | 52 | We observe the gain of ONNX Runtime compared to PyTorch as follow: 53 | 54 | | | Latency | Throughput | 55 | | ----- | ------- | ---------- | 56 | | Gain | 10.45% | 11.67% | 57 | 58 | 59 | __Note__ 60 | > *To enable ONNX Runtime training, your devices need to be equipped with GPU. Install the dependencies either with our prepared* 61 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 62 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 63 | 64 | > *The inference will use PyTorch by default, if you want to use ONNX Runtime backend instead, add the flag `--inference_with_ort`.* 65 | --- 66 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/token-classification/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.18.3 2 | scipy 3 | scikit-learn 4 | sentencepiece != 0.1.92 5 | seqeval 6 | torch >= 1.8.1 7 | seqeval 8 | sentencepiece != 0.1.92 9 | torch >= 1.9 10 | torch-ort 11 | -------------------------------------------------------------------------------- /examples/onnxruntime/training/translation/README.md: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Translation 18 | 19 | By running the script [`run_translation.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/translation/run_translation.py), 20 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) to fine-tune the models from the 21 | [HuggingFace hub](https://huggingface.co/models) for translation tasks. 22 | 23 | ### Supported Architectures 24 | 25 | - `BartForConditionalGeneration` 26 | - `T5ForConditionalGeneration` 27 | 28 | `run_translation.py` is a lightweight examples of how to download and preprocess a dataset from the [🤗 Datasets](https://github.com/huggingface/datasets) library 29 | or use your own files (jsonlines or csv), then fine-tune one of the architectures above on it. 30 | 31 | For custom datasets in `jsonlines` format please see: https://huggingface.co/docs/datasets/loading_datasets.html#json-files. 32 | 33 | __The following example applies the acceleration features powered by ONNX Runtime.__ 34 | 35 | 36 | ### Onnxruntime Training 37 | 38 | The following example fine-tunes a T5 large model on the wmt16 dataset. 39 | 40 | ```bash 41 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_translation.py \ 42 | --model_name_or_path t5-large \ 43 | --dataset_name wmt16 \ 44 | --dataset_config ro-en \ 45 | --label_smoothing 0.1 \ 46 | --predict_with_generate \ 47 | --source_lang en \ 48 | --target_lang ro \ 49 | --do_train \ 50 | --max_train_samples 30000 \ 51 | --fp16 \ 52 | --output_dir /tmp/ort_t5_translation/ 53 | ``` 54 | 55 | ### Performance 56 | 57 | We get the following results for [t5-large](https://huggingface.co/t5-large) mixed precision training(fp16) on the previous 58 | task under PyTorch and ONNX Runtime backends. 
A single Nvidia A100 card was used to run the experiment for 3 epochs: 59 | 60 | | Model | Backend | Runtime(s) | Train samples(/s) | 61 | | -------- | ------------ | ---------- | ----------------- | 62 | | t5-large | PyTorch | 2038.8 | 44.1 | 63 | | t5-large | ONNX Runtime | 1536.7 | 58.6 | 64 | 65 | We observe the gain of ONNX Runtime compared to PyTorch as follows: 66 | 67 | | | Latency | Throughput | 68 | | ----- | ------- | ---------- | 69 | | Gain | 24.63% | 32.67% | 70 | 71 | 72 | __Note__ 73 | 74 | > *To enable ONNX Runtime training, your devices need to be equipped with a GPU. Install the dependencies either with our prepared* 75 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions* 76 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).* 77 | 78 | > *Inference will use PyTorch by default; to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.* 79 | --- -------------------------------------------------------------------------------- /examples/onnxruntime/training/translation/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets >= 1.18.0 2 | sentencepiece != 0.1.92 3 | protobuf 4 | sacrebleu >= 1.4.12 5 | py7zr 6 | torch >= 1.8 -------------------------------------------------------------------------------- /optimum/bettertransformer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace and Meta Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from ..utils.import_utils import _transformers_version, is_transformers_version 17 | 18 | 19 | if is_transformers_version(">=", "4.49"): 20 | raise RuntimeError( 21 | f"BetterTransformer requires transformers<4.49 but found {_transformers_version}. " 22 | "`optimum.bettertransformer` is deprecated and will be removed in optimum v2.0." 23 | ) 24 | 25 | from .models import BetterTransformerManager 26 | from .transformation import BetterTransformer 27 | -------------------------------------------------------------------------------- /optimum/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from .base import BaseOptimumCLICommand, CommandInfo, RootOptimumCLICommand 16 | from .env import EnvironmentCommand 17 | from .export import ExportCommand, ONNXExportCommand, TFLiteExportCommand 18 | from .optimum_cli import optimum_cli_subcommand 19 | -------------------------------------------------------------------------------- /optimum/commands/env.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import platform 16 | 17 | import huggingface_hub 18 | from transformers import __version__ as transformers_version 19 | from transformers.utils import is_tf_available, is_torch_available 20 | 21 | from ..version import __version__ as version 22 | from . import BaseOptimumCLICommand, CommandInfo 23 | 24 | 25 | class EnvironmentCommand(BaseOptimumCLICommand): 26 | COMMAND = CommandInfo(name="env", help="Get information about the environment used.") 27 | 28 | @staticmethod 29 | def format_dict(d): 30 | return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n" 31 | 32 | def run(self): 33 | pt_version = "not installed" 34 | pt_cuda_available = "NA" 35 | if is_torch_available(): 36 | import torch 37 | 38 | pt_version = torch.__version__ 39 | pt_cuda_available = torch.cuda.is_available() 40 | 41 | tf_version = "not installed" 42 | tf_cuda_available = "NA" 43 | if is_tf_available(): 44 | import tensorflow as tf 45 | 46 | tf_version = tf.__version__ 47 | try: 48 | # deprecated in v2.1 49 | tf_cuda_available = tf.test.is_gpu_available() 50 | except AttributeError: 51 | # returns list of devices, convert to bool 52 | tf_cuda_available = bool(tf.config.list_physical_devices("GPU")) 53 | 54 | info = { 55 | "`optimum` version": version, 56 | "`transformers` version": transformers_version, 57 | "Platform": platform.platform(), 58 | "Python version": platform.python_version(), 59 | "Huggingface_hub version": huggingface_hub.__version__, 60 | "PyTorch version (GPU?)": f"{pt_version} (cuda available: {pt_cuda_available})", 61 | "Tensorflow version (GPU?)": f"{tf_version} (cuda available: {tf_cuda_available})", 62 | } 63 | 64 | print("\nCopy-and-paste the text below in your GitHub issue:\n") 65 | print(self.format_dict(info)) 66 | 67 | return info 68 | -------------------------------------------------------------------------------- /optimum/commands/export/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from .base import ExportCommand 17 | from .onnx import ONNXExportCommand 18 | from .tflite import TFLiteExportCommand 19 | -------------------------------------------------------------------------------- /optimum/commands/export/base.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """optimum.exporters command-line interface base classes.""" 16 | 17 | from .. import BaseOptimumCLICommand, CommandInfo 18 | from .onnx import ONNXExportCommand 19 | from .tflite import TFLiteExportCommand 20 | 21 | 22 | class ExportCommand(BaseOptimumCLICommand): 23 | COMMAND = CommandInfo( 24 | name="export", 25 | help="Export PyTorch and TensorFlow models to several formats.", 26 | ) 27 | SUBCOMMANDS = ( 28 | CommandInfo( 29 | name="onnx", 30 | help="Export PyTorch and TensorFlow to ONNX.", 31 | subcommand_class=ONNXExportCommand, 32 | ), 33 | CommandInfo( 34 | name="tflite", 35 | help="Export TensorFlow to TensorFlow Lite.", 36 | subcommand_class=TFLiteExportCommand, 37 | ), 38 | ) 39 | -------------------------------------------------------------------------------- /optimum/commands/register/README.md: -------------------------------------------------------------------------------- 1 | # Register commands in the Optimum CLI from a subpackage 2 | 3 | It is possible to register a command in the Optimum CLI, either as a command or a subcommand of an already existing command. 4 | 5 | Steps to follow: 6 | 7 | 1. Create a command as a subclass of `optimum.commands.BaseOptimumCLICommand`. 8 | 2. Create a Python file under `optimum/commands/register/`, and define a `REGISTER_COMMANDS` list variable there. 9 | 3. Fill the `REGISTER_COMMANDS` as follows: 10 | 11 | ```python 12 | # CustomCommand1 and CustomCommand2 could also be defined in this file. 13 | from ..my_custom_commands import CustomCommand1, CustomCommand2 14 | from ..export import ExportCommand 15 | 16 | REGISTER_COMMANDS = [ 17 | # CustomCommand1 will be registered as a subcommand of the root Optimum CLI. 18 | CustomCommand1, 19 | # CustomCommand2 will be registered as a subcommand of the `optimum-cli export` command.
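# An entry may also be a (command, parent_command) tuple: the command is then registered as a subcommand of the given parent.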
20 | (CustomCommand2, ExportCommand) 21 | ] 22 | ``` 23 | -------------------------------------------------------------------------------- /optimum/commands/register/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /optimum/conftest.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | # tests directory-specific settings - this file is run automatically 16 | # by pytest before any tests are run 17 | 18 | import doctest 19 | import sys 20 | from os.path import abspath, dirname, join 21 | 22 | 23 | # allow having multiple repository checkouts and not needing to remember to rerun 24 | # 'pip install -e .[dev]' when switching between checkouts and running tests. 25 | git_repo_path = abspath(join(dirname(__file__), "src")) 26 | sys.path.insert(1, git_repo_path) 27 | 28 | # Doctest custom flag to ignore output. 29 | IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT") 30 | 31 | OutputChecker = doctest.OutputChecker 32 | 33 | 34 | class CustomOutputChecker(OutputChecker): 35 | def check_output(self, want, got, optionflags): 36 | if IGNORE_RESULT & optionflags: 37 | return True 38 | return OutputChecker.check_output(self, want, got, optionflags) 39 | 40 | 41 | doctest.OutputChecker = CustomOutputChecker 42 | -------------------------------------------------------------------------------- /optimum/exporters/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from . import onnx # noqa 16 | from .tasks import TasksManager # noqa 17 | -------------------------------------------------------------------------------- /optimum/exporters/base.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Base exporters config.""" 16 | 17 | from abc import ABC 18 | 19 | 20 | class ExportConfig(ABC): 21 | pass 22 | -------------------------------------------------------------------------------- /optimum/exporters/error_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """Utilities related to error handling.""" 16 | 17 | 18 | class ShapeError(ValueError): 19 | pass 20 | 21 | 22 | class AtolError(ValueError): 23 | pass 24 | 25 | 26 | class OutputMatchError(ValueError): 27 | pass 28 | 29 | 30 | class NumberOfInputsMatchError(ValueError): 31 | pass 32 | 33 | 34 | class NumberOfOutputsMatchError(ValueError): 35 | pass 36 | 37 | 38 | class MinimumVersionError(ValueError): 39 | pass 40 | -------------------------------------------------------------------------------- /optimum/exporters/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
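# The ONNX export API below (configs, export and validation functions) is exposed lazily through transformers' _LazyModule.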
15 | 16 | from typing import TYPE_CHECKING 17 | 18 | from transformers.utils import _LazyModule 19 | 20 | 21 | _import_structure = { 22 | "base": ["OnnxConfig", "OnnxConfigWithLoss", "OnnxConfigWithPast", "OnnxSeq2SeqConfigWithPast"], 23 | "config": ["TextDecoderOnnxConfig", "TextEncoderOnnxConfig", "TextSeq2SeqOnnxConfig"], 24 | "convert": [ 25 | "export", 26 | "export_models", 27 | "validate_model_outputs", 28 | "validate_models_outputs", 29 | "onnx_export_from_model", 30 | ], 31 | "utils": [ 32 | "get_decoder_models_for_export", 33 | "get_encoder_decoder_models_for_export", 34 | "get_diffusion_models_for_export", 35 | "MODEL_TYPES_REQUIRING_POSITION_IDS", 36 | ], 37 | "__main__": ["main_export"], 38 | } 39 | 40 | if TYPE_CHECKING: 41 | from .base import OnnxConfig, OnnxConfigWithLoss, OnnxConfigWithPast, OnnxSeq2SeqConfigWithPast # noqa 42 | from .config import TextDecoderOnnxConfig, TextEncoderOnnxConfig, TextSeq2SeqOnnxConfig # noqa 43 | from .convert import ( 44 | export, 45 | export_models, 46 | validate_model_outputs, 47 | validate_models_outputs, 48 | onnx_export_from_model, 49 | ) # noqa 50 | from .utils import ( 51 | get_decoder_models_for_export, 52 | get_encoder_decoder_models_for_export, 53 | get_diffusion_models_for_export, 54 | MODEL_TYPES_REQUIRING_POSITION_IDS, 55 | ) 56 | from .__main__ import main_export 57 | else: 58 | import sys 59 | 60 | sys.modules[__name__] = _LazyModule( 61 | __name__, 62 | globals()["__file__"], 63 | _import_structure, 64 | module_spec=__spec__, 65 | ) 66 | -------------------------------------------------------------------------------- /optimum/exporters/onnx/constants.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | # 2 GB 17 | EXTERNAL_DATA_FORMAT_SIZE_LIMIT = 2 * 1024 * 1024 * 1024 18 | 19 | ONNX_ENCODER_NAME = "encoder_model" 20 | ONNX_DECODER_NAME = "decoder_model" 21 | ONNX_DECODER_WITH_PAST_NAME = "decoder_with_past_model" 22 | ONNX_DECODER_MERGED_NAME = "decoder_model_merged" 23 | 24 | UNPICKABLE_ARCHS = [ 25 | "encodec", 26 | "hubert", 27 | "sew", 28 | "sew-d", 29 | "speecht5", 30 | "unispeech", 31 | "unispeech-sat", 32 | "wav2vec2", 33 | "wav2vec2-conformer", 34 | "wavlm", 35 | ] 36 | 37 | SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED = [ 38 | "bart", 39 | "musicgen", 40 | "whisper", 41 | ] 42 | -------------------------------------------------------------------------------- /optimum/exporters/tflite/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from typing import TYPE_CHECKING 17 | 18 | from transformers.utils import _LazyModule 19 | 20 | 21 | _import_structure = { 22 | "base": ["QuantizationApproach", "TFLiteQuantizationConfig", "TFLiteConfig"], 23 | "convert": ["export", "validate_model_outputs"], 24 | } 25 | 26 | if TYPE_CHECKING: 27 | from .base import QuantizationApproach, TFLiteQuantizationConfig, TFLiteConfig # noqa 28 | from .convert import export, validate_model_outputs # noqa 29 | else: 30 | import sys 31 | 32 | sys.modules[__name__] = _LazyModule( 33 | __name__, 34 | globals()["__file__"], 35 | _import_structure, 36 | module_spec=__spec__, 37 | ) 38 | -------------------------------------------------------------------------------- /optimum/exporters/tflite/config.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ 16 | Common TensorFlow Lite configuration classes that handle most of the features for building model specific 17 | configurations. 18 | """ 19 | 20 | from ...utils import DummyTextInputGenerator, DummyVisionInputGenerator, logging 21 | from .base import TFLiteConfig 22 | 23 | 24 | logger = logging.get_logger(__name__) 25 | 26 | 27 | class TextEncoderTFliteConfig(TFLiteConfig): 28 | """ 29 | Handles encoder-based text architectures. 30 | """ 31 | 32 | DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator,) 33 | MANDATORY_AXES = ("batch_size", "sequence_length", ("multiple-choice", "num_choices")) 34 | 35 | 36 | class VisionTFLiteConfig(TFLiteConfig): 37 | """ 38 | Handles vision architectures. 39 | """ 40 | 41 | DUMMY_INPUT_GENERATOR_CLASSES = (DummyVisionInputGenerator,) 42 | MANDATORY_AXES = ("batch_size", "num_channels", "width", "height") 43 | -------------------------------------------------------------------------------- /optimum/fx/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from . import optimization 16 | -------------------------------------------------------------------------------- /optimum/fx/optimization/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from .transformations import ( # noqa 16 | ChangeTrueDivToMulByInverse, 17 | FuseBatchNorm1dInLinear, 18 | FuseBatchNorm2dInConv2d, 19 | FuseBiasInLinear, 20 | MergeLinears, 21 | ReversibleTransformation, 22 | Transformation, 23 | compose, 24 | ) 25 | -------------------------------------------------------------------------------- /optimum/fx/parallelization/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from .api import parallelize_backend, parallelize_model 16 | from .core import Config, ParallelExecutionCtx 17 | -------------------------------------------------------------------------------- /optimum/fx/parallelization/distributed/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
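# Differentiable collective communication ops (identity, all-gather, all-reduce-sum, scatter) for tensor-parallel execution.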
15 | from .dist_ops import ( 16 | differentiable_all_gather, 17 | differentiable_all_reduce_sum, 18 | differentiable_identity, 19 | differentiable_scatter, 20 | scatter, 21 | ) 22 | -------------------------------------------------------------------------------- /optimum/fx/parallelization/op_registry/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from .op_handlers import REGISTRY, FallbackParallelAxisPropagateHandler 16 | -------------------------------------------------------------------------------- /optimum/fx/parallelization/parallel_layers/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from .embedding import VocabParallelEmbedding 16 | from .linear import ColumnParallelLinear, RowParallelLinear 17 | from .loss import VocabParallelCrossEntropyLoss, sharded_cross_entropy_wrapper_fn 18 | -------------------------------------------------------------------------------- /optimum/fx/utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
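# Helpers to check that the installed transformers version supports the torch.fx features required by optimum.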
15 | from functools import wraps 16 | 17 | import transformers 18 | from packaging import version 19 | 20 | 21 | _TRANSFORMERS_MIN_VERSION = version.parse("4.20.0.dev0") 22 | 23 | transformers_version = version.parse(transformers.__version__) 24 | _fx_features_available = (_TRANSFORMERS_MIN_VERSION.major, _TRANSFORMERS_MIN_VERSION.minor) <= ( 25 | transformers_version.major, 26 | transformers_version.minor, 27 | ) 28 | 29 | 30 | def are_fx_features_available(): 31 | return _fx_features_available 32 | 33 | 34 | def check_if_available(func): 35 | @wraps(func) 36 | def wrapper(*args, **kwargs): 37 | if not are_fx_features_available(): 38 | raise ImportError( 39 | f"Found an incompatible version of transformers. Found version {transformers_version}, but only {_TRANSFORMERS_MIN_VERSION} and above are supported." 40 | ) 41 | return func(*args, **kwargs) 42 | 43 | return wrapper 44 | -------------------------------------------------------------------------------- /optimum/gptq/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 HuggingFace Inc. team. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | from .quantizer import GPTQQuantizer, load_quantized_model 16 | -------------------------------------------------------------------------------- /optimum/gptq/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
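# Constants used by the GPTQ quantizer: config keys that give the maximum sequence length, common transformer block paths, and the quantization config filename.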
14 | 15 | SEQLEN_KEYS_TRANFORMERS = ["max_position_embeddings", "seq_length", "n_positions"] 16 | BLOCK_PATTERNS = [ 17 | "transformer.h", 18 | "model.decoder.layers", 19 | "gpt_neox.layers", 20 | "model.layers", 21 | # modules loaded by AutoModel vs AutoModelForCausalLM have different prefixes 22 | "h", 23 | "decoder.layers", 24 | "layers", 25 | ] 26 | 27 | GPTQ_CONFIG = "quantize_config.json" 28 | -------------------------------------------------------------------------------- /optimum/gptq/eval.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from datasets import load_dataset 4 | from tqdm import tqdm 5 | 6 | 7 | def evaluate_perplexity(model, tokenizer): 8 | def _perplexity(nlls, n_samples, seqlen): 9 | return torch.exp(torch.stack(nlls).sum() / (n_samples * seqlen)) 10 | 11 | # load and prepare dataset 12 | data = load_dataset("wikitext", "wikitext-2-raw-v1", split="test") 13 | data = tokenizer("\n\n".join(data["text"]), return_tensors="pt") 14 | data = data.input_ids.to(model.device) 15 | 16 | seqlen = 512 17 | model = model.eval() 18 | n_samples = data.numel() // seqlen 19 | 20 | nlls = [] 21 | 22 | with tqdm(range(n_samples), desc="Perplexity -") as progress_bar: 23 | for i in progress_bar: 24 | start_index = i * seqlen 25 | end_index = (i + 1) * seqlen 26 | batch = data[:, start_index:end_index].to(model.device) 27 | with torch.no_grad(): 28 | logits = model(batch).logits 29 | shift_logits = logits[:, :-1, :].contiguous().float() 30 | shift_labels = data[:, start_index:end_index][:, 1:] 31 | loss_fct = nn.CrossEntropyLoss() 32 | loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1)) 33 | neg_log_likelihood = loss.float() * seqlen 34 | nlls.append(neg_log_likelihood) 35 | 36 | curr_ppl = _perplexity(nlls, i + 1, seqlen) 37 | progress_bar.set_description(f"Perplexity {curr_ppl:.3f}") 38 | 39 | ppl = _perplexity(nlls, n_samples, seqlen) 40 | 41 | return ppl.item() 42 | -------------------------------------------------------------------------------- /optimum/onnx/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
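# ONNX graph transformation utilities (weight deduplication, decoder merging, aten op replacement), exposed lazily through transformers' _LazyModule.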
14 | from typing import TYPE_CHECKING 15 | 16 | from transformers.utils import _LazyModule 17 | 18 | 19 | _import_structure = { 20 | "graph_transformations": [ 21 | "cast_slice_nodes_inputs_to_int32", 22 | "merge_decoders", 23 | "remove_duplicate_weights", 24 | "replace_atenops_to_gather", 25 | "remove_duplicate_weights_from_tied_info", 26 | ], 27 | } 28 | 29 | if TYPE_CHECKING: 30 | from .graph_transformations import ( 31 | cast_slice_nodes_inputs_to_int32, 32 | merge_decoders, 33 | remove_duplicate_weights, 34 | remove_duplicate_weights_from_tied_info, 35 | replace_atenops_to_gather, 36 | ) 37 | else: 38 | import sys 39 | 40 | sys.modules[__name__] = _LazyModule( 41 | __name__, 42 | globals()["__file__"], 43 | _import_structure, 44 | module_spec=__spec__, 45 | ) 46 | -------------------------------------------------------------------------------- /optimum/onnxruntime/constants.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | ENCODER_ONNX_FILE_PATTERN = r"(.*)?encoder(.*)?\.onnx" 16 | DECODER_ONNX_FILE_PATTERN = r"(.*)?decoder((?!(with_past|merged)).)*?\.onnx" 17 | DECODER_WITH_PAST_ONNX_FILE_PATTERN = r"(.*)?decoder(.*)?with_past(.*)?\.onnx" 18 | DECODER_MERGED_ONNX_FILE_PATTERN = r"(.*)?decoder(.*)?merged(.*)?\.onnx" 19 | ONNX_FILE_PATTERN = r".*\.onnx$" 20 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .quantization import PreprocessorPass, QuantizationPreprocessor 16 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/passes/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .excluders import ExcludeNodeAfter, ExcludeNodeFollowedBy 16 | from .gelu import ExcludeGeLUNodes 17 | from .layernorm import ExcludeLayerNormNodes 18 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/passes/excluders.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Set, Tuple 15 | 16 | from onnx import ModelProto 17 | 18 | from onnxruntime.transformers.onnx_model import OnnxModel 19 | 20 | from .. import PreprocessorPass 21 | 22 | 23 | class ExcludeNodeFollowedBy(PreprocessorPass): 24 | def __init__(self, operator_type_to_exclude: str, following_operator_type: str): 25 | super().__init__() 26 | 27 | self.operator_type_to_exclude = operator_type_to_exclude 28 | self.following_operator_type = following_operator_type 29 | 30 | def __call__(self, _: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]: 31 | # Find out the nodes to exclude in the graph 32 | candidate_nodes_to_exclude = { 33 | candidate_output: candidate.name 34 | for candidate in model.get_nodes_by_op_type(self.operator_type_to_exclude) 35 | for candidate_output in candidate.output 36 | } 37 | 38 | nodes_of_following_type = { 39 | node_input: node.name 40 | for node in model.get_nodes_by_op_type(self.following_operator_type) 41 | for node_input in node.input 42 | } 43 | 44 | # Intersection of both are the one we want to remove 45 | to_exclude = set(candidate_nodes_to_exclude.keys()).intersection(nodes_of_following_type.keys()) 46 | nodes_to_exclude = {candidate_nodes_to_exclude[node] for node in to_exclude} 47 | 48 | return set(), nodes_to_exclude 49 | 50 | 51 | class ExcludeNodeAfter(PreprocessorPass): 52 | def __init__(self, parent_operator_type: str, operator_type_to_exclude: str): 53 | super().__init__() 54 | 55 | self.parent_operator_type = parent_operator_type 56 | self.operator_type_to_exclude = operator_type_to_exclude 57 | 58 | def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]: 59 | # Find out the nodes to exclude in the graph 60 | candidate_nodes_to_exclude = { 61 | candidate_input: candidate.name 62 | for candidate in model.get_nodes_by_op_type(self.operator_type_to_exclude) 63 | for candidate_input in candidate.input 64 | } 65 | 66 | parent_node = { 67 | node_output: node.name 68 | for node in model.get_nodes_by_op_type(self.parent_operator_type) 69 | 
for node_output in node.output 70 | } 71 | 72 | # Intersection of both are the one we want to remove 73 | to_exclude = set(candidate_nodes_to_exclude.keys()).intersection(parent_node.keys()) 74 | nodes_to_exclude = {candidate_nodes_to_exclude[node] for node in to_exclude} 75 | 76 | return set(), nodes_to_exclude 77 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/passes/fully_connected.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Set, Tuple 15 | 16 | from onnx import ModelProto 17 | 18 | from onnxruntime.transformers.onnx_model import OnnxModel 19 | 20 | from .. import PreprocessorPass 21 | 22 | 23 | class IncludeFullyConnectedNodes(PreprocessorPass): 24 | def __init__(self): 25 | super().__init__() 26 | 27 | def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]: 28 | fc_subgraphs = [] 29 | for add_node in model.get_nodes_by_op_type("Add"): 30 | fc_components = model.match_parent_path(add_node, ["MatMul"], [1]) 31 | if fc_components is not None: 32 | fc_components.append(add_node) 33 | fc_subgraphs.append(fc_components) 34 | fc_components = {node.name for fc in fc_subgraphs for node in fc} 35 | return fc_components, set() 36 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/passes/gelu.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Set, Tuple 15 | 16 | from onnx import ModelProto 17 | 18 | from onnxruntime.transformers.onnx_model import OnnxModel 19 | 20 | from .. 
import PreprocessorPass 21 | 22 | 23 | class ExcludeGeLUNodes(PreprocessorPass): 24 | def __init__(self): 25 | super().__init__() 26 | 27 | def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]: 28 | gelu_subgraphs = [] 29 | for mul_node in model.get_nodes_by_op_type("Mul"): 30 | gelu_components = model.match_parent_path(mul_node, ["Mul", "Add", "Erf", "Div"], [0, 1, 0, 0]) 31 | 32 | if gelu_components is not None: 33 | gelu_components.append(mul_node) 34 | gelu_subgraphs.append(gelu_components) 35 | 36 | gl_components = (node.name for gl in gelu_subgraphs for node in gl) 37 | return set(), set(gl_components) 38 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/passes/layernorm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from typing import Set, Tuple 15 | 16 | from onnx import ModelProto 17 | 18 | from onnxruntime.transformers.onnx_model import OnnxModel 19 | 20 | from .. import PreprocessorPass 21 | 22 | 23 | class ExcludeLayerNormNodes(PreprocessorPass): 24 | def __init__(self): 25 | super().__init__() 26 | 27 | def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]: 28 | layer_norm_subgraphs = [] 29 | for add_node in model.get_nodes_by_op_type("Add"): 30 | layer_norm_components = model.match_parent_path( 31 | add_node, 32 | ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow", "Sub", "ReduceMean"], 33 | [0, 0, 1, 0, 0, 0, 0, 1], 34 | ) 35 | 36 | if layer_norm_components is not None: 37 | layer_norm_components.append(add_node) 38 | layer_norm_subgraphs.append(layer_norm_components) 39 | 40 | ln_components = (node.name for ln in layer_norm_subgraphs for node in ln) 41 | return set(), set(ln_components) 42 | -------------------------------------------------------------------------------- /optimum/onnxruntime/preprocessors/quantization.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
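# Defines the PreprocessorPass interface and the QuantizationPreprocessor, which runs all registered passes and aggregates the node names to quantize or exclude.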
14 | from abc import ABC, abstractmethod 15 | from logging import getLogger 16 | from os import PathLike 17 | from pathlib import Path 18 | from typing import Optional, Set, Tuple, Union 19 | 20 | from onnx import ModelProto, load_model 21 | 22 | from onnxruntime.transformers.onnx_model import OnnxModel 23 | 24 | 25 | LOGGER = getLogger("GraphWalker") 26 | 27 | 28 | class PreprocessorPass(ABC): 29 | def __init__(self): 30 | self._logger = LOGGER 31 | 32 | @abstractmethod 33 | def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Optional[Set[str]], Optional[Set[str]]]: 34 | raise NotImplementedError() 35 | 36 | 37 | class QuantizationPreprocessor: 38 | __slots__ = ("_passes",) 39 | 40 | def __init__(self): 41 | self._passes = [] 42 | 43 | def from_config(self, config): 44 | pass 45 | 46 | def register_pass(self, target: PreprocessorPass): 47 | if target not in self._passes: 48 | self._passes.append(target) 49 | 50 | def collect(self, model_or_path: Union[str, PathLike, Path, bytes]) -> Tuple[Set[str], Set[str]]: 51 | global_nodes_to_quantize, global_nodes_to_exclude = set(), set() 52 | graph = load_model(model_or_path.as_posix() if isinstance(model_or_path, Path) else model_or_path) 53 | model = OnnxModel(graph) 54 | 55 | for walking_pass in self._passes: 56 | nodes_to_quantize, nodes_to_exclude = walking_pass(graph, model) 57 | 58 | if nodes_to_quantize is not None: 59 | global_nodes_to_quantize.update(nodes_to_quantize) 60 | 61 | if nodes_to_exclude is not None: 62 | global_nodes_to_exclude.update(nodes_to_exclude) 63 | 64 | # Exclude the nodes from quantization when present in both sets 65 | global_nodes_to_quantize = global_nodes_to_quantize - global_nodes_to_exclude 66 | 67 | return global_nodes_to_quantize, global_nodes_to_exclude 68 | -------------------------------------------------------------------------------- /optimum/onnxruntime/runs/utils.py: -------------------------------------------------------------------------------- 1 | from ..modeling_decoder import ORTModelForCausalLM 2 | from ..modeling_ort import ( 3 | ORTModelForFeatureExtraction, 4 | ORTModelForImageClassification, 5 | ORTModelForQuestionAnswering, 6 | ORTModelForSequenceClassification, 7 | ORTModelForTokenClassification, 8 | ) 9 | 10 | 11 | task_ortmodel_map = { 12 | "text-generation": ORTModelForCausalLM, 13 | "feature-extraction": ORTModelForFeatureExtraction, 14 | "image-classification": ORTModelForImageClassification, 15 | "question-answering": ORTModelForQuestionAnswering, 16 | "text-classification": ORTModelForSequenceClassification, 17 | "token-classification": ORTModelForTokenClassification, 18 | } 19 | -------------------------------------------------------------------------------- /optimum/onnxruntime/subpackage/__init__.py: -------------------------------------------------------------------------------- 1 | from .commands import ONNXRuntimeCommand 2 | -------------------------------------------------------------------------------- /optimum/onnxruntime/subpackage/commands/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .base import ONNXRuntimeCommand 17 | -------------------------------------------------------------------------------- /optimum/onnxruntime/subpackage/commands/base.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """optimum.onnxruntime command-line interface base classes.""" 16 | 17 | from optimum.commands import BaseOptimumCLICommand, CommandInfo, optimum_cli_subcommand 18 | 19 | from .optimize import ONNXRuntimeOptimizeCommand 20 | from .quantize import ONNXRuntimeQuantizeCommand 21 | 22 | 23 | @optimum_cli_subcommand() 24 | class ONNXRuntimeCommand(BaseOptimumCLICommand): 25 | COMMAND = CommandInfo( 26 | name="onnxruntime", 27 | help="ONNX Runtime optimize and quantize utilities.", 28 | ) 29 | SUBCOMMANDS = ( 30 | CommandInfo( 31 | name="optimize", 32 | help="Optimize ONNX models.", 33 | subcommand_class=ONNXRuntimeOptimizeCommand, 34 | ), 35 | CommandInfo( 36 | name="quantize", 37 | help="Dynamic quantization for ONNX models.", 38 | subcommand_class=ONNXRuntimeQuantizeCommand, 39 | ), 40 | ) 41 | -------------------------------------------------------------------------------- /optimum/onnxruntime/training_args_seq2seq.py: -------------------------------------------------------------------------------- 1 | # Copyright 2020 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from dataclasses import dataclass, field 16 | from typing import Optional 17 | 18 | from transformers import Seq2SeqTrainingArguments 19 | 20 | from .training_args import ORTTrainingArguments 21 | 22 | 23 | @dataclass 24 | class ORTSeq2SeqTrainingArguments(Seq2SeqTrainingArguments, ORTTrainingArguments): 25 | """ 26 | Parameters: 27 | optim (`str` or [`training_args.ORTOptimizerNames`] or [`transformers.training_args.OptimizerNames`], *optional*, defaults to `"adamw_hf"`): 28 | The optimizer to use, including optimizers in Transformers: adamw_hf, adamw_torch, adamw_apex_fused, or adafactor. And optimizers implemented by ONNX Runtime: adamw_ort_fused. 29 | """ 30 | 31 | optim: Optional[str] = field( 32 | default="adamw_hf", 33 | metadata={"help": "The optimizer to use."}, 34 | ) 35 | -------------------------------------------------------------------------------- /optimum/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .pipelines_base import ( 17 | MAPPING_LOADING_FUNC, 18 | ORT_SUPPORTED_TASKS, 19 | load_bettertransformer, 20 | load_ort_pipeline, 21 | pipeline, 22 | ) 23 | -------------------------------------------------------------------------------- /optimum/quantization_base.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from abc import ABC, abstractmethod 3 | from pathlib import Path 4 | from typing import Optional, Union 5 | 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | 10 | class OptimumQuantizer(ABC): 11 | @classmethod 12 | def from_pretrained( 13 | cls, 14 | model_or_path: Union[str, Path], 15 | file_name: Optional[str] = None, 16 | ): 17 | """Overwrite this method in subclass to define how to load your model from pretrained""" 18 | raise NotImplementedError( 19 | "Overwrite this method in subclass to define how to load your model from pretrained for quantization" 20 | ) 21 | 22 | @abstractmethod 23 | def quantize(self, save_dir: Union[str, Path], file_prefix: Optional[str] = None, **kwargs): 24 | """Overwrite this method in subclass to define how to quantize your model for quantization""" 25 | raise NotImplementedError( 26 | "Overwrite this method in subclass to define how to quantize your model for quantization" 27 | ) 28 | -------------------------------------------------------------------------------- /optimum/subpackages.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import importlib 17 | import logging 18 | import sys 19 | 20 | 21 | if sys.version_info >= (3, 8): 22 | from importlib import metadata as importlib_metadata 23 | else: 24 | import importlib_metadata 25 | from importlib.util import find_spec, module_from_spec 26 | 27 | from .utils import is_onnxruntime_available 28 | 29 | 30 | logger = logging.getLogger(__name__) 31 | 32 | 33 | def load_namespace_modules(namespace: str, module: str): 34 | """Load modules with a specific name inside a namespace 35 | 36 | This method operates on namespace packages: 37 | https://packaging.python.org/en/latest/guides/packaging-namespace-packages/ 38 | 39 | For each package inside the specified `namespace`, it looks for the specified `module` and loads it. 40 | 41 | Args: 42 | namespace (`str`): 43 | The namespace containing modules to be loaded. 44 | module (`str`): 45 | The name of the module to load in each namespace package. 46 | """ 47 | for dist in importlib_metadata.distributions(): 48 | dist_name = dist.metadata["Name"] 49 | if dist_name is None: 50 | continue 51 | if dist_name == f"{namespace}-benchmark": 52 | continue 53 | if not dist_name.startswith(f"{namespace}-"): 54 | continue 55 | package_import_name = dist_name.replace("-", ".") 56 | module_import_name = f"{package_import_name}.{module}" 57 | if module_import_name in sys.modules: 58 | # Module already loaded 59 | continue 60 | backend_spec = find_spec(module_import_name) 61 | if backend_spec is None: 62 | continue 63 | try: 64 | imported_module = module_from_spec(backend_spec) 65 | sys.modules[module_import_name] = imported_module 66 | backend_spec.loader.exec_module(imported_module) 67 | logger.debug(f"Successfully loaded {module_import_name}") 68 | except Exception as e: 69 | logger.error(f"An exception occured while loading {module_import_name}: {e}.") 70 | 71 | 72 | def load_subpackages(): 73 | """Load optimum subpackages 74 | 75 | This method goes through packages inside the `optimum` namespace and loads the `subpackage` module if it exists. 76 | 77 | This module is then in charge of registering the subpackage commands. 78 | """ 79 | SUBPACKAGE_LOADER = "subpackage" 80 | load_namespace_modules("optimum", SUBPACKAGE_LOADER) 81 | 82 | # Load subpackages from internal modules not explicitly defined as namespace packages 83 | loader_name = "." + SUBPACKAGE_LOADER 84 | if is_onnxruntime_available(): 85 | importlib.import_module(loader_name, package="optimum.onnxruntime") 86 | -------------------------------------------------------------------------------- /optimum/utils/constant.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
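Editor's note: to make the naming convention used by `load_namespace_modules` / `load_subpackages` above concrete, here is a minimal sketch of what a hypothetical `optimum-foo` distribution would ship so that `load_subpackages()` picks it up automatically; the distribution name, module path, and command are illustrative only.

```python
# optimum/foo/subpackage.py  (hypothetical file inside an installed "optimum-foo" distribution)
#
# Because the distribution name starts with "optimum-", load_namespace_modules("optimum", "subpackage")
# resolves "optimum.foo.subpackage" via find_spec and executes it, which is the hook subpackages use
# to register their CLI commands.
from optimum.commands import BaseOptimumCLICommand, CommandInfo, optimum_cli_subcommand


@optimum_cli_subcommand()
class FooCommand(BaseOptimumCLICommand):
    COMMAND = CommandInfo(name="foo", help="Commands provided by the hypothetical optimum-foo subpackage.")

    def run(self):
        print("optimum-foo subpackage loaded")
```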
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | CONFIG_NAME = "config.json" 17 | ONNX_WEIGHTS_NAME = "model.onnx" 18 | 19 | DIFFUSION_MODEL_UNET_SUBFOLDER = "unet" 20 | DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER = "transformer" 21 | DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER = "vae_decoder" 22 | DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER = "vae_encoder" 23 | DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER = "text_encoder" 24 | DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER = "text_encoder_2" 25 | DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER = "text_encoder_3" 26 | DIFFUSION_PIPELINE_CONFIG_FILE_NAME = "model_index.json" 27 | DIFFUSION_MODEL_CONFIG_FILE_NAME = "config.json" 28 | DIFFUSION_MODEL_ONNX_FILE_NAME = "model.onnx" 29 | -------------------------------------------------------------------------------- /optimum/utils/doc.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
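Editor's note: as a small illustration of how the path constants in `constant.py` above are typically combined (the directory names below are placeholders, not files from this repository):

```python
from pathlib import Path

from optimum.utils.constant import (
    DIFFUSION_MODEL_ONNX_FILE_NAME,
    DIFFUSION_MODEL_UNET_SUBFOLDER,
    ONNX_WEIGHTS_NAME,
)

# Conventional layout these constants encode: a single exported model keeps its weights in
# "model.onnx", while a diffusion pipeline stores one ONNX file per component subfolder.
model_path = Path("exported_model") / ONNX_WEIGHTS_NAME
unet_path = Path("exported_pipeline") / DIFFUSION_MODEL_UNET_SUBFOLDER / DIFFUSION_MODEL_ONNX_FILE_NAME
```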
14 | 15 | 16 | from dataclasses import fields 17 | 18 | 19 | def generate_doc_dataclass(cls) -> str: 20 | """Class decorator to generate the documentation for a dataclass.""" 21 | doc = "\f\nAttributes:\n" 22 | for attribute in fields(cls): 23 | doc += f" {attribute.name}" # attribute name 24 | 25 | # whether optional 26 | attribute_type = str(attribute.type) 27 | if attribute_type.startswith("typing.Optional"): 28 | optional = True 29 | type_display = attribute_type[attribute_type.find("[") + 1 : -1] 30 | type_display = type_display.split(".")[-1] 31 | else: 32 | optional = False 33 | 34 | if attribute_type.startswith("typing"): 35 | type_display = attribute_type.split(".")[-1] 36 | else: 37 | type_display = attribute.type.__name__ 38 | 39 | if optional: 40 | doc += f" (`{type_display}`, *optional*): " 41 | else: 42 | doc += f" (`{type_display}`): " 43 | 44 | doc += f"{attribute.metadata['description']}\n" # argument description 45 | cls.__doc__ = (cls.__doc__ if cls.__doc__ is not None else "") + "\n\n" + "".join(doc) 46 | return cls 47 | 48 | 49 | def add_dynamic_docstring( 50 | *docstr, 51 | text, 52 | dynamic_elements, 53 | ): 54 | def docstring_decorator(fn): 55 | func_doc = (fn.__doc__ or "") + "".join(docstr) 56 | fn.__doc__ = func_doc + text.format(**dynamic_elements) 57 | return fn 58 | 59 | return docstring_decorator 60 | -------------------------------------------------------------------------------- /optimum/utils/dummy_bettertransformer_objects.py: -------------------------------------------------------------------------------- 1 | from .import_utils import DummyObject, requires_backends 2 | 3 | 4 | class BarkSelfAttention(metaclass=DummyObject): 5 | _backends = ["transformers_431"] 6 | 7 | def __init__(self, *args, **kwargs): 8 | requires_backends(self, ["transformers_431"]) 9 | -------------------------------------------------------------------------------- /optimum/utils/modeling_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import functools 16 | 17 | 18 | MODEL_TO_PATCH_FOR_PAST = { 19 | "bart", 20 | "blenderbot", 21 | "blenderbot-small", 22 | "bloom", 23 | "llama", 24 | "mistral", 25 | "mpt", 26 | "opt", 27 | "pegasus", 28 | } 29 | 30 | 31 | def recurse_getattr(obj, attr: str): 32 | """ 33 | Recursive `getattr`. 34 | 35 | Args: 36 | obj: 37 | A class instance holding the attribute. 38 | attr (`str`): 39 | The attribute that is to be retrieved, e.g. 'attribute1.attribute2'. 40 | """ 41 | 42 | def _getattr(obj, attr): 43 | return getattr(obj, attr) 44 | 45 | return functools.reduce(_getattr, [obj] + attr.split(".")) 46 | 47 | 48 | def recurse_setattr(module, name, value): 49 | """A function to recursively set attributes on a module.""" 50 | if "."
not in name: 51 | setattr(module, name, value) 52 | else: 53 | name, rest = name.split(".", 1) 54 | recurse_setattr(getattr(module, name), rest, value) 55 | -------------------------------------------------------------------------------- /optimum/utils/preprocessing/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from .base import Preprocessor, TaskProcessor 17 | from .image_classification import ImageClassificationProcessing 18 | from .question_answering import QuestionAnsweringProcessing 19 | from .task_processors_manager import TaskProcessorsManager 20 | from .text_classification import TextClassificationProcessing 21 | from .token_classification import TokenClassificationProcessing 22 | -------------------------------------------------------------------------------- /optimum/utils/preprocessing/task_processors_manager.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
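Editor's note: a quick usage illustration of the `recurse_getattr` / `recurse_setattr` helpers from `modeling_utils.py` above; the tiny model used here is only for demonstration.

```python
import torch

from optimum.utils.modeling_utils import recurse_getattr, recurse_setattr

# Dotted paths are resolved one attribute (or submodule) at a time.
model = torch.nn.Sequential(torch.nn.Linear(4, 4))
weight = recurse_getattr(model, "0.weight")

# Replace the nested attribute in place, e.g. with a zeroed copy of the same shape.
recurse_setattr(model, "0.weight", torch.nn.Parameter(torch.zeros_like(weight)))
```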
15 | """Dataset processing factory.""" 16 | 17 | from typing import TYPE_CHECKING, Any, Type 18 | 19 | from optimum.utils.preprocessing.image_classification import ImageClassificationProcessing 20 | from optimum.utils.preprocessing.question_answering import QuestionAnsweringProcessing 21 | from optimum.utils.preprocessing.text_classification import TextClassificationProcessing 22 | from optimum.utils.preprocessing.token_classification import TokenClassificationProcessing 23 | 24 | 25 | if TYPE_CHECKING: 26 | from .base import TaskProcessor 27 | 28 | 29 | class TaskProcessorsManager: 30 | _TASK_TO_DATASET_PROCESSING_CLASS = { 31 | "text-classification": TextClassificationProcessing, 32 | "token-classification": TokenClassificationProcessing, 33 | "question-answering": QuestionAnsweringProcessing, 34 | "image-classification": ImageClassificationProcessing, 35 | } 36 | 37 | @classmethod 38 | def get_task_processor_class_for_task(cls, task: str) -> Type["TaskProcessor"]: 39 | if task not in cls._TASK_TO_DATASET_PROCESSING_CLASS: 40 | supported_tasks = ", ".join(cls._TASK_TO_DATASET_PROCESSING_CLASS.keys()) 41 | raise KeyError( 42 | f"Could not find a `TaskProcessor` class for the task called {task}, supported tasks: " 43 | f"{supported_tasks}." 44 | ) 45 | return cls._TASK_TO_DATASET_PROCESSING_CLASS[task] 46 | 47 | @classmethod 48 | def for_task(cls, task: str, *dataset_processing_args, **dataset_processing_kwargs: Any) -> "TaskProcessor": 49 | return cls.get_task_processor_class_for_task(task)(*dataset_processing_args, **dataset_processing_kwargs) 50 | -------------------------------------------------------------------------------- /optimum/utils/save_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | """Utilities related to saving files.""" 16 | 17 | import logging 18 | from pathlib import Path 19 | from typing import List, Union 20 | 21 | from transformers import AutoFeatureExtractor, AutoImageProcessor, AutoProcessor, AutoTokenizer 22 | 23 | 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | def maybe_load_preprocessors( 28 | src_name_or_path: Union[str, Path], subfolder: str = "", trust_remote_code: bool = False 29 | ) -> List: 30 | preprocessors = [] 31 | try: 32 | preprocessors.append( 33 | AutoTokenizer.from_pretrained(src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code) 34 | ) 35 | except Exception: 36 | pass 37 | 38 | try: 39 | preprocessors.append( 40 | AutoProcessor.from_pretrained(src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code) 41 | ) 42 | except Exception: 43 | pass 44 | 45 | try: 46 | preprocessors.append( 47 | AutoFeatureExtractor.from_pretrained( 48 | src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code 49 | ) 50 | ) 51 | except Exception: 52 | pass 53 | 54 | try: 55 | preprocessors.append( 56 | AutoImageProcessor.from_pretrained( 57 | src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code 58 | ) 59 | ) 60 | except Exception: 61 | pass 62 | return preprocessors 63 | 64 | 65 | def maybe_save_preprocessors( 66 | src_name_or_path: Union[str, Path], 67 | dest_dir: Union[str, Path], 68 | src_subfolder: str = "", 69 | trust_remote_code: bool = False, 70 | ): 71 | """ 72 | Saves the tokenizer, the processor and the feature extractor when found in `src_dir` in `dest_dir`. 73 | 74 | Args: 75 | src_dir (`Union[str, Path]`): 76 | The source directory from which to copy the files. 77 | dest_dir (`Union[str, Path]`): 78 | The destination directory to copy the files to. 79 | src_subfolder (`str`, defaults to `""`): 80 | In case the preprocessor files are located inside a subfolder of the model directory / repo on the Hugging 81 | Face Hub, you can specify the subfolder name here. 82 | trust_remote_code (`bool`, defaults to `False`): 83 | Whether to allow to save preprocessors that is allowed to run arbitrary code. Use this option at your own risk. 84 | """ 85 | if not isinstance(dest_dir, Path): 86 | dest_dir = Path(dest_dir) 87 | 88 | dest_dir.mkdir(exist_ok=True) 89 | for preprocessor in maybe_load_preprocessors( 90 | src_name_or_path, subfolder=src_subfolder, trust_remote_code=trust_remote_code 91 | ): 92 | preprocessor.save_pretrained(dest_dir) 93 | -------------------------------------------------------------------------------- /optimum/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | __version__ = "1.26.0.dev0" 16 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2021 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [tool.black] 16 | line-length = 119 17 | target-version = ['py37'] 18 | 19 | [tool.ruff] 20 | # Never enforce `E501` (line length violations). 21 | ignore = ["C901", "E501", "E741", "W605"] 22 | select = ["C", "E", "F", "I", "W"] 23 | line-length = 119 24 | 25 | # Ignore import violations in all `__init__.py` files. 26 | [tool.ruff.per-file-ignores] 27 | "__init__.py" = ["E402", "F401", "F403", "F811"] 28 | 29 | [tool.ruff.isort] 30 | lines-after-imports = 2 31 | known-first-party = ["optimum"] 32 | 33 | [tool.pytest.ini_options] 34 | markers = [ 35 | "gpu_test", 36 | "cuda_ep_test", 37 | "trt_ep_test", 38 | "rocm_ep_test", 39 | "tensorflow_test", 40 | "datasets_test", 41 | "run_in_series", 42 | "run_slow", 43 | "accelerate_test", 44 | "fp16", 45 | "quantization", 46 | ] 47 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = optimum 7 | line_length = 119 8 | lines_after_imports = 2 9 | multi_line_output = 3 10 | use_parentheses = True 11 | 12 | [flake8] 13 | ignore = E203, E501, E741, W503, W605 14 | max-line-length = 119 15 | 16 | [tool:pytest] 17 | doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS 18 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Helpful tips for testing & debugging optimum 2 | 3 | ## VSCODE 4 | 5 | If you are using VS Code, you might have a hard time getting tests discovered in the "Testing" panel so that you can run or debug them individually. You can copy the snippet below into `.vscode/settings.json`. 6 | 7 | ```json 8 | { 9 | "python.testing.pytestArgs": [ 10 | "tests/onnxruntime", 11 | "tests/test_*" 12 | ], 13 | "python.testing.unittestEnabled": false, 14 | "python.testing.pytestEnabled": true 15 | } 16 | ``` 17 | 18 | This snippet discovers all base tests as well as the tests inside the `tests/onnxruntime` folder.
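Editor's note: outside of VS Code, the custom markers declared under `[tool.pytest.ini_options]` in `pyproject.toml` above can be used to select subsets of the suite. A small sketch using `pytest.main`, equivalent to running `pytest -m gpu_test tests/onnxruntime` on the command line (the path is illustrative):

```python
import pytest

# Run only tests marked with @pytest.mark.gpu_test inside the onnxruntime test folder.
pytest.main(["-m", "gpu_test", "tests/onnxruntime"])
```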
19 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum/76ddcba844a75d2e9ccb4c693cf9688bfeaa9690/tests/__init__.py -------------------------------------------------------------------------------- /tests/assets/onnx/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "_name_or_path": "tiny-distilbert-classification", 3 | "activation": "gelu", 4 | "architectures": [ 5 | "DistilBertForSequenceClassification" 6 | ], 7 | "attention_dropout": 0.1, 8 | "dim": 2, 9 | "dropout": 0.1, 10 | "finetuning_task": "sst-2", 11 | "hidden_dim": 2, 12 | "id2label": { 13 | "0": "NEGATIVE", 14 | "1": "POSITIVE" 15 | }, 16 | "initializer_range": 0.02, 17 | "label2id": { 18 | "NEGATIVE": 0, 19 | "POSITIVE": 1 20 | }, 21 | "max_position_embeddings": 512, 22 | "model_type": "distilbert", 23 | "n_heads": 2, 24 | "n_layers": 2, 25 | "output_past": true, 26 | "pad_token_id": 0, 27 | "qa_dropout": 0.1, 28 | "seq_classif_dropout": 0.2, 29 | "sinusoidal_pos_embds": false, 30 | "tie_weights_": true, 31 | "torch_dtype": "float32", 32 | "transformers_version": "4.10.0.dev0", 33 | "vocab_size": 30522 34 | } 35 | -------------------------------------------------------------------------------- /tests/assets/onnx/model.onnx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum/76ddcba844a75d2e9ccb4c693cf9688bfeaa9690/tests/assets/onnx/model.onnx -------------------------------------------------------------------------------- /tests/bettertransformer/Dockerfile_bettertransformer_gpu: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04 2 | CMD nvidia-smi 3 | 4 | # Ignore interactive questions during `docker build` 5 | ENV DEBIAN_FRONTEND noninteractive 6 | 7 | # Install and update tools to minimize security vulnerabilities 8 | RUN apt-get update 9 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \ 10 | bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev python3-pip && \ 11 | apt-get clean 12 | RUN unattended-upgrade 13 | RUN apt-get autoremove -y 14 | 15 | RUN python3 -m pip install -U pip 16 | 17 | RUN pip install torch torchvision torchaudio 18 | RUN pip install transformers==4.48.* accelerate datasets 19 | 20 | # Install Optimum 21 | COPY . /workspace/optimum 22 | RUN pip install /workspace/optimum[tests] 23 | 24 | ENV RUN_SLOW=1 25 | WORKDIR /workspace/optimum/tests/ 26 | CMD pytest bettertransformer/test_*.py -s --durations=0 -m gpu_test 27 | -------------------------------------------------------------------------------- /tests/cli/cli_with_custom_command.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import os 17 | 18 | from optimum.commands import BaseOptimumCLICommand, CommandInfo, ExportCommand 19 | 20 | 21 | class MyCustomCommand(BaseOptimumCLICommand): 22 | COMMAND = CommandInfo(name="blablabla", help="Says something.") 23 | 24 | def run(self): 25 | print("If the CI can read this, it means it worked!") 26 | 27 | 28 | parent_command_cls = os.environ.get("TEST_REGISTER_COMMAND_WITH_SUBCOMMAND", None) 29 | 30 | if parent_command_cls == "true": 31 | REGISTER_COMMANDS = [ 32 | (MyCustomCommand, ExportCommand), 33 | ] 34 | else: 35 | REGISTER_COMMANDS = [ 36 | MyCustomCommand, 37 | ] 38 | -------------------------------------------------------------------------------- /tests/common/test_configuration_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2021 HuggingFace Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | import tempfile 16 | import unittest 17 | 18 | from huggingface_hub import login 19 | from transformers.testing_utils import TOKEN, TemporaryHubRepo, is_staging_test 20 | 21 | from optimum.configuration_utils import BaseConfig 22 | 23 | 24 | class FakeConfig(BaseConfig): 25 | CONFIG_NAME = "fake_config.json" 26 | FULL_CONFIGURATION_FILE = "fake_config.json" 27 | 28 | def __init__(self, attribute=1, **kwargs): 29 | self.attribute = attribute 30 | super().__init__(**kwargs) 31 | 32 | 33 | class ConfigTester(unittest.TestCase): 34 | def test_create_and_test_config_from_and_save_pretrained(self): 35 | config_first = FakeConfig(attribute=10) 36 | 37 | with tempfile.TemporaryDirectory() as tmpdirname: 38 | config_first.save_pretrained(tmpdirname) 39 | config_second = FakeConfig.from_pretrained(tmpdirname) 40 | 41 | self.assertEqual(config_second.to_dict(), config_first.to_dict()) 42 | 43 | 44 | @is_staging_test 45 | class ConfigPushToHubTester(unittest.TestCase): 46 | @classmethod 47 | def setUpClass(cls): 48 | login(token=TOKEN) 49 | 50 | def test_push_to_hub(self): 51 | config = FakeConfig(attribute=15) 52 | 53 | with TemporaryHubRepo(token=TOKEN) as tmp_repo: 54 | config.push_to_hub(tmp_repo.repo_id, token=TOKEN) 55 | 56 | new_config = FakeConfig.from_pretrained(tmp_repo.repo_id, token=TOKEN) 57 | for k, v in config.to_dict().items(): 58 | if k != "optimum_version" and k != "transformers_version": 59 | self.assertEqual(v, getattr(new_config, k)) 60 | 61 | def test_push_to_hub_in_organization(self): 62 | config = FakeConfig(attribute=15) 63 | 64 | with TemporaryHubRepo(namespace="valid_org", token=TOKEN) as tmp_repo: 65 | config.push_to_hub(tmp_repo.repo_id, token=TOKEN) 66 | new_config = FakeConfig.from_pretrained(tmp_repo.repo_id, token=TOKEN) 67 | for k, v in config.to_dict().items(): 68 | if k != "optimum_version" and k != "transformers_version": 69 | self.assertEqual(v, getattr(new_config, k)) 70 | -------------------------------------------------------------------------------- /tests/exporters/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum/76ddcba844a75d2e9ccb4c693cf9688bfeaa9690/tests/exporters/__init__.py -------------------------------------------------------------------------------- /tests/exporters/onnx/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum/76ddcba844a75d2e9ccb4c693cf9688bfeaa9690/tests/exporters/onnx/__init__.py -------------------------------------------------------------------------------- /tests/exporters/tflite/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/huggingface/optimum/76ddcba844a75d2e9ccb4c693cf9688bfeaa9690/tests/exporters/tflite/__init__.py -------------------------------------------------------------------------------- /tests/fx/parallelization/dist_utils.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import os 16 | from typing import Callable, List, Optional 17 | 18 | import torch 19 | import torch.distributed as dist 20 | import torch.multiprocessing as mp 21 | from transformers import set_seed 22 | 23 | 24 | SEED = 42 25 | NUM_AVAILABLE_DEVICES = torch.cuda.device_count() 26 | 27 | 28 | def dist_init( 29 | rank: int, 30 | world_size: int, 31 | backend: str = "nccl", 32 | master_addr: str = "127.0.0.1", 33 | master_port: str = "29501", 34 | ): 35 | os.environ["RANK"] = str(rank) 36 | os.environ["WORLD_SIZE"] = str(world_size) 37 | os.environ["MASTER_ADDR"] = master_addr 38 | os.environ["MASTER_PORT"] = master_port 39 | 40 | dist.init_process_group( 41 | backend=backend, 42 | init_method="env://", 43 | world_size=world_size, 44 | rank=rank, 45 | ) 46 | 47 | torch.cuda.set_device(rank) 48 | 49 | 50 | def runner(rank: int, fn: Callable, deterministic: bool, *args, **kwargs): 51 | if deterministic: 52 | set_seed(SEED) 53 | fn(rank, *args, **kwargs) 54 | 55 | 56 | def spawn(world_size: int, fn: Callable, *args, deterministic: bool = False): 57 | mp.spawn(fn=runner, args=(fn, deterministic, world_size, *args), nprocs=world_size, join=True) 58 | 59 | 60 | def tearDown(group: Optional[dist.ProcessGroup] = None): 61 | dist.destroy_process_group(group) 62 | 63 | 64 | def gather_at_main_process( 65 | tensor: torch.Tensor, group: dist.ProcessGroup, rank: int, world_size: int 66 | ) -> List[torch.Tensor]: 67 | if world_size == 1: 68 | return [tensor] 69 | 70 | tensor = tensor.contiguous() 71 | if rank == 0: 72 | tensors = [torch.empty_like(tensor) for _ in range(world_size)] 73 | tensors[rank] = tensor 74 | else: 75 | tensors = None 76 | dist.gather(tensor=tensor, gather_list=tensors, dst=0, group=group) 77 | return tensors 78 | -------------------------------------------------------------------------------- /tests/onnx/test_onnx_export_custom_module.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2022 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
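Editor's note: a compact sketch of how the helpers in `dist_utils.py` above are meant to be combined. The worker body is illustrative, and the bare `dist_utils` import assumes the test folder is on `sys.path`, as it is when the tests in that folder run.

```python
import torch.distributed as dist

from dist_utils import NUM_AVAILABLE_DEVICES, dist_init, spawn, tearDown


def worker(rank: int, world_size: int):
    # Each spawned process initializes its own process group and CUDA device.
    dist_init(rank, world_size)
    # A real test would build a model here and compare parallelized vs. reference outputs.
    dist.barrier()
    tearDown()


if NUM_AVAILABLE_DEVICES >= 2:
    # Runs `worker` in 2 processes with a fixed seed, via mp.spawn under the hood.
    spawn(2, worker, deterministic=True)
```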
15 | import os 16 | from unittest import TestCase 17 | 18 | import torch 19 | from transformers.models.sew_d import modeling_sew_d 20 | 21 | 22 | class StableDropoutTestCase(TestCase): 23 | """Tests export of StableDropout module.""" 24 | 25 | def test_training(self): 26 | """Tests export of StableDropout in training mode.""" 27 | 28 | devnull = open(os.devnull, "wb") 29 | # drop_prob must be > 0 for the test to be meaningful 30 | sd = modeling_sew_d.StableDropout(0.1) 31 | # Avoid warnings in training mode 32 | do_constant_folding = False 33 | # Dropout is a no-op in inference mode 34 | training = torch.onnx.TrainingMode.PRESERVE 35 | input = (torch.randn(2, 2),) 36 | 37 | # Expected to pass on torch >= 2.5 38 | torch.onnx.export( 39 | sd, 40 | input, 41 | devnull, 42 | opset_version=12, 43 | do_constant_folding=do_constant_folding, 44 | training=training, 45 | ) 46 | 47 | devnull.close() 48 | 49 | def test_inference(self): 50 | """Tests export of StableDropout in inference mode.""" 51 | 52 | devnull = open(os.devnull, "wb") 53 | # drop_prob must be > 0 for the test to be meaningful 54 | sd = modeling_sew_d.StableDropout(0.1) 55 | # Dropout is a no-op in inference mode 56 | training = torch.onnx.TrainingMode.EVAL 57 | input = (torch.randn(2, 2),) 58 | 59 | # Expected to pass on torch >= 2.5 60 | torch.onnx.export( 61 | sd, 62 | input, 63 | devnull, 64 | opset_version=12, 65 | do_constant_folding=True, 66 | training=training, 67 | ) 68 | 69 | devnull.close() 70 | -------------------------------------------------------------------------------- /tests/onnxruntime-training/ds_configs/ds_config_zero_stage_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "hysteresis": 2, 7 | "min_loss_scale": 1 8 | }, 9 | 10 | "bf16": { 11 | "enabled": "auto" 12 | }, 13 | 14 | "zero_optimization": { 15 | "stage": 1, 16 | "allgather_partitions": true, 17 | "allgather_bucket_size": 2e8, 18 | "overlap_comm": true, 19 | "reduce_scatter": true, 20 | "reduce_bucket_size": 2e8, 21 | "contiguous_gradients": false, 22 | "cpu_offload": false 23 | }, 24 | 25 | "zero_allow_untested_optimizer": true, 26 | 27 | "optimizer": { 28 | "type": "AdamW", 29 | "params": { 30 | "lr": "auto", 31 | "betas": "auto", 32 | "eps": "auto", 33 | "weight_decay": "auto" 34 | } 35 | }, 36 | 37 | "scheduler": { 38 | "type": "WarmupLR", 39 | "params": { 40 | "warmup_min_lr": "auto", 41 | "warmup_max_lr": "auto", 42 | "warmup_num_steps": "auto" 43 | } 44 | }, 45 | 46 | "steps_per_print": 2000, 47 | "train_batch_size": "auto", 48 | "train_micro_batch_size_per_gpu": "auto", 49 | "wall_clock_breakdown": false 50 | } -------------------------------------------------------------------------------- /tests/onnxruntime-training/ds_configs/ds_config_zero_stage_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "bf16": { 3 | "enabled": "auto" 4 | }, 5 | 6 | "fp16": { 7 | "enabled": "auto", 8 | "loss_scale": 0, 9 | "loss_scale_window": 1000, 10 | "initial_scale_power": 16, 11 | "hysteresis": 2, 12 | "min_loss_scale": 1 13 | }, 14 | 15 | "optimizer": { 16 | "type": "AdamW", 17 | "params": { 18 | "lr": "auto", 19 | "betas": "auto", 20 | "eps": "auto", 21 | "weight_decay": "auto" 22 | } 23 | }, 24 | 25 | "scheduler": { 26 | "type": "WarmupLR", 27 | "params": { 28 | "warmup_min_lr": "auto", 29 | "warmup_max_lr": "auto", 30 | "warmup_num_steps": "auto" 31 | } 32 | }, 33 | 34 | 
"zero_optimization": { 35 | "stage": 2, 36 | "offload_optimizer": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "allgather_partitions": true, 41 | "allgather_bucket_size": 2e8, 42 | "overlap_comm": true, 43 | "reduce_scatter": true, 44 | "reduce_bucket_size": 2e8, 45 | "contiguous_gradients": true 46 | }, 47 | 48 | "gradient_accumulation_steps": "auto", 49 | "gradient_clipping": "auto", 50 | "steps_per_print": 2000, 51 | "train_batch_size": "auto", 52 | "train_micro_batch_size_per_gpu": "auto", 53 | "wall_clock_breakdown": false 54 | } 55 | -------------------------------------------------------------------------------- /tests/onnxruntime-training/ds_configs/ds_config_zero_stage_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "fp16": { 3 | "enabled": "auto", 4 | "loss_scale": 0, 5 | "loss_scale_window": 1000, 6 | "initial_scale_power": 16, 7 | "hysteresis": 2, 8 | "min_loss_scale": 1 9 | }, 10 | 11 | "optimizer": { 12 | "type": "AdamW", 13 | "params": { 14 | "lr": "auto", 15 | "betas": "auto", 16 | "eps": "auto", 17 | "weight_decay": "auto" 18 | } 19 | }, 20 | 21 | "scheduler": { 22 | "type": "WarmupLR", 23 | "params": { 24 | "warmup_min_lr": "auto", 25 | "warmup_max_lr": "auto", 26 | "warmup_num_steps": "auto" 27 | } 28 | }, 29 | 30 | "zero_optimization": { 31 | "stage": 3, 32 | "offload_optimizer": { 33 | "device": "cpu", 34 | "pin_memory": true 35 | }, 36 | "offload_param": { 37 | "device": "cpu", 38 | "pin_memory": true 39 | }, 40 | "overlap_comm": true, 41 | "contiguous_gradients": true, 42 | "sub_group_size": 1e9, 43 | "reduce_bucket_size": "auto", 44 | "stage3_prefetch_bucket_size": "auto", 45 | "stage3_param_persistence_threshold": "auto", 46 | "stage3_max_live_parameters": 1e9, 47 | "stage3_max_reuse_distance": 1e9, 48 | "stage3_gather_16bit_weights_on_model_save": true 49 | }, 50 | 51 | "gradient_accumulation_steps": "auto", 52 | "gradient_clipping": "auto", 53 | "steps_per_print": 2000, 54 | "train_batch_size": "auto", 55 | "train_micro_batch_size_per_gpu": "auto", 56 | "wall_clock_breakdown": false 57 | } -------------------------------------------------------------------------------- /tests/onnxruntime-training/ds_configs/ds_config_zero_stage_inifinity.json: -------------------------------------------------------------------------------- 1 | { 2 | "zero_optimization": { 3 | "stage": 3, 4 | "offload_optimizer": { 5 | "device": "nvme", 6 | "nvme_path": "/dev/nvme1n1", 7 | "pin_memory": true, 8 | "buffer_count": 4, 9 | "fast_init": false 10 | }, 11 | "offload_param": { 12 | "device": "nvme", 13 | "nvme_path": "/dev/nvme1n1", 14 | "pin_memory": true, 15 | "buffer_count": 5, 16 | "buffer_size": 1e8, 17 | "max_in_cpu": 1e9 18 | }, 19 | "aio": { 20 | "block_size": 262144, 21 | "queue_depth": 32, 22 | "thread_count": 1, 23 | "single_submit": false, 24 | "overlap_events": true 25 | }, 26 | "overlap_comm": true, 27 | "contiguous_gradients": true, 28 | "sub_group_size": 1e9, 29 | "reduce_bucket_size": "auto", 30 | "stage3_prefetch_bucket_size": "auto", 31 | "stage3_param_persistence_threshold": "auto", 32 | "stage3_max_live_parameters": 1e9, 33 | "stage3_max_reuse_distance": 1e9, 34 | "stage3_gather_16bit_weights_on_model_save": true 35 | } 36 | } -------------------------------------------------------------------------------- /tests/onnxruntime/test_timm.py: -------------------------------------------------------------------------------- 1 | import gc 2 | 3 | import onnxruntime 4 | import requests 5 | import timm 6 | 
import torch 7 | from parameterized import parameterized 8 | from PIL import Image 9 | from testing_utils import ORTModelTestMixin 10 | from transformers import PretrainedConfig 11 | 12 | from optimum.onnxruntime import ORTModelForImageClassification 13 | 14 | 15 | class ORTModelForImageClassificationIntegrationTest(ORTModelTestMixin): 16 | TIMM_SUPPORTED_MODELS = ["timm/inception_v3.tf_adv_in1k"] # only one is required for testing 17 | 18 | @parameterized.expand(TIMM_SUPPORTED_MODELS) 19 | def test_compare_to_timm(self, model_id): 20 | onnx_model = ORTModelForImageClassification.from_pretrained(model_id) 21 | self.assertIsInstance(onnx_model.model, onnxruntime.InferenceSession) 22 | self.assertIsInstance(onnx_model.config, PretrainedConfig) 23 | 24 | timm_model = timm.create_model(model_id, pretrained=True) 25 | timm_model = timm_model.eval() 26 | 27 | # get model specific transforms (normalization, resize) 28 | data_config = timm.data.resolve_model_data_config(timm_model) 29 | transforms = timm.data.create_transform(**data_config, is_training=False) 30 | 31 | url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png" 32 | image = Image.open(requests.get(url, stream=True).raw).convert("RGB") 33 | inputs = transforms(image).unsqueeze(0) 34 | 35 | with torch.no_grad(): 36 | timm_outputs = timm_model(inputs) 37 | 38 | for input_type in ["pt", "np"]: 39 | if input_type == "np": 40 | inputs = inputs.cpu().detach().numpy() 41 | 42 | onnx_outputs = onnx_model(inputs) 43 | 44 | self.assertIn("logits", onnx_outputs) 45 | self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type]) 46 | 47 | # compare tensor outputs 48 | torch.testing.assert_close(torch.Tensor(onnx_outputs.logits), timm_outputs, atol=self.ATOL, rtol=self.RTOL) 49 | 50 | gc.collect() 51 | -------------------------------------------------------------------------------- /tests/onnxruntime/test_utils.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | import unittest 3 | 4 | import onnxruntime as ort 5 | import torch 6 | 7 | from optimum.onnxruntime.configuration import AutoQuantizationConfig, OptimizationConfig, ORTConfig 8 | from optimum.onnxruntime.utils import get_device_for_provider, get_provider_for_device 9 | 10 | 11 | class ProviderAndDeviceGettersTest(unittest.TestCase): 12 | def test_get_device_for_provider(self): 13 | self.assertEqual(get_device_for_provider("CPUExecutionProvider", provider_options={}), torch.device("cpu")) 14 | self.assertEqual( 15 | get_device_for_provider("CUDAExecutionProvider", provider_options={"device_id": 1}), torch.device("cuda:1") 16 | ) 17 | 18 | def test_get_provider_for_device(self): 19 | self.assertEqual(get_provider_for_device(torch.device("cpu")), "CPUExecutionProvider") 20 | 21 | if "ROCMExecutionProvider" in ort.get_available_providers(): 22 | self.assertEqual(get_provider_for_device(torch.device("cuda")), "ROCMExecutionProvider") 23 | else: 24 | self.assertEqual(get_provider_for_device(torch.device("cuda")), "CUDAExecutionProvider") 25 | 26 | 27 | class ORTConfigTest(unittest.TestCase): 28 | def test_save_and_load(self): 29 | with tempfile.TemporaryDirectory() as tmp_dir: 30 | quantization_config = AutoQuantizationConfig.arm64(is_static=False, per_channel=False) 31 | optimization_config = OptimizationConfig(optimization_level=2) 32 | ort_config = ORTConfig(opset=11, quantization=quantization_config, optimization=optimization_config) 33 | 
ort_config.save_pretrained(tmp_dir) 34 | loaded_ort_config = ORTConfig.from_pretrained(tmp_dir) 35 | self.assertEqual(ort_config.to_dict(), loaded_ort_config.to_dict()) 36 | -------------------------------------------------------------------------------- /tests/run_doctest.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # we use 4.48 for compatibility with bettertransformer 3 | pip install .[tests] transformers==4.48.* optuna 4 | python tests/utils/prepare_for_doc_test.py optimum docs 5 | pytest --verbose -s --doctest-modules $(cat tests/utils/documentation_tests.txt) --doctest-continue-on-failure --doctest-glob='*.mdx' 6 | python tests/utils/prepare_for_doc_test.py optimum docs --remove_new_line --------------------------------------------------------------------------------