├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug-report.yml
    │   ├── config.yml
    │   └── feature-request.yml
    ├── PULL_REQUEST_TEMPLATE.md
    └── workflows
    │   ├── build_main_documentation.yml
    │   ├── build_pr_documentation.yml
    │   ├── quality.yml
    │   ├── stale.yml
    │   ├── style_bot.yml
    │   ├── test_bettertransformer.yml
    │   ├── test_cli.yml
    │   ├── test_common.yml
    │   ├── test_exporters_common.yml
    │   ├── test_exporters_onnx.yml
    │   ├── test_exporters_onnx_cli.yml
    │   ├── test_exporters_tflite.yml
    │   ├── test_exporters_tflite_cli.yml
    │   ├── test_fx_automatic_parallelism.yml
    │   ├── test_fx_optimization.yml
    │   ├── test_gptq.yml
    │   ├── test_offline.yml
    │   ├── test_onnx.yml
    │   ├── test_onnxruntime.yml
    │   ├── test_onnxruntime_gpu.yml
    │   ├── test_onnxruntime_slow.yml
    │   ├── test_onnxruntime_training.yml
    │   ├── test_utils.yml
    │   ├── trufflehog.yml
    │   └── upload_pr_documentation.yml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── CONTRIBUTING.md
├── LICENSE
├── MANIFEST.in
├── Makefile
├── README.md
├── docs
    ├── Dockerfile
    ├── README.md
    ├── combine_docs.py
    ├── conftest.py
    └── source
    │   ├── _redirects.yml
    │   ├── _toctree.yml
    │   ├── bettertransformer
    │       ├── overview.mdx
    │       └── tutorials
    │       │   ├── contribute.mdx
    │       │   └── convert.mdx
    │   ├── concept_guides
    │       └── quantization.mdx
    │   ├── exporters
    │       ├── onnx
    │       │   ├── overview.mdx
    │       │   ├── package_reference
    │       │   │   ├── configuration.mdx
    │       │   │   └── export.mdx
    │       │   └── usage_guides
    │       │   │   ├── contribute.mdx
    │       │   │   └── export_a_model.mdx
    │       ├── overview.mdx
    │       ├── task_manager.mdx
    │       └── tflite
    │       │   ├── overview.mdx
    │       │   ├── package_reference
    │       │       ├── configuration.mdx
    │       │       └── export.mdx
    │       │   └── usage_guides
    │       │       ├── contribute.mdx
    │       │       └── export_a_model.mdx
    │   ├── furiosa_overview.mdx
    │   ├── index.mdx
    │   ├── installation.mdx
    │   ├── llm_quantization
    │       └── usage_guides
    │       │   └── quantization.mdx
    │   ├── notebooks.md
    │   ├── nvidia_overview.mdx
    │   ├── onnxruntime
    │       ├── concept_guides
    │       │   └── onnx.mdx
    │       ├── overview.mdx
    │       ├── package_reference
    │       │   ├── configuration.mdx
    │       │   ├── modeling_ort.mdx
    │       │   ├── optimization.mdx
    │       │   ├── quantization.mdx
    │       │   └── trainer.mdx
    │       ├── quickstart.mdx
    │       └── usage_guides
    │       │   ├── amdgpu.mdx
    │       │   ├── gpu.mdx
    │       │   ├── models.mdx
    │       │   ├── optimization.mdx
    │       │   ├── pipelines.mdx
    │       │   ├── quantization.mdx
    │       │   └── trainer.mdx
    │   ├── quicktour.mdx
    │   ├── torch_fx
    │       ├── concept_guides
    │       │   └── symbolic_tracer.mdx
    │       ├── overview.mdx
    │       ├── package_reference
    │       │   └── optimization.mdx
    │       └── usage_guides
    │       │   └── optimization.mdx
    │   └── utils
    │       ├── dummy_input_generators.mdx
    │       └── normalized_config.mdx
├── examples
    └── onnxruntime
    │   ├── optimization
    │       ├── multiple-choice
    │       │   ├── README.md
    │       │   ├── requirements.txt
    │       │   └── run_swag.py
    │       ├── question-answering
    │       │   ├── README.md
    │       │   ├── requirements.txt
    │       │   ├── run_qa.py
    │       │   ├── trainer_qa.py
    │       │   └── utils_qa.py
    │       ├── text-classification
    │       │   ├── README.md
    │       │   ├── requirements.txt
    │       │   └── run_glue.py
    │       └── token-classification
    │       │   ├── README.md
    │       │   ├── requirements.txt
    │       │   └── run_ner.py
    │   ├── quantization
    │       ├── image-classification
    │       │   ├── README.md
    │       │   ├── requirements.txt
    │       │   └── run_image_classification.py
    │       ├── multiple-choice
    │       │   ├── README.md
    │       │   ├── requirements.txt
    │       │   └── run_swag.py
    │       ├── question-answering
    │       │   ├── README.md
    │       │   ├── requirements.txt
    │       │   ├── run_qa.py
    │       │   ├── trainer_qa.py
    │       │   └── utils_qa.py
    │       ├── text-classification
    │       │   ├── README.md
    │       │   ├── requirements.txt
    │       │   └── run_glue.py
    │       └── token-classification
    │       │   ├── README.md
    │       │   ├── requirements.txt
    │       │   └── run_ner.py
    │   └── training
    │       ├── docker
    │           ├── Dockerfile-ort-nightly-cu118
    │           ├── Dockerfile-ort-nightly-rocm57
    │           └── Dockerfile-ort1.17.1-cu118
    │       ├── image-classification
    │           ├── README.md
    │           ├── requirements.txt
    │           └── run_image_classification.py
    │       ├── language-modeling
    │           ├── README.md
    │           ├── requirements.txt
    │           ├── run_clm.py
    │           └── run_mlm.py
    │       ├── question-answering
    │           ├── README.md
    │           ├── requirements.txt
    │           ├── run_qa.py
    │           ├── trainer_qa.py
    │           └── utils_qa.py
    │       ├── stable-diffusion
    │           └── text-to-image
    │           │   ├── README.md
    │           │   ├── requirements.txt
    │           │   └── train_text_to_image.py
    │       ├── summarization
    │           ├── README.md
    │           ├── requirements.txt
    │           └── run_summarization.py
    │       ├── text-classification
    │           ├── README.md
    │           ├── requirements.txt
    │           ├── run_classification.py
    │           ├── run_glue.py
    │           └── zero_stage_2.json
    │       ├── token-classification
    │           ├── README.md
    │           ├── requirements.txt
    │           └── run_ner.py
    │       └── translation
    │           ├── README.md
    │           ├── requirements.txt
    │           └── run_translation.py
├── notebooks
    └── README.md
├── optimum
    ├── bettertransformer
    │   ├── __init__.py
    │   ├── models
    │   │   ├── __init__.py
    │   │   ├── attention.py
    │   │   ├── base.py
    │   │   ├── decoder_models.py
    │   │   └── encoder_models.py
    │   └── transformation.py
    ├── commands
    │   ├── __init__.py
    │   ├── base.py
    │   ├── env.py
    │   ├── export
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── onnx.py
    │   │   └── tflite.py
    │   ├── optimum_cli.py
    │   └── register
    │   │   ├── README.md
    │   │   └── __init__.py
    ├── configuration_utils.py
    ├── conftest.py
    ├── exporters
    │   ├── __init__.py
    │   ├── base.py
    │   ├── error_utils.py
    │   ├── onnx
    │   │   ├── __init__.py
    │   │   ├── __main__.py
    │   │   ├── _traceable_cache.py
    │   │   ├── base.py
    │   │   ├── config.py
    │   │   ├── constants.py
    │   │   ├── convert.py
    │   │   ├── model_configs.py
    │   │   ├── model_patcher.py
    │   │   └── utils.py
    │   ├── tasks.py
    │   ├── tflite
    │   │   ├── __init__.py
    │   │   ├── __main__.py
    │   │   ├── base.py
    │   │   ├── config.py
    │   │   ├── convert.py
    │   │   └── model_configs.py
    │   └── utils.py
    ├── fx
    │   ├── __init__.py
    │   ├── optimization
    │   │   ├── __init__.py
    │   │   └── transformations.py
    │   ├── parallelization
    │   │   ├── __init__.py
    │   │   ├── api.py
    │   │   ├── core.py
    │   │   ├── decomp.py
    │   │   ├── distributed
    │   │   │   ├── __init__.py
    │   │   │   └── dist_ops.py
    │   │   ├── op_registry
    │   │   │   ├── __init__.py
    │   │   │   └── op_handlers.py
    │   │   ├── parallel_layers
    │   │   │   ├── __init__.py
    │   │   │   ├── embedding.py
    │   │   │   ├── linear.py
    │   │   │   └── loss.py
    │   │   ├── passes.py
    │   │   └── utils.py
    │   └── utils.py
    ├── gptq
    │   ├── __init__.py
    │   ├── constants.py
    │   ├── data.py
    │   ├── eval.py
    │   ├── quantizer.py
    │   └── utils.py
    ├── modeling_base.py
    ├── onnx
    │   ├── __init__.py
    │   ├── configuration.py
    │   ├── graph_transformations.py
    │   ├── modeling_seq2seq.py
    │   ├── transformations_utils.py
    │   └── utils.py
    ├── onnxruntime
    │   ├── __init__.py
    │   ├── base.py
    │   ├── configuration.py
    │   ├── constants.py
    │   ├── modeling_decoder.py
    │   ├── modeling_diffusion.py
    │   ├── modeling_ort.py
    │   ├── modeling_seq2seq.py
    │   ├── optimization.py
    │   ├── preprocessors
    │   │   ├── __init__.py
    │   │   ├── passes
    │   │   │   ├── __init__.py
    │   │   │   ├── excluders.py
    │   │   │   ├── fully_connected.py
    │   │   │   ├── gelu.py
    │   │   │   └── layernorm.py
    │   │   └── quantization.py
    │   ├── quantization.py
    │   ├── runs
    │   │   ├── __init__.py
    │   │   ├── calibrator.py
    │   │   └── utils.py
    │   ├── subpackage
    │   │   ├── __init__.py
    │   │   └── commands
    │   │   │   ├── __init__.py
    │   │   │   ├── base.py
    │   │   │   ├── optimize.py
    │   │   │   └── quantize.py
    │   ├── trainer.py
    │   ├── trainer_seq2seq.py
    │   ├── training_args.py
    │   ├── training_args_seq2seq.py
    │   └── utils.py
    ├── pipelines
    │   ├── __init__.py
    │   └── pipelines_base.py
    ├── quantization_base.py
    ├── runs_base.py
    ├── subpackages.py
    ├── utils
    │   ├── __init__.py
    │   ├── constant.py
    │   ├── doc.py
    │   ├── dummy_bettertransformer_objects.py
    │   ├── dummy_diffusers_objects.py
    │   ├── file_utils.py
    │   ├── import_utils.py
    │   ├── input_generators.py
    │   ├── logging.py
    │   ├── modeling_utils.py
    │   ├── normalized_config.py
    │   ├── preprocessing
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── image_classification.py
    │   │   ├── question_answering.py
    │   │   ├── task_processors_manager.py
    │   │   ├── text_classification.py
    │   │   └── token_classification.py
    │   ├── runs.py
    │   ├── save_utils.py
    │   └── testing_utils.py
    └── version.py
├── pyproject.toml
├── setup.cfg
├── setup.py
└── tests
    ├── README.md
    ├── __init__.py
    ├── assets
        └── onnx
        │   ├── config.json
        │   └── model.onnx
    ├── bettertransformer
        ├── Dockerfile_bettertransformer_gpu
        ├── test_audio.py
        ├── test_common.py
        ├── test_decoder.py
        ├── test_encoder.py
        ├── test_encoder_decoder.py
        ├── test_gpu.py
        ├── test_vision.py
        └── testing_utils.py
    ├── cli
        ├── cli_with_custom_command.py
        └── test_cli.py
    ├── common
        └── test_configuration_utils.py
    ├── exporters
        ├── __init__.py
        ├── common
        │   └── test_tasks_manager.py
        ├── onnx
        │   ├── __init__.py
        │   ├── test_export.py
        │   └── test_export_cli.py
        ├── tflite
        │   ├── __init__.py
        │   ├── test_export.py
        │   └── test_export_cli.py
        └── utils.py
    ├── fx
        ├── optimization
        │   └── test_transformations.py
        └── parallelization
        │   ├── dist_utils.py
        │   └── test_tensor_parallel.py
    ├── gptq
        └── test_quantization.py
    ├── onnx
        ├── test_onnx_export_custom_module.py
        └── test_onnx_graph_transformations.py
    ├── onnxruntime-training
        ├── ds_configs
        │   ├── ds_config_zero_stage_1.json
        │   ├── ds_config_zero_stage_2.json
        │   ├── ds_config_zero_stage_3.json
        │   └── ds_config_zero_stage_inifinity.json
        ├── test_examples.py
        └── test_trainer.py
    ├── onnxruntime
        ├── test_decoder.py
        ├── test_diffusion.py
        ├── test_modeling.py
        ├── test_optimization.py
        ├── test_quantization.py
        ├── test_timm.py
        ├── test_utils.py
        └── testing_utils.py
    ├── run_doctest.sh
    └── utils
        ├── documentation_tests.txt
        ├── prepare_for_doc_test.py
        ├── test_dummpy_input_generators.py
        └── test_task_processors.py


/.github/ISSUE_TEMPLATE/bug-report.yml:
--------------------------------------------------------------------------------
 1 | name: "\U0001F41B Bug Report"
 2 | description: Submit a bug report to help us improve Optimum
 3 | labels: [ "bug" ]
 4 | body:
 5 |   - type: textarea
 6 |     id: system-info
 7 |     attributes:
 8 |       label: System Info
 9 |       description: Please share your system info with us.
10 |       render: shell
11 |       placeholder: optimum version, platform, python version, ...
12 |     validations:
13 |       required: true
14 | 
15 |   - type: textarea
16 |     id: who-can-help
17 |     attributes:
18 |       label: Who can help?
19 |       description: |
20 |         Your issue will be replied to more quickly if you can figure out the right person to tag with @.
21 |         If you know how to use git blame, that is the easiest way; otherwise, here is a rough guide of **who to tag**.
22 |         Please tag fewer than 3 people.
23 | 
24 |         - Pipelines: `@philschmid`
25 |         - Export of transformers model to ONNX/TFLite: `@michaelbenayoun`
26 |         - ONNX Runtime: `@JingyaHuang`, `@echarlaix`
27 |         - Intel Neural Compressor: `@echarlaix`
28 |         - Habana: `@regisss`
29 | 
30 |       placeholder: "@Username ..."
31 | 
32 |   - type: checkboxes
33 |     id: information-scripts-examples
34 |     attributes:
35 |       label: Information
36 |       description: 'The problem arises when using:'
37 |       options:
38 |         - label: "The official example scripts"
39 |         - label: "My own modified scripts"
40 | 
41 |   - type: checkboxes
42 |     id: information-tasks
43 |     attributes:
44 |       label: Tasks
45 |       description: "The tasks I am working on are:"
46 |       options:
47 |         - label: "An officially supported task in the `examples` folder (such as GLUE/SQuAD, ...)"
48 |         - label: "My own task or dataset (give details below)"
49 | 
50 |   - type: textarea
51 |     id: reproduction
52 |     validations:
53 |       required: true
54 |     attributes:
55 |       label: Reproduction (minimal, reproducible, runnable)
56 |       description: |
57 |         Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
58 |         If you have code snippets, error messages, or stack traces, please provide them here as well.
59 |         Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
60 |         Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
61 |         Providing a **minimal**, **runnable** reproduction using a **publicly available model** significantly increases the chances of a timely fix.
62 | 
63 |       placeholder: |
64 |         Providing a minimal, runnable reproduction using a publicly available model significantly increases the chances of a timely fix.
65 | 
66 | 
67 |   - type: textarea
68 |     id: expected-behavior
69 |     validations:
70 |       required: true
71 |     attributes:
72 |       label: Expected behavior
73 |       description: "A clear and concise description of what you would expect to happen."
74 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/config.yml:
--------------------------------------------------------------------------------
1 | blank_issues_enabled: true
2 | contact_links:
3 |   - name: Website Related
4 |     url: https://github.com/huggingface/hub-docs/issues
5 |     about: Feature requests and bug reports related to the website
6 |   - name: Forum
7 |     url: https://discuss.huggingface.co/
8 |     about: General usage questions and community discussions


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature-request.yml:
--------------------------------------------------------------------------------
 1 | name: "\U0001F680 Feature request"
 2 | description: Submit a proposal/request for a new optimum feature
 3 | labels: [ "feature" ]
 4 | body:
 5 |   - type: textarea
 6 |     id: feature-request
 7 |     validations:
 8 |       required: true
 9 |     attributes:
10 |       label: Feature request
11 |       description: |
12 |         A clear and concise description of the feature proposal. Please provide a link to the paper and code if they exist.
13 | 
14 |   - type: textarea
15 |     id: motivation
16 |     validations:
17 |       required: true
18 |     attributes:
19 |       label: Motivation
20 |       description: |
21 |         Please outline the motivation for the proposal. Is your feature request related to a problem? For example: "I'm always frustrated when [...]". If this is related to another GitHub issue, please link to it here as well.
22 | 
23 | 
24 |   - type: textarea
25 |     id: contribution
26 |     validations:
27 |       required: true
28 |     attributes:
29 |       label: Your contribution
30 |       description: |
31 |         Is there any way that you could help, e.g. by submitting a PR?
32 | 


--------------------------------------------------------------------------------
/.github/PULL_REQUEST_TEMPLATE.md:
--------------------------------------------------------------------------------
 1 | # What does this PR do?
 2 | 
 3 | <!--
 4 | Congratulations! You've made it this far! You're not quite done yet though.
 5 | 
 6 | Once merged, your PR is going to appear in the release notes with the title you set, so make sure it's a great title that fully reflects the extent of your awesome contribution.
 7 | 
 8 | Then, please replace this with a description of the change and which issue is fixed (if applicable). Please also include relevant motivation and context. List any dependencies (if any) that are required for this change.
 9 | 
10 | Once you're done, someone will review your PR shortly (see the section "Who can review?" below to tag some potential reviewers). They may suggest changes to make the code even better. If no one has reviewed your PR after a week, don't hesitate to post a new comment @-mentioning the same people; sometimes notifications get lost.
11 | -->
12 | 
13 | <!-- Remove if not applicable -->
14 | 
15 | Fixes # (issue)
16 | 
17 | 
18 | ## Before submitting
19 | - [ ] This PR fixes a typo or improves the docs (you can dismiss the other checks if that's the case).
20 | - [ ] Did you make sure to update the documentation with your changes?
21 | - [ ] Did you write any new necessary tests?
22 | 
23 | ## Who can review?
24 | 
25 | <!--
26 | For faster review, we strongly recommend pinging the following people:
27 | - ONNX / ONNX Runtime : @fxmarty, @echarlaix, @JingyaHuang, @michaelbenayoun
28 | - ONNX Runtime Training: @JingyaHuang
29 | - BetterTransformer: @fxmarty
30 | - GPTQ, quantization: @fxmarty, @SunMarc
31 | - TFLite export: @michaelbenayoun
32 | -->
33 | 


--------------------------------------------------------------------------------
/.github/workflows/quality.yml:
--------------------------------------------------------------------------------
 1 | name: Code Quality
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | concurrency:
10 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11 |   cancel-in-progress: true
12 | 
13 | jobs:
14 |   build:
15 |     strategy:
16 |       fail-fast: false
17 |       matrix:
18 |         python-version: [3.9]
19 |         runs-on: [ubuntu-22.04]
20 | 
21 |     runs-on: ${{ matrix.runs-on }}
22 | 
23 |     steps:
24 |       - name: Checkout code
25 |         uses: actions/checkout@v4
26 | 
27 |       - name: Setup Python ${{ matrix.python-version }}
28 |         uses: actions/setup-python@v5
29 |         with:
30 |           python-version: ${{ matrix.python-version }}
31 | 
32 |       - name: Install dependencies
33 |         run: |
34 |           pip install --upgrade pip
35 |           pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
36 |           pip install .[quality]
37 | 
38 |       - name: Check style with black
39 |         run: |
40 |           black --check .
41 | 
42 |       - name: Check style with ruff
43 |         run: |
44 |           ruff .
45 | 
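Note: the two style checks above can be reproduced locally before opening a PR; a minimal sketch, assuming a checkout of the repository (the `[quality]` extra installs the pinned tool versions):

    pip install .[quality]   # same pinned black/ruff versions as CI
    black --check .          # fails if any file would be reformatted
    ruff .                   # lints with the repository's ruff configuration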


--------------------------------------------------------------------------------
/.github/workflows/stale.yml:
--------------------------------------------------------------------------------
 1 | name: Close stale issues and PRs
 2 | 
 3 | on:
 4 |   schedule:
 5 |     - cron: '30 1 * * *'
 6 | 
 7 | permissions:
 8 |   issues: write
 9 |   pull-requests: write
10 | 
11 | jobs:
12 |   stale:
13 |     runs-on: ubuntu-latest
14 |     steps:
15 |       - uses: actions/stale@v8
16 |         with:
17 |           stale-issue-message: 'This issue has been marked as stale because it has been open for 30 days with no activity. This thread will be automatically closed in 5 days if no further activity occurs.'
18 |           stale-pr-message: 'This PR has been marked as stale because it has been open for 90 days with no activity. This thread will be automatically closed in 30 days if no further activity occurs.'
19 |           exempt-issue-labels: 'bug,exporters,good first issue,onnx,onnxruntime,quantization'
20 |           days-before-issue-stale: 30
21 |           days-before-issue-close: 5
22 |           days-before-pr-stale: 90
23 |           days-before-pr-close: 30
24 |           exempt-all-pr-assignees: true


--------------------------------------------------------------------------------
/.github/workflows/style_bot.yml:
--------------------------------------------------------------------------------
 1 | name: Style Bot
 2 | 
 3 | on:
 4 |   issue_comment:
 5 |     types: [created]
 6 | 
 7 | permissions:
 8 |   pull-requests: write
 9 | 
10 | jobs:
11 |   style:
12 |     uses: huggingface/huggingface_hub/.github/workflows/style-bot-action.yml@main
13 |     with:
14 |       python_quality_dependencies: "[quality]"
15 |       style_command_type: "style_only"
16 |     secrets:
17 |       bot_token: ${{ secrets.HF_STYLE_BOT_ACTION }}
18 | 


--------------------------------------------------------------------------------
/.github/workflows/test_bettertransformer.yml:
--------------------------------------------------------------------------------
 1 | name: BetterTransformer / Python - Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | concurrency:
10 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11 |   cancel-in-progress: true
12 | 
13 | env:
14 |   TRANSFORMERS_IS_CI: true
15 | 
16 | jobs:
17 |   build:
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         python-version: [3.9]
22 |         runs-on: [ubuntu-22.04]
23 | 
24 |     runs-on: ${{ matrix.runs-on }}
25 | 
26 |     steps:
27 |       - name: Checkout code
28 |         uses: actions/checkout@v4
29 | 
30 |       - name: Setup Python ${{ matrix.python-version }}
31 |         uses: actions/setup-python@v5
32 |         with:
33 |           python-version: ${{ matrix.python-version }}
34 | 
35 |       - name: Install dependencies (stable pytorch)
36 |         run: |
37 |           pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
38 |           pip install .[tests] transformers==4.48.*
39 | 
40 |       - name: Test with pytest (stable pytorch)
41 |         run: |
42 |           pytest tests/bettertransformer -n auto -vvvvv
43 | 
44 |       - name: Install dependencies (nightly pytorch)
45 |         run: |
46 |           pip install --pre --upgrade torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
47 | 
48 |       - name: Test with pytest (nightly pytorch)
49 |         run: |
50 |           pytest tests/bettertransformer -n auto -vvvv
51 | 


--------------------------------------------------------------------------------
/.github/workflows/test_cli.yml:
--------------------------------------------------------------------------------
 1 | name: Optimum CLI / Python - Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | concurrency:
10 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11 |   cancel-in-progress: true
12 | 
13 | env:
14 |   TRANSFORMERS_IS_CI: true
15 | 
16 | jobs:
17 |   build:
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         python-version: [3.9]
22 |         runs-on: [ubuntu-22.04, macos-13, windows-2022]
23 | 
24 |     runs-on: ${{ matrix.runs-on }}
25 | 
26 |     steps:
27 |       - name: Checkout code
28 |         uses: actions/checkout@v4
29 | 
30 |       - name: Setup Python ${{ matrix.python-version }}
31 |         uses: actions/setup-python@v5
32 |         with:
33 |           python-version: ${{ matrix.python-version }}
34 | 
35 |       - name: Install dependencies
36 |         run: |
37 |           pip install --upgrade pip
38 |           pip install --no-cache-dir torch==2.1.2 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
39 |           pip install .[tests,exporters,exporters-tf]
40 | 
41 |       - name: Test with pytest
42 |         run: |
43 |           pytest tests/cli -vvvv --durations=0
44 | 


--------------------------------------------------------------------------------
/.github/workflows/test_common.yml:
--------------------------------------------------------------------------------
 1 | name: Common / Python - Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | concurrency:
10 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11 |   cancel-in-progress: true
12 | 
13 | env:
14 |   TRANSFORMERS_IS_CI: true
15 | 
16 | jobs:
17 |   build:
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         python-version: [3.9]
22 |         runs-on: [ubuntu-22.04, windows-2022, macos-14]
23 | 
24 |     runs-on: ${{ matrix.runs-on }}
25 | 
26 |     steps:
27 |       - name: Checkout code
28 |         uses: actions/checkout@v4
29 | 
30 |       - name: Setup Python ${{ matrix.python-version }}
31 |         uses: actions/setup-python@v5
32 |         with:
33 |           python-version: ${{ matrix.python-version }}
34 | 
35 |       - name: Install dependencies
36 |         run: |
37 |           pip install --upgrade pip
38 |           pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
39 |           pip install .[tests]
40 | 
41 |       - name: Test with pytest
42 |         run: |
43 |           pytest tests/common -vvvv --durations=0
44 |         env:
45 |           HUGGINGFACE_CO_STAGING: ${{ matrix.python-version == '3.9' && matrix.runs-on == 'ubuntu-22.04' }}
46 | 


--------------------------------------------------------------------------------
/.github/workflows/test_exporters_common.yml:
--------------------------------------------------------------------------------
 1 | name: Exporters Common / Python - Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | concurrency:
10 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11 |   cancel-in-progress: true
12 | 
13 | env:
14 |   TRANSFORMERS_IS_CI: true
15 | 
16 | jobs:
17 |   build:
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         runs-on: [ubuntu-22.04]
22 |         python-version: [3.9]
23 | 
24 |     runs-on: ${{ matrix.runs-on }}
25 | 
26 |     steps:
27 |       - name: Checkout code
28 |         uses: actions/checkout@v4
29 | 
30 |       - name: Setup Python ${{ matrix.python-version }}
31 |         uses: actions/setup-python@v5
32 |         with:
33 |           python-version: ${{ matrix.python-version }}
34 | 
35 |       - name: Install dependencies
36 |         run: |
37 |           pip install --upgrade pip
38 |           pip install --no-cache-dir torch==2.1.2 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
39 |           pip install .[tests,exporters,exporters-tf]
40 | 
41 |       - name: Test with pytest
42 |         run: |
43 |           pytest tests/exporters/common -vvvv --durations=0 -n auto
44 | 


--------------------------------------------------------------------------------
/.github/workflows/test_exporters_onnx.yml:
--------------------------------------------------------------------------------
 1 | name: Exporters ONNX / Python - Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | concurrency:
10 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11 |   cancel-in-progress: true
12 | 
13 | env:
14 |   TRANSFORMERS_IS_CI: true
15 | 
16 | jobs:
17 |   build:
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         python-version: [3.9]
22 |         runs-on: [ubuntu-22.04]
23 | 
24 |     runs-on: ${{ matrix.runs-on }}
25 | 
26 |     steps:
27 |       - name: Checkout code
28 |         uses: actions/checkout@v4
29 | 
30 |       - name: Setup Python ${{ matrix.python-version }}
31 |         uses: actions/setup-python@v5
32 |         with:
33 |           python-version: ${{ matrix.python-version }}
34 | 
35 |       - name: Install dependencies
36 |         run: |
37 |           pip install --upgrade pip
38 |           pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
39 |           pip install .[tests,exporters] diffusers
40 | 
41 |       - name: Test with pytest
42 |         run: |
43 |           pytest tests/exporters/onnx/test_export.py -vvvv --durations=0 -n auto
44 | 


--------------------------------------------------------------------------------
/.github/workflows/test_exporters_onnx_cli.yml:
--------------------------------------------------------------------------------
 1 | name: Exporters ONNX CLI / Python - Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | concurrency:
10 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11 |   cancel-in-progress: true
12 | 
13 | env:
14 |   TRANSFORMERS_IS_CI: true
15 | 
16 | jobs:
17 |   build:
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         python-version: [3.9]
22 |         os: [ubuntu-22.04]
23 | 
24 |     runs-on: ${{ matrix.os }}
25 | 
26 |     steps:
27 |       - name: Checkout repository
28 |         uses: actions/checkout@v4
29 | 
30 |       - name: Setup Python ${{ matrix.python-version }}
31 |         uses: actions/setup-python@v5
32 |         with:
33 |           python-version: ${{ matrix.python-version }}
34 | 
35 |       - name: Install dependencies
36 |         run: |
37 |           pip install --upgrade pip
38 |           pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
39 |           pip install .[tests,exporters] diffusers
40 | 
41 |       - name: Test with pytest
42 |         run: |
43 |           pytest tests/exporters/onnx/test_export_cli.py -vvvv --durations=0 -n auto
44 | 


--------------------------------------------------------------------------------
/.github/workflows/test_exporters_tflite.yml:
--------------------------------------------------------------------------------
 1 | name: Exporters TFLite / Python - Test
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   push:
 6 |     branches: [main]
 7 |   pull_request:
 8 |     branches: [main]
 9 |     types: [opened, synchronize, reopened, labeled, unlabeled]
10 | 
11 | concurrency:
12 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
13 |   cancel-in-progress: true
14 | 
15 | env:
16 |   TRANSFORMERS_IS_CI: true
17 | 
18 | jobs:
19 |   build:
20 |     strategy:
21 |       fail-fast: false
22 |       matrix:
23 |         python-version: [3.9]
24 |         runs-on: [ubuntu-22.04]
25 |         quantization_schema:
26 |           [
27 |             "not quantization",
28 |             float16_quantization,
29 |             int8_dynamic_quantization,
30 |             int8_quantization_with_custom_dataset,
31 |             int8_quantization_with_default_dataset,
32 |             int8x16_quantization_with_default_dataset,
33 |             full_int8_quantization_with_default_dataset,
34 |           ]
35 | 
36 |     runs-on: ${{ matrix.runs-on }}
37 | 
38 |     steps:
39 |       - name: Checkout code
40 |         uses: actions/checkout@v4
41 | 
42 |       - name: Setup Python ${{ matrix.python-version }}
43 |         uses: actions/setup-python@v5
44 |         with:
45 |           python-version: ${{ matrix.python-version }}
46 | 
47 |       - name: Install dependencies
48 |         run: |
49 |           pip install --upgrade pip
50 |           pip install --no-cache-dir torch==2.1.2 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
51 |           pip install .[tests,exporters-tf]
52 | 
53 |       - name: Test with pytest
54 |         run: |
55 |           pytest tests/exporters/tflite/test_export.py -k "${{ matrix.quantization_schema }}" -vvvv --durations=0 -n auto
56 | 


--------------------------------------------------------------------------------
/.github/workflows/test_exporters_tflite_cli.yml:
--------------------------------------------------------------------------------
 1 | name: Exporters TFLite CLI / Python - Test
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   push:
 6 |     branches: [main]
 7 |   pull_request:
 8 |     branches: [main]
 9 |     types: [opened, synchronize, reopened, labeled, unlabeled]
10 | 
11 | concurrency:
12 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
13 |   cancel-in-progress: true
14 | 
15 | env:
16 |   TRANSFORMERS_IS_CI: true
17 | 
18 | jobs:
19 |   build:
20 |     strategy:
21 |       fail-fast: false
22 |       matrix:
23 |         python-version: [3.9]
24 |         runs-on: [ubuntu-22.04]
25 |         quantization_schema:
26 |           [
27 |             "not quantization",
28 |             float16_quantization,
29 |             int8_dynamic_quantization,
30 |             int8_quantization_with_custom_dataset,
31 |             int8_quantization_with_default_dataset,
32 |             int8x16_quantization_with_default_dataset,
33 |             full_int8_quantization_with_default_dataset,
34 |           ]
35 | 
36 |     runs-on: ${{ matrix.runs-on }}
37 | 
38 |     steps:
39 |       - name: Checkout code
40 |         uses: actions/checkout@v4
41 | 
42 |       - name: Setup Python ${{ matrix.python-version }}
43 |         uses: actions/setup-python@v5
44 |         with:
45 |           python-version: ${{ matrix.python-version }}
46 | 
47 |       - name: Install dependencies
48 |         run: |
49 |           pip install --upgrade pip
50 |           pip install --no-cache-dir torch==2.1.2 torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
51 |           pip install .[tests,exporters-tf]
52 | 
53 |       - name: Test with pytest
54 |         run: |
55 |           pytest tests/exporters/tflite/test_export_cli.py -k "${{ matrix.quantization_schema }}" -vvvv --durations=0 -n auto
56 | 


--------------------------------------------------------------------------------
/.github/workflows/test_fx_automatic_parallelism.yml:
--------------------------------------------------------------------------------
 1 | name: FX Automatic Parallelism on GPU / Python - Test
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   # TODO: Uncomment when fixed
 6 |   # push:
 7 |   #   branches:
 8 |   #     - main
 9 |   #   paths:
10 |   #     - 'optimum/fx/parallelization/**.py'
11 |   # pull_request:
12 |   #   branches:
13 |   #     - main
14 |   #   paths:
15 |   #     - 'optimum/fx/parallelization/**.py'
16 | 
17 | concurrency:
18 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
19 |   cancel-in-progress: true
20 | 
21 | env:
22 |   TRANSFORMERS_IS_CI: true
23 | 
24 | jobs:
25 |   run_gpu_tests:
26 |     runs-on:
27 |       group: aws-g5-12xlarge-plus
28 | 
29 |     container:
30 |       image: nvidia/cuda:12.4.1-devel-ubuntu22.04
31 |       options: --mount type=tmpfs,destination=/tmp --shm-size 64gb --gpus all --ipc host -v /mnt/hf_cache:/mnt/cache/
32 |       env:
33 |         NCCL_DEBUG: INFO
34 |     defaults:
35 |       run:
36 |         shell: bash
37 | 
38 |     steps:
39 |       - name: Checkout optimum
40 |         uses: actions/checkout@v4
41 |         with:
42 |           fetch-depth: 1
43 | 
44 |       - uses: actions/setup-python@v5
45 |         with:
46 |           python-version: "3.10"
47 | 
48 |       - name: Run nvidia-smi
49 |         run: |
50 |           nvidia-smi
51 | 
52 |       - name: Install dependencies
53 |         run: |
54 |           pip install -U pip
55 |           pip install .[tests]
56 | 
57 |       - name: Run automatic model parallelism tests
58 |         run: |
59 |           pytest tests/fx/parallelization -s -v -o log_cli=true 
60 | 


--------------------------------------------------------------------------------
/.github/workflows/test_fx_optimization.yml:
--------------------------------------------------------------------------------
 1 | name: FX Optimization / Python - Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | concurrency:
10 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11 |   cancel-in-progress: true
12 | 
13 | env:
14 |   TRANSFORMERS_IS_CI: true
15 | 
16 | jobs:
17 |   build:
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         python-version: [3.9]
22 |         os: [ubuntu-22.04]
23 | 
24 |     runs-on: ${{ matrix.os }}
25 |     steps:
26 |       - name: Checkout code
27 |         uses: actions/checkout@v4
28 | 
29 |       - name: Setup Python ${{ matrix.python-version }}
30 |         uses: actions/setup-python@v5
31 |         with:
32 |           python-version: ${{ matrix.python-version }}
33 | 
34 |       - name: Install dependencies
35 |         run: |
36 |           pip install --upgrade pip
37 |           pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
38 |           pip install .[tests]
39 | 
40 |       - name: Test with pytest
41 |         run: |
42 |           pytest tests/fx/optimization -n auto -vvvv
43 | 


--------------------------------------------------------------------------------
/.github/workflows/test_gptq.yml:
--------------------------------------------------------------------------------
 1 | name: GPTQ / Python - Test
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   push:
 6 |     branches: [main]
 7 |     paths:
 8 |       - tests/gptq/**
 9 |       - optimum/gptq/**
10 |       - .github/workflows/test_gptq.yml
11 |   pull_request:
12 |     branches: [main]
13 |     paths:
14 |       - tests/gptq/**
15 |       - optimum/gptq/**
16 |       - .github/workflows/test_gptq.yml
17 |   schedule:
18 |     # every day at midnight
19 |     - cron: "0 0 * * *"
20 | 
21 | jobs:
22 |   test_gptq:
23 |     runs-on:
24 |       group: aws-g6-4xlarge-plus
25 | 
26 |     steps:
27 |       - name: Checkout code
28 |         uses: actions/checkout@v4
29 | 
30 |       - name: Run tests
31 |         uses: addnab/docker-run-action@v3
32 |         with:
33 |           image: pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime
34 |           # latest auto-gptq was built with pytorch 2.2 and cuda 12.1
35 |           options: |
36 |             --rm
37 |             --gpus all
38 |             --shm-size 16G
39 |             --env RUN_SLOW=1
40 |             --env HF_HOME=/mnt/cache/
41 |             --volume /mnt/cache/:/mnt/cache/
42 |             --volume ${{ github.workspace }}:/workspace
43 |             --workdir /workspace
44 |           run: |
45 |             pip install auto-gptq
46 |             pip install -e .[tests]
47 |             pytest tests/gptq -s -vvvv --durations=0
48 | 
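Note: the `docker-run-action` step above corresponds roughly to a direct `docker run` invocation; a sketch with the same image and options (the `/mnt/cache/` volume is specific to the CI host):

    docker run --rm --gpus all --shm-size 16G \
      --env RUN_SLOW=1 --env HF_HOME=/mnt/cache/ \
      --volume /mnt/cache/:/mnt/cache/ \
      --volume "$PWD":/workspace --workdir /workspace \
      pytorch/pytorch:2.2.2-cuda12.1-cudnn8-runtime \
      bash -c 'pip install auto-gptq && pip install -e .[tests] && pytest tests/gptq -s -vvvv --durations=0'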


--------------------------------------------------------------------------------
/.github/workflows/test_offline.yml:
--------------------------------------------------------------------------------
 1 | name: Offline usage / Python - Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | concurrency:
10 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11 |   cancel-in-progress: true
12 | 
13 | env:
14 |   TRANSFORMERS_IS_CI: true
15 | 
16 | jobs:
17 |   build:
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         python-version: [3.9]
22 |         runs-on: [ubuntu-22.04]
23 | 
24 |     runs-on: ${{ matrix.runs-on }}
25 | 
26 |     steps:
27 |       - name: Checkout code
28 |         uses: actions/checkout@v4
29 | 
30 |       - name: Setup Python ${{ matrix.python-version }}
31 |         uses: actions/setup-python@v5
32 |         with:
33 |           python-version: ${{ matrix.python-version }}
34 | 
35 |       - name: Install dependencies for pytorch export
36 |         run: |
37 |           pip install .[tests,exporters,onnxruntime]
38 | 
39 |       - name: Test with pytest
40 |         run: |
41 |           HF_HOME=/tmp/ huggingface-cli download hf-internal-testing/tiny-random-gpt2
42 | 
43 |           HF_HOME=/tmp/ HF_HUB_OFFLINE=1 optimum-cli export onnx --model hf-internal-testing/tiny-random-gpt2 gpt2_onnx --task text-generation
44 | 
45 |           huggingface-cli download hf-internal-testing/tiny-random-gpt2
46 | 
47 |           HF_HUB_OFFLINE=1 optimum-cli export onnx --model hf-internal-testing/tiny-random-gpt2 gpt2_onnx --task text-generation
48 | 
49 |           pytest tests/onnxruntime/test_modeling.py -k "test_load_model_from_hub and not from_hub_onnx" -s -vvvvv
50 | 
51 |           HF_HUB_OFFLINE=1 pytest tests/onnxruntime/test_modeling.py -k "test_load_model_from_hub and not from_hub_onnx" -s -vvvvv
52 | 
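Note: the steps above encode the pattern this workflow guards: populate the Hub cache while online, then verify that the identical command succeeds with networking disabled. A minimal local sketch, assuming the `[exporters,onnxruntime]` extras are installed:

    # 1) warm the local Hub cache while online
    huggingface-cli download hf-internal-testing/tiny-random-gpt2

    # 2) repeat the export fully offline; the model must now resolve from the cache
    HF_HUB_OFFLINE=1 optimum-cli export onnx \
      --model hf-internal-testing/tiny-random-gpt2 gpt2_onnx --task text-generation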


--------------------------------------------------------------------------------
/.github/workflows/test_onnx.yml:
--------------------------------------------------------------------------------
 1 | name: ONNX / Python - Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | concurrency:
10 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11 |   cancel-in-progress: true
12 | 
13 | env:
14 |   TRANSFORMERS_IS_CI: true
15 | 
16 | jobs:
17 |   build:
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         python-version: [3.9]
22 |         runs-on: [ubuntu-22.04]
23 | 
24 |     runs-on: ${{ matrix.runs-on }}
25 | 
26 |     steps:
27 |     - name: Checkout code
28 |       uses: actions/checkout@v4
29 | 
30 |     - name: Setup Python ${{ matrix.python-version }}
31 |       uses: actions/setup-python@v5
32 |       with:
33 |         python-version: ${{ matrix.python-version }}
34 | 
35 |     - name: Install dependencies
36 |       run: |
37 |         pip install --upgrade pip
38 |         pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
39 |         pip install .[tests,exporters] diffusers
40 |   
41 |     - name: Test with pytest
42 |       run: |
43 |         pytest tests/onnx -n auto -vvvv --durations=0
44 | 


--------------------------------------------------------------------------------
/.github/workflows/test_onnxruntime.yml:
--------------------------------------------------------------------------------
 1 | name: ONNX Runtime / Python - Test
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   push:
 6 |     branches: [main]
 7 |   pull_request:
 8 |     branches: [main]
 9 |     types:
10 |       - opened
11 |       - labeled
12 |       - reopened
13 |       - unlabeled
14 |       - synchronize
15 | 
16 | concurrency:
17 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
18 |   cancel-in-progress: true
19 | 
20 | env:
21 |   TRANSFORMERS_IS_CI: true
22 | 
23 | jobs:
24 |   build:
25 |     strategy:
26 |       fail-fast: false
27 |       matrix:
28 |         python-version: [3.9]
29 |         runs-on: [ubuntu-22.04]
30 |         test_file:
31 |           [
32 |             test_timm.py,
33 |             test_decoder.py,
34 |             test_modeling.py,
35 |             test_diffusion.py,
36 |             test_optimization.py,
37 |             test_quantization.py,
38 |             test_utils.py,
39 |           ]
40 | 
41 |     runs-on: ${{ matrix.runs-on }}
42 | 
43 |     steps:
44 |       - name: Free Disk Space (Ubuntu)
45 |         if: matrix.test_file == 'test_modeling.py'
46 |         uses: jlumbroso/free-disk-space@main
47 | 
48 |       - name: Checkout code
49 |         uses: actions/checkout@v4
50 | 
51 |       - name: Setup Python ${{ matrix.python-version }}
52 |         uses: actions/setup-python@v5
53 |         with:
54 |           python-version: ${{ matrix.python-version }}
55 | 
56 |       - name: Install dependencies
57 |         run: |
58 |           pip install --upgrade pip
59 |           pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
60 |           pip install .[tests,onnxruntime] diffusers
61 | 
62 |       - name: Test with pytest (in series)
63 |         if: matrix.test_file == 'test_modeling.py'
64 |         run: |
65 |           pytest tests/onnxruntime/test_modeling.py -m "run_in_series" --durations=0 -vvvv
66 | 
67 |       - name: Test with pytest (in parallel)
68 |         run: |
69 |           pytest tests/onnxruntime/${{ matrix.test_file }} -m "not run_in_series" --durations=0 -vvvv -n auto
70 |         env:
71 |           HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
72 | 


--------------------------------------------------------------------------------
/.github/workflows/test_onnxruntime_gpu.yml:
--------------------------------------------------------------------------------
 1 | name: ONNX Runtime GPU / Python - Test
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   schedule:
 6 |     - cron: 0 7 * * * # every day at 7am UTC
 7 |   pull_request:
 8 |     branches: [main]
 9 |     types:
10 |       - opened
11 |       - labeled
12 |       - reopened
13 |       - unlabeled
14 |       - synchronize
15 | 
16 | concurrency:
17 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
18 |   cancel-in-progress: true
19 | 
20 | jobs:
21 |   build:
22 |     if: ${{
23 |       (github.event_name == 'schedule') ||
24 |       (github.event_name == 'workflow_dispatch') ||
25 |       contains(github.event.pull_request.labels.*.name, 'gpu') ||
26 |       contains(github.event.pull_request.labels.*.name, 'onnxruntime-gpu')
27 |       }}
28 | 
29 |     runs-on:
30 |       group: aws-g6-4xlarge-plus
31 | 
32 |     container:
33 |       image: nvcr.io/nvidia/tensorrt:24.12-py3
34 |       options: --gpus all
35 | 
36 |     steps:
37 |       - name: Checkout
38 |         uses: actions/checkout@v4
39 | 
40 |       - name: Setup Python
41 |         uses: actions/setup-python@v5
42 |         with:
43 |           python-version: "3.9"
44 | 
45 |       - name: Install dependencies
46 |         run: |
47 |           pip install --upgrade pip
48 |           pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124
49 |           pip install .[tests,onnxruntime-gpu] diffusers
50 | 
51 |       - name: Test with pytest
52 |         run: |
53 |           pytest tests/onnxruntime -m "cuda_ep_test or trt_ep_test" --durations=0 -vvvv -n auto
54 | 


--------------------------------------------------------------------------------
/.github/workflows/test_onnxruntime_slow.yml:
--------------------------------------------------------------------------------
 1 | name: ONNX Runtime Slow / Python - Test
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   schedule:
 6 |     - cron: 0 7 * * * # every day at 7am UTC
 7 |   pull_request:
 8 |     branches: [main]
 9 |     types:
10 |       - opened
11 |       - labeled
12 |       - reopened
13 |       - unlabeled
14 |       - synchronize
15 | 
16 | concurrency:
17 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
18 |   cancel-in-progress: true
19 | 
20 | env:
21 |   TRANSFORMERS_IS_CI: true
22 | 
23 | jobs:
24 |   build:
25 |     if: ${{
26 |       (github.event_name == 'push') ||
27 |       (github.event_name == 'schedule') ||
28 |       (github.event_name == 'workflow_dispatch') ||
29 |       contains(github.event.pull_request.labels.*.name, 'slow') ||
30 |       contains(github.event.pull_request.labels.*.name, 'onnxruntime-slow')
31 |       }}
32 | 
33 |     strategy:
34 |       fail-fast: false
35 |       matrix:
36 |         python-version: [3.9]
37 |         transformers-version: [latest]
38 |         runs-on: [ubuntu-22.04, windows-2022]
39 |         include:
40 |           - {python-version: 3.9, transformers-version: 4.36.*, runs-on: ubuntu-22.04}
41 |           - {python-version: 3.9, transformers-version: 4.45.*, runs-on: ubuntu-22.04}
42 | 
43 |     runs-on: ${{ matrix.runs-on }}
44 | 
45 |     steps:
46 |       - name: Free Disk Space (Ubuntu)
47 |         if: matrix.runs-on == 'ubuntu-22.04'
48 |         uses: jlumbroso/free-disk-space@main
49 | 
50 |       - name: Free Disk Space (macOS)
51 |         if: matrix.runs-on == 'macos-15'
52 |         run: |
53 |           sudo rm -rf /Library/Developer/Xcode/DerivedData/*
54 |           sudo rm -rf ~/Library/Developer/Xcode/Archives/*
55 |           sudo rm -rf ~/Library/Caches/com.apple.dt.Xcode/*
56 |           sudo rm -rf ~/Library/Caches/com.apple.dt.Xcode.SimulatorKit/*
57 | 
58 |       - name: Checkout
59 |         uses: actions/checkout@v4
60 | 
61 |       - name: Setup Python ${{ matrix.python-version }}
62 |         uses: actions/setup-python@v5
63 |         with:
64 |           python-version: ${{ matrix.python-version }}
65 | 
66 |       - name: Install dependencies
67 |         run: |
68 |           pip install --upgrade pip
69 |           pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
70 |           pip install .[tests,onnxruntime] diffusers
71 | 
72 |       - name: Install transformers ${{ matrix.transformers-version }}
73 |         if: ${{ matrix.transformers-version == '4.36.*' }}
74 |         run: |
75 |           pip install "transformers==${{ matrix.transformers-version }}" "diffusers<0.32.0"
76 | 
77 |       - name: Install transformers ${{ matrix.transformers-version }}
78 |         if: ${{ matrix.transformers-version == '4.45.*' }}
79 |         run: |
80 |           pip install "transformers==${{ matrix.transformers-version }}" "diffusers<0.33.0"
81 |   
82 |       - name: Test with pytest (in series)
83 |         run: |
84 |           pytest tests/onnxruntime -m "run_in_series" --durations=0 -vvvv
85 |         env:
86 |           RUN_SLOW: 1
87 |   
88 |       - name: Test with pytest (in parallel)
89 |         run: |
90 |           pytest tests/onnxruntime -m "not run_in_series" --durations=0 -vvvv -n auto
91 |         env:
92 |           HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}
93 |           RUN_SLOW: 1
94 | 


--------------------------------------------------------------------------------
/.github/workflows/test_onnxruntime_training.yml:
--------------------------------------------------------------------------------
 1 | name: ONNX Runtime Training / Python - Test
 2 | 
 3 | on:
 4 |   workflow_dispatch:
 5 |   schedule:
 6 |     - cron: 0 7 * * * # every day at 7am UTC
 7 |   pull_request:
 8 |     branches: [main]
 9 |     types:
10 |       - opened
11 |       - labeled
12 |       - reopened
13 |       - unlabeled
14 |       - synchronize
15 | 
16 | concurrency:
17 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
18 |   cancel-in-progress: true
19 | 
20 | jobs:
21 |   build:
22 |     if: ${{
23 |       (github.event_name == 'schedule') ||
24 |       (github.event_name == 'workflow_dispatch') ||
25 |       contains( github.event.pull_request.labels.*.name, 'training') ||
26 |       contains( github.event.pull_request.labels.*.name, 'onnxruntime-training')
27 |       }}
28 | 
29 |     runs-on:
30 |       group: aws-g6-4xlarge-plus
31 | 
32 |     container:
33 |       image: nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
34 |       options: --gpus all
35 | 
36 |     steps:
37 |       - name: Checkout
38 |         uses: actions/checkout@v4
39 | 
40 |       - name: Setup Python
41 |         uses: actions/setup-python@v5
42 |         with:
43 |           python-version: "3.9"
44 | 
45 |       - name: Install dependencies
46 |         env:
47 |           TORCH_CUDA_ARCH_LIST: "5.0 6.0 7.0 7.5 8.0 8.6 9.0+PTX"
48 |         run: |
49 |           pip install --upgrade pip
50 |           pip install --no-cache-dir "torch<2.6" torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
51 |           pip install --no-cache-dir torch-ort onnxruntime-training && python -m torch_ort.configure
52 |           pip install --no-cache-dir evaluate absl-py rouge_score seqeval sacrebleu nltk scikit-learn
53 |           pip install .[tests,onnxruntime-training]
54 | 
55 |       - name: Test with pytest (trainer)
56 |         run: |
57 |           RUN_SLOW=1 pytest tests/onnxruntime-training/test_trainer.py --durations=0 -vvvv
58 |         env:
59 |           HF_DATASETS_TRUST_REMOTE_CODE: 1
60 | 
61 |       - name: Test with pytest (examples)
62 |         run: |
63 |           RUN_SLOW=1 pytest tests/onnxruntime-training/test_examples.py --durations=0 -vvvv
64 |         env:
65 |           HF_DATASETS_TRUST_REMOTE_CODE: 1
66 | 


--------------------------------------------------------------------------------
/.github/workflows/test_utils.yml:
--------------------------------------------------------------------------------
 1 | name: Utils / Python - Test
 2 | 
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | concurrency:
10 |   group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
11 |   cancel-in-progress: true
12 | 
13 | env:
14 |   TRANSFORMERS_IS_CI: true
15 | 
16 | jobs:
17 |   build:
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         os: [ubuntu-22.04, macos-13, windows-2022]
22 |         python-version: ["3.9"]
23 | 
24 |     runs-on: ${{ matrix.os }}
25 |     steps:
26 |       - name: Checkout code
27 |         uses: actions/checkout@v4
28 | 
29 |       - name: Setup Python ${{ matrix.python-version }}
30 |         uses: actions/setup-python@v5
31 |         with:
32 |           python-version: ${{ matrix.python-version }}
33 | 
34 |       - name: Install dependencies
35 |         run: |
36 |           pip install --upgrade pip
37 |           pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
38 |           pip install .[tests]
39 | 
40 |       - name: Tests needing datasets
41 |         run: |
42 |           pytest tests/utils -n auto -vvvv --durations=0


--------------------------------------------------------------------------------
/.github/workflows/trufflehog.yml:
--------------------------------------------------------------------------------
 1 | on:
 2 |   push:
 3 | 
 4 | name: Secret Leaks
 5 | 
 6 | jobs:
 7 |   trufflehog:
 8 |     runs-on: ubuntu-latest
 9 |     steps:
10 |     - name: Checkout code
11 |       uses: actions/checkout@v4
12 |       with:
13 |         fetch-depth: 0
14 |     - name: Secret Scanning
15 |       uses: trufflesecurity/trufflehog@main
16 | 
17 | 
18 | 


--------------------------------------------------------------------------------
/.github/workflows/upload_pr_documentation.yml:
--------------------------------------------------------------------------------
 1 | name: Upload PR Documentation
 2 | 
 3 | on:
 4 |   workflow_run:
 5 |     workflows: ["Build PR documentation"]
 6 |     types:
 7 |       - completed
 8 | 
 9 | jobs:
10 |   build:
11 |     uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main
12 |     with:
13 |       package_name: optimum
14 |     secrets:
15 |       hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
16 |       comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}
17 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | *.DS_Store
  6 | 
  7 | # C extensions
  8 | *.so
  9 | 
 10 | # Distribution / packaging
 11 | .Python
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | pip-wheel-metadata/
 25 | share/python-wheels/
 26 | *.egg-info/
 27 | .installed.cfg
 28 | *.egg
 29 | MANIFEST
 30 | 
 31 | # PyInstaller
 32 | #  Usually these files are written by a python script from a template
 33 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 34 | *.manifest
 35 | *.spec
 36 | 
 37 | # Installer logs
 38 | pip-log.txt
 39 | pip-delete-this-directory.txt
 40 | 
 41 | # Unit test / coverage reports
 42 | htmlcov/
 43 | .tox/
 44 | .nox/
 45 | .coverage
 46 | .coverage.*
 47 | .cache
 48 | nosetests.xml
 49 | coverage.xml
 50 | *.cover
 51 | *.py,cover
 52 | .hypothesis/
 53 | .pytest_cache/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | db.sqlite3
 63 | db.sqlite3-journal
 64 | 
 65 | # Flask stuff:
 66 | instance/
 67 | .webassets-cache
 68 | 
 69 | # Scrapy stuff:
 70 | .scrapy
 71 | 
 72 | # Sphinx documentation
 73 | docs/_build/
 74 | 
 75 | # PyBuilder
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | .python-version
 87 | 
 88 | # pipenv
 89 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 90 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 91 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 92 | #   install all needed dependencies.
 93 | #Pipfile.lock
 94 | 
 95 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 96 | __pypackages__/
 97 | 
 98 | # Celery stuff
 99 | celerybeat-schedule
100 | celerybeat.pid
101 | 
102 | # SageMath parsed files
103 | *.sage.py
104 | 
105 | # Environments
106 | .env
107 | .venv
108 | env/
109 | venv/
110 | ENV/
111 | env.bak/
112 | venv.bak/
113 | 
114 | # Spyder project settings
115 | .spyderproject
116 | .spyproject
117 | 
118 | # Rope project settings
119 | .ropeproject
120 | 
121 | # mkdocs documentation
122 | /site
123 | 
124 | # mypy
125 | .mypy_cache/
126 | .dmypy.json
127 | dmypy.json
128 | 
129 | # Pyre type checker
130 | .pyre/
131 | 
132 | # Models
133 | *.onnx
134 | # include small test model for tests
135 | !tests/assets/onnx/model.onnx
136 | 
137 | .vscode


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # How to contribute to Optimum?
 3 | 
 4 | Optimum is an open source project, so all contributions and suggestions are welcome.
 5 | 
 6 | You can contribute in many different ways: giving ideas, answering questions, reporting bugs, proposing enhancements, improving the documentation, fixing bugs, and more.
 7 | 
 8 | Many thanks in advance to every contributor.
 9 | 
10 | ## How to work on an open Issue?
11 | You have the list of open Issues at: https://github.com/huggingface/optimum/issues
12 | 
13 | Some of them may have the label `help wanted`: that means any contributor is welcome to work on it!
14 | 
15 | If you would like to work on any of the open Issues:
16 | 
17 | 1. Make sure it is not already assigned to someone else. You have the assignee (if any) on the top of the right column of the Issue page.
18 | 
19 | 2. You can self-assign it by commenting on the Issue page with one of the keywords: `#take` or `#self-assign`.
20 | 
21 | 3. Work on your self-assigned issue and eventually create a Pull Request.
22 | 
23 | ## How to create a Pull Request?
24 | 1. Fork the [repository](https://github.com/huggingface/optimum) by clicking on the 'Fork' button on the repository's page. This creates a copy of the code under your GitHub user account.
25 | 
26 | 2. Clone your fork to your local disk, and add the base repository as a remote:
27 | 
28 | 	```bash
29 | 	git clone git@github.com:<your Github handle>/optimum.git
30 | 	cd optimum
31 | 	git remote add upstream https://github.com/huggingface/optimum.git
32 | 	```
33 | 
34 | 3. Create a new branch to hold your development changes:
35 | 
36 | 	```bash
37 | 	git checkout -b a-descriptive-name-for-my-changes
38 | 	```
39 | 
40 | 	**do not** work on the `main` branch.
41 | 
42 | 4. Set up a development environment by running the following command in a virtual environment:
43 | 
44 | 	```bash
45 | 	pip install -e ".[dev]"
46 | 	```
47 | 
48 |    (If optimum was already installed in the virtual environment, remove
49 |    it with `pip uninstall optimum` before reinstalling it in editable
50 |    mode with the `-e` flag.)
51 | 
52 | 5. Develop the features on your branch.
53 | 
54 | 6. Format your code. Run black and ruff with the following command so that your newly added files look nice:
55 | 
56 | 	```bash
57 | 	make style
58 | 	```
59 | 
60 | 7.  Once you're happy with your changes, add the changed files using `git add` and make a commit with `git commit` to record your changes locally:
61 | 
62 | 	```bash
63 | 	git add modified_file.py
64 | 	git commit
65 | 	```
66 | 
67 | 	It is a good idea to sync your copy of the code with the original
68 | 	repository regularly. This way you can quickly account for changes:
69 | 
70 | 	```bash
71 | 	git fetch upstream
72 | 	git rebase upstream/main
73 | 	```
74 | 
75 |    Push the changes to your account using:
76 | 
77 |    ```bash
78 |    git push -u origin a-descriptive-name-for-my-changes
79 |    ```
80 | 
81 | 8. Once you are satisfied, go to the webpage of your fork on GitHub. Click on "Pull request" to send your changes to the project maintainers for review.
82 | 
83 | ## Code of conduct
84 | 
85 | This project adheres to the HuggingFace [code of conduct](CODE_OF_CONDUCT.md).
86 | By participating, you are expected to uphold this code.
87 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | #  Copyright 2021 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | 
15 | include README.md
16 | include LICENSE
17 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | #  Copyright 2021 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | 
15 | SHELL := /bin/bash
16 | CURRENT_DIR = $(shell pwd)
17 | DEFAULT_CLONE_URL := https://github.com/huggingface/optimum.git
18 | # If CLONE_URL is empty, revert to DEFAULT_CLONE_URL
19 | REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL))
20 | 
21 | .PHONY:	style test
22 | 
23 | # Run code quality checks
24 | style_check:
25 | 	black --check .
26 | 	ruff check .
27 | 
28 | style:
29 | 	black .
30 | 	ruff check . --fix
31 | 
32 | # Run tests for the library
33 | test:
34 | 	python -m pytest tests
35 | 
36 | # Utilities to release to PyPi
37 | build_dist_install_tools:
38 | 	pip install build
39 | 	pip install twine
40 | 
41 | build_dist:
42 | 	rm -fr build
43 | 	rm -fr dist
44 | 	python -m build
45 | 
46 | pypi_upload: build_dist
47 | 	python -m twine upload dist/*
48 | 
49 | build_doc_docker_image:
50 | 	docker build -t doc_maker --build-arg commit_sha=$(COMMIT_SHA_OPTIMUM) --build-arg clone_url=$(REAL_CLONE_URL) ./docs
51 | 
52 | doc: build_doc_docker_image
53 | 	@test -n "$(BUILD_DIR)" || (echo "BUILD_DIR is empty." ; exit 1)
54 | 	@test -n "$(VERSION)" || (echo "VERSION is empty." ; exit 1)
55 | 	docker run -v $(CURRENT_DIR):/doc_folder --workdir=/doc_folder doc_maker \
56 | 	doc-builder build optimum /optimum/docs/source/ \
57 | 		--build_dir $(BUILD_DIR) \
58 | 		--version $(VERSION) \
59 | 		--version_tag_suffix "" \
60 | 		--html \
61 | 		--clean
62 | 


--------------------------------------------------------------------------------
/docs/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM nikolaik/python-nodejs:python3.11-nodejs23
 2 | 
 3 | ARG commit_sha
 4 | ARG clone_url
 5 | 
 6 | RUN apt -y update
 7 | RUN python3 -m pip install --no-cache-dir --upgrade pip
 8 | RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/doc-builder.git
 9 | 
10 | RUN git clone $clone_url && cd optimum && git checkout $commit_sha
11 | RUN python3 -m pip install --no-cache-dir ./optimum[onnxruntime,benchmark,quality,doc-build,diffusers]
12 | 


--------------------------------------------------------------------------------
/docs/conftest.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # tests directory-specific settings - this file is run automatically
16 | # by pytest before any tests are run
17 | 
18 | import doctest
19 | import sys
20 | from os.path import abspath, dirname, join
21 | 
22 | 
23 | # allow having multiple repository checkouts and not needing to remember to rerun
24 | # 'pip install -e .[dev]' when switching between checkouts and running tests.
25 | git_repo_path = abspath(join(dirname(__file__), "src"))
26 | sys.path.insert(1, git_repo_path)
27 | 
28 | # Doctest custom flag to ignore output.
29 | IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")
30 | 
31 | OutputChecker = doctest.OutputChecker
32 | 
33 | 
34 | class CustomOutputChecker(OutputChecker):
35 |     def check_output(self, want, got, optionflags):
36 |         if IGNORE_RESULT & optionflags:
37 |             return True
38 |         return OutputChecker.check_output(self, want, got, optionflags)
39 | 
40 | 
41 | doctest.OutputChecker = CustomOutputChecker
42 | 


--------------------------------------------------------------------------------
/docs/source/_redirects.yml:
--------------------------------------------------------------------------------
 1 | # Optimum Graphcore
 2 | graphcore_index: graphcore/index
 3 | graphcore_quickstart: graphcore/quickstart
 4 | graphcore_ipu_config: graphcore/ipu_config
 5 | graphcore_trainer: graphcore/trainer
 6 | graphcore_add_support_for_new_model: graphcore/add_support_for_new_model
 7 | 
 8 | # Optimum Habana
 9 | habana_index: habana/index
10 | habana_quickstart: habana/quickstart
11 | habana_single_hpu: habana/tutorials/single_hpu
12 | habana_distributed: habana/tutorials/distributed
13 | habana_deepspeed: habana/usage_guides/deepspeed
14 | habana_accelerate_training: habana/usage_guides/accelerate_training
15 | habana_trainer: habana/package_reference/trainer
16 | habana_gaudi_config: habana/package_reference/gaudi_config
17 | habana/usage_guides/stable_diffusion: habana/tutorials/stable_diffusion
18 | habana/tutorials/pretraining: habana/usage_guides/pretraining
19 | 
20 | # Optimum Intel
21 | intel_index: intel/index
22 | intel_quickstart: intel/index
23 | intel_configuration: intel/neural_compressor/reference
24 | intel_optimization: intel/neural_compressor/optimization
25 | intel_quantization: intel/neural_compressor/optimization
26 | intel_pruning: intel/neural_compressor/optimization
27 | intel_trainer: intel/neural_compressor/reference
28 | intel/inference: intel/openvino/inference
29 | intel/optimization_ov: intel/openvino/optimization
30 | intel/reference_ov: intel/openvino/reference
31 | intel/optimization_inc: intel/neural_compressor/optimization
32 | intel/distributed_training: intel/neural_compressor/distributed_training
33 | intel/reference_inc: intel/neural_compressor/reference
34 | 
35 | # Optimum Neuron
36 | docs/optimum-neuron/index: /docs/optimum-neuron/index
37 | 
38 | # Optimum TPU
39 | docs/optimum-tpu/index: /docs/optimum-tpu/index
40 | tpu/index: /docs/optimum-tpu/index
41 | 
42 | # Optimum ExecuTorch
43 | docs/optimum-executorch/index: /docs/optimum-executorch/index
44 | 


--------------------------------------------------------------------------------
/docs/source/exporters/onnx/package_reference/configuration.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Configuration classes for ONNX exports
14 | 
15 | Exporting a model to ONNX involves specifying:
16 | 1. The input names.
17 | 2. The output names.
18 | 3. The dynamic axes. These refer to the input dimensions that can be changed dynamically at runtime (e.g. a batch size or sequence length).
19 | All other axes will be treated as static, and hence fixed at runtime.
20 | 4. Dummy inputs to trace the model. This is needed in PyTorch to record the computational graph and convert it to ONNX.
21 | 
22 | Since this data depends on the choice of model and task, we represent it in terms of _configuration classes_. Each configuration class is associated with
23 | a specific model architecture, and follows the naming convention `ArchitectureNameOnnxConfig`. For instance, the configuration which specifies the ONNX
24 | export of BERT models is `BertOnnxConfig`.
25 | 
26 | Since many architectures share similar properties for their ONNX configuration, 🤗 Optimum adopts a 3-level class hierarchy:
27 | 1. Abstract and generic base classes. These handle all the fundamental features, while being agnostic to the modality (text, image, audio, etc).
28 | 2. Middle-end classes. These are aware of the modality, but multiple can exist for the same modality depending on the inputs they support.
29 | They specify which input generators should be used for the dummy inputs, but remain model-agnostic.
30 | 3. Model-specific classes like the `BertOnnxConfig` mentioned above. These are the ones actually used to export models.
31 | 
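As a concrete sketch, a model-specific configuration mostly declares its inputs and their dynamic axes. The class name below is hypothetical; `TextEncoderOnnxConfig`, `NormalizedTextConfig` and `DummyTextInputGenerator` are the middle-end pieces documented on this page and in the utilities reference.

```python
from typing import Dict

from optimum.exporters.onnx.config import TextEncoderOnnxConfig
from optimum.utils import DummyTextInputGenerator, NormalizedTextConfig


class MyCustomModelOnnxConfig(TextEncoderOnnxConfig):
    # How to read attributes from the model config, and which dummy input
    # generators to use when tracing the model.
    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator,)

    @property
    def inputs(self) -> Dict[str, Dict[int, str]]:
        # Axes 0 and 1 are dynamic: batch size and sequence length.
        return {
            "input_ids": {0: "batch_size", 1: "sequence_length"},
            "attention_mask": {0: "batch_size", 1: "sequence_length"},
        }
```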
32 | 
33 | ## Base classes
34 | 
35 | [[autodoc]] exporters.onnx.OnnxConfig
36 |     - inputs
37 |     - outputs
38 |     - generate_dummy_inputs
39 | 
40 | [[autodoc]] exporters.onnx.OnnxConfigWithPast
41 |     - add_past_key_values
42 | 
43 | [[autodoc]] exporters.onnx.OnnxSeq2SeqConfigWithPast
44 | 
45 | ## Middle-end classes
46 | 
47 | ### Text
48 | 
49 | [[autodoc]] exporters.onnx.config.TextEncoderOnnxConfig
50 | 
51 | [[autodoc]] exporters.onnx.config.TextDecoderOnnxConfig
52 | 
53 | [[autodoc]] exporters.onnx.config.TextSeq2SeqOnnxConfig
54 | 
55 | 
56 | ### Vision
57 | 
58 | [[autodoc]] exporters.onnx.config.VisionOnnxConfig
59 | 
60 | 
61 | ### Multi-modal
62 | 
63 | [[autodoc]] exporters.onnx.config.TextAndVisionOnnxConfig
64 | 


--------------------------------------------------------------------------------
/docs/source/exporters/onnx/package_reference/export.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Export functions
14 | 
15 | You can export models to ONNX from two frameworks in 🤗 Optimum: PyTorch and TensorFlow. There is an export function for each of these frameworks, [`~optimum.exporters.onnx.convert.export_pytorch`] and [`~optimum.exporters.onnx.convert.export_tensorflow`], but the recommended way of using those is via the main export function [`~optimum.exporters.main_export`], which will take care of using the proper exporting function according to the available framework, check that the exported model is valid, and provide extended options to run optimizations on the exported model.
16 | 
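For instance, a minimal sketch of an export through the main function (the model name and output directory are arbitrary examples):

```python
from optimum.exporters.onnx import main_export

# Downloads the checkpoint, picks the right framework-specific export
# function, exports to ONNX and validates the exported model.
main_export("distilbert-base-uncased", output="distilbert_onnx")
```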
17 | ## Main functions
18 | 
19 | [[autodoc]] exporters.onnx.main_export
20 | 
21 | [[autodoc]] exporters.onnx.onnx_export_from_model
22 | 
23 | [[autodoc]] exporters.onnx.convert.export
24 | 
25 | [[autodoc]] exporters.onnx.convert.export_pytorch
26 | 
27 | [[autodoc]] exporters.onnx.convert.export_tensorflow
28 | 
29 | 
30 | ## Utility functions
31 | 
32 | [[autodoc]] exporters.onnx.convert.check_dummy_inputs_are_allowed
33 | 
34 | [[autodoc]] exporters.onnx.convert.validate_model_outputs
35 | 


--------------------------------------------------------------------------------
/docs/source/exporters/overview.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2023 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Overview
14 | 
15 | 🤗 Optimum enables exporting models from PyTorch or TensorFlow to different formats through its `exporters` module. For now, two export formats are supported: ONNX and TFLite (TensorFlow Lite).
16 | 


--------------------------------------------------------------------------------
/docs/source/exporters/tflite/overview.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2023 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Overview
14 | 
15 | 🤗 Optimum handles the export of TensorFlow models to TFLite in the `exporters.tflite` module. In addition, models hosted on the Hugging Face Hub with PyTorch weights but that have a TensorFlow implementation are also supported for export, thanks to Transformers' [TFPreTrainedModel.from_pretrained()](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.TFPreTrainedModel.from_pretrained) auto-conversion to TensorFlow.
16 | 
17 | The TFLite export support provides classes, functions and a command line interface to export a model easily.
18 | 
19 | Supported architectures:
20 | 
21 | - Albert
22 | - BERT
23 | - Camembert
24 | - ConvBert
25 | - Deberta
26 | - Deberta V2
27 | - DistilBert
28 | - Electra
29 | - Flaubert
30 | - MobileBert
31 | - MPNet
32 | - ResNet
33 | - Roberta
34 | - RoFormer
35 | - XLM
36 | - XLMRoberta
37 | 


--------------------------------------------------------------------------------
/docs/source/exporters/tflite/package_reference/configuration.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2023 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Configuration classes for TFLite export
14 | 
15 | ## Base classes
16 | 
17 | [[autodoc]] exporters.tflite.TFLiteConfig
18 |     - inputs
19 |     - outputs
20 |     - generate_dummy_inputs
21 | 
22 | ## Middle-end classes
23 | 
24 | [[autodoc]] exporters.tflite.config.TextEncoderTFliteConfig
25 | 
26 | [[autodoc]] exporters.tflite.config.VisionTFLiteConfig
27 | 


--------------------------------------------------------------------------------
/docs/source/exporters/tflite/package_reference/export.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2023 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Export functions
14 | 
15 | ## Main functions
16 | 
17 | [[autodoc]] exporters.tflite.convert.export
18 | 
19 | ## Utility functions
20 | 
21 | [[autodoc]] exporters.tflite.convert.validate_model_outputs
22 | 


--------------------------------------------------------------------------------
/docs/source/exporters/tflite/usage_guides/contribute.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2023 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Adding support for an unsupported architecture
14 | 
15 | If you wish to export a model whose architecture is not already supported by the library, the PR [#813 Adds support for ResNet](https://github.com/huggingface/optimum/pull/813) can be used as a reference.
16 | 
17 | You can make sure tests pass for the new `my_new_modeltype` model type by running:
18 | 
19 | ```bash
20 | pytest tests/exporters/tflite/test_*.py -k "my_new_modeltype" -s --exitfirst
21 | ```
22 | 


--------------------------------------------------------------------------------
/docs/source/furiosa_overview.mdx:
--------------------------------------------------------------------------------
1 | # 🤗 Optimum Furiosa
2 | 
3 | Find more information about 🤗 Optimum Furiosa [here](https://github.com/huggingface/optimum-furiosa).
4 | 


--------------------------------------------------------------------------------
/docs/source/notebooks.md:
--------------------------------------------------------------------------------
1 | ../../notebooks/README.md


--------------------------------------------------------------------------------
/docs/source/nvidia_overview.mdx:
--------------------------------------------------------------------------------
1 | # 🤗 Optimum Nvidia
2 | 
3 | Find more information about 🤗 Optimum Nvidia [here](https://github.com/huggingface/optimum-nvidia).
4 | 


--------------------------------------------------------------------------------
/docs/source/onnxruntime/concept_guides/onnx.mdx:
--------------------------------------------------------------------------------
 1 | # ONNX 🤝 ONNX Runtime
 2 | 
 3 | ONNX is an open standard that defines a common set of operators and a common file format to represent deep learning models in a wide variety of frameworks, including PyTorch and TensorFlow. When a model is exported to the ONNX format, these operators are used to construct a computational graph (often called an _intermediate representation_) that represents the flow of data through the neural network.
 4 | 
 5 | <Tip>
 6 | 
 7 | You can use [Netron](https://netron.app/) to visualize any ONNX file on the Hugging Face Hub. Simply append the file's URL to `http://netron.app?url=` as in [this example](https://netron.app/?url=https://huggingface.co/cmarkea/distilcamembert-base-ner/blob/main/model.onnx).
 8 | 
 9 | </Tip>
10 | 
11 | By exposing a graph with standardized operators and data types, ONNX makes it easy to switch between frameworks. For example, a model trained in PyTorch can be exported to ONNX format and then imported in TensorFlow (and vice versa).
12 | 
13 | Where ONNX really shines is when it is coupled with a dedicated accelerator like ONNX Runtime, or ORT for short. ORT provides tools to optimize the ONNX graph through techniques like operator fusion and constant folding, and defines an interface to execution providers that allow you to run the model on different types of hardware.
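As a quick illustration of this coupling, the sketch below uses 🤗 Optimum to export a Hub checkpoint to ONNX on the fly and run it with ONNX Runtime (the checkpoint name is only an example):

```python
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# export=True converts the PyTorch weights to ONNX before loading them with ORT.
model = ORTModelForSequenceClassification.from_pretrained(model_id, export=True)

inputs = tokenizer("ONNX Runtime makes inference fast!", return_tensors="pt")
logits = model(**inputs).logits
```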


--------------------------------------------------------------------------------
/docs/source/onnxruntime/overview.mdx:
--------------------------------------------------------------------------------
 1 | # Overview
 2 | 
 3 | 🤗 Optimum provides an integration with ONNX Runtime, a cross-platform, high-performance engine for Open Neural Network Exchange (ONNX) models.
 4 | 
 5 | <div class="mt-10">
 6 |   <div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-3 md:gap-y-4 md:gap-x-5">
 7 |     <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./usage_guides/pipelines"
 8 |       ><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">How-to guides</div>
 9 |       <p class="text-gray-700">Practical guides to help you achieve a specific goal. Take a look at these guides to learn how to use 🤗 Optimum to solve real-world problems.</p>
10 |     </a>
11 |     <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./concept_guides/onnx"
12 |       ><div class="w-full text-center bg-gradient-to-br from-pink-400 to-pink-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Conceptual guides</div>
13 |       <p class="text-gray-700">High-level explanations for building a better understanding about important topics such as quantization and graph optimization.</p>
14 |    </a>
15 |     <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./package_reference/modeling_ort"
16 |       ><div class="w-full text-center bg-gradient-to-br from-purple-400 to-purple-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Reference</div>
17 |       <p class="text-gray-700">Technical descriptions of how the ONNX Runtime classes and methods of 🤗 Optimum work.</p>
18 |     </a>
19 |   </div>
20 | </div>
21 | 


--------------------------------------------------------------------------------
/docs/source/onnxruntime/package_reference/configuration.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Configuration
14 | 
15 | The configuration classes are the way to specify how a task should be done. There are two tasks supported with the ONNX Runtime package:
16 | 
17 | 1. Optimization: Performed by the [`~onnxruntime.ORTOptimizer`], this task can be tweaked using an [`~onnxruntime.configuration.OptimizationConfig`].
18 | 
19 | 2. Quantization: Performed by the [`~onnxruntime.ORTQuantizer`], quantization can be set using a [`~onnxruntime.configuration.QuantizationConfig`]. A calibration step is required in some cases (post training static quantization), which can be specified using a [`~onnxruntime.configuration.CalibrationConfig`].
20 | 
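As a quick illustration of both kinds of configuration (the particular settings below are arbitrary examples):

```python
from optimum.onnxruntime.configuration import AutoQuantizationConfig, OptimizationConfig

# Basic graph optimizations; higher levels produce hardware-dependent graphs.
optimization_config = OptimizationConfig(optimization_level=1)

# Post-training dynamic quantization targeting AVX512-VNNI CPUs,
# so no calibration step is required.
quantization_config = AutoQuantizationConfig.avx512_vnni(is_static=False, per_channel=False)
```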
21 | ## OptimizationConfig
22 | 
23 | [[autodoc]] onnxruntime.configuration.OptimizationConfig
24 | 
25 | [[autodoc]] onnxruntime.configuration.AutoOptimizationConfig
26 | 
27 | ## QuantizationConfig
28 | 
29 | [[autodoc]] onnxruntime.configuration.QuantizationConfig
30 | 
31 | ## AutoQuantizationConfig
32 | 
33 | [[autodoc]] onnxruntime.configuration.AutoQuantizationConfig
34 |     - all
35 | 
36 | ## CalibrationConfig
37 | 
38 | [[autodoc]] onnxruntime.configuration.CalibrationConfig
39 | 
40 | ## ORTConfig
41 | 
42 | [[autodoc]] onnxruntime.configuration.ORTConfig
43 | 


--------------------------------------------------------------------------------
/docs/source/onnxruntime/package_reference/optimization.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Optimization
14 | 
15 | ## ORTOptimizer
16 | 
17 | [[autodoc]] onnxruntime.optimization.ORTOptimizer
18 |     - all


--------------------------------------------------------------------------------
/docs/source/onnxruntime/package_reference/quantization.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Quantization
14 | 
15 | ## ORTQuantizer
16 | 
17 | [[autodoc]] onnxruntime.quantization.ORTQuantizer
18 |     - all
19 | 


--------------------------------------------------------------------------------
/docs/source/onnxruntime/package_reference/trainer.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Trainer
14 | 
15 | ## ORTTrainer
16 | 
17 | [[autodoc]] onnxruntime.trainer.ORTTrainer
18 |     - all
19 | 
20 | ## ORTSeq2SeqTrainer
21 | 
22 | [[autodoc]] onnxruntime.trainer_seq2seq.ORTSeq2SeqTrainer
23 |     - evaluate
24 |     - predict
25 | 
26 | ## ORTTrainingArguments
27 | 
28 | [[autodoc]] onnxruntime.training_args.ORTTrainingArguments
29 |     - all
30 | 
31 | ## ORTSeq2SeqTrainingArguments
32 | 
33 | [[autodoc]] onnxruntime.training_args_seq2seq.ORTSeq2SeqTrainingArguments
34 |     - all


--------------------------------------------------------------------------------
/docs/source/torch_fx/concept_guides/symbolic_tracer.mdx:
--------------------------------------------------------------------------------
1 | # Symbolic tracer
2 | 
3 | In Torch FX, the symbolic tracer feeds dummy values through the code to record the underlying operations.
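
For instance, using the tracer that 🤗 Transformers provides for its models, a tracing sketch could look like this (the checkpoint is an arbitrary example):

```python
from transformers import BertModel
from transformers.utils.fx import symbolic_trace

model = BertModel.from_pretrained("bert-base-uncased")
# Dummy tensors are propagated through forward() to record every operation.
traced = symbolic_trace(model, input_names=["input_ids", "attention_mask", "token_type_ids"])
print(traced.graph)  # the recorded operations, as a torch.fx graph
```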


--------------------------------------------------------------------------------
/docs/source/torch_fx/overview.mdx:
--------------------------------------------------------------------------------
 1 | # Overview
 2 | 
 3 | 🤗 Optimum provides an integration with Torch FX, a library for PyTorch that allows developers to implement custom transformations of their models that can be optimized for performance.
 4 | 
 5 | <div class="mt-10">
 6 |   <div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-3 md:gap-y-4 md:gap-x-5">
 7 |     <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./usage_guides/optimization"
 8 |       ><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">How-to guides</div>
 9 |       <p class="text-gray-700">Practical guides to help you achieve a specific goal. Take a look at these guides to learn how to use 🤗 Optimum to solve real-world problems.</p>
10 |     </a>
11 |     <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./concept_guides/symbolic_tracer"
12 |       ><div class="w-full text-center bg-gradient-to-br from-pink-400 to-pink-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Conceptual guides</div>
13 |       <p class="text-gray-700">High-level explanations for building a better understanding about important topics such as quantization and graph optimization.</p>
14 |    </a>
15 |     <a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./package_reference/optimization"
16 |       ><div class="w-full text-center bg-gradient-to-br from-purple-400 to-purple-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Reference</div>
17 |       <p class="text-gray-700">Technical descriptions of how the Torch FX classes and methods of 🤗 Optimum work.</p>
18 |     </a>
19 |   </div>
20 | </div>
21 | 


--------------------------------------------------------------------------------
/docs/source/torch_fx/package_reference/optimization.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Optimization
14 | 
15 | ## Transformation
16 | 
17 | [[autodoc]] fx.optimization.Transformation
18 |     - all
19 |     - __call__
20 | 
21 | ## Reversible transformation
22 | 
23 | [[autodoc]] fx.optimization.ReversibleTransformation
24 |     - all
25 |     - __call__
26 | 
27 | [[autodoc]] fx.optimization.compose
28 | 
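For example, a minimal sketch of composing two of the transformations listed below and applying them to a model previously traced with Transformers' `symbolic_trace` (here called `traced_model`):

```python
from optimum.fx.optimization import ChangeTrueDivToMulByInverse, MergeLinears, compose

# Chain several transformations into a single callable and apply it
# to the traced model in one pass.
transformation = compose(ChangeTrueDivToMulByInverse(), MergeLinears())
transformed_model = transformation(traced_model)
```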
29 | ### Transformations
30 | 
31 | [[autodoc]] fx.optimization.MergeLinears
32 |     - all
33 | 
34 | [[autodoc]] fx.optimization.FuseBiasInLinear
35 |     - all
36 | 
37 | [[autodoc]] fx.optimization.ChangeTrueDivToMulByInverse
38 |     - all
39 | 
40 | [[autodoc]] fx.optimization.FuseBatchNorm2dInConv2d
41 |     - all
42 | 
43 | [[autodoc]] fx.optimization.FuseBatchNorm1dInLinear
44 |     - all


--------------------------------------------------------------------------------
/docs/source/utils/dummy_input_generators.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Dummy Input Generators
14 | 
15 | It is very common to have to generate dummy inputs to perform a task (tracing, exporting a model to some backend,
16 | testing model outputs, etc). The goal of [`~optimum.utils.input_generators.DummyInputGenerator`] classes is to make this
17 | generation easy and re-usable.
18 | 
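For instance, a minimal sketch of generating a dummy `input_ids` tensor; the exact constructor arguments shown here are assumptions about the text input generator:

```python
from transformers import AutoConfig
from optimum.utils import DummyTextInputGenerator, NormalizedTextConfig

config = NormalizedTextConfig(AutoConfig.from_pretrained("bert-base-uncased"))
generator = DummyTextInputGenerator(
    task="text-classification",
    normalized_config=config,
    batch_size=2,
    sequence_length=16,
)
dummy_input_ids = generator.generate("input_ids", framework="pt")  # shape (2, 16)
```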
19 | 
20 | ## Base class
21 | 
22 | [[autodoc]] optimum.utils.input_generators.DummyInputGenerator
23 | 
24 | 
25 | ## Existing dummy input generators
26 | 
27 | [[autodoc]] optimum.utils.input_generators.DummyTextInputGenerator
28 | 
29 | [[autodoc]] optimum.utils.input_generators.DummyDecoderTextInputGenerator
30 | 
31 | [[autodoc]] optimum.utils.input_generators.DummyPastKeyValuesGenerator
32 | 
33 | [[autodoc]] optimum.utils.input_generators.DummySeq2SeqPastKeyValuesGenerator
34 | 
35 | [[autodoc]] optimum.utils.input_generators.DummyBboxInputGenerator
36 | 
37 | [[autodoc]] optimum.utils.input_generators.DummyVisionInputGenerator
38 | 
39 | [[autodoc]] optimum.utils.input_generators.DummyAudioInputGenerator
40 | 


--------------------------------------------------------------------------------
/docs/source/utils/normalized_config.mdx:
--------------------------------------------------------------------------------
 1 | <!--Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | 
 3 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 4 | the License. You may obtain a copy of the License at
 5 | 
 6 | http://www.apache.org/licenses/LICENSE-2.0
 7 | 
 8 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 9 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
10 | specific language governing permissions and limitations under the License.
11 | -->
12 | 
13 | # Normalized Configurations
14 | 
15 | Model configuration classes in 🤗 Transformers are not standardized. Although Transformers implements an `attribute_map` attribute that mitigates the issue to some extent, it does not make it easy to reason about common configuration attributes in the code.
16 | [`~optimum.utils.normalized_config.NormalizedConfig`] classes try to fix that by allowing access to the configuration
17 | attribute they wrap in a standardized way.
18 | 
19 | 
20 | ## Base class
21 | 
22 | <Tip>
23 | 
24 | While it is possible to create `NormalizedConfig` subclasses for common use-cases, it is also possible to overwrite
25 | the `original attribute name -> normalized attribute name` mapping directly using the
26 | [`~optimum.utils.normalized_config.NormalizedConfig.with_args`] class method.
27 | 
28 | </Tip>
29 | 
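For example, a GPT-2 style configuration names its attributes `n_head`, `n_embd` and `n_layer`; a minimal sketch of remapping them with `with_args`:

```python
from transformers import AutoConfig
from optimum.utils import NormalizedTextConfig

# Map GPT-2's attribute names onto the normalized ones, without a subclass.
GPT2NormalizedConfig = NormalizedTextConfig.with_args(
    num_attention_heads="n_head",
    hidden_size="n_embd",
    num_layers="n_layer",
)

normalized = GPT2NormalizedConfig(AutoConfig.from_pretrained("gpt2"))
print(normalized.hidden_size)  # reads config.n_embd under the hood
```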
30 | [[autodoc]] optimum.utils.normalized_config.NormalizedConfig
31 | 
32 | 
33 | ## Existing normalized configurations
34 | 
35 | [[autodoc]] optimum.utils.normalized_config.NormalizedTextConfig
36 | 
37 | [[autodoc]] optimum.utils.normalized_config.NormalizedSeq2SeqConfig
38 | 
39 | [[autodoc]] optimum.utils.normalized_config.NormalizedVisionConfig
40 | 
41 | [[autodoc]] optimum.utils.normalized_config.NormalizedTextAndVisionConfig
42 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/optimization/multiple-choice/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2020 The HuggingFace Team. All rights reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | -->
16 | 
17 | # Multiple choice
18 | 
19 | The script [`run_swag.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/multiple-choice/run_swag.py) allows us to apply graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for multiple choice tasks.
20 | 
21 | The following example applies graph optimizations on a BERT fine-tuned on the SWAG dataset. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node elimination and constant folding. Higher optimization levels will result in a hardware-dependent optimized graph.
22 | 
23 | ```bash
24 | python run_swag.py \
25 |     --model_name_or_path ehdwns1516/bert-base-uncased_SWAG \
26 |     --optimization_level 1 \
27 |     --do_eval \
28 |     --output_dir /tmp/optimized_bert_swag
29 | ```
30 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/optimization/multiple-choice/requirements.txt:
--------------------------------------------------------------------------------
1 | datasets >= 1.8.0
2 | sentencepiece != 0.1.92
3 | scipy
4 | scikit-learn
5 | protobuf
6 | torch >= 1.9
7 | onnx
8 | onnxruntime >= 1.9.0


--------------------------------------------------------------------------------
/examples/onnxruntime/optimization/question-answering/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2020 The HuggingFace Team. All rights reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | -->
16 | 
17 | # Question answering
18 | 
19 | The script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/question-answering/run_qa.py)
20 | allows us to apply graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for question answering tasks.
21 | 
22 | Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along 
23 | the flag `--version_2_with_negative`.
24 | 
25 | The following example applies graph optimizations on a DistilBERT fine-tuned on the SQuAD1.0 dataset. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node elimination and constant folding. Higher optimization levels will result in a hardware-dependent optimized graph.
26 | 
27 | ```bash
28 | python run_qa.py \
29 |     --model_name_or_path distilbert-base-uncased-distilled-squad \
30 |     --dataset_name squad \
31 |     --optimization_level 1 \
32 |     --do_eval \
33 |     --output_dir /tmp/optimized_distilbert_squad
34 | ```
35 | 
36 | In order to apply dynamic or static quantization, `quantization_approach` must be set to `dynamic` or `static`, respectively.
37 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/optimization/question-answering/requirements.txt:
--------------------------------------------------------------------------------
1 | datasets >= 1.8.0
2 | torch >= 1.9.0
3 | onnx
4 | onnxruntime >= 1.9.0


--------------------------------------------------------------------------------
/examples/onnxruntime/optimization/text-classification/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2020 The HuggingFace Team. All rights reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | -->
16 | 
17 | # Text classification
18 | 
19 | ## GLUE tasks
20 | 
21 | The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/text-classification/run_glue.py) allows us to apply graph optimizations and fusion using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as the ones from the [GLUE benchmark](https://gluebenchmark.com/).
22 | 
23 | The following example applies graph optimization on a DistilBERT fine-tuned on the SST-2 task. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node elimination and constant folding. Higher optimization levels will result in a hardware-dependent optimized graph.
24 | 
25 | ```bash
26 | python run_glue.py \
27 |     --model_name_or_path distilbert-base-uncased-finetuned-sst-2-english \
28 |     --task_name sst2 \
29 |     --optimization_level 1 \
30 |     --do_eval \
31 |     --output_dir /tmp/optimized_distilbert_sst2
32 | ```
33 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/optimization/text-classification/requirements.txt:
--------------------------------------------------------------------------------
1 | datasets >= 1.8.0
2 | sentencepiece != 0.1.92
3 | scipy
4 | scikit-learn
5 | protobuf
6 | torch >= 1.9
7 | onnx
8 | onnxruntime >= 1.9.0


--------------------------------------------------------------------------------
/examples/onnxruntime/optimization/token-classification/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2020 The HuggingFace Team. All rights reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | -->
16 | 
17 | # Token classification
18 | 
19 | The script [`run_ner.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/optimization/token-classification/run_ner.py)
20 | allows us to apply graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for token classification tasks. 
21 | 
22 | The following example applies graph optimizations on a DistilBERT fine-tuned on the CoNLL-2003 task. Here the optimization level is selected to be 1, enabling basic optimizations such as redundant node elimination and constant folding. Higher optimization levels will result in a hardware-dependent optimized graph.
23 | 
24 | ```bash
25 | python run_ner.py \
26 |     --model_name_or_path elastic/distilbert-base-uncased-finetuned-conll03-english \
27 |     --dataset_name conll2003 \
28 |     --optimization_level 1 \
29 |     --do_eval \
30 |     --output_dir /tmp/optimized_distilbert_conll2003
31 | ```
32 | 
33 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/optimization/token-classification/requirements.txt:
--------------------------------------------------------------------------------
1 | seqeval
2 | datasets >= 1.18.0
3 | torch >= 1.9
4 | onnx
5 | onnxruntime >= 1.9.0


--------------------------------------------------------------------------------
/examples/onnxruntime/quantization/image-classification/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2020 The HuggingFace Team. All rights reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | -->
16 | 
17 | # Image classification
18 | 
19 | The script [`run_image_classification.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/image-classification/run_image_classification.py) allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for image classification tasks.
20 | 
21 | The following example applies dynamic quantization on a ViT model fine-tuned on the beans classification dataset.
22 | 
23 | ```bash
24 | python run_image_classification.py \
25 |     --model_name_or_path nateraw/vit-base-beans \
26 |     --dataset_name beans \
27 |     --quantization_approach dynamic \
28 |     --do_eval \
29 |     --output_dir /tmp/image_classification_vit_beans
30 | ```
31 | 
32 | In order to apply dynamic or static quantization, `quantization_approach` must be set to `dynamic` or `static`, respectively.
33 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/quantization/image-classification/requirements.txt:
--------------------------------------------------------------------------------
1 | torch>=1.5.0
2 | torchvision>=0.6.0
3 | datasets>=1.17.0
4 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/quantization/multiple-choice/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2020 The HuggingFace Team. All rights reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | -->
16 | 
17 | # Multiple choice
18 | 
19 | The script [`run_swag.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/multiple-choice/run_swag.py) allows us to apply different quantization approaches (such as dynamic and static quantization) using the [ONNX Runtime](https://github.com/microsoft/onnxruntime) quantization tool for multiple choice tasks.
20 | 
21 | The following example applies post-training dynamic quantization on a BERT fine-tuned on the SWAG dataset.
22 | 
23 | ```bash
24 | python run_swag.py \
25 |     --model_name_or_path ehdwns1516/bert-base-uncased_SWAG \
26 |     --quantization_approach dynamic \
27 |     --do_eval \
28 |     --output_dir /tmp/quantized_bert_swag
29 | ```
30 | 
31 | To apply dynamic or static quantization, set `quantization_approach` to `dynamic` or `static`, respectively.
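
Once quantized, the model can be reloaded for inference with `ORTModelForMultipleChoice`. A minimal sketch, assuming the script saved the tokenizer alongside the model and named the quantized graph `model_quantized.onnx` (both assumptions about the script's output layout):

```python
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForMultipleChoice

save_dir = "/tmp/quantized_bert_swag"  # output_dir from the example above
model = ORTModelForMultipleChoice.from_pretrained(save_dir, file_name="model_quantized.onnx")
tokenizer = AutoTokenizer.from_pretrained(save_dir)

prompt = "The chef put the cake in the oven"
choices = ["and baked it for forty minutes.", "and mailed it to France."]
inputs = tokenizer([prompt] * len(choices), choices, padding=True, return_tensors="pt")
# Multiple-choice models expect inputs of shape (batch_size, num_choices, sequence_length).
inputs = {name: tensor.unsqueeze(0) for name, tensor in inputs.items()}
logits = model(**inputs).logits
print("Best choice:", choices[logits.argmax(dim=-1).item()])
```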
32 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/quantization/multiple-choice/requirements.txt:
--------------------------------------------------------------------------------
1 | datasets >= 1.8.0
2 | sentencepiece != 0.1.92
3 | scipy
4 | scikit-learn
5 | protobuf
6 | torch >= 1.9
7 | onnx
8 | onnxruntime >= 1.9.0


--------------------------------------------------------------------------------
/examples/onnxruntime/quantization/question-answering/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2020 The HuggingFace Team. All rights reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | -->
16 | 
17 | # Question answering
18 | 
19 | The script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/question-answering/run_qa.py) allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for question answering tasks.
20 | 
21 | Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along the flag `--version_2_with_negative`.
22 | 
23 | The following example applies post-training dynamic quantization on a DistilBERT fine-tuned on the SQuAD1.0 dataset.
24 | 
25 | ```bash
26 | python run_qa.py \
27 |     --model_name_or_path distilbert-base-uncased-distilled-squad \
28 |     --dataset_name squad \
29 |     --quantization_approach dynamic \
30 |     --do_eval \
31 |     --output_dir /tmp/quantized_distilbert_squad
32 | ```
33 | 
34 | To apply dynamic or static quantization, set `quantization_approach` to `dynamic` or `static`, respectively.
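
For reference, the static variant can also be reproduced programmatically; unlike dynamic quantization, it needs a calibration step to estimate activation ranges. A minimal sketch with the `optimum.onnxruntime` API (the sample count, sequence length, and save directory are illustrative):

```python
from functools import partial

from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForQuestionAnswering, ORTQuantizer
from optimum.onnxruntime.configuration import AutoCalibrationConfig, AutoQuantizationConfig

model_id = "distilbert-base-uncased-distilled-squad"
model = ORTModelForQuestionAnswering.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)
quantizer = ORTQuantizer.from_pretrained(model)

# Static quantization: activation ranges are fixed ahead of time from calibration data.
qconfig = AutoQuantizationConfig.avx512_vnni(is_static=True, per_channel=False)

def preprocess_fn(examples, tokenizer):
    return tokenizer(
        examples["question"], examples["context"], padding="max_length", max_length=384, truncation=True
    )

calibration_dataset = quantizer.get_calibration_dataset(
    "squad",
    preprocess_function=partial(preprocess_fn, tokenizer=tokenizer),
    num_samples=50,
    dataset_split="train",
)
calibration_config = AutoCalibrationConfig.minmax(calibration_dataset)
ranges = quantizer.fit(
    dataset=calibration_dataset,
    calibration_config=calibration_config,
    operators_to_quantize=qconfig.operators_to_quantize,
)
quantizer.quantize(
    save_dir="distilbert_squad_static",
    calibration_tensors_range=ranges,
    quantization_config=qconfig,
)
```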
35 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/quantization/question-answering/requirements.txt:
--------------------------------------------------------------------------------
1 | datasets >= 1.8.0
2 | torch >= 1.9.0
3 | onnx
4 | onnxruntime >= 1.9.0


--------------------------------------------------------------------------------
/examples/onnxruntime/quantization/text-classification/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2020 The HuggingFace Team. All rights reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | -->
16 | 
17 | # Text classification 
18 | 
19 | ## GLUE tasks
20 | 
21 | The script [`run_glue.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/text-classification/run_glue.py) allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for sequence classification tasks such as the ones from the [GLUE benchmark](https://gluebenchmark.com/).
22 | 
23 | The following example applies post-training dynamic quantization on a DistilBERT fine-tuned on the sst-2 task.
24 | 
25 | ```bash
26 | python run_glue.py \
27 |     --model_name_or_path distilbert-base-uncased-finetuned-sst-2-english \
28 |     --task_name sst2 \
29 |     --quantization_approach dynamic \
30 |     --do_eval \
31 |     --output_dir /tmp/quantized_distilbert_sst2
32 | ```
33 | 
34 | To apply dynamic or static quantization, set `quantization_approach` to `dynamic` or `static`, respectively.
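
The resulting quantized model can be plugged into a Transformers pipeline for inference. A minimal sketch, assuming the quantized graph was saved as `model_quantized.onnx` with the tokenizer alongside it (both assumptions about the script's output layout):

```python
from transformers import AutoTokenizer, pipeline
from optimum.onnxruntime import ORTModelForSequenceClassification

save_dir = "/tmp/quantized_distilbert_sst2"  # output_dir from the example above
model = ORTModelForSequenceClassification.from_pretrained(save_dir, file_name="model_quantized.onnx")
tokenizer = AutoTokenizer.from_pretrained(save_dir)

classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("This movie was surprisingly good!"))
```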
35 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/quantization/text-classification/requirements.txt:
--------------------------------------------------------------------------------
1 | datasets >= 1.8.0
2 | sentencepiece != 0.1.92
3 | scipy
4 | scikit-learn
5 | protobuf
6 | torch >= 1.9
7 | onnx
8 | onnxruntime >= 1.9.0


--------------------------------------------------------------------------------
/examples/onnxruntime/quantization/token-classification/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2020 The HuggingFace Team. All rights reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | -->
16 | 
17 | # Token classification
18 | 
19 | The script [`run_ner.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/quantization/token-classification/run_ner.py) allows us to apply different quantization approaches (such as dynamic and static quantization) as well as graph optimizations using [ONNX Runtime](https://github.com/microsoft/onnxruntime) for token classification tasks.
20 | 
21 | The following example applies post-training dynamic quantization on a DistilBERT fine-tuned on the CoNLL-2003 task.
22 | 
23 | ```bash
24 | python run_ner.py \
25 |     --model_name_or_path elastic/distilbert-base-uncased-finetuned-conll03-english \
26 |     --dataset_name conll2003 \
27 |     --quantization_approach dynamic \
28 |     --do_eval \
29 |     --output_dir /tmp/quantized_distilbert_conll2003
30 | ```
31 | 
32 | To apply dynamic or static quantization, set `quantization_approach` to `dynamic` or `static`, respectively.
33 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/quantization/token-classification/requirements.txt:
--------------------------------------------------------------------------------
1 | seqeval
2 | datasets >= 1.8.0
3 | torch >= 1.9
4 | onnx
5 | onnxruntime >= 1.9.0


--------------------------------------------------------------------------------
/examples/onnxruntime/training/docker/Dockerfile-ort-nightly-cu118:
--------------------------------------------------------------------------------
 1 | # Dockerfile for ONNX Runtime Training (nightly build) on CUDA 11.8
 2 | #
 3 | # Copyright 2023 The HuggingFace Team All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # Use nvidia/cuda image
18 | FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
19 | CMD nvidia-smi
20 | 
21 | # Ignore interactive questions during `docker build`
22 | ENV DEBIAN_FRONTEND noninteractive
23 | 
24 | # Versions
25 | # available options 3.8, 3.9, 3.10, 3.11
26 | ARG PYTHON_VERSION=3.9
27 | ARG TORCH_CUDA_VERSION=cu118
28 | ARG TORCH_VERSION=2.0.0
29 | ARG TORCHVISION_VERSION=0.15.1
30 | 
31 | # Bash shell
32 | RUN chsh -s /bin/bash
33 | SHELL ["/bin/bash", "-c"]
34 | 
35 | # Install and update tools to minimize security vulnerabilities
36 | RUN apt-get update
37 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \
38 |     bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \
39 |     apt-get clean
40 | RUN unattended-upgrade
41 | RUN apt-get autoremove -y
42 | 
43 | # Install miniconda (the py37 installer ships python 3.7; python ${PYTHON_VERSION} is installed below)
44 | ARG BUILD_USER=onnxruntimedev
45 | ARG MINICONDA_PREFIX=/home/$BUILD_USER/miniconda3
46 | RUN apt-get install -y curl
47 | 
48 | ARG CONDA_URL=https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh
49 | RUN curl -fSsL --insecure ${CONDA_URL} -o install-conda.sh && \
50 |     /bin/bash ./install-conda.sh -b -p $MINICONDA_PREFIX && \
51 |     $MINICONDA_PREFIX/bin/conda clean -ya && \
52 |     $MINICONDA_PREFIX/bin/conda install -y python=${PYTHON_VERSION}
53 | 
54 | ENV PATH=$MINICONDA_PREFIX/bin:${PATH}
55 | 
56 | ARG PYTHON_EXE=$MINICONDA_PREFIX/bin/python
57 | 
58 | # (Optional) Install test dependencies
59 | RUN $PYTHON_EXE -m pip install git+https://github.com/huggingface/transformers
60 | RUN $PYTHON_EXE -m pip install datasets accelerate evaluate coloredlogs absl-py rouge_score seqeval scipy sacrebleu nltk scikit-learn parameterized sentencepiece
61 | RUN $PYTHON_EXE -m pip install deepspeed mpi4py
62 | # RUN $PYTHON_EXE -m pip install optuna ray sigopt wandb
63 | 
64 | # PyTorch
65 | RUN $PYTHON_EXE -m pip install onnx ninja
66 | RUN $PYTHON_EXE -m pip install torch==${TORCH_VERSION} torchvision==${TORCHVISION_VERSION} -f https://download.pytorch.org/whl/${TORCH_CUDA_VERSION}
67 | 
68 | # ORT Module
69 | RUN $PYTHON_EXE -m pip install --pre onnxruntime-training -f https://download.onnxruntime.ai/onnxruntime_nightly_cu118.html
70 | RUN $PYTHON_EXE -m pip install torch-ort
71 | ENV TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX"
72 | RUN $PYTHON_EXE -m pip install --upgrade protobuf==3.20.2
73 | RUN $PYTHON_EXE -m torch_ort.configure
74 | 
75 | WORKDIR .
76 | 
77 | CMD ["/bin/bash"]
78 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/training/docker/Dockerfile-ort-nightly-rocm57:
--------------------------------------------------------------------------------
 1 | # Use rocm image
 2 | FROM rocm/pytorch:rocm5.7_ubuntu22.04_py3.10_pytorch_2.0.1
 3 | CMD rocm-smi
 4 | 
 5 | # Ignore interactive questions during `docker build`
 6 | ENV DEBIAN_FRONTEND noninteractive
 7 | 
 8 | # Versions
 9 | # available options 3.10
10 | ARG PYTHON_VERSION=3.10
11 | 
12 | # Bash shell
13 | RUN chsh -s /bin/bash
14 | SHELL ["/bin/bash", "-c"]
15 | 
16 | # Install and update tools to minimize security vulnerabilities
17 | RUN apt-get update
18 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \
19 |     bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \
20 |     apt-get clean
21 | RUN apt-get autoremove -y
22 | 
23 | ARG PYTHON_EXE=/opt/conda/envs/py_$PYTHON_VERSION/bin/python
24 | 
25 | # (Optional) Install test dependencies
26 | RUN $PYTHON_EXE -m pip install -U pip
27 | RUN $PYTHON_EXE -m pip install git+https://github.com/huggingface/transformers
28 | RUN $PYTHON_EXE -m pip install datasets accelerate evaluate coloredlogs absl-py rouge_score seqeval scipy sacrebleu nltk scikit-learn parameterized sentencepiece --no-cache-dir
29 | RUN $PYTHON_EXE -m pip install deepspeed --no-cache-dir
30 | RUN conda install -y mpi4py
31 | 
32 | # PyTorch
33 | RUN $PYTHON_EXE -m pip install onnx ninja
34 | 
35 | # ORT Module
36 | RUN $PYTHON_EXE -m pip install --pre onnxruntime-training -f https://download.onnxruntime.ai/onnxruntime_nightly_rocm57.html
37 | RUN $PYTHON_EXE -m pip install torch-ort
38 | RUN $PYTHON_EXE -m pip install --upgrade protobuf==3.20.2
39 | RUN $PYTHON_EXE -m torch_ort.configure
40 | 
41 | WORKDIR .
42 | 
43 | CMD ["/bin/bash"]


--------------------------------------------------------------------------------
/examples/onnxruntime/training/docker/Dockerfile-ort1.17.1-cu118:
--------------------------------------------------------------------------------
 1 | # Dockerfile for ONNX Runtime Training 1.17.1 on CUDA 11.8
 2 | #
 3 | # Copyright 2023 The HuggingFace Team All rights reserved.
 4 | #
 5 | # Licensed under the Apache License, Version 2.0 (the "License");
 6 | # you may not use this file except in compliance with the License.
 7 | # You may obtain a copy of the License at
 8 | #
 9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # Use nvidia/cuda image
18 | FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
19 | CMD nvidia-smi
20 | 
21 | # Ignore interactive questions during `docker build`
22 | ENV DEBIAN_FRONTEND noninteractive
23 | 
24 | # Versions
25 | ARG PYTHON_VERSION=3.10
26 | ARG TORCH_CUDA_VERSION=cu118
27 | ARG TORCH_VERSION=2.0.0
28 | ARG TORCHVISION_VERSION=0.15.1
29 | 
30 | # Bash shell
31 | RUN chsh -s /bin/bash
32 | SHELL ["/bin/bash", "-c"]
33 | 
34 | # Install and update tools to minimize security vulnerabilities
35 | RUN apt-get update
36 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \
37 |     bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \
38 |     apt-get clean
39 | RUN unattended-upgrade
40 | RUN apt-get autoremove -y
41 | 
42 | # Install miniconda (the py37 installer ships python 3.7; python ${PYTHON_VERSION} is installed below)
43 | ARG BUILD_USER=onnxruntimedev
44 | ARG MINICONDA_PREFIX=/home/$BUILD_USER/miniconda3
45 | RUN apt-get install -y curl
46 | 
47 | ARG CONDA_URL=https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh
48 | RUN curl -fSsL --insecure ${CONDA_URL} -o install-conda.sh && \
49 |     /bin/bash ./install-conda.sh -b -p $MINICONDA_PREFIX && \
50 |     $MINICONDA_PREFIX/bin/conda clean -ya && \
51 |     $MINICONDA_PREFIX/bin/conda install -y python=${PYTHON_VERSION}
52 | 
53 | ENV PATH=$MINICONDA_PREFIX/bin:${PATH}
54 | 
55 | ARG PYTHON_EXE=$MINICONDA_PREFIX/bin/python
56 | 
57 | # (Optional) Install test dependencies
58 | RUN $PYTHON_EXE -m pip install git+https://github.com/huggingface/transformers
59 | RUN $PYTHON_EXE -m pip install datasets accelerate evaluate coloredlogs absl-py rouge_score seqeval scipy sacrebleu nltk scikit-learn parameterized sentencepiece
60 | RUN $PYTHON_EXE -m pip install deepspeed mpi4py
61 | # RUN $PYTHON_EXE -m pip install optuna ray sigopt wandb
62 | 
63 | # PyTorch
64 | RUN $PYTHON_EXE -m pip install onnx ninja
65 | RUN $PYTHON_EXE -m pip install torch==${TORCH_VERSION} torchvision==${TORCHVISION_VERSION} -f https://download.pytorch.org/whl/${TORCH_CUDA_VERSION}
66 | 
67 | # ORT Module
68 | RUN $PYTHON_EXE -m pip install onnxruntime-training==1.17.1 -f https://download.onnxruntime.ai/onnxruntime_stable_cu118.html
69 | RUN $PYTHON_EXE -m pip install torch-ort
70 | ENV TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX"
71 | RUN $PYTHON_EXE -m pip install --upgrade protobuf==3.20.2
72 | RUN $PYTHON_EXE -m torch_ort.configure
73 | 
74 | # https://github.com/vllm-project/vllm/issues/1726
75 | RUN pip uninstall nvidia-nccl-cu12 -y
76 | 
77 | WORKDIR .
78 | 
79 | CMD ["/bin/bash"]
80 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/training/image-classification/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 |     http://www.apache.org/licenses/LICENSE-2.0
 7 | Unless required by applicable law or agreed to in writing, software
 8 | distributed under the License is distributed on an "AS IS" BASIS,
 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | -->
13 | 
14 | # Image Classification 
15 | 
16 | By running the script [`run_image_classification.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/image-classification/run_image_classification.py), we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to train image classification models from the
17 | [HuggingFace hub](https://huggingface.co/models).
18 | 
19 | 
20 | __The following example applies the acceleration features powered by ONNX Runtime.__
21 | 
22 | 
23 | ### ONNX Runtime Training
24 | 
25 | The following example trains ViT on the beans dataset.
26 | 
27 | ```bash
28 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_image_classification.py \
29 |     --model_name_or_path google/vit-base-patch16-224-in21k \
30 |     --dataset_name beans \
31 |     --output_dir ./beans_outputs/ \
32 |     --remove_unused_columns False \
33 |     --label_column_name labels \
34 |     --do_train \
35 |     --do_eval \
36 |     --learning_rate 2e-5 \
37 |     --num_train_epochs 10 \
38 |     --per_device_train_batch_size 32 \
39 |     --per_device_eval_batch_size 32 \
40 |     --logging_strategy steps \
41 |     --logging_steps 10 \
42 |     --eval_strategy epoch \
43 |     --seed 1337
44 | ```
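
Under the hood, the example script swaps the Transformers `Trainer` for `ORTTrainer`. Below is a minimal sketch of that pattern (the dataset slice, transform, and hyperparameters are illustrative, and the exact `ORTTrainer` signature varies slightly across optimum versions):

```python
from datasets import load_dataset
from transformers import AutoImageProcessor, AutoModelForImageClassification
from optimum.onnxruntime import ORTTrainer, ORTTrainingArguments

model_id = "google/vit-base-patch16-224-in21k"
processor = AutoImageProcessor.from_pretrained(model_id)

# A tiny slice of the dataset, enough to illustrate the API.
train_dataset = load_dataset("beans", split="train[:128]")

def transform(batch):
    # Convert PIL images into pixel_values tensors and keep the labels.
    encodings = processor([image.convert("RGB") for image in batch["image"]], return_tensors="pt")
    encodings["labels"] = batch["labels"]
    return encodings

train_dataset = train_dataset.with_transform(transform)
model = AutoModelForImageClassification.from_pretrained(model_id, num_labels=3)

args = ORTTrainingArguments(
    output_dir="./beans_outputs",
    per_device_train_batch_size=8,
    num_train_epochs=1,
    remove_unused_columns=False,
)
trainer = ORTTrainer(model=model, args=args, train_dataset=train_dataset)
trainer.train()
```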
45 | 
46 | 
47 | __Note__
48 | > *To enable ONNX Runtime training, your device needs to be equipped with a GPU. Install the dependencies either with our prepared*
49 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions*
50 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).*
51 | ---
52 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/training/image-classification/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate>=0.12.0
2 | torch>=1.5.0
3 | torchvision>=0.6.0
4 | datasets>=1.17.0
5 | evaluate
6 | onnx>=1.9.0
7 | onnxruntime-training>=1.9.0
8 | torch-ort
9 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/training/language-modeling/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 |     http://www.apache.org/licenses/LICENSE-2.0
 7 | Unless required by applicable law or agreed to in writing, software
 8 | distributed under the License is distributed on an "AS IS" BASIS,
 9 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10 | See the License for the specific language governing permissions and
11 | limitations under the License.
12 | -->
13 | 
14 | # Language Modeling
15 | 
16 | ## Language Modeling Training
17 | 
18 | By running the scripts [`run_clm.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/language-modeling/run_clm.py)
19 | and [`run_mlm.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/language-modeling/run_mlm.py),
20 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to train language models from the
21 | [HuggingFace hub](https://huggingface.co/models).
22 | 
23 | 
24 | __The following example applies the acceleration features powered by ONNX Runtime.__
25 | 
26 | 
27 | ### ONNX Runtime Training
28 | 
29 | The following example trains GPT2 on wikitext-2 with mixed precision (fp16).
30 | 
31 | ```bash
32 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_clm.py \
33 |     --model_name_or_path gpt2 \
34 |     --dataset_name wikitext \
35 |     --dataset_config_name wikitext-2-raw-v1 \
36 |     --do_train \
37 |     --output_dir /tmp/test-clm \
38 |     --fp16
39 | ```
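
Causal language models are trained with a cross-entropy objective, so if you also evaluate (`--do_eval`), the perplexity the script reports is simply the exponential of the evaluation loss. A quick worked example (the loss value is illustrative):

```python
import math

eval_loss = 3.21  # illustrative evaluation loss
perplexity = math.exp(eval_loss)
print(f"perplexity = {perplexity:.2f}")  # perplexity = 24.78
```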
40 | 
41 | 
42 | __Note__
43 | > *To enable ONNX Runtime training, your device needs to be equipped with a GPU. Install the dependencies either with our prepared*
44 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions*
45 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).*
46 | 
47 | > *Inference uses PyTorch by default; to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.*
48 | ---
49 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/training/language-modeling/requirements.txt:
--------------------------------------------------------------------------------
 1 | datasets >= 1.8.0
 2 | sentencepiece != 0.1.92
 3 | scipy
 4 | scikit-learn
 5 | protobuf == 4.25.8
 6 | torch >= 1.9.0
 7 | transformers>=4.16.0
 8 | onnx>=1.9.0
 9 | onnxruntime-training>=1.9.0
10 | torch-ort
11 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/training/question-answering/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | -->
16 | 
17 | # Question answering
18 | 
19 | ## SQuAD Tasks
20 | 
21 | By running the script [`run_qa.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/question-answering/run_qa.py),
22 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to fine-tune the models from the
23 | [HuggingFace hub](https://huggingface.co/models) for question answering tasks such as SQuAD.
24 | 
25 | Note that if your dataset contains samples with no possible answers (like SQuAD version 2), you need to pass along
26 | the flag `--version_2_with_negative`.
27 | 
28 | __The following example applies the acceleration features powered by ONNX Runtime.__
29 | 
30 | 
31 | ### ONNX Runtime Training
32 | 
33 | The following example fine-tunes a BERT model on the SQuAD 1.0 dataset.
34 | 
35 | ```bash
36 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_qa.py \
37 |     --model_name_or_path bert-base-uncased \
38 |     --dataset_name squad \
39 |     --do_train \
40 |     --do_eval \
41 |     --output_dir /tmp/ort_bert_squad/
42 | ```
43 | 
44 | __Note__
45 | > *To enable ONNX Runtime training, your device needs to be equipped with a GPU. Install the dependencies either with our prepared*
46 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions*
47 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).*
48 | 
49 | > *Inference uses PyTorch by default; to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.*
50 | ---


--------------------------------------------------------------------------------
/examples/onnxruntime/training/question-answering/requirements.txt:
--------------------------------------------------------------------------------
1 | datasets >= 1.8.0
2 | sentencepiece != 0.1.92
3 | scipy
4 | scikit-learn
5 | protobuf
6 | torch >= 1.9.0
7 | torch-ort
8 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/training/stable-diffusion/text-to-image/requirements.txt:
--------------------------------------------------------------------------------
1 | accelerate>=0.16.0
2 | transformers>=4.25.1
3 | datasets
4 | git+https://github.com/huggingface/diffusers
5 | ftfy
6 | tensorboard
7 | Jinja2
8 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/training/summarization/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | -->
16 | 
17 | ## Summarization
18 | 
19 | By running the script [`run_summarization.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/summarization/run_summarization.py),
20 | you will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to fine-tune and evaluate models from the
21 | [HuggingFace hub](https://huggingface.co/models) on summarization tasks.
22 | 
23 | ### Supported models
24 | 
25 | In principle, all sequence-to-sequence models with [ONNXConfig](https://github.com/huggingface/transformers/blob/main/src/transformers/onnx/features.py) support in Transformers should work. Here are the models that the Optimum team has tested and validated:
26 | 
27 | * `Bart`
28 | * `T5`
29 | 
30 | `run_summarization.py` is a lightweight example of how to download and preprocess a dataset from the 🤗 Datasets library or use your own files (jsonlines or csv), then fine-tune one of the architectures above on it.
31 | 
32 | 
33 | __The following example applies the acceleration features powered by ONNX Runtime.__
34 | 
35 | 
36 | ### ONNX Runtime Training
37 | 
38 | The following example fine-tunes a T5 model on the CNN/DailyMail dataset.
39 | 
40 | ```bash
41 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_summarization.py \
42 |     --model_name_or_path t5-small \
43 |     --dataset_name cnn_dailymail \
44 |     --dataset_config "3.0.0" \
45 |     --source_prefix "summarize: " \
46 |     --do_train \
47 |     --do_eval \
48 |     --per_device_train_batch_size=4 \
49 |     --per_device_eval_batch_size=4 \
50 |     --output_dir /tmp/ort_summarization_t5/ \
51 |     --overwrite_output_dir \
52 |     --predict_with_generate
53 | ```
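
Under the hood, the script relies on `ORTSeq2SeqTrainer`, the sequence-to-sequence counterpart of `ORTTrainer`. A minimal sketch of that pattern (the dataset slice, maximum lengths, and hyperparameters are illustrative, and the `text_target` argument assumes a recent transformers version):

```python
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, DataCollatorForSeq2Seq
from optimum.onnxruntime import ORTSeq2SeqTrainer, ORTSeq2SeqTrainingArguments

model_id = "t5-small"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# A tiny slice of the dataset, enough to illustrate the API.
raw_dataset = load_dataset("cnn_dailymail", "3.0.0", split="train[:64]")

def preprocess(examples):
    # T5 expects a task prefix on the inputs; summaries become the labels.
    model_inputs = tokenizer(
        ["summarize: " + article for article in examples["article"]], max_length=512, truncation=True
    )
    labels = tokenizer(text_target=examples["highlights"], max_length=64, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

train_dataset = raw_dataset.map(preprocess, batched=True, remove_columns=raw_dataset.column_names)

args = ORTSeq2SeqTrainingArguments(
    output_dir="/tmp/ort_summarization_t5",
    per_device_train_batch_size=4,
    num_train_epochs=1,
    predict_with_generate=True,
)
trainer = ORTSeq2SeqTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
    tokenizer=tokenizer,
)
trainer.train()
```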
54 | 
55 | __Note__
56 | > *To enable ONNX Runtime training, your device needs to be equipped with a GPU. Install the dependencies either with our prepared*
57 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions*
58 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).*
59 | 
60 | > *Inference uses PyTorch by default; to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.*
61 | ---


--------------------------------------------------------------------------------
/examples/onnxruntime/training/summarization/requirements.txt:
--------------------------------------------------------------------------------
 1 | accelerate
 2 | evaluate
 3 | datasets >= 1.8.0
 4 | sentencepiece != 0.1.92
 5 | scipy
 6 | scikit-learn
 7 | protobuf
 8 | rouge-score
 9 | nltk
10 | py7zr
11 | torch >= 1.9.0
12 | torch-ort
13 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/training/text-classification/requirements.txt:
--------------------------------------------------------------------------------
1 | datasets >= 1.8.0
2 | sentencepiece != 0.1.92
3 | scipy
4 | scikit-learn
5 | protobuf
6 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/training/text-classification/zero_stage_2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "steps_per_print": 100,
 3 |     "zero_optimization": {
 4 |         "stage": 2
 5 |     },
 6 |     "zero_allow_untested_optimizer": true,
 7 |     "fp16": {
 8 |         "enabled": true,
 9 |         "initial_scale_power": 12
10 |     },
11 |     "tensorboard":{
12 |         "enabled": false
13 |     },
14 |     "train_micro_batch_size_per_gpu": "auto",
15 |     "gradient_accumulation_steps": "auto"
16 | }
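
This ZeRO stage-2 configuration is consumed through the Trainer's `--deepspeed` flag. A hypothetical invocation with this directory's text-classification training script (the model and task are illustrative):

```bash
torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_glue.py \
    --model_name_or_path bert-base-uncased \
    --task_name sst2 \
    --do_train \
    --fp16 \
    --deepspeed zero_stage_2.json \
    --output_dir /tmp/ort_bert_sst2/
```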


--------------------------------------------------------------------------------
/examples/onnxruntime/training/token-classification/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | -->
16 | 
17 | # Token classification
18 | 
19 | ## NER Tasks
20 | 
21 | By running the script [`run_ner.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/token-classification/run_ner.py),
22 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to fine-tune the models from the
23 | [HuggingFace hub](https://huggingface.co/models) for token classification tasks such as Named Entity Recognition (NER).
24 | 
25 | 
26 | __The following example applies the acceleration features powered by ONNX Runtime.__
27 | 
28 | 
29 | ### ONNX Runtime Training
30 | 
31 | The following example fine-tunes a BERT model on the CoNLL-2003 dataset.
32 | 
33 | ```bash
34 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_ner.py \
35 |     --model_name_or_path bert-base-cased \
36 |     --dataset_name conll2003 \
37 |     --do_train \
38 |     --do_eval \
39 |     --output_dir /tmp/ort_bert_conll2003/
40 | ```
41 | 
42 | ### Performance
43 | 
44 | We get the following results for the [bert-large-cased](https://huggingface.co/bert-large-cased) model with mixed-precision (fp16) training on the previous
45 | task under the PyTorch and ONNX Runtime backends. A single NVIDIA A100 card was used to run the experiment for 7 epochs:
46 | 
47 | | Model            | Backend      | Runtime(s) | Train samples(/s) |
48 | | ---------------- | ------------ | ---------- | ----------------- |
49 | | bert-large-cased | PyTorch      | 711.5      | 138.1             |
50 | | bert-large-cased | ONNX Runtime | 637.2      | 154.3             |
51 | 
52 | We observe the following gains for ONNX Runtime compared to PyTorch:
53 | 
54 | |       | Latency | Throughput |
55 | | ----- | ------- | ---------- |
56 | | Gain  | 10.45%  | 11.67%     |
57 | 
58 | 
59 | __Note__
60 | > *To enable ONNX Runtime training, your device needs to be equipped with a GPU. Install the dependencies either with our prepared*
61 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions*
62 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).*
63 | 
64 | > *Inference uses PyTorch by default; to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.*
65 | ---
66 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/training/token-classification/requirements.txt:
--------------------------------------------------------------------------------
1 | datasets >= 1.18.3
2 | scipy
3 | scikit-learn
4 | sentencepiece != 0.1.92
5 | seqeval
6 | torch >= 1.9
7 | torch-ort
8 | 


--------------------------------------------------------------------------------
/examples/onnxruntime/training/translation/README.md:
--------------------------------------------------------------------------------
 1 | <!---
 2 | Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | 
 4 | Licensed under the Apache License, Version 2.0 (the "License");
 5 | you may not use this file except in compliance with the License.
 6 | You may obtain a copy of the License at
 7 | 
 8 |     http://www.apache.org/licenses/LICENSE-2.0
 9 | 
10 | Unless required by applicable law or agreed to in writing, software
11 | distributed under the License is distributed on an "AS IS" BASIS,
12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | See the License for the specific language governing permissions and
14 | limitations under the License.
15 | -->
16 | 
17 | # Translation
18 | 
19 | By running the script [`run_translation.py`](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/translation/run_translation.py),
20 | we will be able to leverage the [`ONNX Runtime`](https://github.com/microsoft/onnxruntime) accelerator to fine-tune the models from the
21 | [HuggingFace hub](https://huggingface.co/models) for translation tasks.
22 | 
23 | ### Supported Architectures
24 | 
25 | - `BartForConditionalGeneration`
26 | - `T5ForConditionalGeneration`
27 | 
28 | `run_translation.py` is a lightweight example of how to download and preprocess a dataset from the [🤗 Datasets](https://github.com/huggingface/datasets) library
29 | or use your own files (jsonlines or csv), then fine-tune one of the architectures above on it.
30 | 
31 | For custom datasets in `jsonlines` format please see: https://huggingface.co/docs/datasets/loading_datasets.html#json-files.
32 | 
33 | __The following example applies the acceleration features powered by ONNX Runtime.__
34 | 
35 | 
36 | ### ONNX Runtime Training
37 | 
38 | The following example fine-tunes a T5 large model on the wmt16 dataset.
39 | 
40 | ```bash
41 | torchrun --nproc_per_node=NUM_GPUS_YOU_HAVE run_translation.py \
42 |     --model_name_or_path t5-large \
43 |     --dataset_name wmt16 \
44 |     --dataset_config ro-en \
45 |     --label_smoothing 0.1 \
46 |     --predict_with_generate \
47 |     --source_lang en \
48 |     --target_lang ro \
49 |     --do_train \
50 |     --max_train_samples 30000 \
51 |     --fp16 \
52 |     --output_dir /tmp/ort_t5_translation/
53 | ```
54 | 
55 | ### Performance
56 | 
57 | We get the following results for [t5-large](https://huggingface.co/t5-large) with mixed-precision (fp16) training on the previous
58 | task under the PyTorch and ONNX Runtime backends. A single NVIDIA A100 card was used to run the experiment for 3 epochs:
59 | 
60 | | Model    | Backend      | Runtime(s) | Train samples(/s) |
61 | | -------- | ------------ | ---------- | ----------------- |
62 | | t5-large | PyTorch      | 2038.8     | 44.1              |
63 | | t5-large | ONNX Runtime | 1536.7     | 58.6              |
64 | 
65 | We observe the following gains for ONNX Runtime compared to PyTorch:
66 | 
67 | |       | Latency | Throughput |
68 | | ----- | ------- | ---------- |
69 | | Gain  | 24.63%  | 32.67%     |
70 | 
71 | 
72 | __Note__
73 | 
74 | > *To enable ONNX Runtime training, your device needs to be equipped with a GPU. Install the dependencies either with our prepared*
75 | *[Dockerfiles](https://github.com/huggingface/optimum/blob/main/examples/onnxruntime/training/docker/) or follow the instructions*
76 | *in [`torch_ort`](https://github.com/pytorch/ort/blob/main/torch_ort/docker/README.md).*
77 | 
78 | > *Inference uses PyTorch by default; to use the ONNX Runtime backend instead, add the flag `--inference_with_ort`.*
79 | ---


--------------------------------------------------------------------------------
/examples/onnxruntime/training/translation/requirements.txt:
--------------------------------------------------------------------------------
1 | datasets >= 1.18.0
2 | sentencepiece != 0.1.92
3 | protobuf
4 | sacrebleu >= 1.4.12
5 | py7zr
6 | torch >= 1.8


--------------------------------------------------------------------------------
/optimum/bettertransformer/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 The HuggingFace and Meta Team.  All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from ..utils.import_utils import _transformers_version, is_transformers_version
17 | 
18 | 
19 | if is_transformers_version(">=", "4.49"):
20 |     raise RuntimeError(
21 |         f"BetterTransformer requires transformers<4.49 but found {_transformers_version}. "
22 |         "`optimum.bettertransformer` is deprecated and will be removed in optimum v2.0."
23 |     )
24 | 
25 | from .models import BetterTransformerManager
26 | from .transformation import BetterTransformer
27 | 


--------------------------------------------------------------------------------
/optimum/commands/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from .base import BaseOptimumCLICommand, CommandInfo, RootOptimumCLICommand
16 | from .env import EnvironmentCommand
17 | from .export import ExportCommand, ONNXExportCommand, TFLiteExportCommand
18 | from .optimum_cli import optimum_cli_subcommand
19 | 


--------------------------------------------------------------------------------
/optimum/commands/env.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import platform
16 | 
17 | import huggingface_hub
18 | from transformers import __version__ as transformers_version
19 | from transformers.utils import is_tf_available, is_torch_available
20 | 
21 | from ..version import __version__ as version
22 | from . import BaseOptimumCLICommand, CommandInfo
23 | 
24 | 
25 | class EnvironmentCommand(BaseOptimumCLICommand):
26 |     COMMAND = CommandInfo(name="env", help="Get information about the environment used.")
27 | 
28 |     @staticmethod
29 |     def format_dict(d):
30 |         return "\n".join([f"- {prop}: {val}" for prop, val in d.items()]) + "\n"
31 | 
32 |     def run(self):
33 |         pt_version = "not installed"
34 |         pt_cuda_available = "NA"
35 |         if is_torch_available():
36 |             import torch
37 | 
38 |             pt_version = torch.__version__
39 |             pt_cuda_available = torch.cuda.is_available()
40 | 
41 |         tf_version = "not installed"
42 |         tf_cuda_available = "NA"
43 |         if is_tf_available():
44 |             import tensorflow as tf
45 | 
46 |             tf_version = tf.__version__
47 |             try:
48 |                 # deprecated in v2.1
49 |                 tf_cuda_available = tf.test.is_gpu_available()
50 |             except AttributeError:
51 |                 # returns list of devices, convert to bool
52 |                 tf_cuda_available = bool(tf.config.list_physical_devices("GPU"))
53 | 
54 |         info = {
55 |             "`optimum` version": version,
56 |             "`transformers` version": transformers_version,
57 |             "Platform": platform.platform(),
58 |             "Python version": platform.python_version(),
59 |             "Huggingface_hub version": huggingface_hub.__version__,
60 |             "PyTorch version (GPU?)": f"{pt_version} (cuda available: {pt_cuda_available})",
61 |             "Tensorflow version (GPU?)": f"{tf_version} (cuda available: {tf_cuda_available})",
62 |         }
63 | 
64 |         print("\nCopy-and-paste the text below in your GitHub issue:\n")
65 |         print(self.format_dict(info))
66 | 
67 |         return info
68 | 


--------------------------------------------------------------------------------
/optimum/commands/export/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from .base import ExportCommand
17 | from .onnx import ONNXExportCommand
18 | from .tflite import TFLiteExportCommand
19 | 


--------------------------------------------------------------------------------
/optimum/commands/export/base.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2023 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """optimum.exporters command-line interface base classes."""
16 | 
17 | from .. import BaseOptimumCLICommand, CommandInfo
18 | from .onnx import ONNXExportCommand
19 | from .tflite import TFLiteExportCommand
20 | 
21 | 
22 | class ExportCommand(BaseOptimumCLICommand):
23 |     COMMAND = CommandInfo(
24 |         name="export",
25 |         help="Export PyTorch and TensorFlow models to several formats.",
26 |     )
27 |     SUBCOMMANDS = (
28 |         CommandInfo(
29 |             name="onnx",
30 |             help="Export PyTorch and TensorFlow to ONNX.",
31 |             subcommand_class=ONNXExportCommand,
32 |         ),
33 |         CommandInfo(
34 |             name="tflite",
35 |             help="Export TensorFlow to TensorFlow Lite.",
36 |             subcommand_class=TFLiteExportCommand,
37 |         ),
38 |     )
39 | 
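
For context, this class wires the subcommands into the `optimum-cli export` entry point, which is invoked as, for example:

```bash
optimum-cli export onnx --model distilbert-base-uncased-finetuned-sst-2-english distilbert_sst2_onnx/
```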


--------------------------------------------------------------------------------
/optimum/commands/register/README.md:
--------------------------------------------------------------------------------
 1 | # Register commands in the Optimum CLI from a subpackage
 2 | 
 3 | It is possible to register a command in the Optimum CLI, either as a command or a subcommand of an already existing command.
 4 | 
 5 | Steps to follow:
 6 | 
 7 | 1. Create a command as a subclass of `optimum.commands.BaseOptimumCLICommand`.
 8 | 2. Create a Python file under `optimum/commands/register/`, and define a `REGISTER_COMMANDS` list variable there.
 9 | 3. Fill the `REGISTER_COMMANDS` as follows:
10 | 
11 | ```python
12 | # CustomCommand1 and CustomCommand2 could also be defined in this file actually.
13 | from ..my_custom_commands import CustomCommand1, CustomCommand2
14 | from ..export import ExportCommand
15 | 
16 | REGISTER_COMMANDS = [
17 |   # CustomCommand1 will be registered as a subcommand of the root Optimum CLI. 
18 |   CustomCommand1, 
19 |   # CustomCommand2 will be registered as a subcommand of the `optimum-cli export` command. 
20 |   (CustomCommand2, ExportCommand),
21 | ]
22 | ```
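
For illustration, a command such as `CustomCommand1` above could be defined like this, following the pattern of the built-in commands (e.g. `EnvironmentCommand` in `optimum/commands/env.py`); the name and behavior here are hypothetical:

```python
from optimum.commands import BaseOptimumCLICommand, CommandInfo


class HelloCommand(BaseOptimumCLICommand):
    """Hypothetical command, registered as `optimum-cli hello`."""

    COMMAND = CommandInfo(name="hello", help="Print a friendly greeting.")

    def run(self):
        print("Hello from a custom Optimum CLI command!")


# Registered at the root of the Optimum CLI.
REGISTER_COMMANDS = [HelloCommand]
```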
23 | 


--------------------------------------------------------------------------------
/optimum/commands/register/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2023 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 


--------------------------------------------------------------------------------
/optimum/conftest.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | # tests directory-specific settings - this file is run automatically
16 | # by pytest before any tests are run
17 | 
18 | import doctest
19 | import sys
20 | from os.path import abspath, dirname, join
21 | 
22 | 
23 | # allow having multiple repository checkouts and not needing to remember to rerun
24 | # 'pip install -e .[dev]' when switching between checkouts and running tests.
25 | git_repo_path = abspath(join(dirname(__file__), "src"))
26 | sys.path.insert(1, git_repo_path)
27 | 
28 | # Doctest custom flag to ignore output.
29 | IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")
30 | 
31 | OutputChecker = doctest.OutputChecker
32 | 
33 | 
34 | class CustomOutputChecker(OutputChecker):
35 |     def check_output(self, want, got, optionflags):
36 |         if IGNORE_RESULT & optionflags:
37 |             return True
38 |         return OutputChecker.check_output(self, want, got, optionflags)
39 | 
40 | 
41 | doctest.OutputChecker = CustomOutputChecker
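
For illustration, a docstring doctest opts into this flag with a standard doctest directive; a hypothetical example:

```python
def current_timestamp():
    """
    The exact value changes on every run, so the expected output is ignored.

    >>> current_timestamp()  # doctest: +IGNORE_RESULT
    1700000000.0
    """
    import time

    return time.time()
```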
42 | 


--------------------------------------------------------------------------------
/optimum/exporters/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from . import onnx  # noqa
16 | from .tasks import TasksManager  # noqa
17 | 


--------------------------------------------------------------------------------
/optimum/exporters/error_utils.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2023 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Utilities related to error handling."""
16 | 
17 | 
18 | class ShapeError(ValueError):
19 |     pass
20 | 
21 | 
22 | class AtolError(ValueError):
23 |     pass
24 | 
25 | 
26 | class OutputMatchError(ValueError):
27 |     pass
28 | 
29 | 
30 | class NumberOfInputsMatchError(ValueError):
31 |     pass
32 | 
33 | 
34 | class NumberOfOutputsMatchError(ValueError):
35 |     pass
36 | 
37 | 
38 | class MinimumVersionError(ValueError):
39 |     pass
40 | 


--------------------------------------------------------------------------------
/optimum/exporters/onnx/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from typing import TYPE_CHECKING
17 | 
18 | from transformers.utils import _LazyModule
19 | 
20 | 
21 | _import_structure = {
22 |     "base": ["OnnxConfig", "OnnxConfigWithLoss", "OnnxConfigWithPast", "OnnxSeq2SeqConfigWithPast"],
23 |     "config": ["TextDecoderOnnxConfig", "TextEncoderOnnxConfig", "TextSeq2SeqOnnxConfig"],
24 |     "convert": [
25 |         "export",
26 |         "export_models",
27 |         "validate_model_outputs",
28 |         "validate_models_outputs",
29 |         "onnx_export_from_model",
30 |     ],
31 |     "utils": [
32 |         "get_decoder_models_for_export",
33 |         "get_encoder_decoder_models_for_export",
34 |         "get_diffusion_models_for_export",
35 |         "MODEL_TYPES_REQUIRING_POSITION_IDS",
36 |     ],
37 |     "__main__": ["main_export"],
38 | }
39 | 
40 | if TYPE_CHECKING:
41 |     from .base import OnnxConfig, OnnxConfigWithLoss, OnnxConfigWithPast, OnnxSeq2SeqConfigWithPast  # noqa
42 |     from .config import TextDecoderOnnxConfig, TextEncoderOnnxConfig, TextSeq2SeqOnnxConfig  # noqa
43 |     from .convert import (
44 |         export,
45 |         export_models,
46 |         validate_model_outputs,
47 |         validate_models_outputs,
48 |         onnx_export_from_model,
49 |     )  # noqa
50 |     from .utils import (
51 |         get_decoder_models_for_export,
52 |         get_encoder_decoder_models_for_export,
53 |         get_diffusion_models_for_export,
54 |         MODEL_TYPES_REQUIRING_POSITION_IDS,
55 |     )
56 |     from .__main__ import main_export
57 | else:
58 |     import sys
59 | 
60 |     sys.modules[__name__] = _LazyModule(
61 |         __name__,
62 |         globals()["__file__"],
63 |         _import_structure,
64 |         module_spec=__spec__,
65 |     )
66 | 


--------------------------------------------------------------------------------
/optimum/exporters/onnx/constants.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2023 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | # 2 GB
17 | EXTERNAL_DATA_FORMAT_SIZE_LIMIT = 2 * 1024 * 1024 * 1024
18 | 
19 | ONNX_ENCODER_NAME = "encoder_model"
20 | ONNX_DECODER_NAME = "decoder_model"
21 | ONNX_DECODER_WITH_PAST_NAME = "decoder_with_past_model"
22 | ONNX_DECODER_MERGED_NAME = "decoder_model_merged"
23 | 
24 | UNPICKABLE_ARCHS = [
25 |     "encodec",
26 |     "hubert",
27 |     "sew",
28 |     "sew-d",
29 |     "speecht5",
30 |     "unispeech",
31 |     "unispeech-sat",
32 |     "wav2vec2",
33 |     "wav2vec2-conformer",
34 |     "wavlm",
35 | ]
36 | 
37 | SDPA_ARCHS_ONNX_EXPORT_NOT_SUPPORTED = [
38 |     "bart",
39 |     "musicgen",
40 |     "whisper",
41 | ]
42 | 
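The 2 GB constant mirrors protobuf's hard limit on the size of a single serialized file: ONNX models above it must store their weights as external data. A hypothetical helper showing how such a limit is typically consulted (path and function name are illustrative):

    import os

    def needs_external_data(onnx_path: str) -> bool:
        # Protobuf cannot serialize a single file of 2 GB or more, so larger
        # models must keep their weights in ONNX external data files.
        return os.path.getsize(onnx_path) >= EXTERNAL_DATA_FORMAT_SIZE_LIMIT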


--------------------------------------------------------------------------------
/optimum/exporters/tflite/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2023 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from typing import TYPE_CHECKING
17 | 
18 | from transformers.utils import _LazyModule
19 | 
20 | 
21 | _import_structure = {
22 |     "base": ["QuantizationApproach", "TFLiteQuantizationConfig", "TFLiteConfig"],
23 |     "convert": ["export", "validate_model_outputs"],
24 | }
25 | 
26 | if TYPE_CHECKING:
27 |     from .base import QuantizationApproach, TFLiteQuantizationConfig, TFLiteConfig  # noqa
28 |     from .convert import export, validate_model_outputs  # noqa
29 | else:
30 |     import sys
31 | 
32 |     sys.modules[__name__] = _LazyModule(
33 |         __name__,
34 |         globals()["__file__"],
35 |         _import_structure,
36 |         module_spec=__spec__,
37 |     )
38 | 


--------------------------------------------------------------------------------
/optimum/exporters/tflite/config.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2023 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """
16 | Common TensorFlow Lite configuration classes that handle most of the features for building model-specific
17 | configurations.
18 | """
19 | 
20 | from ...utils import DummyTextInputGenerator, DummyVisionInputGenerator, logging
21 | from .base import TFLiteConfig
22 | 
23 | 
24 | logger = logging.get_logger(__name__)
25 | 
26 | 
27 | class TextEncoderTFliteConfig(TFLiteConfig):
28 |     """
29 |     Handles encoder-based text architectures.
30 |     """
31 | 
32 |     DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator,)
33 |     MANDATORY_AXES = ("batch_size", "sequence_length", ("multiple-choice", "num_choices"))
34 | 
35 | 
36 | class VisionTFLiteConfig(TFLiteConfig):
37 |     """
38 |     Handles vision architectures.
39 |     """
40 | 
41 |     DUMMY_INPUT_GENERATOR_CLASSES = (DummyVisionInputGenerator,)
42 |     MANDATORY_AXES = ("batch_size", "num_channels", "width", "height")
43 | 
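Model-specific TFLite configurations are built by subclassing these common classes: the inherited `DUMMY_INPUT_GENERATOR_CLASSES` produce the dummy inputs and `MANDATORY_AXES` define the static shapes the export requires. A hypothetical sketch (the class name is illustrative; real configs may declare additional attributes such as a normalized config class):

    class MyTextModelTFLiteConfig(TextEncoderTFliteConfig):
        # Inherits DummyTextInputGenerator plus the mandatory batch_size and
        # sequence_length axes (and num_choices for multiple-choice tasks).
        pass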


--------------------------------------------------------------------------------
/optimum/fx/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | #  Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | #
 4 | #  Licensed under the Apache License, Version 2.0 (the "License");
 5 | #  you may not use this file except in compliance with the License.
 6 | #  You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | #  Unless required by applicable law or agreed to in writing, software
11 | #  distributed under the License is distributed on an "AS IS" BASIS,
12 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | #  See the License for the specific language governing permissions and
14 | #  limitations under the License.
15 | from . import optimization
16 | 


--------------------------------------------------------------------------------
/optimum/fx/optimization/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | #  Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | #
 4 | #  Licensed under the Apache License, Version 2.0 (the "License");
 5 | #  you may not use this file except in compliance with the License.
 6 | #  You may obtain a copy of the License at
 7 | #
 8 | #      http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | #  Unless required by applicable law or agreed to in writing, software
11 | #  distributed under the License is distributed on an "AS IS" BASIS,
12 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | #  See the License for the specific language governing permissions and
14 | #  limitations under the License.
15 | from .transformations import (  # noqa
16 |     ChangeTrueDivToMulByInverse,
17 |     FuseBatchNorm1dInLinear,
18 |     FuseBatchNorm2dInConv2d,
19 |     FuseBiasInLinear,
20 |     MergeLinears,
21 |     ReversibleTransformation,
22 |     Transformation,
23 |     compose,
24 | )
25 | 
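These transformations are callables over `torch.fx.GraphModule`s, and `compose` chains several of them into one. A short sketch, assuming a symbolically traceable model such as BERT:

    from transformers import BertModel
    from transformers.utils.fx import symbolic_trace

    from optimum.fx.optimization import ChangeTrueDivToMulByInverse, MergeLinears, compose

    model = BertModel.from_pretrained("bert-base-uncased")
    traced = symbolic_trace(model, input_names=["input_ids", "attention_mask", "token_type_ids"])

    # Chain several graph rewrites into a single transformation and apply it
    transformation = compose(ChangeTrueDivToMulByInverse(), MergeLinears())
    transformed_model = transformation(traced)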


--------------------------------------------------------------------------------
/optimum/fx/parallelization/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2024 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from .api import parallelize_backend, parallelize_model
16 | from .core import Config, ParallelExecutionCtx
17 | 


--------------------------------------------------------------------------------
/optimum/fx/parallelization/distributed/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2024 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from .dist_ops import (
16 |     differentiable_all_gather,
17 |     differentiable_all_reduce_sum,
18 |     differentiable_identity,
19 |     differentiable_scatter,
20 |     scatter,
21 | )
22 | 


--------------------------------------------------------------------------------
/optimum/fx/parallelization/op_registry/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2024 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from .op_handlers import REGISTRY, FallbackParallelAxisPropagateHandler
16 | 


--------------------------------------------------------------------------------
/optimum/fx/parallelization/parallel_layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2024 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from .embedding import VocabParallelEmbedding
16 | from .linear import ColumnParallelLinear, RowParallelLinear
17 | from .loss import VocabParallelCrossEntropyLoss, sharded_cross_entropy_wrapper_fn
18 | 


--------------------------------------------------------------------------------
/optimum/fx/utils.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from functools import wraps
16 | 
17 | import transformers
18 | from packaging import version
19 | 
20 | 
21 | _TRANSFORMERS_MIN_VERSION = version.parse("4.20.0.dev0")
22 | 
23 | transformers_version = version.parse(transformers.__version__)
24 | _fx_features_available = (_TRANSFORMERS_MIN_VERSION.major, _TRANSFORMERS_MIN_VERSION.minor) <= (
25 |     transformers_version.major,
26 |     transformers_version.minor,
27 | )
28 | 
29 | 
30 | def are_fx_features_available():
31 |     return _fx_features_available
32 | 
33 | 
34 | def check_if_available(func):
35 |     @wraps(func)
36 |     def wrapper(*args, **kwargs):
37 |         if not are_fx_features_available():
38 |             raise ImportError(
39 |                 f"Found an incompatible version of transformers. Found version {transformers_version}, but only {_TRANSFORMERS_MIN_VERSION} and above are supported."
40 |             )
41 |         return func(*args, **kwargs)
42 | 
43 |     return wrapper
44 | 
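`check_if_available` is meant to guard any helper that relies on the transformers FX tracer. A minimal sketch (the decorated function is hypothetical):

    @check_if_available
    def trace_and_optimize(model):
        # Only runs when transformers >= 4.20 is installed; otherwise the
        # wrapper raises an ImportError before this body executes.
        ...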


--------------------------------------------------------------------------------
/optimum/gptq/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2023 HuggingFace Inc. team.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | from .quantizer import GPTQQuantizer, load_quantized_model
16 | 
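A short usage sketch for `GPTQQuantizer`, assuming a GPTQ backend (e.g. auto-gptq) is installed; the model id is illustrative:

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    from optimum.gptq import GPTQQuantizer

    model_id = "facebook/opt-125m"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)

    # Quantize the decoder blocks to 4 bits, calibrating on the c4 dataset
    quantizer = GPTQQuantizer(bits=4, dataset="c4", model_seqlen=2048)
    quantized_model = quantizer.quantize_model(model, tokenizer)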


--------------------------------------------------------------------------------
/optimum/gptq/constants.py:
--------------------------------------------------------------------------------
 1 | #  Copyright 2023 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | 
15 | SEQLEN_KEYS_TRANFORMERS = ["max_position_embeddings", "seq_length", "n_positions"]
16 | BLOCK_PATTERNS = [
17 |     "transformer.h",
18 |     "model.decoder.layers",
19 |     "gpt_neox.layers",
20 |     "model.layers",
21 |     "model.language_model.layers",
22 |     # modules loaded by AutoModel vs AutoModelForCausalLM have different prefixes
23 |     "h",
24 |     "decoder.layers",
25 |     "layers",
26 | ]
27 | 
28 | GPTQ_CONFIG = "quantize_config.json"
29 | 


--------------------------------------------------------------------------------
/optimum/gptq/eval.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from datasets import load_dataset
 4 | from tqdm import tqdm
 5 | 
 6 | 
 7 | def evaluate_perplexity(model, tokenizer):
 8 |     def _perplexity(nlls, n_samples, seqlen):
 9 |         return torch.exp(torch.stack(nlls).sum() / (n_samples * seqlen))
10 | 
11 |     # load and prepare dataset
12 |     data = load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
13 |     data = tokenizer("\n\n".join(data["text"]), return_tensors="pt")
14 |     data = data.input_ids.to(model.device)
15 | 
16 |     seqlen = 512
17 |     model = model.eval()
18 |     n_samples = data.numel() // seqlen
19 | 
20 |     nlls = []
21 | 
22 |     with tqdm(range(n_samples), desc="Perplexity -") as progress_bar:
23 |         for i in progress_bar:
24 |             start_index = i * seqlen
25 |             end_index = (i + 1) * seqlen
26 |             batch = data[:, start_index:end_index].to(model.device)
27 |             with torch.no_grad():
28 |                 logits = model(batch).logits
29 |             shift_logits = logits[:, :-1, :].contiguous().float()
30 |             shift_labels = data[:, start_index:end_index][:, 1:]
31 |             loss_fct = nn.CrossEntropyLoss()
32 |             loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
33 |             neg_log_likelihood = loss.float() * seqlen
34 |             nlls.append(neg_log_likelihood)
35 | 
36 |             curr_ppl = _perplexity(nlls, i + 1, seqlen)
37 |             progress_bar.set_description(f"Perplexity {curr_ppl:.3f}")
38 | 
39 |     ppl = _perplexity(nlls, n_samples, seqlen)
40 | 
41 |     return ppl.item()
42 | 
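`evaluate_perplexity` streams wikitext-2 through the model in 512-token windows and reports the running perplexity. A usage sketch (model id and device are illustrative):

    from transformers import AutoModelForCausalLM, AutoTokenizer

    model = AutoModelForCausalLM.from_pretrained("gpt2").eval().to("cuda")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")

    ppl = evaluate_perplexity(model, tokenizer)
    print(f"wikitext-2 perplexity: {ppl:.2f}")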


--------------------------------------------------------------------------------
/optimum/onnx/__init__.py:
--------------------------------------------------------------------------------
 1 | #  Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | from typing import TYPE_CHECKING
15 | 
16 | from transformers.utils import _LazyModule
17 | 
18 | 
19 | _import_structure = {
20 |     "graph_transformations": [
21 |         "cast_slice_nodes_inputs_to_int32",
22 |         "merge_decoders",
23 |         "remove_duplicate_weights",
24 |         "replace_atenops_to_gather",
25 |         "remove_duplicate_weights_from_tied_info",
26 |     ],
27 | }
28 | 
29 | if TYPE_CHECKING:
30 |     from .graph_transformations import (
31 |         cast_slice_nodes_inputs_to_int32,
32 |         merge_decoders,
33 |         remove_duplicate_weights,
34 |         remove_duplicate_weights_from_tied_info,
35 |         replace_atenops_to_gather,
36 |     )
37 | else:
38 |     import sys
39 | 
40 |     sys.modules[__name__] = _LazyModule(
41 |         __name__,
42 |         globals()["__file__"],
43 |         _import_structure,
44 |         module_spec=__spec__,
45 |     )
46 | 


--------------------------------------------------------------------------------
/optimum/onnxruntime/constants.py:
--------------------------------------------------------------------------------
 1 | #  Copyright 2023 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | 
15 | ENCODER_ONNX_FILE_PATTERN = r"(.*)?encoder(.*)?\.onnx"
16 | DECODER_ONNX_FILE_PATTERN = r"(.*)?decoder((?!(with_past|merged)).)*?\.onnx"
17 | DECODER_WITH_PAST_ONNX_FILE_PATTERN = r"(.*)?decoder(.*)?with_past(.*)?\.onnx"
18 | DECODER_MERGED_ONNX_FILE_PATTERN = r"(.*)?decoder(.*)?merged(.*)?\.onnx"
19 | ONNX_FILE_PATTERN = r".*\.onnx"
20 | 
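The tempered lookahead `((?!(with_past|merged)).)*?` is what keeps the plain decoder pattern from swallowing the with-past and merged variants. A quick sanity check with illustrative file names:

    import re

    # The plain decoder file matches the plain decoder pattern...
    assert re.search(DECODER_ONNX_FILE_PATTERN, "decoder_model.onnx")
    # ...but the with-past variant is rejected by the negative lookahead
    assert re.search(DECODER_ONNX_FILE_PATTERN, "decoder_with_past_model.onnx") is None
    assert re.search(DECODER_WITH_PAST_ONNX_FILE_PATTERN, "decoder_with_past_model.onnx")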


--------------------------------------------------------------------------------
/optimum/onnxruntime/preprocessors/__init__.py:
--------------------------------------------------------------------------------
 1 | #  Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | 
15 | from .quantization import PreprocessorPass, QuantizationPreprocessor
16 | 


--------------------------------------------------------------------------------
/optimum/onnxruntime/preprocessors/passes/__init__.py:
--------------------------------------------------------------------------------
 1 | #  Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | 
15 | from .excluders import ExcludeNodeAfter, ExcludeNodeFollowedBy
16 | from .gelu import ExcludeGeLUNodes
17 | from .layernorm import ExcludeLayerNormNodes
18 | 


--------------------------------------------------------------------------------
/optimum/onnxruntime/preprocessors/passes/excluders.py:
--------------------------------------------------------------------------------
 1 | #  Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | from typing import Set, Tuple
15 | 
16 | from onnx import ModelProto
17 | 
18 | from onnxruntime.transformers.onnx_model import OnnxModel
19 | 
20 | from .. import PreprocessorPass
21 | 
22 | 
23 | class ExcludeNodeFollowedBy(PreprocessorPass):
24 |     def __init__(self, operator_type_to_exclude: str, following_operator_type: str):
25 |         super().__init__()
26 | 
27 |         self.operator_type_to_exclude = operator_type_to_exclude
28 |         self.following_operator_type = following_operator_type
29 | 
30 |     def __call__(self, _: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]:
31 |         # Find the candidate nodes to exclude in the graph
32 |         candidate_nodes_to_exclude = {
33 |             candidate_output: candidate.name
34 |             for candidate in model.get_nodes_by_op_type(self.operator_type_to_exclude)
35 |             for candidate_output in candidate.output
36 |         }
37 | 
38 |         nodes_of_following_type = {
39 |             node_input: node.name
40 |             for node in model.get_nodes_by_op_type(self.following_operator_type)
41 |             for node_input in node.input
42 |         }
43 | 
44 |         # The intersection of both sets gives the nodes to exclude
45 |         to_exclude = set(candidate_nodes_to_exclude.keys()).intersection(nodes_of_following_type.keys())
46 |         nodes_to_exclude = {candidate_nodes_to_exclude[node] for node in to_exclude}
47 | 
48 |         return set(), nodes_to_exclude
49 | 
50 | 
51 | class ExcludeNodeAfter(PreprocessorPass):
52 |     def __init__(self, parent_operator_type: str, operator_type_to_exclude: str):
53 |         super().__init__()
54 | 
55 |         self.parent_operator_type = parent_operator_type
56 |         self.operator_type_to_exclude = operator_type_to_exclude
57 | 
58 |     def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]:
59 |         # Find the candidate nodes to exclude in the graph
60 |         candidate_nodes_to_exclude = {
61 |             candidate_input: candidate.name
62 |             for candidate in model.get_nodes_by_op_type(self.operator_type_to_exclude)
63 |             for candidate_input in candidate.input
64 |         }
65 | 
66 |         parent_node = {
67 |             node_output: node.name
68 |             for node in model.get_nodes_by_op_type(self.parent_operator_type)
69 |             for node_output in node.output
70 |         }
71 | 
72 |         # The intersection of both sets gives the nodes to exclude
73 |         to_exclude = set(candidate_nodes_to_exclude.keys()).intersection(parent_node.keys())
74 |         nodes_to_exclude = {candidate_nodes_to_exclude[node] for node in to_exclude}
75 | 
76 |         return set(), nodes_to_exclude
77 | 
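Both passes return only an exclusion set, leaving the inclusion set empty. A usage sketch, mirroring the typical quantization recipe (operator choices are illustrative):

    from optimum.onnxruntime.preprocessors import QuantizationPreprocessor

    preprocessor = QuantizationPreprocessor()
    # Exclude Add nodes whose output feeds a Softmax (attention-mask additions)
    preprocessor.register_pass(ExcludeNodeFollowedBy("Add", "Softmax"))
    # Exclude Add nodes that directly consume another Add's output (residuals)
    preprocessor.register_pass(ExcludeNodeAfter("Add", "Add"))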


--------------------------------------------------------------------------------
/optimum/onnxruntime/preprocessors/passes/fully_connected.py:
--------------------------------------------------------------------------------
 1 | #  Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | from typing import Set, Tuple
15 | 
16 | from onnx import ModelProto
17 | 
18 | from onnxruntime.transformers.onnx_model import OnnxModel
19 | 
20 | from .. import PreprocessorPass
21 | 
22 | 
23 | class IncludeFullyConnectedNodes(PreprocessorPass):
24 |     def __init__(self):
25 |         super().__init__()
26 | 
27 |     def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]:
28 |         fc_subgraphs = []
29 |         for add_node in model.get_nodes_by_op_type("Add"):
30 |             fc_components = model.match_parent_path(add_node, ["MatMul"], [1])
31 |             if fc_components is not None:
32 |                 fc_components.append(add_node)
33 |                 fc_subgraphs.append(fc_components)
34 |         fc_components = {node.name for fc in fc_subgraphs for node in fc}
35 |         return fc_components, set()
36 | 


--------------------------------------------------------------------------------
/optimum/onnxruntime/preprocessors/passes/gelu.py:
--------------------------------------------------------------------------------
 1 | #  Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | from typing import Set, Tuple
15 | 
16 | from onnx import ModelProto
17 | 
18 | from onnxruntime.transformers.onnx_model import OnnxModel
19 | 
20 | from .. import PreprocessorPass
21 | 
22 | 
23 | class ExcludeGeLUNodes(PreprocessorPass):
24 |     def __init__(self):
25 |         super().__init__()
26 | 
27 |     def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]:
28 |         gelu_subgraphs = []
29 |         for mul_node in model.get_nodes_by_op_type("Mul"):
30 |             gelu_components = model.match_parent_path(mul_node, ["Mul", "Add", "Erf", "Div"], [0, 1, 0, 0])
31 | 
32 |             if gelu_components is not None:
33 |                 gelu_components.append(mul_node)
34 |                 gelu_subgraphs.append(gelu_components)
35 | 
36 |         gl_components = (node.name for gl in gelu_subgraphs for node in gl)
37 |         return set(), set(gl_components)
38 | 


--------------------------------------------------------------------------------
/optimum/onnxruntime/preprocessors/passes/layernorm.py:
--------------------------------------------------------------------------------
 1 | #  Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | from typing import Set, Tuple
15 | 
16 | from onnx import ModelProto
17 | 
18 | from onnxruntime.transformers.onnx_model import OnnxModel
19 | 
20 | from .. import PreprocessorPass
21 | 
22 | 
23 | class ExcludeLayerNormNodes(PreprocessorPass):
24 |     def __init__(self):
25 |         super().__init__()
26 | 
27 |     def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Set[str], Set[str]]:
28 |         layer_norm_subgraphs = []
29 |         for add_node in model.get_nodes_by_op_type("Add"):
30 |             layer_norm_components = model.match_parent_path(
31 |                 add_node,
32 |                 ["Mul", "Div", "Sqrt", "Add", "ReduceMean", "Pow", "Sub", "ReduceMean"],
33 |                 [0, 0, 1, 0, 0, 0, 0, 1],
34 |             )
35 | 
36 |             if layer_norm_components is not None:
37 |                 layer_norm_components.append(add_node)
38 |                 layer_norm_subgraphs.append(layer_norm_components)
39 | 
40 |         ln_components = (node.name for ln in layer_norm_subgraphs for node in ln)
41 |         return set(), set(ln_components)
42 | 


--------------------------------------------------------------------------------
/optimum/onnxruntime/preprocessors/quantization.py:
--------------------------------------------------------------------------------
 1 | #  Copyright 2022 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | from abc import ABC, abstractmethod
15 | from logging import getLogger
16 | from os import PathLike
17 | from pathlib import Path
18 | from typing import Optional, Set, Tuple, Union
19 | 
20 | from onnx import ModelProto, load_model
21 | 
22 | from onnxruntime.transformers.onnx_model import OnnxModel
23 | 
24 | 
25 | LOGGER = getLogger("GraphWalker")
26 | 
27 | 
28 | class PreprocessorPass(ABC):
29 |     def __init__(self):
30 |         self._logger = LOGGER
31 | 
32 |     @abstractmethod
33 |     def __call__(self, graph: ModelProto, model: OnnxModel) -> Tuple[Optional[Set[str]], Optional[Set[str]]]:
34 |         raise NotImplementedError()
35 | 
36 | 
37 | class QuantizationPreprocessor:
38 |     __slots__ = ("_passes",)
39 | 
40 |     def __init__(self):
41 |         self._passes = []
42 | 
43 |     def from_config(self, config):
44 |         pass
45 | 
46 |     def register_pass(self, target: PreprocessorPass):
47 |         if target not in self._passes:
48 |             self._passes.append(target)
49 | 
50 |     def collect(self, model_or_path: Union[str, PathLike, Path, bytes]) -> Tuple[Set[str], Set[str]]:
51 |         global_nodes_to_quantize, global_nodes_to_exclude = set(), set()
52 |         graph = load_model(model_or_path.as_posix() if isinstance(model_or_path, Path) else model_or_path)
53 |         model = OnnxModel(graph)
54 | 
55 |         for walking_pass in self._passes:
56 |             nodes_to_quantize, nodes_to_exclude = walking_pass(graph, model)
57 | 
58 |             if nodes_to_quantize is not None:
59 |                 global_nodes_to_quantize.update(nodes_to_quantize)
60 | 
61 |             if nodes_to_exclude is not None:
62 |                 global_nodes_to_exclude.update(nodes_to_exclude)
63 | 
64 |         # Exclude the nodes from quantization when present in both sets
65 |         global_nodes_to_quantize = global_nodes_to_quantize - global_nodes_to_exclude
66 | 
67 |         return global_nodes_to_quantize, global_nodes_to_exclude
68 | 
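`collect` loads the graph once and lets every registered pass vote, with exclusions winning over inclusions. A sketch assembling a preprocessor from the passes shipped in this package (the model path is hypothetical):

    from optimum.onnxruntime.preprocessors.passes import ExcludeGeLUNodes, ExcludeLayerNormNodes

    preprocessor = QuantizationPreprocessor()
    preprocessor.register_pass(ExcludeGeLUNodes())
    preprocessor.register_pass(ExcludeLayerNormNodes())

    # Walk the graph once and aggregate the decisions of every pass
    nodes_to_quantize, nodes_to_exclude = preprocessor.collect("model.onnx")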


--------------------------------------------------------------------------------
/optimum/onnxruntime/runs/utils.py:
--------------------------------------------------------------------------------
 1 | from ..modeling_decoder import ORTModelForCausalLM
 2 | from ..modeling_ort import (
 3 |     ORTModelForFeatureExtraction,
 4 |     ORTModelForImageClassification,
 5 |     ORTModelForQuestionAnswering,
 6 |     ORTModelForSequenceClassification,
 7 |     ORTModelForTokenClassification,
 8 | )
 9 | 
10 | 
11 | task_ortmodel_map = {
12 |     "text-generation": ORTModelForCausalLM,
13 |     "feature-extraction": ORTModelForFeatureExtraction,
14 |     "image-classification": ORTModelForImageClassification,
15 |     "question-answering": ORTModelForQuestionAnswering,
16 |     "text-classification": ORTModelForSequenceClassification,
17 |     "token-classification": ORTModelForTokenClassification,
18 | }
19 | 


--------------------------------------------------------------------------------
/optimum/onnxruntime/subpackage/__init__.py:
--------------------------------------------------------------------------------
1 | from .commands import ONNXRuntimeCommand
2 | 


--------------------------------------------------------------------------------
/optimum/onnxruntime/subpackage/commands/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2023 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from .base import ONNXRuntimeCommand
17 | 


--------------------------------------------------------------------------------
/optimum/onnxruntime/subpackage/commands/base.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2023 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """optimum.onnxruntime command-line interface base classes."""
16 | 
17 | from optimum.commands import BaseOptimumCLICommand, CommandInfo, optimum_cli_subcommand
18 | 
19 | from .optimize import ONNXRuntimeOptimizeCommand
20 | from .quantize import ONNXRuntimeQuantizeCommand
21 | 
22 | 
23 | @optimum_cli_subcommand()
24 | class ONNXRuntimeCommand(BaseOptimumCLICommand):
25 |     COMMAND = CommandInfo(
26 |         name="onnxruntime",
27 |         help="ONNX Runtime optimize and quantize utilities.",
28 |     )
29 |     SUBCOMMANDS = (
30 |         CommandInfo(
31 |             name="optimize",
32 |             help="Optimize ONNX models.",
33 |             subcommand_class=ONNXRuntimeOptimizeCommand,
34 |         ),
35 |         CommandInfo(
36 |             name="quantize",
37 |             help="Dynammic quantization for ONNX models.",
38 |             subcommand_class=ONNXRuntimeQuantizeCommand,
39 |         ),
40 |     )
41 | 


--------------------------------------------------------------------------------
/optimum/onnxruntime/training_args_seq2seq.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2020 The HuggingFace Team. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import dataclass, field
16 | from typing import Optional
17 | 
18 | from transformers import Seq2SeqTrainingArguments
19 | 
20 | from .training_args import ORTTrainingArguments
21 | 
22 | 
23 | @dataclass
24 | class ORTSeq2SeqTrainingArguments(Seq2SeqTrainingArguments, ORTTrainingArguments):
25 |     """
26 |     Parameters:
27 |         optim (`str` or [`training_args.ORTOptimizerNames`] or [`transformers.training_args.OptimizerNames`], *optional*, defaults to `"adamw_hf"`):
28 |             The optimizer to use, including optimizers in Transformers (adamw_hf, adamw_torch, adamw_apex_fused, or adafactor) and optimizers implemented by ONNX Runtime (adamw_ort_fused).
29 |     """
30 | 
31 |     optim: Optional[str] = field(
32 |         default="adamw_hf",
33 |         metadata={"help": "The optimizer to use."},
34 |     )
35 | 


--------------------------------------------------------------------------------
/optimum/pipelines/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2023 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from .pipelines_base import (
17 |     MAPPING_LOADING_FUNC,
18 |     ORT_SUPPORTED_TASKS,
19 |     load_bettertransformer,
20 |     load_ort_pipeline,
21 |     pipeline,
22 | )
23 | 
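A brief usage sketch of the extended `pipeline` factory, which accepts an `accelerator` argument on top of the transformers API:

    from optimum.pipelines import pipeline

    # Run the task on an ONNX Runtime model instead of a PyTorch one
    classifier = pipeline("text-classification", accelerator="ort")
    print(classifier("Optimum pipelines make ONNX Runtime easy to use."))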


--------------------------------------------------------------------------------
/optimum/quantization_base.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from abc import ABC, abstractmethod
 3 | from pathlib import Path
 4 | from typing import Optional, Union
 5 | 
 6 | 
 7 | logger = logging.getLogger(__name__)
 8 | 
 9 | 
10 | class OptimumQuantizer(ABC):
11 |     @classmethod
12 |     def from_pretrained(
13 |         cls,
14 |         model_or_path: Union[str, Path],
15 |         file_name: Optional[str] = None,
16 |     ):
17 |         """Overwrite this method in subclass to define how to load your model from pretrained"""
18 |         raise NotImplementedError(
19 |             "Overwrite this method in subclass to define how to load your model from pretrained for quantization"
20 |         )
21 | 
22 |     @abstractmethod
23 |     def quantize(self, save_dir: Union[str, Path], file_prefix: Optional[str] = None, **kwargs):
24 |         """Overwrite this method in subclass to define how to quantize your model for quantization"""
25 |         raise NotImplementedError(
26 |             "Overwrite this method in subclass to define how to quantize your model for quantization"
27 |         )
28 | 
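Concrete quantizers subclass `OptimumQuantizer` and fill in both methods. A hypothetical sketch (class name and argument handling are illustrative):

    class MyQuantizer(OptimumQuantizer):
        @classmethod
        def from_pretrained(cls, model_or_path, file_name=None):
            # Load the model to be quantized, e.g. from a local directory
            ...

        def quantize(self, save_dir, file_prefix=None, **kwargs):
            # Apply the quantization scheme and write the result to save_dir
            ...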


--------------------------------------------------------------------------------
/optimum/subpackages.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # Copyright 2024 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import importlib
17 | import logging
18 | import sys
19 | 
20 | 
21 | if sys.version_info >= (3, 8):
22 |     from importlib import metadata as importlib_metadata
23 | else:
24 |     import importlib_metadata
25 | from importlib.util import find_spec, module_from_spec
26 | 
27 | from .utils import is_onnxruntime_available
28 | 
29 | 
30 | logger = logging.getLogger(__name__)
31 | 
32 | 
33 | def load_namespace_modules(namespace: str, module: str):
34 |     """Load modules with a specific name inside a namespace
35 | 
36 |     This method operates on namespace packages:
37 |     https://packaging.python.org/en/latest/guides/packaging-namespace-packages/
38 | 
39 |     For each package inside the specified `namespace`, it looks for the specified `module` and loads it.
40 | 
41 |     Args:
42 |         namespace (`str`):
43 |             The namespace containing modules to be loaded.
44 |         module (`str`):
45 |             The name of the module to load in each namespace package.
46 |     """
47 |     for dist in importlib_metadata.distributions():
48 |         dist_name = dist.metadata["Name"]
49 |         if dist_name is None:
50 |             continue
51 |         if dist_name == f"{namespace}-benchmark":
52 |             continue
53 |         if not dist_name.startswith(f"{namespace}-"):
54 |             continue
55 |         package_import_name = dist_name.replace("-", ".")
56 |         module_import_name = f"{package_import_name}.{module}"
57 |         if module_import_name in sys.modules:
58 |             # Module already loaded
59 |             continue
60 |         backend_spec = find_spec(module_import_name)
61 |         if backend_spec is None:
62 |             continue
63 |         try:
64 |             imported_module = module_from_spec(backend_spec)
65 |             sys.modules[module_import_name] = imported_module
66 |             backend_spec.loader.exec_module(imported_module)
67 |             logger.debug(f"Successfully loaded {module_import_name}")
68 |         except Exception as e:
69 |             logger.error(f"An exception occurred while loading {module_import_name}: {e}.")
70 | 
71 | 
72 | def load_subpackages():
73 |     """Load optimum subpackages
74 | 
75 |     This method goes through packages inside the `optimum` namespace and loads the `subpackage` module if it exists.
76 | 
77 |     This module is then in charge of registering the subpackage commands.
78 |     """
79 |     SUBPACKAGE_LOADER = "subpackage"
80 |     load_namespace_modules("optimum", SUBPACKAGE_LOADER)
81 | 
82 |     # Load subpackages from internal modules not explicitly defined as namespace packages
83 |     loader_name = "." + SUBPACKAGE_LOADER
84 |     if is_onnxruntime_available():
85 |         importlib.import_module(loader_name, package="optimum.onnxruntime")
86 | 
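For a hypothetical distribution named `optimum-mybackend`, `load_subpackages` would import `optimum.mybackend.subpackage`, whose import-time side effects register the backend's commands. A sketch of such a module:

    # optimum/mybackend/subpackage.py (hypothetical)
    # Imported automatically by load_subpackages(), so anything done here,
    # such as registering CLI subcommands, happens at load time.
    from .commands import MyBackendCommand  # noqa: F401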


--------------------------------------------------------------------------------
/optimum/utils/constant.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | CONFIG_NAME = "config.json"
17 | ONNX_WEIGHTS_NAME = "model.onnx"
18 | 
19 | DIFFUSION_MODEL_UNET_SUBFOLDER = "unet"
20 | DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER = "transformer"
21 | DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER = "vae_decoder"
22 | DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER = "vae_encoder"
23 | DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER = "text_encoder"
24 | DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER = "text_encoder_2"
25 | DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER = "text_encoder_3"
26 | DIFFUSION_PIPELINE_CONFIG_FILE_NAME = "model_index.json"
27 | DIFFUSION_MODEL_CONFIG_FILE_NAME = "config.json"
28 | DIFFUSION_MODEL_ONNX_FILE_NAME = "model.onnx"
29 | 


--------------------------------------------------------------------------------
/optimum/utils/doc.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
16 | from dataclasses import fields
17 | 
18 | 
19 | def generate_doc_dataclass(cls) -> str:
20 |     """Class decorator for generate the documentation for dataclass."""
21 |     doc = "\f\nAttributes:\n"
22 |     for attribute in fields(cls):
23 |         doc += f"   {attribute.name}"  # attribute name
24 | 
25 |         # whether optional
26 |         attribute_type = str(attribute.type)
27 |         if attribute_type.startswith("typing.Optional"):
28 |             optional = True
29 |             type_display = attribute_type[attribute_type.find("[") + 1 : -1]
30 |             type_display = type_display.split(".")[-1]
31 |         else:
32 |             optional = False
33 | 
34 |             if attribute_type.startswith("typing"):
35 |                 type_display = attribute_type.split(".")[-1]
36 |             else:
37 |                 type_display = attribute.type.__name__
38 | 
39 |         if optional:
40 |             doc += f" (`{type_display}`, *optional*): "
41 |         else:
42 |             doc += f" (`{type_display}`): "
43 | 
44 |         doc += f"{attribute.metadata['description']}\n"  # argument description
45 |     cls.__doc__ = (cls.__doc__ if cls.__doc__ is not None else "") + "\n\n" + "".join(doc)
46 |     return cls
47 | 
48 | 
49 | def add_dynamic_docstring(
50 |     *docstr,
51 |     text,
52 |     dynamic_elements,
53 | ):
54 |     def docstring_decorator(fn):
55 |         func_doc = (fn.__doc__ or "") + "".join(docstr)
56 |         fn.__doc__ = func_doc + text.format(**dynamic_elements)
57 |         return fn
58 | 
59 |     return docstring_decorator
60 | 
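`add_dynamic_docstring` appends a formatted template to a function's existing docstring. A small sketch (template and values are illustrative):

    GENERATE_TEXT = "\n    Supported tasks: {tasks}.\n"

    @add_dynamic_docstring(text=GENERATE_TEXT, dynamic_elements={"tasks": "text-classification, question-answering"})
    def run_task():
        """Runs a task."""

    print(run_task.__doc__)  # original docstring followed by the formatted template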


--------------------------------------------------------------------------------
/optimum/utils/dummy_bettertransformer_objects.py:
--------------------------------------------------------------------------------
1 | from .import_utils import DummyObject, requires_backends
2 | 
3 | 
4 | class BarkSelfAttention(metaclass=DummyObject):
5 |     _backends = ["transformers_431"]
6 | 
7 |     def __init__(self, *args, **kwargs):
8 |         requires_backends(self, ["transformers_431"])
9 | 


--------------------------------------------------------------------------------
/optimum/utils/modeling_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 The HuggingFace Inc. team. All rights reserved.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import functools
16 | 
17 | 
18 | MODEL_TO_PATCH_FOR_PAST = {
19 |     "bart",
20 |     "blenderbot",
21 |     "blenderbot-small",
22 |     "bloom",
23 |     "llama",
24 |     "mistral",
25 |     "mpt",
26 |     "opt",
27 |     "pegasus",
28 | }
29 | 
30 | 
31 | def recurse_getattr(obj, attr: str):
32 |     """
33 |     Recursive `getattr`.
34 | 
35 |     Args:
36 |         obj:
37 |             A class instance holding the attribute.
38 |         attr (`str`):
39 |             The attribute that is to be retrieved, e.g. 'attribute1.attribute2'.
40 |     """
41 | 
42 |     def _getattr(obj, attr):
43 |         return getattr(obj, attr)
44 | 
45 |     return functools.reduce(_getattr, [obj] + attr.split("."))
46 | 
47 | 
48 | def recurse_setattr(module, name, value):
49 |     """A function to recursively set attributes to a module."""
50 |     if "." not in name:
51 |         setattr(module, name, value)
52 |     else:
53 |         name, rest = name.split(".", 1)
54 |         recurse_setattr(getattr(module, name), rest, value)
55 | 
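A quick sketch of the two helpers on a dotted attribute path (the module is illustrative):

    import torch
    from torch import nn

    model = nn.Sequential(nn.Linear(4, 4))

    weight = recurse_getattr(model, "0.weight")  # equivalent to model[0].weight
    recurse_setattr(model, "0.bias", nn.Parameter(torch.zeros(4)))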


--------------------------------------------------------------------------------
/optimum/utils/preprocessing/__init__.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | from .base import Preprocessor, TaskProcessor
17 | from .image_classification import ImageClassificationProcessing
18 | from .question_answering import QuestionAnsweringProcessing
19 | from .task_processors_manager import TaskProcessorsManager
20 | from .text_classification import TextClassificationProcessing
21 | from .token_classification import TokenClassificationProcessing
22 | 


--------------------------------------------------------------------------------
/optimum/utils/preprocessing/task_processors_manager.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Dataset processing factory."""
16 | 
17 | from typing import TYPE_CHECKING, Any, Type
18 | 
19 | from optimum.utils.preprocessing.image_classification import ImageClassificationProcessing
20 | from optimum.utils.preprocessing.question_answering import QuestionAnsweringProcessing
21 | from optimum.utils.preprocessing.text_classification import TextClassificationProcessing
22 | from optimum.utils.preprocessing.token_classification import TokenClassificationProcessing
23 | 
24 | 
25 | if TYPE_CHECKING:
26 |     from .base import TaskProcessor
27 | 
28 | 
29 | class TaskProcessorsManager:
30 |     _TASK_TO_DATASET_PROCESSING_CLASS = {
31 |         "text-classification": TextClassificationProcessing,
32 |         "token-classification": TokenClassificationProcessing,
33 |         "question-answering": QuestionAnsweringProcessing,
34 |         "image-classification": ImageClassificationProcessing,
35 |     }
36 | 
37 |     @classmethod
38 |     def get_task_processor_class_for_task(cls, task: str) -> Type["TaskProcessor"]:
39 |         if task not in cls._TASK_TO_DATASET_PROCESSING_CLASS:
40 |             supported_tasks = ", ".join(cls._TASK_TO_DATASET_PROCESSING_CLASS.keys())
41 |             raise KeyError(
42 |                 f"Could not find a `TaskProcessor` class for the task called {task}, supported tasks: "
43 |                 f"{supported_tasks}."
44 |             )
45 |         return cls._TASK_TO_DATASET_PROCESSING_CLASS[task]
46 | 
47 |     @classmethod
48 |     def for_task(cls, task: str, *dataset_processing_args, **dataset_processing_kwargs: Any) -> "TaskProcessor":
49 |         return cls.get_task_processor_class_for_task(task)(*dataset_processing_args, **dataset_processing_kwargs)
50 | 
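A short sketch of the two entry points (the constructor arguments of a concrete `TaskProcessor` vary by subclass, so the one-step call is indicative only):

    processor_cls = TaskProcessorsManager.get_task_processor_class_for_task("text-classification")

    # Equivalent one-step instantiation; arguments depend on the subclass:
    # processor = TaskProcessorsManager.for_task("text-classification", config, tokenizer)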


--------------------------------------------------------------------------------
/optimum/utils/save_utils.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """Utilities related to saving files."""
16 | 
17 | import logging
18 | from pathlib import Path
19 | from typing import List, Union
20 | 
21 | from transformers import AutoFeatureExtractor, AutoImageProcessor, AutoProcessor, AutoTokenizer
22 | 
23 | 
24 | logger = logging.getLogger(__name__)
25 | 
26 | 
27 | def maybe_load_preprocessors(
28 |     src_name_or_path: Union[str, Path], subfolder: str = "", trust_remote_code: bool = False
29 | ) -> List:
30 |     preprocessors = []
31 |     try:
32 |         preprocessors.append(
33 |             AutoTokenizer.from_pretrained(src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code)
34 |         )
35 |     except Exception:
36 |         pass
37 | 
38 |     try:
39 |         preprocessors.append(
40 |             AutoProcessor.from_pretrained(src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code)
41 |         )
42 |     except Exception:
43 |         pass
44 | 
45 |     try:
46 |         preprocessors.append(
47 |             AutoFeatureExtractor.from_pretrained(
48 |                 src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
49 |             )
50 |         )
51 |     except Exception:
52 |         pass
53 | 
54 |     try:
55 |         preprocessors.append(
56 |             AutoImageProcessor.from_pretrained(
57 |                 src_name_or_path, subfolder=subfolder, trust_remote_code=trust_remote_code
58 |             )
59 |         )
60 |     except Exception:
61 |         pass
62 |     return preprocessors
63 | 
64 | 
65 | def maybe_save_preprocessors(
66 |     src_name_or_path: Union[str, Path],
67 |     dest_dir: Union[str, Path],
68 |     src_subfolder: str = "",
69 |     trust_remote_code: bool = False,
70 | ):
71 |     """
72 |     Saves the tokenizer, processor, feature extractor and image processor into `dest_dir`, when they can be loaded from `src_name_or_path`.
73 | 
74 |     Args:
75 |         src_name_or_path (`Union[str, Path]`):
76 |             The name of, or path to, the model repository or directory from which to load the preprocessors.
77 |         dest_dir (`Union[str, Path]`):
78 |             The destination directory to copy the files to.
79 |         src_subfolder (`str`, defaults to `""`):
80 |             In case the preprocessor files are located inside a subfolder of the model directory / repo on the Hugging
81 |             Face Hub, you can specify the subfolder name here.
82 |         trust_remote_code (`bool`, defaults to `False`):
83 |             Whether to allow saving preprocessors whose loading requires executing arbitrary code from the Hub. Use this option at your own risk.
84 |     """
85 |     if not isinstance(dest_dir, Path):
86 |         dest_dir = Path(dest_dir)
87 | 
88 |     dest_dir.mkdir(exist_ok=True)
89 |     for preprocessor in maybe_load_preprocessors(
90 |         src_name_or_path, subfolder=src_subfolder, trust_remote_code=trust_remote_code
91 |     ):
92 |         preprocessor.save_pretrained(dest_dir)
93 | 
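A minimal usage sketch for the two helpers above (the checkpoint name and destination directory are illustrative):

```python
# Sketch: collect whatever preprocessors exist for a checkpoint and copy them
# next to an exported model. `maybe_load_preprocessors` silently skips any
# Auto* class that cannot be loaded for the given checkpoint.
from optimum.utils.save_utils import maybe_load_preprocessors, maybe_save_preprocessors

preprocessors = maybe_load_preprocessors("distilbert-base-uncased")
print([type(p).__name__ for p in preprocessors])  # e.g. a fast tokenizer for DistilBERT

maybe_save_preprocessors("distilbert-base-uncased", "onnx_export/")
```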


--------------------------------------------------------------------------------
/optimum/version.py:
--------------------------------------------------------------------------------
 1 | #  Copyright 2021 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | 
15 | __version__ = "1.27.0.dev0"
16 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | #  Copyright 2021 The HuggingFace Team. All rights reserved.
 2 | #
 3 | #  Licensed under the Apache License, Version 2.0 (the "License");
 4 | #  you may not use this file except in compliance with the License.
 5 | #  You may obtain a copy of the License at
 6 | #
 7 | #      http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | #  Unless required by applicable law or agreed to in writing, software
10 | #  distributed under the License is distributed on an "AS IS" BASIS,
11 | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | #  See the License for the specific language governing permissions and
13 | #  limitations under the License.
14 | 
15 | [tool.black]
16 | line-length = 119
17 | target-version = ['py37']
18 | 
19 | [tool.ruff]
20 | # Never enforce `E501` (line length violations).
21 | ignore = ["C901", "E501", "E741", "W605"]
22 | select = ["C", "E", "F", "I", "W"]
23 | line-length = 119
24 | 
25 | # Ignore import violations in all `__init__.py` files.
26 | [tool.ruff.per-file-ignores]
27 | "__init__.py" = ["E402", "F401", "F403", "F811"]
28 | 
29 | [tool.ruff.isort]
30 | lines-after-imports = 2
31 | known-first-party = ["optimum"]
32 | 
33 | [tool.pytest.ini_options]
34 | markers = [
35 |     "gpu_test",
36 |     "cuda_ep_test",
37 |     "trt_ep_test",
38 |     "rocm_ep_test",
39 |     "tensorflow_test",
40 |     "datasets_test",
41 |     "run_in_series",
42 |     "run_slow",
43 |     "accelerate_test",
44 |     "fp16",
45 |     "quantization",
46 | ]
47 | 
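The `markers` list above declares the custom pytest markers used across the test suite. As a sketch (the test body is a placeholder), a marker is applied and then selected from the command line:

```python
# Illustrative only: applying one of the markers declared in pyproject.toml.
import pytest


@pytest.mark.gpu_test
def test_runs_on_gpu():
    ...  # select with `pytest -m gpu_test`, skip with `pytest -m "not gpu_test"`
```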


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [isort]
 2 | default_section = FIRSTPARTY
 3 | ensure_newline_before_comments = True
 4 | force_grid_wrap = 0
 5 | include_trailing_comma = True
 6 | known_first_party = optimum
 7 | line_length = 119
 8 | lines_after_imports = 2
 9 | multi_line_output = 3
10 | use_parentheses = True
11 | 
12 | [flake8]
13 | ignore = E203, E501, E741, W503, W605
14 | max-line-length = 119
15 | 
16 | [tool:pytest]
17 | doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS
18 | 


--------------------------------------------------------------------------------
/tests/README.md:
--------------------------------------------------------------------------------
 1 | # Helpful tips for testing & debugging optimum
 2 | 
 3 | ## VS Code
 4 | 
 5 | If you are using VS Code, you might have a hard time getting the tests discovered in the "Testing" menu so that you can run or debug them individually. You can copy the snippet below into `.vscode/settings.json`.
 6 | 
 7 | ```json
 8 | {
 9 |   "python.testing.pytestArgs": [
10 |       "tests/onnxruntime",
11 |       "tests/test_*"
12 |   ],
13 |   "python.testing.unittestEnabled": false,
14 |   "python.testing.pytestEnabled": true
15 | }
16 | ```
17 | 
18 | This snippet will discover all base tests and the tests inside the `tests/onnxruntime` folder.
19 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huggingface/optimum/9564248ea95926cfa25381447d08774420ff20a2/tests/__init__.py


--------------------------------------------------------------------------------
/tests/assets/onnx/config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "_name_or_path": "tiny-distilbert-classification",
 3 |   "activation": "gelu",
 4 |   "architectures": [
 5 |     "DistilBertForSequenceClassification"
 6 |   ],
 7 |   "attention_dropout": 0.1,
 8 |   "dim": 2,
 9 |   "dropout": 0.1,
10 |   "finetuning_task": "sst-2",
11 |   "hidden_dim": 2,
12 |   "id2label": {
13 |     "0": "NEGATIVE",
14 |     "1": "POSITIVE"
15 |   },
16 |   "initializer_range": 0.02,
17 |   "label2id": {
18 |     "NEGATIVE": 0,
19 |     "POSITIVE": 1
20 |   },
21 |   "max_position_embeddings": 512,
22 |   "model_type": "distilbert",
23 |   "n_heads": 2,
24 |   "n_layers": 2,
25 |   "output_past": true,
26 |   "pad_token_id": 0,
27 |   "qa_dropout": 0.1,
28 |   "seq_classif_dropout": 0.2,
29 |   "sinusoidal_pos_embds": false,
30 |   "tie_weights_": true,
31 |   "torch_dtype": "float32",
32 |   "transformers_version": "4.10.0.dev0",
33 |   "vocab_size": 30522
34 | }
35 | 


--------------------------------------------------------------------------------
/tests/assets/onnx/model.onnx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huggingface/optimum/9564248ea95926cfa25381447d08774420ff20a2/tests/assets/onnx/model.onnx


--------------------------------------------------------------------------------
/tests/bettertransformer/Dockerfile_bettertransformer_gpu:
--------------------------------------------------------------------------------
 1 | FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
 2 | CMD nvidia-smi
 3 | 
 4 | # Ignore interactive questions during `docker build`
 5 | ENV DEBIAN_FRONTEND noninteractive
 6 | 
 7 | # Install and update tools to minimize security vulnerabilities
 8 | RUN apt-get update
 9 | RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \
10 |     bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev python3-pip && \
11 |     apt-get clean
12 | RUN unattended-upgrade
13 | RUN apt-get autoremove -y
14 | 
15 | RUN python3 -m pip install -U pip
16 | 
17 | RUN pip install torch torchvision torchaudio
18 | RUN pip install transformers==4.48.* accelerate datasets
19 | 
20 | # Install Optimum
21 | COPY . /workspace/optimum
22 | RUN pip install /workspace/optimum[tests]
23 | 
24 | ENV RUN_SLOW=1
25 | WORKDIR /workspace/optimum/tests/
26 | CMD pytest bettertransformer/test_*.py -s --durations=0 -m gpu_test
27 | 


--------------------------------------------------------------------------------
/tests/cli/cli_with_custom_command.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2023 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 
16 | import os
17 | 
18 | from optimum.commands import BaseOptimumCLICommand, CommandInfo, ExportCommand
19 | 
20 | 
21 | class MyCustomCommand(BaseOptimumCLICommand):
22 |     COMMAND = CommandInfo(name="blablabla", help="Says something.")
23 | 
24 |     def run(self):
25 |         print("If the CI can read this, it means it worked!")
26 | 
27 | 
28 | register_with_subcommand = os.environ.get("TEST_REGISTER_COMMAND_WITH_SUBCOMMAND", None)
29 | 
30 | if register_with_subcommand == "true":
31 |     REGISTER_COMMANDS = [
32 |         (MyCustomCommand, ExportCommand),
33 |     ]
34 | else:
35 |     REGISTER_COMMANDS = [
36 |         MyCustomCommand,
37 |     ]
38 | 


--------------------------------------------------------------------------------
/tests/common/test_configuration_utils.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2021 HuggingFace Inc.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | import tempfile
16 | import unittest
17 | 
18 | from huggingface_hub import login
19 | from transformers.testing_utils import TOKEN, TemporaryHubRepo, is_staging_test
20 | 
21 | from optimum.configuration_utils import BaseConfig
22 | 
23 | 
24 | class FakeConfig(BaseConfig):
25 |     CONFIG_NAME = "fake_config.json"
26 |     FULL_CONFIGURATION_FILE = "fake_config.json"
27 | 
28 |     def __init__(self, attribute=1, **kwargs):
29 |         self.attribute = attribute
30 |         super().__init__(**kwargs)
31 | 
32 | 
33 | class ConfigTester(unittest.TestCase):
34 |     def test_create_and_test_config_from_and_save_pretrained(self):
35 |         config_first = FakeConfig(attribute=10)
36 | 
37 |         with tempfile.TemporaryDirectory() as tmpdirname:
38 |             config_first.save_pretrained(tmpdirname)
39 |             config_second = FakeConfig.from_pretrained(tmpdirname)
40 | 
41 |         self.assertEqual(config_second.to_dict(), config_first.to_dict())
42 | 
43 | 
44 | @is_staging_test
45 | class ConfigPushToHubTester(unittest.TestCase):
46 |     @classmethod
47 |     def setUpClass(cls):
48 |         login(token=TOKEN)
49 | 
50 |     def test_push_to_hub(self):
51 |         config = FakeConfig(attribute=15)
52 | 
53 |         with TemporaryHubRepo(token=TOKEN) as tmp_repo:
54 |             config.push_to_hub(tmp_repo.repo_id, token=TOKEN)
55 | 
56 |             new_config = FakeConfig.from_pretrained(tmp_repo.repo_id, token=TOKEN)
57 |             for k, v in config.to_dict().items():
58 |                 if k != "optimum_version" and k != "transformers_version":
59 |                     self.assertEqual(v, getattr(new_config, k))
60 | 
61 |     def test_push_to_hub_in_organization(self):
62 |         config = FakeConfig(attribute=15)
63 | 
64 |         with TemporaryHubRepo(namespace="valid_org", token=TOKEN) as tmp_repo:
65 |             config.push_to_hub(tmp_repo.repo_id, token=TOKEN)
66 |             new_config = FakeConfig.from_pretrained(tmp_repo.repo_id, token=TOKEN)
67 |             for k, v in config.to_dict().items():
68 |                 if k != "optimum_version" and k != "transformers_version":
69 |                     self.assertEqual(v, getattr(new_config, k))
70 | 


--------------------------------------------------------------------------------
/tests/exporters/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huggingface/optimum/9564248ea95926cfa25381447d08774420ff20a2/tests/exporters/__init__.py


--------------------------------------------------------------------------------
/tests/exporters/onnx/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huggingface/optimum/9564248ea95926cfa25381447d08774420ff20a2/tests/exporters/onnx/__init__.py


--------------------------------------------------------------------------------
/tests/exporters/tflite/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huggingface/optimum/9564248ea95926cfa25381447d08774420ff20a2/tests/exporters/tflite/__init__.py


--------------------------------------------------------------------------------
/tests/fx/parallelization/dist_utils.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2024 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | import os
16 | from typing import Callable, List, Optional
17 | 
18 | import torch
19 | import torch.distributed as dist
20 | import torch.multiprocessing as mp
21 | from transformers import set_seed
22 | 
23 | 
24 | SEED = 42
25 | NUM_AVAILABLE_DEVICES = torch.cuda.device_count()
26 | 
27 | 
28 | def dist_init(
29 |     rank: int,
30 |     world_size: int,
31 |     backend: str = "nccl",
32 |     master_addr: str = "127.0.0.1",
33 |     master_port: str = "29501",
34 | ):
35 |     os.environ["RANK"] = str(rank)
36 |     os.environ["WORLD_SIZE"] = str(world_size)
37 |     os.environ["MASTER_ADDR"] = master_addr
38 |     os.environ["MASTER_PORT"] = master_port
39 | 
40 |     dist.init_process_group(
41 |         backend=backend,
42 |         init_method="env://",
43 |         world_size=world_size,
44 |         rank=rank,
45 |     )
46 | 
47 |     torch.cuda.set_device(rank)
48 | 
49 | 
50 | def runner(rank: int, fn: Callable, deterministic: bool, *args, **kwargs):
51 |     if deterministic:
52 |         set_seed(SEED)
53 |     fn(rank, *args, **kwargs)
54 | 
55 | 
56 | def spawn(world_size: int, fn: Callable, *args, deterministic: bool = False):
57 |     mp.spawn(fn=runner, args=(fn, deterministic, world_size, *args), nprocs=world_size, join=True)
58 | 
59 | 
60 | def tearDown(group: Optional[dist.ProcessGroup] = None):
61 |     dist.destroy_process_group(group)
62 | 
63 | 
64 | def gather_at_main_process(
65 |     tensor: torch.Tensor, group: dist.ProcessGroup, rank: int, world_size: int
66 | ) -> List[torch.Tensor]:
67 |     if world_size == 1:
68 |         return [tensor]
69 | 
70 |     tensor = tensor.contiguous()
71 |     if rank == 0:
72 |         tensors = [torch.empty_like(tensor) for _ in range(world_size)]
73 |         tensors[rank] = tensor
74 |     else:
75 |         tensors = None
76 |     dist.gather(tensor=tensor, gather_list=tensors, dst=0, group=group)
77 |     return tensors
78 | 
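A hedged sketch of how these helpers compose (the worker function and tensor shape are invented for illustration; it assumes at least one CUDA device and that this file is importable as `dist_utils`):

```python
# Hypothetical driver, not repo code: spawn one process per visible GPU,
# all-gather a rank-stamped tensor, and verify the result on rank 0.
import torch
import torch.distributed as dist

from dist_utils import NUM_AVAILABLE_DEVICES, dist_init, gather_at_main_process, spawn, tearDown


def _worker(rank: int, world_size: int):
    dist_init(rank, world_size)
    tensor = torch.full((2, 2), float(rank), device=f"cuda:{rank}")
    # `dist.group.WORLD` is the default group created by `dist_init`.
    gathered = gather_at_main_process(tensor, dist.group.WORLD, rank, world_size)
    if rank == 0:  # `gathered` is None on all other ranks
        assert [t[0, 0].item() for t in gathered] == list(range(world_size))
    tearDown()


if __name__ == "__main__":
    # `spawn` prepends (fn, deterministic, world_size) to the worker's args,
    # so `_worker` receives (rank, world_size).
    spawn(NUM_AVAILABLE_DEVICES, _worker, deterministic=True)
```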


--------------------------------------------------------------------------------
/tests/onnx/test_onnx_export_custom_module.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | # Copyright 2022 The HuggingFace Team. All rights reserved.
 3 | #
 4 | # Licensed under the Apache License, Version 2.0 (the "License");
 5 | # you may not use this file except in compliance with the License.
 6 | # You may obtain a copy of the License at
 7 | #
 8 | #     http://www.apache.org/licenses/LICENSE-2.0
 9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | import os
16 | from unittest import TestCase
17 | 
18 | import torch
19 | from transformers.models.sew_d import modeling_sew_d
20 | 
21 | 
22 | class StableDropoutTestCase(TestCase):
23 |     """Tests export of StableDropout module."""
24 | 
25 |     def test_training(self):
26 |         """Tests export of StableDropout in training mode."""
27 | 
28 |         devnull = open(os.devnull, "wb")
29 |         # drop_prob must be > 0 for the test to be meaningful
30 |         sd = modeling_sew_d.StableDropout(0.1)
31 |         # Avoid warnings in training mode
32 |         do_constant_folding = False
33 |         # PRESERVE keeps the module in its current (training) mode, so dropout stays active during export
34 |         training = torch.onnx.TrainingMode.PRESERVE
35 |         input = (torch.randn(2, 2),)
36 | 
37 |         # Expected to pass on torch >= 2.5
38 |         torch.onnx.export(
39 |             sd,
40 |             input,
41 |             devnull,
42 |             opset_version=12,
43 |             do_constant_folding=do_constant_folding,
44 |             training=training,
45 |         )
46 | 
47 |         devnull.close()
48 | 
49 |     def test_inference(self):
50 |         """Tests export of StableDropout in inference mode."""
51 | 
52 |         devnull = open(os.devnull, "wb")
53 |         # drop_prob must be > 0 for the test to be meaningful
54 |         sd = modeling_sew_d.StableDropout(0.1)
55 |         # Dropout is a no-op in inference mode
56 |         training = torch.onnx.TrainingMode.EVAL
57 |         input = (torch.randn(2, 2),)
58 | 
59 |         # Expected to pass on torch >= 2.5
60 |         torch.onnx.export(
61 |             sd,
62 |             input,
63 |             devnull,
64 |             opset_version=12,
65 |             do_constant_folding=True,
66 |             training=training,
67 |         )
68 | 
69 |         devnull.close()
70 | 


--------------------------------------------------------------------------------
/tests/onnxruntime-training/ds_configs/ds_config_zero_stage_1.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "fp16": {
 3 |         "enabled": "auto",
 4 |         "loss_scale": 0,
 5 |         "loss_scale_window": 1000,
 6 |         "hysteresis": 2,
 7 |         "min_loss_scale": 1
 8 |     },
 9 | 
10 |     "bf16": {
11 |         "enabled": "auto"
12 |     },
13 | 
14 |     "zero_optimization": {
15 |         "stage": 1,
16 |         "allgather_partitions": true,
17 |         "allgather_bucket_size": 2e8,
18 |         "overlap_comm": true,
19 |         "reduce_scatter": true,
20 |         "reduce_bucket_size": 2e8,
21 |         "contiguous_gradients": false,
22 |         "cpu_offload": false
23 |     },
24 | 
25 |     "zero_allow_untested_optimizer": true,
26 | 
27 |     "optimizer": {
28 |         "type": "AdamW",
29 |         "params": {
30 |             "lr": "auto",
31 |             "betas": "auto",
32 |             "eps": "auto",
33 |             "weight_decay": "auto"
34 |         }
35 |     },
36 | 
37 |     "scheduler": {
38 |         "type": "WarmupLR",
39 |         "params": {
40 |             "warmup_min_lr": "auto",
41 |             "warmup_max_lr": "auto",
42 |             "warmup_num_steps": "auto"
43 |         }
44 |     },
45 | 
46 |     "steps_per_print": 2000,
47 |     "train_batch_size": "auto",
48 |     "train_micro_batch_size_per_gpu": "auto",
49 |     "wall_clock_breakdown": false
50 | }


--------------------------------------------------------------------------------
/tests/onnxruntime-training/ds_configs/ds_config_zero_stage_2.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "bf16": {
 3 |         "enabled": "auto"
 4 |     },
 5 | 
 6 |     "fp16": {
 7 |         "enabled": "auto",
 8 |         "loss_scale": 0,
 9 |         "loss_scale_window": 1000,
10 |         "initial_scale_power": 16,
11 |         "hysteresis": 2,
12 |         "min_loss_scale": 1
13 |     },
14 | 
15 |     "optimizer": {
16 |         "type": "AdamW",
17 |         "params": {
18 |             "lr": "auto",
19 |             "betas": "auto",
20 |             "eps": "auto",
21 |             "weight_decay": "auto"
22 |         }
23 |     },
24 | 
25 |     "scheduler": {
26 |         "type": "WarmupLR",
27 |         "params": {
28 |             "warmup_min_lr": "auto",
29 |             "warmup_max_lr": "auto",
30 |             "warmup_num_steps": "auto"
31 |         }
32 |     },
33 | 
34 |     "zero_optimization": {
35 |         "stage": 2,
36 |         "offload_optimizer": {
37 |             "device": "cpu",
38 |             "pin_memory": true
39 |         },
40 |         "allgather_partitions": true,
41 |         "allgather_bucket_size": 2e8,
42 |         "overlap_comm": true,
43 |         "reduce_scatter": true,
44 |         "reduce_bucket_size": 2e8,
45 |         "contiguous_gradients": true
46 |     },
47 | 
48 |     "gradient_accumulation_steps": "auto",
49 |     "gradient_clipping": "auto",
50 |     "steps_per_print": 2000,
51 |     "train_batch_size": "auto",
52 |     "train_micro_batch_size_per_gpu": "auto",
53 |     "wall_clock_breakdown": false
54 | }
55 | 


--------------------------------------------------------------------------------
/tests/onnxruntime-training/ds_configs/ds_config_zero_stage_3.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "fp16": {
 3 |         "enabled": "auto",
 4 |         "loss_scale": 0,
 5 |         "loss_scale_window": 1000,
 6 |         "initial_scale_power": 16,
 7 |         "hysteresis": 2,
 8 |         "min_loss_scale": 1
 9 |     },
10 | 
11 |     "optimizer": {
12 |         "type": "AdamW",
13 |         "params": {
14 |             "lr": "auto",
15 |             "betas": "auto",
16 |             "eps": "auto",
17 |             "weight_decay": "auto"
18 |         }
19 |     },
20 | 
21 |     "scheduler": {
22 |         "type": "WarmupLR",
23 |         "params": {
24 |             "warmup_min_lr": "auto",
25 |             "warmup_max_lr": "auto",
26 |             "warmup_num_steps": "auto"
27 |         }
28 |     },
29 | 
30 |     "zero_optimization": {
31 |         "stage": 3,
32 |         "offload_optimizer": {
33 |             "device": "cpu",
34 |             "pin_memory": true
35 |         },
36 |         "offload_param": {
37 |             "device": "cpu",
38 |             "pin_memory": true
39 |         },
40 |         "overlap_comm": true,
41 |         "contiguous_gradients": true,
42 |         "sub_group_size": 1e9,
43 |         "reduce_bucket_size": "auto",
44 |         "stage3_prefetch_bucket_size": "auto",
45 |         "stage3_param_persistence_threshold": "auto",
46 |         "stage3_max_live_parameters": 1e9,
47 |         "stage3_max_reuse_distance": 1e9,
48 |         "stage3_gather_16bit_weights_on_model_save": true
49 |     },
50 | 
51 |     "gradient_accumulation_steps": "auto",
52 |     "gradient_clipping": "auto",
53 |     "steps_per_print": 2000,
54 |     "train_batch_size": "auto",
55 |     "train_micro_batch_size_per_gpu": "auto",
56 |     "wall_clock_breakdown": false
57 | }


--------------------------------------------------------------------------------
/tests/onnxruntime-training/ds_configs/ds_config_zero_stage_inifinity.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "zero_optimization": {
 3 |         "stage": 3,
 4 |         "offload_optimizer": {
 5 |             "device": "nvme",
 6 |             "nvme_path": "/dev/nvme1n1",
 7 |             "pin_memory": true,
 8 |             "buffer_count": 4,
 9 |             "fast_init": false
10 |         },
11 |         "offload_param": {
12 |             "device": "nvme",
13 |             "nvme_path": "/dev/nvme1n1",
14 |             "pin_memory": true,
15 |             "buffer_count": 5,
16 |             "buffer_size": 1e8,
17 |             "max_in_cpu": 1e9
18 |         },
19 |         "aio": {
20 |             "block_size": 262144,
21 |             "queue_depth": 32,
22 |             "thread_count": 1,
23 |             "single_submit": false,
24 |             "overlap_events": true
25 |         },
26 |         "overlap_comm": true,
27 |         "contiguous_gradients": true,
28 |         "sub_group_size": 1e9,
29 |         "reduce_bucket_size": "auto",
30 |         "stage3_prefetch_bucket_size": "auto",
31 |         "stage3_param_persistence_threshold": "auto",
32 |         "stage3_max_live_parameters": 1e9,
33 |         "stage3_max_reuse_distance": 1e9,
34 |         "stage3_gather_16bit_weights_on_model_save": true
35 |     }
36 | }


--------------------------------------------------------------------------------
/tests/onnxruntime/test_timm.py:
--------------------------------------------------------------------------------
 1 | import gc
 2 | 
 3 | import onnxruntime
 4 | import requests
 5 | import timm
 6 | import torch
 7 | from parameterized import parameterized
 8 | from PIL import Image
 9 | from testing_utils import ORTModelTestMixin
10 | from transformers import PretrainedConfig
11 | 
12 | from optimum.onnxruntime import ORTModelForImageClassification
13 | 
14 | 
15 | class ORTModelForImageClassificationIntegrationTest(ORTModelTestMixin):
16 |     TIMM_SUPPORTED_MODELS = ["timm/inception_v3.tf_adv_in1k"]  # only one is required for testing
17 | 
18 |     @parameterized.expand(TIMM_SUPPORTED_MODELS)
19 |     def test_compare_to_timm(self, model_id):
20 |         onnx_model = ORTModelForImageClassification.from_pretrained(model_id)
21 |         self.assertIsInstance(onnx_model.model, onnxruntime.InferenceSession)
22 |         self.assertIsInstance(onnx_model.config, PretrainedConfig)
23 | 
24 |         timm_model = timm.create_model(model_id, pretrained=True)
25 |         timm_model = timm_model.eval()
26 | 
27 |         # get model specific transforms (normalization, resize)
28 |         data_config = timm.data.resolve_model_data_config(timm_model)
29 |         transforms = timm.data.create_transform(**data_config, is_training=False)
30 | 
31 |         url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png"
32 |         image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
33 |         inputs = transforms(image).unsqueeze(0)
34 | 
35 |         with torch.no_grad():
36 |             timm_outputs = timm_model(inputs)
37 | 
38 |         for input_type in ["pt", "np"]:
39 |             if input_type == "np":
40 |                 inputs = inputs.cpu().detach().numpy()
41 | 
42 |             onnx_outputs = onnx_model(inputs)
43 | 
44 |             self.assertIn("logits", onnx_outputs)
45 |             self.assertIsInstance(onnx_outputs.logits, self.TENSOR_ALIAS_TO_TYPE[input_type])
46 | 
47 |             # compare tensor outputs
48 |             torch.testing.assert_close(torch.Tensor(onnx_outputs.logits), timm_outputs, atol=self.ATOL, rtol=self.RTOL)
49 | 
50 |         gc.collect()
51 | 


--------------------------------------------------------------------------------
/tests/onnxruntime/test_utils.py:
--------------------------------------------------------------------------------
 1 | import tempfile
 2 | import unittest
 3 | 
 4 | import onnxruntime as ort
 5 | import torch
 6 | 
 7 | from optimum.onnxruntime.configuration import AutoQuantizationConfig, OptimizationConfig, ORTConfig
 8 | from optimum.onnxruntime.utils import get_device_for_provider, get_provider_for_device
 9 | 
10 | 
11 | class ProviderAndDeviceGettersTest(unittest.TestCase):
12 |     def test_get_device_for_provider(self):
13 |         self.assertEqual(get_device_for_provider("CPUExecutionProvider", provider_options={}), torch.device("cpu"))
14 |         self.assertEqual(
15 |             get_device_for_provider("CUDAExecutionProvider", provider_options={"device_id": 1}), torch.device("cuda:1")
16 |         )
17 | 
18 |     def test_get_provider_for_device(self):
19 |         self.assertEqual(get_provider_for_device(torch.device("cpu")), "CPUExecutionProvider")
20 | 
21 |         if "ROCMExecutionProvider" in ort.get_available_providers():
22 |             self.assertEqual(get_provider_for_device(torch.device("cuda")), "ROCMExecutionProvider")
23 |         else:
24 |             self.assertEqual(get_provider_for_device(torch.device("cuda")), "CUDAExecutionProvider")
25 | 
26 | 
27 | class ORTConfigTest(unittest.TestCase):
28 |     def test_save_and_load(self):
29 |         with tempfile.TemporaryDirectory() as tmp_dir:
30 |             quantization_config = AutoQuantizationConfig.arm64(is_static=False, per_channel=False)
31 |             optimization_config = OptimizationConfig(optimization_level=2)
32 |             ort_config = ORTConfig(opset=11, quantization=quantization_config, optimization=optimization_config)
33 |             ort_config.save_pretrained(tmp_dir)
34 |             loaded_ort_config = ORTConfig.from_pretrained(tmp_dir)
35 |             self.assertEqual(ort_config.to_dict(), loaded_ort_config.to_dict())
36 | 


--------------------------------------------------------------------------------
/tests/run_doctest.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # we use 4.48 for compatibility with bettertransformer
3 | pip install .[tests] transformers==4.48.* optuna
4 | python tests/utils/prepare_for_doc_test.py optimum docs
5 | pytest --verbose -s --doctest-modules $(cat tests/utils/documentation_tests.txt) --doctest-continue-on-failure --doctest-glob='*.mdx'
6 | python tests/utils/prepare_for_doc_test.py optimum docs --remove_new_line


--------------------------------------------------------------------------------