├── docs └── source │ ├── _toctree.yml │ ├── installation.mdx │ ├── index.mdx │ ├── quickstart.mdx │ └── guides │ ├── contribute.mdx │ └── export.mdx ├── .github └── workflows │ ├── upload_pr_documentation.yml │ ├── quality.yml │ ├── build_pr_documentation.yml │ ├── build_documentation.yml │ └── test_models.yml ├── tests ├── __init__.py └── models │ ├── __init__.py │ ├── test_modeling_gptneox.py │ ├── test_modeling_gpt2.py │ ├── test_modeling_starcoder2.py │ ├── test_modeling_phi.py │ ├── test_modeling_mistral.py │ ├── test_modeling_glm.py │ ├── test_modeling_granite.py │ ├── test_modeling_gptj.py │ ├── test_modeling_codegen.py │ ├── test_modeling_gptneoxjapanese.py │ ├── test_modeling_cvt.py │ ├── test_modeling_pvt.py │ ├── test_modeling_dit.py │ ├── test_modeling_focalnet.py │ ├── test_modeling_swin.py │ ├── test_modeling_deit.py │ ├── test_modeling_mobilevit.py │ ├── test_modeling_albert.py │ ├── test_modeling_roberta.py │ ├── test_modeling_mobilevit2.py │ ├── test_modeling_distilbert.py │ ├── test_modeling_efficientnet.py │ ├── test_modeling_vit.py │ ├── test_modeling_qwen3_embedding.py │ ├── test_modeling_smollm3.py │ ├── test_modeling_granite_speech.py │ ├── test_modeling_bert.py │ ├── test_modeling_qwen2.py │ ├── test_modeling_gemma.py │ ├── test_modeling_gemma2.py │ ├── test_modeling_whisper.py │ └── test_modeling_llama.py ├── optimum ├── executorch │ ├── version.py │ ├── passes │ │ └── remove_padding_idx_embedding_pass.py │ └── __init__.py ├── exporters │ └── executorch │ │ ├── recipes │ │ ├── __init__.py │ │ ├── cuda-windows.py │ │ ├── portable.py │ │ ├── xnnpack.py │ │ ├── metal.py │ │ ├── cuda.py │ │ └── coreml.py │ │ ├── tasks │ │ ├── __init__.py │ │ ├── image_classification.py │ │ ├── masked_lm.py │ │ ├── seq2seq_lm.py │ │ └── asr.py │ │ ├── __init__.py │ │ ├── task_registry.py │ │ ├── recipe_registry.py │ │ ├── README.md │ │ └── convert.py └── commands │ └── register │ └── register_export.py ├── Makefile ├── install_dev.py ├── pyproject.toml ├── .gitignore └── CONTRIBUTING.MD /docs/source/_toctree.yml: -------------------------------------------------------------------------------- 1 | - sections: 2 | - local: index 3 | title: 🤗 Optimum ExecuTorch 4 | - local: installation 5 | title: Installation 6 | - local: quickstart 7 | title: Quickstart 8 | - sections: 9 | - local: guides/export 10 | title: Export 11 | - local: guides/contribute 12 | title: Contribution 13 | title: How-To Guides 14 | title: Optimum ExecuTorch 15 | isExpanded: true 16 | -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Upload PR Documentation 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Build PR Documentation"] 6 | types: 7 | - completed 8 | 9 | jobs: 10 | build: 11 | uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main 12 | with: 13 | package_name: optimum-executorch 14 | secrets: 15 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} 16 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /tests/models/__init__.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | -------------------------------------------------------------------------------- /optimum/executorch/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | __version__ = "0.2.0.dev0" 16 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/recipes/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import xnnpack 16 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/tasks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import causal_lm, image_classification, masked_lm, multimodal_text_to_text, seq2seq_lm 16 | -------------------------------------------------------------------------------- /docs/source/installation.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Installation 14 | 15 | 16 | To install Optimum ExecuTorch, you can do: 17 | 18 | ```bash 19 | git clone https://github.com/huggingface/optimum-executorch.git 20 | cd optimum-executorch 21 | pip install . 22 | ``` 23 | -------------------------------------------------------------------------------- /optimum/commands/register/register_export.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from optimum.commands.export.base import ExportCommand 16 | from optimum.commands.export.executorch import ExecuTorchExportCommand 17 | 18 | 19 | REGISTER_COMMANDS = [(ExecuTorchExportCommand, ExportCommand)] 20 | -------------------------------------------------------------------------------- /.github/workflows/quality.yml: -------------------------------------------------------------------------------- 1 | name: Code Quality 2 | on: 3 | push: 4 | branches: 5 | - main 6 | - v*-release 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | concurrency: 12 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 13 | cancel-in-progress: true 14 | 15 | jobs: 16 | quality: 17 | runs-on: ubuntu-22.04 18 | 19 | steps: 20 | - name: Checkout code 21 | uses: actions/checkout@v4 22 | 23 | - name: Setup Python 24 | uses: actions/setup-python@v5 25 | with: 26 | python-version: 3.9 27 | 28 | - name: Install dependencies 29 | run: | 30 | pip install --upgrade pip 31 | pip install "black~=23.1" "ruff==0.4.4" 32 | 33 | - name: Check style with black 34 | run: | 35 | black --check . 36 | 37 | - name: Check style with ruff 38 | run: | 39 | ruff check . 
-------------------------------------------------------------------------------- /optimum/executorch/passes/remove_padding_idx_embedding_pass.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from executorch.exir.dialects._ops import ops as exir_ops 3 | from executorch.exir.pass_base import ExportPass, PassResult 4 | 5 | 6 | class RemovePaddingIdxEmbeddingPass(ExportPass): 7 | """ 8 | An ExportPass that removes the `padding_idx` keyword argument 9 | from all aten.embedding.default operator calls. 10 | """ 11 | 12 | def __init__(self) -> None: 13 | super().__init__() 14 | 15 | def call(self, graph_module: torch.fx.GraphModule) -> PassResult: 16 | for node in graph_module.graph.nodes: 17 | if node.op == "call_function" and node.target == exir_ops.edge.aten.embedding.default: 18 | # node.args[2] is the padding_idx 19 | if len(node.args) == 3: 20 | node.args = (node.args[0], node.args[1]) 21 | graph_module.recompile() 22 | return PassResult(graph_module, True) 23 | -------------------------------------------------------------------------------- /docs/source/index.mdx: -------------------------------------------------------------------------------- 1 | 16 | 17 | # 🤗 Optimum ExecuTorch 18 | 19 | Optimum ExecuTorch enables efficient deployment of transformer models using Meta's ExecuTorch framework. It provides: 20 | 21 | * 🔄 Easy conversion of Hugging Face models to ExecuTorch format 22 | 23 | * ⚡ Optimized inference with hardware-specific optimizations 24 | 25 | * 🤝 Seamless integration with Hugging Face Transformers 26 | 27 | * Efficient deployment on various devices -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | SHELL := /bin/bash 15 | CURRENT_DIR = $(shell pwd) 16 | DEFAULT_CLONE_URL := https://github.com/huggingface/optimum-executorch.git 17 | # If CLONE_URL is empty, revert to DEFAULT_CLONE_URL 18 | REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL)) 19 | 20 | .PHONY: style test 21 | 22 | # Run code quality checks 23 | style_check: 24 | black --check . 25 | ruff check . 26 | 27 | style: 28 | black . 29 | ruff check . --fix 30 | 31 | # Run tests for the library 32 | test: 33 | python -m pytest tests 34 | 35 | # Utilities to release to PyPi 36 | build_dist_install_tools: 37 | pip install build 38 | pip install twine 39 | 40 | build_dist: 41 | rm -rf build 42 | rm -rf dist 43 | python -m build 44 | 45 | pypi_upload: build_dist 46 | python -m twine upload dist/* -------------------------------------------------------------------------------- /optimum/executorch/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import TYPE_CHECKING 16 | 17 | from transformers.utils import _LazyModule 18 | 19 | 20 | _import_structure = { 21 | "modeling": [ 22 | "ExecuTorchModelForCausalLM", 23 | "ExecuTorchModelForImageClassification", 24 | "ExecuTorchModelForMaskedLM", 25 | "ExecuTorchModelForSeq2SeqLM", 26 | "ExecuTorchModelForSpeechSeq2Seq", 27 | "ExecuTorchModelForMultiModalToText", 28 | ], 29 | } 30 | 31 | if TYPE_CHECKING: 32 | from .modeling import ( 33 | ExecuTorchModelForCausalLM, 34 | ExecuTorchModelForImageClassification, 35 | ExecuTorchModelForMaskedLM, 36 | ExecuTorchModelForMultiModalToText, 37 | ExecuTorchModelForSeq2SeqLM, 38 | ExecuTorchModelForSpeechSeq2Seq, 39 | ) 40 | else: 41 | import sys 42 | 43 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 44 | -------------------------------------------------------------------------------- /docs/source/quickstart.mdx: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Quickstart 18 | 19 | ## Export 20 | 21 | You can export your 🤗 Transformers models to ExecuTorch easily: 22 | 23 | ```bash 24 | optimum-cli export executorch --model meta-llama/Llama-3.2-1B --recipe xnnpack --output_dir meta_llama3_2_1b_executorch 25 | ``` 26 | 27 | 28 | ## Inference 29 | 30 | To load a model and run inference, you can just replace your `AutoModelForCausalLM` class with the corresponding `ExecuTorchModelForCausalLM` class. You can also load a PyTorch checkpoint and convert it to ExecuTorch on-the-fly when loading your model. 31 | 32 | ```diff 33 | - from transformers import AutoModelForCausalLM 34 | + from optimum.executorch import ExecuTorchModelForCausalLM 35 | from transformers import AutoTokenizer 36 | 37 | model_id = "meta-llama/Llama-3.2-1B" 38 | tokenizer = AutoTokenizer.from_pretrained(model_id) 39 | - model = AutoModelForCausalLM.from_pretrained(model_id) 40 | + model = ExecuTorchModelForCausalLM.from_pretrained(model_id) 41 | ``` -------------------------------------------------------------------------------- /optimum/exporters/executorch/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from typing import TYPE_CHECKING 16 | 17 | from transformers.utils import _LazyModule 18 | 19 | 20 | _import_structure = { 21 | "convert": [ 22 | "export_to_executorch", 23 | ], 24 | "recipe_registry": [ 25 | "discover_recipes", 26 | "register_recipe", 27 | ], 28 | "task_registry": [ 29 | "discover_tasks", 30 | "register_task", 31 | ], 32 | "tasks": [ 33 | "causal_lm", 34 | "seq2seq_lm", 35 | ], 36 | "recipes": [ 37 | "xnnpack", 38 | ], 39 | "utils": [ 40 | "save_config_to_constant_methods", 41 | ], 42 | "integrations": [ 43 | "Seq2SeqLMExportableModule", 44 | ], 45 | "__main__": ["main_export"], 46 | } 47 | 48 | if TYPE_CHECKING: 49 | from .__main__ import main_export 50 | from .convert import export_to_executorch 51 | else: 52 | import sys 53 | 54 | sys.modules[__name__] = _LazyModule( 55 | __name__, 56 | globals()["__file__"], 57 | _import_structure, 58 | module_spec=__spec__, 59 | ) 60 | -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build PR Documentation 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | concurrency: 9 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 10 | cancel-in-progress: true 11 | 12 | jobs: 13 | build_documentation: 14 | runs-on: ubuntu-22.04 15 | env: 16 | COMMIT_SHA: ${{ github.event.pull_request.head.sha }} 17 | PR_NUMBER: ${{ github.event.number }} 18 | EVENT_CONTEXT: ${{ toJSON(github.event) }} 19 | PR_CLONE_URL: ${{ github.event.pull_request.head.repo.clone_url }} 20 | 21 | steps: 22 | - uses: actions/checkout@v4 23 | - uses: actions/setup-node@v4 24 | with: 25 | node-version: '18' 26 | cache-dependency-path: "kit/package-lock.json" 27 | 28 | - name: Set up Python 29 | uses: actions/setup-python@v4 30 | with: 31 | python-version: '3.11' 32 | 33 | - name: Setup environment 34 | run: | 35 | pip install --upgrade pip 36 | pip install git+https://github.com/huggingface/doc-builder 37 | pip install .[quality] 38 | 39 | - name: Make documentation 40 | shell: bash 41 | run: | 42 | doc-builder build optimum.executorch docs/source/ \ 43 | --repo_name optimum-executorch \ 44 | --build_dir executorch-doc-build/ \ 45 | --version pr_${{ env.PR_NUMBER }} \ 46 | --version_tag_suffix "" \ 47 | --html \ 48 | --clean \ 49 | 50 | - name: Save commit_sha & pr_number 51 | run: | 52 | sudo chmod -R ugo+rwx executorch-doc-build 53 | cd executorch-doc-build 54 | sudo mv optimum.executorch optimum-executorch 55 | echo ${{ env.COMMIT_SHA }} > ./commit_sha 56 | echo ${{ env.PR_NUMBER }} > ./pr_number 57 | 58 | - uses: actions/upload-artifact@v4 59 | with: 60 | name: doc-build-artifact 61 | path: executorch-doc-build/ 62 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/tasks/image_classification.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from transformers import AutoModelForImageClassification
16 | 
17 | from ..integrations import VisionEncoderExportableModule
18 | from ..task_registry import register_task
19 | 
20 | 
21 | # NOTE: It’s important to map the registered task name to the pipeline name in https://github.com/huggingface/transformers/blob/main/utils/update_metadata.py.
22 | # This will streamline using inferred task names and make exporting models to Hugging Face pipelines easier.
23 | @register_task("image-classification")
24 | def load_image_classification_model(model_name_or_path: str, **kwargs) -> VisionEncoderExportableModule:
25 |     """
26 |     Loads a vision model for image classification and registers it under the task
27 |     'image-classification' using Hugging Face's `AutoModelForImageClassification`.
28 | 
29 |     Args:
30 |         model_name_or_path (str):
31 |             Model ID on huggingface.co or path on disk to the model repository to export. For example:
32 |             `model_name_or_path="google/vit-base-patch16-224"` or `model_name_or_path="/path/to/model_folder"`
33 |         **kwargs:
34 |             Additional configuration options for the model.
35 | 
36 |     Returns:
37 |         VisionEncoderExportableModule:
38 |             An instance of `VisionEncoderExportableModule` for exporting and lowering to ExecuTorch.
39 |     """
40 | 
41 |     eager_model = AutoModelForImageClassification.from_pretrained(model_name_or_path, **kwargs).to("cpu").eval()
42 |     return VisionEncoderExportableModule(eager_model)
43 | 
--------------------------------------------------------------------------------
/optimum/exporters/executorch/task_registry.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import importlib
16 | import logging
17 | import pkgutil
18 | 
19 | 
20 | logger = logging.getLogger(__name__)
21 | 
22 | task_registry = {}
23 | 
24 | package_name = "optimum.exporters.executorch.tasks"
25 | 
26 | 
27 | def register_task(task_name):
28 |     """
29 |     Decorator to register a task under a specific name.
30 | 
31 |     Args:
32 |         task_name (`str`):
33 |             The name of the task to associate with a callable task.
34 | 
35 |     Returns:
36 |         `Callable`:
37 |             The original function wrapped as a registered task.
38 | 
39 |     Example:
40 |         ```python
41 |         @register_task("my_new_task")
42 |         def my_new_task(...):
43 |             ...
44 | ``` 45 | """ 46 | 47 | def decorator(func): 48 | task_registry[task_name] = func 49 | return func 50 | 51 | return decorator 52 | 53 | 54 | def discover_tasks(): 55 | """ 56 | Dynamically discovers and imports all task modules within the `optimum.exporters.executorch.tasks` package. 57 | 58 | Ensures tasks under `./tasks` directory are dynamically loaded without requiring manual imports. 59 | 60 | Notes: 61 | New tasks **must** be added to the `./tasks` directory to be discovered and used by `main_export`. 62 | Failure to do so will prevent dynamic discovery and registration. Tasks must also use the 63 | `@register_task` decorator to be properly registered in the `task_registry`. 64 | """ 65 | package = importlib.import_module(package_name) 66 | package_path = package.__path__ 67 | 68 | for _, module_name, _ in pkgutil.iter_modules(package_path): 69 | logger.info(f"Importing {package_name}.{module_name}") 70 | importlib.import_module(f"{package_name}.{module_name}") 71 | -------------------------------------------------------------------------------- /.github/workflows/build_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build documentation 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | tags: 8 | - 'v[0-9]+.[0-9]+.[0-9]+' 9 | workflow_dispatch: 10 | 11 | jobs: 12 | build_documentation: 13 | runs-on: ubuntu-22.04 14 | env: 15 | COMMIT_SHA: ${{ github.event.pull_request.head.sha }} 16 | PR_NUMBER: ${{ github.event.number }} 17 | EVENT_CONTEXT: ${{ toJSON(github.event) }} 18 | PR_CLONE_URL: ${{ github.event.pull_request.head.repo.clone_url }} 19 | 20 | steps: 21 | - uses: actions/checkout@v4 22 | - uses: actions/setup-node@v4 23 | with: 24 | node-version: '18' 25 | cache-dependency-path: "kit/package-lock.json" 26 | 27 | - name: Set up Python 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: '3.11' 31 | 32 | - name: Set environment variables 33 | run: | 34 | cd optimum 35 | version=`echo "$(grep '^__version__ =' executorch/version.py | cut -d '=' -f 2- | xargs)"` 36 | 37 | if [[ $version == *.dev0 ]] 38 | then 39 | echo "VERSION=main" >> $GITHUB_ENV 40 | else 41 | echo "VERSION=v$version" >> $GITHUB_ENV 42 | fi 43 | 44 | cd .. 45 | 46 | - name: Setup environment 47 | run: | 48 | python -m pip install --upgrade pip 49 | python -m ensurepip --upgrade 50 | python -m pip install --upgrade setuptools 51 | python -m pip install git+https://github.com/huggingface/doc-builder 52 | python -m pip install .[quality] 53 | 54 | - name: Make documentation 55 | shell: bash 56 | run: | 57 | doc-builder build optimum.executorch docs/source/ \ 58 | --repo_name optimum-executorch \ 59 | --build_dir executorch-doc-build/ \ 60 | --version ${{ env.VERSION }} \ 61 | --version_tag_suffix "" \ 62 | --html \ 63 | --clean \ 64 | 65 | - name: Push documentation 66 | run: | 67 | sudo chmod -R ugo+rwx executorch-doc-build 68 | cd executorch-doc-build 69 | sudo mv optimum.executorch optimum-executorch 70 | doc-builder push optimum-executorch --doc_build_repo_id "hf-doc-build/doc-build" --token "${{ secrets.HF_DOC_BUILD_PUSH }}" --commit_msg "Updated with commit $COMMIT_SHA See: https://github.com/huggingface/optimum-executorch/commit/$COMMIT_SHA" --n_retries 5 71 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/recipe_registry.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import importlib 16 | import logging 17 | import pkgutil 18 | 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | recipe_registry = {} 23 | 24 | package_name = "optimum.exporters.executorch.recipes" 25 | 26 | 27 | def register_recipe(recipe_name): 28 | """ 29 | Decorator to register a recipe for exporting and lowering an ExecuTorch model under a specific name. 30 | 31 | Args: 32 | recipe_name (`str`): 33 | The name of the recipe to associate with a callable recipe. 34 | 35 | Returns: 36 | `Callable`: 37 | The original function wrapped as a registered recipe. 38 | 39 | Example: 40 | ```python 41 | @register_recipe("my_new_recipe") 42 | def my_new_recipe(...): 43 | ... 44 | ``` 45 | """ 46 | 47 | def decorator(func): 48 | recipe_registry[recipe_name] = func 49 | return func 50 | 51 | return decorator 52 | 53 | 54 | def discover_recipes(): 55 | """ 56 | Dynamically discovers and imports all recipe modules within the `optimum.exporters.executorch.recipes` package. 57 | 58 | Ensures recipes under `./recipes` directory are dynamically loaded without requiring manual imports. 59 | 60 | Notes: 61 | New recipes **must** be added to the `./recipes` directory to be discovered and used by `main_export`. 62 | Failure to do so will prevent dynamic discovery and registration. Recipes must also use the 63 | `@register_recipe` decorator to be properly registered in the `recipe_registry`. 64 | """ 65 | package = importlib.import_module(package_name) 66 | package_path = package.__path__ 67 | 68 | for _, module_name, _ in pkgutil.iter_modules(package_path): 69 | logger.info(f"Importing {package_name}.{module_name}") 70 | importlib.import_module(f"{package_name}.{module_name}") 71 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/tasks/masked_lm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | from transformers import AutoModelForMaskedLM
16 | 
17 | from ..integrations import MaskedLMExportableModule
18 | from ..quantization import quantize_model_
19 | from ..task_registry import register_task
20 | 
21 | 
22 | # NOTE: It’s important to map the registered task name to the pipeline name in https://github.com/huggingface/transformers/blob/main/utils/update_metadata.py.
23 | # This will streamline using inferred task names and make exporting models to Hugging Face pipelines easier.
24 | @register_task("fill-mask")
25 | def load_masked_lm_model(model_name_or_path: str, **kwargs) -> MaskedLMExportableModule:
26 |     """
27 |     Loads a masked language model for fill-mask prediction and registers it under the task
28 |     'fill-mask' using Hugging Face's `AutoModelForMaskedLM`.
29 | 
30 |     Args:
31 |         model_name_or_path (str):
32 |             Model ID on huggingface.co or path on disk to the model repository to export. For example:
33 |             `model_name_or_path="google-bert/bert-base-uncased"` or `model_name_or_path="/path/to/model_folder"`
34 |         **kwargs:
35 |             Additional configuration options for the model.
36 | 
37 |     Returns:
38 |         MaskedLMExportableModule:
39 |             An instance of `MaskedLMExportableModule` for exporting and lowering to ExecuTorch.
40 |     """
41 | 
42 |     eager_model = AutoModelForMaskedLM.from_pretrained(model_name_or_path).to("cpu").eval()
43 | 
44 |     qlinear_config = kwargs.get("qlinear", None)
45 |     qlinear_packing_format = kwargs.get("qlinear_packing_format", None)
46 |     qembedding_config = kwargs.get("qembedding", None)
47 |     quantize_model_(
48 |         eager_model,
49 |         qlinear_config=qlinear_config,
50 |         qlinear_packing_format=qlinear_packing_format,
51 |         qembedding_config=qembedding_config,
52 |     )
53 | 
54 |     return MaskedLMExportableModule(eager_model)
55 | 
--------------------------------------------------------------------------------
/install_dev.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import subprocess
3 | import sys
4 | 
5 | 
6 | def install_torch_nightly_deps():
7 |     """Install torch related dependencies from pinned nightly"""
8 |     EXECUTORCH_NIGHTLY_VERSION = "dev20251104"
9 |     TORCHAO_NIGHTLY_VERSION = "dev20251104"
10 |     # Torch nightly is aligned with pinned nightly in https://github.com/pytorch/executorch/blob/main/torch_pin.py#L2
11 |     TORCH_NIGHTLY_VERSION = "dev20251104"
12 |     subprocess.check_call(
13 |         [
14 |             sys.executable,
15 |             "-m",
16 |             "pip",
17 |             "install",
18 |             "--no-cache-dir",  # Prevent cached CUDA packages
19 |             f"executorch==1.1.0.{EXECUTORCH_NIGHTLY_VERSION}",
20 |             f"torch==2.10.0.{TORCH_NIGHTLY_VERSION}",
21 |             f"torchvision==0.25.0.{TORCH_NIGHTLY_VERSION}",
22 |             f"torchaudio==2.10.0.{TORCH_NIGHTLY_VERSION}",
23 |             f"torchao==0.15.0.{TORCHAO_NIGHTLY_VERSION}",
24 |             "--extra-index-url",
25 |             "https://download.pytorch.org/whl/nightly/cpu",
26 |         ]
27 |     )
28 | 
29 | 
30 | def install_dep_from_source():
31 |     """Install deps from source at pinned commits"""
32 |     subprocess.check_call(
33 |         [
34 |             sys.executable,
35 |             "-m",
36 |             "pip",
37 |             "install",
38 |             "git+https://github.com/huggingface/transformers@bdc85cb85c8772d37aa29ce447860b44d7fad6ef#egg=transformers",  # v5.0.0rc0
39 |         ]
40 |     )
41 |     subprocess.check_call(
42 |         [
43 |             sys.executable,
44 |             "-m",
45 |             "pip",
46 |             "install",
47 |             "git+https://github.com/pytorch-labs/tokenizers@3aada3fe28c945d14d5ec62254eb56ccdf10eb11#egg=pytorch-tokenizers",
48 |         ]
49 |     )
50 | 
51 | 
52 | def main():
53 |     """Install optimum-executorch in dev mode with nightly dependencies"""
54 |     parser = argparse.ArgumentParser()
55 |     parser.add_argument(
56 |         "--skip_override_torch",
57 |         action="store_true",
58 |         help="Skip installation of nightly executorch and torch dependencies",
59 |     )
60 |     args = parser.parse_args()
61 | 
62 |     # Install nightly torch dependencies FIRST to avoid pulling CUDA versions
63 |     if not args.skip_override_torch:
64 |         install_torch_nightly_deps()
65 | 
66 |     # Install package with dev extras
67 |     subprocess.check_call([sys.executable, "-m", "pip", "install", ".[dev]"])
68 | 
69 |     # Install source dependencies
70 |     install_dep_from_source()
71 | 
72 | 
73 | if __name__ == "__main__":
74 |     main()
75 | 
--------------------------------------------------------------------------------
/optimum/exporters/executorch/recipes/cuda-windows.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Meta Platforms, Inc. and affiliates.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from typing import Union
16 | 
17 | from ..integrations import (
18 |     CausalLMExportableModule,
19 |     MaskedLMExportableModule,
20 |     MultiModalTextToTextExportableModule,
21 |     Seq2SeqLMExportableModule,
22 | )
23 | from ..recipe_registry import register_recipe
24 | from .cuda import lower_to_executorch
25 | 
26 | 
27 | @register_recipe("cuda-windows")
28 | def export_to_executorch_with_cuda_windows(
29 |     model: Union[
30 |         CausalLMExportableModule,
31 |         MaskedLMExportableModule,
32 |         Seq2SeqLMExportableModule,
33 |         MultiModalTextToTextExportableModule,
34 |     ],
35 |     **kwargs,
36 | ):
37 |     """
38 |     Export a PyTorch model to ExecuTorch with delegation to the CUDA backend.
39 |     This function also writes the metadata required by the ExecuTorch runtime to the .pte file.
40 |     Args:
41 |         model (Union[CausalLMExportableModule, MaskedLMExportableModule, Seq2SeqLMExportableModule, MultiModalTextToTextExportableModule]):
42 |             The PyTorch model to be exported to ExecuTorch.
43 |         **kwargs:
44 |             Additional keyword arguments for recipe-specific configurations, e.g. exporting with different example inputs, or different compile/backend configs.
45 |     Returns:
46 |         Dict[str, ExecutorchProgram]:
47 |             A map of exported and optimized programs for ExecuTorch.
48 |             For encoder-decoder models or multimodal models, it may generate multiple programs.
49 |     """
50 |     if (
51 |         model.config._attn_implementation == "custom_sdpa"
52 |         or model.config._attn_implementation == "custom_sdpa_ring_kv_cache"
53 |     ):
54 |         raise NotImplementedError(
55 |             "Custom SDPA implementation is not supported for CUDA yet. Please use 'flash_attention' instead."
56 |         )
57 | 
58 |     exported_progs = model.export()
59 | 
60 |     return lower_to_executorch(
61 |         exported_progs, model.metadata, is_windows=True, model_config=getattr(model, "config", None)
62 |     )
63 | 
--------------------------------------------------------------------------------
/docs/source/guides/contribute.mdx:
--------------------------------------------------------------------------------
1 | 
12 | 
13 | # Adding support for an unsupported architecture
14 | 
15 | We welcome contributions to extend the functionality of ExecuTorch export. This guide provides high-level instructions for contributors who want to:
16 | 
17 | 1. Export a new model that is not currently supported.
18 | 2. Add new recipes or support a new task for export.
19 | 
20 | ---
21 | 
22 | ## Exporting a New Model
23 | 
24 | If you want to export a model that is not already supported by the library, follow these steps:
25 | 
26 | ### Step 1: Export and Test the Model
27 | 1. Attempt to export and lower the model using an existing task and recipe. On success, the exported model is stored in a `.pte` file.
28 | 2. Add a test case for the model in the appropriate test suite.
29 |    - For example, you can make sure tests pass for the new `my_new_model` by running:
30 |      ```bash
31 |      pytest tests/executorch/export/test_*.py -k "test_my_new_model" # doctest: +SKIP
32 |      pytest tests/executorch/runtime/test_*.py -k "test_my_new_model" # doctest: +SKIP
33 |      ```
34 | 
35 | ### Step 2: Handle Export Failures
36 | 1. If the export fails in Step 1, report the issue by opening a GitHub issue.
37 | 2. If the issue requires changes to the model’s architecture or its Hugging Face implementation, these modifications may be made upstream in the Hugging Face Transformers library.
38 | 
39 | ---
40 | 
41 | ## Adding New Recipes or Tasks
42 | 
43 | To extend ExecuTorch with new recipes or tasks, follow these guidelines:
44 | 
45 | ### Registering a New Recipe
46 | You can add a custom recipe to define specific optimizations or configurations for exporting models. Below is an example:
47 | 
48 | ```python
49 | from optimum.exporters.executorch import register_recipe
50 | 
51 | @register_recipe("my_custom_recipe")
52 | def export_with_custom_recipe(model, config, *args, **kwargs):
53 |     ...  # Example: apply a custom quantization or lowering configuration
54 | ```
55 | 
56 | ### Registering a Task
57 | The task registration process is the same as adding a recipe. In addition, you may need to implement a new `ExecuTorchModelForXXX` class.
58 | 
--------------------------------------------------------------------------------
/tests/models/test_modeling_gptneox.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2024 The HuggingFace Team. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | 16 | import gc 17 | import logging 18 | import os 19 | import unittest 20 | 21 | import pytest 22 | import torchao 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from packaging.version import parse 25 | from transformers import AutoTokenizer 26 | from transformers.testing_utils import slow 27 | 28 | from optimum.executorch import ExecuTorchModelForCausalLM 29 | 30 | from ..utils import check_causal_lm_output_quality 31 | 32 | 33 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 34 | 35 | 36 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 37 | def __init__(self, *args, **kwargs): 38 | super().__init__(*args, **kwargs) 39 | 40 | @slow 41 | @pytest.mark.run_slow 42 | @pytest.mark.skipif( 43 | parse(torchao.__version__) < parse("0.11.0"), 44 | reason="Quantization is only available on torchao >= 0.11.0.", 45 | ) 46 | def test_gpt2neox_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): 47 | model_id = "EleutherAI/pythia-14m" 48 | prompt = "My favorite food is" 49 | tokenizer = AutoTokenizer.from_pretrained(model_id) 50 | model = ExecuTorchModelForCausalLM.from_pretrained( 51 | model_id, 52 | recipe="xnnpack", 53 | attn_implementation="custom_sdpa", 54 | use_custom_kv_cache=True, 55 | **{"qlinear": "8da4w", "qembeeding": "8w"}, 56 | ) 57 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 58 | self.assertIsInstance(model.model, ExecuTorchModule) 59 | generated_text = model.text_generation( 60 | tokenizer=tokenizer, 61 | prompt=prompt, 62 | max_seq_len=64, 63 | ) 64 | logging.info(f"\nGenerated text:\n\t{generated_text}") 65 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 66 | 67 | # Free memory before loading eager for quality check 68 | del model 69 | del tokenizer 70 | gc.collect() 71 | 72 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 73 | -------------------------------------------------------------------------------- /tests/models/test_modeling_gpt2.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import gc 17 | import logging 18 | import os 19 | import unittest 20 | 21 | import pytest 22 | import torchao 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from packaging.version import parse 25 | from transformers import AutoTokenizer 26 | from transformers.testing_utils import slow 27 | 28 | from optimum.executorch import ExecuTorchModelForCausalLM 29 | 30 | from ..utils import check_causal_lm_output_quality 31 | 32 | 33 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 34 | 35 | 36 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 37 | def __init__(self, *args, **kwargs): 38 | super().__init__(*args, **kwargs) 39 | 40 | @slow 41 | @pytest.mark.run_slow 42 | @pytest.mark.skipif( 43 | parse(torchao.__version__) < parse("0.11.0"), 44 | reason="Quantization is only available on torchao >= 0.11.0.", 45 | ) 46 | def test_gpt2sw3_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): 47 | model_id = "AI-Sweden-Models/gpt-sw3-126m" 48 | prompt = "Träd är fina för att" 49 | tokenizer = AutoTokenizer.from_pretrained(model_id) 50 | model = ExecuTorchModelForCausalLM.from_pretrained( 51 | model_id, 52 | recipe="xnnpack", 53 | attn_implementation="custom_sdpa", 54 | use_custom_kv_cache=True, 55 | **{"qlinear": "8da4w", "qembeeding": "8w"}, 56 | ) 57 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 58 | self.assertIsInstance(model.model, ExecuTorchModule) 59 | generated_text = model.text_generation( 60 | tokenizer=tokenizer, 61 | prompt=prompt, 62 | max_seq_len=64, 63 | ) 64 | logging.info(f"\nGenerated text:\n\t{generated_text}") 65 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 66 | 67 | # Free memory before loading eager for quality check 68 | del model 69 | del tokenizer 70 | gc.collect() 71 | 72 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 73 | -------------------------------------------------------------------------------- /tests/models/test_modeling_starcoder2.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import gc 17 | import logging 18 | import os 19 | import unittest 20 | 21 | import pytest 22 | import torchao 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from packaging.version import parse 25 | from transformers import AutoTokenizer 26 | from transformers.testing_utils import slow 27 | 28 | from optimum.executorch import ExecuTorchModelForCausalLM 29 | 30 | from ..utils import check_causal_lm_output_quality 31 | 32 | 33 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 34 | 35 | 36 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 37 | def __init__(self, *args, **kwargs): 38 | super().__init__(*args, **kwargs) 39 | 40 | @slow 41 | @pytest.mark.run_slow 42 | @pytest.mark.skipif( 43 | parse(torchao.__version__) < parse("0.11.0"), 44 | reason="Quantization is only available on torchao >= 0.11.0.", 45 | ) 46 | def test_starcoder2_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): 47 | model_id = "bigcode/starcoder2-3b" 48 | prompt = "def hello_world():" 49 | tokenizer = AutoTokenizer.from_pretrained(model_id) 50 | model = ExecuTorchModelForCausalLM.from_pretrained( 51 | model_id, 52 | recipe="xnnpack", 53 | attn_implementation="custom_sdpa", 54 | use_custom_kv_cache=True, 55 | **{"qlinear": "8da4w", "qembeeding": "8w"}, 56 | ) 57 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 58 | self.assertIsInstance(model.model, ExecuTorchModule) 59 | generated_text = model.text_generation( 60 | tokenizer=tokenizer, 61 | prompt=prompt, 62 | max_seq_len=64, 63 | ) 64 | logging.info(f"\nGenerated text:\n\t{generated_text}") 65 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 66 | 67 | # Free memory before loading eager for quality check 68 | del model 69 | del tokenizer 70 | gc.collect() 71 | 72 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 73 | -------------------------------------------------------------------------------- /tests/models/test_modeling_phi.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import gc 17 | import logging 18 | import os 19 | import unittest 20 | 21 | import pytest 22 | import torchao 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from packaging.version import parse 25 | from transformers import AutoTokenizer 26 | from transformers.testing_utils import slow 27 | 28 | from optimum.executorch import ExecuTorchModelForCausalLM 29 | 30 | from ..utils import check_causal_lm_output_quality 31 | 32 | 33 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 34 | 35 | 36 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 37 | def __init__(self, *args, **kwargs): 38 | super().__init__(*args, **kwargs) 39 | 40 | @slow 41 | @pytest.mark.run_slow 42 | @pytest.mark.skipif( 43 | parse(torchao.__version__) < parse("0.11.0"), 44 | reason="Quantization is only available on torchao >= 0.11.0.", 45 | ) 46 | def test_phi_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): 47 | model_id = "johnsnowlabs/JSL-MedPhi2-2.7B" 48 | prompt = "What is a large language model?" 49 | tokenizer = AutoTokenizer.from_pretrained(model_id) 50 | model = ExecuTorchModelForCausalLM.from_pretrained( 51 | model_id, 52 | recipe="xnnpack", 53 | attn_implementation="custom_sdpa", 54 | use_custom_kv_cache=True, 55 | **{"qlinear": "8da4w", "qembeeding": "8w"}, 56 | ) 57 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 58 | self.assertIsInstance(model.model, ExecuTorchModule) 59 | generated_text = model.text_generation( 60 | tokenizer=tokenizer, 61 | prompt=prompt, 62 | max_seq_len=64, 63 | ) 64 | logging.info(f"\nGenerated text:\n\t{generated_text}") 65 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 66 | 67 | # Free memory before loading eager for quality check 68 | del model 69 | del tokenizer 70 | gc.collect() 71 | 72 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 73 | -------------------------------------------------------------------------------- /tests/models/test_modeling_mistral.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import gc 17 | import logging 18 | import os 19 | import unittest 20 | 21 | import pytest 22 | import torchao 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from packaging.version import parse 25 | from transformers import AutoTokenizer 26 | from transformers.testing_utils import slow 27 | 28 | from optimum.executorch import ExecuTorchModelForCausalLM 29 | 30 | from ..utils import check_causal_lm_output_quality 31 | 32 | 33 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 34 | 35 | 36 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 37 | def __init__(self, *args, **kwargs): 38 | super().__init__(*args, **kwargs) 39 | 40 | @slow 41 | @pytest.mark.run_slow 42 | @pytest.mark.skipif( 43 | parse(torchao.__version__) < parse("0.11.0"), 44 | reason="Quantization is only available on torchao >= 0.11.0.", 45 | ) 46 | def test_mistral_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): 47 | model_id = "ministral/Ministral-3b-instruct" 48 | prompt = "My favourite condiment is " 49 | tokenizer = AutoTokenizer.from_pretrained(model_id) 50 | model = ExecuTorchModelForCausalLM.from_pretrained( 51 | model_id, 52 | recipe="xnnpack", 53 | attn_implementation="custom_sdpa", 54 | use_custom_kv_cache=True, 55 | **{"qlinear": "8da4w", "qembeeding": "8w"}, 56 | ) 57 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 58 | self.assertIsInstance(model.model, ExecuTorchModule) 59 | generated_text = model.text_generation( 60 | tokenizer=tokenizer, 61 | prompt=prompt, 62 | max_seq_len=64, 63 | ) 64 | logging.info(f"\nGenerated text:\n\t{generated_text}") 65 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 66 | 67 | # Free memory before loading eager for quality check 68 | del model 69 | del tokenizer 70 | gc.collect() 71 | 72 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 73 | -------------------------------------------------------------------------------- /tests/models/test_modeling_glm.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import gc 17 | import logging 18 | import os 19 | import unittest 20 | 21 | import pytest 22 | import torchao 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from packaging.version import parse 25 | from transformers import AutoTokenizer 26 | from transformers.testing_utils import slow 27 | 28 | from optimum.executorch import ExecuTorchModelForCausalLM 29 | 30 | from ..utils import check_causal_lm_output_quality 31 | 32 | 33 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 34 | 35 | 36 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 37 | def __init__(self, *args, **kwargs): 38 | super().__init__(*args, **kwargs) 39 | 40 | @slow 41 | @pytest.mark.run_slow 42 | @pytest.mark.skipif( 43 | parse(torchao.__version__) < parse("0.11.0"), 44 | reason="Quantization is only available on torchao >= 0.11.0.", 45 | ) 46 | def test_glm_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): 47 | model_id = "THUDM/glm-edge-1.5b-chat" 48 | prompt = "hello!" 49 | tokenizer = AutoTokenizer.from_pretrained(model_id) 50 | model = ExecuTorchModelForCausalLM.from_pretrained( 51 | model_id, 52 | task="text-generation", 53 | recipe="xnnpack", 54 | attn_implementation="custom_sdpa", 55 | use_custom_kv_cache=True, 56 | **{"qlinear": "8da4w", "qembeeding": "8w"}, 57 | ) 58 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 59 | self.assertIsInstance(model.model, ExecuTorchModule) 60 | generated_text = model.text_generation( 61 | tokenizer=tokenizer, 62 | prompt=prompt, 63 | max_seq_len=64, 64 | ) 65 | logging.info(f"\nGenerated text:\n\t{generated_text}") 66 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 67 | 68 | # Free memory before loading eager for quality check 69 | del model 70 | del tokenizer 71 | gc.collect() 72 | 73 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 74 | -------------------------------------------------------------------------------- /.github/workflows/test_models.yml: -------------------------------------------------------------------------------- 1 | name: ExecuTorch E2E / Python - Test 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | concurrency: 10 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 11 | cancel-in-progress: true 12 | 13 | jobs: 14 | discover-tests: 15 | runs-on: ubuntu-22.04 16 | outputs: 17 | model_names: ${{ steps.set-matrix.outputs.model_names }} 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Find model tests 21 | id: set-matrix 22 | run: | 23 | # Find all test files and extract model names correctly 24 | MODEL_NAMES=$(find tests/models -name "test_modeling_*.py" -type f | sed 's|tests/models/test_modeling_||' | sed 's|\.py$||' | paste -sd "," -) 25 | echo "model_names=[\"${MODEL_NAMES//,/\",\"}\"]" >> $GITHUB_OUTPUT 26 | 27 | # Display all discovered models 28 | echo "Discovered models:" 29 | echo "$MODEL_NAMES" | tr ',' '\n' | sort | awk '{print "- " $0}' 30 | 31 | run-tests: 32 | needs: discover-tests 33 | strategy: 34 | fail-fast: false 35 | matrix: 36 | test-modeling: ${{ fromJson(needs.discover-tests.outputs.model_names) }} 37 | executorch-version: ['1.0.0', 'nightly'] 38 | python-version: ['3.11'] 39 | # os: [macos-15, ubuntu-22.04] # TODO(#122): Re-enable the mac tests after fixing seg fault. 
40 | os: [ubuntu-22.04] 41 | 42 | # Custom job name 43 | name: ${{ matrix.test-modeling }} (et=${{ matrix.executorch-version }}, py=${{ matrix.python-version }}, ${{ matrix.os }}) 44 | runs-on: ${{ matrix.os }} 45 | env: 46 | MODEL_NAME: ${{ matrix.test-modeling }} 47 | steps: 48 | - uses: actions/checkout@v2 49 | - name: Setup Python ${{ matrix.python-version }} 50 | uses: actions/setup-python@v2 51 | with: 52 | python-version: ${{ matrix.python-version }} 53 | - name: Install dependencies for ExecuTorch 54 | run: | 55 | # Clean up cache to save space 56 | pip cache purge || true 57 | rm -rf ~/.cache/huggingface/hub/* || true 58 | 59 | if [ "${{ matrix.executorch-version }}" == "nightly" ]; then 60 | python install_dev.py 61 | else 62 | # Use CPU-only torch to avoid CUDA dependencies (saves ~5GB) 63 | pip install --no-cache-dir '.[dev]' \ 64 | --extra-index-url https://download.pytorch.org/whl/cpu 65 | pip install --no-cache-dir executorch==${{ matrix.executorch-version }} 66 | fi 67 | pip list 68 | - name: Run tests 69 | run: | 70 | RUN_SLOW=1 pytest tests/models/test_modeling_${{ matrix.test-modeling }}.py -s -vvvv --durations=0 --log-cli-level=INFO 71 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "optimum-executorch" 3 | dynamic = ["version"] 4 | description = "Optimum ExecuTorch is an interface between the Hugging Face libraries and ExecuTorch" 5 | readme = { file = "README.md", content-type = "text/markdown" } 6 | license = { text = "Apache" } 7 | authors = [ 8 | { name = "HuggingFace Inc. Special Ops Team", email = "hardware@huggingface.co" }, 9 | ] 10 | requires-python = ">=3.10.0" 11 | keywords = ["transformers", "quantization", "inference", "executorch"] 12 | classifiers = [ 13 | "Development Status :: 2 - Pre-Alpha", 14 | "License :: OSI Approved :: Apache Software License", 15 | "Intended Audience :: Developers", 16 | "Intended Audience :: Education", 17 | "Intended Audience :: Science/Research", 18 | "Operating System :: OS Independent", 19 | "Programming Language :: Python :: 3", 20 | "Programming Language :: Python :: 3.10", 21 | "Programming Language :: Python :: 3.11", 22 | "Programming Language :: Python :: 3.12", 23 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 24 | ] 25 | 26 | dependencies = [ 27 | "optimum~=2.0.0", 28 | "executorch>=1.0.0", 29 | "transformers==5.0.0rc1", 30 | "pytorch-tokenizers>=1.0.1", 31 | "accelerate>=0.26.0", 32 | ] 33 | 34 | [project.optional-dependencies] 35 | dev = [ 36 | "accelerate>=0.26.0", 37 | "coremltools>=8.2.0", 38 | "datasets==3.6.0", 39 | "parameterized", 40 | "pytest", 41 | "safetensors", 42 | "sentencepiece", 43 | "numba!=0.58.0", 44 | "librosa", 45 | "soundfile", 46 | "tiktoken", 47 | "black~=23.1", 48 | "ruff==0.4.4", 49 | ] 50 | 51 | [project.urls] 52 | Homepage = "https://github.com/huggingface/optimum-executorch" 53 | 54 | # ---- setuptools config ---- 55 | 56 | [tool.setuptools] 57 | # Equivalent of include_package_data=True 58 | include-package-data = true 59 | 60 | [tool.setuptools.packages.find] 61 | # Mirrors find_namespace_packages(include=["optimum*"]) 62 | include = ["optimum*"] 63 | namespaces = true 64 | 65 | [tool.setuptools.dynamic] 66 | # Pull version from the Python attribute 67 | version = { attr = "optimum.executorch.version.__version__" } 68 | 69 | # ---- tool configs ----
70 | 71 | [tool.black] 72 | line-length = 119 73 | # Match the supported Python versions: 74 | target-version = ["py310", "py311", "py312"] 75 | 76 | [tool.ruff] 77 | ignore = ["C901", "E501", "E741", "W605"] 78 | select = ["C", "E", "F", "I", "W"] 79 | line-length = 119 80 | 81 | [tool.ruff.per-file-ignores] 82 | "__init__.py" = ["E402", "F401", "F403", "F811"] 83 | 84 | [tool.ruff.isort] 85 | lines-after-imports = 2 86 | known-first-party = ["optimum"] 87 | 88 | [tool.pytest.ini_options] 89 | markers = [ 90 | "run_slow", 91 | "portable", 92 | ] 93 | 94 | [build-system] 95 | requires = ["setuptools >= 77.0.3", "wheel"] 96 | build-backend = "setuptools.build_meta" 97 | -------------------------------------------------------------------------------- /tests/models/test_modeling_granite.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import gc 17 | import logging 18 | import os 19 | import unittest 20 | 21 | import pytest 22 | import torchao 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from packaging.version import parse 25 | from transformers import AutoTokenizer 26 | from transformers.testing_utils import slow 27 | 28 | from optimum.executorch import ExecuTorchModelForCausalLM 29 | 30 | from ..utils import check_causal_lm_output_quality 31 | 32 | 33 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 34 | 35 | 36 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 37 | def __init__(self, *args, **kwargs): 38 | super().__init__(*args, **kwargs) 39 | 40 | @slow 41 | @pytest.mark.run_slow 42 | @pytest.mark.skipif( 43 | parse(torchao.__version__) < parse("0.11.0"), 44 | reason="Quantization is only available on torchao >= 0.11.0.", 45 | ) 46 | def test_granite_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): 47 | model_id = "ibm-granite/granite-3.3-2b-instruct" 48 | prompt = "Take a current environmental issue and work backward to devise an innovative prevention strategy" 49 | tokenizer = AutoTokenizer.from_pretrained(model_id) 50 | model = ExecuTorchModelForCausalLM.from_pretrained( 51 | model_id, 52 | recipe="xnnpack", 53 | attn_implementation="custom_sdpa", 54 | use_custom_kv_cache=True, 55 | **{"qlinear": "8da4w", "qembedding": "8w"}, 56 | ) 57 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 58 | self.assertIsInstance(model.model, ExecuTorchModule) 59 | generated_text = model.text_generation( 60 | tokenizer=tokenizer, 61 | prompt=prompt, 62 | max_seq_len=64, 63 | ) 64 | logging.info(f"\nGenerated text:\n\t{generated_text}") 65 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 66 | 67 | # Free memory before loading eager for quality check 68 | del model 69 | del tokenizer 70 | gc.collect() 71 | 72 | self.assertTrue(check_causal_lm_output_quality(model_id, 
generated_tokens)) 73 | -------------------------------------------------------------------------------- /tests/models/test_modeling_gptj.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import gc 17 | import logging 18 | import os 19 | import unittest 20 | 21 | import pytest 22 | import torchao 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from packaging.version import parse 25 | from transformers import AutoConfig, AutoTokenizer 26 | from transformers.testing_utils import slow 27 | 28 | from optimum.executorch import ExecuTorchModelForCausalLM 29 | 30 | from ..utils import check_causal_lm_output_quality 31 | 32 | 33 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 34 | 35 | 36 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 37 | def __init__(self, *args, **kwargs): 38 | super().__init__(*args, **kwargs) 39 | 40 | @slow 41 | @pytest.mark.run_slow 42 | @pytest.mark.skipif( 43 | parse(torchao.__version__) < parse("0.11.0"), 44 | reason="Quantization is only available on torchao >= 0.11.0.", 45 | ) 46 | def test_gptj_text_generation_with_8da4w_8we(self): 47 | model_id = "Milos/slovak-gpt-j-405M" 48 | prompt = "Tradičné jedlo na Orave sú" 49 | tokenizer = AutoTokenizer.from_pretrained(model_id) 50 | config = AutoConfig.from_pretrained(model_id) 51 | config.bos_token_id = tokenizer.bos_token_id 52 | config.eos_token_id = tokenizer.eos_token_id 53 | model = ExecuTorchModelForCausalLM.from_pretrained( 54 | model_id, 55 | config=config, 56 | recipe="xnnpack", 57 | **{"qlinear": "8da4w", "qembedding": "8w"}, 58 | ) 59 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 60 | self.assertIsInstance(model.model, ExecuTorchModule) 61 | generated_text = model.text_generation( 62 | tokenizer=tokenizer, 63 | prompt=prompt, 64 | max_seq_len=64, 65 | ) 66 | logging.info(f"\nGenerated text:\n\t{generated_text}") 67 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 68 | 69 | # Free memory before loading eager for quality check 70 | del model 71 | del tokenizer 72 | gc.collect() 73 | 74 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 75 | -------------------------------------------------------------------------------- /tests/models/test_modeling_codegen.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import gc 17 | import logging 18 | import os 19 | import unittest 20 | 21 | import pytest 22 | import torchao 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from packaging.version import parse 25 | from transformers import AutoConfig, AutoTokenizer 26 | from transformers.testing_utils import slow 27 | 28 | from optimum.executorch import ExecuTorchModelForCausalLM 29 | 30 | from ..utils import check_causal_lm_output_quality 31 | 32 | 33 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 34 | 35 | 36 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 37 | def __init__(self, *args, **kwargs): 38 | super().__init__(*args, **kwargs) 39 | 40 | @slow 41 | @pytest.mark.run_slow 42 | @pytest.mark.skipif( 43 | parse(torchao.__version__) < parse("0.11.0"), 44 | reason="Quantization is only available on torchao >= 0.11.0.", 45 | ) 46 | def test_codegen_text_generation_with_8da4w_8we(self): 47 | model_id = "Salesforce/codegen-350M-mono" 48 | prompt = "def hello_world():" 49 | tokenizer = AutoTokenizer.from_pretrained(model_id) 50 | config = AutoConfig.from_pretrained(model_id) 51 | config.bos_token_id = tokenizer.bos_token_id 52 | config.eos_token_id = tokenizer.eos_token_id 53 | model = ExecuTorchModelForCausalLM.from_pretrained( 54 | model_id, 55 | config=config, 56 | recipe="xnnpack", 57 | **{"qlinear": "8da4w", "qembedding": "8w"}, 58 | ) 59 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 60 | self.assertIsInstance(model.model, ExecuTorchModule) 61 | generated_text = model.text_generation( 62 | tokenizer=tokenizer, 63 | prompt=prompt, 64 | max_seq_len=64, 65 | ) 66 | logging.info(f"\nGenerated text:\n\t{generated_text}") 67 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 68 | 69 | # Free memory before loading eager for quality check 70 | del model 71 | del tokenizer 72 | gc.collect() 73 | 74 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 75 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/tasks/seq2seq_lm.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from transformers import AutoModelForSeq2SeqLM 16 | 17 | from ..integrations import Seq2SeqLMExportableModule 18 | from ..task_registry import register_task 19 | 20 | 21 | # NOTE: It’s important to map the registered task name to the pipeline name in https://github.com/huggingface/transformers/blob/main/utils/update_metadata.py. 22 | # This will streamline using inferred task names and make exporting models to Hugging Face pipelines easier. 23 | @register_task("text2text-generation") 24 | def load_seq2seq_lm_model(model_name_or_path: str, **kwargs) -> Seq2SeqLMExportableModule: 25 | """ 26 | Loads a seq2seq language model for conditional text generation and registers it under the task 27 | 'text2text-generation' using Hugging Face's `AutoModelForSeq2SeqLM`. 28 | 29 | Args: 30 | model_name_or_path (str): 31 | Model ID on huggingface.co or path on disk to the model repository to export. For example: 32 | `model_name_or_path="google-t5/t5-small"` or `model_name_or_path="/path/to/model_folder"` 33 | **kwargs: 34 | Additional configuration options for the model: 35 | - dtype (str, optional): 36 | Data type for model weights (default: "float32"). 37 | Options include "float16" and "bfloat16". 38 | - max_hidden_seq_len (int, optional): 39 | Maximum hidden sequence length (default: 4096). 40 | - max_seq_len (int, optional): 41 | Maximum sequence length for generation (default: 1024). 42 | 43 | Returns: 44 | Seq2SeqLMExportableModule: 45 | An instance of `Seq2SeqLMExportableModule` for exporting and lowering to ExecuTorch. 46 | """ 47 | device = "cpu" 48 | batch_size = 1 49 | max_hidden_seq_len = kwargs.get("max_hidden_seq_len", 4096) 50 | max_seq_len = kwargs.get("max_seq_len", 1024) 51 | 52 | full_model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path).to(device).eval() 53 | return Seq2SeqLMExportableModule( 54 | full_model, 55 | batch_size=batch_size, 56 | max_seq_len=max_seq_len, 57 | max_hidden_seq_len=max_hidden_seq_len, 58 | ) 59 | -------------------------------------------------------------------------------- /tests/models/test_modeling_gptneoxjapanese.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | import gc 17 | import logging 18 | import os 19 | import sys 20 | import unittest 21 | 22 | import pytest 23 | import torchao 24 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 25 | from packaging.version import parse 26 | from transformers import AutoConfig, AutoTokenizer 27 | from transformers.testing_utils import slow 28 | 29 | from optimum.executorch import ExecuTorchModelForCausalLM 30 | 31 | from ..utils import check_causal_lm_output_quality 32 | 33 | 34 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 35 | is_ci = os.environ.get("GITHUB_ACTIONS") == "true" 36 | is_linux_ci = sys.platform.startswith("linux") and is_ci 37 | 38 | 39 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 40 | def __init__(self, *args, **kwargs): 41 | super().__init__(*args, **kwargs) 42 | 43 | @slow 44 | @pytest.mark.run_slow 45 | @pytest.mark.skipif( 46 | is_linux_ci or parse(torchao.__version__) < parse("0.11.0"), 47 | reason="Quantization requires torchao >= 0.11.0; the test is also skipped on Linux CI.", 48 | ) 49 | def test_gptneoxjapanese_text_generation_with_8da4w_8we(self): 50 | model_id = "abeja/gpt-neox-japanese-2.7b" 51 | prompt = "人とAIが協調するためには、" 52 | tokenizer = AutoTokenizer.from_pretrained(model_id) 53 | config = AutoConfig.from_pretrained(model_id) 54 | config.bos_token_id = tokenizer.bos_token_id 55 | config.eos_token_id = tokenizer.eos_token_id 56 | model = ExecuTorchModelForCausalLM.from_pretrained( 57 | model_id, 58 | config=config, 59 | recipe="xnnpack", 60 | **{"qlinear": "8da4w", "qembedding": "8w"}, 61 | ) 62 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 63 | self.assertIsInstance(model.model, ExecuTorchModule) 64 | generated_text = model.text_generation( 65 | tokenizer=tokenizer, 66 | prompt=prompt, 67 | max_seq_len=64, 68 | ) 69 | logging.info(f"\nGenerated text:\n\t{generated_text}") 70 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 71 | # Free memory before loading eager for quality check 72 | del model 73 | del tokenizer 74 | gc.collect() 75 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 76 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/README.md: -------------------------------------------------------------------------------- 1 | # Exporting Transformers Models to ExecuTorch 2 | 3 | Optimum ExecuTorch enables exporting models from Transformers to ExecuTorch. 4 | The models supported by Optimum ExecuTorch are listed [here](../../../README.md#-supported-models). 5 | 6 | ### LLMs (Large Language Models) 7 | LLMs can be exported using the `text-generation` task like so: 8 | ``` 9 | optimum-cli export executorch \ 10 | --model <model_id> \ 11 | --task text-generation \ 12 | --recipe xnnpack \ 13 | --use_custom_sdpa \ 14 | --use_custom_kv_cache \ 15 | --qlinear 8da4w \ 16 | --qembedding 8w 17 | ...etc... 18 | ``` 19 | 20 | The export will produce a `.pte` with a single forward method for the decoder: `model`.
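The same export can also be driven from Python, and the resulting program can be run in-process through `ExecuTorchModelForCausalLM`, mirroring how the test suite in this repository exercises LLMs. A minimal sketch follows; the model id is only a placeholder, substitute any supported decoder-only checkpoint:

```
from transformers import AutoTokenizer

from optimum.executorch import ExecuTorchModelForCausalLM


model_id = "HuggingFaceTB/SmolLM2-135M"  # placeholder; any supported LLM works
tokenizer = AutoTokenizer.from_pretrained(model_id)

# from_pretrained exports and lowers the model on the fly with the chosen recipe,
# applying the same options as the CLI flags above.
model = ExecuTorchModelForCausalLM.from_pretrained(
    model_id,
    recipe="xnnpack",
    attn_implementation="custom_sdpa",
    use_custom_kv_cache=True,
    **{"qlinear": "8da4w", "qembedding": "8w"},
)

generated_text = model.text_generation(
    tokenizer=tokenizer,
    prompt="Simply put, the theory of relativity states that",
    max_seq_len=64,
)
print(generated_text)
```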
21 | 22 | Note that most of the arguments here are only applicable to LLMs (multimodal included): 23 | ``` 24 | --use_custom_sdpa \ 25 | --use_custom_kv_cache \ 26 | --qlinear 8da4w \ 27 | --qembedding 8w 28 | ``` 29 | 30 | ### Multimodal LLMs 31 | Multimodal LLMs can be exported using the `multimodal-text-to-text` task like so: 32 | ``` 33 | optimum-cli export executorch \ 34 | --model mistralai/Voxtral-Mini-3B-2507 \ 35 | --task multimodal-text-to-text \ 36 | --recipe xnnpack \ 37 | --use_custom_sdpa \ 38 | --use_custom_kv_cache \ 39 | --qlinear 8da4w \ 40 | --qembedding 8w 41 | ...etc... 42 | ``` 43 | 44 | The export will produce a `.pte` with the following methods: 45 | - `text_decoder`: the text decoder or language model backbone 46 | - `audio_encoder` or `vision_encoder`: the encoder which feeds into the decoder 47 | - `token_embedding`: the embedding layer of the language model backbone 48 | - This is needed in order to cleanly separate the entire multimodal model into subgraphs. The text decoder subgraph will take in token embeddings, so multimodal input will be processed into embeddings by the encoder while text input will be processed into embeddings by this method. 49 | 50 | ### Seq2Seq 51 | Seq2Seq models can be exported using the `text2text-generation` task like so: 52 | ``` 53 | optimum-cli export executorch \ 54 | --model google-t5/t5-small \ 55 | --task text2text-generation \ 56 | --recipe xnnpack 57 | ``` 58 | 59 | The export will produce a `.pte` with the following methods: 60 | - `text_decoder`: the decoder half of the Seq2Seq model 61 | - `encoder`: the encoder half of the Seq2Seq model. This encoder can support a variety of modalities, such as text for T5 and audio for Whisper. 62 | 63 | ### Image classification 64 | Image classification models can be exported using the `image-classification` task like so: 65 | ``` 66 | optimum-cli export executorch \ 67 | --model google/vit-base-patch16-224 \ 68 | --task image-classification \ 69 | --recipe xnnpack 70 | ``` 71 | 72 | The export will produce a `.pte` with a single forward method: `model`. 73 | 74 | ### ASR (Automatic speech recognition) 75 | ASR is a special case of Seq2Seq that uses the base Seq2Seq exportable modules. It can be exported using the `automatic-speech-recognition` task like so: 76 | ``` 77 | optimum-cli export executorch \ 78 | --model openai/whisper-tiny \ 79 | --task automatic-speech-recognition \ 80 | --recipe xnnpack 81 | ``` 82 | 83 | The export will produce a `.pte` with the following methods: 84 | - `text_decoder`: the decoder half of the Seq2Seq model 85 | - `encoder`: the encoder half of the Seq2Seq model. 86 | -------------------------------------------------------------------------------- /tests/models/test_modeling_cvt.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | import os 17 | import subprocess 18 | import tempfile 19 | import unittest 20 | 21 | import pytest 22 | import torch 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from transformers import AutoConfig, AutoModelForImageClassification 25 | from transformers.testing_utils import slow 26 | 27 | from optimum.executorch import ExecuTorchModelForImageClassification 28 | 29 | from ..utils import check_close_recursively 30 | 31 | 32 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 33 | def __init__(self, *args, **kwargs): 34 | super().__init__(*args, **kwargs) 35 | 36 | @slow 37 | @pytest.mark.run_slow 38 | def test_cvt_export_to_executorch(self): 39 | model_id = "microsoft/cvt-13" 40 | task = "image-classification" 41 | recipe = "xnnpack" 42 | with tempfile.TemporaryDirectory() as tempdir: 43 | subprocess.run( 44 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 45 | shell=True, 46 | check=True, 47 | ) 48 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 49 | 50 | def _helper_cvt_image_classification(self, recipe: str): 51 | model_id = "microsoft/cvt-13" 52 | 53 | config = AutoConfig.from_pretrained(model_id) 54 | batch_size = 1 55 | num_channels = config.num_channels 56 | height = config.image_size 57 | width = config.image_size 58 | pixel_values = torch.rand(batch_size, num_channels, height, width) 59 | 60 | # Test fetching and lowering the model to ExecuTorch 61 | et_model = ExecuTorchModelForImageClassification.from_pretrained(model_id=model_id, recipe=recipe) 62 | self.assertIsInstance(et_model, ExecuTorchModelForImageClassification) 63 | self.assertIsInstance(et_model.model, ExecuTorchModule) 64 | 65 | eager_model = AutoModelForImageClassification.from_pretrained(model_id).eval().to("cpu") 66 | with torch.no_grad(): 67 | eager_output = eager_model(pixel_values) 68 | et_output = et_model.forward(pixel_values) 69 | 70 | # Compare with eager outputs 71 | self.assertTrue(check_close_recursively(eager_output.logits, et_output)) 72 | 73 | @slow 74 | @pytest.mark.run_slow 75 | def test_cvt_image_classification(self): 76 | self._helper_cvt_image_classification(recipe="xnnpack") 77 | 78 | @slow 79 | @pytest.mark.run_slow 80 | @pytest.mark.portable 81 | def test_cvt_image_classification_portable(self): 82 | self._helper_cvt_image_classification(recipe="portable") 83 | -------------------------------------------------------------------------------- /tests/models/test_modeling_pvt.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import os 17 | import subprocess 18 | import tempfile 19 | import unittest 20 | 21 | import pytest 22 | import torch 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from transformers import AutoConfig, AutoModelForImageClassification 25 | from transformers.testing_utils import slow 26 | 27 | from optimum.executorch import ExecuTorchModelForImageClassification 28 | 29 | from ..utils import check_close_recursively 30 | 31 | 32 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 33 | def __init__(self, *args, **kwargs): 34 | super().__init__(*args, **kwargs) 35 | 36 | @slow 37 | @pytest.mark.run_slow 38 | def test_pvt_export_to_executorch(self): 39 | model_id = "Zetatech/pvt-tiny-224" 40 | task = "image-classification" 41 | recipe = "xnnpack" 42 | with tempfile.TemporaryDirectory() as tempdir: 43 | subprocess.run( 44 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 45 | shell=True, 46 | check=True, 47 | ) 48 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 49 | 50 | def _helper_pvt_image_classification(self, recipe: str): 51 | model_id = "Zetatech/pvt-tiny-224" 52 | 53 | config = AutoConfig.from_pretrained(model_id) 54 | batch_size = 1 55 | num_channels = config.num_channels 56 | height = config.image_size 57 | width = config.image_size 58 | pixel_values = torch.rand(batch_size, num_channels, height, width) 59 | 60 | # Test fetching and lowering the model to ExecuTorch 61 | et_model = ExecuTorchModelForImageClassification.from_pretrained(model_id=model_id, recipe=recipe) 62 | self.assertIsInstance(et_model, ExecuTorchModelForImageClassification) 63 | self.assertIsInstance(et_model.model, ExecuTorchModule) 64 | 65 | eager_model = AutoModelForImageClassification.from_pretrained(model_id).eval().to("cpu") 66 | with torch.no_grad(): 67 | eager_output = eager_model(pixel_values) 68 | et_output = et_model.forward(pixel_values) 69 | 70 | # Compare with eager outputs 71 | self.assertTrue(check_close_recursively(eager_output.logits, et_output)) 72 | 73 | @slow 74 | @pytest.mark.run_slow 75 | def test_pvt_image_classification(self): 76 | self._helper_pvt_image_classification(recipe="xnnpack") 77 | 78 | @slow 79 | @pytest.mark.run_slow 80 | @pytest.mark.portable 81 | def test_pvt_image_classification_portable(self): 82 | self._helper_pvt_image_classification(recipe="portable") 83 | -------------------------------------------------------------------------------- /tests/models/test_modeling_dit.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import os 17 | import subprocess 18 | import tempfile 19 | import unittest 20 | 21 | import pytest 22 | import torch 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from transformers import AutoConfig, AutoModelForImageClassification 25 | from transformers.testing_utils import slow 26 | 27 | from optimum.executorch import ExecuTorchModelForImageClassification 28 | 29 | from ..utils import check_close_recursively 30 | 31 | 32 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 33 | def __init__(self, *args, **kwargs): 34 | super().__init__(*args, **kwargs) 35 | 36 | @slow 37 | @pytest.mark.run_slow 38 | def test_dit_export_to_executorch(self): 39 | model_id = "microsoft/dit-base-finetuned-rvlcdip" 40 | task = "image-classification" 41 | recipe = "xnnpack" 42 | with tempfile.TemporaryDirectory() as tempdir: 43 | subprocess.run( 44 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 45 | shell=True, 46 | check=True, 47 | ) 48 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 49 | 50 | def _helper_dit_image_classification(self, recipe: str): 51 | model_id = "microsoft/dit-base-finetuned-rvlcdip" 52 | 53 | config = AutoConfig.from_pretrained(model_id) 54 | batch_size = 1 55 | num_channels = config.num_channels 56 | height = config.image_size 57 | width = config.image_size 58 | pixel_values = torch.rand(batch_size, num_channels, height, width) 59 | 60 | # Test fetching and lowering the model to ExecuTorch 61 | et_model = ExecuTorchModelForImageClassification.from_pretrained(model_id=model_id, recipe=recipe) 62 | self.assertIsInstance(et_model, ExecuTorchModelForImageClassification) 63 | self.assertIsInstance(et_model.model, ExecuTorchModule) 64 | 65 | eager_model = AutoModelForImageClassification.from_pretrained(model_id).eval().to("cpu") 66 | with torch.no_grad(): 67 | eager_output = eager_model(pixel_values) 68 | et_output = et_model.forward(pixel_values) 69 | 70 | # Compare with eager outputs 71 | self.assertTrue(check_close_recursively(eager_output.logits, et_output)) 72 | 73 | @slow 74 | @pytest.mark.run_slow 75 | def test_dit_image_classification(self): 76 | self._helper_dit_image_classification(recipe="xnnpack") 77 | 78 | @slow 79 | @pytest.mark.run_slow 80 | @pytest.mark.portable 81 | def test_dit_image_classification_portable(self): 82 | self._helper_dit_image_classification(recipe="portable") 83 | -------------------------------------------------------------------------------- /tests/models/test_modeling_focalnet.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import os 17 | import subprocess 18 | import tempfile 19 | import unittest 20 | 21 | import pytest 22 | import torch 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from transformers import AutoConfig, AutoModelForImageClassification 25 | from transformers.testing_utils import slow 26 | 27 | from optimum.executorch import ExecuTorchModelForImageClassification 28 | 29 | from ..utils import check_close_recursively 30 | 31 | 32 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 33 | def __init__(self, *args, **kwargs): 34 | super().__init__(*args, **kwargs) 35 | 36 | @slow 37 | @pytest.mark.run_slow 38 | def test_focalnet_export_to_executorch(self): 39 | model_id = "microsoft/focalnet-tiny" 40 | task = "image-classification" 41 | recipe = "xnnpack" 42 | with tempfile.TemporaryDirectory() as tempdir: 43 | subprocess.run( 44 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 45 | shell=True, 46 | check=True, 47 | ) 48 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 49 | 50 | def _helper_focalnet_image_classification(self, recipe: str): 51 | model_id = "microsoft/focalnet-tiny" 52 | 53 | config = AutoConfig.from_pretrained(model_id) 54 | batch_size = 1 55 | num_channels = config.num_channels 56 | height = config.image_size 57 | width = config.image_size 58 | pixel_values = torch.rand(batch_size, num_channels, height, width) 59 | 60 | # Test fetching and lowering the model to ExecuTorch 61 | et_model = ExecuTorchModelForImageClassification.from_pretrained(model_id=model_id, recipe=recipe) 62 | self.assertIsInstance(et_model, ExecuTorchModelForImageClassification) 63 | self.assertIsInstance(et_model.model, ExecuTorchModule) 64 | 65 | eager_model = AutoModelForImageClassification.from_pretrained(model_id).eval().to("cpu") 66 | with torch.no_grad(): 67 | eager_output = eager_model(pixel_values) 68 | et_output = et_model.forward(pixel_values) 69 | 70 | # Compare with eager outputs 71 | self.assertTrue(check_close_recursively(eager_output.logits, et_output)) 72 | 73 | @slow 74 | @pytest.mark.run_slow 75 | def test_focalnet_image_classification(self): 76 | self._helper_focalnet_image_classification(recipe="xnnpack") 77 | 78 | @slow 79 | @pytest.mark.run_slow 80 | @pytest.mark.portable 81 | def test_focalnet_image_classification_portable(self): 82 | self._helper_focalnet_image_classification(recipe="portable") 83 | -------------------------------------------------------------------------------- /tests/models/test_modeling_swin.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import os 17 | import subprocess 18 | import tempfile 19 | import unittest 20 | 21 | import pytest 22 | import torch 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from transformers import AutoConfig, AutoModelForImageClassification 25 | from transformers.testing_utils import slow 26 | 27 | from optimum.executorch import ExecuTorchModelForImageClassification 28 | 29 | from ..utils import check_close_recursively 30 | 31 | 32 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 33 | def __init__(self, *args, **kwargs): 34 | super().__init__(*args, **kwargs) 35 | 36 | @slow 37 | @pytest.mark.run_slow 38 | def test_swin_export_to_executorch(self): 39 | model_id = "microsoft/swin-tiny-patch4-window7-224" 40 | task = "image-classification" 41 | recipe = "xnnpack" 42 | with tempfile.TemporaryDirectory() as tempdir: 43 | subprocess.run( 44 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 45 | shell=True, 46 | check=True, 47 | ) 48 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 49 | 50 | def _helper_swin_image_classification(self, recipe: str): 51 | model_id = "microsoft/swin-tiny-patch4-window7-224" 52 | 53 | config = AutoConfig.from_pretrained(model_id) 54 | batch_size = 1 55 | num_channels = config.num_channels 56 | height = config.image_size 57 | width = config.image_size 58 | pixel_values = torch.rand(batch_size, num_channels, height, width) 59 | 60 | # Test fetching and lowering the model to ExecuTorch 61 | et_model = ExecuTorchModelForImageClassification.from_pretrained(model_id=model_id, recipe=recipe) 62 | self.assertIsInstance(et_model, ExecuTorchModelForImageClassification) 63 | self.assertIsInstance(et_model.model, ExecuTorchModule) 64 | 65 | eager_model = AutoModelForImageClassification.from_pretrained(model_id).eval().to("cpu") 66 | with torch.no_grad(): 67 | eager_output = eager_model(pixel_values) 68 | et_output = et_model.forward(pixel_values) 69 | 70 | # Compare with eager outputs 71 | self.assertTrue(check_close_recursively(eager_output.logits, et_output)) 72 | 73 | @slow 74 | @pytest.mark.run_slow 75 | def test_swin_image_classification(self): 76 | self._helper_swin_image_classification(recipe="xnnpack") 77 | 78 | @slow 79 | @pytest.mark.run_slow 80 | @pytest.mark.portable 81 | def test_swin_image_classification_portable(self): 82 | self._helper_swin_image_classification(recipe="portable") 83 | -------------------------------------------------------------------------------- /tests/models/test_modeling_deit.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import os 17 | import subprocess 18 | import tempfile 19 | import unittest 20 | 21 | import pytest 22 | import torch 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from transformers import AutoConfig, AutoModelForImageClassification 25 | from transformers.testing_utils import slow 26 | 27 | from optimum.executorch import ExecuTorchModelForImageClassification 28 | 29 | from ..utils import check_close_recursively 30 | 31 | 32 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 33 | def __init__(self, *args, **kwargs): 34 | super().__init__(*args, **kwargs) 35 | 36 | @slow 37 | @pytest.mark.run_slow 38 | def test_deit_export_to_executorch(self): 39 | model_id = "facebook/deit-base-distilled-patch16-224" 40 | task = "image-classification" 41 | recipe = "xnnpack" 42 | with tempfile.TemporaryDirectory() as tempdir: 43 | subprocess.run( 44 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 45 | shell=True, 46 | check=True, 47 | ) 48 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 49 | 50 | def _helper_deit_image_classification(self, recipe: str): 51 | model_id = "facebook/deit-base-distilled-patch16-224" 52 | 53 | config = AutoConfig.from_pretrained(model_id) 54 | batch_size = 1 55 | num_channels = config.num_channels 56 | height = config.image_size 57 | width = config.image_size 58 | pixel_values = torch.rand(batch_size, num_channels, height, width) 59 | 60 | # Test fetching and lowering the model to ExecuTorch 61 | et_model = ExecuTorchModelForImageClassification.from_pretrained(model_id=model_id, recipe=recipe) 62 | self.assertIsInstance(et_model, ExecuTorchModelForImageClassification) 63 | self.assertIsInstance(et_model.model, ExecuTorchModule) 64 | 65 | eager_model = AutoModelForImageClassification.from_pretrained(model_id).eval().to("cpu") 66 | with torch.no_grad(): 67 | eager_output = eager_model(pixel_values) 68 | et_output = et_model.forward(pixel_values) 69 | 70 | # Compare with eager outputs 71 | self.assertTrue(check_close_recursively(eager_output.logits, et_output)) 72 | 73 | @slow 74 | @pytest.mark.run_slow 75 | def test_deit_image_classification(self): 76 | self._helper_deit_image_classification(recipe="xnnpack") 77 | 78 | @slow 79 | @pytest.mark.run_slow 80 | @pytest.mark.portable 81 | def test_deit_image_classification_portable(self): 82 | self._helper_deit_image_classification(recipe="portable") 83 | -------------------------------------------------------------------------------- /tests/models/test_modeling_mobilevit.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import os 17 | import subprocess 18 | import tempfile 19 | import unittest 20 | 21 | import pytest 22 | import torch 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from transformers import AutoConfig, AutoModelForImageClassification 25 | from transformers.testing_utils import slow 26 | 27 | from optimum.executorch import ExecuTorchModelForImageClassification 28 | 29 | from ..utils import check_close_recursively 30 | 31 | 32 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 33 | def __init__(self, *args, **kwargs): 34 | super().__init__(*args, **kwargs) 35 | 36 | @slow 37 | @pytest.mark.run_slow 38 | def test_mobilevit_export_to_executorch(self): 39 | model_id = "apple/mobilevit-xx-small" 40 | task = "image-classification" 41 | recipe = "xnnpack" 42 | with tempfile.TemporaryDirectory() as tempdir: 43 | subprocess.run( 44 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 45 | shell=True, 46 | check=True, 47 | ) 48 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 49 | 50 | def _helper_mobilevit_image_classification(self, recipe: str): 51 | model_id = "apple/mobilevit-xx-small" 52 | 53 | config = AutoConfig.from_pretrained(model_id) 54 | batch_size = 1 55 | num_channels = config.num_channels 56 | height = config.image_size 57 | width = config.image_size 58 | pixel_values = torch.rand(batch_size, num_channels, height, width) 59 | 60 | # Test fetching and lowering the model to ExecuTorch 61 | et_model = ExecuTorchModelForImageClassification.from_pretrained(model_id=model_id, recipe=recipe) 62 | self.assertIsInstance(et_model, ExecuTorchModelForImageClassification) 63 | self.assertIsInstance(et_model.model, ExecuTorchModule) 64 | 65 | eager_model = AutoModelForImageClassification.from_pretrained(model_id).eval().to("cpu") 66 | with torch.no_grad(): 67 | eager_output = eager_model(pixel_values) 68 | et_output = et_model.forward(pixel_values) 69 | 70 | # Compare with eager outputs 71 | self.assertTrue(check_close_recursively(eager_output.logits, et_output)) 72 | 73 | @slow 74 | @pytest.mark.run_slow 75 | def test_mobilevit_image_classification(self): 76 | self._helper_mobilevit_image_classification(recipe="xnnpack") 77 | 78 | @slow 79 | @pytest.mark.run_slow 80 | @pytest.mark.portable 81 | def test_mobilevit_image_classification_portable(self): 82 | self._helper_mobilevit_image_classification(recipe="portable") 83 | -------------------------------------------------------------------------------- /tests/models/test_modeling_albert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import logging 17 | import os 18 | import subprocess 19 | import tempfile 20 | import unittest 21 | 22 | import pytest 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from transformers import AutoTokenizer 25 | from transformers.testing_utils import slow 26 | 27 | from optimum.executorch import ExecuTorchModelForMaskedLM 28 | 29 | 30 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 31 | def __init__(self, *args, **kwargs): 32 | super().__init__(*args, **kwargs) 33 | 34 | @slow 35 | @pytest.mark.run_slow 36 | def test_albert_export_to_executorch(self): 37 | model_id = "albert/albert-base-v2" 38 | task = "fill-mask" 39 | recipe = "xnnpack" 40 | with tempfile.TemporaryDirectory() as tempdir: 41 | subprocess.run( 42 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 43 | shell=True, 44 | check=True, 45 | ) 46 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 47 | 48 | def _helper_albert_fill_mask(self, recipe: str): 49 | model_id = "albert/albert-base-v2" 50 | tokenizer = AutoTokenizer.from_pretrained(model_id) 51 | 52 | # Test fetching and lowering the model to ExecuTorch 53 | model = ExecuTorchModelForMaskedLM.from_pretrained(model_id=model_id, recipe=recipe) 54 | self.assertIsInstance(model, ExecuTorchModelForMaskedLM) 55 | self.assertIsInstance(model.model, ExecuTorchModule) 56 | 57 | input_text = f"Paris is the {tokenizer.mask_token} of France." 58 | inputs = tokenizer( 59 | input_text, 60 | return_tensors="pt", 61 | padding="max_length", 62 | max_length=10, 63 | ) 64 | 65 | # Test inference using ExecuTorch model 66 | exported_outputs = model.forward(inputs["input_ids"], inputs["attention_mask"]) 67 | predicted_masks = tokenizer.decode(exported_outputs[0, 4].topk(5).indices) 68 | logging.info(f"\nInput text:\n\t{input_text}\nPredicted masks:\n\t{predicted_masks}") 69 | self.assertTrue( 70 | any(word in predicted_masks for word in ["capital", "center", "heart", "birthplace"]), 71 | f"Exported model predictions {predicted_masks} don't contain any of the most common expected words", 72 | ) 73 | 74 | @slow 75 | @pytest.mark.run_slow 76 | def test_albert_fill_mask(self): 77 | self._helper_albert_fill_mask("xnnpack") 78 | 79 | @slow 80 | @pytest.mark.run_slow 81 | @pytest.mark.portable 82 | def test_albert_fill_mask_portable(self): 83 | self._helper_albert_fill_mask("portable") 84 | -------------------------------------------------------------------------------- /tests/models/test_modeling_roberta.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import logging 17 | import os 18 | import subprocess 19 | import tempfile 20 | import unittest 21 | 22 | import pytest 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from transformers import AutoTokenizer 25 | from transformers.testing_utils import slow 26 | 27 | from optimum.executorch import ExecuTorchModelForMaskedLM 28 | 29 | 30 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 31 | def __init__(self, *args, **kwargs): 32 | super().__init__(*args, **kwargs) 33 | 34 | @slow 35 | @pytest.mark.run_slow 36 | def test_roberta_export_to_executorch(self): 37 | model_id = "FacebookAI/xlm-roberta-base" 38 | task = "fill-mask" 39 | recipe = "xnnpack" 40 | with tempfile.TemporaryDirectory() as tempdir: 41 | subprocess.run( 42 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 43 | shell=True, 44 | check=True, 45 | ) 46 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 47 | 48 | def _helper_roberta_fill_mask(self, recipe: str): 49 | model_id = "FacebookAI/xlm-roberta-base" 50 | tokenizer = AutoTokenizer.from_pretrained(model_id) 51 | 52 | # Test fetching and lowering the model to ExecuTorch 53 | model = ExecuTorchModelForMaskedLM.from_pretrained(model_id=model_id, recipe=recipe) 54 | self.assertIsInstance(model, ExecuTorchModelForMaskedLM) 55 | self.assertIsInstance(model.model, ExecuTorchModule) 56 | 57 | input_text = f"Paris is the {tokenizer.mask_token} of France." 58 | inputs = tokenizer( 59 | input_text, 60 | return_tensors="pt", 61 | padding="max_length", 62 | max_length=10, 63 | ) 64 | 65 | # Test inference using ExecuTorch model 66 | exported_outputs = model.forward(inputs["input_ids"], inputs["attention_mask"]) 67 | predicted_masks = tokenizer.decode(exported_outputs[0, 4].topk(5).indices) 68 | logging.info(f"\nInput text:\n\t{input_text}\nPredicted masks:\n\t{predicted_masks}") 69 | self.assertTrue( 70 | any(word in predicted_masks for word in ["capital", "center", "heart", "birthplace"]), 71 | f"Exported model predictions {predicted_masks} don't contain any of the most common expected words", 72 | ) 73 | 74 | @slow 75 | @pytest.mark.run_slow 76 | def test_roberta_fill_mask(self): 77 | self._helper_roberta_fill_mask(recipe="xnnpack") 78 | 79 | @slow 80 | @pytest.mark.run_slow 81 | @pytest.mark.portable 82 | def test_roberta_fill_mask_portable(self): 83 | self._helper_roberta_fill_mask(recipe="portable") 84 | -------------------------------------------------------------------------------- /tests/models/test_modeling_mobilevit2.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import os 17 | import subprocess 18 | import tempfile 19 | import unittest 20 | 21 | import pytest 22 | import torch 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from transformers import AutoConfig, AutoModelForImageClassification 25 | from transformers.testing_utils import slow 26 | 27 | from optimum.executorch import ExecuTorchModelForImageClassification 28 | 29 | from ..utils import check_close_recursively 30 | 31 | 32 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 33 | def __init__(self, *args, **kwargs): 34 | super().__init__(*args, **kwargs) 35 | 36 | @slow 37 | @pytest.mark.run_slow 38 | def test_mobilevit2_export_to_executorch(self): 39 | model_id = "apple/mobilevitv2-1.0-imagenet1k-256" 40 | task = "image-classification" 41 | recipe = "xnnpack" 42 | with tempfile.TemporaryDirectory() as tempdir: 43 | subprocess.run( 44 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 45 | shell=True, 46 | check=True, 47 | ) 48 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 49 | 50 | def _helper_mobilevit2_image_classification(self, recipe: str): 51 | model_id = "apple/mobilevitv2-1.0-imagenet1k-256" 52 | 53 | config = AutoConfig.from_pretrained(model_id) 54 | batch_size = 1 55 | num_channels = config.num_channels 56 | height = config.image_size 57 | width = config.image_size 58 | pixel_values = torch.rand(batch_size, num_channels, height, width) 59 | 60 | # Test fetching and lowering the model to ExecuTorch 61 | et_model = ExecuTorchModelForImageClassification.from_pretrained(model_id=model_id, recipe=recipe) 62 | self.assertIsInstance(et_model, ExecuTorchModelForImageClassification) 63 | self.assertIsInstance(et_model.model, ExecuTorchModule) 64 | 65 | eager_model = AutoModelForImageClassification.from_pretrained(model_id).eval().to("cpu") 66 | with torch.no_grad(): 67 | eager_output = eager_model(pixel_values) 68 | et_output = et_model.forward(pixel_values) 69 | 70 | # Compare with eager outputs 71 | self.assertTrue(check_close_recursively(eager_output.logits, et_output, atol=1e-3, rtol=1e-3)) 72 | 73 | @slow 74 | @pytest.mark.run_slow 75 | def test_mobilevit2_image_classification(self): 76 | self._helper_mobilevit2_image_classification(recipe="xnnpack") 77 | 78 | @slow 79 | @pytest.mark.run_slow 80 | @pytest.mark.portable 81 | def test_mobilevit2_image_classification_portable(self): 82 | self._helper_mobilevit2_image_classification(recipe="portable") 83 | -------------------------------------------------------------------------------- /tests/models/test_modeling_distilbert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import logging 17 | import os 18 | import subprocess 19 | import tempfile 20 | import unittest 21 | 22 | import pytest 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from transformers import AutoTokenizer 25 | from transformers.testing_utils import slow 26 | 27 | from optimum.executorch import ExecuTorchModelForMaskedLM 28 | 29 | 30 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 31 | def __init__(self, *args, **kwargs): 32 | super().__init__(*args, **kwargs) 33 | 34 | @slow 35 | @pytest.mark.run_slow 36 | def test_distilbert_export_to_executorch(self): 37 | model_id = "distilbert/distilbert-base-uncased" 38 | task = "fill-mask" 39 | recipe = "xnnpack" 40 | with tempfile.TemporaryDirectory() as tempdir: 41 | subprocess.run( 42 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 43 | shell=True, 44 | check=True, 45 | ) 46 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 47 | 48 | def _helper_distilbert_fill_mask(self, recipe: str): 49 | model_id = "distilbert/distilbert-base-uncased" 50 | tokenizer = AutoTokenizer.from_pretrained(model_id) 51 | 52 | # Test fetching and lowering the model to ExecuTorch 53 | model = ExecuTorchModelForMaskedLM.from_pretrained(model_id=model_id, recipe=recipe) 54 | self.assertIsInstance(model, ExecuTorchModelForMaskedLM) 55 | self.assertIsInstance(model.model, ExecuTorchModule) 56 | 57 | input_text = f"Paris is the {tokenizer.mask_token} of France." 58 | inputs = tokenizer( 59 | input_text, 60 | return_tensors="pt", 61 | padding="max_length", 62 | max_length=10, 63 | ) 64 | 65 | # Test inference using ExecuTorch model 66 | exported_outputs = model.forward(inputs["input_ids"], inputs["attention_mask"]) 67 | predicted_masks = tokenizer.decode(exported_outputs[0, 4].topk(5).indices) 68 | logging.info(f"\nInput text:\n\t{input_text}\nPredicted masks:\n\t{predicted_masks}") 69 | self.assertTrue( 70 | any(word in predicted_masks for word in ["capital", "center", "heart", "birthplace"]), 71 | f"Exported model predictions {predicted_masks} don't contain any of the most common expected words", 72 | ) 73 | 74 | @slow 75 | @pytest.mark.run_slow 76 | def test_distilbert_fill_mask(self): 77 | self._helper_distilbert_fill_mask(recipe="xnnpack") 78 | 79 | @slow 80 | @pytest.mark.run_slow 81 | @pytest.mark.portable 82 | def test_distilbert_fill_mask_portable(self): 83 | self._helper_distilbert_fill_mask(recipe="portable") 84 | -------------------------------------------------------------------------------- /tests/models/test_modeling_efficientnet.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import os 17 | import subprocess 18 | import tempfile 19 | import unittest 20 | 21 | import pytest 22 | import torch 23 | from executorch import version 24 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 25 | from transformers import AutoConfig, AutoModelForImageClassification 26 | from transformers.testing_utils import slow 27 | 28 | from optimum.executorch import ExecuTorchModelForImageClassification 29 | 30 | from ..utils import check_close_recursively 31 | 32 | 33 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 34 | def __init__(self, *args, **kwargs): 35 | super().__init__(*args, **kwargs) 36 | 37 | @slow 38 | @pytest.mark.run_slow 39 | def test_efficientnet_export_to_executorch(self): 40 | model_id = "google/efficientnet-b7" # ~66M params 41 | task = "image-classification" 42 | recipe = "xnnpack" 43 | with tempfile.TemporaryDirectory() as tempdir: 44 | subprocess.run( 45 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 46 | shell=True, 47 | check=True, 48 | ) 49 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 50 | 51 | def _helper_efficientnet_image_classification(self, recipe: str): 52 | model_id = "google/efficientnet-b0" # ~5.3M params 53 | 54 | config = AutoConfig.from_pretrained(model_id) 55 | batch_size = 1 56 | num_channels = config.num_channels 57 | height = config.image_size 58 | width = config.image_size 59 | pixel_values = torch.rand(batch_size, num_channels, height, width) 60 | 61 | # Test fetching and lowering the model to ExecuTorch 62 | et_model = ExecuTorchModelForImageClassification.from_pretrained(model_id=model_id, recipe=recipe) 63 | self.assertIsInstance(et_model, ExecuTorchModelForImageClassification) 64 | self.assertIsInstance(et_model.model, ExecuTorchModule) 65 | 66 | eager_model = AutoModelForImageClassification.from_pretrained(model_id).eval().to("cpu") 67 | with torch.no_grad(): 68 | eager_output = eager_model(pixel_values) 69 | et_output = et_model.forward(pixel_values) 70 | 71 | # Compare with eager outputs 72 | self.assertTrue(check_close_recursively(eager_output.logits, et_output)) 73 | 74 | @slow 75 | @pytest.mark.run_slow 76 | @pytest.mark.skipif( 77 | version.__version__ < "0.6.0", 78 | reason="The fix in XNNPACK is cherry-picked in 0.6.0 release", 79 | ) 80 | def test_efficientnet_image_classification(self): 81 | self._helper_efficientnet_image_classification(recipe="xnnpack") 82 | 83 | @slow 84 | @pytest.mark.run_slow 85 | @pytest.mark.portable 86 | def test_efficientnet_image_classification_portable(self): 87 | self._helper_efficientnet_image_classification(recipe="portable") 88 | -------------------------------------------------------------------------------- /tests/models/test_modeling_vit.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import os 17 | import subprocess 18 | import sys 19 | import tempfile 20 | import unittest 21 | 22 | import pytest 23 | import torch 24 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 25 | from transformers import AutoConfig, AutoModelForImageClassification 26 | from transformers.testing_utils import slow 27 | 28 | from optimum.executorch import ExecuTorchModelForImageClassification 29 | 30 | from ..utils import check_close_recursively 31 | 32 | 33 | is_not_macos = sys.platform != "darwin" 34 | 35 | 36 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 37 | def __init__(self, *args, **kwargs): 38 | super().__init__(*args, **kwargs) 39 | 40 | @slow 41 | @pytest.mark.run_slow 42 | def test_vit_export_to_executorch(self): 43 | model_id = "google/vit-base-patch16-224" 44 | task = "image-classification" 45 | recipe = "xnnpack" 46 | with tempfile.TemporaryDirectory() as tempdir: 47 | subprocess.run( 48 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 49 | shell=True, 50 | check=True, 51 | ) 52 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 53 | 54 | def _helper_vit_image_classification(self, recipe: str): 55 | model_id = "google/vit-base-patch16-224" 56 | 57 | config = AutoConfig.from_pretrained(model_id) 58 | batch_size = 1 59 | num_channels = config.num_channels 60 | height = config.image_size 61 | width = config.image_size 62 | pixel_values = torch.rand(batch_size, num_channels, height, width) 63 | 64 | # Test fetching and lowering the model to ExecuTorch 65 | et_model = ExecuTorchModelForImageClassification.from_pretrained(model_id=model_id, recipe=recipe) 66 | self.assertIsInstance(et_model, ExecuTorchModelForImageClassification) 67 | self.assertIsInstance(et_model.model, ExecuTorchModule) 68 | 69 | eager_model = AutoModelForImageClassification.from_pretrained(model_id).eval().to("cpu") 70 | with torch.no_grad(): 71 | eager_output = eager_model(pixel_values) 72 | et_output = et_model.forward(pixel_values) 73 | 74 | # Compare with eager outputs 75 | self.assertTrue(check_close_recursively(eager_output.logits, et_output)) 76 | 77 | @slow 78 | @pytest.mark.run_slow 79 | def test_vit_image_classification(self): 80 | self._helper_vit_image_classification(recipe="xnnpack") 81 | 82 | @slow 83 | @pytest.mark.run_slow 84 | @pytest.mark.portable 85 | def test_vit_image_classification_portable(self): 86 | self._helper_vit_image_classification(recipe="portable") 87 | 88 | @slow 89 | @pytest.mark.run_slow 90 | @pytest.mark.skipif(is_not_macos, reason="Only runs on MacOS") 91 | def test_vit_image_classification_coreml_fp32_cpu(self): 92 | self._helper_vit_image_classification(recipe="coreml_fp32_cpu") 93 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/recipes/portable.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Dict, Union 16 | 17 | from torch.export import ExportedProgram 18 | 19 | from executorch.exir import ( 20 | EdgeCompileConfig, 21 | ExecutorchProgram, 22 | to_edge_transform_and_lower, 23 | ) 24 | from optimum.executorch.passes.remove_padding_idx_embedding_pass import RemovePaddingIdxEmbeddingPass 25 | 26 | from ..integrations import ( 27 | CausalLMExportableModule, 28 | MaskedLMExportableModule, 29 | Seq2SeqLMExportableModule, 30 | ) 31 | from ..recipe_registry import register_recipe 32 | 33 | 34 | @register_recipe("portable") 35 | def export_to_executorch_with_portable( 36 | model: Union[CausalLMExportableModule, MaskedLMExportableModule, Seq2SeqLMExportableModule], 37 | **kwargs, 38 | ): 39 | """ 40 | Export a PyTorch model to ExecuTorch with Portable kernels. 41 | 42 | This function also writes metadata required by the ExecuTorch runtime to the model. 43 | 44 | Args: 45 | model (Union[CausalLMExportableModule, MaskedLMExportableModule, Seq2SeqLMExportableModule]): 46 | The PyTorch model to be exported to ExecuTorch. 47 | **kwargs: 48 | Additional keyword arguments for recipe-specific configurations, e.g. export using different example inputs, or different compile/backend configs. 49 | 50 | Returns: 51 | Dict[str, ExecutorchProgram]: 52 | A map of exported and optimized program for ExecuTorch. 53 | For encoder-decoder models or multimodal models, it may generate multiple programs. 54 | """ 55 | 56 | def _lower_to_executorch( 57 | exported_programs: Dict[str, ExportedProgram], 58 | metadata=None, 59 | ) -> Dict[str, ExecutorchProgram]: 60 | # If just one exported program, the method name in the .pte for it should be "forward". 61 | if len(exported_programs) == 1: 62 | exported_programs = {"forward": next(iter(exported_programs.values()))} 63 | 64 | et_prog = to_edge_transform_and_lower( 65 | exported_programs, 66 | partitioner=[], 67 | compile_config=EdgeCompileConfig( 68 | _check_ir_validity=False, 69 | _skip_dim_order=True, 70 | ), 71 | constant_methods=metadata, 72 | transform_passes=[RemovePaddingIdxEmbeddingPass()], 73 | ).to_executorch() 74 | pte_name = "model" 75 | return {pte_name: et_prog} 76 | 77 | exported_progs = model.export() 78 | 79 | if ( 80 | model.config._attn_implementation == "custom_sdpa" 81 | or model.config._attn_implementation == "custom_sdpa_ring_kv_cache" 82 | ): 83 | # Sanity check to make sure the exported program contains the custom sdpa operator. 84 | if not any( 85 | node.op == "call_function" and "custom_sdpa" in str(node.target) 86 | for exported_program in exported_progs.values() 87 | for node in exported_program.graph_module.graph.nodes 88 | ): 89 | raise ValueError("'custom_sdpa' not found in the graph.") 90 | 91 | return _lower_to_executorch(exported_progs, model.metadata) 92 | -------------------------------------------------------------------------------- /tests/models/test_modeling_qwen3_embedding.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 
3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import gc 17 | import logging 18 | import os 19 | import unittest 20 | 21 | import pytest 22 | from executorch import version 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from packaging.version import parse 25 | from transformers import AutoTokenizer 26 | from transformers.testing_utils import slow 27 | 28 | from optimum.executorch import ExecuTorchModelForCausalLM 29 | 30 | from ..utils import check_causal_lm_output_quality 31 | 32 | 33 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 34 | 35 | 36 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 37 | def __init__(self, *args, **kwargs): 38 | super().__init__(*args, **kwargs) 39 | 40 | @slow 41 | @pytest.mark.run_slow 42 | def test_qwen3_embedding_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): 43 | model_id = "Qwen/Qwen3-Embedding-0.6B" 44 | prompt = "Explain gravity" 45 | tokenizer = AutoTokenizer.from_pretrained(model_id) 46 | model = ExecuTorchModelForCausalLM.from_pretrained( 47 | model_id, 48 | task="text-generation", 49 | recipe="xnnpack", 50 | attn_implementation="custom_sdpa", 51 | use_custom_kv_cache=True, 52 | **{"qlinear": "8da4w", "qembedding": "8w"}, 53 | ) 54 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 55 | self.assertIsInstance(model.model, ExecuTorchModule) 56 | generated_text = model.text_generation( 57 | tokenizer=tokenizer, 58 | prompt=prompt, 59 | max_seq_len=64, 60 | ) 61 | logging.info(f"\nGenerated text:\n\t{generated_text}") 62 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 63 | 64 | # Free memory before loading eager for quality check 65 | del model 66 | del tokenizer 67 | gc.collect() 68 | 69 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 70 | 71 | @slow 72 | @pytest.mark.run_slow 73 | @pytest.mark.portable 74 | @pytest.mark.skipif( 75 | parse(version.__version__) < parse("0.7.0"), 76 | reason="Fixed on executorch >= 0.7.0", 77 | ) 78 | def test_qwen3_embedding_text_generation_portable(self): 79 | model_id = "Qwen/Qwen3-Embedding-0.6B" 80 | prompt = "Explain gravity" 81 | tokenizer = AutoTokenizer.from_pretrained(model_id) 82 | model = ExecuTorchModelForCausalLM.from_pretrained(model_id, task="text-generation", recipe="portable") 83 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 84 | self.assertIsInstance(model.model, ExecuTorchModule) 85 | generated_text = model.text_generation( 86 | tokenizer=tokenizer, 87 | prompt=prompt, 88 | max_seq_len=64, 89 | ) 90 | logging.info(f"\nGenerated text:\n\t{generated_text}") 91 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 92 | 93 | # Free memory before loading eager for quality check 94 | del model 95 | del tokenizer 96 | gc.collect() 97 | 98 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 99 | 
-------------------------------------------------------------------------------- /tests/models/test_modeling_smollm3.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import gc 17 | import logging 18 | import os 19 | import sys 20 | import unittest 21 | 22 | import pytest 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from transformers import AutoTokenizer 25 | from transformers.testing_utils import slow 26 | 27 | from optimum.executorch import ExecuTorchModelForCausalLM 28 | 29 | from ..utils import check_causal_lm_output_quality 30 | 31 | 32 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 33 | is_ci = os.environ.get("GITHUB_ACTIONS") == "true" 34 | is_linux_ci = sys.platform.startswith("linux") and is_ci 35 | 36 | 37 | @pytest.mark.skipif(is_linux_ci, reason="Runner OOM") 38 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 39 | def __init__(self, *args, **kwargs): 40 | super().__init__(*args, **kwargs) 41 | 42 | @slow 43 | @pytest.mark.run_slow 44 | def test_smollm3_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): 45 | model_id = "HuggingFaceTB/SmolLM3-3B" 46 | prompt = "Give me a brief explanation of gravity in simple terms." 47 | tokenizer = AutoTokenizer.from_pretrained(model_id) 48 | model = ExecuTorchModelForCausalLM.from_pretrained( 49 | model_id, 50 | recipe="xnnpack", 51 | attn_implementation="custom_sdpa", 52 | use_custom_kv_cache=True, 53 | **{"qlinear": "8da4w", "qembedding": "8w"}, 54 | ) 55 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 56 | self.assertIsInstance(model.model, ExecuTorchModule) 57 | generated_text = model.text_generation( 58 | tokenizer=tokenizer, 59 | prompt=prompt, 60 | max_seq_len=64, 61 | ) 62 | logging.info(f"\nGenerated text:\n\t{generated_text}") 63 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 64 | 65 | # Free memory before loading eager for quality check 66 | del model 67 | del tokenizer 68 | gc.collect() 69 | 70 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 71 | 72 | @slow 73 | @pytest.mark.run_slow 74 | @pytest.mark.portable 75 | @pytest.mark.skipif(is_ci, reason="Runner OOM") 76 | def test_smollm3_text_generation_portable(self): 77 | model_id = "HuggingFaceTB/SmolLM3-3B" 78 | prompt = "Give me a brief explanation of gravity in simple terms." 
79 | tokenizer = AutoTokenizer.from_pretrained(model_id) 80 | model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="portable") 81 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 82 | self.assertIsInstance(model.model, ExecuTorchModule) 83 | generated_text = model.text_generation( 84 | tokenizer=tokenizer, 85 | prompt=prompt, 86 | max_seq_len=64, 87 | ) 88 | logging.info(f"\nGenerated text:\n\t{generated_text}") 89 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 90 | 91 | # Free memory before loading eager for quality check 92 | del model 93 | del tokenizer 94 | gc.collect() 95 | 96 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 97 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/convert.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ExecuTorch model check and export functions.""" 16 | 17 | import logging 18 | import os 19 | from pathlib import Path 20 | from typing import Union 21 | 22 | from transformers.masking_utils import ALL_MASK_ATTENTION_FUNCTIONS, AttentionMaskInterface 23 | from transformers.modeling_utils import AttentionInterface 24 | 25 | from optimum.executorch.attentions.custom_sdpa import custom_sdpa_with_start_pos_forward 26 | 27 | from .recipe_registry import discover_recipes, recipe_registry 28 | 29 | 30 | AttentionInterface.register("custom_sdpa", custom_sdpa_with_start_pos_forward) 31 | AttentionMaskInterface.register("custom_sdpa", ALL_MASK_ATTENTION_FUNCTIONS["sdpa"]) 32 | 33 | 34 | def export_to_executorch( 35 | model, 36 | task: str, 37 | recipe: str, 38 | output_dir: Union[str, Path], 39 | **kwargs, 40 | ): 41 | """ 42 | Export a pre-trained PyTorch model to the ExecuTorch format using a specified recipe. 43 | 44 | This function facilitates the transformation of a PyTorch model into an optimized ExecuTorch program. 45 | 46 | Args: 47 | model (`Union["PreTrainedModel", "TorchExportableModuleWithStaticCache"]`): 48 | A PyTorch model to be exported. This can be a standard HuggingFace `PreTrainedModel` or a wrapped 49 | module like `TorchExportableModuleWithStaticCache` for text generation task. 50 | task (`str`): 51 | The specific task the exported model will perform, e.g., "text-generation". 52 | recipe (`str`): 53 | The recipe to guide the export process, e.g., "xnnpack". Recipes define the optimization and lowering steps. 54 | Will raise an exception if the specified recipe is not registered in the recipe registry. 55 | output_dir (`Union[str, Path]`): 56 | Path to the directory where the resulting ExecuTorch model will be saved. 57 | **kwargs: 58 | Additional configuration options passed to the recipe. 59 | 60 | Returns: 61 | `ExecuTorchProgram`: 62 | The lowered ExecuTorch program object. 
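    Example (a minimal sketch; it assumes the `openai/whisper-tiny` checkpoint is reachable, that the output
    directory already exists, and it drives the export through the ASR task loader defined in `tasks/asr.py`):

        from optimum.exporters.executorch.convert import export_to_executorch
        from optimum.exporters.executorch.tasks.asr import load_seq2seq_speech_model

        # Wrap the model in its task-specific exportable module, then lower it with the "xnnpack" recipe.
        module = load_seq2seq_speech_model("openai/whisper-tiny")
        progs = export_to_executorch(
            model=module,
            task="automatic-speech-recognition",
            recipe="xnnpack",
            output_dir="whisper_executorch",
        )
        # A `model.pte` file is written under `whisper_executorch/`.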
63 | 64 | Notes: 65 | - The function uses a dynamic recipe discovery mechanism to identify and import the specified recipe. 66 | - The exported model is stored in the specified output directory with the fixed filename `model.pte`. 67 | - The resulting ExecuTorch program is serialized and saved to the output directory. 68 | """ 69 | 70 | # Dynamically discover and import registered recipes 71 | discover_recipes() 72 | 73 | # Export and lower the model to ExecuTorch with the recipe 74 | try: 75 | recipe_func = recipe_registry.get(recipe) 76 | except KeyError as e: 77 | raise RuntimeError(f"The recipe '{recipe}' isn't registered. Detailed error: {e}") 78 | 79 | executorch_progs = recipe_func(model, **kwargs) 80 | 81 | for name, prog in executorch_progs.items(): 82 | full_path = os.path.join(f"{output_dir}", f"{name}.pte") 83 | with open(full_path, "wb") as f: 84 | prog.write_to_file(f) 85 | logging.info( 86 | f"Saved exported program to {full_path} ({os.path.getsize(full_path) / (1024 * 1024):.2f} MB)" 87 | ) 88 | prog.write_tensor_data_to_file(output_dir) 89 | 90 | return executorch_progs 91 | -------------------------------------------------------------------------------- /tests/models/test_modeling_granite_speech.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import gc 17 | import logging 18 | import os 19 | import sys 20 | import unittest 21 | 22 | import pytest 23 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 24 | from transformers import AutoProcessor, AutoTokenizer 25 | from transformers.testing_utils import slow 26 | 27 | from optimum.executorch import ExecuTorchModelForMultiModalToText 28 | 29 | from ..utils import check_multimodal_output_quality 30 | 31 | 32 | is_linux_ci = sys.platform.startswith("linux") and os.environ.get("GITHUB_ACTIONS") == "true" 33 | 34 | logging.basicConfig(level=logging.DEBUG) 35 | 36 | 37 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 38 | def __init__(self, *args, **kwargs): 39 | super().__init__(*args, **kwargs) 40 | 41 | @slow 42 | @pytest.mark.run_slow 43 | @pytest.mark.skipif(is_linux_ci, reason="OOM") 44 | def test_granite_audio_text_to_text_generation_with_custom_sdpa_kv_cache_8da4w_8we_pte(self): 45 | model_id = "ibm-granite/granite-speech-3.3-2b" 46 | tokenizer = AutoTokenizer.from_pretrained(model_id) 47 | processor = AutoProcessor.from_pretrained(model_id) 48 | system_prompt = "Knowledge Cutoff Date: April 2024.\nToday's Date: April 9, 2025.\nYou are Granite, developed by IBM. You are a helpful AI assistant" 49 | user_prompt = "<|audio|>can you transcribe the speech into a written format?" 
50 | conversation = [ 51 | {"role": "system", "content": system_prompt}, 52 | {"role": "user", "content": user_prompt}, 53 | { 54 | "role": "user", 55 | "type": "audio", 56 | "content": "https://huggingface.co/ibm-granite/granite-speech-3.3-2b/resolve/main/10226_10111_000000.wav", 57 | }, 58 | ] 59 | 60 | model = ExecuTorchModelForMultiModalToText.from_pretrained( 61 | model_id, 62 | # "/home/jackzhxng/models/granite/granite_1", 63 | recipe="xnnpack", 64 | attn_implementation="custom_sdpa", 65 | use_custom_kv_cache=True, 66 | **{ 67 | "qlinear": "8da4w", 68 | "qlinear_encoder": "8da4w", 69 | "qembedding": "4w", 70 | "qembedding_group_size": 32, 71 | "task": "multimodal-text-to-text", 72 | }, 73 | ) 74 | self.assertIsInstance(model, ExecuTorchModelForMultiModalToText) 75 | self.assertIsInstance(model.model, ExecuTorchModule) 76 | 77 | generated_text = model.text_generation( 78 | processor=processor, 79 | tokenizer=tokenizer, 80 | input_conversation=conversation, 81 | max_seq_len=64, 82 | ) 83 | logging.info(f"\nGenerated text:\n\t{generated_text}") 84 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 85 | 86 | del model 87 | del tokenizer 88 | gc.collect() 89 | 90 | # Should be something like: 'Certainly! Here's the transcribed written format of Timothy's actions and thoughts: 91 | # After his nap, Timothy leisurely stretched, first one gray velvet foot, then the other. He then slowly rolled, 92 | # indolently, to his plate.' 93 | self.assertTrue("Timothy" in generated_text) 94 | self.assertTrue("nap" in generated_text) 95 | self.assertTrue("stretch" in generated_text) 96 | self.assertTrue( 97 | check_multimodal_output_quality(model_id, generated_tokens, conversation, max_perplexity_threshold=5) 98 | ) 99 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. 
For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ 169 | 170 | # PyPI configuration file 171 | .pypirc 172 | -------------------------------------------------------------------------------- /tests/models/test_modeling_bert.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import logging 17 | import os 18 | import subprocess 19 | import tempfile 20 | import unittest 21 | 22 | import pytest 23 | import torchao 24 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 25 | from packaging.version import parse 26 | from transformers import AutoTokenizer 27 | from transformers.testing_utils import slow 28 | 29 | from optimum.executorch import ExecuTorchModelForMaskedLM 30 | 31 | 32 | @pytest.mark.skipif( 33 | parse(torchao.__version__) < parse("0.11.0.dev0"), 34 | reason="Only available on torchao >= 0.11.0.dev0", 35 | ) 36 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 37 | def __init__(self, *args, **kwargs): 38 | super().__init__(*args, **kwargs) 39 | 40 | @slow 41 | @pytest.mark.run_slow 42 | def test_bert_export_to_executorch(self): 43 | model_id = "google-bert/bert-base-uncased" 44 | task = "fill-mask" 45 | recipe = "xnnpack" 46 | with tempfile.TemporaryDirectory() as tempdir: 47 | subprocess.run( 48 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 49 | shell=True, 50 | check=True, 51 | ) 52 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 53 | 54 | @slow 55 | @pytest.mark.run_slow 56 | def test_bert_export_to_executorch_quantized(self): 57 | model_id = "google-bert/bert-base-uncased" 58 | task = "fill-mask" 59 | recipe = "xnnpack" 60 | with tempfile.TemporaryDirectory() as tempdir: 61 | subprocess.run( 62 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --qlinear 8da4w --output_dir {tempdir}/executorch", 63 | shell=True, 64 | check=True, 65 | ) 66 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 67 | 68 | def _helper_bert_fill_mask(self, recipe: str): 69 | model_id = "google-bert/bert-base-uncased" 70 | tokenizer = AutoTokenizer.from_pretrained(model_id) 71 | 72 | # Test fetching and lowering the model to ExecuTorch 73 | model = ExecuTorchModelForMaskedLM.from_pretrained(model_id=model_id, recipe=recipe) 74 | self.assertIsInstance(model, ExecuTorchModelForMaskedLM) 75 | self.assertIsInstance(model.model, ExecuTorchModule) 76 | 77 | input_text = f"Paris is the {tokenizer.mask_token} of France." 
78 | inputs = tokenizer( 79 | input_text, 80 | return_tensors="pt", 81 | padding="max_length", 82 | max_length=10, 83 | ) 84 | 85 | # Test inference using ExecuTorch model 86 | exported_outputs = model.forward(inputs["input_ids"], inputs["attention_mask"]) 87 | predicted_masks = tokenizer.decode(exported_outputs[0, 4].topk(5).indices) 88 | logging.info(f"\nInput text:\n\t{input_text}\nPredicted masks:\n\t{predicted_masks}") 89 | self.assertTrue( 90 | any(word in predicted_masks for word in ["capital", "center", "heart", "birthplace"]), 91 | f"Exported model predictions {predicted_masks} don't contain any of the most common expected words", 92 | ) 93 | 94 | @slow 95 | @pytest.mark.run_slow 96 | def test_bert_fill_mask(self): 97 | self._helper_bert_fill_mask("xnnpack") 98 | 99 | @slow 100 | @pytest.mark.run_slow 101 | @pytest.mark.portable 102 | def test_bert_fill_mask_portable(self): 103 | self._helper_bert_fill_mask("portable") 104 | -------------------------------------------------------------------------------- /tests/models/test_modeling_qwen2.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import gc 17 | import logging 18 | import os 19 | import subprocess 20 | import tempfile 21 | import unittest 22 | 23 | import pytest 24 | from executorch import version 25 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 26 | from packaging.version import parse 27 | from transformers import AutoTokenizer 28 | from transformers.testing_utils import slow 29 | 30 | from optimum.executorch import ExecuTorchModelForCausalLM 31 | 32 | from ..utils import check_causal_lm_output_quality 33 | 34 | 35 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 36 | 37 | 38 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 39 | def __init__(self, *args, **kwargs): 40 | super().__init__(*args, **kwargs) 41 | 42 | @slow 43 | @pytest.mark.run_slow 44 | def test_qwen2_5_export_to_executorch(self): 45 | model_id = "Qwen/Qwen2.5-0.5B" 46 | task = "text-generation" 47 | recipe = "xnnpack" 48 | with tempfile.TemporaryDirectory() as tempdir: 49 | subprocess.run( 50 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 51 | shell=True, 52 | check=True, 53 | ) 54 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 55 | 56 | def _helper_qwen2_5_text_generation(self, recipe: str): 57 | model_id = "Qwen/Qwen2.5-0.5B" 58 | model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe=recipe) 59 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 60 | self.assertIsInstance(model.model, ExecuTorchModule) 61 | 62 | tokenizer = AutoTokenizer.from_pretrained(model_id) 63 | generated_text = model.text_generation( 64 | tokenizer=tokenizer, 65 | prompt="My favourite condiment is ", 66 | max_seq_len=32, 67 | ) 68 | logging.info(f"\nGenerated text:\n\t{generated_text}") 69 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 70 | 71 | # Free memory before loading eager for quality check 72 | del model 73 | del tokenizer 74 | gc.collect() 75 | 76 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 77 | 78 | @slow 79 | @pytest.mark.run_slow 80 | def test_qwen2_5_text_generation(self): 81 | self._helper_qwen2_5_text_generation(recipe="xnnpack") 82 | 83 | @slow 84 | @pytest.mark.run_slow 85 | @pytest.mark.portable 86 | @pytest.mark.skipif( 87 | parse(version.__version__) < parse("0.7.0"), 88 | reason="Fixed on executorch >= 0.7.0", 89 | ) 90 | def test_qwen2_5_text_generation_portable(self): 91 | self._helper_qwen2_5_text_generation(recipe="portable") 92 | 93 | @slow 94 | @pytest.mark.run_slow 95 | def test_qwen2_5_text_generation_with_custom_sdpa(self): 96 | model_id = "Qwen/Qwen2.5-0.5B" 97 | prompt = "My favourite condiment is " 98 | max_seq_len = 32 99 | tokenizer = AutoTokenizer.from_pretrained(model_id) 100 | 101 | # ExecuTorch model + custom sdpa 102 | model = ExecuTorchModelForCausalLM.from_pretrained( 103 | model_id, 104 | recipe="xnnpack", 105 | attn_implementation="custom_sdpa", 106 | ) 107 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 108 | self.assertIsInstance(model.model, ExecuTorchModule) 109 | generated_text = model.text_generation( 110 | tokenizer=tokenizer, 111 | prompt=prompt, 112 | max_seq_len=max_seq_len, 113 | ) 114 | logging.info(f"\nGenerated text:\n\t{generated_text}") 115 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 116 | 117 | # Free memory before loading eager for quality check 118 | del model 119 | del tokenizer 120 | gc.collect() 121 | 122 | 
self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 123 | -------------------------------------------------------------------------------- /tests/models/test_modeling_gemma.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import gc 17 | import logging 18 | import os 19 | import subprocess 20 | import tempfile 21 | import unittest 22 | 23 | import pytest 24 | import torchao 25 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 26 | from packaging.version import parse 27 | from transformers import AutoTokenizer 28 | from transformers.testing_utils import slow 29 | 30 | from optimum.executorch import ExecuTorchModelForCausalLM 31 | 32 | 33 | is_ci = os.environ.get("GITHUB_ACTIONS") == "true" 34 | 35 | 36 | @pytest.mark.skipif( 37 | parse(torchao.__version__) < parse("0.11.0.dev0"), 38 | reason="Only available on torchao >= 0.11.0.dev0", 39 | ) 40 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 41 | def __init__(self, *args, **kwargs): 42 | super().__init__(*args, **kwargs) 43 | 44 | @slow 45 | @pytest.mark.run_slow 46 | def test_gemma_export_to_executorch(self): 47 | model_id = "weqweasdas/RM-Gemma-2B" 48 | task = "text-generation" 49 | recipe = "xnnpack" 50 | with tempfile.TemporaryDirectory() as tempdir: 51 | out_dir = f"{tempdir}/executorch" 52 | subprocess.run( 53 | f"optimum-cli export executorch \ 54 | --model {model_id} \ 55 | --task {task} \ 56 | --recipe {recipe} \ 57 | --output_dir {tempdir}/executorch \ 58 | --use_custom_sdpa \ 59 | --qlinear 8da4w \ 60 | --qembedding 8w", 61 | shell=True, 62 | check=True, 63 | ) 64 | pte_full_path = f"{out_dir}/model.pte" 65 | self.assertTrue(os.path.exists(pte_full_path)) 66 | 67 | # Explicitly delete the PTE file to free up disk space 68 | if os.path.exists(pte_full_path): 69 | os.remove(pte_full_path) 70 | gc.collect() 71 | 72 | @slow 73 | @pytest.mark.run_slow 74 | def test_gemma_text_generation_with_custom_sdpa_8da4w_8we(self): 75 | # TODO: Switch to use google/gemma-2b once https://github.com/huggingface/optimum/issues/2127 is fixed 76 | # model_id = "google/gemma-2b" 77 | model_id = "weqweasdas/RM-Gemma-2B" 78 | # ExecuTorch model + custom sdpa + 8da4w linear quantization + int8 embedding quantization 79 | kwargs = {"qlinear": "8da4w", "qembedding": "8w"} 80 | model = ExecuTorchModelForCausalLM.from_pretrained( 81 | model_id, 82 | task="text-generation", 83 | recipe="xnnpack", 84 | attn_implementation="custom_sdpa", 85 | **kwargs, 86 | ) 87 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 88 | self.assertIsInstance(model.model, ExecuTorchModule) 89 | 90 | tokenizer = AutoTokenizer.from_pretrained(model_id) 91 | generated_text = model.text_generation( 92 | tokenizer=tokenizer, 93 | prompt="Hello I am doing", 94 | max_seq_len=21, 95 | ) 96 | 
logging.info(f"\nGenerated text:\n\t{generated_text}") 97 | 98 | @slow 99 | @pytest.mark.run_slow 100 | @pytest.mark.portable 101 | @pytest.mark.skipif(is_ci, reason="Too big for CI runners") 102 | def test_gemma_text_generation_portable(self): 103 | # TODO: Switch to use google/gemma-2b once https://github.com/huggingface/optimum/issues/2127 is fixed 104 | # model_id = "google/gemma-2b" 105 | model_id = "weqweasdas/RM-Gemma-2B" 106 | model = ExecuTorchModelForCausalLM.from_pretrained(model_id, task="text-generation", recipe="portable") 107 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 108 | self.assertIsInstance(model.model, ExecuTorchModule) 109 | 110 | tokenizer = AutoTokenizer.from_pretrained(model_id) 111 | generated_text = model.text_generation( 112 | tokenizer=tokenizer, 113 | prompt="Hello I am doing", 114 | max_seq_len=21, 115 | ) 116 | logging.info(f"\nGenerated text:\n\t{generated_text}") 117 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/tasks/asr.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | import torchao 15 | from transformers import AutoModelForSpeechSeq2Seq 16 | 17 | from ..integrations import Seq2SeqLMExportableModule 18 | from ..quantization import quantize_model_ 19 | from ..task_registry import register_task 20 | 21 | 22 | # NOTE: It’s important to map the registered task name to the pipeline name in https://github.com/huggingface/transformers/blob/main/utils/update_metadata.py. 23 | # This will streamline using inferred task names and make exporting models to Hugging Face pipelines easier. 24 | @register_task("automatic-speech-recognition") 25 | def load_seq2seq_speech_model(model_name_or_path: str, **kwargs) -> Seq2SeqLMExportableModule: 26 | """ 27 | Loads a model for speech seq2seq and registers it under the task 28 | 'automatic-speech-recognition' using Hugging Face's `AutoModelForSpeechSeq2Seq`. 29 | 30 | Args: 31 | model_name_or_path (str): 32 | Model ID on huggingface.co or path on disk to the model repository to export. For example: 33 | `model_name_or_path="openai/whisper-tiny"` or `model_name_or_path="/path/to/model_folder"` 34 | **kwargs: 35 | Additional configuration options for the model: 36 | - dtype (str, optional): 37 | Data type for model weights (default: "float32"). 38 | Options include "float16" and "bfloat16". 39 | - max_hidden_seq_len (int, optional): 40 | Maximum hidden sequence length (default: 4096). 41 | - max_seq_len (int, optional): 42 | Maximum sequence length for generation (default: 1024). 43 | 44 | Returns: 45 | Seq2SeqLMExportableModule: 46 | An instance of `Seq2SeqLMExportableModule` for exporting and lowering to ExecuTorch. 
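        Example (a minimal sketch; assumes the `openai/whisper-tiny` checkpoint is available and that the
        torchao configs named below are supported in your environment):

            from optimum.exporters.executorch.tasks.asr import load_seq2seq_speech_model

            # Load Whisper-tiny and quantize decoder and encoder linear layers to 8da4w before export.
            module = load_seq2seq_speech_model(
                "openai/whisper-tiny",
                dtype="float32",
                max_seq_len=1024,
                qlinear="8da4w",
                qlinear_encoder="8da4w",
            )
            exported_programs = module.export()  # dict mapping program names to torch.export.ExportedProgram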
47 | """ 48 | device = kwargs.get("device", "cpu") 49 | batch_size = 1 50 | max_hidden_seq_len = kwargs.get("max_hidden_seq_len", 4096) 51 | max_seq_len = kwargs.get("max_seq_len", 1024) 52 | dtype = kwargs.get("dtype", "float32") 53 | 54 | full_model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name_or_path, dtype=dtype, device_map=device).eval() 55 | 56 | for param in full_model.parameters(): 57 | if isinstance(param, torchao.utils.TorchAOBaseTensor): 58 | param.requires_grad = False 59 | 60 | qlinear_config = kwargs.get("qlinear", None) 61 | qlinear_group_size = kwargs.get("qlinear_group_size", None) 62 | qlinear_packing_format = kwargs.get("qlinear_packing_format", None) 63 | qlinear_encoder_config = kwargs.get("qlinear_encoder", None) 64 | qlinear_encoder_group_size = kwargs.get("qlinear_encoder_group_size", None) 65 | qlinear_encoder_packing_format = kwargs.get("qlinear_encoder_packing_format", None) 66 | qembedding_config = kwargs.get("qembedding", None) 67 | qembedding_group_size = kwargs.get("qembedding_group_size", None) 68 | 69 | # Quantize decoder linear weights. 70 | quantize_decoder_kwargs = { 71 | "eager_model": getattr(full_model.model, "decoder"), 72 | "qlinear_config": qlinear_config, 73 | } 74 | if qlinear_group_size is not None: 75 | quantize_decoder_kwargs["qlinear_group_size"] = qlinear_group_size 76 | if qlinear_packing_format is not None: 77 | quantize_decoder_kwargs["qlinear_packing_format"] = qlinear_packing_format 78 | quantize_model_(**quantize_decoder_kwargs) 79 | 80 | # Quantize encoder linear weights. 81 | quantize_encoder_kwargs = { 82 | "eager_model": getattr(full_model.model, "encoder"), 83 | "qlinear_config": qlinear_encoder_config, 84 | } 85 | if qlinear_encoder_group_size is not None: 86 | quantize_encoder_kwargs["qlinear_group_size"] = qlinear_encoder_group_size 87 | if qlinear_encoder_packing_format is not None: 88 | quantize_encoder_kwargs["qlinear_packing_format"] = qlinear_encoder_packing_format 89 | quantize_model_(**quantize_encoder_kwargs) 90 | 91 | # Quantize decoder embeddings. 92 | quantize_decoder_embedding_kwargs = { 93 | "eager_model": full_model, 94 | "qembedding_config": qembedding_config, 95 | } 96 | if qembedding_group_size is not None: 97 | quantize_decoder_embedding_kwargs["qembedding_group_size"] = qembedding_group_size 98 | quantize_model_(**quantize_decoder_embedding_kwargs) 99 | 100 | return Seq2SeqLMExportableModule( 101 | full_model, 102 | batch_size=batch_size, 103 | max_seq_len=max_seq_len, 104 | max_hidden_seq_len=max_hidden_seq_len, 105 | ) 106 | -------------------------------------------------------------------------------- /CONTRIBUTING.MD: -------------------------------------------------------------------------------- 1 | Thank you for your interest in contributing to Optimum ExecuTorch! 2 | 3 | ## Developing Optimum ExecuTorch 4 | 5 | ### Setting up the development environment 6 | To install Optimum ExecuTorch for development: 7 | ``` 8 | python install_dev.py 9 | ``` 10 | 11 | ### Testing local changes 12 | Optimum ExecuTorch does not have an editable install at the moment, so to test your local changes, you will need to reinstall. 13 | To prevent the reinstall from overwriting other dependencies, some of which you may have modified, you can run the following ahead of your test: 14 | ``` 15 | pip install --no-deps --no-build-isolation . 16 | ``` 17 | 18 | An example command for testing local changes to Gemma3: 19 | ``` 20 | pip install --no-deps --no-build-isolation . 
21 | RUN_SLOW=1 python -m pytest tests/models/test_modeling_gemma3.py -s -k test_gemma3_image_vision_with_custom_sdpa_kv_cache_8da4w_8we --log-cli-level=INFO 22 | ``` 23 | 24 | To run tests marked with `@slow`, just set `RUN_SLOW=1`. 25 | 26 | ## Enabling a new model on Optimum 27 | 28 | Our design philosophy is to have as little model-specific code as possible, which means all optimizations, export code, etc. are model-agnostic. 29 | This allows us to theoretically export any new model straight from the source, with a few caveats which will be explained later. 30 | For example, most Large Language Models should be able to be exported using this library. 31 | 32 | ### 💡 How to "enable" a model on Optimum 33 | ❓ Currently, the [homepage README](README.md?tab=readme-ov-file#-supported-models) lists all of the "supported" models. What does this mean, and what about models not on this list? 34 | 35 | 👉 These supported models all have a test file associated with them, such as [Gemma3](https://github.com/huggingface/optimum-executorch/blob/main/tests/models/test_modeling_gemma3.py), which has been used to validate the model end-to-end (export + running a generation loop on the exported artifact). 36 | The test file is then used in CI to guard against potential regressions. 37 | Once you have a PR up for adding the test to the repo, feel free to edit the homepage README to include the new model. 38 | 39 | As an example, in the Gemma3 test file, we have validated that the model is able to export and returns correct output to a test prompt for different export configurations - now other users will know that Gemma3 works and are able to export the model like so: 40 | ``` 41 | optimum-cli export executorch \ 42 | --model google/gemma-3-1b-it \ 43 | --task text-generation \ 44 | --recipe xnnpack \ 45 | --use_custom_sdpa \ 46 | --use_custom_kv_cache \ 47 | --qlinear 8da4w \ 48 | --qembedding 8w 49 | ``` 50 | 51 | However, there are many models without test files in Optimum that probably still work - just that no one has gone through the trouble of validating them. 52 | This is where you come in - feel free to contribute if there is a model you are interested in that does not yet have a test file! 53 | 54 | If you run into any issues, they will most likely stem from the following: 55 | - ❓ How much model-specific code is in Transformers for this model? 56 | - ❓ Do we already have the model type supported in Optimum? 57 | - ❓ Is the model itself torch.exportable? 58 | 59 | ### ❌ Model-specific code is in Transformers 60 | To address this issue, we will need to upstream changes to the Transformers library, or update our code to match. 61 | For instance, if hypothetically Transformers introduced a new type of cache, and this cache is used in a new LLM, we would need to handle this new cache type in Optimum. 62 | Or, hypothetically if we are expecting a certain attribute in a Transformers model and it exists instead with a slightly different name, this may be an opportunity to upstream some naming standardization changes to Transformers. 63 | [Here](https://github.com/huggingface/transformers/pull/40919) is an example of one such standardization. 
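Whichever of the issues above you run into, the end goal is the same: a test file that exercises export and generation for the new model. The sketch below is a minimal starting point (the model id and prompt are placeholders, and the right `ExecuTorchModelFor*` class depends on the task; mirror an existing file under `tests/models/` for the real thing):
```
# tests/models/test_modeling_mynewmodel.py (illustrative skeleton)
import unittest

import pytest
from executorch.extension.pybindings.portable_lib import ExecuTorchModule
from transformers import AutoTokenizer
from transformers.testing_utils import slow

from optimum.executorch import ExecuTorchModelForCausalLM


class ExecuTorchModelIntegrationTest(unittest.TestCase):
    @slow
    @pytest.mark.run_slow
    def test_mynewmodel_text_generation(self):
        model_id = "my-org/my-new-model"  # placeholder
        # Export + lower the model to ExecuTorch, then sanity-check the artifact type.
        model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="xnnpack")
        self.assertIsInstance(model, ExecuTorchModelForCausalLM)
        self.assertIsInstance(model.model, ExecuTorchModule)

        # Run a short generation loop on the exported artifact.
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        generated_text = model.text_generation(
            tokenizer=tokenizer,
            prompt="My favourite condiment is ",
            max_seq_len=32,
        )
        self.assertTrue(len(generated_text) > 0)
```
Run it the same way as the Gemma3 example above, e.g. `RUN_SLOW=1 python -m pytest tests/models/test_modeling_mynewmodel.py -s --log-cli-level=INFO`.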
64 | 65 | ### ❌ Model type is not supported in Optimum 66 | All of the supported model types are in [integrations.py](https://github.com/huggingface/optimum-executorch/blob/main/optimum/exporters/executorch/integrations.py), which contains wrapper classes that facilitate torch.exporting a model: 67 | - `CausalLMExportableModule` - LLMs (Large Language Models) 68 | - `MultiModalTextToTextExportableModule` - Multimodal LLMs (Large Language Models with support for audio/image input) 69 | - `VisionEncoderExportableModule` - Vision Encoder backbones (such as DiT or MobileViT) 70 | - `MaskedLMExportableModule` - Masked language models (for predicting masked tokens) 71 | - `Seq2SeqLMExportableModule` - General Seq2Seq encoder-decoder models (such as T5 and Whisper) 72 | 73 | This is where most of the complexity around "enabling" a model on Optimum comes from, since after torch.export() every model follows the same per-backend flow for transforming the torch.export() artifact into an ExecuTorch `.pte` artifact. 74 | If the model type doesn't exist in Optimum then we will need to write a new class for it. 75 | 76 | ### ❌ Model is not torch.exportable 77 | To address this issue, we will need to upstream changes to the model's modeling file in Transformers to make the model exportable. 78 | After doing this, it's a good idea to add a torch.export test to guard against future regressions (which tend to happen frequently since Transformers moves fast). 79 | [Here](https://github.com/huggingface/transformers/blob/87f38dbfcec48027d4bf2ea7ec8b8eecd5a7bc85/tests/models/smollm3/test_modeling_smollm3.py#L175) is an example. 80 | -------------------------------------------------------------------------------- /tests/models/test_modeling_gemma2.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | 16 | import gc 17 | import logging 18 | import os 19 | import subprocess 20 | import sys 21 | import tempfile 22 | import unittest 23 | 24 | import pytest 25 | import torchao 26 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 27 | from packaging.version import parse 28 | from transformers import AutoTokenizer 29 | from transformers.testing_utils import slow 30 | 31 | from optimum.executorch import ExecuTorchModelForCausalLM 32 | 33 | from ..utils import check_causal_lm_output_quality 34 | 35 | 36 | is_ci = os.environ.get("GITHUB_ACTIONS") == "true" 37 | is_linux_ci = sys.platform.startswith("linux") and is_ci 38 | 39 | 40 | @pytest.mark.skipif( 41 | parse(torchao.__version__) < parse("0.11.0.dev0"), 42 | reason="Only available on torchao >= 0.11.0.dev0", 43 | ) 44 | @pytest.mark.skipif(is_linux_ci, reason="OOM on linux runner") 45 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 46 | def __init__(self, *args, **kwargs): 47 | super().__init__(*args, **kwargs) 48 | 49 | @slow 50 | @pytest.mark.run_slow 51 | def test_gemma2_export_to_executorch(self): 52 | model_id = "unsloth/gemma-2-2b-it" 53 | task = "text-generation" 54 | recipe = "xnnpack" 55 | with tempfile.TemporaryDirectory() as tempdir: 56 | out_dir = f"{tempdir}/executorch" 57 | subprocess.run( 58 | f"optimum-cli export executorch \ 59 | --model {model_id} \ 60 | --task {task} \ 61 | --recipe {recipe} \ 62 | --output_dir {tempdir}/executorch \ 63 | --use_custom_sdpa \ 64 | --qlinear 8da4w \ 65 | --qembedding 8w", 66 | shell=True, 67 | check=True, 68 | ) 69 | pte_full_path = f"{out_dir}/model.pte" 70 | self.assertTrue(os.path.exists(pte_full_path)) 71 | 72 | # Explicitly delete the PTE file to free up disk space 73 | if os.path.exists(pte_full_path): 74 | os.remove(pte_full_path) 75 | gc.collect() 76 | 77 | @slow 78 | @pytest.mark.run_slow 79 | def test_gemma2_text_generation_with_custom_sdpa_8da4w_8we(self): 80 | # TODO: Switch to use google/gemma-2-2b once https://github.com/huggingface/optimum/issues/2127 is fixed 81 | # model_id = "google/gemma-2-2b" 82 | model_id = "unsloth/gemma-2-2b-it" 83 | # ExecuTorch model + custom sdpa + 8da4w linear quantization + int8 embedding quantization 84 | kwargs = {"qlinear": "8da4w", "qembedding": "8w"} 85 | model = ExecuTorchModelForCausalLM.from_pretrained( 86 | model_id, 87 | recipe="xnnpack", 88 | attn_implementation="custom_sdpa", 89 | **kwargs, 90 | ) 91 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 92 | self.assertIsInstance(model.model, ExecuTorchModule) 93 | 94 | tokenizer = AutoTokenizer.from_pretrained(model_id) 95 | generated_text = model.text_generation( 96 | tokenizer=tokenizer, 97 | prompt="Hello I am doing a project", 98 | max_seq_len=12, 99 | ) 100 | logging.info(f"\nGenerated text:\n\t{generated_text}") 101 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 102 | 103 | # Free memory before loading eager for quality check 104 | del model 105 | del tokenizer 106 | gc.collect() 107 | 108 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 109 | 110 | @slow 111 | @pytest.mark.run_slow 112 | @pytest.mark.portable 113 | @pytest.mark.skipif(is_ci, reason="Too big for CI runners") 114 | def test_gemma2_text_generation_portable(self): 115 | # TODO: Switch to use google/gemma-2-2b once https://github.com/huggingface/optimum/issues/2127 is fixed 116 | # model_id = "google/gemma-2-2b" 117 | model_id = "unsloth/gemma-2-2b-it" 118 | model = 
ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="portable") 119 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 120 | self.assertIsInstance(model.model, ExecuTorchModule) 121 | 122 | tokenizer = AutoTokenizer.from_pretrained(model_id) 123 | generated_text = model.text_generation( 124 | tokenizer=tokenizer, 125 | prompt="Hello I am doing a project", 126 | max_seq_len=12, 127 | ) 128 | logging.info(f"\nGenerated text:\n\t{generated_text}") 129 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 130 | 131 | # Free memory before loading eager for quality check 132 | del model 133 | del tokenizer 134 | gc.collect() 135 | 136 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 137 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/recipes/xnnpack.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | from typing import Dict, Union 17 | 18 | from packaging.version import parse 19 | from tabulate import tabulate 20 | from torch import __version__ as torch_version 21 | from torch.export import ExportedProgram 22 | from torchao.utils import unwrap_tensor_subclass 23 | 24 | from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner 25 | from executorch.devtools.backend_debug import get_delegation_info 26 | from executorch.exir import ( 27 | EdgeCompileConfig, 28 | ExecutorchBackendConfig, 29 | ExecutorchProgram, 30 | to_edge_transform_and_lower, 31 | ) 32 | from executorch.exir.passes import MemoryPlanningPass 33 | from optimum.executorch.passes.remove_padding_idx_embedding_pass import RemovePaddingIdxEmbeddingPass 34 | 35 | from ..integrations import ( 36 | CausalLMExportableModule, 37 | MaskedLMExportableModule, 38 | MultiModalTextToTextExportableModule, 39 | Seq2SeqLMExportableModule, 40 | ) 41 | from ..recipe_registry import register_recipe 42 | 43 | 44 | @register_recipe("xnnpack") 45 | def export_to_executorch_with_xnnpack( 46 | model: Union[ 47 | CausalLMExportableModule, 48 | MaskedLMExportableModule, 49 | Seq2SeqLMExportableModule, 50 | MultiModalTextToTextExportableModule, 51 | ], 52 | **kwargs, 53 | ): 54 | """ 55 | Export a PyTorch model to ExecuTorch w/ delegation to XNNPACK backend. 56 | 57 | This function also write metadata required by the ExecuTorch runtime to the model. 58 | 59 | Args: 60 | model (Union[CausalLMExportableModule, MaskedLMExportableModule, Seq2SeqLMExportableModule, MultiModalTextToTextExportableModule]): 61 | The PyTorch model to be exported to ExecuTorch. 62 | **kwargs: 63 | Additional keyword arguments for recipe-specific configurations, e.g. export using different example inputs, or different compile/bechend configs. 
64 | 65 | Returns: 66 | Dict[str, ExecutorchProgram]: 67 | A map of exported and optimized program for ExecuTorch. 68 | For encoder-decoder models or multimodal models, it may generate multiple programs. 69 | """ 70 | 71 | def _lower_to_executorch( 72 | exported_programs: Dict[str, ExportedProgram], 73 | metadata=None, 74 | ) -> Dict[str, ExecutorchProgram]: 75 | backend_config_dict = { 76 | "extract_delegate_segments": True, 77 | "memory_planning_pass": MemoryPlanningPass(alloc_graph_input=False), 78 | } 79 | backend_config_dict["do_quant_fusion_and_const_prop"] = True 80 | logging.debug(f"\nExported program: {exported_programs}") 81 | 82 | # If just one exported program, the method name in the .pte for it should be "forward". 83 | if len(exported_programs) == 1: 84 | exported_programs = {"forward": next(iter(exported_programs.values()))} 85 | 86 | et_prog = to_edge_transform_and_lower( 87 | exported_programs, 88 | partitioner=[XnnpackPartitioner()], 89 | compile_config=EdgeCompileConfig( 90 | _check_ir_validity=False, 91 | _skip_dim_order=True, 92 | ), 93 | constant_methods=metadata, 94 | transform_passes=[RemovePaddingIdxEmbeddingPass()], 95 | ) 96 | et_prog = et_prog.to_executorch( 97 | config=ExecutorchBackendConfig(**backend_config_dict), 98 | ) 99 | pte_name = "model" 100 | for method in et_prog.methods: 101 | logging.debug(f"---------------------- Method: {method} ----------------------") 102 | logging.debug(f"\nExecuTorch program for {pte_name}.pte: {et_prog.exported_program(method).graph_module}") 103 | delegation_info = get_delegation_info(et_prog.exported_program(method).graph_module) 104 | logging.debug(f"\nDelegation info Summary for {pte_name}.pte: {delegation_info.get_summary()}") 105 | logging.debug( 106 | f"\nDelegation info for {pte_name}.pte: {tabulate(delegation_info.get_operator_delegation_dataframe(), headers='keys', tablefmt='fancy_grid')}" 107 | ) 108 | return {pte_name: et_prog} 109 | 110 | # TODO: remove after ExecuTorch dep on Torch >= 2.10.0. 111 | if parse(torch_version) < parse("2.10.0.dev20251104"): 112 | model = unwrap_tensor_subclass(model) 113 | exported_progs = model.export() 114 | 115 | if ( 116 | model.config._attn_implementation == "custom_sdpa" 117 | or model.config._attn_implementation == "custom_sdpa_ring_kv_cache" 118 | ): 119 | # Sanity check to make sure the exported program contains the custom sdpa operator. 120 | if not any( 121 | node.op == "call_function" and "custom_sdpa" in str(node.target) 122 | for exported_program in exported_progs.values() 123 | for node in exported_program.graph_module.graph.nodes 124 | ): 125 | raise ValueError("'custom_sdpa' not found in the graph.") 126 | 127 | return _lower_to_executorch(exported_progs, model.metadata) 128 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/recipes/metal.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | from typing import Dict, Union 17 | 18 | from packaging.version import parse 19 | 20 | from executorch import version as executorch_version 21 | 22 | 23 | EXECUTORCH_VERSION = parse(executorch_version.__version__) 24 | METAL_BACKEND_AVAILABLE = EXECUTORCH_VERSION >= parse("1.1.0.dev20251017") 25 | 26 | if METAL_BACKEND_AVAILABLE: 27 | try: 28 | from executorch.backends.apple.metal.metal_backend import MetalBackend 29 | from executorch.backends.apple.metal.metal_partitioner import MetalPartitioner 30 | except ImportError: 31 | METAL_BACKEND_AVAILABLE = False 32 | 33 | if METAL_BACKEND_AVAILABLE: 34 | from tabulate import tabulate 35 | from torch.export import ExportedProgram 36 | 37 | from executorch.backends.apple.metal.metal_backend import MetalBackend 38 | from executorch.backends.apple.metal.metal_partitioner import MetalPartitioner 39 | from executorch.devtools.backend_debug import get_delegation_info 40 | from executorch.exir import ( 41 | EdgeCompileConfig, 42 | ExecutorchProgram, 43 | to_edge_transform_and_lower, 44 | ) 45 | from optimum.executorch.passes.remove_padding_idx_embedding_pass import RemovePaddingIdxEmbeddingPass 46 | 47 | from ..integrations import ( 48 | CausalLMExportableModule, 49 | MaskedLMExportableModule, 50 | MultiModalTextToTextExportableModule, 51 | Seq2SeqLMExportableModule, 52 | ) 53 | from ..recipe_registry import register_recipe 54 | 55 | @register_recipe("metal") 56 | def export_to_executorch_with_metal( 57 | model: Union[ 58 | CausalLMExportableModule, 59 | MaskedLMExportableModule, 60 | Seq2SeqLMExportableModule, 61 | MultiModalTextToTextExportableModule, 62 | ], 63 | **kwargs, 64 | ): 65 | """ 66 | Export a PyTorch model to ExecuTorch w/ delegation to Metal backend. 67 | 68 | This function also write metadata required by the ExecuTorch runtime to the model. 69 | 70 | Args: 71 | model (Union[CausalLMExportableModule, MaskedLMExportableModule, Seq2SeqLMExportableModule, MultiModalTextToTextExportableModule]): 72 | The PyTorch model to be exported to ExecuTorch. 73 | **kwargs: 74 | Additional keyword arguments for recipe-specific configurations, e.g. export using different example inputs, or different compile/bechend configs. 75 | 76 | Returns: 77 | Dict[str, ExecutorchProgram]: 78 | A map of exported and optimized program for ExecuTorch. 79 | For encoder-decoder models or multimodal models, it may generate multiple programs. 80 | """ 81 | 82 | def _lower_to_executorch( 83 | exported_programs: Dict[str, ExportedProgram], 84 | metadata=None, 85 | ) -> Dict[str, ExecutorchProgram]: 86 | logging.debug(f"\nExported program: {exported_programs}") 87 | 88 | # If just one exported program, the method name in the .pte for it should be "forward". 
89 | if len(exported_programs) == 1: 90 | exported_programs = {"forward": next(iter(exported_programs.values()))} 91 | 92 | partitioners = { 93 | key: [MetalPartitioner([MetalBackend.generate_method_name_compile_spec(key)])] 94 | for key in exported_programs.keys() 95 | } 96 | 97 | et_prog = to_edge_transform_and_lower( 98 | exported_programs, 99 | partitioner=partitioners, 100 | compile_config=EdgeCompileConfig( 101 | _check_ir_validity=False, 102 | _skip_dim_order=True, 103 | ), 104 | constant_methods=metadata, 105 | transform_passes=[RemovePaddingIdxEmbeddingPass()], 106 | ) 107 | et_prog = et_prog.to_executorch() 108 | pte_name = "model" 109 | for method in et_prog.methods: 110 | logging.debug(f"---------------------- Method: {method} ----------------------") 111 | logging.debug( 112 | f"\nExecuTorch program for {pte_name}.pte: {et_prog.exported_program(method).graph_module}" 113 | ) 114 | delegation_info = get_delegation_info(et_prog.exported_program(method).graph_module) 115 | logging.debug(f"\nDelegation info Summary for {pte_name}.pte: {delegation_info.get_summary()}") 116 | logging.debug( 117 | f"\nDelegation info for {pte_name}.pte: {tabulate(delegation_info.get_operator_delegation_dataframe(), headers='keys', tablefmt='fancy_grid')}" 118 | ) 119 | return {pte_name: et_prog} 120 | 121 | if ( 122 | model.config._attn_implementation == "custom_sdpa" 123 | or model.config._attn_implementation == "custom_sdpa_ring_kv_cache" 124 | ): 125 | raise NotImplementedError("Custom SDPA implementation is not supported for Metal.") 126 | 127 | exported_progs = model.export() 128 | 129 | return _lower_to_executorch(exported_progs, model.metadata) 130 | -------------------------------------------------------------------------------- /docs/source/guides/export.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Export a model to ExecuTorch with optimum.exporters.executorch 14 | 15 | If you need to deploy 🤗 Transformers models for on-device use cases, we recommend 16 | exporting them to a serialized format that can be distributed and executed on specialized 17 | runtimes and hardware. In this guide, we'll show you how to export these 18 | models to [ExecuTorch](https://pytorch.org/executorch/main/intro-overview.html). 19 | 20 | 21 | ## Why ExecuTorch? 22 | 23 | ExecuTorch is the ideal solution for deploying PyTorch models on edge devices, offering a streamlined process from 24 | export to deployment without leaving PyTorch ecosystem. 25 | 26 | Supporting on-device AI presents unique challenges with diverse hardware, critical power requirements, low/no internet 27 | connectivity, and realtime processing needs. These constraints have historically prevented or slowed down the creation 28 | of scalable and performant on-device AI solutions. We designed ExecuTorch, backed by our industry partners like Meta, 29 | Arm, Apple, Qualcomm, MediaTek, etc. to be highly portable and provide superior developer productivity without losing on 30 | performance. 
31 | 32 | 33 | ## Summary 34 | 35 | Exporting a PyTorch model to ExecuTorch is as simple as 36 | 37 | ```bash 38 | optimum-cli export executorch \ 39 | --model HuggingFaceTB/SmolLM2-135M \ 40 | --task text-generation \ 41 | --recipe xnnpack \ 42 | --output_dir hf_smollm2 \ 43 | --use_custom_sdpa 44 | ``` 45 | 46 | Check out the help for more options: 47 | 48 | ```bash 49 | optimum-cli export executorch --help 50 | ``` 51 | 52 | 53 | ## Exporting a model to ExecuTorch using the CLI 54 | 55 | The Optimum ExecuTorch export can be used through the Optimum command line: 56 | 57 | ```bash 58 | optimum-cli export executorch --help 59 | 60 | usage: optimum-cli export executorch [-h] -m MODEL [-o OUTPUT_DIR] [--task TASK] [--recipe RECIPE] 61 | 62 | options: 63 | -h, --help show this help message and exit 64 | 65 | Required arguments: 66 | -m MODEL, --model MODEL 67 | Model ID on huggingface.co or path on disk to load model from. 68 | -o OUTPUT_DIR, --output_dir OUTPUT_DIR 69 | Path indicating the directory where to store the generated ExecuTorch model. 70 | --task TASK The task to export the model for. Available tasks depend on the model, but are among: ['audio-classification', 'feature-extraction', 'image-to-text', 71 | 'sentence-similarity', 'depth-estimation', 'image-segmentation', 'audio-frame-classification', 'masked-im', 'semantic-segmentation', 'text-classification', 72 | 'audio-xvector', 'mask-generation', 'question-answering', 'text-to-audio', 'automatic-speech-recognition', 'image-to-image', 'multiple-choice', 'image- 73 | classification', 'text2text-generation', 'token-classification', 'object-detection', 'zero-shot-object-detection', 'zero-shot-image-classification', 'text- 74 | generation', 'fill-mask']. 75 | --recipe RECIPE Pre-defined recipes for export to ExecuTorch. Defaults to "xnnpack". 76 | --use_custom_sdpa For decoder-only models to use custom sdpa with static kv cache to boost performance. Defaults to False. 77 | 78 | ``` 79 | 80 | You should see a `model.pte` file stored under "./hf_smollm2/": 81 | 82 | ```bash 83 | hf_smollm2/ 84 | └── model.pte 85 | ``` 86 | 87 | This will fetch the model from the Hub and export the PyTorch model with the specialized recipe. The resulting `model.pte` file can then be run on the [XNNPACK backend](https://pytorch.org/executorch/main/tutorial-xnnpack-delegate-lowering.html), or on many 88 | other ExecuTorch-supported backends if exported with a different recipe, e.g. Apple's [Core ML](https://pytorch.org/executorch/main/build-run-coreml.html) or [MPS](https://pytorch.org/executorch/main/build-run-mps.html), [Qualcomm's SoCs](https://pytorch.org/executorch/main/build-run-qualcomm-ai-engine-direct-backend.html), [ARM's Ethos-U](https://pytorch.org/executorch/main/executorch-arm-delegate-tutorial.html), [Xtensa HiFi4 DSP](https://pytorch.org/executorch/main/build-run-xtensa.html), [Vulkan GPU](https://pytorch.org/executorch/main/build-run-vulkan.html), [MediaTek](https://pytorch.org/executorch/main/build-run-mediatek-backend.html), etc.
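To target one of those backends you only need to swap the recipe. As a rough sketch, the command below re-exports the same model with one of the Core ML recipes registered in `optimum/exporters/executorch/recipes/coreml.py` (included later in this repository); the `coreml_fp16` recipe name follows that registration code, the output directory name is just an example, and the export assumes `coremltools` and the ExecuTorch Core ML backend are installed:

```bash
# Re-export with a Core ML recipe instead of XNNPACK (fp16 precision, all compute units).
optimum-cli export executorch \
  --model HuggingFaceTB/SmolLM2-135M \
  --task text-generation \
  --recipe coreml_fp16 \
  --output_dir hf_smollm2_coreml
```

Everything else about the command (model, task, output layout) stays the same as in the XNNPACK export above.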
89 | 90 | For example, we can load and run the model with the [ExecuTorch Runtime](https://pytorch.org/executorch/main/runtime-overview.html) using the `optimum.executorch` package as follows: 91 | 92 | ```python 93 | from transformers import AutoTokenizer 94 | from optimum.executorch import ExecuTorchModelForCausalLM 95 | 96 | tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M") 97 | model = ExecuTorchModelForCausalLM.from_pretrained("hf_smollm2/") 98 | prompt = "Simply put, the theory of relativity states that" 99 | print(f"\nGenerated texts:\n\t{model.text_generation(tokenizer=tokenizer, prompt=prompt, max_seq_len=45)}") 100 | ``` 101 | 102 | As you can see, converting a model to ExecuTorch does not mean leaving the Hugging Face ecosystem. You end up with a similar API to regular 🤗 Transformers models! 103 | 104 | If your model wasn't already exported to ExecuTorch, it can also be converted on the fly when loading it: 105 | 106 | ```python 107 | from optimum.executorch import ExecuTorchModelForCausalLM 108 | 109 | model = ExecuTorchModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-135M", recipe="xnnpack", attn_implementation="custom_sdpa") 110 | ``` 111 | -------------------------------------------------------------------------------- /tests/models/test_modeling_whisper.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License.
15 | 16 | import logging 17 | import os 18 | import subprocess 19 | import tempfile 20 | import unittest 21 | 22 | import pytest 23 | from datasets import load_dataset 24 | from executorch import version 25 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 26 | from packaging.version import parse 27 | from transformers import AutoProcessor, AutoTokenizer 28 | from transformers.testing_utils import slow 29 | 30 | from optimum.executorch import ExecuTorchModelForSpeechSeq2Seq 31 | 32 | 33 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 34 | 35 | 36 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 37 | def __init__(self, *args, **kwargs): 38 | super().__init__(*args, **kwargs) 39 | 40 | # @slow 41 | # @pytest.mark.run_slow 42 | def test_whisper_export_to_executorch(self): 43 | model_id = "openai/whisper-tiny" 44 | task = "automatic-speech-recognition" 45 | recipe = "xnnpack" 46 | with tempfile.TemporaryDirectory() as tempdir: 47 | subprocess.run( 48 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch", 49 | shell=True, 50 | check=True, 51 | ) 52 | self.assertTrue(os.path.exists(f"{tempdir}/executorch/model.pte")) 53 | model = ExecuTorchModelForSpeechSeq2Seq.from_pretrained(f"{tempdir}/executorch") 54 | self._test_whisper_transcription(model_id, model) 55 | 56 | def _test_whisper_transcription(self, model_id: str, model: ExecuTorchModelForSpeechSeq2Seq): 57 | tokenizer = AutoTokenizer.from_pretrained(model_id) 58 | processor = AutoProcessor.from_pretrained(model_id) 59 | 60 | self.assertIsInstance(model, ExecuTorchModelForSpeechSeq2Seq) 61 | self.assertTrue(hasattr(model, "model")) 62 | self.assertIsInstance(model.model, ExecuTorchModule) 63 | 64 | dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation") 65 | sample = dataset[0]["audio"] 66 | 67 | input_features = processor( 68 | sample["array"], 69 | return_tensors="pt", 70 | truncation=False, 71 | sampling_rate=sample["sampling_rate"], 72 | ).input_features 73 | # Current implementation of the transcibe method accepts up to 30 seconds of audio, therefore I trim the audio here. 74 | input_features_trimmed = input_features[:, :, :3000].contiguous() 75 | 76 | generated_transcription = model.transcribe(tokenizer, input_features_trimmed) 77 | expected_text = " Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel. Nor is Mr. Quilter's manner less interesting than his matter. He tells us that at this festive season of the year, with Christmas and roast beef looming before us, similarly drawn from eating and its results occur most readily to the mind. He has grave doubts whether Sir Frederick Latins work is really Greek after all, and can discover that." 
78 | logging.info( 79 | f"\nExpected transcription:\n\t{expected_text}\nGenerated transcription:\n\t{generated_transcription}" 80 | ) 81 | self.assertEqual(generated_transcription, expected_text) 82 | 83 | def _helper_whisper_transcription(self, recipe: str): 84 | model_id = "openai/whisper-tiny" 85 | model = ExecuTorchModelForSpeechSeq2Seq.from_pretrained(model_id, recipe=recipe) 86 | self._test_whisper_transcription(model_id, model) 87 | 88 | @slow 89 | @pytest.mark.run_slow 90 | def test_whisper_transcription(self): 91 | self._helper_whisper_transcription(recipe="xnnpack") 92 | 93 | @slow 94 | @pytest.mark.run_slow 95 | @pytest.mark.portable 96 | @pytest.mark.skipif( 97 | parse(version.__version__) < parse("0.7.0"), 98 | reason="Fixed on executorch >= 0.7.0", 99 | ) 100 | def test_whisper_transcription_portable(self): 101 | self._helper_whisper_transcription(recipe="portable") 102 | 103 | @slow 104 | @pytest.mark.run_slow 105 | def test_whisper_large_v3_turbo_export_bfloat16(self): 106 | """Test exporting whisper-large-v3-turbo with bfloat16 and check file size is ~1.6GB""" 107 | model_id = "openai/whisper-large-v3-turbo" 108 | task = "automatic-speech-recognition" 109 | recipe = "xnnpack" 110 | dtype = "bfloat16" 111 | with tempfile.TemporaryDirectory() as tempdir: 112 | subprocess.run( 113 | f"optimum-cli export executorch --model {model_id} --task {task} --recipe {recipe} --output_dir {tempdir}/executorch --dtype {dtype}", 114 | shell=True, 115 | check=True, 116 | ) 117 | 118 | # Check that model.pte exists 119 | model_path = os.path.join(tempdir, "executorch", "model.pte") 120 | self.assertTrue(os.path.exists(model_path), f"model.pte not found at {model_path}") 121 | 122 | # Check file size is approximately 1.6GB (allow 10% tolerance) 123 | file_size_bytes = os.path.getsize(model_path) 124 | file_size_gb = file_size_bytes / (1024**3) 125 | expected_size_gb = 1.6 126 | tolerance = 0.1 # 10% tolerance 127 | 128 | logging.info(f"model.pte size: {file_size_gb:.2f} GB") 129 | self.assertAlmostEqual( 130 | file_size_gb, 131 | expected_size_gb, 132 | delta=expected_size_gb * tolerance, 133 | msg=f"Expected file size ~{expected_size_gb}GB, but got {file_size_gb:.2f}GB", 134 | ) 135 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/recipes/cuda.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import logging 16 | from typing import Dict, Union 17 | 18 | import torch 19 | from tabulate import tabulate 20 | from torch.export import ExportedProgram 21 | 22 | from executorch.devtools.backend_debug import get_delegation_info 23 | from executorch.exir import ( 24 | EdgeCompileConfig, 25 | ExecutorchBackendConfig, 26 | ExecutorchProgram, 27 | to_edge_transform_and_lower, 28 | ) 29 | from executorch.exir.backend.compile_spec_schema import CompileSpec 30 | from executorch.exir.passes import MemoryPlanningPass 31 | from optimum.executorch.passes.remove_padding_idx_embedding_pass import ( 32 | RemovePaddingIdxEmbeddingPass, 33 | ) 34 | 35 | from ..integrations import ( 36 | CausalLMExportableModule, 37 | MaskedLMExportableModule, 38 | MultiModalTextToTextExportableModule, 39 | Seq2SeqLMExportableModule, 40 | ) 41 | from ..recipe_registry import register_recipe 42 | 43 | 44 | aten = torch.ops.aten 45 | 46 | 47 | def lower_to_executorch( 48 | exported_programs: Dict[str, ExportedProgram], 49 | metadata=None, 50 | is_windows: bool = False, 51 | model_config=None, 52 | ) -> Dict[str, ExecutorchProgram]: 53 | # Import here to avoid version conflicts. 54 | from torch._inductor.decomposition import conv1d_to_conv2d 55 | 56 | from executorch.backends.cuda.cuda_backend import CudaBackend 57 | from executorch.backends.cuda.cuda_partitioner import CudaPartitioner 58 | 59 | logging.debug(f"\nExported program: {exported_programs}") 60 | 61 | # If just one exported program, the method name in the .pte for it should be "forward". 62 | if len(exported_programs) == 1: 63 | exported_programs = {"forward": next(iter(exported_programs.values()))} 64 | 65 | # Check if this is a Gemma3 model 66 | model_type = getattr(model_config, "model_type", None) if model_config else None 67 | 68 | # CUDA backend compile spec with method name. 69 | partitioners = {} 70 | for key in exported_programs.keys(): 71 | compile_specs = [CudaBackend.generate_method_name_compile_spec(key)] 72 | if is_windows: 73 | compile_specs.append(CompileSpec("platform", "windows".encode("utf-8"))) 74 | 75 | # Add Gemma3-specific compile spec if needed 76 | if model_type == "gemma3": 77 | compile_specs.append(CompileSpec(key="triton_kernel_mode", value=b"OFF")) 78 | 79 | partitioners[key] = [CudaPartitioner(compile_specs)] 80 | 81 | # Add decompositions for triton to generate kernels. 
82 | for key, ep in exported_programs.items(): 83 | exported_programs[key] = ep.run_decompositions( 84 | { 85 | aten.conv1d.default: conv1d_to_conv2d, 86 | } 87 | ) 88 | et_prog = to_edge_transform_and_lower( 89 | exported_programs, 90 | partitioner=partitioners, 91 | compile_config=EdgeCompileConfig( 92 | _check_ir_validity=False, 93 | _skip_dim_order=True, 94 | ), 95 | constant_methods=metadata, 96 | transform_passes=[RemovePaddingIdxEmbeddingPass()], 97 | ) 98 | et_prog = et_prog.to_executorch( 99 | ExecutorchBackendConfig( 100 | memory_planning_pass=MemoryPlanningPass( 101 | alloc_graph_input=False, 102 | ) 103 | ), 104 | ) 105 | pte_name = "model" 106 | for method in et_prog.methods: 107 | logging.debug(f"---------------------- Method: {method} ----------------------") 108 | logging.debug(f"\nExecuTorch program for {pte_name}.pte: {et_prog.exported_program(method).graph_module}") 109 | delegation_info = get_delegation_info(et_prog.exported_program(method).graph_module) 110 | logging.debug(f"\nDelegation info Summary for {pte_name}.pte: {delegation_info.get_summary()}") 111 | logging.debug( 112 | f"\nDelegation info for {pte_name}.pte: {tabulate(delegation_info.get_operator_delegation_dataframe(), headers='keys', tablefmt='fancy_grid')}" 113 | ) 114 | return {pte_name: et_prog} 115 | 116 | 117 | @register_recipe("cuda") 118 | def export_to_executorch_with_cuda( 119 | model: Union[ 120 | CausalLMExportableModule, 121 | MaskedLMExportableModule, 122 | Seq2SeqLMExportableModule, 123 | MultiModalTextToTextExportableModule, 124 | ], 125 | **kwargs, 126 | ): 127 | """ 128 | Export a PyTorch model to ExecuTorch w/ delegation to CUDA backend. 129 | This function also write metadata required by the ExecuTorch runtime to the .pte file. 130 | Args: 131 | model (Union[CausalLMExportableModule, MaskedLMExportableModule, Seq2SeqLMExportableModule, MultiModalTextToTextExportableModule]): 132 | The PyTorch model to be exported to ExecuTorch. 133 | **kwargs: 134 | Additional keyword arguments for recipe-specific configurations, e.g. export using different example inputs, or different compile/bechend configs. 135 | Returns: 136 | Dict[str, ExecutorchProgram]: 137 | A map of exported and optimized program for ExecuTorch. 138 | For encoder-decoder models or multimodal models, it may generate multiple programs. 139 | """ 140 | if ( 141 | model.config._attn_implementation == "custom_sdpa" 142 | or model.config._attn_implementation == "custom_sdpa_ring_kv_cache" 143 | ): 144 | raise NotImplementedError( 145 | "Custom SDPA implementation is not supported for CUDA yet. Please use 'flash_attention' instead." 146 | ) 147 | 148 | exported_progs = model.export() 149 | 150 | return lower_to_executorch(exported_progs, model.metadata, model_config=getattr(model, "config", None)) 151 | -------------------------------------------------------------------------------- /optimum/exporters/executorch/recipes/coreml.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | from itertools import product 17 | from typing import Any, Dict, Union 18 | 19 | from tabulate import tabulate 20 | from torch.export import ExportedProgram 21 | 22 | from executorch.devtools.backend_debug import get_delegation_info 23 | from executorch.exir import ( 24 | EdgeCompileConfig, 25 | ExecutorchBackendConfig, 26 | ExecutorchProgram, 27 | to_edge_transform_and_lower, 28 | ) 29 | 30 | from ..integrations import ( 31 | CausalLMExportableModule, 32 | MaskedLMExportableModule, 33 | Seq2SeqLMExportableModule, 34 | ) 35 | from ..recipe_registry import register_recipe 36 | 37 | 38 | def _export_to_executorch( 39 | model: Union[CausalLMExportableModule, MaskedLMExportableModule, Seq2SeqLMExportableModule], 40 | **kwargs, 41 | ): 42 | """ 43 | Export a PyTorch model to ExecuTorch w/ delegation to CoreML backend. 44 | 45 | This function also write metadata required by the ExecuTorch runtime to the model. 46 | 47 | Args: 48 | model (Union[CausalLMExportableModule, MaskedLMExportableModule, Seq2SeqLMExportableModule]): 49 | The PyTorch model to be exported to ExecuTorch. 50 | **kwargs: 51 | Additional keyword arguments for recipe-specific configurations, e.g. export using different example inputs, or different compile/bechend configs. 52 | 53 | Returns: 54 | Dict[str, ExecutorchProgram]: 55 | A map of exported and optimized program for ExecuTorch. 56 | For encoder-decoder models or multimodal models, it may generate multiple programs. 
57 | """ 58 | # Import here because coremltools might not be available in all environments 59 | import coremltools as ct 60 | 61 | from executorch.backends.apple.coreml.compiler import CoreMLBackend 62 | from executorch.backends.apple.coreml.partition import CoreMLPartitioner 63 | 64 | def _lower_to_executorch( 65 | exported_programs: Dict[str, ExportedProgram], 66 | metadata, 67 | compute_unit, 68 | minimum_deployment_target, 69 | compute_precision, 70 | ) -> Dict[str, ExecutorchProgram]: 71 | et_progs = {} 72 | backend_config_dict = {} 73 | for pte_name, exported_program in exported_programs.items(): 74 | logging.debug(f"\nExported program for {pte_name}.pte: {exported_program}") 75 | et_progs[pte_name] = to_edge_transform_and_lower( 76 | exported_program, 77 | partitioner=[ 78 | CoreMLPartitioner( 79 | compile_specs=CoreMLBackend.generate_compile_specs( 80 | compute_unit=compute_unit, 81 | minimum_deployment_target=minimum_deployment_target, 82 | compute_precision=compute_precision, 83 | model_type=CoreMLBackend.MODEL_TYPE.MODEL, 84 | ), 85 | take_over_mutable_buffer=(minimum_deployment_target >= ct.target.iOS18), 86 | ) 87 | ], 88 | compile_config=EdgeCompileConfig( 89 | _check_ir_validity=False, 90 | # In ET 0.7, we can set _skip_dim_order=False 91 | _skip_dim_order=True, 92 | ), 93 | constant_methods=metadata, 94 | ).to_executorch( 95 | config=ExecutorchBackendConfig(**backend_config_dict), 96 | ) 97 | logging.debug( 98 | f"\nExecuTorch program for {pte_name}.pte: {et_progs[pte_name].exported_program().graph_module}" 99 | ) 100 | delegation_info = get_delegation_info(et_progs[pte_name].exported_program().graph_module) 101 | logging.debug(f"\nDelegation info Summary for {pte_name}.pte: {delegation_info.get_summary()}") 102 | logging.debug( 103 | f"\nDelegation info for {pte_name}.pte: {tabulate(delegation_info.get_operator_delegation_dataframe(), headers='keys', tablefmt='fancy_grid')}" 104 | ) 105 | return et_progs 106 | 107 | exported_progs = model.export() 108 | return _lower_to_executorch(exported_progs, model.metadata, **kwargs) 109 | 110 | 111 | def _get_recipe_kwargs(dtype: str, compute_unit: str) -> Dict[str, Any]: 112 | import coremltools as ct 113 | 114 | compute_precision = { 115 | "fp16": ct.precision.FLOAT16, 116 | "fp32": ct.precision.FLOAT32, 117 | }[dtype] 118 | 119 | compute_unit = { 120 | "cpu": ct.ComputeUnit.CPU_ONLY, 121 | "gpu": ct.ComputeUnit.CPU_AND_GPU, 122 | "ne": ct.ComputeUnit.CPU_AND_NE, 123 | "all": ct.ComputeUnit.ALL, 124 | }[compute_unit] 125 | 126 | recipe_kwargs = { 127 | "compute_precision": compute_precision, 128 | "compute_unit": compute_unit, 129 | "minimum_deployment_target": ct.target.iOS18, 130 | } 131 | return recipe_kwargs 132 | 133 | 134 | def _make_recipe(recipe_name, recipe_kwargs): 135 | @register_recipe(recipe_name) 136 | def recipe_fn(exported_programs: Dict[str, ExportedProgram], **kwargs): 137 | return _export_to_executorch( 138 | exported_programs, 139 | **recipe_kwargs, 140 | ) 141 | 142 | return recipe_fn 143 | 144 | 145 | # Register recipes for CoreML backend 146 | for dtype, compute_unit in product(["fp32", "fp16"], ["cpu", "gpu", "ne", "all"]): 147 | recipe_name = f"coreml_{dtype}" 148 | if compute_unit != "all": 149 | recipe_name += f"_{compute_unit}" 150 | recipe_kwargs = _get_recipe_kwargs(dtype=dtype, compute_unit=compute_unit) 151 | _make_recipe(recipe_name, recipe_kwargs) 152 | -------------------------------------------------------------------------------- /tests/models/test_modeling_llama.py: 
-------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2024 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | import gc 17 | import logging 18 | import os 19 | import subprocess 20 | import tempfile 21 | import unittest 22 | 23 | import pytest 24 | import torchao 25 | from executorch.extension.pybindings.portable_lib import ExecuTorchModule 26 | from packaging.version import parse 27 | from transformers import AutoTokenizer 28 | from transformers.testing_utils import slow 29 | 30 | from optimum.executorch import ExecuTorchModelForCausalLM 31 | 32 | from ..utils import check_causal_lm_output_quality 33 | 34 | 35 | @pytest.mark.skipif( 36 | parse(torchao.__version__) < parse("0.11.0.dev0"), 37 | reason="Only available on torchao >= 0.11.0.dev0", 38 | ) 39 | class ExecuTorchModelIntegrationTest(unittest.TestCase): 40 | def __init__(self, *args, **kwargs): 41 | super().__init__(*args, **kwargs) 42 | 43 | @slow 44 | @pytest.mark.run_slow 45 | def test_llama3_2_1b_export_to_executorch(self): 46 | model_id = "NousResearch/Llama-3.2-1B" 47 | task = "text-generation" 48 | recipe = "xnnpack" 49 | with tempfile.TemporaryDirectory() as tempdir: 50 | out_dir = f"{tempdir}/executorch" 51 | subprocess.run( 52 | f"optimum-cli export executorch \ 53 | --model {model_id} \ 54 | --task {task} \ 55 | --recipe {recipe} \ 56 | --use_custom_sdpa \ 57 | --use_custom_kv_cache \ 58 | --qlinear 8da4w \ 59 | --qembedding 8w \ 60 | --output_dir {tempdir}/executorch", 61 | shell=True, 62 | check=True, 63 | ) 64 | pte_full_path = f"{out_dir}/model.pte" 65 | self.assertTrue(os.path.exists(pte_full_path)) 66 | 67 | # Explicitly delete the PTE file to free up disk space 68 | if os.path.exists(pte_full_path): 69 | os.remove(pte_full_path) 70 | gc.collect() 71 | 72 | @slow 73 | @pytest.mark.run_slow 74 | def test_llama_text_generation_with_custom_sdpa_8da4w_8we(self): 75 | # ExecuTorch model + custom sdpa + 8da4w linear quantization + int8 embedding quantization 76 | model_id = "NousResearch/Llama-3.2-1B" 77 | kwargs = {"qlinear": "8da4w", "qembedding": "8w"} 78 | model = ExecuTorchModelForCausalLM.from_pretrained( 79 | model_id, 80 | recipe="xnnpack", 81 | attn_implementation="custom_sdpa", 82 | **kwargs, 83 | ) 84 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 85 | self.assertIsInstance(model.model, ExecuTorchModule) 86 | tokenizer = AutoTokenizer.from_pretrained(model_id) 87 | generated_text = model.text_generation( 88 | tokenizer=tokenizer, 89 | prompt="Simply put, the theory of relativity states that", 90 | max_seq_len=32, 91 | ) 92 | logging.info(f"\nGenerated text:\n\t{generated_text}") 93 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 94 | 95 | # Free memory before loading eager for quality check 96 | del model 97 | del tokenizer 98 | gc.collect() 99 | 100 | self.assertTrue(check_causal_lm_output_quality(model_id, 
generated_tokens)) 101 | 102 | @slow 103 | @pytest.mark.run_slow 104 | def test_llama_text_generation_with_custom_sdpa_and_kv_cache_8da4w_8we(self): 105 | model_id = "NousResearch/Llama-3.2-1B" 106 | tokenizer = AutoTokenizer.from_pretrained(model_id) 107 | model = ExecuTorchModelForCausalLM.from_pretrained( 108 | model_id, 109 | recipe="xnnpack", 110 | attn_implementation="custom_sdpa", 111 | use_custom_kv_cache=True, 112 | **{"qlinear": "8da4w", "qembedding": "8w"}, 113 | ) 114 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 115 | self.assertIsInstance(model.model, ExecuTorchModule) 116 | generated_text = model.text_generation( 117 | tokenizer=tokenizer, 118 | prompt="Simply put, the theory of relativity states that", 119 | max_seq_len=32, 120 | ) 121 | logging.info(f"\nGenerated text:\n\t{generated_text}") 122 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 123 | 124 | # Free memory before loading eager for quality check 125 | del model 126 | del tokenizer 127 | gc.collect() 128 | 129 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 130 | 131 | @slow 132 | @pytest.mark.run_slow 133 | @pytest.mark.portable 134 | def test_llama_text_generation_portable(self): 135 | model_id = "NousResearch/Llama-3.2-1B" 136 | model = ExecuTorchModelForCausalLM.from_pretrained(model_id, recipe="portable") 137 | self.assertIsInstance(model, ExecuTorchModelForCausalLM) 138 | self.assertIsInstance(model.model, ExecuTorchModule) 139 | tokenizer = AutoTokenizer.from_pretrained(model_id) 140 | generated_text = model.text_generation( 141 | tokenizer=tokenizer, 142 | prompt="Simply put, the theory of relativity states that", 143 | max_seq_len=32, 144 | ) 145 | logging.info(f"\nGenerated text:\n\t{generated_text}") 146 | generated_tokens = tokenizer(generated_text, return_tensors="pt").input_ids 147 | 148 | # Free memory before loading eager for quality check 149 | del model 150 | del tokenizer 151 | gc.collect() 152 | 153 | self.assertTrue(check_causal_lm_output_quality(model_id, generated_tokens)) 154 | --------------------------------------------------------------------------------