├── .github └── workflows │ ├── delete_doc_comment_trigger.yml │ ├── upload_pr_documentation.yml │ ├── delete_doc_comment.yml │ ├── security.yml │ ├── test.yml │ ├── check_code_quality.yml │ └── build_pr_documentation.yml ├── docs ├── README.md └── source │ ├── _toctree.yml │ ├── package_reference │ ├── quantization.mdx │ ├── modeling.mdx │ └── configuration.mdx │ ├── usage_guides │ ├── overview.mdx │ ├── models.mdx │ └── quantization.mdx │ ├── installation.md │ └── index.md ├── optimum └── furiosa │ ├── version.py │ ├── quantization_base.py │ ├── __init__.py │ ├── utils.py │ ├── modeling.py │ ├── configuration.py │ ├── quantization.py │ └── modeling_base.py ├── notebooks └── quantization │ └── image-classification │ └── __init__.py ├── setup.cfg ├── pyproject.toml ├── examples └── quantization │ └── image-classification │ ├── README.md │ └── run_image_classification.py ├── Makefile ├── README.md ├── .gitignore ├── setup.py ├── tests ├── test_quantization.py └── test_modeling.py └── LICENSE /.github/workflows/delete_doc_comment_trigger.yml: -------------------------------------------------------------------------------- 1 | name: Delete doc comment trigger 2 | 3 | on: 4 | pull_request: 5 | types: [ closed ] 6 | 7 | 8 | jobs: 9 | delete: 10 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment_trigger.yml@main 11 | with: 12 | pr_number: ${{ github.event.number }} 13 | -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Upload PR Documentation 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Build PR Documentation"] 6 | types: 7 | - completed 8 | 9 | jobs: 10 | build: 11 | uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main 12 | with: 13 | package_name: optimum-furiosa 14 | secrets: 15 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} 16 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} 17 | -------------------------------------------------------------------------------- /.github/workflows/delete_doc_comment.yml: -------------------------------------------------------------------------------- 1 | name: Delete PR documentation 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Delete doc comment trigger"] 6 | types: 7 | - completed 8 | paths: 9 | - "optimum/**.py" 10 | - "docs/**" 11 | - ".github/workflows/build_pr_documentation.yml" 12 | - ".github/workflows/delete_doc_comment.yml" 13 | 14 | 15 | jobs: 16 | delete: 17 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main 18 | secrets: 19 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} 20 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Optimum Furiosa documentation 2 | 3 | 1. Setup 4 | ```bash 5 | pip install hf-doc-builder==0.4.0 watchdog --upgrade 6 | ``` 7 | 8 | 2. Local Development 9 | ```bash 10 | doc-builder preview optimum.furiosa docs/source/ 11 | ``` 12 | 3. Build Docs 13 | ```bash 14 | doc-builder build optimum.furiosa docs/source/ --build_dir build/ 15 | ``` 16 | 17 | ## Add assets/Images 18 | 19 | Adding images/assets is only possible through `https://` links meaning you need to use `https://raw.githubusercontent.com/huggingface/optimum-furiosa/main/docs/assets/` prefix. 
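For example, to embed an image stored under `docs/assets` (the file name below is illustrative only, not an actual asset in the repo):

```md
![Warboy overview](https://raw.githubusercontent.com/huggingface/optimum-furiosa/main/docs/assets/warboy_overview.png)
```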
20 | -------------------------------------------------------------------------------- /docs/source/_toctree.yml: -------------------------------------------------------------------------------- 1 | - sections: 2 | - local: index 3 | title: 🤗 Optimum Furiosa 4 | - local: installation 5 | title: Installation 6 | - sections: 7 | - local: usage_guides/overview 8 | title: Overview 9 | - local: usage_guides/models 10 | title: Modeling 11 | - local: usage_guides/quantization 12 | title: Quantization 13 | title: How-To Guides 14 | - sections: 15 | - local: package_reference/modeling 16 | title: Models 17 | - local: package_reference/configuration 18 | title: Configuration 19 | - local: package_reference/quantization 20 | title: Quantization 21 | title: Reference 22 | title: Optimum Furiosa 23 | isExpanded: false -------------------------------------------------------------------------------- /optimum/furiosa/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | __version__ = "0.1.0.dev0" 16 | -------------------------------------------------------------------------------- /docs/source/package_reference/quantization.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Quantization 14 | 15 | ## FuriosaAIQuantizer 16 | 17 | [[autodoc]] FuriosaAIQuantizer -------------------------------------------------------------------------------- /notebooks/quantization/image-classification/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | # Copyright 2023 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | -------------------------------------------------------------------------------- /docs/source/usage_guides/overview.mdx: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Overview 18 | 19 | Welcome to the 🤗 Optimum Furiosa how-to guides! 
20 | These guides tackle more advanced topics and will show you how to easily get the best from NPUs: 21 | - [Accelerating inference](./models) 22 | - [Quantization](./quantization) 23 | -------------------------------------------------------------------------------- /.github/workflows/security.yml: -------------------------------------------------------------------------------- 1 | name: Security Checks 2 | 3 | on: 4 | push: 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | secrets: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - shell: bash 14 | run: | 15 | if [ "${{ github.event_name }}" == "push" ]; then 16 | echo "depth=$(($(jq length <<< '${{ toJson(github.event.commits) }}') + 2))" >> $GITHUB_ENV 17 | echo "branch=${{ github.ref_name }}" >> $GITHUB_ENV 18 | fi 19 | if [ "${{ github.event_name }}" == "pull_request" ]; then 20 | echo "depth=$((${{ github.event.pull_request.commits }}+2))" >> $GITHUB_ENV 21 | echo "branch=${{ github.event.pull_request.head.ref }}" >> $GITHUB_ENV 22 | fi 23 | - name: Checkout code 24 | uses: actions/checkout@v4 25 | with: 26 | ref: ${{env.branch}} 27 | fetch-depth: ${{env.depth}} 28 | - name: Scan for secrets 29 | uses: trufflesecurity/trufflehog@main 30 | -------------------------------------------------------------------------------- /docs/source/installation.md: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Installation 14 | 15 | To install 🤗 Optimum Furiosa, you first need to install Furiosa SDK drivers by following the official [installation guide](https://furiosa-ai.github.io/docs/latest/en/software/installation.html). Then, 🤗 Optimum Furiosa can be installed using `pip` as follows: 16 | 17 | ```bash 18 | python -m pip install git+https://github.com/huggingface/optimum-furiosa.git 19 | ``` 20 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | build: 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | python-version: [3.8] 17 | os: [self-hosted] 18 | runs-on: ${{ matrix.os }} 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Setup Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v2 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Create and start a virtual environment 26 | run: | 27 | python -m venv venv 28 | source venv/bin/activate 29 | - name: Install dependencies 30 | run: | 31 | source venv/bin/activate 32 | python -m pip install --upgrade pip 33 | pip install .[testing] 34 | - name: Test with Pytest 35 | run: | 36 | source venv/bin/activate 37 | pytest -s tests/ 38 | - name: Cleanup 39 | run: | 40 | rm -rf venv -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = transformers 7 | known_third_party = 8 | absl 9 | conllu 10 | datasets 11 | elasticsearch 12 | fairseq 13 | faiss-cpu 14 | fastprogress 15 | fire 16 | fugashi 17 | git 18 | h5py 19 | matplotlib 20 | nltk 21 | numpy 22 | packaging 23 | pandas 24 | PIL 25 | psutil 26 | pytest 27 | pytorch_lightning 28 | rouge_score 29 | 
sacrebleu 30 | seqeval 31 | sklearn 32 | streamlit 33 | tensorboardX 34 | tensorflow 35 | tensorflow_datasets 36 | timeout_decorator 37 | torch 38 | torchaudio 39 | torchtext 40 | torchvision 41 | torch_xla 42 | tqdm 43 | 44 | line_length = 119 45 | lines_after_imports = 2 46 | multi_line_output = 3 47 | use_parentheses = True 48 | 49 | [flake8] 50 | ignore = E203, E501, E741, W503, W605 51 | max-line-length = 119 52 | 53 | [tool:pytest] 54 | doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS -------------------------------------------------------------------------------- /docs/source/package_reference/modeling.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Models 14 | 15 | ## Generic model classes 16 | 17 | The following Furiosa classes are available for instantiating a base model class without a specific head. 18 | 19 | ### FuriosaAIModel 20 | 21 | [[autodoc]] FuriosaAIModel 22 | 23 | ## Computer vision 24 | 25 | The following classes are available for the following computer vision tasks. 26 | 27 | ### FuriosaAIModelForImageClassification 28 | 29 | [[autodoc]] FuriosaAIModelForImageClassification 30 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [tool.black] 16 | line-length = 119 17 | target-version = ['py37'] 18 | 19 | [tool.ruff] 20 | # Never enforce `E501` (line length violations). 21 | ignore = ["C901", "E501", "E741", "W605"] 22 | select = ["C", "E", "F", "I", "W"] 23 | line-length = 119 24 | 25 | # Ignore import violations in all `__init__.py` files. 
26 | [tool.ruff.per-file-ignores]
27 | "__init__.py" = ["E402", "F401", "F403", "F811"]
28 | 
29 | [tool.ruff.isort]
30 | lines-after-imports = 2
31 | known-first-party = ["optimum"]
-------------------------------------------------------------------------------- /.github/workflows/check_code_quality.yml: --------------------------------------------------------------------------------
1 | name: Check code quality
2 | 
3 | on:
4 |   push:
5 |     branches: [ main ]
6 |     paths:
7 |       - "optimum/**.py"
8 |       - "tests/**.py"
9 |       - "examples/**.py"
10 | 
11 |   pull_request:
12 |     branches: [ main ]
13 |     paths:
14 |       - "optimum/**.py"
15 |       - "tests/**.py"
16 |       - "examples/**.py"
17 | 
18 | jobs:
19 |   build:
20 |     strategy:
21 |       fail-fast: false
22 |       matrix:
23 |         python-version: ['3.8']
24 |         os: [ubuntu-22.04]
25 | 
26 |     runs-on: ${{ matrix.os }}
27 |     steps:
28 |     - uses: actions/checkout@v2
29 |     - name: Setup Python ${{ matrix.python-version }}
30 |       uses: actions/setup-python@v2
31 |       with:
32 |         python-version: ${{ matrix.python-version }}
33 |     - name: Create and start a virtual environment
34 |       run: |
35 |         python -m venv venv
36 |         source venv/bin/activate
37 |     - name: Install dependencies
38 |       run: |
39 |         source venv/bin/activate
40 |         pip install --upgrade pip
41 |         pip install black ruff
42 |     - name: Check style with black
43 |       run: |
44 |         source venv/bin/activate
45 |         black --check .
46 |     - name: Check style with ruff
47 |       run: |
48 |         source venv/bin/activate
49 |         ruff .
-------------------------------------------------------------------------------- /docs/source/package_reference/configuration.mdx: --------------------------------------------------------------------------------
1 | 
12 | 
13 | # Configuration
14 | 
15 | The configuration classes are the way to specify how a task should be done. Here is how quantization can be configured:
16 | 
17 | 1. Quantization: Performed by the [`~furiosa.FuriosaAIQuantizer`], quantization can be set using a [`~furiosa.configuration.QuantizationConfig`]. A calibration step is required in some cases (post-training static quantization), which can be specified using a [`~furiosa.configuration.CalibrationConfig`].
18 | 
19 | ## QuantizationConfig
20 | 
21 | [[autodoc]] configuration.QuantizationConfig
22 | 
23 | ## CalibrationConfig
24 | 
25 | [[autodoc]] configuration.CalibrationConfig
26 | 
27 | ## FuriosaAIConfig
28 | 
29 | [[autodoc]] configuration.FuriosaAIConfig
-------------------------------------------------------------------------------- /examples/quantization/image-classification/README.md: --------------------------------------------------------------------------------
1 | 
16 | 
17 | # Image classification
18 | 
19 | The script [`run_image_classification.py`](https://github.com/huggingface/optimum-furiosa/blob/main/examples/quantization/image_classification/run_image_classification.py) allows us to apply different quantization approaches using the [FuriosaAI SDK](https://furiosa-ai.github.io/docs/latest/en/software/quantization.html) for image classification tasks.
20 | 
21 | The following example applies quantization on a ResNet model fine-tuned on the beans classification dataset.
22 | 23 | ```bash 24 | python run_image_classification.py \ 25 | --model_name_or_path eugenecamus/resnet-50-base-beans-demo \ 26 | --dataset_name beans \ 27 | --do_eval \ 28 | --output_dir /tmp/image_classification_resnet_beans 29 | ``` 30 | -------------------------------------------------------------------------------- /optimum/furiosa/quantization_base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | from abc import ABC, abstractmethod 17 | from pathlib import Path 18 | from typing import Optional, Union 19 | 20 | 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | class OptimumQuantizer(ABC): 25 | @classmethod 26 | def from_pretrained( 27 | cls, 28 | model_or_path: Union[str, Path], 29 | file_name: Optional[str] = None, 30 | ): 31 | """Overwrite this method in subclass to define how to load your model from pretrained""" 32 | raise NotImplementedError( 33 | "Overwrite this method in subclass to define how to load your model from pretrained for quantization" 34 | ) 35 | 36 | @abstractmethod 37 | def quantize(self, save_dir: Union[str, Path], file_prefix: Optional[str] = None, **kwargs): 38 | """Overwrite this method in subclass to define how to quantize your model for quantization""" 39 | raise NotImplementedError( 40 | "Overwrite this method in subclass to define how to quantize your model for quantization" 41 | ) 42 | -------------------------------------------------------------------------------- /optimum/furiosa/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from typing import TYPE_CHECKING 16 | 17 | from transformers.utils import OptionalDependencyNotAvailable, _LazyModule 18 | 19 | from .utils import FURIOSA_ENF_FILE_NAME 20 | 21 | 22 | _import_structure = { 23 | "configuration": [ 24 | "CalibrationConfig", 25 | "AutoCalibrationConfig", 26 | "QuantizationMode", 27 | "FuriosaAIConfig", 28 | "QuantizationConfig", 29 | ], 30 | "modeling": [ 31 | "FuriosaAIModel", 32 | "FuriosaAIModelForImageClassification", 33 | ], 34 | "quantization": ["FuriosaAIQuantizer"], 35 | "utils": [ 36 | "export_model_to_onnx", 37 | ], 38 | "version": ["__version__"], 39 | } 40 | 41 | # Direct imports for type-checking 42 | if TYPE_CHECKING: 43 | from .configuration import FuriosaAIConfig, QuantizationConfig 44 | from .modeling import ( 45 | FuriosaAIModelForImageClassification, 46 | ) 47 | from .quantization import FuriosaAIQuantizer 48 | from .utils import export_model_to_onnx 49 | from .version import __version__ 50 | else: 51 | import sys 52 | 53 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 54 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | SHELL := /bin/bash 15 | CURRENT_DIR = $(shell pwd) 16 | DEFAULT_CLONE_URL := https://github.com/huggingface/optimum-furiosa.git 17 | # If CLONE_URL is empty, revert to DEFAULT_CLONE_URL 18 | REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL)) 19 | 20 | .PHONY: style test 21 | 22 | # Run code quality checks 23 | style_check: 24 | black --check . 25 | ruff . 26 | 27 | style: 28 | black . 29 | ruff . --fix 30 | 31 | # Run tests for the library 32 | test: 33 | python -m pytest tests 34 | 35 | # Utilities to release to PyPi 36 | build_dist_install_tools: 37 | pip install build 38 | pip install twine 39 | 40 | build_dist: 41 | rm -fr build 42 | rm -fr dist 43 | python -m build 44 | 45 | pypi_upload: build_dist 46 | python -m twine upload dist/* 47 | 48 | build_doc_docker_image: 49 | docker build -t doc_maker --build-arg commit_sha=$(COMMIT_SHA_SUBPACKAGE) --build-arg clone_url=$(REAL_CLONE_URL) ./docs 50 | 51 | doc: build_doc_docker_image 52 | @test -n "$(BUILD_DIR)" || (echo "BUILD_DIR is empty." ; exit 1) 53 | @test -n "$(VERSION)" || (echo "VERSION is empty." 
; exit 1)
54 | 	docker run -v $(CURRENT_DIR):/doc_folder --workdir=/doc_folder doc_maker \
55 | 	doc-builder build optimum.furiosa /optimum-furiosa/docs/source/ \
56 | 	--build_dir $(BUILD_DIR) \
57 | 	--version $(VERSION) \
58 | 	--version_tag_suffix "" \
59 | 	--html \
60 | 	--clean
-------------------------------------------------------------------------------- /docs/source/index.md: --------------------------------------------------------------------------------
1 | 
16 | 
17 | # 🤗 Optimum Furiosa
18 | 
19 | 🤗 Optimum Furiosa is the interface between the 🤗 Transformers library and Furiosa NPUs such as [Furiosa Warboy](https://furiosa-ai.github.io/docs/latest/en/npu/intro.html#furiosaai-warboy).
20 | It provides a set of tools enabling easy model loading and inference for different downstream tasks.
21 | 
22 | <!-- link-card grid (markup lost in extraction) -->
23 | - **How-to guides**: Practical guides to help you achieve a specific goal. Take a look at these guides to learn how to use 🤗 Optimum Furiosa to solve real-world problems.
24 | - **Reference**: Technical descriptions of how the classes and methods of 🤗 Optimum Furiosa work.
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Test](https://github.com/huggingface/optimum-furiosa/actions/workflows/test.yml/badge.svg)](https://github.com/huggingface/optimum-furiosa/actions/workflows/test.yml) 2 | 3 | 4 | # optimum-furiosa 5 | Accelerated inference of 🤗 models using FuriosaAI NPU chips. 6 | 7 | ## Furiosa SDK setup 8 | A Furiosa SDK environment needs to be enabled to use this library. Please refer to Furiosa's [Installation](https://furiosa-ai.github.io/docs/latest/en/software/installation.html) guide. 9 | 10 | ## Install 11 | Optimum Furiosa is a fast-moving project, and you may want to install from source. 12 | 13 | `pip install git+https://github.com/huggingface/optimum-furiosa.git` 14 | 15 | ### Installing in developer mode 16 | 17 | If you are working on the `optimum-furiosa` code then you should use an editable install 18 | by cloning and installing `optimum` and `optimum-furiosa`: 19 | 20 | ``` 21 | git clone https://github.com/huggingface/optimum 22 | git clone https://github.com/huggingface/optimum-furiosa 23 | pip install -e optimum -e optimum-furiosa 24 | ``` 25 | 26 | Now whenever you change the code, you'll be able to run with those changes instantly. 27 | 28 | 29 | ## How to use it? 30 | To load a model and run inference with Furiosa NPU, you can just replace your `AutoModelForXxx` class with the corresponding `FuriosaAIModelForXxx` class. 31 | 32 | ```diff 33 | import requests 34 | from PIL import Image 35 | 36 | - from transformers import AutoModelForImageClassification 37 | + from optimum.furiosa import FuriosaAIModelForImageClassification 38 | from transformers import AutoFeatureExtractor, pipeline 39 | 40 | url = "http://images.cocodataset.org/val2017/000000039769.jpg" 41 | image = Image.open(requests.get(url, stream=True).raw) 42 | 43 | model_id = "microsoft/resnet-50" 44 | - model = AutoModelForImageClassification.from_pretrained(model_id) 45 | + model = FuriosaAIModelForImageClassification.from_pretrained(model_id, export=True, input_shape_dict={"pixel_values": [1, 3, 224, 224]}, output_shape_dict={"logits": [1, 1000]},) 46 | feature_extractor = AutoFeatureExtractor.from_pretrained(model_id) 47 | cls_pipe = pipeline("image-classification", model=model, feature_extractor=feature_extractor) 48 | outputs = cls_pipe(image) 49 | ``` 50 | 51 | If you find any issue while using those, please open an issue or a pull request. 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build PR documentation 2 | 3 | on: 4 | pull_request: 5 | branches: [ main ] 6 | paths: 7 | - "optimum/**.py" 8 | - "docs/**" 9 | - ".github/workflows/build_pr_documentation.yml" 10 | - ".github/workflows/delete_doc_comment.yml" 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | build_documentation: 18 | runs-on: self-hosted 19 | env: 20 | COMMIT_SHA: ${{ github.event.pull_request.head.sha }} 21 | PR_NUMBER: ${{ github.event.number }} 22 | EVENT_CONTEXT: ${{ toJSON(github.event) }} 23 | PR_CLONE_URL: ${{ github.event.pull_request.head.repo.clone_url }} 24 | 25 | steps: 26 | - uses: actions/checkout@v2 27 | with: 28 | repository: 'huggingface/doc-builder' 29 | path: doc-builder 30 | 31 | - uses: actions/checkout@v2 32 | with: 33 | repository: 'huggingface/optimum-furiosa' 34 | path: optimum-furiosa 35 | 36 | - name: Create and start a virtual environment 37 | run: | 38 | python -m venv venv_docs 39 | source venv_docs/bin/activate 40 | python -m pip install --upgrade pip 41 | 42 | - name: Setup environment 43 | run: | 44 | source venv_docs/bin/activate 45 | pip uninstall -y doc-builder 46 | cd doc-builder 47 | git pull origin main 48 | pip install . 49 | pip install black 50 | cd .. 51 | cd optimum-furiosa 52 | pip install . 53 | cd .. 
54 | 55 | - name: Make documentation 56 | run: | 57 | source venv_docs/bin/activate 58 | cd optimum-furiosa 59 | doc-builder build optimum.furiosa docs/source/ --build_dir furiosa-doc-build --version pr_$PR_NUMBER --version_tag_suffix "" --html --clean 60 | cd .. 61 | 62 | - name: Save commit_sha & pr_number 63 | run: | 64 | source venv_docs/bin/activate 65 | cd optimum-furiosa/furiosa-doc-build 66 | sudo mv optimum.furiosa optimum-furiosa 67 | echo ${{ env.COMMIT_SHA }} > ./commit_sha 68 | echo ${{ env.PR_NUMBER }} > ./pr_number 69 | 70 | - uses: actions/upload-artifact@v3 71 | with: 72 | name: doc-build-artifact 73 | path: optimum-furiosa/furiosa-doc-build/ 74 | 75 | - name: Cleanup 76 | run: | 77 | rm -rf venv_docs 78 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from setuptools import find_namespace_packages, setup 4 | 5 | 6 | # Ensure we match the version set in optimum/furiosa/version.py 7 | try: 8 | filepath = "optimum/furiosa/version.py" 9 | with open(filepath) as version_file: 10 | (__version__,) = re.findall('__version__ = "(.*)"', version_file.read()) 11 | except Exception as error: 12 | assert False, "Error: Could not open '%s' due %s\n" % (filepath, error) 13 | 14 | INSTALL_REQUIRE = [ 15 | "optimum==1.8.0", 16 | "transformers>=4.20.0", 17 | "datasets>=1.4.0", 18 | "furiosa-optimizer", 19 | "furiosa-quantizer==0.9.0", 20 | "furiosa-quantizer-impl==0.9.1", 21 | "furiosa-sdk", 22 | "onnx>=1.12.0", 23 | "sentencepiece", 24 | "scipy", 25 | ] 26 | 27 | TESTS_REQUIRE = ["pytest", "parameterized", "Pillow", "evaluate", "diffusers", "py-cpuinfo"] 28 | 29 | QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"] 30 | 31 | EXTRA_REQUIRE = { 32 | "testing": [ 33 | "filelock", 34 | "GitPython", 35 | "parameterized", 36 | "psutil", 37 | "pytest", 38 | "pytest-pythonpath", 39 | "pytest-xdist", 40 | "Pillow", 41 | "librosa", 42 | "soundfile", 43 | ], 44 | "quality": QUALITY_REQUIRE, 45 | } 46 | 47 | setup( 48 | name="optimum-furiosa", 49 | version=__version__, 50 | description="Optimum Library is an extension of the Hugging Face Transformers library, providing a framework to " 51 | "integrate third-party libraries from Hardware Partners and interface with their specific " 52 | "functionality.", 53 | long_description=open("README.md", "r", encoding="utf-8").read(), 54 | long_description_content_type="text/markdown", 55 | classifiers=[ 56 | "Development Status :: 5 - Production/Stable", 57 | "License :: OSI Approved :: Apache Software License", 58 | "Intended Audience :: Developers", 59 | "Intended Audience :: Education", 60 | "Intended Audience :: Science/Research", 61 | "Operating System :: OS Independent", 62 | "Programming Language :: Python :: 3.7", 63 | "Programming Language :: Python :: 3.8", 64 | "Programming Language :: Python :: 3.9", 65 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 66 | ], 67 | keywords="transformers, quantization, pruning, knowledge distillation, optimization, training", 68 | url="https://huggingface.co/hardware", 69 | author="HuggingFace Inc. 
Special Ops Team", 70 | author_email="hardware@huggingface.co", 71 | license="Apache", 72 | packages=find_namespace_packages(include=["optimum*"]), 73 | install_requires=INSTALL_REQUIRE, 74 | extras_require=EXTRA_REQUIRE, 75 | include_package_data=True, 76 | zip_safe=False, 77 | entry_points={"console_scripts": ["optimum-cli=optimum.commands.optimum_cli:main"]}, 78 | ) 79 | -------------------------------------------------------------------------------- /docs/source/usage_guides/models.mdx: -------------------------------------------------------------------------------- 1 | # Optimum Inference with Furiosa NPU 2 | 3 | Optimum Furiosa is a utility package for building and running inference with Furiosa NPUs. 4 | Optimum can be used to load optimized models from the [Hugging Face Hub](hf.co/models) and create pipelines 5 | to run accelerated inference without rewriting your APIs. 6 | 7 | ## Switching from Transformers to Optimum Furiosa 8 | 9 | The `optimum.furiosa.FuriosaAIModelForXXX` model classes are API compatible with Hugging Face models. This 10 | means you can just replace your `AutoModelForXXX` class with the corresponding `FuriosaAIModelForXXX` class in `optimum.furiosa`. 11 | 12 | You do not need to adapt your code to get it to work with `FuriosaAIModelForXXX` classes: 13 | 14 | Because the model you want to work with might not be already converted to ONNX, [`~optimum.furiosa.FuriosaAIModel`] 15 | includes a method to convert vanilla Hugging Face models to ONNX ones. Simply pass `export=True` to the 16 | [`~optimum.furiosa.FuriosaAIModel.from_pretrained`] method, and your model will be loaded and converted to ONNX on-the-fly: 17 | 18 | ### Loading and inference of a vanilla Transformers model 19 | 20 | ```diff 21 | import requests 22 | from PIL import Image 23 | 24 | - from transformers import AutoModelForImageClassification 25 | + from optimum.furiosa import FuriosaAIModelForImageClassification 26 | from transformers import AutoFeatureExtractor, pipeline 27 | 28 | url = "http://images.cocodataset.org/val2017/000000039769.jpg" 29 | image = Image.open(requests.get(url, stream=True).raw) 30 | 31 | model_id = "microsoft/resnet-50" 32 | - model = AutoModelForImageClassification.from_pretrained(model_id) 33 | + model = FuriosaAIModelForImageClassification.from_pretrained(model_id, export=True, input_shape_dict={"pixel_values": [1, 3, 224, 224]}, output_shape_dict={"logits": [1, 1000]},) 34 | feature_extractor = AutoFeatureExtractor.from_pretrained(model_id) 35 | cls_pipe = pipeline("image-classification", model=model, feature_extractor=feature_extractor) 36 | outputs = cls_pipe(image) 37 | ``` 38 | 39 | 40 | ### Pushing compiled models to the Hugging Face Hub 41 | 42 | It is also possible, just as with regular [`~transformers.PreTrainedModel`]s, to push your `FurisoaAIModelForXXX` to the 43 | [Hugging Face Model Hub](https://hf.co/models): 44 | 45 | ```python 46 | >>> from optimum.furiosa import FuriosaAIModelForImageClassification 47 | 48 | >>> # Load the model from the hub 49 | >>> model = FuriosaAIModelForImageClassification.from_pretrained( 50 | ... "microsoft/resnet-50", export=True, input_shape_dict={"pixel_values": [1, 3, 224, 224]}, output_shape_dict={"logits": [1, 1000]}, 51 | ... ) 52 | 53 | >>> # Save the converted model 54 | >>> model.save_pretrained("a_local_path_for_compiled_model") 55 | 56 | # Push the compiled model to HF Hub 57 | >>> model.push_to_hub( # doctest: +SKIP 58 | ... 
"a_local_path_for_compiled_model", repository_id="my-furiosa-repo", use_auth_token=True 59 | ... ) 60 | ``` -------------------------------------------------------------------------------- /docs/source/usage_guides/quantization.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Quantization 14 | 15 | 🤗 Optimum provides an `optimum.furiosa` package that enables you to apply quantization on many models hosted on 16 | the Hugging Face Hub using the [Furiosa](https://furiosa-ai.github.io/docs/latest/en/software/quantization.html) 17 | quantization tool. 18 | 19 | The quantization process is abstracted via the [`~optimum.furiosa.FuriosaAIConfig`] and 20 | the [`~optimum.furiosa.FuriosaAIQuantizer`] classes. The former allows you to specify how quantization should be done, 21 | while the latter effectively handles quantization. 22 | 23 | ## Static Quantization example 24 | 25 | The [`~optimum.furiosa.FuriosaAIQuantizer`] class can be used to quantize statically your ONNX model. Below you will find 26 | an easy end-to-end example on how to quantize statically 27 | [eugenecamus/resnet-50-base-beans-demo](https://huggingface.co/eugenecamus/resnet-50-base-beans-demo). 28 | 29 | ```python 30 | >>> from functools import partial 31 | >>> from pathlib import Path 32 | >>> from transformers import AutoFeatureExtractor 33 | >>> from optimum.furiosa import FuriosaAIQuantizer, FuriosaAIModelForImageClassification 34 | >>> from optimum.furiosa.configuration import AutoCalibrationConfig 35 | >>> from optimum.furiosa.utils import export_model_to_onnx 36 | 37 | >>> model_id = "eugenecamus/resnet-50-base-beans-demo" 38 | 39 | # Convert PyTorch model convert to ONNX and create Quantizer and setup config 40 | 41 | >>> feature_extractor = AutoFeatureExtractor.from_pretrained(model_id) 42 | 43 | >>> batch_size = 1 44 | >>> image_size = feature_extractor.size["shortest_edge"] 45 | >>> num_labels = 3 46 | >>> onnx_model_name = "model.onnx" 47 | >>> output_dir = "output" 48 | >>> onnx_model_path = Path(output_dir) / onnx_model_name 49 | 50 | >>> export_model_to_onnx( 51 | ... model_id, 52 | ... save_dir=output_dir, 53 | ... input_shape_dict={"pixel_values": [batch_size, 3, image_size, image_size]}, 54 | ... output_shape_dict={"logits": [batch_size, num_labels]}, 55 | ... file_name=onnx_model_name, 56 | ) 57 | >>> quantizer = FuriosaAIQuantizer.from_pretrained(output_dir, file_name=onnx_model_name) 58 | >>> qconfig = QuantizationConfig() 59 | 60 | # Create the calibration dataset 61 | >>> def preprocess_fn(ex, feature_extractor): 62 | ... return feature_extractor(ex["image"]) 63 | 64 | >>> calibration_dataset = quantizer.get_calibration_dataset( 65 | ... "beans", 66 | ... preprocess_function=partial(preprocess_fn, feature_extractor=feature_extractor), 67 | ... num_samples=50, 68 | ... dataset_split="train", 69 | ... ) 70 | 71 | # Create the calibration configuration containing the parameters related to calibration. 72 | >>> calibration_config = AutoCalibrationConfig.mse_asym(calibration_dataset) 73 | 74 | # Perform the calibration step: computes the activations quantization ranges 75 | >>> ranges = quantizer.fit( 76 | ... dataset=calibration_dataset, 77 | ... calibration_config=calibration_config, 78 | ... ) 79 | 80 | # Apply static quantization on the model 81 | >>> model_quantized_path = quantizer.quantize( 82 | ... save_dir=output, 83 | ... calibration_tensors_range=ranges, 84 | ... quantization_config=qconfig, 85 | ... 
) 86 | ``` 87 | -------------------------------------------------------------------------------- /tests/test_quantization.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | import tempfile 17 | import unittest 18 | from functools import partial 19 | from pathlib import Path 20 | 21 | import requests 22 | from parameterized import parameterized 23 | from PIL import Image 24 | from transformers import AutoFeatureExtractor 25 | 26 | from optimum.furiosa import ( 27 | AutoCalibrationConfig, 28 | FuriosaAIConfig, 29 | FuriosaAIModelForImageClassification, 30 | FuriosaAIQuantizer, 31 | QuantizationConfig, 32 | ) 33 | from optimum.furiosa.utils import export_model_to_onnx 34 | 35 | 36 | class FuriosaAIQuantizationTest(unittest.TestCase): 37 | SUPPORTED_ARCHITECTURES = ((FuriosaAIModelForImageClassification, "fxmarty/resnet-tiny-beans"),) 38 | 39 | @parameterized.expand(SUPPORTED_ARCHITECTURES) 40 | def test_quantization(self, model_cls, model_name): 41 | qconfig = QuantizationConfig() 42 | 43 | def preprocess_fn(ex, feature_extractor): 44 | return feature_extractor(ex["image"]) 45 | 46 | with tempfile.TemporaryDirectory() as tmp_dir: 47 | output_dir = Path(tmp_dir) 48 | export_model_to_onnx( 49 | model_name, 50 | save_dir=tmp_dir, 51 | input_shape_dict={"pixel_values": [1, 3, 224, 224]}, 52 | output_shape_dict={"logits": [1, 3]}, 53 | file_name="model.onnx", 54 | ) 55 | 56 | feature_extractor = AutoFeatureExtractor.from_pretrained(model_name) 57 | 58 | quantizer = FuriosaAIQuantizer.from_pretrained(tmp_dir, file_name="model.onnx") 59 | 60 | calibration_dataset = quantizer.get_calibration_dataset( 61 | "beans", 62 | preprocess_function=partial(preprocess_fn, feature_extractor=feature_extractor), 63 | num_samples=10, 64 | dataset_split="train", 65 | ) 66 | 67 | calibration_config = AutoCalibrationConfig.mse_asym(calibration_dataset) 68 | ranges = quantizer.fit( 69 | dataset=calibration_dataset, 70 | calibration_config=calibration_config, 71 | ) 72 | 73 | quantizer.quantize( 74 | save_dir=output_dir, 75 | calibration_tensors_range=ranges, 76 | quantization_config=qconfig, 77 | ) 78 | 79 | expected_fai_config = FuriosaAIConfig(quantization=qconfig, calibration=calibration_config) 80 | fai_config = FuriosaAIConfig.from_pretrained(tmp_dir) 81 | # Verify the FuriosaAIConfig was correctly created and saved 82 | self.assertEqual(fai_config.to_dict(), expected_fai_config.to_dict()) 83 | 84 | assert os.path.isfile(output_dir.joinpath("model_quantized.dfg")) is True 85 | 86 | fai_model_quantized = model_cls(Path(output_dir) / "model_quantized.dfg") 87 | 88 | url = "http://images.cocodataset.org/val2017/000000039769.jpg" 89 | image = Image.open(requests.get(url, stream=True).raw) 90 | inputs = feature_extractor(images=image, return_tensors="np") 91 | 92 | fai_outputs = fai_model_quantized(**inputs) 93 | 
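# inference on the quantized model should still return classification logits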
self.assertIn("logits", fai_outputs) 94 | -------------------------------------------------------------------------------- /optimum/furiosa/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from pathlib import Path 17 | from typing import List, Union 18 | 19 | import numpy as np 20 | from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer 21 | 22 | from furiosa.runtime.tensor import DataType 23 | from optimum.exporters.onnx import main_export 24 | 25 | 26 | ONNX_WEIGHTS_NAME = "model.onnx" 27 | ONNX_WEIGHTS_NAME_STATIC = "model_static.onnx" 28 | FURIOSA_ENF_FILE_NAME = "model.enf" 29 | FURIOSA_QUANTIZED_FILE_NAME = "model_quantized.dfg" 30 | 31 | MAX_ONNX_OPSET_2022_2_0 = 10 32 | MAX_ONNX_OPSET = 13 33 | MIN_ONNX_QDQ_OPSET = 13 34 | 35 | WARBOY_DEVICE = "warboy" 36 | 37 | FURIOSA_DTYPE_TO_NUMPY_DTYPE = { 38 | DataType.UINT8: np.uint8, 39 | DataType.INT8: np.int8, 40 | DataType.FLOAT32: np.float32, 41 | } 42 | 43 | _HEAD_TO_AUTOMODELS = { 44 | "image-classification": "FuriosaAIModelForImageClassification", 45 | } 46 | 47 | 48 | def export_model_to_onnx(model_id, save_dir, input_shape_dict, output_shape_dict, file_name="model.onnx"): 49 | task = "image-classification" 50 | main_export(model_id, save_dir, task=task) 51 | 52 | import onnx 53 | from onnx import shape_inference 54 | from onnx.tools import update_model_dims 55 | 56 | save_dir_path = Path(save_dir) / "model.onnx" 57 | model = onnx.load(save_dir_path) 58 | updated_model = update_model_dims.update_inputs_outputs_dims(model, input_shape_dict, output_shape_dict) 59 | inferred_model = shape_inference.infer_shapes(updated_model) 60 | 61 | static_model_path = Path(save_dir_path).parent / file_name 62 | onnx.save(inferred_model, static_model_path) 63 | 64 | 65 | def maybe_load_preprocessors(src_name_or_path: Union[str, Path], subfolder: str = "") -> List: 66 | preprocessors = [] 67 | try: 68 | preprocessors.append(AutoTokenizer.from_pretrained(src_name_or_path, subfolder=subfolder)) 69 | except Exception: 70 | pass 71 | 72 | try: 73 | preprocessors.append(AutoProcessor.from_pretrained(src_name_or_path, subfolder=subfolder)) 74 | except Exception: 75 | pass 76 | 77 | try: 78 | preprocessors.append(AutoFeatureExtractor.from_pretrained(src_name_or_path, subfolder=subfolder)) 79 | except Exception: 80 | pass 81 | return preprocessors 82 | 83 | 84 | def maybe_save_preprocessors(src_name_or_path: Union[str, Path], dest_dir: Union[str, Path], src_subfolder: str = ""): 85 | """ 86 | Saves the tokenizer, the processor and the feature extractor when found in `src_dir` in `dest_dir`. 87 | 88 | Args: 89 | src_dir (`Union[str, Path]`): 90 | The source directory from which to copy the files. 91 | dest_dir (`Union[str, Path]`): 92 | The destination directory to copy the files to. 
93 | src_subfolder (`str`, defaults to `""`): 94 | In case the preprocessor files are located inside a subfolder of the model directory / repo on the Hugging 95 | Face Hub, you can specify the subfolder name here. 96 | """ 97 | if not isinstance(dest_dir, Path): 98 | dest_dir = Path(dest_dir) 99 | 100 | dest_dir.mkdir(exist_ok=True) 101 | for preprocessor in maybe_load_preprocessors(src_name_or_path, subfolder=src_subfolder): 102 | preprocessor.save_pretrained(dest_dir) 103 | -------------------------------------------------------------------------------- /tests/test_modeling.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import gc 16 | import os 17 | import tempfile 18 | import unittest 19 | 20 | import numpy as np 21 | import requests 22 | import torch 23 | from parameterized import parameterized 24 | from PIL import Image 25 | from transformers import AutoFeatureExtractor, AutoModelForImageClassification, PretrainedConfig, pipeline, set_seed 26 | 27 | from optimum.furiosa import FuriosaAIModelForImageClassification 28 | from optimum.furiosa.utils import FURIOSA_ENF_FILE_NAME 29 | from optimum.utils import ( 30 | logging, 31 | ) 32 | 33 | 34 | SEED = 42 35 | 36 | logger = logging.get_logger() 37 | 38 | MODEL_DICT = { 39 | "mobilenet_v1": ["google/mobilenet_v1_0.75_192", {"pixel_values": [1, 3, 192, 192]}, {"logits": [1, 1001]}], 40 | "mobilenet_v2": [ 41 | "hf-internal-testing/tiny-random-MobileNetV2Model", 42 | {"pixel_values": [1, 3, 32, 32]}, 43 | {"logits": [1, 2]}, 44 | ], 45 | "resnet": ["hf-internal-testing/tiny-random-resnet", {"pixel_values": [1, 3, 224, 224]}, {"logits": [1, 1000]}], 46 | } 47 | 48 | 49 | TENSOR_ALIAS_TO_TYPE = { 50 | "pt": torch.Tensor, 51 | "np": np.ndarray, 52 | } 53 | 54 | 55 | class FuriosaAIModelIntegrationTest(unittest.TestCase): 56 | def __init__(self, *args, **kwargs): 57 | super().__init__(*args, **kwargs) 58 | self.MODEL_ID = "mohitsha/furiosa-resnet-tiny-beans" 59 | 60 | def test_load_from_hub_and_save_model(self): 61 | preprocessor = AutoFeatureExtractor.from_pretrained(self.MODEL_ID) 62 | url = "http://images.cocodataset.org/val2017/000000039769.jpg" 63 | image = Image.open(requests.get(url, stream=True).raw) 64 | inputs = preprocessor(images=image, return_tensors="pt") 65 | loaded_model = FuriosaAIModelForImageClassification.from_pretrained(self.MODEL_ID) 66 | self.assertIsInstance(loaded_model.config, PretrainedConfig) 67 | loaded_model_outputs = loaded_model(**inputs) 68 | 69 | with tempfile.TemporaryDirectory() as tmpdirname: 70 | loaded_model.save_pretrained(tmpdirname) 71 | del loaded_model 72 | folder_contents = os.listdir(tmpdirname) 73 | self.assertTrue(FURIOSA_ENF_FILE_NAME in folder_contents) 74 | model = FuriosaAIModelForImageClassification.from_pretrained(tmpdirname) 75 | 76 | outputs = model(**inputs) 77 | 
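# the model reloaded from the saved ENF artifact must reproduce the original outputs exactly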
self.assertTrue(torch.equal(loaded_model_outputs.logits, outputs.logits)) 78 | 79 | 80 | class FuriosaAIModelForImageClassificationIntegrationTest(unittest.TestCase): 81 | SUPPORTED_ARCHITECTURES = [ 82 | "mobilenet_v1", 83 | "mobilenet_v2", 84 | "resnet", 85 | ] 86 | 87 | FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES} 88 | FuriosaAIMODEL_CLASS = FuriosaAIModelForImageClassification 89 | TASK = "image-classification" 90 | 91 | @parameterized.expand(SUPPORTED_ARCHITECTURES) 92 | def test_compare_to_transformers(self, model_arch): 93 | model_id, input_shape_dict, output_shape_dict = MODEL_DICT[model_arch] 94 | set_seed(SEED) 95 | fai_model = FuriosaAIModelForImageClassification.from_pretrained( 96 | model_id, export=True, input_shape_dict=input_shape_dict, output_shape_dict=output_shape_dict 97 | ) 98 | self.assertIsInstance(fai_model.config, PretrainedConfig) 99 | transformers_model = AutoModelForImageClassification.from_pretrained(model_id) 100 | preprocessor = AutoFeatureExtractor.from_pretrained(model_id) 101 | url = "http://images.cocodataset.org/val2017/000000039769.jpg" 102 | image = Image.open(requests.get(url, stream=True).raw) 103 | inputs = preprocessor(images=image, return_tensors="pt") 104 | with torch.no_grad(): 105 | transformers_outputs = transformers_model(**inputs) 106 | for input_type in ["pt", "np"]: 107 | inputs = preprocessor(images=image, return_tensors=input_type) 108 | fai_outputs = fai_model(**inputs) 109 | self.assertIn("logits", fai_outputs) 110 | self.assertIsInstance(fai_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type]) 111 | # Compare tensor outputs 112 | self.assertTrue(torch.allclose(torch.Tensor(fai_outputs.logits), transformers_outputs.logits, atol=1e-4)) 113 | 114 | gc.collect() 115 | 116 | @parameterized.expand(SUPPORTED_ARCHITECTURES) 117 | def test_pipeline(self, model_arch): 118 | model_id, input_shape_dict, output_shape_dict = MODEL_DICT[model_arch] 119 | model = FuriosaAIModelForImageClassification.from_pretrained( 120 | model_id, export=True, input_shape_dict=input_shape_dict, output_shape_dict=output_shape_dict 121 | ) 122 | preprocessor = AutoFeatureExtractor.from_pretrained(model_id) 123 | pipe = pipeline("image-classification", model=model, feature_extractor=preprocessor) 124 | outputs = pipe("http://images.cocodataset.org/val2017/000000039769.jpg") 125 | self.assertGreaterEqual(outputs[0]["score"], 0.0) 126 | self.assertTrue(isinstance(outputs[0]["label"], str)) 127 | gc.collect() 128 | -------------------------------------------------------------------------------- /optimum/furiosa/modeling.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | import logging
16 | from typing import Callable, Dict, List, Optional, Union
17 | 
18 | import numpy as np
19 | import torch
20 | import tqdm
21 | import transformers
22 | from datasets import Dataset
23 | from transformers import (
24 |     AutoConfig,
25 |     AutoModel,
26 |     AutoModelForImageClassification,
27 |     EvalPrediction,
28 | )
29 | from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
30 | from transformers.modeling_outputs import (
31 |     ImageClassifierOutput,
32 | )
33 | 
34 | from .modeling_base import FuriosaAIBaseModel
35 | from .utils import FURIOSA_DTYPE_TO_NUMPY_DTYPE
36 | 
37 | 
38 | logger = logging.getLogger(__name__)
39 | 
40 | 
41 | _FEATURE_EXTRACTOR_FOR_DOC = "AutoFeatureExtractor"
42 | 
43 | MODEL_START_DOCSTRING = r"""
44 |     This model inherits from [`optimum.furiosa.FuriosaAIBaseModel`]. Check the superclass documentation for the generic methods the
45 |     library implements for all its models (such as downloading or saving).
46 |     Parameters:
47 |         model (`furiosa.runtime.model`): The main class used to run inference.
48 |         config (`transformers.PretrainedConfig`): [PretrainedConfig](https://huggingface.co/docs/transformers/main_classes/configuration#transformers.PretrainedConfig)
49 |             is the Model configuration class with all the parameters of the model.
50 |             Initializing with a config file does not load the weights associated with the model, only the configuration.
51 |             Check out the [`~furiosa.modeling.FuriosaAIBaseModel.from_pretrained`] method to load the model weights.
52 |         device (`str`, defaults to `"CPU"`):
53 |             The device type for which the model will be optimized. The resulting compiled model will contain nodes specific to this device.
54 |         furiosa_config (`Optional[Dict]`, defaults to `None`):
55 |             The dictionary containing the information related to the model compilation.
56 |         compile (`bool`, defaults to `True`):
57 |             Disable the model compilation during the loading step when set to `False`.
58 | """
59 | 
60 | IMAGE_INPUTS_DOCSTRING = r"""
61 |     Args:
62 |         pixel_values (`torch.Tensor`):
63 |             Pixel values corresponding to the images in the current batch.
64 |             Pixel values can be obtained from encoded images using [`AutoFeatureExtractor`](https://huggingface.co/docs/transformers/autoclass_tutorial#autofeatureextractor).
65 | """
66 | 
67 | 
68 | class FuriosaAIModel(FuriosaAIBaseModel):
69 |     base_model_prefix = "furiosa_model"
70 |     auto_model_class = AutoModel
71 | 
72 |     def __init__(
73 |         self,
74 |         model,
75 |         config: transformers.PretrainedConfig = None,
76 |         compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
77 |         label_names: Optional[List[str]] = None,
78 |         **kwargs,
79 |     ):
80 |         super().__init__(model, config, **kwargs)
81 |         # Avoid warnings when creating a transformers pipeline
82 |         AutoConfig.register(self.base_model_prefix, AutoConfig)
83 |         self.auto_model_class.register(AutoConfig, self.__class__)
84 |         self.device = torch.device("cpu")
85 | 
86 |         # Evaluation args
87 |         self.compute_metrics = compute_metrics
88 |         self.label_names = ["labels"] if label_names is None else label_names
89 | 
90 |     def to(self, device: str):
91 |         """
92 |         Use the specified `device` for inference. For example: "cpu" or "gpu". `device` can
93 |         be in upper or lower case. To speed up first inference, call `.compile()` after `.to()`.
94 |         """
95 |         self._device = device.upper()
96 |         self.sess = None
97 |         return self
98 | 
99 |     def forward(self, *args, **kwargs):
100 |         raise NotImplementedError
101 | 
102 |     def evaluation_loop(self, dataset: Dataset):
103 |         """
104 |         Runs evaluation and returns the metrics and predictions.
105 | 
106 |         Args:
107 |             dataset (`datasets.Dataset`):
108 |                 Dataset to use for the evaluation step.
109 |         """
110 |         logger.info("***** Running evaluation *****")
111 | 
112 |         # from transformers import EvalPrediction
113 |         from transformers.trainer_pt_utils import nested_concat
114 |         from transformers.trainer_utils import EvalLoopOutput
115 | 
116 |         all_preds = None
117 |         all_labels = None
118 |         for step, inputs in tqdm.tqdm(enumerate(dataset), total=len(dataset)):
119 |             has_labels = all(inputs.get(k) is not None for k in self.label_names)
120 |             if has_labels:
121 |                 labels = tuple(np.array([inputs.get(name)]) for name in self.label_names)
122 |                 if len(labels) == 1:
123 |                     labels = labels[0]
124 |             else:
125 |                 labels = None
126 | 
127 |             inputs = [
128 |                 np.array([inputs[key]], dtype=FURIOSA_DTYPE_TO_NUMPY_DTYPE[self.inputs_to_dtype[k]])
129 |                 for k, key in enumerate(self.input_names)
130 |                 if key in inputs
131 |             ]
132 | 
133 |             preds = self.sess.run(inputs)
134 |             if len(preds) == 1:
135 |                 preds = preds[0].numpy()
136 |             all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100)
137 |             all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100)
138 | 
139 |         if self.compute_metrics is not None and all_preds is not None and all_labels is not None:
140 |             metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
141 |         else:
142 |             metrics = {}
143 |         return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=len(dataset))
144 | 
145 | 
146 | IMAGE_CLASSIFICATION_EXAMPLE = r"""
147 |     Example of image classification using `transformers.pipelines`:
148 |     ```python
149 |     >>> from transformers import {processor_class}, pipeline
150 |     >>> from optimum.furiosa import {model_class}
151 | 
152 |     >>> preprocessor = {processor_class}.from_pretrained("{checkpoint}")
153 |     >>> model = {model_class}.from_pretrained("{checkpoint}", export=True, input_shape_dict={{"pixel_values": [1, 3, 224, 224]}}, output_shape_dict={{"logits": [1, 1000]}})
154 |     >>> pipe = pipeline("image-classification", model=model, feature_extractor=preprocessor)
155 |     >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
156 |     >>> outputs = pipe(url)
157 |     ```
158 | """
159 | 
160 | 
161 | @add_start_docstrings(
162 |     """
163 |     FuriosaAI Model with an ImageClassifierOutput for image classification tasks.
164 |     """,
165 |     MODEL_START_DOCSTRING,
166 | )
167 | class FuriosaAIModelForImageClassification(FuriosaAIModel):
168 |     export_feature = "image-classification"
169 |     auto_model_class = AutoModelForImageClassification
170 | 
171 |     def __init__(self, model=None, config=None, **kwargs):
172 |         super().__init__(model, config, **kwargs)
173 |         self.input_names = ["pixel_values"]
174 | 
175 |     @add_start_docstrings_to_model_forward(
176 |         IMAGE_INPUTS_DOCSTRING.format("batch_size, num_channels, height, width")
177 |         + IMAGE_CLASSIFICATION_EXAMPLE.format(
178 |             processor_class=_FEATURE_EXTRACTOR_FOR_DOC,
179 |             model_class="FuriosaAIModelForImageClassification",
180 |             checkpoint="microsoft/resnet-50",
181 |         )
182 |     )
183 |     def forward(
184 |         self,
185 |         pixel_values: Union[torch.Tensor, np.ndarray],
186 |         **kwargs,
187 |     ):
188 |         np_inputs = isinstance(pixel_values, np.ndarray)
189 |         if not np_inputs:
190 |             pixel_values = np.array(pixel_values)
191 | 
192 |         # Run inference
193 |         outputs = self.sess.run(pixel_values)
194 |         logits = torch.from_numpy(outputs[0].numpy()) if not np_inputs else outputs[0].numpy()
195 |         return ImageClassifierOutput(logits=logits)
196 | 
--------------------------------------------------------------------------------
/optimum/furiosa/configuration.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import asdict, dataclass
16 | from enum import Enum
17 | from typing import Optional, Sequence, Union
18 | 
19 | import onnx
20 | from datasets import Dataset
21 | 
22 | from furiosa.quantizer import CalibrationMethod, Calibrator
23 | from optimum.configuration_utils import BaseConfig
24 | 
25 | 
26 | DEFAULT_QUANTIZATION_CONFIG = {}
27 | 
28 | 
29 | @dataclass
30 | class CalibrationConfig:
31 |     """
32 |     CalibrationConfig is the configuration class handling all the FuriosaAI parameters related to the calibration
33 |     step of static quantization.
34 | 
35 |     Args:
36 |         dataset_name (`str`):
37 |             The name of the calibration dataset.
38 |         dataset_config_name (`str`):
39 |             The name of the calibration dataset configuration.
40 |         dataset_split (`str`):
41 |             Which split of the dataset is used to perform the calibration step.
42 |         dataset_num_samples (`int`):
43 |             The number of samples composing the calibration dataset.
44 |         method (`CalibrationMethod`):
45 |             The method chosen to calculate the activations quantization parameters using the calibration dataset.
46 |         percentage (`Optional[float]`, defaults to `None`):
47 |             The percentage to use when computing the activations quantization ranges when performing the calibration
48 |             step using the Percentile method.
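
    Example (a minimal sketch; assumes `dataset` is an already-loaded `datasets.Dataset`):
    ```python
    # Build a min-max asymmetric calibration configuration from the dataset metadata
    calibration_config = AutoCalibrationConfig.minmax_asym(dataset)
    ```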
49 |     """
50 | 
51 |     dataset_name: str
52 |     dataset_config_name: str
53 |     dataset_split: str
54 |     dataset_num_samples: int
55 |     method: CalibrationMethod
56 |     percentage: Optional[float] = None
57 | 
58 |     def create_calibrator(
59 |         self,
60 |         model: Union[onnx.ModelProto, bytes],
61 |     ) -> Calibrator:
62 |         return Calibrator(model, self.method, percentage=self.percentage)
63 | 
64 | 
65 | class AutoCalibrationConfig:
66 |     @staticmethod
67 |     def create_calibration_config(dataset: Dataset, method: CalibrationMethod, percentile: float = None):
68 |         return CalibrationConfig(
69 |             dataset_name=dataset.info.builder_name,
70 |             dataset_config_name=dataset.info.config_name,
71 |             dataset_split=str(dataset.split),
72 |             dataset_num_samples=dataset.num_rows,
73 |             method=method,
74 |             percentage=percentile,
75 |         )
76 | 
77 |     @staticmethod
78 |     def minmax_asym(dataset: Dataset) -> CalibrationConfig:
79 |         """
80 |         Args:
81 |             dataset (`Dataset`):
82 |                 The dataset to use when performing the calibration step.
83 | 
84 |         Returns:
85 |             The calibration configuration.
86 |         """
87 |         return AutoCalibrationConfig.create_calibration_config(
88 |             dataset,
89 |             method=CalibrationMethod.MIN_MAX_ASYM,
90 |         )
91 |     @staticmethod
92 |     def minmax_sym(dataset: Dataset) -> CalibrationConfig:
93 |         """
94 |         Args:
95 |             dataset (`Dataset`):
96 |                 The dataset to use when performing the calibration step.
97 | 
98 |         Returns:
99 |             The calibration configuration.
100 |         """
101 |         return AutoCalibrationConfig.create_calibration_config(
102 |             dataset,
103 |             method=CalibrationMethod.MIN_MAX_SYM,
104 |         )
105 | 
106 |     @staticmethod
107 |     def entropy_asym(
108 |         dataset: Dataset,
109 |     ) -> CalibrationConfig:
110 |         """
111 |         Args:
112 |             dataset (`Dataset`):
113 |                 The dataset to use when performing the calibration step.
114 | 
115 |         Returns:
116 |             The calibration configuration.
117 |         """
118 |         return AutoCalibrationConfig.create_calibration_config(
119 |             dataset,
120 |             method=CalibrationMethod.ENTROPY_ASYM,
121 |         )
122 | 
123 |     @staticmethod
124 |     def entropy_sym(
125 |         dataset: Dataset,
126 |     ) -> CalibrationConfig:
127 |         """
128 |         Args:
129 |             dataset (`Dataset`):
130 |                 The dataset to use when performing the calibration step.
131 | 
132 |         Returns:
133 |             The calibration configuration.
134 |         """
135 |         return AutoCalibrationConfig.create_calibration_config(
136 |             dataset,
137 |             method=CalibrationMethod.ENTROPY_SYM,
138 |         )
139 | 
140 |     @staticmethod
141 |     def percentiles_asym(dataset: Dataset, percentile: float = 99.999) -> CalibrationConfig:
142 |         """
143 |         Args:
144 |             dataset (`Dataset`):
145 |                 The dataset to use when performing the calibration step.
146 |             percentile (`float`):
147 |                 The percentile to use when computing the activations quantization ranges.
148 | 
149 |         Returns:
150 |             The calibration configuration.
151 |         """
152 |         return AutoCalibrationConfig.create_calibration_config(
153 |             dataset,
154 |             method=CalibrationMethod.PERCENTILE_ASYM,
155 |             percentile=percentile,
156 |         )
157 | 
158 |     @staticmethod
159 |     def percentiles_sym(dataset: Dataset, percentile: float = 99.999) -> CalibrationConfig:
160 |         """
161 |         Args:
162 |             dataset (`Dataset`):
163 |                 The dataset to use when performing the calibration step.
164 |             percentile (`float`):
165 |                 The percentile to use when computing the activations quantization ranges.
166 | 
167 |         Returns:
168 |             The calibration configuration.
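
        Example (illustrative; the percentile value shown is an assumption):
        ```python
        # Tighter percentile than the 99.999 default
        calibration_config = AutoCalibrationConfig.percentiles_sym(dataset, percentile=99.99)
        ```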
169 |         """
170 |         return AutoCalibrationConfig.create_calibration_config(
171 |             dataset,
172 |             method=CalibrationMethod.PERCENTILE_SYM,
173 |             percentile=percentile,
174 |         )
175 | 
176 |     @staticmethod
177 |     def mse_asym(dataset: Dataset) -> CalibrationConfig:
178 |         """
179 |         Args:
180 |             dataset (`Dataset`):
181 |                 The dataset to use when performing the calibration step.
182 | 
183 |         Returns:
184 |             The calibration configuration.
185 |         """
186 |         return AutoCalibrationConfig.create_calibration_config(
187 |             dataset,
188 |             method=CalibrationMethod.MSE_ASYM,
189 |         )
190 | 
191 |     @staticmethod
192 |     def mse_sym(dataset: Dataset) -> CalibrationConfig:
193 |         """
194 |         Args:
195 |             dataset (`Dataset`):
196 |                 The dataset to use when performing the calibration step.
197 | 
198 |         Returns:
199 |             The calibration configuration.
200 |         """
201 |         return AutoCalibrationConfig.create_calibration_config(
202 |             dataset,
203 |             method=CalibrationMethod.MSE_SYM,
204 |         )
205 | 
206 |     @staticmethod
207 |     def sqnr_asym(dataset: Dataset) -> CalibrationConfig:
208 |         """
209 |         Args:
210 |             dataset (`Dataset`):
211 |                 The dataset to use when performing the calibration step.
212 | 
213 |         Returns:
214 |             The calibration configuration.
215 |         """
216 |         return AutoCalibrationConfig.create_calibration_config(
217 |             dataset,
218 |             method=CalibrationMethod.SQNR_ASYM,
219 |         )
220 | 
221 |     @staticmethod
222 |     def sqnr_sym(dataset: Dataset) -> CalibrationConfig:
223 |         """
224 |         Args:
225 |             dataset (`Dataset`):
226 |                 The dataset to use when performing the calibration step.
227 | 
228 |         Returns:
229 |             The calibration configuration.
230 |         """
231 |         return AutoCalibrationConfig.create_calibration_config(
232 |             dataset,
233 |             method=CalibrationMethod.SQNR_SYM,
234 |         )
235 | 
236 | 
237 | @dataclass
238 | class QuantizationConfig:
239 |     """
240 |     QuantizationConfig is the configuration class handling all the FuriosaAI quantization parameters.
241 | 
242 |     Args:
243 |         with_quantize (`bool`, defaults to `True`):
244 |             Whether to put a Quantize operator at the beginning of the resulting model.
245 |         normalized_pixel_outputs (`Optional[Sequence[int]]`, defaults to `None`):
246 |             A sequence of indices of output tensors in the ONNX model that produce pixel values in a normalized format
247 |             ranging from 0.0 to 1.0. If specified, the corresponding output tensors in the resulting quantized model
248 |             will generate pixel values in an unnormalized format from 0 to 255, represented as unsigned 8-bit integers (uint8).
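
    Example (a minimal sketch):
    ```python
    # Default settings: insert a Quantize operator, keep pixel outputs normalized
    qconfig = QuantizationConfig(with_quantize=True, normalized_pixel_outputs=None)
    ```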
249 | """ 250 | 251 | with_quantize: bool = True 252 | normalized_pixel_outputs: Sequence[int] = None 253 | 254 | 255 | class FuriosaAIConfig(BaseConfig): 256 | CONFIG_NAME = "furiosa_config.json" 257 | FULL_CONFIGURATION_FILE = "furiosa_config.json" 258 | 259 | def __init__( 260 | self, 261 | opset: Optional[int] = None, 262 | quantization: Optional[QuantizationConfig] = None, 263 | calibration: Optional[CalibrationConfig] = None, 264 | **kwargs, 265 | ): 266 | super().__init__() 267 | self.quantization = self.dataclass_to_dict(quantization) 268 | self.calibration = self.dataclass_to_dict(calibration) 269 | self.optimum_version = kwargs.pop("optimum_version", None) 270 | 271 | @staticmethod 272 | def dataclass_to_dict(config) -> dict: 273 | new_config = {} 274 | if config is None: 275 | return new_config 276 | if isinstance(config, dict): 277 | return config 278 | for k, v in asdict(config).items(): 279 | if isinstance(v, Enum): 280 | v = v.name 281 | elif isinstance(v, list): 282 | v = [elem.name if isinstance(elem, Enum) else elem for elem in v] 283 | new_config[k] = v 284 | return new_config 285 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /examples/quantization/image-classification/run_image_classification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | # Copyright 2023 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at
8 | #
9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | """ Quantizing the library models for image classification."""
18 | # You can also adapt this script to your own image classification task. Pointers for this are left as comments.
19 | import json
20 | import logging
21 | import os
22 | import sys
23 | from dataclasses import dataclass, field
24 | from functools import partial
25 | from pathlib import Path
26 | from typing import Optional
27 | 
28 | import datasets
29 | import numpy as np
30 | import torch
31 | import transformers
32 | from datasets import load_dataset
33 | from evaluate import load
34 | from torchvision.transforms import CenterCrop, Compose, Normalize, Resize, ToTensor
35 | from transformers import AutoConfig, AutoFeatureExtractor, EvalPrediction, HfArgumentParser, TrainingArguments
36 | from transformers.utils.versions import require_version
37 | 
38 | from optimum.furiosa import FuriosaAIModelForImageClassification, FuriosaAIQuantizer
39 | from optimum.furiosa.configuration import AutoCalibrationConfig, QuantizationConfig
40 | from optimum.furiosa.utils import export_model_to_onnx
41 | 
42 | 
43 | logger = logging.getLogger(__name__)
44 | 
45 | require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")
46 | 
47 | 
48 | @dataclass
49 | class DataTrainingArguments:
50 |     """
51 |     Arguments pertaining to what data we are going to input our model for training and eval.
52 | 
53 |     Using `HfArgumentParser` we can turn this class
54 |     into argparse arguments to be able to specify them on
55 |     the command line.
56 |     """
57 | 
58 |     dataset_name: Optional[str] = field(
59 |         default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
60 |     )
61 |     dataset_config_name: Optional[str] = field(
62 |         default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
63 |     )
64 |     max_seq_length: int = field(
65 |         default=128,
66 |         metadata={
67 |             "help": "The maximum total input sequence length after tokenization. Sequences longer "
68 |             "than this will be truncated, sequences shorter will be padded."
69 |         },
70 |     )
71 |     overwrite_cache: bool = field(
72 |         default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."}
73 |     )
74 |     max_eval_samples: Optional[int] = field(
75 |         default=None,
76 |         metadata={
77 |             "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
78 |             "value if set."
79 |         },
80 |     )
81 |     max_predict_samples: Optional[int] = field(
82 |         default=None,
83 |         metadata={
84 |             "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
85 |             "value if set."
86 |         },
87 |     )
88 |     train_dir: Optional[str] = field(default=None, metadata={"help": "A directory path for the training data."})
89 |     validation_dir: Optional[str] = field(default=None, metadata={"help": "A directory path for the validation data."})
90 | 
91 | 
92 | @dataclass
93 | class ModelArguments:
94 |     """
95 |     Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
96 |     """
97 | 
98 |     model_name_or_path: str = field(
99 |         metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
100 |     )
101 |     cache_dir: Optional[str] = field(
102 |         default=None,
103 |         metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
104 |     )
105 |     batch_size: int = field(
106 |         default=1,
107 |         metadata={"help": "The batch size for the model."},
108 |     )
109 |     num_labels: int = field(
110 |         default=3,
111 |         metadata={"help": "The number of labels for the model."},
112 |     )
113 | 
114 | 
115 | @dataclass
116 | class OptimizationArguments:
117 |     """
118 |     Arguments pertaining to what type of optimization we are going to apply on the model.
119 |     """
120 | 
121 |     quantization_approach: str = field(
122 |         default="static",
123 |         metadata={"help": "The quantization approach. Supported approaches are static and dynamic."},
124 |     )
125 |     calibration_method: str = field(
126 |         default="minmax_asym",
127 |         metadata={
128 |             "help": "The method chosen to calculate the activation quantization parameters using the calibration "
129 |             "dataset. Currently supported calibration methods are minmax, entropy and percentile."
130 |         },
131 |     )
132 |     num_calibration_samples: int = field(
133 |         default=100,
134 |         metadata={"help": "Number of examples to use for the calibration step resulting from static quantization."},
135 |     )
136 |     num_calibration_shards: int = field(
137 |         default=1,
138 |         metadata={
139 |             "help": "How many shards to split the calibration dataset into. Useful for the entropy and percentile "
140 |             "calibration method."
141 |         },
142 |     )
143 |     calibration_batch_size: int = field(
144 |         default=1,
145 |         metadata={"help": "The batch size for the calibration step."},
146 |     )
147 |     calibration_histogram_percentile: float = field(
148 |         default=99.999,
149 |         metadata={"help": "The percentile used for the percentile calibration method."},
150 |     )
151 | 
152 | 
153 | def main():
154 |     # We now keep distinct sets of args, for a cleaner separation of concerns.
155 |     parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments, OptimizationArguments))
156 |     if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
157 |         # If we pass only one argument to the script and it's the path to a json file,
158 |         # let's parse it to get our arguments.
159 |         model_args, data_args, training_args, optim_args = parser.parse_json_file(
160 |             json_file=os.path.abspath(sys.argv[1])
161 |         )
162 |     else:
163 |         model_args, data_args, training_args, optim_args = parser.parse_args_into_dataclasses()
164 | 
165 |     # Setup logging
166 |     logging.basicConfig(
167 |         format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
168 |         datefmt="%m/%d/%Y %H:%M:%S",
169 |         handlers=[logging.StreamHandler(sys.stdout)],
170 |     )
171 | 
172 |     log_level = training_args.get_process_log_level()
173 |     logger.setLevel(log_level)
174 |     datasets.utils.logging.set_verbosity(log_level)
175 |     transformers.utils.logging.set_verbosity(log_level)
176 |     transformers.utils.logging.enable_default_handler()
177 |     transformers.utils.logging.enable_explicit_format()
178 | 
179 |     logger.info(f"Optimization with the following parameters {optim_args}")
180 | 
181 |     if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
182 |         raise ValueError(
183 |             f"Output directory ({training_args.output_dir}) already exists and is not empty. "
184 |             "Use --overwrite_output_dir to overcome."
185 |         )
186 | 
187 |     # Sanity checks
188 |     if data_args.dataset_name is None and data_args.train_dir is None and data_args.validation_dir is None:
189 |         raise ValueError("Need either a dataset name or a training/validation folder.")
190 | 
191 |     os.makedirs(training_args.output_dir, exist_ok=True)
192 | 
193 |     # Get the datasets: you can either provide your own training and evaluation files (see below)
194 |     # or specify a Dataset from the hub (the dataset will be downloaded automatically from the datasets Hub).
195 |     if data_args.dataset_name is not None:
196 |         # Downloading and loading a dataset from the hub.
197 |         dataset = load_dataset(data_args.dataset_name)
198 |     else:
199 |         data_files = {}
200 |         if data_args.train_dir is not None:
201 |             data_files["train"] = os.path.join(data_args.train_dir, "**")
202 |         if data_args.validation_dir is not None:
203 |             data_files["validation"] = os.path.join(data_args.validation_dir, "**")
204 |         dataset = load_dataset(
205 |             "imagefolder",
206 |             data_files=data_files,
207 |             cache_dir=model_args.cache_dir,
208 |             task="image-classification",
209 |         )
210 |     # See more about loading custom images at
211 |     # https://huggingface.co/docs/datasets/v2.0.0/en/image_process#imagefolder.
212 | 
213 |     labels_column = (
214 |         "labels" if "labels" in dataset["validation"].column_names else dataset["validation"].column_names[1]
215 |     )
216 | 
217 |     feature_extractor = AutoFeatureExtractor.from_pretrained(model_args.model_name_or_path)
218 | 
219 |     # Define torchvision transforms to be applied to each image.
220 |     normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
221 |     image_size = feature_extractor.size["shortest_edge"]
222 |     transforms = Compose(
223 |         [
224 |             Resize(image_size),
225 |             CenterCrop(image_size),
226 |             ToTensor(),
227 |             normalize,
228 |         ]
229 |     )
230 | 
231 |     def preprocess_function(example_batch):
232 |         """Apply transforms across a batch."""
233 |         example_batch["pixel_values"] = [
234 |             transforms(image.convert("RGB")).to(torch.float32).numpy() for image in example_batch["image"]
235 |         ]
236 |         return example_batch
237 | 
238 |     metric = load("accuracy")
239 | 
240 |     # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
241 |     # predictions and label_ids field) and has to return a dictionary string to float.
242 | def compute_metrics(p: EvalPrediction): 243 | preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions 244 | preds = np.argmax(preds, axis=1) 245 | 246 | result = metric.compute(predictions=preds, references=p.label_ids) 247 | return result 248 | 249 | # Export the model 250 | export_model_to_onnx( 251 | model_args.model_name_or_path, 252 | save_dir=training_args.output_dir, 253 | input_shape_dict={"pixel_values": [model_args.batch_size, 3, image_size, image_size]}, 254 | output_shape_dict={"logits": [model_args.batch_size, model_args.num_labels]}, 255 | ) 256 | 257 | # Create the quantizer 258 | quantizer = FuriosaAIQuantizer.from_pretrained(training_args.output_dir, file_name="model.onnx") 259 | 260 | # Create the quantization configuration containing all the quantization parameters 261 | qconfig = QuantizationConfig() 262 | 263 | ranges = None 264 | 265 | calibration_dataset = dataset["train"] 266 | if optim_args.num_calibration_samples is not None: 267 | calibration_dataset = calibration_dataset.shuffle(seed=training_args.seed).select( 268 | range(optim_args.num_calibration_samples) 269 | ) 270 | 271 | # all images are loaded in memory, which could prove expensive if num_calibration_samples is large 272 | calibration_dataset = calibration_dataset.map( 273 | partial(preprocess_function), 274 | batched=True, 275 | load_from_cache_file=not data_args.overwrite_cache, 276 | desc="Running preprocessing on calibration dataset", 277 | ) 278 | 279 | # Remove the unnecessary columns of the calibration dataset before the calibration step 280 | calibration_dataset = quantizer.clean_calibration_dataset(calibration_dataset) 281 | 282 | # Create the calibration configuration given the selected calibration method 283 | if optim_args.calibration_method == "percentile_asym": 284 | calibration_config = AutoCalibrationConfig.percentiles_asym( 285 | calibration_dataset, 286 | percentile=optim_args.calibration_histogram_percentile, 287 | ) 288 | else: 289 | calibration_config = AutoCalibrationConfig.minmax_asym(calibration_dataset) 290 | 291 | if not 1 <= optim_args.num_calibration_shards <= len(calibration_dataset): 292 | raise ValueError( 293 | f"Invalid value of number of shards {optim_args.num_calibration_shards} chosen to split the calibration" 294 | f" dataset, should be higher than 0 and lower or equal to the number of samples " 295 | f"{len(calibration_dataset)}." 
296 | ) 297 | 298 | for i in range(optim_args.num_calibration_shards): 299 | shard = calibration_dataset.shard(optim_args.num_calibration_shards, i) 300 | quantizer.partial_fit( 301 | dataset=shard, 302 | calibration_config=calibration_config, 303 | batch_size=optim_args.calibration_batch_size, 304 | ) 305 | ranges = quantizer.compute_ranges() 306 | 307 | # Apply quantization on the model 308 | quantizer.quantize( 309 | save_dir=training_args.output_dir, 310 | calibration_tensors_range=ranges, 311 | quantization_config=qconfig, 312 | ) 313 | 314 | # Evaluation 315 | if training_args.do_eval: 316 | logger.info("*** Evaluate ***") 317 | 318 | model_config = AutoConfig.from_pretrained(model_args.model_name_or_path) 319 | eval_dataset = dataset["validation"] 320 | if data_args.max_eval_samples is not None: 321 | eval_dataset = eval_dataset.shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples)) 322 | 323 | try: 324 | eval_dataset = eval_dataset.align_labels_with_mapping( 325 | label2id=model_config.label2id, label_column=labels_column 326 | ) 327 | except Exception: 328 | logger.warning( 329 | f"\nModel label mapping: {model_config.label2id}" 330 | f"\nDataset label features: {eval_dataset.features[labels_column]}" 331 | f"\nCould not guarantee the model label mapping and the dataset labels match." 332 | f" Evaluation results may suffer from a wrong matching." 333 | ) 334 | 335 | # Set the validation transforms 336 | eval_dataset = eval_dataset.with_transform(preprocess_function) 337 | 338 | furiosa_model = FuriosaAIModelForImageClassification( 339 | Path(training_args.output_dir) / "model_quantized.dfg", 340 | compute_metrics=compute_metrics, 341 | label_names=[labels_column], 342 | ) 343 | outputs = furiosa_model.evaluation_loop(eval_dataset) 344 | # Save metrics 345 | with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f: 346 | json.dump(outputs.metrics, f, indent=4, sort_keys=True) 347 | 348 | 349 | if __name__ == "__main__": 350 | main() 351 | -------------------------------------------------------------------------------- /optimum/furiosa/quantization.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import logging 16 | import os 17 | from pathlib import Path 18 | from typing import TYPE_CHECKING, Callable, Dict, Optional, Tuple, Union 19 | 20 | import numpy as np 21 | import onnx 22 | import tqdm 23 | from datasets import Dataset, load_dataset 24 | from transformers import AutoConfig 25 | 26 | from furiosa.optimizer import optimize_model 27 | from furiosa.quantizer import quantize 28 | 29 | from .configuration import CalibrationConfig, FuriosaAIConfig, QuantizationConfig 30 | from .modeling import FuriosaAIModel 31 | from .quantization_base import OptimumQuantizer 32 | 33 | 34 | if TYPE_CHECKING: 35 | from transformers import PretrainedConfig 36 | 37 | LOGGER = logging.getLogger(__name__) 38 | 39 | 40 | class FuriosaAICalibrationDataReader: 41 | __slots__ = ["batch_size", "dataset", "_dataset_iter", "input_datatypes"] 42 | 43 | def __init__(self, dataset: Dataset, input_datatypes, batch_size: int = 1): 44 | if dataset is None: 45 | raise ValueError("Provided dataset is None.") 46 | 47 | if input_datatypes is None: 48 | raise ValueError("Provided input_datatypes is None.") 49 | 50 | if batch_size <= 0: 51 | raise ValueError(f"Provided batch_size should be >= 1 (got: {batch_size}).") 52 | 53 | self.dataset = dataset 54 | self.input_datatypes = input_datatypes 55 | self.batch_size = batch_size 56 | 57 | self._dataset_iter = iter(self.dataset) 58 | 59 | def __len__(self): 60 | return len(self.dataset) // self.batch_size 61 | 62 | def __next__(self): 63 | featurized_samples = None 64 | try: 65 | featurized_samples = [] 66 | for _ in range(self.batch_size): 67 | sample = next(self._dataset_iter) 68 | 69 | input_list = [[] for i in range(len(sample))] 70 | for i, name in enumerate(sample): 71 | input_list[i] += [sample[name]] 72 | input_list = [ 73 | np.array(d, onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[self.input_datatypes[i]]) 74 | for i, d in enumerate(input_list) 75 | ] 76 | 77 | featurized_samples.append(input_list) 78 | 79 | except StopIteration: 80 | raise StopIteration 81 | 82 | if len(featurized_samples) > 0: 83 | return featurized_samples 84 | 85 | raise StopIteration 86 | 87 | def __iter__(self): 88 | return self 89 | 90 | 91 | class FuriosaAIQuantizer(OptimumQuantizer): 92 | """ 93 | Handles the FuriosaAI quantization process for models shared on huggingface.co/models. 94 | """ 95 | 96 | def __init__(self, model_path: Path, config: Optional["PretrainedConfig"] = None): 97 | """ 98 | Args: 99 | model_path (`Path`): 100 | Path to the onnx model files you want to quantize. 101 | config (`Optional[PretrainedConfig]`, *optional*): 102 | The configuration of the model. 103 | """ 104 | super().__init__() 105 | self.model_path = model_path 106 | self.config = config 107 | if self.config is None: 108 | try: 109 | self.config = AutoConfig.from_pretrained(self.model_path.parent) 110 | except OSError: 111 | LOGGER.warning( 112 | f"Could not load the config for {self.model_path} automatically, this might make " 113 | "the quantized model harder to use because it will not be able to be loaded by an FuriosaAIModel without " 114 | "having to specify the configuration explicitly." 115 | ) 116 | self._calibrator = None 117 | self._calibration_config = None 118 | 119 | @classmethod 120 | def from_pretrained( 121 | cls, 122 | model_or_path: Union["FuriosaAIQuantizer", str, Path], 123 | file_name: Optional[str] = None, 124 | ) -> "FuriosaAIQuantizer": 125 | """ 126 | Instantiates a `FuriosaAIQuantizer` from a model path. 
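
        Example (a minimal sketch; the directory path is illustrative):
        ```python
        # "./my_model_directory" is a placeholder for a directory containing model.onnx
        quantizer = FuriosaAIQuantizer.from_pretrained("./my_model_directory", file_name="model.onnx")
        ```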
127 | 
128 |         Args:
129 |             model_or_path (`Union[FuriosaAIModel, str, Path]`):
130 |                 Can be either:
131 |                     - A path to a saved exported ONNX Intermediate Representation (IR) model, e.g., `./my_model_directory/`.
132 |                     - Or a `FuriosaAIModelForXX` class, e.g., `FuriosaAIModelForImageClassification`.
133 |             file_name (`Optional[str]`, *optional*):
134 |                 Overwrites the default model file name from `"model.onnx"` to `file_name`.
135 |                 This allows you to load different model files from the same repository or directory.
136 |         Returns:
137 |             An instance of `FuriosaAIQuantizer`.
138 |         """
139 |         furiosa_quantizer_error_message = "FuriosaAIQuantizer does not support multi-file quantization. Please create separate FuriosaAIQuantizer instances for each model/file, by passing the argument `file_name` to FuriosaAIQuantizer.from_pretrained()."
140 | 
141 |         if isinstance(model_or_path, str):
142 |             model_or_path = Path(model_or_path)
143 | 
144 |         path = None
145 |         if isinstance(model_or_path, Path) and file_name is None:
146 |             onnx_files = list(model_or_path.glob("*.onnx"))
147 |             if len(onnx_files) == 0:
148 |                 raise FileNotFoundError(f"Could not find any model file in {model_or_path}")
149 |             elif len(onnx_files) > 1:
150 |                 raise RuntimeError(
151 |                     f"Found too many ONNX model files in {model_or_path}. {furiosa_quantizer_error_message}"
152 |                 )
153 |             file_name = onnx_files[0].name
154 | 
155 |         if isinstance(model_or_path, FuriosaAIModel):
156 |             if path is None:
157 |                 if isinstance(model_or_path.model, str) and model_or_path.model.endswith(".onnx"):
158 |                     path = Path(model_or_path.model)
159 |                 else:
160 |                     raise ValueError(
161 |                         "Currently, quantization of only ONNX files is supported using the optimum-furiosa repository!"
162 |                     )
163 |         elif os.path.isdir(model_or_path):
164 |             path = Path(model_or_path) / file_name
165 |         else:
166 |             raise ValueError(f"Unable to load model from {model_or_path}.")
167 |         return cls(path)
168 | 
169 |     def fit(
170 |         self,
171 |         dataset: Dataset,
172 |         calibration_config: CalibrationConfig,
173 |         batch_size: int = 1,
174 |     ) -> Dict[str, Tuple[float, float]]:
175 |         """
176 |         Performs the calibration step and computes the quantization ranges.
177 | 
178 |         Args:
179 |             dataset (`Dataset`):
180 |                 The dataset to use when performing the calibration step.
181 |             calibration_config ([`~CalibrationConfig`]):
182 |                 The configuration containing the parameters related to the calibration step.
183 |             batch_size (`int`, *optional*, defaults to 1):
184 |                 The batch size to use when collecting the quantization range values.
185 | 
186 |         Returns:
187 |             The dictionary mapping node names to their quantization ranges.
188 |         """
189 |         # If a dataset is provided, then we are in a static quantization mode
190 |         LOGGER.info(
191 |             f"Using static quantization schema ("
192 |             f"dataset: {calibration_config.dataset_name}, method: {calibration_config.method}"
193 |             f")"
194 |         )
195 | 
196 |         self.partial_fit(
197 |             dataset,
198 |             calibration_config,
199 |             batch_size,
200 |         )
201 |         return self.compute_ranges()
202 | 
203 |     def _load_model_and_optimize(self):
204 |         model = onnx.load(Path(self.model_path).as_posix())
205 |         self.onnx_model = optimize_model(model)
206 | 
207 |     def partial_fit(self, dataset: Dataset, calibration_config: CalibrationConfig, batch_size: int = 1):
208 |         """
209 |         Performs the calibration step and collects the quantization ranges without computing them.
210 | 
211 |         Args:
212 |             dataset (`Dataset`):
213 |                 The dataset to use when performing the calibration step.
214 |             calibration_config (`CalibrationConfig`):
215 |                 The configuration containing the parameters related to the calibration step.
216 |             batch_size (`int`, *optional*, defaults to 1):
217 |                 The batch size to use when collecting the quantization range values.
218 |         """
219 |         self._calibration_config = calibration_config
220 | 
221 |         # If no calibrator, then create one
222 |         if calibration_config.method is not None:
223 |             LOGGER.info(f"Creating calibrator: {calibration_config.method}({calibration_config})")
224 |             self._load_model_and_optimize()
225 | 
226 |             self._calibrator = calibration_config.create_calibrator(
227 |                 model=self.onnx_model,
228 |             )
229 | 
230 |         def get_input_datatypes(model):
231 |             input_datatypes = []
232 | 
233 |             for input in model.graph.input:
234 |                 input_type = input.type.tensor_type.elem_type
235 |                 input_datatypes.extend([input_type])
236 | 
237 |             return input_datatypes
238 | 
239 |         input_datatypes = get_input_datatypes(self.onnx_model)
240 | 
241 |         LOGGER.info("Collecting tensors statistics...")
242 |         reader = FuriosaAICalibrationDataReader(dataset, input_datatypes, batch_size)
243 |         for data in tqdm.tqdm(reader):
244 |             self._calibrator.collect_data(data)
245 | 
246 |     def compute_ranges(self) -> Dict[str, Tuple[float, float]]:
247 |         """
248 |         Computes the quantization ranges.
249 | 
250 |         Returns:
251 |             The dictionary mapping node names to their quantization ranges.
252 |         """
253 |         if self._calibrator is None:
254 |             raise ValueError(
255 |                 "Calibrator is None, please call the `partial_fit` or `fit` method at least once to compute ranges."
256 |             )
257 | 
258 |         LOGGER.info("Computing calibration ranges")
259 |         return self._calibrator.compute_range()
260 | 
261 |     def quantize(
262 |         self,
263 |         quantization_config: QuantizationConfig,
264 |         save_dir: Union[str, Path],
265 |         file_suffix: Optional[str] = "quantized",
266 |         calibration_tensors_range: Optional[Dict[str, Tuple[float, float]]] = None,
267 |     ) -> Path:
268 |         """
269 |         Quantizes a model given the optimization specifications defined in `quantization_config`.
270 | 
271 |         Args:
272 |             quantization_config (`QuantizationConfig`):
273 |                 The configuration containing the parameters related to quantization.
274 |             save_dir (`Union[str, Path]`):
275 |                 The directory where the quantized model should be saved.
276 |             file_suffix (`Optional[str]`, *optional*, defaults to `"quantized"`):
277 |                 The file suffix used to save the quantized model.
278 |             calibration_tensors_range (`Optional[Dict[NodeName, Tuple[float, float]]]`, *optional*):
279 |                 The dictionary mapping node names to their quantization ranges, used and required only when applying
280 |                 static quantization.
281 | 
282 |         Returns:
283 |             The path of the resulting quantized model.
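
        Example (a minimal sketch; assumes `ranges` was returned by `fit` or `compute_ranges`):
        ```python
        qconfig = QuantizationConfig()
        # `ranges` maps node names to (min, max) calibration values
        quantizer.quantize(quantization_config=qconfig, save_dir="quantized", calibration_tensors_range=ranges)
        ```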
284 | """ 285 | 286 | save_dir = Path(save_dir) 287 | save_dir.mkdir(parents=True, exist_ok=True) 288 | 289 | if self.onnx_model is None: 290 | self._load_model_and_optimize() 291 | 292 | LOGGER.info("Quantizing model...") 293 | model_quantized = quantize( 294 | self.onnx_model, 295 | calibration_tensors_range, 296 | with_quantize=quantization_config.with_quantize, 297 | normalized_pixel_outputs=quantization_config.normalized_pixel_outputs, 298 | ) 299 | 300 | suffix = f"_{file_suffix}" if file_suffix else "" 301 | quantized_model_path = save_dir.joinpath(f"{self.model_path.stem}{suffix}").with_suffix(".dfg") 302 | LOGGER.info(f"Saving quantized model at: {save_dir}") 303 | with open(quantized_model_path.as_posix(), "wb") as f: 304 | f.write(bytes(model_quantized)) 305 | 306 | # Create and save the configuration summarizing all the parameters related to quantization 307 | furiosa_config = FuriosaAIConfig(quantization=quantization_config, calibration=self._calibration_config) 308 | furiosa_config.save_pretrained(save_dir) 309 | 310 | if self.config is not None: 311 | self.config.save_pretrained(save_dir) 312 | 313 | return Path(save_dir) 314 | 315 | def get_calibration_dataset( 316 | self, 317 | dataset_name: str, 318 | num_samples: int = 100, 319 | dataset_config_name: Optional[str] = None, 320 | dataset_split: Optional[str] = None, 321 | preprocess_function: Optional[Callable] = None, 322 | preprocess_batch: bool = True, 323 | seed: int = 2016, 324 | use_auth_token: bool = False, 325 | ) -> Dataset: 326 | """ 327 | Creates the calibration `datasets.Dataset` to use for the post-training static quantization calibration step. 328 | 329 | Args: 330 | dataset_name (`str`): 331 | The dataset repository name on the Hugging Face Hub or path to a local directory containing data files 332 | to load to use for the calibration step. 333 | num_samples (`int`, *optional*, defaults to 100): 334 | The maximum number of samples composing the calibration dataset. 335 | dataset_config_name (`Optional[str]`, *optional*): 336 | The name of the dataset configuration. 337 | dataset_split (`Optional[str]`, *optional*): 338 | Which split of the dataset to use to perform the calibration step. 339 | preprocess_function (`Optional[Callable]`, *optional*): 340 | Processing function to apply to each example after loading dataset. 341 | preprocess_batch (`bool`, *optional*, defaults to `True`): 342 | Whether the `preprocess_function` should be batched. 343 | seed (`int`, *optional*, defaults to 2016): 344 | The random seed to use when shuffling the calibration dataset. 345 | use_auth_token (`bool`, *optional*, defaults to `False`): 346 | Whether to use the token generated when running `transformers-cli login` (necessary for some datasets 347 | like ImageNet). 348 | Returns: 349 | The calibration `datasets.Dataset` to use for the post-training static quantization calibration 350 | step. 
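
        Example (illustrative; the dataset name is an assumption):
        ```python
        # "beans" is a placeholder dataset identifier from the Hugging Face Hub
        calibration_dataset = quantizer.get_calibration_dataset(
            "beans", num_samples=100, dataset_split="train"
        )
        ```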
351 | """ 352 | calib_dataset = load_dataset( 353 | dataset_name, 354 | name=dataset_config_name, 355 | split=dataset_split, 356 | use_auth_token=use_auth_token, 357 | ) 358 | 359 | if num_samples is not None: 360 | num_samples = min(num_samples, len(calib_dataset)) 361 | calib_dataset = calib_dataset.shuffle(seed=seed).select(range(num_samples)) 362 | 363 | if preprocess_function is not None: 364 | processed_calib_dataset = calib_dataset.map(preprocess_function, batched=preprocess_batch) 365 | else: 366 | processed_calib_dataset = calib_dataset 367 | 368 | return self.clean_calibration_dataset(processed_calib_dataset) 369 | 370 | def clean_calibration_dataset(self, dataset: Dataset) -> Dataset: 371 | model = onnx.load(self.model_path) 372 | model_inputs = {input.name for input in model.graph.input} 373 | ignored_columns = list(set(dataset.column_names) - model_inputs) 374 | return dataset.remove_columns(ignored_columns) 375 | -------------------------------------------------------------------------------- /optimum/furiosa/modeling_base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | from pathlib import Path 17 | from shutil import copyfile 18 | from tempfile import TemporaryDirectory 19 | from typing import Dict, Optional, Tuple, Union 20 | 21 | import onnx 22 | from huggingface_hub import hf_hub_download 23 | from transformers import PretrainedConfig 24 | from transformers.file_utils import add_start_docstrings 25 | 26 | # Import Furiosa SDK 27 | from furiosa import optimizer 28 | from furiosa.runtime import session 29 | from furiosa.tools.compiler.api import compile 30 | from optimum.exporters.onnx import main_export 31 | from optimum.modeling_base import OptimizedModel 32 | 33 | from .utils import ( 34 | FURIOSA_ENF_FILE_NAME, 35 | FURIOSA_QUANTIZED_FILE_NAME, 36 | ONNX_WEIGHTS_NAME, 37 | ONNX_WEIGHTS_NAME_STATIC, 38 | maybe_load_preprocessors, 39 | maybe_save_preprocessors, 40 | ) 41 | 42 | 43 | logger = logging.getLogger(__name__) 44 | 45 | 46 | @add_start_docstrings( 47 | """ 48 | Base FuriosaAIModel class. 
49 | """, 50 | ) 51 | class FuriosaAIBaseModel(OptimizedModel): 52 | auto_model_class = None 53 | export_feature = None 54 | 55 | def __init__( 56 | self, 57 | model: Union[bytes, str, Path], 58 | config: PretrainedConfig = None, 59 | device: str = None, 60 | furiosa_config: Optional[Dict[str, str]] = None, 61 | model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, 62 | input_shape_dict: Optional[Dict[str, Tuple[int]]] = None, 63 | output_shape_dict: Optional[Dict[str, Tuple[int]]] = None, 64 | **kwargs, 65 | ): 66 | self.config = config 67 | self.model_save_dir = model_save_dir 68 | self.furiosa_config = furiosa_config 69 | self.preprocessors = kwargs.get("preprocessors", []) 70 | enable_compilation = kwargs.get("compile", True) 71 | 72 | self.model = model 73 | 74 | if enable_compilation: 75 | self.model = self.compile(model, input_shape_dict, output_shape_dict) 76 | 77 | self.create_session() 78 | 79 | def _save_pretrained(self, save_directory: Union[str, Path], file_name: Optional[str] = None, **kwargs): 80 | dst_path = Path(save_directory) / FURIOSA_ENF_FILE_NAME 81 | 82 | if isinstance(self.model, (str, Path)): 83 | copyfile(self.model, dst_path) 84 | else: 85 | with open(dst_path, "wb") as f: 86 | f.write(self.model) 87 | 88 | def create_session(self): 89 | """ 90 | Create a Furiosa runtime session for the model. 91 | 92 | Creates a session object using the Furiosa runtime for executing the model. 93 | 94 | Returns: 95 | None 96 | """ 97 | self.sess = session.create(self.model) 98 | self.input_num = self.sess.input_num 99 | self.inputs_to_dtype = [] 100 | for i in range(self.input_num): 101 | self.inputs_to_dtype.append(self.sess.input(i).dtype) 102 | 103 | @classmethod 104 | def _from_pretrained( 105 | cls, 106 | model_id: Union[str, Path], 107 | config: PretrainedConfig, 108 | use_auth_token: Optional[Union[bool, str, None]] = None, 109 | revision: Optional[Union[str, None]] = None, 110 | force_download: bool = False, 111 | cache_dir: Optional[str] = None, 112 | file_name: Optional[str] = None, 113 | subfolder: str = "", 114 | from_onnx: bool = False, 115 | from_quantized: bool = False, 116 | local_files_only: bool = False, 117 | input_shape_dict: Optional[Dict[str, Tuple[int]]] = None, 118 | output_shape_dict: Optional[Dict[str, Tuple[int]]] = None, 119 | **kwargs, 120 | ): 121 | """ 122 | Loads a model and its configuration file from a directory or the Hugging Face Hub. 123 | 124 | Args: 125 | model_id (Union[str, Path]): 126 | The directory from which to load the model. Can be either: 127 | - The model ID of a pretrained model hosted inside a model repo on huggingface.co. 128 | - The path to a directory containing the model weights. 129 | config (PretrainedConfig): 130 | The configuration object associated with the model. 131 | use_auth_token (Union[bool, str, None], defaults to None): 132 | The token to use as HTTP bearer authorization for remote files. Needed to load models from a private repository. 133 | revision (Union[str, None], defaults to None): 134 | The specific model version to use. It can be a branch name, a tag name, or a commit ID. 135 | force_download (bool, defaults to False): 136 | Whether or not to force the (re-)download of the model weights and configuration files, overriding the cached versions if they exist. 137 | cache_dir (str, defaults to None): 138 | The path to a directory in which a downloaded pretrained model configuration should be cached if the standard cache should not be used. 
139 |             file_name (str, defaults to None):
140 |                 The file name of the model to load. Overwrites the default file name and allows one to load the model with a different name.
141 |             subfolder (str, defaults to ""):
142 |                 The subfolder of the repository in which the model files are located.
143 |             from_onnx (bool, defaults to False):
144 |                 Whether the model is being loaded from an ONNX file.
145 |             from_quantized (bool, defaults to False):
146 |                 Whether the model is being loaded from a quantized file.
147 |             local_files_only (bool, defaults to False):
148 |                 Whether or not to only look at local files (i.e., do not try to download the model).
149 |             input_shape_dict (Dict[str, Tuple[int]], defaults to None):
150 |                 A dictionary specifying the input shapes for dynamic models.
151 |             output_shape_dict (Dict[str, Tuple[int]], defaults to None):
152 |                 A dictionary specifying the output shapes for dynamic models.
153 |             **kwargs:
154 |                 Additional keyword arguments to be passed to the underlying model loading function.
155 | 
156 |         Returns:
157 |             An instance of the model class loaded from the specified directory or Hugging Face Hub.
158 |         """
159 |         if from_onnx:
160 |             default_file_name = ONNX_WEIGHTS_NAME
161 |         elif from_quantized:
162 |             default_file_name = FURIOSA_QUANTIZED_FILE_NAME
163 |         else:
164 |             default_file_name = FURIOSA_ENF_FILE_NAME
165 | 
166 |         file_name = file_name or default_file_name
167 | 
168 |         # Load the model from a local directory
169 |         if Path(model_id).is_dir():
170 |             file_path = Path(model_id) / file_name
171 |             model_save_dir = model_id
172 |             preprocessors = maybe_load_preprocessors(model_id)
173 |         # Download the model from the hub
174 |         else:
175 |             file_path = hf_hub_download(
176 |                 repo_id=model_id,
177 |                 filename=file_name,
178 |                 subfolder=subfolder,
179 |                 use_auth_token=use_auth_token,
180 |                 revision=revision,
181 |                 cache_dir=cache_dir,
182 |                 force_download=force_download,
183 |                 local_files_only=local_files_only,
184 |             )
185 |             model_save_dir = Path(file_path).parent
186 |             preprocessors = maybe_load_preprocessors(model_id, subfolder=subfolder)
187 | 
188 |         model = cls.load_model(file_path, input_shape_dict, output_shape_dict)
189 | 
190 |         return cls(
191 |             model, config=config, model_save_dir=model_save_dir, compile=False, preprocessors=preprocessors, **kwargs
192 |         )
193 | 
194 |     @classmethod
195 |     def _from_transformers(
196 |         cls,
197 |         model_id: str,
198 |         config: PretrainedConfig,
199 |         use_auth_token: Optional[Union[bool, str]] = None,
200 |         revision: Optional[str] = None,
201 |         force_download: bool = False,
202 |         cache_dir: Optional[str] = None,
203 |         subfolder: str = "",
204 |         local_files_only: bool = False,
205 |         task: Optional[str] = None,
206 |         **kwargs,
207 |     ):
208 |         """
209 |         Exports a vanilla Transformers model to the ONNX format using `optimum.exporters.onnx.main_export` and loads the result.
210 | 
211 |         Args:
212 |             model_id (`str` or `Path`):
213 |                 The directory from which to load the model.
214 |                 Can be either:
215 |                     - The model id of a pretrained model hosted inside a model repo on huggingface.co.
216 |                     - The path to a directory containing the model weights.
217 |             task (`str`, *optional*):
218 |                 The task for which to export the model. Defaults to `cls.export_feature` when not provided.
219 |             use_auth_token (`str` or `bool`, *optional*):
220 |                 The token to use as HTTP bearer authorization for remote files. Needed to load models from a private repository.
221 |             revision (`str`, *optional*):
222 |                 The specific model version to use. It can be a branch name, a tag name, or a commit ID.
223 |             kwargs (`Dict`, *optional*):
224 |                 Additional keyword arguments that will be passed to the model during initialization.
225 |         """
226 |         if task is None:
227 |             task = cls.export_feature
228 | 
229 |         save_dir = TemporaryDirectory()
230 |         save_dir_path = Path(save_dir.name)
231 | 
232 |         # Export the model to the ONNX format
233 |         main_export(
234 |             model_name_or_path=model_id,
235 |             output=save_dir_path,
236 |             task=task,
237 |             do_validation=False,
238 |             no_post_process=True,
239 |             subfolder=subfolder,
240 |             revision=revision,
241 |             cache_dir=cache_dir,
242 |             use_auth_token=use_auth_token,
243 |             local_files_only=local_files_only,
244 |             force_download=force_download,
245 |         )
246 | 
247 |         config.save_pretrained(save_dir_path)
248 |         maybe_save_preprocessors(model_id, save_dir_path, src_subfolder=subfolder)
249 | 
250 |         return cls._from_pretrained(
251 |             model_id=save_dir_path,
252 |             config=config,
253 |             from_onnx=True,
254 |             use_auth_token=use_auth_token,
255 |             revision=revision,
256 |             force_download=force_download,
257 |             cache_dir=cache_dir,
258 |             local_files_only=local_files_only,
259 |             **kwargs,
260 |         )
261 | 
262 |     @classmethod
263 |     def load_model(
264 |         cls,
265 |         model_path: Union[str, Path],
266 |         input_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
267 |         output_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
268 |     ):
269 |         """
270 |         Loads and processes a model for use with the Furiosa framework.
271 | 
272 |         Args:
273 |             model_path (Union[str, Path]):
274 |                 The path to the model file.
275 |             input_shape_dict (Dict[str, Tuple[int]], defaults to None):
276 |                 A dictionary specifying the input shapes for dynamic models.
277 |             output_shape_dict (Dict[str, Tuple[int]], defaults to None):
278 |                 A dictionary specifying the output shapes for dynamic models.
279 | 
280 |         Returns:
281 |             If the model is in the 'onnx' or 'dfg' format, the compiled model in the Furiosa binary format is returned.
282 |             If the model is in the 'enf' format, the model path is returned as-is.
283 | 
284 |         Raises:
285 |             ValueError: If the model format is not supported or invalid.
286 |         """
287 |         model_path = Path(model_path)
288 |         if model_path.suffix in (".onnx", ".dfg"):
289 |             compiled_model = cls.compile(model_path, input_shape_dict, output_shape_dict)
290 |             return compiled_model
291 |         if model_path.suffix == ".enf":
292 |             return model_path
293 | 
294 |         raise ValueError("Invalid model format. Supported formats are '.onnx', '.dfg', and '.enf'.")
295 | 
296 |     @classmethod
297 |     def compile(
298 |         cls,
299 |         model: Union[str, Path, bytes],
300 |         input_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
301 |         output_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
302 |     ):
303 |         """
304 |         Compiles the model to the Furiosa binary format.
305 | 
306 |         Args:
307 |             model (Union[str, Path, bytes]):
308 |                 The model to be compiled, either as a path to an '.onnx'/'.dfg' file or as in-memory bytes.
309 |             input_shape_dict (Dict[str, Tuple[int]], defaults to None):
310 |                 A dictionary specifying the input shapes for dynamic models.
311 |             output_shape_dict (Dict[str, Tuple[int]], defaults to None):
312 |                 A dictionary specifying the output shapes for dynamic models.
313 |         Returns:
314 |             The compiled model in the Furiosa binary format.
315 | 
316 |         Raises:
317 |             ValueError: If the model format is not supported or invalid.
318 | """ 319 | if isinstance(model, (str, Path)): 320 | model = cls._reshape(model, input_shape_dict, output_shape_dict) 321 | input_bytes = Path(model).read_bytes() 322 | else: 323 | input_bytes = model 324 | 325 | logger.info("Compiling the model...") 326 | compiled_model = compile(input_bytes, target_ir="enf") 327 | return compiled_model 328 | 329 | @staticmethod 330 | def _check_is_dynamic(model_path: Union[str, Path]): 331 | is_dynamic = False 332 | if Path(model_path).suffix == ".onnx": 333 | model = onnx.load(model_path) 334 | is_dynamic = any(any(dim.dim_param for dim in inp.type.tensor_type.shape.dim) for inp in model.graph.input) 335 | 336 | return is_dynamic 337 | 338 | @staticmethod 339 | def optimize_model(model: onnx.ModelProto) -> Path: 340 | return optimizer.frontend.onnx.optimize_model(model) 341 | 342 | @staticmethod 343 | def _update_inputs_outputs_dims( 344 | model_path: Union[str, Path], 345 | input_shape_dict: Dict[str, Tuple[int]], 346 | output_shape_dict: Dict[str, Tuple[int]], 347 | ) -> onnx.ModelProto: 348 | from onnx import shape_inference 349 | from onnx.tools import update_model_dims 350 | 351 | model = onnx.load(model_path) 352 | 353 | updated_model = update_model_dims.update_inputs_outputs_dims(model, input_shape_dict, output_shape_dict) 354 | return shape_inference.infer_shapes(updated_model) 355 | 356 | @classmethod 357 | def _reshape( 358 | cls, 359 | model_path: Union[str, Path], 360 | input_shape_dict: Dict[str, Tuple[int]], 361 | output_shape_dict: Dict[str, Tuple[int]], 362 | ) -> Union[str, Path]: 363 | """ 364 | Propagates the given input shapes on the model's layers, fixing the input shapes of the model. 365 | 366 | Args: 367 | model_path (Union[str, Path]): 368 | Path to the model. 369 | input_shape_dict (Dict[str, Tuple[int]]): 370 | Input shapes for the model. 371 | output_shape_dict (Dict[str, Tuple[int]]): 372 | Output shapes for the model. 373 | 374 | Returns: 375 | Union[str, Path]: 376 | Path to the model after updating the input shapes. 377 | 378 | Raises: 379 | ValueError: If the model provided has dynamic axes in input/output and no input/output shape is provided. 380 | """ 381 | if isinstance(model_path, (str, Path)) and Path(model_path).suffix == ".onnx": 382 | is_dynamic = cls._check_is_dynamic(model_path) 383 | if is_dynamic: 384 | if input_shape_dict is None or output_shape_dict is None: 385 | raise ValueError( 386 | "The model provided has dynamic axes in input/output. Please provide input and output shapes for compilation." 387 | ) 388 | 389 | model = cls._update_inputs_outputs_dims(model_path, input_shape_dict, output_shape_dict) 390 | optimized_model = cls.optimize_model(model) 391 | 392 | static_model_path = Path(model_path).parent / ONNX_WEIGHTS_NAME_STATIC 393 | onnx.save(optimized_model, static_model_path) 394 | 395 | return static_model_path 396 | 397 | return model_path 398 | 399 | def forward(self, *args, **kwargs): 400 | raise NotImplementedError 401 | --------------------------------------------------------------------------------