├── .github
│   └── workflows
│       ├── delete_doc_comment_trigger.yml
│       ├── upload_pr_documentation.yml
│       ├── delete_doc_comment.yml
│       ├── security.yml
│       ├── test.yml
│       ├── check_code_quality.yml
│       └── build_pr_documentation.yml
├── docs
│   ├── README.md
│   └── source
│       ├── _toctree.yml
│       ├── package_reference
│       │   ├── quantization.mdx
│       │   ├── modeling.mdx
│       │   └── configuration.mdx
│       ├── usage_guides
│       │   ├── overview.mdx
│       │   ├── models.mdx
│       │   └── quantization.mdx
│       ├── installation.md
│       └── index.md
├── optimum
│   └── furiosa
│       ├── version.py
│       ├── quantization_base.py
│       ├── __init__.py
│       ├── utils.py
│       ├── modeling.py
│       ├── configuration.py
│       ├── quantization.py
│       └── modeling_base.py
├── notebooks
│   └── quantization
│       └── image-classification
│           └── __init__.py
├── setup.cfg
├── pyproject.toml
├── examples
│   └── quantization
│       └── image-classification
│           ├── README.md
│           └── run_image_classification.py
├── Makefile
├── README.md
├── .gitignore
├── setup.py
├── tests
│   ├── test_quantization.py
│   └── test_modeling.py
└── LICENSE
/.github/workflows/delete_doc_comment_trigger.yml:
--------------------------------------------------------------------------------
1 | name: Delete doc comment trigger
2 |
3 | on:
4 | pull_request:
5 | types: [ closed ]
6 |
7 |
8 | jobs:
9 | delete:
10 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment_trigger.yml@main
11 | with:
12 | pr_number: ${{ github.event.number }}
13 |
--------------------------------------------------------------------------------
/.github/workflows/upload_pr_documentation.yml:
--------------------------------------------------------------------------------
1 | name: Upload PR Documentation
2 |
3 | on:
4 | workflow_run:
5 | workflows: ["Build PR Documentation"]
6 | types:
7 | - completed
8 |
9 | jobs:
10 | build:
11 | uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main
12 | with:
13 | package_name: optimum-furiosa
14 | secrets:
15 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }}
16 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}
17 |
--------------------------------------------------------------------------------
/.github/workflows/delete_doc_comment.yml:
--------------------------------------------------------------------------------
1 | name: Delete PR documentation
2 |
3 | on:
4 | workflow_run:
5 | workflows: ["Delete doc comment trigger"]
6 | types:
7 | - completed
8 |
9 |
10 | jobs:
11 |   delete:
12 |     uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main
13 |     secrets:
14 |       comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }}
15 |
--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
1 | # Optimum Furiosa documentation
2 |
3 | 1. Setup
4 | ```bash
5 | pip install hf-doc-builder==0.4.0 watchdog --upgrade
6 | ```
7 |
8 | 2. Local Development
9 | ```bash
10 | doc-builder preview optimum.furiosa docs/source/
11 | ```
12 | 3. Build Docs
13 | ```bash
14 | doc-builder build optimum.furiosa docs/source/ --build_dir build/
15 | ```
16 |
17 | ## Add assets/Images
18 |
19 | Adding images/assets is only possible through `https://` links, meaning you need to use the `https://raw.githubusercontent.com/huggingface/optimum-furiosa/main/docs/assets/` prefix.
20 |
--------------------------------------------------------------------------------
/docs/source/_toctree.yml:
--------------------------------------------------------------------------------
1 | - sections:
2 | - local: index
3 | title: 🤗 Optimum Furiosa
4 | - local: installation
5 | title: Installation
6 | - sections:
7 | - local: usage_guides/overview
8 | title: Overview
9 | - local: usage_guides/models
10 | title: Modeling
11 | - local: usage_guides/quantization
12 | title: Quantization
13 | title: How-To Guides
14 | - sections:
15 | - local: package_reference/modeling
16 | title: Models
17 | - local: package_reference/configuration
18 | title: Configuration
19 | - local: package_reference/quantization
20 | title: Quantization
21 | title: Reference
22 | title: Optimum Furiosa
23 | isExpanded: false
--------------------------------------------------------------------------------
/optimum/furiosa/version.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | __version__ = "0.1.0.dev0"
16 |
--------------------------------------------------------------------------------
/docs/source/package_reference/quantization.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Quantization
14 |
15 | ## FuriosaAIQuantizer
16 |
17 | [[autodoc]] FuriosaAIQuantizer
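18 |
19 | As a quick illustration, a quantizer is typically instantiated from a directory containing an exported ONNX model. The following is a minimal sketch (the `output` directory and `model.onnx` file name are placeholders); see the quantization how-to guide for the full workflow:
20 |
21 | ```python
22 | from optimum.furiosa import FuriosaAIQuantizer
23 |
24 | # Load the ONNX model previously exported to the `output` directory
25 | quantizer = FuriosaAIQuantizer.from_pretrained("output", file_name="model.onnx")
26 | ```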
--------------------------------------------------------------------------------
/notebooks/quantization/image-classification/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | # Copyright 2023 The HuggingFace Team. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
--------------------------------------------------------------------------------
/docs/source/usage_guides/overview.mdx:
--------------------------------------------------------------------------------
1 |
16 |
17 | # Overview
18 |
19 | Welcome to the 🤗 Optimum Furiosa how-to guides!
20 | These guides tackle more advanced topics and will show you how to get the best out of Furiosa NPUs:
21 | - [Accelerating inference](./models)
22 | - [Quantization](./quantization)
23 |
--------------------------------------------------------------------------------
/.github/workflows/security.yml:
--------------------------------------------------------------------------------
1 | name: Security Checks
2 |
3 | on:
4 | push:
5 |
6 | permissions:
7 | contents: read
8 |
9 | jobs:
10 | secrets:
11 | runs-on: ubuntu-latest
12 | steps:
13 | - shell: bash
14 | run: |
15 | if [ "${{ github.event_name }}" == "push" ]; then
16 | echo "depth=$(($(jq length <<< '${{ toJson(github.event.commits) }}') + 2))" >> $GITHUB_ENV
17 | echo "branch=${{ github.ref_name }}" >> $GITHUB_ENV
18 | fi
19 | if [ "${{ github.event_name }}" == "pull_request" ]; then
20 | echo "depth=$((${{ github.event.pull_request.commits }}+2))" >> $GITHUB_ENV
21 | echo "branch=${{ github.event.pull_request.head.ref }}" >> $GITHUB_ENV
22 | fi
23 | - name: Checkout code
24 | uses: actions/checkout@v4
25 | with:
26 | ref: ${{env.branch}}
27 | fetch-depth: ${{env.depth}}
28 | - name: Scan for secrets
29 | uses: trufflesecurity/trufflehog@main
30 |
--------------------------------------------------------------------------------
/docs/source/installation.md:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Installation
14 |
15 | To install 🤗 Optimum Furiosa, you first need to install Furiosa SDK drivers by following the official [installation guide](https://furiosa-ai.github.io/docs/latest/en/software/installation.html). Then, 🤗 Optimum Furiosa can be installed using `pip` as follows:
16 |
17 | ```bash
18 | python -m pip install git+https://github.com/huggingface/optimum-furiosa.git
19 | ```
20 |
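21 | You can then run a quick sanity check that the package is importable (a minimal check; `__version__` is exposed at the top level of `optimum.furiosa`):
22 |
23 | ```python
24 | import optimum.furiosa
25 |
26 | # Prints the installed version, e.g. "0.1.0.dev0"
27 | print(optimum.furiosa.__version__)
28 | ```
29 |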
--------------------------------------------------------------------------------
/.github/workflows/test.yml:
--------------------------------------------------------------------------------
1 | name: Test
2 |
3 | on:
4 | push:
5 | branches:
6 | - main
7 | pull_request:
8 | branches:
9 | - main
10 |
11 | jobs:
12 | build:
13 | strategy:
14 | fail-fast: false
15 | matrix:
16 | python-version: [3.8]
17 | os: [self-hosted]
18 | runs-on: ${{ matrix.os }}
19 | steps:
20 | - uses: actions/checkout@v2
21 | - name: Setup Python ${{ matrix.python-version }}
22 | uses: actions/setup-python@v2
23 | with:
24 | python-version: ${{ matrix.python-version }}
25 | - name: Create and start a virtual environment
26 | run: |
27 | python -m venv venv
28 | source venv/bin/activate
29 | - name: Install dependencies
30 | run: |
31 | source venv/bin/activate
32 | python -m pip install --upgrade pip
33 | pip install .[testing]
34 | - name: Test with Pytest
35 | run: |
36 | source venv/bin/activate
37 | pytest -s tests/
38 | - name: Cleanup
39 | run: |
40 | rm -rf venv
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [isort]
2 | default_section = FIRSTPARTY
3 | ensure_newline_before_comments = True
4 | force_grid_wrap = 0
5 | include_trailing_comma = True
6 | known_first_party = transformers
7 | known_third_party =
8 | absl
9 | conllu
10 | datasets
11 | elasticsearch
12 | fairseq
13 | faiss-cpu
14 | fastprogress
15 | fire
16 | fugashi
17 | git
18 | h5py
19 | matplotlib
20 | nltk
21 | numpy
22 | packaging
23 | pandas
24 | PIL
25 | psutil
26 | pytest
27 | pytorch_lightning
28 | rouge_score
29 | sacrebleu
30 | seqeval
31 | sklearn
32 | streamlit
33 | tensorboardX
34 | tensorflow
35 | tensorflow_datasets
36 | timeout_decorator
37 | torch
38 | torchaudio
39 | torchtext
40 | torchvision
41 | torch_xla
42 | tqdm
43 |
44 | line_length = 119
45 | lines_after_imports = 2
46 | multi_line_output = 3
47 | use_parentheses = True
48 |
49 | [flake8]
50 | ignore = E203, E501, E741, W503, W605
51 | max-line-length = 119
52 |
53 | [tool:pytest]
54 | doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS
--------------------------------------------------------------------------------
/docs/source/package_reference/modeling.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Models
14 |
15 | ## Generic model classes
16 |
17 | The following Furiosa classes are available for instantiating a base model class without a specific head.
18 |
19 | ### FuriosaAIModel
20 |
21 | [[autodoc]] FuriosaAIModel
22 |
23 | ## Computer vision
24 |
25 | The following classes are available for the computer vision tasks listed below.
26 |
27 | ### FuriosaAIModelForImageClassification
28 |
29 | [[autodoc]] FuriosaAIModelForImageClassification
30 |
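31 | As a quick illustration, here is a minimal sketch of loading a model with static input/output shapes (the checkpoint and shapes follow the ResNet-50 example used in the usage guides):
32 |
33 | ```python
34 | from optimum.furiosa import FuriosaAIModelForImageClassification
35 |
36 | # Export the Transformers checkpoint on the fly and compile it for the NPU
37 | model = FuriosaAIModelForImageClassification.from_pretrained(
38 |     "microsoft/resnet-50",
39 |     export=True,
40 |     input_shape_dict={"pixel_values": [1, 3, 224, 224]},
41 |     output_shape_dict={"logits": [1, 1000]},
42 | )
43 | ```
44 |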
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | [tool.black]
16 | line-length = 119
17 | target-version = ['py37']
18 |
19 | [tool.ruff]
20 | # Never enforce `E501` (line length violations).
21 | ignore = ["C901", "E501", "E741", "W605"]
22 | select = ["C", "E", "F", "I", "W"]
23 | line-length = 119
24 |
25 | # Ignore import violations in all `__init__.py` files.
26 | [tool.ruff.per-file-ignores]
27 | "__init__.py" = ["E402", "F401", "F403", "F811"]
28 |
29 | [tool.ruff.isort]
30 | lines-after-imports = 2
31 | known-first-party = ["optimum"]
--------------------------------------------------------------------------------
/.github/workflows/check_code_quality.yml:
--------------------------------------------------------------------------------
1 | name: Check code quality
2 |
3 | on:
4 | push:
5 | branches: [ main ]
6 | paths:
7 | - "optimum/**.py"
8 | - "tests/**.py"
9 | - "examples/**.py"
10 |
11 | pull_request:
12 | branches: [ main ]
13 | paths:
14 | - "optimum/**.py"
15 | - "tests/**.py"
16 | - "examples/**.py"
17 |
18 | jobs:
19 | build:
20 | strategy:
21 | fail-fast: false
22 | matrix:
23 | python-version: ['3.8']
24 | os: [ubuntu-22.04]
25 |
26 | runs-on: ${{ matrix.os }}
27 | steps:
28 | - uses: actions/checkout@v2
29 | - name: Setup Python ${{ matrix.python-version }}
30 | uses: actions/setup-python@v2
31 | with:
32 | python-version: ${{ matrix.python-version }}
33 | - name: Create and start a virtual environment
34 | run: |
35 | python -m venv venv
36 | source venv/bin/activate
37 | - name: Install dependencies
38 | run: |
39 | source venv/bin/activate
40 | pip install --upgrade pip
41 | pip install black ruff
42 | - name: Check style with black
43 | run: |
44 | source venv/bin/activate
45 | black --check .
46 | - name: Check style with ruff
47 | run: |
48 | source venv/bin/activate
49 | ruff .
--------------------------------------------------------------------------------
/docs/source/package_reference/configuration.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Configuration
14 |
15 | The configuration classes specify how a given task should be performed. Here is how quantization can be configured:
16 |
17 | 1. Quantization: Performed by the [`~furiosa.FuriosaAIQuantizer`], quantization can be set using a [`~furiosa.configuration.QuantizationConfig`]. A calibration step is required in some cases (post-training static quantization), which can be specified using a [`~furiosa.configuration.CalibrationConfig`].
18 |
19 | ## QuantizationConfig
20 |
21 | [[autodoc]] configuration.QuantizationConfig
22 |
23 | ## CalibrationConfig
24 |
25 | [[autodoc]] configuration.CalibrationConfig
26 |
27 | ## FuriosaAIConfig
28 |
29 | [[autodoc]] configuration.FuriosaAIConfig
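30 |
31 | As a quick illustration, here is a minimal sketch of how these classes fit together, assuming `calibration_dataset` was built beforehand with [`~furiosa.FuriosaAIQuantizer.get_calibration_dataset`]:
32 |
33 | ```python
34 | from optimum.furiosa import AutoCalibrationConfig, FuriosaAIConfig, QuantizationConfig
35 |
36 | # Quantization parameters (defaults shown here)
37 | qconfig = QuantizationConfig()
38 |
39 | # Calibration parameters derived from a calibration dataset (MSE, asymmetric);
40 | # `calibration_dataset` is assumed to come from FuriosaAIQuantizer.get_calibration_dataset
41 | calibration_config = AutoCalibrationConfig.mse_asym(calibration_dataset)
42 |
43 | # Bundle both, as saved alongside a quantized model
44 | fai_config = FuriosaAIConfig(quantization=qconfig, calibration=calibration_config)
45 | ```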
--------------------------------------------------------------------------------
/examples/quantization/image-classification/README.md:
--------------------------------------------------------------------------------
1 |
16 |
17 | # Image classification
18 |
19 | The script [`run_image_classification.py`](https://github.com/huggingface/optimum-furiosa/blob/main/examples/quantization/image-classification/run_image_classification.py) allows us to apply quantization using the [FuriosaAI SDK](https://furiosa-ai.github.io/docs/latest/en/software/quantization.html) for image classification tasks.
20 |
21 | The following example applies quantization on a ResNet model fine-tuned on the beans classification dataset.
22 |
23 | ```bash
24 | python run_image_classification.py \
25 | --model_name_or_path eugenecamus/resnet-50-base-beans-demo \
26 | --dataset_name beans \
27 | --do_eval \
28 | --output_dir /tmp/image_classification_resnet_beans
29 | ```
30 |
--------------------------------------------------------------------------------
/optimum/furiosa/quantization_base.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import logging
16 | from abc import ABC, abstractmethod
17 | from pathlib import Path
18 | from typing import Optional, Union
19 |
20 |
21 | logger = logging.getLogger(__name__)
22 |
23 |
24 | class OptimumQuantizer(ABC):
25 | @classmethod
26 | def from_pretrained(
27 | cls,
28 | model_or_path: Union[str, Path],
29 | file_name: Optional[str] = None,
30 | ):
31 | """Overwrite this method in subclass to define how to load your model from pretrained"""
32 | raise NotImplementedError(
33 | "Overwrite this method in subclass to define how to load your model from pretrained for quantization"
34 | )
35 |
36 | @abstractmethod
37 | def quantize(self, save_dir: Union[str, Path], file_prefix: Optional[str] = None, **kwargs):
38 | """Overwrite this method in subclass to define how to quantize your model for quantization"""
39 | raise NotImplementedError(
40 | "Overwrite this method in subclass to define how to quantize your model for quantization"
41 | )
42 |
--------------------------------------------------------------------------------
/optimum/furiosa/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from typing import TYPE_CHECKING
16 |
17 | from transformers.utils import OptionalDependencyNotAvailable, _LazyModule
18 |
19 | from .utils import FURIOSA_ENF_FILE_NAME
20 |
21 |
22 | _import_structure = {
23 | "configuration": [
24 | "CalibrationConfig",
25 | "AutoCalibrationConfig",
26 | "QuantizationMode",
27 | "FuriosaAIConfig",
28 | "QuantizationConfig",
29 | ],
30 | "modeling": [
31 | "FuriosaAIModel",
32 | "FuriosaAIModelForImageClassification",
33 | ],
34 | "quantization": ["FuriosaAIQuantizer"],
35 | "utils": [
36 | "export_model_to_onnx",
37 | ],
38 | "version": ["__version__"],
39 | }
40 |
41 | # Direct imports for type-checking
42 | if TYPE_CHECKING:
43 |     from .configuration import AutoCalibrationConfig, CalibrationConfig, FuriosaAIConfig, QuantizationConfig, QuantizationMode
44 | from .modeling import (
45 | FuriosaAIModelForImageClassification,
46 | )
47 | from .quantization import FuriosaAIQuantizer
48 | from .utils import export_model_to_onnx
49 | from .version import __version__
50 | else:
51 | import sys
52 |
53 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__)
54 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | SHELL := /bin/bash
15 | CURRENT_DIR = $(shell pwd)
16 | DEFAULT_CLONE_URL := https://github.com/huggingface/optimum-furiosa.git
17 | # If CLONE_URL is empty, revert to DEFAULT_CLONE_URL
18 | REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL))
19 |
20 | .PHONY: style test
21 |
22 | # Run code quality checks
23 | style_check:
24 | black --check .
25 | ruff .
26 |
27 | style:
28 | black .
29 | ruff . --fix
30 |
31 | # Run tests for the library
32 | test:
33 | python -m pytest tests
34 |
35 | # Utilities to release to PyPi
36 | build_dist_install_tools:
37 | pip install build
38 | pip install twine
39 |
40 | build_dist:
41 | rm -fr build
42 | rm -fr dist
43 | python -m build
44 |
45 | pypi_upload: build_dist
46 | python -m twine upload dist/*
47 |
48 | build_doc_docker_image:
49 | docker build -t doc_maker --build-arg commit_sha=$(COMMIT_SHA_SUBPACKAGE) --build-arg clone_url=$(REAL_CLONE_URL) ./docs
50 |
51 | doc: build_doc_docker_image
52 | @test -n "$(BUILD_DIR)" || (echo "BUILD_DIR is empty." ; exit 1)
53 | @test -n "$(VERSION)" || (echo "VERSION is empty." ; exit 1)
54 | docker run -v $(CURRENT_DIR):/doc_folder --workdir=/doc_folder doc_maker \
55 | doc-builder build optimum.furiosa /optimum-furiosa/docs/source/ \
56 | --build_dir $(BUILD_DIR) \
57 | --version $(VERSION) \
58 | --version_tag_suffix "" \
59 | --html \
60 | --clean
--------------------------------------------------------------------------------
/docs/source/index.md:
--------------------------------------------------------------------------------
1 |
16 |
17 | # 🤗 Optimum Furiosa
18 |
19 | 🤗 Optimum Furiosa is the interface between the 🤗 Transformers library and Furiosa NPUs such as [Furiosa Warboy](https://furiosa-ai.github.io/docs/latest/en/npu/intro.html#furiosaai-warboy).
20 | It provides a set of tools enabling easy model loading and inference for different downstream tasks.
21 |
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](https://github.com/huggingface/optimum-furiosa/actions/workflows/test.yml)
2 |
3 |
4 | # optimum-furiosa
5 | Accelerated inference of 🤗 models using FuriosaAI NPU chips.
6 |
7 | ## Furiosa SDK setup
8 | A Furiosa SDK environment needs to be enabled to use this library. Please refer to Furiosa's [Installation](https://furiosa-ai.github.io/docs/latest/en/software/installation.html) guide.
9 |
10 | ## Install
11 | Optimum Furiosa is a fast-moving project, and you may want to install from source.
12 |
13 | `pip install git+https://github.com/huggingface/optimum-furiosa.git`
14 |
15 | ### Installing in developer mode
16 |
17 | If you are working on the `optimum-furiosa` code, you should use an editable install
18 | by cloning and installing `optimum` and `optimum-furiosa`:
19 |
20 | ```
21 | git clone https://github.com/huggingface/optimum
22 | git clone https://github.com/huggingface/optimum-furiosa
23 | pip install -e optimum -e optimum-furiosa
24 | ```
25 |
26 | Now whenever you change the code, you'll be able to run with those changes instantly.
27 |
28 |
29 | ## How to use it?
30 | To load a model and run inference with a Furiosa NPU, you can just replace your `AutoModelForXxx` class with the corresponding `FuriosaAIModelForXxx` class.
31 |
32 | ```diff
33 | import requests
34 | from PIL import Image
35 |
36 | - from transformers import AutoModelForImageClassification
37 | + from optimum.furiosa import FuriosaAIModelForImageClassification
38 | from transformers import AutoFeatureExtractor, pipeline
39 |
40 | url = "http://images.cocodataset.org/val2017/000000039769.jpg"
41 | image = Image.open(requests.get(url, stream=True).raw)
42 |
43 | model_id = "microsoft/resnet-50"
44 | - model = AutoModelForImageClassification.from_pretrained(model_id)
45 | + model = FuriosaAIModelForImageClassification.from_pretrained(model_id, export=True, input_shape_dict={"pixel_values": [1, 3, 224, 224]}, output_shape_dict={"logits": [1, 1000]},)
46 | feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
47 | cls_pipe = pipeline("image-classification", model=model, feature_extractor=feature_extractor)
48 | outputs = cls_pipe(image)
49 | ```
50 |
51 | If you find any issues while using this library, please open an issue or a pull request.
52 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
--------------------------------------------------------------------------------
/.github/workflows/build_pr_documentation.yml:
--------------------------------------------------------------------------------
1 | name: Build PR documentation
2 |
3 | on:
4 | pull_request:
5 | branches: [ main ]
6 | paths:
7 | - "optimum/**.py"
8 | - "docs/**"
9 | - ".github/workflows/build_pr_documentation.yml"
10 | - ".github/workflows/delete_doc_comment.yml"
11 |
12 | concurrency:
13 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
14 | cancel-in-progress: true
15 |
16 | jobs:
17 | build_documentation:
18 | runs-on: self-hosted
19 | env:
20 | COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
21 | PR_NUMBER: ${{ github.event.number }}
22 | EVENT_CONTEXT: ${{ toJSON(github.event) }}
23 | PR_CLONE_URL: ${{ github.event.pull_request.head.repo.clone_url }}
24 |
25 | steps:
26 | - uses: actions/checkout@v2
27 | with:
28 | repository: 'huggingface/doc-builder'
29 | path: doc-builder
30 |
31 | - uses: actions/checkout@v2
32 | with:
33 | repository: 'huggingface/optimum-furiosa'
34 | path: optimum-furiosa
35 |
36 | - name: Create and start a virtual environment
37 | run: |
38 | python -m venv venv_docs
39 | source venv_docs/bin/activate
40 | python -m pip install --upgrade pip
41 |
42 | - name: Setup environment
43 | run: |
44 | source venv_docs/bin/activate
45 | pip uninstall -y doc-builder
46 | cd doc-builder
47 | git pull origin main
48 | pip install .
49 | pip install black
50 | cd ..
51 | cd optimum-furiosa
52 | pip install .
53 | cd ..
54 |
55 | - name: Make documentation
56 | run: |
57 | source venv_docs/bin/activate
58 | cd optimum-furiosa
59 | doc-builder build optimum.furiosa docs/source/ --build_dir furiosa-doc-build --version pr_$PR_NUMBER --version_tag_suffix "" --html --clean
60 | cd ..
61 |
62 | - name: Save commit_sha & pr_number
63 | run: |
64 | source venv_docs/bin/activate
65 | cd optimum-furiosa/furiosa-doc-build
66 | sudo mv optimum.furiosa optimum-furiosa
67 | echo ${{ env.COMMIT_SHA }} > ./commit_sha
68 | echo ${{ env.PR_NUMBER }} > ./pr_number
69 |
70 | - uses: actions/upload-artifact@v3
71 | with:
72 | name: doc-build-artifact
73 | path: optimum-furiosa/furiosa-doc-build/
74 |
75 | - name: Cleanup
76 | run: |
77 | rm -rf venv_docs
78 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | import re
2 |
3 | from setuptools import find_namespace_packages, setup
4 |
5 |
6 | # Ensure we match the version set in optimum/furiosa/version.py
7 | try:
8 | filepath = "optimum/furiosa/version.py"
9 | with open(filepath) as version_file:
10 | (__version__,) = re.findall('__version__ = "(.*)"', version_file.read())
11 | except Exception as error:
12 |     assert False, "Error: Could not open '%s' due to %s\n" % (filepath, error)
13 |
14 | INSTALL_REQUIRE = [
15 | "optimum==1.8.0",
16 | "transformers>=4.20.0",
17 | "datasets>=1.4.0",
18 | "furiosa-optimizer",
19 | "furiosa-quantizer==0.9.0",
20 | "furiosa-quantizer-impl==0.9.1",
21 | "furiosa-sdk",
22 | "onnx>=1.12.0",
23 | "sentencepiece",
24 | "scipy",
25 | ]
26 |
27 | TESTS_REQUIRE = ["pytest", "parameterized", "Pillow", "evaluate", "diffusers", "py-cpuinfo"]
28 |
29 | QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"]
30 |
31 | EXTRA_REQUIRE = {
32 | "testing": [
33 | "filelock",
34 | "GitPython",
35 | "parameterized",
36 | "psutil",
37 | "pytest",
38 | "pytest-pythonpath",
39 | "pytest-xdist",
40 | "Pillow",
41 | "librosa",
42 | "soundfile",
43 | ],
44 | "quality": QUALITY_REQUIRE,
45 | }
46 |
47 | setup(
48 | name="optimum-furiosa",
49 | version=__version__,
50 | description="Optimum Library is an extension of the Hugging Face Transformers library, providing a framework to "
51 | "integrate third-party libraries from Hardware Partners and interface with their specific "
52 | "functionality.",
53 | long_description=open("README.md", "r", encoding="utf-8").read(),
54 | long_description_content_type="text/markdown",
55 | classifiers=[
56 | "Development Status :: 5 - Production/Stable",
57 | "License :: OSI Approved :: Apache Software License",
58 | "Intended Audience :: Developers",
59 | "Intended Audience :: Education",
60 | "Intended Audience :: Science/Research",
61 | "Operating System :: OS Independent",
62 | "Programming Language :: Python :: 3.7",
63 | "Programming Language :: Python :: 3.8",
64 | "Programming Language :: Python :: 3.9",
65 | "Topic :: Scientific/Engineering :: Artificial Intelligence",
66 | ],
67 | keywords="transformers, quantization, pruning, knowledge distillation, optimization, training",
68 | url="https://huggingface.co/hardware",
69 | author="HuggingFace Inc. Special Ops Team",
70 | author_email="hardware@huggingface.co",
71 | license="Apache",
72 | packages=find_namespace_packages(include=["optimum*"]),
73 | install_requires=INSTALL_REQUIRE,
74 | extras_require=EXTRA_REQUIRE,
75 | include_package_data=True,
76 | zip_safe=False,
77 | entry_points={"console_scripts": ["optimum-cli=optimum.commands.optimum_cli:main"]},
78 | )
79 |
--------------------------------------------------------------------------------
/docs/source/usage_guides/models.mdx:
--------------------------------------------------------------------------------
1 | # Optimum Inference with Furiosa NPU
2 |
3 | Optimum Furiosa is a utility package for building and running inference with Furiosa NPUs.
4 | Optimum can be used to load optimized models from the [Hugging Face Hub](https://huggingface.co/models) and create pipelines
5 | to run accelerated inference without rewriting your APIs.
6 |
7 | ## Switching from Transformers to Optimum Furiosa
8 |
9 | The `optimum.furiosa.FuriosaAIModelForXXX` model classes are API compatible with Hugging Face models. This
10 | means you can just replace your `AutoModelForXXX` class with the corresponding `FuriosaAIModelForXXX` class in `optimum.furiosa`.
11 |
12 | You do not need to adapt your code to get it to work with `FuriosaAIModelForXXX` classes.
13 |
14 | Because the model you want to work with might not already be converted to ONNX, [`~optimum.furiosa.FuriosaAIModel`]
15 | includes a method to convert vanilla Hugging Face models to ONNX ones. Simply pass `export=True` to the
16 | [`~optimum.furiosa.FuriosaAIModel.from_pretrained`] method, and your model will be loaded and converted to ONNX on-the-fly:
17 |
18 | ### Loading and inference of a vanilla Transformers model
19 |
20 | ```diff
21 | import requests
22 | from PIL import Image
23 |
24 | - from transformers import AutoModelForImageClassification
25 | + from optimum.furiosa import FuriosaAIModelForImageClassification
26 | from transformers import AutoFeatureExtractor, pipeline
27 |
28 | url = "http://images.cocodataset.org/val2017/000000039769.jpg"
29 | image = Image.open(requests.get(url, stream=True).raw)
30 |
31 | model_id = "microsoft/resnet-50"
32 | - model = AutoModelForImageClassification.from_pretrained(model_id)
33 | + model = FuriosaAIModelForImageClassification.from_pretrained(model_id, export=True, input_shape_dict={"pixel_values": [1, 3, 224, 224]}, output_shape_dict={"logits": [1, 1000]},)
34 | feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
35 | cls_pipe = pipeline("image-classification", model=model, feature_extractor=feature_extractor)
36 | outputs = cls_pipe(image)
37 | ```
38 |
39 |
40 | ### Pushing compiled models to the Hugging Face Hub
41 |
42 | It is also possible, just as with regular [`~transformers.PreTrainedModel`]s, to push your `FuriosaAIModelForXXX` to the
43 | [Hugging Face Model Hub](https://hf.co/models):
44 |
45 | ```python
46 | >>> from optimum.furiosa import FuriosaAIModelForImageClassification
47 |
48 | >>> # Load the model from the hub
49 | >>> model = FuriosaAIModelForImageClassification.from_pretrained(
50 | ... "microsoft/resnet-50", export=True, input_shape_dict={"pixel_values": [1, 3, 224, 224]}, output_shape_dict={"logits": [1, 1000]},
51 | ... )
52 |
53 | >>> # Save the converted model
54 | >>> model.save_pretrained("a_local_path_for_compiled_model")
55 |
56 | >>> # Push the compiled model to HF Hub
57 | >>> model.push_to_hub( # doctest: +SKIP
58 | ... "a_local_path_for_compiled_model", repository_id="my-furiosa-repo", use_auth_token=True
59 | ... )
60 | ```
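61 |
62 | Once pushed, the compiled model can be loaded back directly from the Hub with `from_pretrained` (a minimal sketch; replace `my-furiosa-repo` with the full repository id, e.g. `username/my-furiosa-repo`):
63 |
64 | ```python
65 | >>> # Load the compiled model (the `model.enf` artifact saved above) from the Hub
66 | >>> model = FuriosaAIModelForImageClassification.from_pretrained("my-furiosa-repo")
67 | ```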
--------------------------------------------------------------------------------
/docs/source/usage_guides/quantization.mdx:
--------------------------------------------------------------------------------
1 |
12 |
13 | # Quantization
14 |
15 | 🤗 Optimum provides an `optimum.furiosa` package that enables you to apply quantization on many models hosted on
16 | the Hugging Face Hub using the [Furiosa](https://furiosa-ai.github.io/docs/latest/en/software/quantization.html)
17 | quantization tool.
18 |
19 | The quantization process is abstracted via the [`~optimum.furiosa.FuriosaAIConfig`] and
20 | the [`~optimum.furiosa.FuriosaAIQuantizer`] classes. The former allows you to specify how quantization should be done,
21 | while the latter effectively handles quantization.
22 |
23 | ## Static Quantization example
24 |
25 | The [`~optimum.furiosa.FuriosaAIQuantizer`] class can be used to statically quantize your ONNX model. Below you will find
26 | an easy end-to-end example of how to statically quantize
27 | [eugenecamus/resnet-50-base-beans-demo](https://huggingface.co/eugenecamus/resnet-50-base-beans-demo).
28 |
29 | ```python
30 | >>> from functools import partial
31 | >>> from pathlib import Path
32 | >>> from transformers import AutoFeatureExtractor
33 | >>> from optimum.furiosa import FuriosaAIQuantizer, FuriosaAIModelForImageClassification, QuantizationConfig
34 | >>> from optimum.furiosa.configuration import AutoCalibrationConfig
35 | >>> from optimum.furiosa.utils import export_model_to_onnx
36 |
37 | >>> model_id = "eugenecamus/resnet-50-base-beans-demo"
38 |
39 | # Convert the PyTorch model to ONNX, then create the quantizer and set up the configs
40 |
41 | >>> feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
42 |
43 | >>> batch_size = 1
44 | >>> image_size = feature_extractor.size["shortest_edge"]
45 | >>> num_labels = 3
46 | >>> onnx_model_name = "model.onnx"
47 | >>> output_dir = "output"
48 | >>> onnx_model_path = Path(output_dir) / onnx_model_name
49 |
50 | >>> export_model_to_onnx(
51 | ... model_id,
52 | ... save_dir=output_dir,
53 | ... input_shape_dict={"pixel_values": [batch_size, 3, image_size, image_size]},
54 | ... output_shape_dict={"logits": [batch_size, num_labels]},
55 | ... file_name=onnx_model_name,
56 | ... )
57 | >>> quantizer = FuriosaAIQuantizer.from_pretrained(output_dir, file_name=onnx_model_name)
58 | >>> qconfig = QuantizationConfig()
59 |
60 | # Create the calibration dataset
61 | >>> def preprocess_fn(ex, feature_extractor):
62 | ... return feature_extractor(ex["image"])
63 |
64 | >>> calibration_dataset = quantizer.get_calibration_dataset(
65 | ... "beans",
66 | ... preprocess_function=partial(preprocess_fn, feature_extractor=feature_extractor),
67 | ... num_samples=50,
68 | ... dataset_split="train",
69 | ... )
70 |
71 | # Create the calibration configuration containing the parameters related to calibration.
72 | >>> calibration_config = AutoCalibrationConfig.mse_asym(calibration_dataset)
73 |
74 | # Perform the calibration step: computes the activations quantization ranges
75 | >>> ranges = quantizer.fit(
76 | ... dataset=calibration_dataset,
77 | ... calibration_config=calibration_config,
78 | ... )
79 |
80 | # Apply static quantization on the model
81 | >>> model_quantized_path = quantizer.quantize(
82 | ...     save_dir=output_dir,
83 | ... calibration_tensors_range=ranges,
84 | ... quantization_config=qconfig,
85 | ... )
86 | ```
87 |
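88 | The quantized model can then be loaded for inference (a minimal sketch mirroring the test suite; `model_quantized.dfg` is the file the quantizer writes into `output_dir`):
89 |
90 | ```python
91 | >>> # Load the quantized model produced by the quantize() call above
92 | >>> fai_model = FuriosaAIModelForImageClassification(Path(output_dir) / "model_quantized.dfg")
93 | ```
94 |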
--------------------------------------------------------------------------------
/tests/test_quantization.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import os
16 | import tempfile
17 | import unittest
18 | from functools import partial
19 | from pathlib import Path
20 |
21 | import requests
22 | from parameterized import parameterized
23 | from PIL import Image
24 | from transformers import AutoFeatureExtractor
25 |
26 | from optimum.furiosa import (
27 | AutoCalibrationConfig,
28 | FuriosaAIConfig,
29 | FuriosaAIModelForImageClassification,
30 | FuriosaAIQuantizer,
31 | QuantizationConfig,
32 | )
33 | from optimum.furiosa.utils import export_model_to_onnx
34 |
35 |
36 | class FuriosaAIQuantizationTest(unittest.TestCase):
37 | SUPPORTED_ARCHITECTURES = ((FuriosaAIModelForImageClassification, "fxmarty/resnet-tiny-beans"),)
38 |
39 | @parameterized.expand(SUPPORTED_ARCHITECTURES)
40 | def test_quantization(self, model_cls, model_name):
41 | qconfig = QuantizationConfig()
42 |
43 | def preprocess_fn(ex, feature_extractor):
44 | return feature_extractor(ex["image"])
45 |
46 | with tempfile.TemporaryDirectory() as tmp_dir:
47 | output_dir = Path(tmp_dir)
48 | export_model_to_onnx(
49 | model_name,
50 | save_dir=tmp_dir,
51 | input_shape_dict={"pixel_values": [1, 3, 224, 224]},
52 | output_shape_dict={"logits": [1, 3]},
53 | file_name="model.onnx",
54 | )
55 |
56 | feature_extractor = AutoFeatureExtractor.from_pretrained(model_name)
57 |
58 | quantizer = FuriosaAIQuantizer.from_pretrained(tmp_dir, file_name="model.onnx")
59 |
60 | calibration_dataset = quantizer.get_calibration_dataset(
61 | "beans",
62 | preprocess_function=partial(preprocess_fn, feature_extractor=feature_extractor),
63 | num_samples=10,
64 | dataset_split="train",
65 | )
66 |
67 | calibration_config = AutoCalibrationConfig.mse_asym(calibration_dataset)
68 | ranges = quantizer.fit(
69 | dataset=calibration_dataset,
70 | calibration_config=calibration_config,
71 | )
72 |
73 | quantizer.quantize(
74 | save_dir=output_dir,
75 | calibration_tensors_range=ranges,
76 | quantization_config=qconfig,
77 | )
78 |
79 | expected_fai_config = FuriosaAIConfig(quantization=qconfig, calibration=calibration_config)
80 | fai_config = FuriosaAIConfig.from_pretrained(tmp_dir)
81 | # Verify the FuriosaAIConfig was correctly created and saved
82 | self.assertEqual(fai_config.to_dict(), expected_fai_config.to_dict())
83 |
84 |             self.assertTrue(os.path.isfile(output_dir.joinpath("model_quantized.dfg")))
85 |
86 | fai_model_quantized = model_cls(Path(output_dir) / "model_quantized.dfg")
87 |
88 | url = "http://images.cocodataset.org/val2017/000000039769.jpg"
89 | image = Image.open(requests.get(url, stream=True).raw)
90 | inputs = feature_extractor(images=image, return_tensors="np")
91 |
92 | fai_outputs = fai_model_quantized(**inputs)
93 | self.assertIn("logits", fai_outputs)
94 |
--------------------------------------------------------------------------------
/optimum/furiosa/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | from pathlib import Path
17 | from typing import List, Union
18 |
19 | import numpy as np
20 | from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer
21 |
22 | from furiosa.runtime.tensor import DataType
23 | from optimum.exporters.onnx import main_export
24 |
25 |
26 | ONNX_WEIGHTS_NAME = "model.onnx"
27 | ONNX_WEIGHTS_NAME_STATIC = "model_static.onnx"
28 | FURIOSA_ENF_FILE_NAME = "model.enf"
29 | FURIOSA_QUANTIZED_FILE_NAME = "model_quantized.dfg"
30 |
31 | MAX_ONNX_OPSET_2022_2_0 = 10
32 | MAX_ONNX_OPSET = 13
33 | MIN_ONNX_QDQ_OPSET = 13
34 |
35 | WARBOY_DEVICE = "warboy"
36 |
37 | FURIOSA_DTYPE_TO_NUMPY_DTYPE = {
38 | DataType.UINT8: np.uint8,
39 | DataType.INT8: np.int8,
40 | DataType.FLOAT32: np.float32,
41 | }
42 |
43 | _HEAD_TO_AUTOMODELS = {
44 | "image-classification": "FuriosaAIModelForImageClassification",
45 | }
46 |
47 |
48 | def export_model_to_onnx(model_id, save_dir, input_shape_dict, output_shape_dict, file_name="model.onnx"):
49 |     """Export an image-classification model to ONNX, then fix its input and output shapes to static values."""
50 |     task = "image-classification"
51 |     main_export(model_id, save_dir, task=task)
52 |     import onnx
53 |     from onnx import shape_inference
54 |     from onnx.tools import update_model_dims
55 |
56 |     # The exporter writes "model.onnx" with dynamic axes; reload it and make the shapes static
57 |     save_dir_path = Path(save_dir) / "model.onnx"
58 |     model = onnx.load(save_dir_path)
59 |     updated_model = update_model_dims.update_inputs_outputs_dims(model, input_shape_dict, output_shape_dict)
60 |     inferred_model = shape_inference.infer_shapes(updated_model)
61 |     static_model_path = Path(save_dir_path).parent / file_name
62 |     onnx.save(inferred_model, static_model_path)
63 |
64 |
65 | def maybe_load_preprocessors(src_name_or_path: Union[str, Path], subfolder: str = "") -> List:
66 | preprocessors = []
67 | try:
68 | preprocessors.append(AutoTokenizer.from_pretrained(src_name_or_path, subfolder=subfolder))
69 | except Exception:
70 | pass
71 |
72 | try:
73 | preprocessors.append(AutoProcessor.from_pretrained(src_name_or_path, subfolder=subfolder))
74 | except Exception:
75 | pass
76 |
77 | try:
78 | preprocessors.append(AutoFeatureExtractor.from_pretrained(src_name_or_path, subfolder=subfolder))
79 | except Exception:
80 | pass
81 | return preprocessors
82 |
83 |
84 | def maybe_save_preprocessors(src_name_or_path: Union[str, Path], dest_dir: Union[str, Path], src_subfolder: str = ""):
85 | """
86 | Saves the tokenizer, the processor and the feature extractor when found in `src_dir` in `dest_dir`.
87 |
88 | Args:
89 | src_dir (`Union[str, Path]`):
90 | The source directory from which to copy the files.
91 | dest_dir (`Union[str, Path]`):
92 | The destination directory to copy the files to.
93 | src_subfolder (`str`, defaults to `""`):
94 | In case the preprocessor files are located inside a subfolder of the model directory / repo on the Hugging
95 | Face Hub, you can specify the subfolder name here.
96 | """
97 | if not isinstance(dest_dir, Path):
98 | dest_dir = Path(dest_dir)
99 |
100 | dest_dir.mkdir(exist_ok=True)
101 | for preprocessor in maybe_load_preprocessors(src_name_or_path, subfolder=src_subfolder):
102 | preprocessor.save_pretrained(dest_dir)
103 |
--------------------------------------------------------------------------------
/tests/test_modeling.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | # Copyright 2023 The HuggingFace Team. All rights reserved.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | # http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | import gc
16 | import os
17 | import tempfile
18 | import unittest
19 |
20 | import numpy as np
21 | import requests
22 | import torch
23 | from parameterized import parameterized
24 | from PIL import Image
25 | from transformers import AutoFeatureExtractor, AutoModelForImageClassification, PretrainedConfig, pipeline, set_seed
26 |
27 | from optimum.furiosa import FuriosaAIModelForImageClassification
28 | from optimum.furiosa.utils import FURIOSA_ENF_FILE_NAME
29 | from optimum.utils import (
30 | logging,
31 | )
32 |
33 |
34 | SEED = 42
35 |
36 | logger = logging.get_logger()
37 |
38 | MODEL_DICT = {
39 | "mobilenet_v1": ["google/mobilenet_v1_0.75_192", {"pixel_values": [1, 3, 192, 192]}, {"logits": [1, 1001]}],
40 | "mobilenet_v2": [
41 | "hf-internal-testing/tiny-random-MobileNetV2Model",
42 | {"pixel_values": [1, 3, 32, 32]},
43 | {"logits": [1, 2]},
44 | ],
45 | "resnet": ["hf-internal-testing/tiny-random-resnet", {"pixel_values": [1, 3, 224, 224]}, {"logits": [1, 1000]}],
46 | }
47 |
48 |
49 | TENSOR_ALIAS_TO_TYPE = {
50 | "pt": torch.Tensor,
51 | "np": np.ndarray,
52 | }
53 |
54 |
55 | class FuriosaAIModelIntegrationTest(unittest.TestCase):
56 | def __init__(self, *args, **kwargs):
57 | super().__init__(*args, **kwargs)
58 | self.MODEL_ID = "mohitsha/furiosa-resnet-tiny-beans"
59 |
60 | def test_load_from_hub_and_save_model(self):
61 | preprocessor = AutoFeatureExtractor.from_pretrained(self.MODEL_ID)
62 | url = "http://images.cocodataset.org/val2017/000000039769.jpg"
63 | image = Image.open(requests.get(url, stream=True).raw)
64 | inputs = preprocessor(images=image, return_tensors="pt")
65 | loaded_model = FuriosaAIModelForImageClassification.from_pretrained(self.MODEL_ID)
66 | self.assertIsInstance(loaded_model.config, PretrainedConfig)
67 | loaded_model_outputs = loaded_model(**inputs)
68 |
69 | with tempfile.TemporaryDirectory() as tmpdirname:
70 | loaded_model.save_pretrained(tmpdirname)
71 | del loaded_model
72 | folder_contents = os.listdir(tmpdirname)
73 | self.assertTrue(FURIOSA_ENF_FILE_NAME in folder_contents)
74 | model = FuriosaAIModelForImageClassification.from_pretrained(tmpdirname)
75 |
76 | outputs = model(**inputs)
77 | self.assertTrue(torch.equal(loaded_model_outputs.logits, outputs.logits))
78 |
79 |
80 | class FuriosaAIModelForImageClassificationIntegrationTest(unittest.TestCase):
81 | SUPPORTED_ARCHITECTURES = [
82 | "mobilenet_v1",
83 | "mobilenet_v2",
84 | "resnet",
85 | ]
86 |
87 | FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES}
88 | FuriosaAIMODEL_CLASS = FuriosaAIModelForImageClassification
89 | TASK = "image-classification"
90 |
91 | @parameterized.expand(SUPPORTED_ARCHITECTURES)
92 | def test_compare_to_transformers(self, model_arch):
93 | model_id, input_shape_dict, output_shape_dict = MODEL_DICT[model_arch]
94 | set_seed(SEED)
95 | fai_model = FuriosaAIModelForImageClassification.from_pretrained(
96 | model_id, export=True, input_shape_dict=input_shape_dict, output_shape_dict=output_shape_dict
97 | )
98 | self.assertIsInstance(fai_model.config, PretrainedConfig)
99 | transformers_model = AutoModelForImageClassification.from_pretrained(model_id)
100 | preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
101 | url = "http://images.cocodataset.org/val2017/000000039769.jpg"
102 | image = Image.open(requests.get(url, stream=True).raw)
103 | inputs = preprocessor(images=image, return_tensors="pt")
104 | with torch.no_grad():
105 | transformers_outputs = transformers_model(**inputs)
106 | for input_type in ["pt", "np"]:
107 | inputs = preprocessor(images=image, return_tensors=input_type)
108 | fai_outputs = fai_model(**inputs)
109 | self.assertIn("logits", fai_outputs)
110 | self.assertIsInstance(fai_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type])
111 | # Compare tensor outputs
112 | self.assertTrue(torch.allclose(torch.Tensor(fai_outputs.logits), transformers_outputs.logits, atol=1e-4))
113 |
114 | gc.collect()
115 |
116 | @parameterized.expand(SUPPORTED_ARCHITECTURES)
117 | def test_pipeline(self, model_arch):
118 | model_id, input_shape_dict, output_shape_dict = MODEL_DICT[model_arch]
119 | model = FuriosaAIModelForImageClassification.from_pretrained(
120 | model_id, export=True, input_shape_dict=input_shape_dict, output_shape_dict=output_shape_dict
121 | )
122 | preprocessor = AutoFeatureExtractor.from_pretrained(model_id)
123 | pipe = pipeline("image-classification", model=model, feature_extractor=preprocessor)
124 | outputs = pipe("http://images.cocodataset.org/val2017/000000039769.jpg")
125 | self.assertGreaterEqual(outputs[0]["score"], 0.0)
126 | self.assertTrue(isinstance(outputs[0]["label"], str))
127 | gc.collect()
128 |
--------------------------------------------------------------------------------
/optimum/furiosa/modeling.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import logging
16 | from typing import Callable, Dict, List, Optional, Union
17 |
18 | import numpy as np
19 | import torch
20 | import tqdm
21 | import transformers
22 | from datasets import Dataset
23 | from transformers import (
24 | AutoConfig,
25 | AutoModel,
26 | AutoModelForImageClassification,
27 | EvalPrediction,
28 | )
29 | from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
30 | from transformers.modeling_outputs import (
31 | ImageClassifierOutput,
32 | )
33 |
34 | from .modeling_base import FuriosaAIBaseModel
35 | from .utils import FURIOSA_DTYPE_TO_NUMPY_DTYPE
36 |
37 |
38 | logger = logging.getLogger(__name__)
39 |
40 |
41 | _FEATURE_EXTRACTOR_FOR_DOC = "AutoFeatureExtractor"
42 |
43 | MODEL_START_DOCSTRING = r"""
44 |     This model inherits from [`optimum.furiosa.FuriosaAIBaseModel`]. Check the superclass documentation for the generic methods the
45 |     library implements for all its models (such as downloading or saving).
46 |     Parameters:
47 |         model (`furiosa.runtime.model`): the main class used to run inference.
48 |         config (`transformers.PretrainedConfig`): [PretrainedConfig](https://huggingface.co/docs/transformers/main_classes/configuration#transformers.PretrainedConfig)
49 |             is the model configuration class with all the parameters of the model.
50 |             Initializing with a config file does not load the weights associated with the model, only the configuration.
51 |             Check out the [`~furiosa.modeling.FuriosaAIBaseModel.from_pretrained`] method to load the model weights.
52 |         device (`str`, defaults to `"CPU"`):
53 |             The device type for which the model will be optimized. The resulting compiled model will contain nodes specific to this device.
54 |         furiosa_config (`Optional[Dict]`, defaults to `None`):
55 |             The dictionary containing the information related to the model compilation.
56 |         compile (`bool`, defaults to `True`):
57 |             Disable the model compilation during the loading step when set to `False`.
58 | """
59 |
60 | IMAGE_INPUTS_DOCSTRING = r"""
61 | Args:
62 | pixel_values (`torch.Tensor`):
63 | Pixel values corresponding to the images in the current batch.
64 | Pixel values can be obtained from encoded images using [`AutoFeatureExtractor`](https://huggingface.co/docs/transformers/autoclass_tutorial#autofeatureextractor).
65 | """
66 |
67 |
68 | class FuriosaAIModel(FuriosaAIBaseModel):
69 | base_model_prefix = "furiosa_model"
70 | auto_model_class = AutoModel
71 |
72 | def __init__(
73 | self,
74 | model,
75 | config: transformers.PretrainedConfig = None,
76 | compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
77 | label_names: Optional[List[str]] = None,
78 | **kwargs,
79 | ):
80 | super().__init__(model, config, **kwargs)
81 | # Avoid warnings when creating a transformers pipeline
82 | AutoConfig.register(self.base_model_prefix, AutoConfig)
83 | self.auto_model_class.register(AutoConfig, self.__class__)
84 | self.device = torch.device("cpu")
85 |
86 | # Evaluation args
87 | self.compute_metrics = compute_metrics
88 | self.label_names = ["labels"] if label_names is None else label_names
89 |
90 | def to(self, device: str):
91 | """
92 | Use the specified `device` for inference. For example: "cpu" or "gpu". `device` can
93 | be in upper or lower case. To speed up first inference, call `.compile()` after `.to()`.
94 | """
95 | self._device = device.upper()
96 | self.sess = None
97 | return self
98 |
99 | def forward(self, *args, **kwargs):
100 | raise NotImplementedError
101 |
102 | def evaluation_loop(self, dataset: Dataset):
103 | """
104 | Run evaluation and returns metrics and predictions.
105 |
106 | Args:
107 | dataset (`datasets.Dataset`):
108 | Dataset to use for the evaluation step.
109 | """
110 | logger.info("***** Running evaluation *****")
111 |
112 | # from transformers import EvalPrediction
113 | from transformers.trainer_pt_utils import nested_concat
114 | from transformers.trainer_utils import EvalLoopOutput
115 |
116 | all_preds = None
117 | all_labels = None
118 | for step, inputs in tqdm.tqdm(enumerate(dataset), total=len(dataset)):
119 | has_labels = all(inputs.get(k) is not None for k in self.label_names)
120 | if has_labels:
121 | labels = tuple(np.array([inputs.get(name)]) for name in self.label_names)
122 | if len(labels) == 1:
123 | labels = labels[0]
124 | else:
125 | labels = None
126 |
127 | inputs = [
128 | np.array([inputs[key]], dtype=FURIOSA_DTYPE_TO_NUMPY_DTYPE[self.inputs_to_dtype[k]])
129 | for k, key in enumerate(self.input_names)
130 | if key in inputs
131 | ]
132 |
133 | preds = self.sess.run(inputs)
134 | if len(preds) == 1:
135 | preds = preds[0].numpy()
136 | all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100)
137 | all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100)
138 |
139 | if self.compute_metrics is not None and all_preds is not None and all_labels is not None:
140 | metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
141 | else:
142 | metrics = {}
143 | return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=len(dataset))
144 |
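# Illustrative sketch (editor's addition, not part of the original file): driving
# `evaluation_loop` with a `compute_metrics` callback, mirroring the usage in
# examples/quantization/image-classification. The model path and dataset name are
# placeholders, and the dataset is assumed to be preprocessed into "pixel_values".
def _example_evaluation_loop_usage():
    import evaluate
    from datasets import load_dataset

    accuracy = evaluate.load("accuracy")

    def compute_metrics(p):
        preds = p.predictions.argmax(axis=1)
        return accuracy.compute(predictions=preds, references=p.label_ids)

    model = FuriosaAIModelForImageClassification(
        "path/to/model_quantized.dfg",  # assumed path to a quantized model
        compute_metrics=compute_metrics,
        label_names=["labels"],
    )
    eval_dataset = load_dataset("beans", split="validation")  # assumed, preprocessed upstream
    return model.evaluation_loop(eval_dataset).metrics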
145 |
146 | IMAGE_CLASSIFICATION_EXAMPLE = r"""
147 | Example of image classification using `transformers.pipelines`:
148 | ```python
149 | >>> from transformers import {processor_class}, pipeline
150 | >>> from optimum.furiosa import {model_class}
151 |
152 | >>> preprocessor = {processor_class}.from_pretrained("{checkpoint}")
153 | >>> model = {model_class}.from_pretrained("{checkpoint}", export=True, input_shape_dict={{"pixel_values": [1, 3, 224, 224]}}, output_shape_dict={{"logits": [1, 1000]}})
154 | >>> pipe = pipeline("image-classification", model=model, feature_extractor=preprocessor)
155 | >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
156 | >>> outputs = pipe(url)
157 | ```
158 | """
159 |
160 |
161 | @add_start_docstrings(
162 | """
163 | FuriosaAI Model with an ImageClassifierOutput for image classification tasks.
164 | """,
165 | MODEL_START_DOCSTRING,
166 | )
167 | class FuriosaAIModelForImageClassification(FuriosaAIModel):
168 | export_feature = "image-classification"
169 | auto_model_class = AutoModelForImageClassification
170 |
171 | def __init__(self, model=None, config=None, **kwargs):
172 | super().__init__(model, config, **kwargs)
173 | self.input_names = ["pixel_values"]
174 |
175 | @add_start_docstrings_to_model_forward(
176 | IMAGE_INPUTS_DOCSTRING.format("batch_size, num_channels, height, width")
177 | + IMAGE_CLASSIFICATION_EXAMPLE.format(
178 | processor_class=_FEATURE_EXTRACTOR_FOR_DOC,
179 | model_class="FuriosaAIModelForImageClassification",
180 |         checkpoint="microsoft/resnet-50",
181 | )
182 | )
183 | def forward(
184 | self,
185 | pixel_values: Union[torch.Tensor, np.ndarray],
186 | **kwargs,
187 | ):
188 | np_inputs = isinstance(pixel_values, np.ndarray)
189 | if not np_inputs:
190 | pixel_values = np.array(pixel_values)
191 |
192 | # Run inference
193 | outputs = self.sess.run(pixel_values)
194 | logits = torch.from_numpy(outputs[0].numpy()) if not np_inputs else outputs[0].numpy()
195 | return ImageClassifierOutput(logits=logits)
196 |
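# Illustrative sketch (editor's addition): calling the model directly with a
# NumPy batch instead of going through `transformers.pipeline`. The model path
# and the [1, 3, 224, 224] input shape are assumptions for the example.
def _example_direct_forward():
    import numpy as np

    model = FuriosaAIModelForImageClassification.from_pretrained(
        "path/to/model_dir"  # assumed directory containing a compiled model
    )
    pixel_values = np.random.rand(1, 3, 224, 224).astype(np.float32)
    logits = model(pixel_values=pixel_values).logits  # np.ndarray for NumPy inputs
    return int(logits.argmax(-1))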
--------------------------------------------------------------------------------
/optimum/furiosa/configuration.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from dataclasses import asdict, dataclass
16 | from enum import Enum
17 | from typing import Optional, Sequence, Union
18 |
19 | import onnx
20 | from datasets import Dataset
21 |
22 | from furiosa.quantizer import CalibrationMethod, Calibrator
23 | from optimum.configuration_utils import BaseConfig
24 |
25 |
26 | DEFAULT_QUANTIZATION_CONFIG = {}
27 |
28 |
29 | @dataclass
30 | class CalibrationConfig:
31 | """
32 |     CalibrationConfig is the configuration class handling all the FuriosaAI parameters related to the calibration
33 | step of static quantization.
34 |
35 | Args:
36 | dataset_name (`str`):
37 | The name of the calibration dataset.
38 | dataset_config_name (`str`):
39 | The name of the calibration dataset configuration.
40 | dataset_split (`str`):
41 | Which split of the dataset is used to perform the calibration step.
42 | dataset_num_samples (`int`):
43 | The number of samples composing the calibration dataset.
44 | method (`CalibrationMethod`):
45 | The method chosen to calculate the activations quantization parameters using the calibration dataset.
46 | percentage (`Optional[float]`, defaults to `None`):
47 | The percentage to use when computing the activations quantization ranges when performing the calibration
48 | step using the Percentile method.
49 | """
50 |
51 | dataset_name: str
52 | dataset_config_name: str
53 | dataset_split: str
54 | dataset_num_samples: int
55 | method: CalibrationMethod
56 | percentage: Optional[float] = None
57 |
58 | def create_calibrator(
59 | self,
60 | model: Union[onnx.ModelProto, bytes],
61 | ) -> Calibrator:
62 | return Calibrator(model, self.method, percentage=self.percentage)
63 |
64 |
65 | class AutoCalibrationConfig:
66 | @staticmethod
67 |     def create_calibration_config(dataset: Dataset, method: CalibrationMethod, percentile: Optional[float] = None) -> CalibrationConfig:
68 | return CalibrationConfig(
69 | dataset_name=dataset.info.builder_name,
70 | dataset_config_name=dataset.info.config_name,
71 | dataset_split=str(dataset.split),
72 | dataset_num_samples=dataset.num_rows,
73 | method=method,
74 | percentage=percentile,
75 | )
76 |
77 | @staticmethod
78 | def minmax_asym(dataset: Dataset) -> CalibrationConfig:
79 | """
80 | Args:
81 | dataset (`Dataset`):
82 | The dataset to use when performing the calibration step.
83 |
84 | Returns:
85 | The calibration configuration.
86 | """
87 | return AutoCalibrationConfig.create_calibration_config(
88 | dataset,
89 | method=CalibrationMethod.MIN_MAX_ASYM,
90 | )
91 |     @staticmethod
92 |     def minmax_sym(dataset: Dataset) -> CalibrationConfig:
93 |         """
94 |         Args:
95 |             dataset (`Dataset`):
96 |                 The dataset to use when performing the calibration step.
97 |
98 |         Returns:
99 |             The calibration configuration.
100 |         """
101 |         return AutoCalibrationConfig.create_calibration_config(
102 |             dataset,
103 |             method=CalibrationMethod.MIN_MAX_SYM,
104 |         )
105 |
106 | @staticmethod
107 | def entropy_asym(
108 | dataset: Dataset,
109 | ) -> CalibrationConfig:
110 | """
111 | Args:
112 | dataset (`Dataset`):
113 | The dataset to use when performing the calibration step.
114 |
115 | Returns:
116 | The calibration configuration.
117 | """
118 | return AutoCalibrationConfig.create_calibration_config(
119 | dataset,
120 | method=CalibrationMethod.ENTROPY_ASYM,
121 | )
122 |
123 | @staticmethod
124 | def entropy_sym(
125 | dataset: Dataset,
126 | ) -> CalibrationConfig:
127 | """
128 | Args:
129 | dataset (`Dataset`):
130 | The dataset to use when performing the calibration step.
131 |
132 | Returns:
133 | The calibration configuration.
134 | """
135 | return AutoCalibrationConfig.create_calibration_config(
136 | dataset,
137 | method=CalibrationMethod.ENTROPY_SYM,
138 | )
139 |
140 | @staticmethod
141 | def percentiles_asym(dataset: Dataset, percentile: float = 99.999) -> CalibrationConfig:
142 | """
143 | Args:
144 | dataset (`Dataset`):
145 | The dataset to use when performing the calibration step.
146 | percentile (`float`):
147 | The percentile to use when computing the activations quantization ranges.
148 |
149 | Returns:
150 | The calibration configuration.
151 | """
152 | return AutoCalibrationConfig.create_calibration_config(
153 | dataset,
154 | method=CalibrationMethod.PERCENTILE_ASYM,
155 | percentile=percentile,
156 | )
157 |
158 | @staticmethod
159 | def percentiles_sym(dataset: Dataset, percentile: float = 99.999) -> CalibrationConfig:
160 | """
161 | Args:
162 | dataset (`Dataset`):
163 | The dataset to use when performing the calibration step.
164 | percentile (`float`):
165 | The percentile to use when computing the activations quantization ranges.
166 |
167 | Returns:
168 | The calibration configuration.
169 | """
170 | return AutoCalibrationConfig.create_calibration_config(
171 | dataset,
172 | method=CalibrationMethod.PERCENTILE_SYM,
173 | percentile=percentile,
174 | )
175 |
176 | @staticmethod
177 | def mse_asym(dataset: Dataset) -> CalibrationConfig:
178 | """
179 | Args:
180 | dataset (`Dataset`):
181 | The dataset to use when performing the calibration step.
182 |
183 | Returns:
184 | The calibration configuration.
185 | """
186 | return AutoCalibrationConfig.create_calibration_config(
187 | dataset,
188 | method=CalibrationMethod.MSE_ASYM,
189 | )
190 |
191 | @staticmethod
192 | def mse_sym(dataset: Dataset) -> CalibrationConfig:
193 | """
194 | Args:
195 | dataset (`Dataset`):
196 | The dataset to use when performing the calibration step.
197 |
198 | Returns:
199 | The calibration configuration.
200 | """
201 | return AutoCalibrationConfig.create_calibration_config(
202 | dataset,
203 | method=CalibrationMethod.MSE_SYM,
204 | )
205 |
206 | @staticmethod
207 | def sqnr_asym(dataset: Dataset) -> CalibrationConfig:
208 | """
209 | Args:
210 | dataset (`Dataset`):
211 | The dataset to use when performing the calibration step.
212 |
213 | Returns:
214 | The calibration configuration.
215 | """
216 | return AutoCalibrationConfig.create_calibration_config(
217 | dataset,
218 | method=CalibrationMethod.SQNR_ASYM,
219 | )
220 |
221 | @staticmethod
222 | def sqnr_sym(dataset: Dataset) -> CalibrationConfig:
223 | """
224 | Args:
225 | dataset (`Dataset`):
226 | The dataset to use when performing the calibration step.
227 |
228 | Returns:
229 | The calibration configuration.
230 | """
231 | return AutoCalibrationConfig.create_calibration_config(
232 | dataset,
233 | method=CalibrationMethod.SQNR_SYM,
234 | )
235 |
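# Illustrative sketch (editor's addition): choosing a calibration method. The
# dataset name is a placeholder; any `datasets.Dataset` works.
def _example_calibration_config():
    from datasets import load_dataset

    dataset = load_dataset("beans", split="train[:100]")  # assumed dataset
    # Min-max (asymmetric) needs no extra parameter, while the percentile-based
    # methods also take the percentile at which activation ranges are clipped.
    minmax_config = AutoCalibrationConfig.minmax_asym(dataset)
    percentile_config = AutoCalibrationConfig.percentiles_asym(dataset, percentile=99.99)
    return minmax_config, percentile_config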
236 |
237 | @dataclass
238 | class QuantizationConfig:
239 | """
240 | QuantizationConfig is the configuration class handling all the FuriosaAI quantization parameters.
241 |
242 | Args:
243 | with_quantize (`bool`, defaults to `True`):
244 | Whether to put a Quantize operator at the beginning of the resulting model.
245 | normalized_pixel_outputs (`Optional[Sequence[int]]`, defaults to `None`):
246 | A sequence of indices of output tensors in the ONNX model that produce pixel values in a normalized format
247 | ranging from 0.0 to 1.0. If specified, the corresponding output tensors in the resulting quantized model
248 | will generate pixel values in an unnormalized format from 0 to 255, represented as unsigned 8-bit integers (uint8).
249 | """
250 |
251 | with_quantize: bool = True
252 |     normalized_pixel_outputs: Optional[Sequence[int]] = None
253 |
254 |
255 | class FuriosaAIConfig(BaseConfig):
256 | CONFIG_NAME = "furiosa_config.json"
257 | FULL_CONFIGURATION_FILE = "furiosa_config.json"
258 |
259 | def __init__(
260 | self,
261 | opset: Optional[int] = None,
262 | quantization: Optional[QuantizationConfig] = None,
263 | calibration: Optional[CalibrationConfig] = None,
264 | **kwargs,
265 | ):
266 | super().__init__()
267 | self.quantization = self.dataclass_to_dict(quantization)
268 | self.calibration = self.dataclass_to_dict(calibration)
269 | self.optimum_version = kwargs.pop("optimum_version", None)
270 |
271 | @staticmethod
272 | def dataclass_to_dict(config) -> dict:
273 | new_config = {}
274 | if config is None:
275 | return new_config
276 | if isinstance(config, dict):
277 | return config
278 | for k, v in asdict(config).items():
279 | if isinstance(v, Enum):
280 | v = v.name
281 | elif isinstance(v, list):
282 | v = [elem.name if isinstance(elem, Enum) else elem for elem in v]
283 | new_config[k] = v
284 | return new_config
285 |
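# Illustrative sketch (editor's addition): bundling the quantization and
# calibration parameters into a `FuriosaAIConfig` and serializing them next to
# the model, as `FuriosaAIQuantizer.quantize` does; the path is a placeholder.
def _example_save_furiosa_config(quantization_config, calibration_config):
    furiosa_config = FuriosaAIConfig(
        quantization=quantization_config,
        calibration=calibration_config,
    )
    furiosa_config.save_pretrained("path/to/output_dir")  # writes furiosa_config.json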
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/examples/quantization/image-classification/run_image_classification.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | # Copyright 2023 The HuggingFace Team. All rights reserved.
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """ Finetuning the library models for image classification."""
18 | # You can also adapt this script on your own image classification task. Pointers for this are left as comments.
19 | import json
20 | import logging
21 | import os
22 | import sys
23 | from dataclasses import dataclass, field
24 | from functools import partial
25 | from pathlib import Path
26 | from typing import Optional
27 |
28 | import datasets
29 | import numpy as np
30 | import torch
31 | import transformers
32 | from datasets import load_dataset
33 | from evaluate import load
34 | from torchvision.transforms import CenterCrop, Compose, Normalize, Resize, ToTensor
35 | from transformers import AutoConfig, AutoFeatureExtractor, EvalPrediction, HfArgumentParser, TrainingArguments
36 | from transformers.utils.versions import require_version
37 |
38 | from optimum.furiosa import FuriosaAIModelForImageClassification, FuriosaAIQuantizer
39 | from optimum.furiosa.configuration import AutoCalibrationConfig, QuantizationConfig
40 | from optimum.furiosa.utils import export_model_to_onnx
41 |
42 |
43 | logger = logging.getLogger(__name__)
44 |
45 | require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")
46 |
47 |
48 | @dataclass
49 | class DataTrainingArguments:
50 | """
51 | Arguments pertaining to what data we are going to input our model for training and eval.
52 |
53 | Using `HfArgumentParser` we can turn this class
54 | into argparse arguments to be able to specify them on
55 | the command line.
56 | """
57 |
58 | dataset_name: Optional[str] = field(
59 | default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
60 | )
61 | dataset_config_name: Optional[str] = field(
62 | default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
63 | )
64 | max_seq_length: int = field(
65 | default=128,
66 | metadata={
67 | "help": "The maximum total input sequence length after tokenization. Sequences longer "
68 | "than this will be truncated, sequences shorter will be padded."
69 | },
70 | )
71 | overwrite_cache: bool = field(
72 | default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."}
73 | )
74 | max_eval_samples: Optional[int] = field(
75 | default=None,
76 | metadata={
77 | "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
78 | "value if set."
79 | },
80 | )
81 | max_predict_samples: Optional[int] = field(
82 | default=None,
83 | metadata={
84 | "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
85 | "value if set."
86 | },
87 | )
88 | train_dir: Optional[str] = field(default=None, metadata={"help": "A directory path for the training data."})
89 | validation_dir: Optional[str] = field(default=None, metadata={"help": "A directory path for the validation data."})
90 |
91 |
92 | @dataclass
93 | class ModelArguments:
94 | """
95 | Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
96 | """
97 |
98 | model_name_or_path: str = field(
99 | metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
100 | )
101 | cache_dir: Optional[str] = field(
102 | default=None,
103 | metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
104 | )
105 | batch_size: int = field(
106 | default=1,
107 | metadata={"help": "The batch size for the model."},
108 | )
109 | num_labels: int = field(
110 | default=3,
111 |         metadata={"help": "The number of labels (classes) for the model."},
112 | )
113 |
114 |
115 | @dataclass
116 | class OptimizationArguments:
117 | """
118 | Arguments pertaining to what type of optimization we are going to apply on the model.
119 | """
120 |
121 | quantization_approach: str = field(
122 | default="static",
123 |         metadata={"help": "The quantization approach. Supported approaches are static and dynamic."},
124 | )
125 | calibration_method: str = field(
126 | default="minmax_asym",
127 | metadata={
128 | "help": "The method chosen to calculate the activation quantization parameters using the calibration "
129 |         "dataset. Currently supported calibration methods are minmax, entropy and percentile."
130 | },
131 | )
132 | num_calibration_samples: int = field(
133 | default=100,
134 | metadata={"help": "Number of examples to use for the calibration step resulting from static quantization."},
135 | )
136 | num_calibration_shards: int = field(
137 | default=1,
138 | metadata={
139 | "help": "How many shards to split the calibration dataset into. Useful for the entropy and percentile "
140 | "calibration method."
141 | },
142 | )
143 | calibration_batch_size: int = field(
144 | default=1,
145 | metadata={"help": "The batch size for the calibration step."},
146 | )
147 | calibration_histogram_percentile: float = field(
148 | default=99.999,
149 | metadata={"help": "The percentile used for the percentile calibration method."},
150 | )
151 |
152 |
153 | def main():
154 | # We now keep distinct sets of args, for a cleaner separation of concerns.
155 | parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments, OptimizationArguments))
156 | if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
157 | # If we pass only one argument to the script and it's the path to a json file,
158 | # let's parse it to get our arguments.
159 |         model_args, data_args, training_args, optim_args = parser.parse_json_file(
160 | json_file=os.path.abspath(sys.argv[1])
161 | )
162 | else:
163 | model_args, data_args, training_args, optim_args = parser.parse_args_into_dataclasses()
164 |
165 | # Setup logging
166 | logging.basicConfig(
167 | format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
168 | datefmt="%m/%d/%Y %H:%M:%S",
169 | handlers=[logging.StreamHandler(sys.stdout)],
170 | )
171 |
172 | log_level = training_args.get_process_log_level()
173 | logger.setLevel(log_level)
174 | datasets.utils.logging.set_verbosity(log_level)
175 | transformers.utils.logging.set_verbosity(log_level)
176 | transformers.utils.logging.enable_default_handler()
177 | transformers.utils.logging.enable_explicit_format()
178 |
179 | logger.info(f"Optimization with the following parameters {optim_args}")
180 |
181 | if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
182 | raise ValueError(
183 | f"Output directory ({training_args.output_dir}) already exists and is not empty. "
184 | "Use --overwrite_output_dir to overcome."
185 | )
186 |
187 | # Sanity checks
188 | if data_args.dataset_name is None and data_args.train_dir is None and data_args.validation_dir is None:
189 | raise ValueError("Need either a dataset name or a training/validation folder.")
190 |
191 | os.makedirs(training_args.output_dir, exist_ok=True)
192 |
193 | # Get the datasets: you can either provide your own training and evaluation files (see below)
194 | # or specify a Dataset from the hub (the dataset will be downloaded automatically from the datasets Hub).
195 | if data_args.dataset_name is not None:
196 | # Downloading and loading a dataset from the hub.
197 | dataset = load_dataset(data_args.dataset_name)
198 | else:
199 | data_files = {}
200 | if data_args.train_dir is not None:
201 | data_files["train"] = os.path.join(data_args.train_dir, "**")
202 | if data_args.validation_dir is not None:
203 | data_files["validation"] = os.path.join(data_args.validation_dir, "**")
204 | dataset = load_dataset(
205 | "imagefolder",
206 | data_files=data_files,
207 | cache_dir=model_args.cache_dir,
208 | task="image-classification",
209 | )
210 | # See more about loading custom images at
211 | # https://huggingface.co/docs/datasets/v2.0.0/en/image_process#imagefolder.
212 |
213 | labels_column = (
214 | "labels" if "labels" in dataset["validation"].column_names else dataset["validation"].column_names[1]
215 | )
216 |
217 | feature_extractor = AutoFeatureExtractor.from_pretrained(model_args.model_name_or_path)
218 |
219 | # Define torchvision transforms to be applied to each image.
220 | normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
221 | image_size = feature_extractor.size["shortest_edge"]
222 | transforms = Compose(
223 | [
224 | Resize(image_size),
225 | CenterCrop(image_size),
226 | ToTensor(),
227 | normalize,
228 | ]
229 | )
230 |
231 | def preprocess_function(example_batch):
232 | """Apply transforms across a batch."""
233 | example_batch["pixel_values"] = [
234 | transforms(image.convert("RGB")).to(torch.float32).numpy() for image in example_batch["image"]
235 | ]
236 | return example_batch
237 |
238 | metric = load("accuracy")
239 |
240 | # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
241 | # predictions and label_ids field) and has to return a dictionary string to float.
242 | def compute_metrics(p: EvalPrediction):
243 | preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
244 | preds = np.argmax(preds, axis=1)
245 |
246 | result = metric.compute(predictions=preds, references=p.label_ids)
247 | return result
248 |
249 | # Export the model
250 | export_model_to_onnx(
251 | model_args.model_name_or_path,
252 | save_dir=training_args.output_dir,
253 | input_shape_dict={"pixel_values": [model_args.batch_size, 3, image_size, image_size]},
254 | output_shape_dict={"logits": [model_args.batch_size, model_args.num_labels]},
255 | )
256 |
257 | # Create the quantizer
258 | quantizer = FuriosaAIQuantizer.from_pretrained(training_args.output_dir, file_name="model.onnx")
259 |
260 | # Create the quantization configuration containing all the quantization parameters
261 | qconfig = QuantizationConfig()
262 |
263 | ranges = None
264 |
265 | calibration_dataset = dataset["train"]
266 | if optim_args.num_calibration_samples is not None:
267 | calibration_dataset = calibration_dataset.shuffle(seed=training_args.seed).select(
268 | range(optim_args.num_calibration_samples)
269 | )
270 |
271 | # all images are loaded in memory, which could prove expensive if num_calibration_samples is large
272 | calibration_dataset = calibration_dataset.map(
273 | partial(preprocess_function),
274 | batched=True,
275 | load_from_cache_file=not data_args.overwrite_cache,
276 | desc="Running preprocessing on calibration dataset",
277 | )
278 |
279 | # Remove the unnecessary columns of the calibration dataset before the calibration step
280 | calibration_dataset = quantizer.clean_calibration_dataset(calibration_dataset)
281 |
282 | # Create the calibration configuration given the selected calibration method
283 | if optim_args.calibration_method == "percentile_asym":
284 | calibration_config = AutoCalibrationConfig.percentiles_asym(
285 | calibration_dataset,
286 | percentile=optim_args.calibration_histogram_percentile,
287 | )
288 | else:
289 | calibration_config = AutoCalibrationConfig.minmax_asym(calibration_dataset)
290 |
291 | if not 1 <= optim_args.num_calibration_shards <= len(calibration_dataset):
292 | raise ValueError(
293 | f"Invalid value of number of shards {optim_args.num_calibration_shards} chosen to split the calibration"
294 |             f" dataset, it should be greater than 0 and less than or equal to the number of samples "
295 | f"{len(calibration_dataset)}."
296 | )
297 |
298 | for i in range(optim_args.num_calibration_shards):
299 | shard = calibration_dataset.shard(optim_args.num_calibration_shards, i)
300 | quantizer.partial_fit(
301 | dataset=shard,
302 | calibration_config=calibration_config,
303 | batch_size=optim_args.calibration_batch_size,
304 | )
305 | ranges = quantizer.compute_ranges()
306 |
307 | # Apply quantization on the model
308 | quantizer.quantize(
309 | save_dir=training_args.output_dir,
310 | calibration_tensors_range=ranges,
311 | quantization_config=qconfig,
312 | )
313 |
314 | # Evaluation
315 | if training_args.do_eval:
316 | logger.info("*** Evaluate ***")
317 |
318 | model_config = AutoConfig.from_pretrained(model_args.model_name_or_path)
319 | eval_dataset = dataset["validation"]
320 | if data_args.max_eval_samples is not None:
321 | eval_dataset = eval_dataset.shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples))
322 |
323 | try:
324 | eval_dataset = eval_dataset.align_labels_with_mapping(
325 | label2id=model_config.label2id, label_column=labels_column
326 | )
327 | except Exception:
328 | logger.warning(
329 | f"\nModel label mapping: {model_config.label2id}"
330 | f"\nDataset label features: {eval_dataset.features[labels_column]}"
331 | f"\nCould not guarantee the model label mapping and the dataset labels match."
332 |                 f" Evaluation results may suffer from an incorrect label mapping."
333 | )
334 |
335 | # Set the validation transforms
336 | eval_dataset = eval_dataset.with_transform(preprocess_function)
337 |
338 | furiosa_model = FuriosaAIModelForImageClassification(
339 | Path(training_args.output_dir) / "model_quantized.dfg",
340 | compute_metrics=compute_metrics,
341 | label_names=[labels_column],
342 | )
343 | outputs = furiosa_model.evaluation_loop(eval_dataset)
344 | # Save metrics
345 | with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f:
346 | json.dump(outputs.metrics, f, indent=4, sort_keys=True)
347 |
348 |
349 | if __name__ == "__main__":
350 | main()
351 |
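# Illustrative sketch (editor's addition): an assumed invocation of this script.
# Each flag maps to a field of the dataclasses defined above; the checkpoint and
# dataset names are placeholders.
#
#   python run_image_classification.py \
#       --model_name_or_path microsoft/resnet-50 \
#       --dataset_name beans \
#       --quantization_approach static \
#       --calibration_method minmax_asym \
#       --num_calibration_samples 100 \
#       --do_eval \
#       --output_dir ./quantized_model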
--------------------------------------------------------------------------------
/optimum/furiosa/quantization.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import logging
16 | import os
17 | from pathlib import Path
18 | from typing import TYPE_CHECKING, Callable, Dict, Optional, Tuple, Union
19 |
20 | import numpy as np
21 | import onnx
22 | import tqdm
23 | from datasets import Dataset, load_dataset
24 | from transformers import AutoConfig
25 |
26 | from furiosa.optimizer import optimize_model
27 | from furiosa.quantizer import quantize
28 |
29 | from .configuration import CalibrationConfig, FuriosaAIConfig, QuantizationConfig
30 | from .modeling import FuriosaAIModel
31 | from .quantization_base import OptimumQuantizer
32 |
33 |
34 | if TYPE_CHECKING:
35 | from transformers import PretrainedConfig
36 |
37 | LOGGER = logging.getLogger(__name__)
38 |
39 |
40 | class FuriosaAICalibrationDataReader:
41 | __slots__ = ["batch_size", "dataset", "_dataset_iter", "input_datatypes"]
42 |
43 | def __init__(self, dataset: Dataset, input_datatypes, batch_size: int = 1):
44 | if dataset is None:
45 | raise ValueError("Provided dataset is None.")
46 |
47 | if input_datatypes is None:
48 | raise ValueError("Provided input_datatypes is None.")
49 |
50 | if batch_size <= 0:
51 | raise ValueError(f"Provided batch_size should be >= 1 (got: {batch_size}).")
52 |
53 | self.dataset = dataset
54 | self.input_datatypes = input_datatypes
55 | self.batch_size = batch_size
56 |
57 | self._dataset_iter = iter(self.dataset)
58 |
59 | def __len__(self):
60 | return len(self.dataset) // self.batch_size
61 |
62 | def __next__(self):
63 | featurized_samples = None
64 | try:
65 | featurized_samples = []
66 | for _ in range(self.batch_size):
67 | sample = next(self._dataset_iter)
68 |
69 |                 input_list = [[] for _ in range(len(sample))]
70 | for i, name in enumerate(sample):
71 | input_list[i] += [sample[name]]
72 | input_list = [
73 | np.array(d, onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[self.input_datatypes[i]])
74 | for i, d in enumerate(input_list)
75 | ]
76 |
77 | featurized_samples.append(input_list)
78 |
79 | except StopIteration:
80 | raise StopIteration
81 |
82 | if len(featurized_samples) > 0:
83 | return featurized_samples
84 |
85 | raise StopIteration
86 |
87 | def __iter__(self):
88 | return self
89 |
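# Illustrative sketch (editor's addition): iterating the reader manually. The
# element type 1 corresponds to `onnx.TensorProto.FLOAT`; the dataset contents
# are placeholders shaped like a tiny image batch.
def _example_data_reader():
    data = Dataset.from_dict({"pixel_values": [[[[0.0] * 4] * 4] * 3] * 8})
    reader = FuriosaAICalibrationDataReader(data, input_datatypes=[1], batch_size=4)
    for batch in reader:
        # each batch is a list of per-sample input lists of np.ndarray
        pass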
90 |
91 | class FuriosaAIQuantizer(OptimumQuantizer):
92 | """
93 | Handles the FuriosaAI quantization process for models shared on huggingface.co/models.
94 | """
95 |
96 | def __init__(self, model_path: Path, config: Optional["PretrainedConfig"] = None):
97 | """
98 | Args:
99 | model_path (`Path`):
100 | Path to the onnx model files you want to quantize.
101 | config (`Optional[PretrainedConfig]`, *optional*):
102 | The configuration of the model.
103 | """
104 | super().__init__()
105 | self.model_path = model_path
106 | self.config = config
107 | if self.config is None:
108 | try:
109 | self.config = AutoConfig.from_pretrained(self.model_path.parent)
110 | except OSError:
111 | LOGGER.warning(
112 |                     f"Could not load the config for {self.model_path} automatically. This might make "
113 |                     "the quantized model harder to use because it will not be able to be loaded by a FuriosaAIModel without "
114 |                     "specifying the configuration explicitly."
115 | )
116 | self._calibrator = None
117 | self._calibration_config = None
118 |
119 | @classmethod
120 | def from_pretrained(
121 | cls,
122 | model_or_path: Union["FuriosaAIQuantizer", str, Path],
123 | file_name: Optional[str] = None,
124 | ) -> "FuriosaAIQuantizer":
125 | """
126 | Instantiates a `FuriosaAIQuantizer` from a model path.
127 |
128 | Args:
129 | model_or_path (`Union[FuriosaAIModel, str, Path]`):
130 | Can be either:
131 |                 - A path to a saved exported ONNX Intermediate Representation (IR) model, e.g., `./my_model_directory/`.
132 |                 - Or a `FuriosaAIModelForXX` class, e.g., `FuriosaAIModelForImageClassification`.
133 | file_name(`Optional[str]`, *optional*):
134 | Overwrites the default model file name from `"model.onnx"` to `file_name`.
135 | This allows you to load different model files from the same repository or directory.
136 | Returns:
137 | An instance of `FuriosaAIQuantizer`.
138 | """
139 | furiosa_quantizer_error_message = "FuriosaAIQuantizer does not support multi-file quantization. Please create separate FuriosaAIQuantizer instances for each model/file, by passing the argument `file_name` to FuriosaAIQuantizer.from_pretrained()."
140 |
141 | if isinstance(model_or_path, str):
142 | model_or_path = Path(model_or_path)
143 |
144 | path = None
145 | if isinstance(model_or_path, Path) and file_name is None:
146 | onnx_files = list(model_or_path.glob("*.onnx"))
147 | if len(onnx_files) == 0:
148 | raise FileNotFoundError(f"Could not find any model file in {model_or_path}")
149 | elif len(onnx_files) > 1:
150 | raise RuntimeError(
151 | f"Found too many ONNX model files in {model_or_path}. {furiosa_quantizer_error_message}"
152 | )
153 | file_name = onnx_files[0].name
154 |
155 | if isinstance(model_or_path, FuriosaAIModel):
156 | if path is None:
157 | if isinstance(model_or_path.model, str) and model_or_path.model.endswith(".onnx"):
158 | path = Path(model_or_path.model)
159 | else:
160 | raise ValueError(
161 | "Currently, quantization of only ONNX files is supported using the optimum-furiosa repository!"
162 | )
163 | elif os.path.isdir(model_or_path):
164 | path = Path(model_or_path) / file_name
165 | else:
166 | raise ValueError(f"Unable to load model from {model_or_path}.")
167 | return cls(path)
168 |
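    # Illustrative sketch (editor's addition): instantiating a quantizer from a
    # directory holding a single exported ONNX file; the path is a placeholder.
    #
    #     quantizer = FuriosaAIQuantizer.from_pretrained(
    #         "path/to/model_dir", file_name="model.onnx"
    #     )
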
169 | def fit(
170 | self,
171 | dataset: Dataset,
172 | calibration_config: CalibrationConfig,
173 | batch_size: int = 1,
174 | ) -> Dict[str, Tuple[float, float]]:
175 | """
176 | Performs the calibration step and computes the quantization ranges.
177 |
178 | Args:
179 | dataset (`Dataset`):
180 | The dataset to use when performing the calibration step.
181 | calibration_config ([`~CalibrationConfig`]):
182 | The configuration containing the parameters related to the calibration step.
183 | batch_size (`int`, *optional*, defaults to 1):
184 | The batch size to use when collecting the quantization ranges values.
185 |
186 | Returns:
187 |             The dictionary mapping the node names to their quantization ranges.
188 | """
189 | # If a dataset is provided, then we are in a static quantization mode
190 | LOGGER.info(
191 |             f"Using static quantization scheme ("
192 | f"dataset: {calibration_config.dataset_name}, method: {calibration_config.method}"
193 | f")"
194 | )
195 |
196 | self.partial_fit(
197 | dataset,
198 | calibration_config,
199 | batch_size,
200 | )
201 | return self.compute_ranges()
202 |
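    # Illustrative sketch (editor's addition): one-shot calibration, assuming a
    # preprocessed `calibration_dataset` that only contains model input columns:
    #
    #     calibration_config = AutoCalibrationConfig.minmax_asym(calibration_dataset)
    #     ranges = quantizer.fit(calibration_dataset, calibration_config, batch_size=1)
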
203 | def _load_model_and_optimize(self):
204 | model = onnx.load(Path(self.model_path).as_posix())
205 | self.onnx_model = optimize_model(model)
206 |
207 | def partial_fit(self, dataset: Dataset, calibration_config: CalibrationConfig, batch_size: int = 1):
208 | """
209 | Performs the calibration step and collects the quantization ranges without computing them.
210 |
211 | Args:
212 | dataset (`Dataset`):
213 | The dataset to use when performing the calibration step.
214 | calibration_config (`CalibrationConfig`):
215 | The configuration containing the parameters related to the calibration step.
216 | batch_size (`int`, *optional*, defaults to 1):
217 | The batch size to use when collecting the quantization ranges values.
218 | """
219 | self._calibration_config = calibration_config
220 |
221 | # If no calibrator, then create one
222 | if calibration_config.method is not None:
223 | LOGGER.info(f"Creating calibrator: {calibration_config.method}({calibration_config})")
224 | self._load_model_and_optimize()
225 |
226 | self._calibrator = calibration_config.create_calibrator(
227 | model=self.onnx_model,
228 | )
229 |
230 | def get_input_datatypes(model):
231 | input_datatypes = []
232 |
233 | for input in model.graph.input:
234 | input_type = input.type.tensor_type.elem_type
235 | input_datatypes.extend([input_type])
236 |
237 | return input_datatypes
238 |
239 | input_datatypes = get_input_datatypes(self.onnx_model)
240 |
241 | LOGGER.info("Collecting tensors statistics...")
242 | reader = FuriosaAICalibrationDataReader(dataset, input_datatypes, batch_size)
243 | for data in tqdm.tqdm(reader):
244 | self._calibrator.collect_data(data)
245 |
246 | def compute_ranges(self) -> Dict[str, Tuple[float, float]]:
247 | """
248 | Computes the quantization ranges.
249 |
250 | Returns:
251 |             The dictionary mapping the node names to their quantization ranges.
252 | """
253 | if self._calibrator is None:
254 | raise ValueError(
255 |                 "Calibrator is None, please call the `partial_fit` or `fit` method at least once to compute ranges."
256 | )
257 |
258 | LOGGER.info("Computing calibration ranges")
259 | return self._calibrator.compute_range()
260 |
261 | def quantize(
262 | self,
263 | quantization_config: QuantizationConfig,
264 | save_dir: Union[str, Path],
265 | file_suffix: Optional[str] = "quantized",
266 | calibration_tensors_range: Optional[Dict[str, Tuple[float, float]]] = None,
267 | ) -> Path:
268 | """
269 | Quantizes a model given the optimization specifications defined in `quantization_config`.
270 |
271 | Args:
272 | quantization_config (`QuantizationConfig`):
273 | The configuration containing the parameters related to quantization.
274 | save_dir (`Union[str, Path]`):
275 | The directory where the quantized model should be saved.
276 | file_suffix (`Optional[str]`, *optional*, defaults to `"quantized"`):
277 | The file_suffix used to save the quantized model.
278 | calibration_tensors_range (`Optional[Dict[NodeName, Tuple[float, float]]]`, *optional*):
279 | The dictionary mapping the nodes name to their quantization ranges, used and required only when applying
280 | static quantization.
281 |
282 | Returns:
283 | The path of the resulting quantized model.
284 | """
285 |
286 | save_dir = Path(save_dir)
287 | save_dir.mkdir(parents=True, exist_ok=True)
288 |
289 |         if getattr(self, "onnx_model", None) is None:  # not set until the model has been loaded and optimized
290 | self._load_model_and_optimize()
291 |
292 | LOGGER.info("Quantizing model...")
293 | model_quantized = quantize(
294 | self.onnx_model,
295 | calibration_tensors_range,
296 | with_quantize=quantization_config.with_quantize,
297 | normalized_pixel_outputs=quantization_config.normalized_pixel_outputs,
298 | )
299 |
300 | suffix = f"_{file_suffix}" if file_suffix else ""
301 | quantized_model_path = save_dir.joinpath(f"{self.model_path.stem}{suffix}").with_suffix(".dfg")
302 | LOGGER.info(f"Saving quantized model at: {save_dir}")
303 | with open(quantized_model_path.as_posix(), "wb") as f:
304 | f.write(bytes(model_quantized))
305 |
306 | # Create and save the configuration summarizing all the parameters related to quantization
307 | furiosa_config = FuriosaAIConfig(quantization=quantization_config, calibration=self._calibration_config)
308 | furiosa_config.save_pretrained(save_dir)
309 |
310 | if self.config is not None:
311 | self.config.save_pretrained(save_dir)
312 |
313 | return Path(save_dir)
314 |
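    # Illustrative sketch (editor's addition): applying static quantization with
    # previously computed `ranges`; the output directory is a placeholder.
    #
    #     qconfig = QuantizationConfig(with_quantize=True)
    #     quantizer.quantize(
    #         quantization_config=qconfig,
    #         save_dir="path/to/output_dir",
    #         calibration_tensors_range=ranges,
    #     )
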
315 | def get_calibration_dataset(
316 | self,
317 | dataset_name: str,
318 | num_samples: int = 100,
319 | dataset_config_name: Optional[str] = None,
320 | dataset_split: Optional[str] = None,
321 | preprocess_function: Optional[Callable] = None,
322 | preprocess_batch: bool = True,
323 | seed: int = 2016,
324 | use_auth_token: bool = False,
325 | ) -> Dataset:
326 | """
327 | Creates the calibration `datasets.Dataset` to use for the post-training static quantization calibration step.
328 |
329 | Args:
330 | dataset_name (`str`):
331 | The dataset repository name on the Hugging Face Hub or path to a local directory containing data files
332 |                 to load for the calibration step.
333 | num_samples (`int`, *optional*, defaults to 100):
334 | The maximum number of samples composing the calibration dataset.
335 | dataset_config_name (`Optional[str]`, *optional*):
336 | The name of the dataset configuration.
337 | dataset_split (`Optional[str]`, *optional*):
338 | Which split of the dataset to use to perform the calibration step.
339 | preprocess_function (`Optional[Callable]`, *optional*):
340 | Processing function to apply to each example after loading dataset.
341 | preprocess_batch (`bool`, *optional*, defaults to `True`):
342 | Whether the `preprocess_function` should be batched.
343 | seed (`int`, *optional*, defaults to 2016):
344 | The random seed to use when shuffling the calibration dataset.
345 | use_auth_token (`bool`, *optional*, defaults to `False`):
346 | Whether to use the token generated when running `transformers-cli login` (necessary for some datasets
347 | like ImageNet).
348 | Returns:
349 | The calibration `datasets.Dataset` to use for the post-training static quantization calibration
350 | step.
351 | """
352 | calib_dataset = load_dataset(
353 | dataset_name,
354 | name=dataset_config_name,
355 | split=dataset_split,
356 | use_auth_token=use_auth_token,
357 | )
358 |
359 | if num_samples is not None:
360 | num_samples = min(num_samples, len(calib_dataset))
361 | calib_dataset = calib_dataset.shuffle(seed=seed).select(range(num_samples))
362 |
363 | if preprocess_function is not None:
364 | processed_calib_dataset = calib_dataset.map(preprocess_function, batched=preprocess_batch)
365 | else:
366 | processed_calib_dataset = calib_dataset
367 |
368 | return self.clean_calibration_dataset(processed_calib_dataset)
369 |
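    # Illustrative sketch (editor's addition): building a ready-to-use calibration
    # dataset from the Hub; the dataset name and the `preprocess_function` are
    # assumptions for the example.
    #
    #     calibration_dataset = quantizer.get_calibration_dataset(
    #         "beans",
    #         num_samples=100,
    #         dataset_split="train",
    #         preprocess_function=preprocess_function,
    #     )
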
370 | def clean_calibration_dataset(self, dataset: Dataset) -> Dataset:
371 | model = onnx.load(self.model_path)
372 | model_inputs = {input.name for input in model.graph.input}
373 | ignored_columns = list(set(dataset.column_names) - model_inputs)
374 | return dataset.remove_columns(ignored_columns)
375 |
--------------------------------------------------------------------------------
/optimum/furiosa/modeling_base.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import logging
16 | from pathlib import Path
17 | from shutil import copyfile
18 | from tempfile import TemporaryDirectory
19 | from typing import Dict, Optional, Tuple, Union
20 |
21 | import onnx
22 | from huggingface_hub import hf_hub_download
23 | from transformers import PretrainedConfig
24 | from transformers.file_utils import add_start_docstrings
25 |
26 | # Import Furiosa SDK
27 | from furiosa import optimizer
28 | from furiosa.runtime import session
29 | from furiosa.tools.compiler.api import compile
30 | from optimum.exporters.onnx import main_export
31 | from optimum.modeling_base import OptimizedModel
32 |
33 | from .utils import (
34 | FURIOSA_ENF_FILE_NAME,
35 | FURIOSA_QUANTIZED_FILE_NAME,
36 | ONNX_WEIGHTS_NAME,
37 | ONNX_WEIGHTS_NAME_STATIC,
38 | maybe_load_preprocessors,
39 | maybe_save_preprocessors,
40 | )
41 |
42 |
43 | logger = logging.getLogger(__name__)
44 |
45 |
46 | @add_start_docstrings(
47 | """
48 | Base FuriosaAIModel class.
49 | """,
50 | )
51 | class FuriosaAIBaseModel(OptimizedModel):
52 | auto_model_class = None
53 | export_feature = None
54 |
55 | def __init__(
56 | self,
57 | model: Union[bytes, str, Path],
58 | config: PretrainedConfig = None,
59 | device: str = None,
60 | furiosa_config: Optional[Dict[str, str]] = None,
61 | model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None,
62 | input_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
63 | output_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
64 | **kwargs,
65 | ):
66 | self.config = config
67 | self.model_save_dir = model_save_dir
68 | self.furiosa_config = furiosa_config
69 | self.preprocessors = kwargs.get("preprocessors", [])
70 | enable_compilation = kwargs.get("compile", True)
71 |
72 | self.model = model
73 |
74 | if enable_compilation:
75 | self.model = self.compile(model, input_shape_dict, output_shape_dict)
76 |
77 | self.create_session()
78 |
79 | def _save_pretrained(self, save_directory: Union[str, Path], file_name: Optional[str] = None, **kwargs):
80 | dst_path = Path(save_directory) / FURIOSA_ENF_FILE_NAME
81 |
82 | if isinstance(self.model, (str, Path)):
83 | copyfile(self.model, dst_path)
84 | else:
85 | with open(dst_path, "wb") as f:
86 | f.write(self.model)
87 |
88 | def create_session(self):
89 | """
90 | Create a Furiosa runtime session for the model.
91 |
92 | Creates a session object using the Furiosa runtime for executing the model.
93 |
94 | Returns:
95 | None
96 | """
97 | self.sess = session.create(self.model)
98 | self.input_num = self.sess.input_num
99 | self.inputs_to_dtype = []
100 | for i in range(self.input_num):
101 | self.inputs_to_dtype.append(self.sess.input(i).dtype)
102 |
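    # Illustrative sketch (editor's addition): once `create_session` has run,
    # inference goes through the session object, and the recorded input dtypes
    # are consumed by callers such as `FuriosaAIModel.evaluation_loop`:
    #
    #     outputs = self.sess.run(numpy_inputs)  # numpy_inputs: list of np.ndarray
    #     first_input_dtype = self.inputs_to_dtype[0]
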
103 | @classmethod
104 | def _from_pretrained(
105 | cls,
106 | model_id: Union[str, Path],
107 | config: PretrainedConfig,
108 |         use_auth_token: Optional[Union[bool, str]] = None,
109 |         revision: Optional[str] = None,
110 | force_download: bool = False,
111 | cache_dir: Optional[str] = None,
112 | file_name: Optional[str] = None,
113 | subfolder: str = "",
114 | from_onnx: bool = False,
115 | from_quantized: bool = False,
116 | local_files_only: bool = False,
117 | input_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
118 | output_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
119 | **kwargs,
120 | ):
121 | """
122 | Loads a model and its configuration file from a directory or the Hugging Face Hub.
123 |
124 | Args:
125 | model_id (Union[str, Path]):
126 |                 The location from which to load the model. Can be either:
127 | - The model ID of a pretrained model hosted inside a model repo on huggingface.co.
128 | - The path to a directory containing the model weights.
129 | config (PretrainedConfig):
130 | The configuration object associated with the model.
131 | use_auth_token (Union[bool, str, None], defaults to None):
132 | The token to use as HTTP bearer authorization for remote files. Needed to load models from a private repository.
133 | revision (Union[str, None], defaults to None):
134 | The specific model version to use. It can be a branch name, a tag name, or a commit ID.
135 | force_download (bool, defaults to False):
136 | Whether or not to force the (re-)download of the model weights and configuration files, overriding the cached versions if they exist.
137 | cache_dir (str, defaults to None):
138 | The path to a directory in which a downloaded pretrained model configuration should be cached if the standard cache should not be used.
139 | file_name (str, defaults to None):
140 |                 The file name of the model to load. Overrides the default file name and allows the model to be loaded under a different name.
141 | subfolder (str, defaults to ""):
142 |                 The subfolder within the model repository or directory from which to load the model.
143 | from_onnx (bool, defaults to False):
144 | Whether the model is being loaded from an ONNX file.
145 | from_quantized (bool, defaults to False):
146 | Whether the model is being loaded from a quantized file.
147 | local_files_only (bool, defaults to False):
148 | Whether or not to only look at local files (i.e., do not try to download the model).
149 | input_shape_dict (Dict[str, Tuple[int]], defaults to None):
150 | A dictionary specifying the input shapes for dynamic models.
151 | output_shape_dict (Dict[str, Tuple[int]], defaults to None):
152 | A dictionary specifying the output shapes for dynamic models.
153 | **kwargs:
154 | Additional keyword arguments to be passed to the underlying model loading function.
155 |
156 | Returns:
157 | An instance of the model class loaded from the specified directory or Hugging Face Hub.
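    |
    |         Example (a minimal sketch; `MyFuriosaModelClass` stands in for a task-specific
    |         subclass of this base class, and the model ID is illustrative):
    |
    |         ```python
    |         >>> model = MyFuriosaModelClass.from_pretrained("my-org/my-compiled-model")
    |         ```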
158 | """
159 | if from_onnx:
160 | default_file_name = ONNX_WEIGHTS_NAME
161 | elif from_quantized:
162 | default_file_name = FURIOSA_QUANTIZED_FILE_NAME
163 | else:
164 | default_file_name = FURIOSA_ENF_FILE_NAME
165 |
166 | file_name = file_name or default_file_name
167 |
168 | # Load the model from local directory
169 | if Path(model_id).is_dir():
170 | file_path = Path(model_id) / file_name
171 | model_save_dir = model_id
172 | preprocessors = maybe_load_preprocessors(model_id)
173 | # Download the model from the hub
174 | else:
175 | file_path = hf_hub_download(
176 | repo_id=model_id,
177 | filename=file_name,
178 | subfolder=subfolder,
179 | use_auth_token=use_auth_token,
180 | revision=revision,
181 | cache_dir=cache_dir,
182 | force_download=force_download,
183 | local_files_only=local_files_only,
184 | )
185 | model_save_dir = Path(file_path).parent
186 | preprocessors = maybe_load_preprocessors(model_id, subfolder=subfolder)
187 |
188 | model = cls.load_model(file_path, input_shape_dict, output_shape_dict)
189 |
190 | return cls(
191 | model, config=config, model_save_dir=model_save_dir, compile=False, preprocessors=preprocessors, **kwargs
192 | )
193 |
194 | @classmethod
195 | def _from_transformers(
196 | cls,
197 | model_id: str,
198 | config: PretrainedConfig,
199 | use_auth_token: Optional[Union[bool, str]] = None,
200 | revision: Optional[str] = None,
201 | force_download: bool = False,
202 | cache_dir: Optional[str] = None,
203 | subfolder: str = "",
204 | local_files_only: bool = False,
205 | task: Optional[str] = None,
206 | **kwargs,
207 | ):
208 | """
209 | Export a vanilla Transformers model into an ONNX model using `transformers.onnx.export_onnx`.
210 |
211 | Arguments:
212 | model_id (`str` or `Path`):
213 | The directory from which to load the model.
214 | Can be either:
215 | - The model id of a pretrained model hosted inside a model repo on huggingface.co.
216 | - The path to a directory containing the model weights. save_dir (`str` or `Path`):
217 | The directory where the exported ONNX model should be saved, default to
218 | `transformers.file_utils.default_cache_path`, which is the cache directory for transformers.
219 | use_auth_token (`str` or `bool`):
220 | Is needed to load models from a private repository
221 | revision (`str`):
222 | Revision is the specific model version to use. It can be a branch name, a tag name, or a commit id
223 | kwargs (`Dict`, *optional*):
224 | kwargs will be passed to the model during initialization
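    |
    |         Example (a minimal sketch; the subclass name, model ID, and shapes are illustrative —
    |         users typically reach this code path by passing `export=True` to the public
    |         `from_pretrained` entry point inherited from `OptimizedModel`):
    |
    |         ```python
    |         >>> model = MyFuriosaModelClass.from_pretrained(  # hypothetical task-specific subclass
    |         ...     "microsoft/resnet-50",
    |         ...     export=True,
    |         ...     input_shape_dict={"pixel_values": (1, 3, 224, 224)},
    |         ...     output_shape_dict={"logits": (1, 1000)},
    |         ... )
    |         ```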
225 | """
226 | if task is None:
227 | task = cls.export_feature
228 |
229 | save_dir = TemporaryDirectory()
230 | save_dir_path = Path(save_dir.name)
231 |
232 | # Export the model to the ONNX format
233 | main_export(
234 | model_name_or_path=model_id,
235 | output=save_dir_path,
236 | task=task,
237 | do_validation=False,
238 | no_post_process=True,
239 | subfolder=subfolder,
240 | revision=revision,
241 | cache_dir=cache_dir,
242 | use_auth_token=use_auth_token,
243 | local_files_only=local_files_only,
244 | force_download=force_download,
245 | )
246 |
247 | config.save_pretrained(save_dir_path)
248 | maybe_save_preprocessors(model_id, save_dir_path, src_subfolder=subfolder)
249 |
250 | return cls._from_pretrained(
251 | model_id=save_dir_path,
252 | config=config,
253 | from_onnx=True,
254 | use_auth_token=use_auth_token,
255 | revision=revision,
256 | force_download=force_download,
257 | cache_dir=cache_dir,
258 | local_files_only=local_files_only,
259 | **kwargs,
260 | )
261 |
262 | @classmethod
263 | def load_model(
264 | cls,
265 | model_path: Union[str, Path],
266 | input_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
267 | output_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
268 | ):
269 | """
270 | Loads and processes a model for use with the Furiosa framework.
271 |
272 | Args:
273 | model_path (Union[str, Path]):
274 | The path to the model file.
275 | input_shape_dict (Dict[str, Tuple[int]], defaults to None):
276 | A dictionary specifying the input shapes for dynamic models.
277 | output_shape_dict (Dict[str, Tuple[int]], defaults to None):
278 | A dictionary specifying the output shapes for dynamic models.
279 |
280 | Returns:
281 | If the model is in the 'onnx' or 'dfg' format, the compiled model in the Furiosa binary format is returned.
282 | If the model is in the 'enf' format, the model path is returned as-is.
283 |
284 | Raises:
285 | ValueError: If the model format is not supported or invalid.
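    |
    |         Example (a minimal sketch; the file name and the tensor names/shapes are illustrative):
    |
    |         ```python
    |         >>> compiled = FuriosaAIBaseModel.load_model(
    |         ...     "model.onnx",
    |         ...     input_shape_dict={"pixel_values": (1, 3, 224, 224)},
    |         ...     output_shape_dict={"logits": (1, 1000)},
    |         ... )
    |         ```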
286 | """
287 | model_path = Path(model_path)
288 | if model_path.suffix in (".onnx", ".dfg"):
289 | compiled_model = cls.compile(model_path, input_shape_dict, output_shape_dict)
290 | return compiled_model
291 | if model_path.suffix == ".enf":
292 | return model_path
293 |
294 | raise ValueError("Invalid model types. Supported formats are 'onnx', 'dfg', or 'enf'.")
295 |
296 | @classmethod
297 | def compile(
298 | cls,
299 | model: Union[str, Path, bytes],
300 | input_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
301 | output_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
302 | ):
303 | """
304 | Compiles the model to the Furiosa binary format.
305 |
306 | Args:
307 |             model (Union[str, Path, bytes]):
308 |                 The model to compile: a path to an ONNX or DFG file, or the raw model bytes.
309 | input_shape_dict (Dict[str, Tuple[int]], defaults to None):
310 | A dictionary specifying the input shapes for dynamic models.
311 | output_shape_dict (Dict[str, Tuple[int]], defaults to None):
312 | A dictionary specifying the output shapes for dynamic models.
313 | Returns:
314 | The compiled model in the Furiosa binary format.
315 |
316 | Raises:
317 | ValueError: If the model format is not supported or invalid.
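    |
    |         Example (a minimal sketch; assumes an already static-shaped ONNX model, since
    |         passing raw bytes skips the reshape step; the file name is illustrative):
    |
    |         ```python
    |         >>> onnx_bytes = Path("static_model.onnx").read_bytes()
    |         >>> enf_bytes = FuriosaAIBaseModel.compile(onnx_bytes)
    |         ```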
318 | """
319 | if isinstance(model, (str, Path)):
320 | model = cls._reshape(model, input_shape_dict, output_shape_dict)
321 | input_bytes = Path(model).read_bytes()
322 | else:
323 | input_bytes = model
324 |
325 | logger.info("Compiling the model...")
326 | compiled_model = compile(input_bytes, target_ir="enf")
327 | return compiled_model
328 |
329 | @staticmethod
330 | def _check_is_dynamic(model_path: Union[str, Path]):
331 | is_dynamic = False
332 | if Path(model_path).suffix == ".onnx":
333 | model = onnx.load(model_path)
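    |             # An input is dynamic when any of its dimensions carries a symbolic name
    |             # (`dim_param`) instead of a fixed integer size (`dim_value`).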
334 | is_dynamic = any(any(dim.dim_param for dim in inp.type.tensor_type.shape.dim) for inp in model.graph.input)
335 |
336 | return is_dynamic
337 |
338 | @staticmethod
339 |     def optimize_model(model: onnx.ModelProto) -> onnx.ModelProto:
340 | return optimizer.frontend.onnx.optimize_model(model)
341 |
342 | @staticmethod
343 | def _update_inputs_outputs_dims(
344 | model_path: Union[str, Path],
345 | input_shape_dict: Dict[str, Tuple[int]],
346 | output_shape_dict: Dict[str, Tuple[int]],
347 | ) -> onnx.ModelProto:
348 | from onnx import shape_inference
349 | from onnx.tools import update_model_dims
350 |
351 | model = onnx.load(model_path)
352 |
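    |         # Overwrite the symbolic input/output dimensions with the requested static sizes,
    |         # then re-run shape inference so intermediate tensors pick up the fixed shapes.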
353 | updated_model = update_model_dims.update_inputs_outputs_dims(model, input_shape_dict, output_shape_dict)
354 | return shape_inference.infer_shapes(updated_model)
355 |
356 | @classmethod
357 | def _reshape(
358 | cls,
359 | model_path: Union[str, Path],
360 | input_shape_dict: Dict[str, Tuple[int]],
361 | output_shape_dict: Dict[str, Tuple[int]],
362 | ) -> Union[str, Path]:
363 | """
364 | Propagates the given input shapes on the model's layers, fixing the input shapes of the model.
365 |
366 | Args:
367 | model_path (Union[str, Path]):
368 | Path to the model.
369 | input_shape_dict (Dict[str, Tuple[int]]):
370 | Input shapes for the model.
371 | output_shape_dict (Dict[str, Tuple[int]]):
372 | Output shapes for the model.
373 |
374 | Returns:
375 | Union[str, Path]:
376 | Path to the model after updating the input shapes.
377 |
378 | Raises:
379 | ValueError: If the model provided has dynamic axes in input/output and no input/output shape is provided.
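    |
    |         Example of the expected shape dictionaries (tensor names and sizes are illustrative):
    |
    |         ```python
    |         >>> input_shape_dict = {"pixel_values": (1, 3, 224, 224)}
    |         >>> output_shape_dict = {"logits": (1, 1000)}
    |         ```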
380 | """
381 | if isinstance(model_path, (str, Path)) and Path(model_path).suffix == ".onnx":
382 | is_dynamic = cls._check_is_dynamic(model_path)
383 | if is_dynamic:
384 | if input_shape_dict is None or output_shape_dict is None:
385 | raise ValueError(
386 | "The model provided has dynamic axes in input/output. Please provide input and output shapes for compilation."
387 | )
388 |
389 | model = cls._update_inputs_outputs_dims(model_path, input_shape_dict, output_shape_dict)
390 | optimized_model = cls.optimize_model(model)
391 |
392 | static_model_path = Path(model_path).parent / ONNX_WEIGHTS_NAME_STATIC
393 | onnx.save(optimized_model, static_model_path)
394 |
395 | return static_model_path
396 |
397 | return model_path
398 |
399 | def forward(self, *args, **kwargs):
400 | raise NotImplementedError
401 |
--------------------------------------------------------------------------------