├── .github └── workflows │ ├── delete_doc_comment_trigger.yml │ ├── upload_pr_documentation.yml │ ├── delete_doc_comment.yml │ ├── security.yml │ ├── test.yml │ ├── check_code_quality.yml │ └── build_pr_documentation.yml ├── docs ├── README.md └── source │ ├── _toctree.yml │ ├── package_reference │ ├── quantization.mdx │ ├── modeling.mdx │ └── configuration.mdx │ ├── usage_guides │ ├── overview.mdx │ ├── models.mdx │ └── quantization.mdx │ ├── installation.md │ └── index.md ├── optimum └── furiosa │ ├── version.py │ ├── quantization_base.py │ ├── __init__.py │ ├── utils.py │ ├── modeling.py │ ├── configuration.py │ ├── quantization.py │ └── modeling_base.py ├── notebooks └── quantization │ └── image-classification │ └── __init__.py ├── setup.cfg ├── pyproject.toml ├── examples └── quantization │ └── image-classification │ ├── README.md │ └── run_image_classification.py ├── Makefile ├── README.md ├── .gitignore ├── setup.py ├── tests ├── test_quantization.py └── test_modeling.py └── LICENSE /.github/workflows/delete_doc_comment_trigger.yml: -------------------------------------------------------------------------------- 1 | name: Delete doc comment trigger 2 | 3 | on: 4 | pull_request: 5 | types: [ closed ] 6 | 7 | 8 | jobs: 9 | delete: 10 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment_trigger.yml@main 11 | with: 12 | pr_number: ${{ github.event.number }} 13 | -------------------------------------------------------------------------------- /.github/workflows/upload_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Upload PR Documentation 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Build PR Documentation"] 6 | types: 7 | - completed 8 | 9 | jobs: 10 | build: 11 | uses: huggingface/doc-builder/.github/workflows/upload_pr_documentation.yml@main 12 | with: 13 | package_name: optimum-furiosa 14 | secrets: 15 | hf_token: ${{ secrets.HF_DOC_BUILD_PUSH }} 16 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} 17 | -------------------------------------------------------------------------------- /.github/workflows/delete_doc_comment.yml: -------------------------------------------------------------------------------- 1 | name: Delete PR documentation 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Delete doc comment trigger"] 6 | types: 7 | - completed 8 | paths: 9 | - "optimum/**.py" 10 | - "docs/**" 11 | - ".github/workflows/build_pr_documentation.yml" 12 | - ".github/workflows/delete_doc_comment.yml" 13 | 14 | 15 | jobs: 16 | delete: 17 | uses: huggingface/doc-builder/.github/workflows/delete_doc_comment.yml@main 18 | secrets: 19 | comment_bot_token: ${{ secrets.COMMENT_BOT_TOKEN }} 20 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Optimum Furiosa documentation 2 | 3 | 1. Setup 4 | ```bash 5 | pip install hf-doc-builder==0.4.0 watchdog --upgrade 6 | ``` 7 | 8 | 2. Local Development 9 | ```bash 10 | doc-builder preview optimum.furiosa docs/source/ 11 | ``` 12 | 3. Build Docs 13 | ```bash 14 | doc-builder build optimum.furiosa docs/source/ --build_dir build/ 15 | ``` 16 | 17 | ## Add assets/Images 18 | 19 | Adding images/assets is only possible through `https://` links meaning you need to use `https://raw.githubusercontent.com/huggingface/optimum-furiosa/main/docs/assets/` prefix. 
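For example, to embed an image stored under `docs/assets` (the file name below is illustrative only, not an actual asset in the repo):

```md
![Warboy overview](https://raw.githubusercontent.com/huggingface/optimum-furiosa/main/docs/assets/warboy_overview.png)
```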
20 | -------------------------------------------------------------------------------- /docs/source/_toctree.yml: -------------------------------------------------------------------------------- 1 | - sections: 2 | - local: index 3 | title: 🤗 Optimum Furiosa 4 | - local: installation 5 | title: Installation 6 | - sections: 7 | - local: usage_guides/overview 8 | title: Overview 9 | - local: usage_guides/models 10 | title: Modeling 11 | - local: usage_guides/quantization 12 | title: Quantization 13 | title: How-To Guides 14 | - sections: 15 | - local: package_reference/modeling 16 | title: Models 17 | - local: package_reference/configuration 18 | title: Configuration 19 | - local: package_reference/quantization 20 | title: Quantization 21 | title: Reference 22 | title: Optimum Furiosa 23 | isExpanded: false -------------------------------------------------------------------------------- /optimum/furiosa/version.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | __version__ = "0.1.0.dev0" 16 | -------------------------------------------------------------------------------- /docs/source/package_reference/quantization.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Quantization 14 | 15 | ## FuriosaAIQuantizer 16 | 17 | [[autodoc]] FuriosaAIQuantizer -------------------------------------------------------------------------------- /notebooks/quantization/image-classification/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | # Copyright 2023 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | -------------------------------------------------------------------------------- /docs/source/usage_guides/overview.mdx: -------------------------------------------------------------------------------- 1 | 16 | 17 | # Overview 18 | 19 | Welcome to the 🤗 Optimum Furiosa how-to guides! 
20 | These guides tackle more advanced topics and will show you how to easily get the best from NPUs: 21 | - [Accelerating inference](./models) 22 | - [Quantization](./quantization) 23 | -------------------------------------------------------------------------------- /.github/workflows/security.yml: -------------------------------------------------------------------------------- 1 | name: Security Checks 2 | 3 | on: 4 | push: 5 | 6 | permissions: 7 | contents: read 8 | 9 | jobs: 10 | secrets: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - shell: bash 14 | run: | 15 | if [ "${{ github.event_name }}" == "push" ]; then 16 | echo "depth=$(($(jq length <<< '${{ toJson(github.event.commits) }}') + 2))" >> $GITHUB_ENV 17 | echo "branch=${{ github.ref_name }}" >> $GITHUB_ENV 18 | fi 19 | if [ "${{ github.event_name }}" == "pull_request" ]; then 20 | echo "depth=$((${{ github.event.pull_request.commits }}+2))" >> $GITHUB_ENV 21 | echo "branch=${{ github.event.pull_request.head.ref }}" >> $GITHUB_ENV 22 | fi 23 | - name: Checkout code 24 | uses: actions/checkout@v4 25 | with: 26 | ref: ${{env.branch}} 27 | fetch-depth: ${{env.depth}} 28 | - name: Scan for secrets 29 | uses: trufflesecurity/trufflehog@main 30 | -------------------------------------------------------------------------------- /docs/source/installation.md: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Installation 14 | 15 | To install 🤗 Optimum Furiosa, you first need to install Furiosa SDK drivers by following the official [installation guide](https://furiosa-ai.github.io/docs/latest/en/software/installation.html). Then, 🤗 Optimum Furiosa can be installed using `pip` as follows: 16 | 17 | ```bash 18 | python -m pip install git+https://github.com/huggingface/optimum-furiosa.git 19 | ``` 20 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | 11 | jobs: 12 | build: 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | python-version: [3.8] 17 | os: [self-hosted] 18 | runs-on: ${{ matrix.os }} 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Setup Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v2 23 | with: 24 | python-version: ${{ matrix.python-version }} 25 | - name: Create and start a virtual environment 26 | run: | 27 | python -m venv venv 28 | source venv/bin/activate 29 | - name: Install dependencies 30 | run: | 31 | source venv/bin/activate 32 | python -m pip install --upgrade pip 33 | pip install .[testing] 34 | - name: Test with Pytest 35 | run: | 36 | source venv/bin/activate 37 | pytest -s tests/ 38 | - name: Cleanup 39 | run: | 40 | rm -rf venv -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | default_section = FIRSTPARTY 3 | ensure_newline_before_comments = True 4 | force_grid_wrap = 0 5 | include_trailing_comma = True 6 | known_first_party = transformers 7 | known_third_party = 8 | absl 9 | conllu 10 | datasets 11 | elasticsearch 12 | fairseq 13 | faiss-cpu 14 | fastprogress 15 | fire 16 | fugashi 17 | git 18 | h5py 19 | matplotlib 20 | nltk 21 | numpy 22 | packaging 23 | pandas 24 | PIL 25 | psutil 26 | pytest 27 | pytorch_lightning 28 | rouge_score 29 | 
sacrebleu 30 | seqeval 31 | sklearn 32 | streamlit 33 | tensorboardX 34 | tensorflow 35 | tensorflow_datasets 36 | timeout_decorator 37 | torch 38 | torchaudio 39 | torchtext 40 | torchvision 41 | torch_xla 42 | tqdm 43 | 44 | line_length = 119 45 | lines_after_imports = 2 46 | multi_line_output = 3 47 | use_parentheses = True 48 | 49 | [flake8] 50 | ignore = E203, E501, E741, W503, W605 51 | max-line-length = 119 52 | 53 | [tool:pytest] 54 | doctest_optionflags=NUMBER NORMALIZE_WHITESPACE ELLIPSIS -------------------------------------------------------------------------------- /docs/source/package_reference/modeling.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Models 14 | 15 | ## Generic model classes 16 | 17 | The following Furiosa classes are available for instantiating a base model class without a specific head. 18 | 19 | ### FuriosaAIModel 20 | 21 | [[autodoc]] FuriosaAIModel 22 | 23 | ## Computer vision 24 | 25 | The following classes are available for the following computer vision tasks. 26 | 27 | ### FuriosaAIModelForImageClassification 28 | 29 | [[autodoc]] FuriosaAIModelForImageClassification 30 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | [tool.black] 16 | line-length = 119 17 | target-version = ['py37'] 18 | 19 | [tool.ruff] 20 | # Never enforce `E501` (line length violations). 21 | ignore = ["C901", "E501", "E741", "W605"] 22 | select = ["C", "E", "F", "I", "W"] 23 | line-length = 119 24 | 25 | # Ignore import violations in all `__init__.py` files. 
26 | [tool.ruff.per-file-ignores]
27 | "__init__.py" = ["E402", "F401", "F403", "F811"]
28 | 
29 | [tool.ruff.isort]
30 | lines-after-imports = 2
31 | known-first-party = ["optimum"]
-------------------------------------------------------------------------------- /.github/workflows/check_code_quality.yml: --------------------------------------------------------------------------------
1 | name: Check code quality
2 | 
3 | on:
4 |   push:
5 |     branches: [ main ]
6 |     paths:
7 |       - "optimum/**.py"
8 |       - "tests/**.py"
9 |       - "examples/**.py"
10 | 
11 |   pull_request:
12 |     branches: [ main ]
13 |     paths:
14 |       - "optimum/**.py"
15 |       - "tests/**.py"
16 |       - "examples/**.py"
17 | 
18 | jobs:
19 |   build:
20 |     strategy:
21 |       fail-fast: false
22 |       matrix:
23 |         python-version: ['3.8']
24 |         os: [ubuntu-22.04]
25 | 
26 |     runs-on: ${{ matrix.os }}
27 |     steps:
28 |     - uses: actions/checkout@v2
29 |     - name: Setup Python ${{ matrix.python-version }}
30 |       uses: actions/setup-python@v2
31 |       with:
32 |         python-version: ${{ matrix.python-version }}
33 |     - name: Create and start a virtual environment
34 |       run: |
35 |         python -m venv venv
36 |         source venv/bin/activate
37 |     - name: Install dependencies
38 |       run: |
39 |         source venv/bin/activate
40 |         pip install --upgrade pip
41 |         pip install black ruff
42 |     - name: Check style with black
43 |       run: |
44 |         source venv/bin/activate
45 |         black --check .
46 |     - name: Check style with ruff
47 |       run: |
48 |         source venv/bin/activate
49 |         ruff .
-------------------------------------------------------------------------------- /docs/source/package_reference/configuration.mdx: --------------------------------------------------------------------------------
1 | 
12 | 
13 | # Configuration
14 | 
15 | The configuration classes are the way to specify how a task should be done. Here is how quantization can be configured:
16 | 
17 | 1. Quantization: Performed by the [`~furiosa.FuriosaAIQuantizer`], quantization can be set using a [`~furiosa.configuration.QuantizationConfig`]. A calibration step is required in some cases (post-training static quantization), which can be specified using a [`~furiosa.configuration.CalibrationConfig`].
18 | 
19 | ## QuantizationConfig
20 | 
21 | [[autodoc]] configuration.QuantizationConfig
22 | 
23 | ## CalibrationConfig
24 | 
25 | [[autodoc]] configuration.CalibrationConfig
26 | 
27 | ## FuriosaAIConfig
28 | 
29 | [[autodoc]] configuration.FuriosaAIConfig
-------------------------------------------------------------------------------- /examples/quantization/image-classification/README.md: --------------------------------------------------------------------------------
1 | 
16 | 
17 | # Image classification
18 | 
19 | The script [`run_image_classification.py`](https://github.com/huggingface/optimum-furiosa/blob/main/examples/quantization/image_classification/run_image_classification.py) allows us to apply different quantization approaches using the [FuriosaAI SDK](https://furiosa-ai.github.io/docs/latest/en/software/quantization.html) for image classification tasks.
20 | 
21 | The following example applies quantization on a ResNet model fine-tuned on the beans classification dataset.
22 | 23 | ```bash 24 | python run_image_classification.py \ 25 | --model_name_or_path eugenecamus/resnet-50-base-beans-demo \ 26 | --dataset_name beans \ 27 | --do_eval \ 28 | --output_dir /tmp/image_classification_resnet_beans 29 | ``` 30 | -------------------------------------------------------------------------------- /optimum/furiosa/quantization_base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | from abc import ABC, abstractmethod 17 | from pathlib import Path 18 | from typing import Optional, Union 19 | 20 | 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | class OptimumQuantizer(ABC): 25 | @classmethod 26 | def from_pretrained( 27 | cls, 28 | model_or_path: Union[str, Path], 29 | file_name: Optional[str] = None, 30 | ): 31 | """Overwrite this method in subclass to define how to load your model from pretrained""" 32 | raise NotImplementedError( 33 | "Overwrite this method in subclass to define how to load your model from pretrained for quantization" 34 | ) 35 | 36 | @abstractmethod 37 | def quantize(self, save_dir: Union[str, Path], file_prefix: Optional[str] = None, **kwargs): 38 | """Overwrite this method in subclass to define how to quantize your model for quantization""" 39 | raise NotImplementedError( 40 | "Overwrite this method in subclass to define how to quantize your model for quantization" 41 | ) 42 | -------------------------------------------------------------------------------- /optimum/furiosa/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | from typing import TYPE_CHECKING 16 | 17 | from transformers.utils import OptionalDependencyNotAvailable, _LazyModule 18 | 19 | from .utils import FURIOSA_ENF_FILE_NAME 20 | 21 | 22 | _import_structure = { 23 | "configuration": [ 24 | "CalibrationConfig", 25 | "AutoCalibrationConfig", 26 | "QuantizationMode", 27 | "FuriosaAIConfig", 28 | "QuantizationConfig", 29 | ], 30 | "modeling": [ 31 | "FuriosaAIModel", 32 | "FuriosaAIModelForImageClassification", 33 | ], 34 | "quantization": ["FuriosaAIQuantizer"], 35 | "utils": [ 36 | "export_model_to_onnx", 37 | ], 38 | "version": ["__version__"], 39 | } 40 | 41 | # Direct imports for type-checking 42 | if TYPE_CHECKING: 43 | from .configuration import FuriosaAIConfig, QuantizationConfig 44 | from .modeling import ( 45 | FuriosaAIModelForImageClassification, 46 | ) 47 | from .quantization import FuriosaAIQuantizer 48 | from .utils import export_model_to_onnx 49 | from .version import __version__ 50 | else: 51 | import sys 52 | 53 | sys.modules[__name__] = _LazyModule(__name__, globals()["__file__"], _import_structure, module_spec=__spec__) 54 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | SHELL := /bin/bash 15 | CURRENT_DIR = $(shell pwd) 16 | DEFAULT_CLONE_URL := https://github.com/huggingface/optimum-furiosa.git 17 | # If CLONE_URL is empty, revert to DEFAULT_CLONE_URL 18 | REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL)) 19 | 20 | .PHONY: style test 21 | 22 | # Run code quality checks 23 | style_check: 24 | black --check . 25 | ruff . 26 | 27 | style: 28 | black . 29 | ruff . --fix 30 | 31 | # Run tests for the library 32 | test: 33 | python -m pytest tests 34 | 35 | # Utilities to release to PyPi 36 | build_dist_install_tools: 37 | pip install build 38 | pip install twine 39 | 40 | build_dist: 41 | rm -fr build 42 | rm -fr dist 43 | python -m build 44 | 45 | pypi_upload: build_dist 46 | python -m twine upload dist/* 47 | 48 | build_doc_docker_image: 49 | docker build -t doc_maker --build-arg commit_sha=$(COMMIT_SHA_SUBPACKAGE) --build-arg clone_url=$(REAL_CLONE_URL) ./docs 50 | 51 | doc: build_doc_docker_image 52 | @test -n "$(BUILD_DIR)" || (echo "BUILD_DIR is empty." ; exit 1) 53 | @test -n "$(VERSION)" || (echo "VERSION is empty." 
; exit 1)
54 | 	docker run -v $(CURRENT_DIR):/doc_folder --workdir=/doc_folder doc_maker \
55 | 	doc-builder build optimum.furiosa /optimum-furiosa/docs/source/ \
56 | 	--build_dir $(BUILD_DIR) \
57 | 	--version $(VERSION) \
58 | 	--version_tag_suffix "" \
59 | 	--html \
60 | 	--clean
-------------------------------------------------------------------------------- /docs/source/index.md: --------------------------------------------------------------------------------
1 | 
16 | 
17 | # 🤗 Optimum Furiosa
18 | 
19 | 🤗 Optimum Furiosa is the interface between the 🤗 Transformers library and Furiosa NPUs such as [Furiosa Warboy](https://furiosa-ai.github.io/docs/latest/en/npu/intro.html#furiosaai-warboy).
20 | It provides a set of tools enabling easy model loading and inference for different downstream tasks.
21 | 
22 | <!-- link-card grid (markup lost in extraction) -->
23 | - **How-to guides**: Practical guides to help you achieve a specific goal. Take a look at these guides to learn how to use 🤗 Optimum Furiosa to solve real-world problems.
24 | - **Reference**: Technical descriptions of how the classes and methods of 🤗 Optimum Furiosa work.
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Test](https://github.com/huggingface/optimum-furiosa/actions/workflows/test.yml/badge.svg)](https://github.com/huggingface/optimum-furiosa/actions/workflows/test.yml) 2 | 3 | 4 | # optimum-furiosa 5 | Accelerated inference of 🤗 models using FuriosaAI NPU chips. 6 | 7 | ## Furiosa SDK setup 8 | A Furiosa SDK environment needs to be enabled to use this library. Please refer to Furiosa's [Installation](https://furiosa-ai.github.io/docs/latest/en/software/installation.html) guide. 9 | 10 | ## Install 11 | Optimum Furiosa is a fast-moving project, and you may want to install from source. 12 | 13 | `pip install git+https://github.com/huggingface/optimum-furiosa.git` 14 | 15 | ### Installing in developer mode 16 | 17 | If you are working on the `optimum-furiosa` code then you should use an editable install 18 | by cloning and installing `optimum` and `optimum-furiosa`: 19 | 20 | ``` 21 | git clone https://github.com/huggingface/optimum 22 | git clone https://github.com/huggingface/optimum-furiosa 23 | pip install -e optimum -e optimum-furiosa 24 | ``` 25 | 26 | Now whenever you change the code, you'll be able to run with those changes instantly. 27 | 28 | 29 | ## How to use it? 30 | To load a model and run inference with Furiosa NPU, you can just replace your `AutoModelForXxx` class with the corresponding `FuriosaAIModelForXxx` class. 31 | 32 | ```diff 33 | import requests 34 | from PIL import Image 35 | 36 | - from transformers import AutoModelForImageClassification 37 | + from optimum.furiosa import FuriosaAIModelForImageClassification 38 | from transformers import AutoFeatureExtractor, pipeline 39 | 40 | url = "http://images.cocodataset.org/val2017/000000039769.jpg" 41 | image = Image.open(requests.get(url, stream=True).raw) 42 | 43 | model_id = "microsoft/resnet-50" 44 | - model = AutoModelForImageClassification.from_pretrained(model_id) 45 | + model = FuriosaAIModelForImageClassification.from_pretrained(model_id, export=True, input_shape_dict={"pixel_values": [1, 3, 224, 224]}, output_shape_dict={"logits": [1, 1000]},) 46 | feature_extractor = AutoFeatureExtractor.from_pretrained(model_id) 47 | cls_pipe = pipeline("image-classification", model=model, feature_extractor=feature_extractor) 48 | outputs = cls_pipe(image) 49 | ``` 50 | 51 | If you find any issue while using those, please open an issue or a pull request. 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ -------------------------------------------------------------------------------- /.github/workflows/build_pr_documentation.yml: -------------------------------------------------------------------------------- 1 | name: Build PR documentation 2 | 3 | on: 4 | pull_request: 5 | branches: [ main ] 6 | paths: 7 | - "optimum/**.py" 8 | - "docs/**" 9 | - ".github/workflows/build_pr_documentation.yml" 10 | - ".github/workflows/delete_doc_comment.yml" 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | build_documentation: 18 | runs-on: self-hosted 19 | env: 20 | COMMIT_SHA: ${{ github.event.pull_request.head.sha }} 21 | PR_NUMBER: ${{ github.event.number }} 22 | EVENT_CONTEXT: ${{ toJSON(github.event) }} 23 | PR_CLONE_URL: ${{ github.event.pull_request.head.repo.clone_url }} 24 | 25 | steps: 26 | - uses: actions/checkout@v2 27 | with: 28 | repository: 'huggingface/doc-builder' 29 | path: doc-builder 30 | 31 | - uses: actions/checkout@v2 32 | with: 33 | repository: 'huggingface/optimum-furiosa' 34 | path: optimum-furiosa 35 | 36 | - name: Create and start a virtual environment 37 | run: | 38 | python -m venv venv_docs 39 | source venv_docs/bin/activate 40 | python -m pip install --upgrade pip 41 | 42 | - name: Setup environment 43 | run: | 44 | source venv_docs/bin/activate 45 | pip uninstall -y doc-builder 46 | cd doc-builder 47 | git pull origin main 48 | pip install . 49 | pip install black 50 | cd .. 51 | cd optimum-furiosa 52 | pip install . 53 | cd .. 
54 | 55 | - name: Make documentation 56 | run: | 57 | source venv_docs/bin/activate 58 | cd optimum-furiosa 59 | doc-builder build optimum.furiosa docs/source/ --build_dir furiosa-doc-build --version pr_$PR_NUMBER --version_tag_suffix "" --html --clean 60 | cd .. 61 | 62 | - name: Save commit_sha & pr_number 63 | run: | 64 | source venv_docs/bin/activate 65 | cd optimum-furiosa/furiosa-doc-build 66 | sudo mv optimum.furiosa optimum-furiosa 67 | echo ${{ env.COMMIT_SHA }} > ./commit_sha 68 | echo ${{ env.PR_NUMBER }} > ./pr_number 69 | 70 | - uses: actions/upload-artifact@v3 71 | with: 72 | name: doc-build-artifact 73 | path: optimum-furiosa/furiosa-doc-build/ 74 | 75 | - name: Cleanup 76 | run: | 77 | rm -rf venv_docs 78 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | from setuptools import find_namespace_packages, setup 4 | 5 | 6 | # Ensure we match the version set in optimum/furiosa/version.py 7 | try: 8 | filepath = "optimum/furiosa/version.py" 9 | with open(filepath) as version_file: 10 | (__version__,) = re.findall('__version__ = "(.*)"', version_file.read()) 11 | except Exception as error: 12 | assert False, "Error: Could not open '%s' due %s\n" % (filepath, error) 13 | 14 | INSTALL_REQUIRE = [ 15 | "optimum==1.8.0", 16 | "transformers>=4.20.0", 17 | "datasets>=1.4.0", 18 | "furiosa-optimizer", 19 | "furiosa-quantizer==0.9.0", 20 | "furiosa-quantizer-impl==0.9.1", 21 | "furiosa-sdk", 22 | "onnx>=1.12.0", 23 | "sentencepiece", 24 | "scipy", 25 | ] 26 | 27 | TESTS_REQUIRE = ["pytest", "parameterized", "Pillow", "evaluate", "diffusers", "py-cpuinfo"] 28 | 29 | QUALITY_REQUIRE = ["black~=23.1", "ruff>=0.0.241"] 30 | 31 | EXTRA_REQUIRE = { 32 | "testing": [ 33 | "filelock", 34 | "GitPython", 35 | "parameterized", 36 | "psutil", 37 | "pytest", 38 | "pytest-pythonpath", 39 | "pytest-xdist", 40 | "Pillow", 41 | "librosa", 42 | "soundfile", 43 | ], 44 | "quality": QUALITY_REQUIRE, 45 | } 46 | 47 | setup( 48 | name="optimum-furiosa", 49 | version=__version__, 50 | description="Optimum Library is an extension of the Hugging Face Transformers library, providing a framework to " 51 | "integrate third-party libraries from Hardware Partners and interface with their specific " 52 | "functionality.", 53 | long_description=open("README.md", "r", encoding="utf-8").read(), 54 | long_description_content_type="text/markdown", 55 | classifiers=[ 56 | "Development Status :: 5 - Production/Stable", 57 | "License :: OSI Approved :: Apache Software License", 58 | "Intended Audience :: Developers", 59 | "Intended Audience :: Education", 60 | "Intended Audience :: Science/Research", 61 | "Operating System :: OS Independent", 62 | "Programming Language :: Python :: 3.7", 63 | "Programming Language :: Python :: 3.8", 64 | "Programming Language :: Python :: 3.9", 65 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 66 | ], 67 | keywords="transformers, quantization, pruning, knowledge distillation, optimization, training", 68 | url="https://huggingface.co/hardware", 69 | author="HuggingFace Inc. 
Special Ops Team", 70 | author_email="hardware@huggingface.co", 71 | license="Apache", 72 | packages=find_namespace_packages(include=["optimum*"]), 73 | install_requires=INSTALL_REQUIRE, 74 | extras_require=EXTRA_REQUIRE, 75 | include_package_data=True, 76 | zip_safe=False, 77 | entry_points={"console_scripts": ["optimum-cli=optimum.commands.optimum_cli:main"]}, 78 | ) 79 | -------------------------------------------------------------------------------- /docs/source/usage_guides/models.mdx: -------------------------------------------------------------------------------- 1 | # Optimum Inference with Furiosa NPU 2 | 3 | Optimum Furiosa is a utility package for building and running inference with Furiosa NPUs. 4 | Optimum can be used to load optimized models from the [Hugging Face Hub](hf.co/models) and create pipelines 5 | to run accelerated inference without rewriting your APIs. 6 | 7 | ## Switching from Transformers to Optimum Furiosa 8 | 9 | The `optimum.furiosa.FuriosaAIModelForXXX` model classes are API compatible with Hugging Face models. This 10 | means you can just replace your `AutoModelForXXX` class with the corresponding `FuriosaAIModelForXXX` class in `optimum.furiosa`. 11 | 12 | You do not need to adapt your code to get it to work with `FuriosaAIModelForXXX` classes: 13 | 14 | Because the model you want to work with might not be already converted to ONNX, [`~optimum.furiosa.FuriosaAIModel`] 15 | includes a method to convert vanilla Hugging Face models to ONNX ones. Simply pass `export=True` to the 16 | [`~optimum.furiosa.FuriosaAIModel.from_pretrained`] method, and your model will be loaded and converted to ONNX on-the-fly: 17 | 18 | ### Loading and inference of a vanilla Transformers model 19 | 20 | ```diff 21 | import requests 22 | from PIL import Image 23 | 24 | - from transformers import AutoModelForImageClassification 25 | + from optimum.furiosa import FuriosaAIModelForImageClassification 26 | from transformers import AutoFeatureExtractor, pipeline 27 | 28 | url = "http://images.cocodataset.org/val2017/000000039769.jpg" 29 | image = Image.open(requests.get(url, stream=True).raw) 30 | 31 | model_id = "microsoft/resnet-50" 32 | - model = AutoModelForImageClassification.from_pretrained(model_id) 33 | + model = FuriosaAIModelForImageClassification.from_pretrained(model_id, export=True, input_shape_dict={"pixel_values": [1, 3, 224, 224]}, output_shape_dict={"logits": [1, 1000]},) 34 | feature_extractor = AutoFeatureExtractor.from_pretrained(model_id) 35 | cls_pipe = pipeline("image-classification", model=model, feature_extractor=feature_extractor) 36 | outputs = cls_pipe(image) 37 | ``` 38 | 39 | 40 | ### Pushing compiled models to the Hugging Face Hub 41 | 42 | It is also possible, just as with regular [`~transformers.PreTrainedModel`]s, to push your `FurisoaAIModelForXXX` to the 43 | [Hugging Face Model Hub](https://hf.co/models): 44 | 45 | ```python 46 | >>> from optimum.furiosa import FuriosaAIModelForImageClassification 47 | 48 | >>> # Load the model from the hub 49 | >>> model = FuriosaAIModelForImageClassification.from_pretrained( 50 | ... "microsoft/resnet-50", export=True, input_shape_dict={"pixel_values": [1, 3, 224, 224]}, output_shape_dict={"logits": [1, 1000]}, 51 | ... ) 52 | 53 | >>> # Save the converted model 54 | >>> model.save_pretrained("a_local_path_for_compiled_model") 55 | 56 | # Push the compiled model to HF Hub 57 | >>> model.push_to_hub( # doctest: +SKIP 58 | ... 
"a_local_path_for_compiled_model", repository_id="my-furiosa-repo", use_auth_token=True 59 | ... ) 60 | ``` -------------------------------------------------------------------------------- /docs/source/usage_guides/quantization.mdx: -------------------------------------------------------------------------------- 1 | 12 | 13 | # Quantization 14 | 15 | 🤗 Optimum provides an `optimum.furiosa` package that enables you to apply quantization on many models hosted on 16 | the Hugging Face Hub using the [Furiosa](https://furiosa-ai.github.io/docs/latest/en/software/quantization.html) 17 | quantization tool. 18 | 19 | The quantization process is abstracted via the [`~optimum.furiosa.FuriosaAIConfig`] and 20 | the [`~optimum.furiosa.FuriosaAIQuantizer`] classes. The former allows you to specify how quantization should be done, 21 | while the latter effectively handles quantization. 22 | 23 | ## Static Quantization example 24 | 25 | The [`~optimum.furiosa.FuriosaAIQuantizer`] class can be used to quantize statically your ONNX model. Below you will find 26 | an easy end-to-end example on how to quantize statically 27 | [eugenecamus/resnet-50-base-beans-demo](https://huggingface.co/eugenecamus/resnet-50-base-beans-demo). 28 | 29 | ```python 30 | >>> from functools import partial 31 | >>> from pathlib import Path 32 | >>> from transformers import AutoFeatureExtractor 33 | >>> from optimum.furiosa import FuriosaAIQuantizer, FuriosaAIModelForImageClassification 34 | >>> from optimum.furiosa.configuration import AutoCalibrationConfig 35 | >>> from optimum.furiosa.utils import export_model_to_onnx 36 | 37 | >>> model_id = "eugenecamus/resnet-50-base-beans-demo" 38 | 39 | # Convert PyTorch model convert to ONNX and create Quantizer and setup config 40 | 41 | >>> feature_extractor = AutoFeatureExtractor.from_pretrained(model_id) 42 | 43 | >>> batch_size = 1 44 | >>> image_size = feature_extractor.size["shortest_edge"] 45 | >>> num_labels = 3 46 | >>> onnx_model_name = "model.onnx" 47 | >>> output_dir = "output" 48 | >>> onnx_model_path = Path(output_dir) / onnx_model_name 49 | 50 | >>> export_model_to_onnx( 51 | ... model_id, 52 | ... save_dir=output_dir, 53 | ... input_shape_dict={"pixel_values": [batch_size, 3, image_size, image_size]}, 54 | ... output_shape_dict={"logits": [batch_size, num_labels]}, 55 | ... file_name=onnx_model_name, 56 | ) 57 | >>> quantizer = FuriosaAIQuantizer.from_pretrained(output_dir, file_name=onnx_model_name) 58 | >>> qconfig = QuantizationConfig() 59 | 60 | # Create the calibration dataset 61 | >>> def preprocess_fn(ex, feature_extractor): 62 | ... return feature_extractor(ex["image"]) 63 | 64 | >>> calibration_dataset = quantizer.get_calibration_dataset( 65 | ... "beans", 66 | ... preprocess_function=partial(preprocess_fn, feature_extractor=feature_extractor), 67 | ... num_samples=50, 68 | ... dataset_split="train", 69 | ... ) 70 | 71 | # Create the calibration configuration containing the parameters related to calibration. 72 | >>> calibration_config = AutoCalibrationConfig.mse_asym(calibration_dataset) 73 | 74 | # Perform the calibration step: computes the activations quantization ranges 75 | >>> ranges = quantizer.fit( 76 | ... dataset=calibration_dataset, 77 | ... calibration_config=calibration_config, 78 | ... ) 79 | 80 | # Apply static quantization on the model 81 | >>> model_quantized_path = quantizer.quantize( 82 | ... save_dir=output, 83 | ... calibration_tensors_range=ranges, 84 | ... quantization_config=qconfig, 85 | ... 
) 86 | ``` 87 | -------------------------------------------------------------------------------- /tests/test_quantization.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | import tempfile 17 | import unittest 18 | from functools import partial 19 | from pathlib import Path 20 | 21 | import requests 22 | from parameterized import parameterized 23 | from PIL import Image 24 | from transformers import AutoFeatureExtractor 25 | 26 | from optimum.furiosa import ( 27 | AutoCalibrationConfig, 28 | FuriosaAIConfig, 29 | FuriosaAIModelForImageClassification, 30 | FuriosaAIQuantizer, 31 | QuantizationConfig, 32 | ) 33 | from optimum.furiosa.utils import export_model_to_onnx 34 | 35 | 36 | class FuriosaAIQuantizationTest(unittest.TestCase): 37 | SUPPORTED_ARCHITECTURES = ((FuriosaAIModelForImageClassification, "fxmarty/resnet-tiny-beans"),) 38 | 39 | @parameterized.expand(SUPPORTED_ARCHITECTURES) 40 | def test_quantization(self, model_cls, model_name): 41 | qconfig = QuantizationConfig() 42 | 43 | def preprocess_fn(ex, feature_extractor): 44 | return feature_extractor(ex["image"]) 45 | 46 | with tempfile.TemporaryDirectory() as tmp_dir: 47 | output_dir = Path(tmp_dir) 48 | export_model_to_onnx( 49 | model_name, 50 | save_dir=tmp_dir, 51 | input_shape_dict={"pixel_values": [1, 3, 224, 224]}, 52 | output_shape_dict={"logits": [1, 3]}, 53 | file_name="model.onnx", 54 | ) 55 | 56 | feature_extractor = AutoFeatureExtractor.from_pretrained(model_name) 57 | 58 | quantizer = FuriosaAIQuantizer.from_pretrained(tmp_dir, file_name="model.onnx") 59 | 60 | calibration_dataset = quantizer.get_calibration_dataset( 61 | "beans", 62 | preprocess_function=partial(preprocess_fn, feature_extractor=feature_extractor), 63 | num_samples=10, 64 | dataset_split="train", 65 | ) 66 | 67 | calibration_config = AutoCalibrationConfig.mse_asym(calibration_dataset) 68 | ranges = quantizer.fit( 69 | dataset=calibration_dataset, 70 | calibration_config=calibration_config, 71 | ) 72 | 73 | quantizer.quantize( 74 | save_dir=output_dir, 75 | calibration_tensors_range=ranges, 76 | quantization_config=qconfig, 77 | ) 78 | 79 | expected_fai_config = FuriosaAIConfig(quantization=qconfig, calibration=calibration_config) 80 | fai_config = FuriosaAIConfig.from_pretrained(tmp_dir) 81 | # Verify the FuriosaAIConfig was correctly created and saved 82 | self.assertEqual(fai_config.to_dict(), expected_fai_config.to_dict()) 83 | 84 | assert os.path.isfile(output_dir.joinpath("model_quantized.dfg")) is True 85 | 86 | fai_model_quantized = model_cls(Path(output_dir) / "model_quantized.dfg") 87 | 88 | url = "http://images.cocodataset.org/val2017/000000039769.jpg" 89 | image = Image.open(requests.get(url, stream=True).raw) 90 | inputs = feature_extractor(images=image, return_tensors="np") 91 | 92 | fai_outputs = fai_model_quantized(**inputs) 93 | 
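# inference on the quantized model should still return classification logits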
self.assertIn("logits", fai_outputs) 94 | -------------------------------------------------------------------------------- /optimum/furiosa/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | from pathlib import Path 17 | from typing import List, Union 18 | 19 | import numpy as np 20 | from transformers import AutoFeatureExtractor, AutoProcessor, AutoTokenizer 21 | 22 | from furiosa.runtime.tensor import DataType 23 | from optimum.exporters.onnx import main_export 24 | 25 | 26 | ONNX_WEIGHTS_NAME = "model.onnx" 27 | ONNX_WEIGHTS_NAME_STATIC = "model_static.onnx" 28 | FURIOSA_ENF_FILE_NAME = "model.enf" 29 | FURIOSA_QUANTIZED_FILE_NAME = "model_quantized.dfg" 30 | 31 | MAX_ONNX_OPSET_2022_2_0 = 10 32 | MAX_ONNX_OPSET = 13 33 | MIN_ONNX_QDQ_OPSET = 13 34 | 35 | WARBOY_DEVICE = "warboy" 36 | 37 | FURIOSA_DTYPE_TO_NUMPY_DTYPE = { 38 | DataType.UINT8: np.uint8, 39 | DataType.INT8: np.int8, 40 | DataType.FLOAT32: np.float32, 41 | } 42 | 43 | _HEAD_TO_AUTOMODELS = { 44 | "image-classification": "FuriosaAIModelForImageClassification", 45 | } 46 | 47 | 48 | def export_model_to_onnx(model_id, save_dir, input_shape_dict, output_shape_dict, file_name="model.onnx"): 49 | task = "image-classification" 50 | main_export(model_id, save_dir, task=task) 51 | 52 | import onnx 53 | from onnx import shape_inference 54 | from onnx.tools import update_model_dims 55 | 56 | save_dir_path = Path(save_dir) / "model.onnx" 57 | model = onnx.load(save_dir_path) 58 | updated_model = update_model_dims.update_inputs_outputs_dims(model, input_shape_dict, output_shape_dict) 59 | inferred_model = shape_inference.infer_shapes(updated_model) 60 | 61 | static_model_path = Path(save_dir_path).parent / file_name 62 | onnx.save(inferred_model, static_model_path) 63 | 64 | 65 | def maybe_load_preprocessors(src_name_or_path: Union[str, Path], subfolder: str = "") -> List: 66 | preprocessors = [] 67 | try: 68 | preprocessors.append(AutoTokenizer.from_pretrained(src_name_or_path, subfolder=subfolder)) 69 | except Exception: 70 | pass 71 | 72 | try: 73 | preprocessors.append(AutoProcessor.from_pretrained(src_name_or_path, subfolder=subfolder)) 74 | except Exception: 75 | pass 76 | 77 | try: 78 | preprocessors.append(AutoFeatureExtractor.from_pretrained(src_name_or_path, subfolder=subfolder)) 79 | except Exception: 80 | pass 81 | return preprocessors 82 | 83 | 84 | def maybe_save_preprocessors(src_name_or_path: Union[str, Path], dest_dir: Union[str, Path], src_subfolder: str = ""): 85 | """ 86 | Saves the tokenizer, the processor and the feature extractor when found in `src_dir` in `dest_dir`. 87 | 88 | Args: 89 | src_dir (`Union[str, Path]`): 90 | The source directory from which to copy the files. 91 | dest_dir (`Union[str, Path]`): 92 | The destination directory to copy the files to. 
93 | src_subfolder (`str`, defaults to `""`): 94 | In case the preprocessor files are located inside a subfolder of the model directory / repo on the Hugging 95 | Face Hub, you can specify the subfolder name here. 96 | """ 97 | if not isinstance(dest_dir, Path): 98 | dest_dir = Path(dest_dir) 99 | 100 | dest_dir.mkdir(exist_ok=True) 101 | for preprocessor in maybe_load_preprocessors(src_name_or_path, subfolder=src_subfolder): 102 | preprocessor.save_pretrained(dest_dir) 103 | -------------------------------------------------------------------------------- /tests/test_modeling.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | # Copyright 2023 The HuggingFace Team. All rights reserved. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | import gc 16 | import os 17 | import tempfile 18 | import unittest 19 | 20 | import numpy as np 21 | import requests 22 | import torch 23 | from parameterized import parameterized 24 | from PIL import Image 25 | from transformers import AutoFeatureExtractor, AutoModelForImageClassification, PretrainedConfig, pipeline, set_seed 26 | 27 | from optimum.furiosa import FuriosaAIModelForImageClassification 28 | from optimum.furiosa.utils import FURIOSA_ENF_FILE_NAME 29 | from optimum.utils import ( 30 | logging, 31 | ) 32 | 33 | 34 | SEED = 42 35 | 36 | logger = logging.get_logger() 37 | 38 | MODEL_DICT = { 39 | "mobilenet_v1": ["google/mobilenet_v1_0.75_192", {"pixel_values": [1, 3, 192, 192]}, {"logits": [1, 1001]}], 40 | "mobilenet_v2": [ 41 | "hf-internal-testing/tiny-random-MobileNetV2Model", 42 | {"pixel_values": [1, 3, 32, 32]}, 43 | {"logits": [1, 2]}, 44 | ], 45 | "resnet": ["hf-internal-testing/tiny-random-resnet", {"pixel_values": [1, 3, 224, 224]}, {"logits": [1, 1000]}], 46 | } 47 | 48 | 49 | TENSOR_ALIAS_TO_TYPE = { 50 | "pt": torch.Tensor, 51 | "np": np.ndarray, 52 | } 53 | 54 | 55 | class FuriosaAIModelIntegrationTest(unittest.TestCase): 56 | def __init__(self, *args, **kwargs): 57 | super().__init__(*args, **kwargs) 58 | self.MODEL_ID = "mohitsha/furiosa-resnet-tiny-beans" 59 | 60 | def test_load_from_hub_and_save_model(self): 61 | preprocessor = AutoFeatureExtractor.from_pretrained(self.MODEL_ID) 62 | url = "http://images.cocodataset.org/val2017/000000039769.jpg" 63 | image = Image.open(requests.get(url, stream=True).raw) 64 | inputs = preprocessor(images=image, return_tensors="pt") 65 | loaded_model = FuriosaAIModelForImageClassification.from_pretrained(self.MODEL_ID) 66 | self.assertIsInstance(loaded_model.config, PretrainedConfig) 67 | loaded_model_outputs = loaded_model(**inputs) 68 | 69 | with tempfile.TemporaryDirectory() as tmpdirname: 70 | loaded_model.save_pretrained(tmpdirname) 71 | del loaded_model 72 | folder_contents = os.listdir(tmpdirname) 73 | self.assertTrue(FURIOSA_ENF_FILE_NAME in folder_contents) 74 | model = FuriosaAIModelForImageClassification.from_pretrained(tmpdirname) 75 | 76 | outputs = model(**inputs) 77 | 
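# the model reloaded from the saved ENF artifact must reproduce the original outputs exactly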
self.assertTrue(torch.equal(loaded_model_outputs.logits, outputs.logits)) 78 | 79 | 80 | class FuriosaAIModelForImageClassificationIntegrationTest(unittest.TestCase): 81 | SUPPORTED_ARCHITECTURES = [ 82 | "mobilenet_v1", 83 | "mobilenet_v2", 84 | "resnet", 85 | ] 86 | 87 | FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES} 88 | FuriosaAIMODEL_CLASS = FuriosaAIModelForImageClassification 89 | TASK = "image-classification" 90 | 91 | @parameterized.expand(SUPPORTED_ARCHITECTURES) 92 | def test_compare_to_transformers(self, model_arch): 93 | model_id, input_shape_dict, output_shape_dict = MODEL_DICT[model_arch] 94 | set_seed(SEED) 95 | fai_model = FuriosaAIModelForImageClassification.from_pretrained( 96 | model_id, export=True, input_shape_dict=input_shape_dict, output_shape_dict=output_shape_dict 97 | ) 98 | self.assertIsInstance(fai_model.config, PretrainedConfig) 99 | transformers_model = AutoModelForImageClassification.from_pretrained(model_id) 100 | preprocessor = AutoFeatureExtractor.from_pretrained(model_id) 101 | url = "http://images.cocodataset.org/val2017/000000039769.jpg" 102 | image = Image.open(requests.get(url, stream=True).raw) 103 | inputs = preprocessor(images=image, return_tensors="pt") 104 | with torch.no_grad(): 105 | transformers_outputs = transformers_model(**inputs) 106 | for input_type in ["pt", "np"]: 107 | inputs = preprocessor(images=image, return_tensors=input_type) 108 | fai_outputs = fai_model(**inputs) 109 | self.assertIn("logits", fai_outputs) 110 | self.assertIsInstance(fai_outputs.logits, TENSOR_ALIAS_TO_TYPE[input_type]) 111 | # Compare tensor outputs 112 | self.assertTrue(torch.allclose(torch.Tensor(fai_outputs.logits), transformers_outputs.logits, atol=1e-4)) 113 | 114 | gc.collect() 115 | 116 | @parameterized.expand(SUPPORTED_ARCHITECTURES) 117 | def test_pipeline(self, model_arch): 118 | model_id, input_shape_dict, output_shape_dict = MODEL_DICT[model_arch] 119 | model = FuriosaAIModelForImageClassification.from_pretrained( 120 | model_id, export=True, input_shape_dict=input_shape_dict, output_shape_dict=output_shape_dict 121 | ) 122 | preprocessor = AutoFeatureExtractor.from_pretrained(model_id) 123 | pipe = pipeline("image-classification", model=model, feature_extractor=preprocessor) 124 | outputs = pipe("http://images.cocodataset.org/val2017/000000039769.jpg") 125 | self.assertGreaterEqual(outputs[0]["score"], 0.0) 126 | self.assertTrue(isinstance(outputs[0]["label"], str)) 127 | gc.collect() 128 | -------------------------------------------------------------------------------- /optimum/furiosa/modeling.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | import logging
16 | from typing import Callable, Dict, List, Optional, Union
17 | 
18 | import numpy as np
19 | import torch
20 | import tqdm
21 | import transformers
22 | from datasets import Dataset
23 | from transformers import (
24 |     AutoConfig,
25 |     AutoModel,
26 |     AutoModelForImageClassification,
27 |     EvalPrediction,
28 | )
29 | from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward
30 | from transformers.modeling_outputs import (
31 |     ImageClassifierOutput,
32 | )
33 | 
34 | from .modeling_base import FuriosaAIBaseModel
35 | from .utils import FURIOSA_DTYPE_TO_NUMPY_DTYPE
36 | 
37 | 
38 | logger = logging.getLogger(__name__)
39 | 
40 | 
41 | _FEATURE_EXTRACTOR_FOR_DOC = "AutoFeatureExtractor"
42 | 
43 | MODEL_START_DOCSTRING = r"""
44 |     This model inherits from [`optimum.furiosa.FuriosaAIBaseModel`]. Check the superclass documentation for the generic methods the
45 |     library implements for all its models (such as downloading or saving).
46 |     Parameters:
47 |         model (`furiosa.runtime.model`): The main class used to run inference.
48 |         config (`transformers.PretrainedConfig`): [PretrainedConfig](https://huggingface.co/docs/transformers/main_classes/configuration#transformers.PretrainedConfig)
49 |             is the Model configuration class with all the parameters of the model.
50 |             Initializing with a config file does not load the weights associated with the model, only the configuration.
51 |             Check out the [`~furiosa.modeling.FuriosaAIBaseModel.from_pretrained`] method to load the model weights.
52 |         device (`str`, defaults to `"CPU"`):
53 |             The device type for which the model will be optimized. The resulting compiled model will contain nodes specific to this device.
54 |         furiosa_config (`Optional[Dict]`, defaults to `None`):
55 |             The dictionary containing the information related to the model compilation.
56 |         compile (`bool`, defaults to `True`):
57 |             Disable the model compilation during the loading step when set to `False`.
58 | """
59 | 
60 | IMAGE_INPUTS_DOCSTRING = r"""
61 |     Args:
62 |         pixel_values (`torch.Tensor`):
63 |             Pixel values corresponding to the images in the current batch.
64 |             Pixel values can be obtained from encoded images using [`AutoFeatureExtractor`](https://huggingface.co/docs/transformers/autoclass_tutorial#autofeatureextractor).
65 | """
66 | 
67 | 
68 | class FuriosaAIModel(FuriosaAIBaseModel):
69 |     base_model_prefix = "furiosa_model"
70 |     auto_model_class = AutoModel
71 | 
72 |     def __init__(
73 |         self,
74 |         model,
75 |         config: transformers.PretrainedConfig = None,
76 |         compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
77 |         label_names: Optional[List[str]] = None,
78 |         **kwargs,
79 |     ):
80 |         super().__init__(model, config, **kwargs)
81 |         # Avoid warnings when creating a transformers pipeline
82 |         AutoConfig.register(self.base_model_prefix, AutoConfig)
83 |         self.auto_model_class.register(AutoConfig, self.__class__)
84 |         self.device = torch.device("cpu")
85 | 
86 |         # Evaluation args
87 |         self.compute_metrics = compute_metrics
88 |         self.label_names = ["labels"] if label_names is None else label_names
89 | 
90 |     def to(self, device: str):
91 |         """
92 |         Use the specified `device` for inference. For example: "cpu" or "gpu". `device` can
93 |         be in upper or lower case. To speed up first inference, call `.compile()` after `.to()`.
94 |         """
95 |         self._device = device.upper()
96 |         self.sess = None
97 |         return self
98 | 
99 |     def forward(self, *args, **kwargs):
100 |         raise NotImplementedError
101 | 
102 |     def evaluation_loop(self, dataset: Dataset):
103 |         """
104 |         Runs evaluation and returns the metrics and predictions.
105 | 
106 |         Args:
107 |             dataset (`datasets.Dataset`):
108 |                 Dataset to use for the evaluation step.
109 |         """
110 |         logger.info("***** Running evaluation *****")
111 | 
112 |         # from transformers import EvalPrediction
113 |         from transformers.trainer_pt_utils import nested_concat
114 |         from transformers.trainer_utils import EvalLoopOutput
115 | 
116 |         all_preds = None
117 |         all_labels = None
118 |         for step, inputs in tqdm.tqdm(enumerate(dataset), total=len(dataset)):
119 |             has_labels = all(inputs.get(k) is not None for k in self.label_names)
120 |             if has_labels:
121 |                 labels = tuple(np.array([inputs.get(name)]) for name in self.label_names)
122 |                 if len(labels) == 1:
123 |                     labels = labels[0]
124 |             else:
125 |                 labels = None
126 | 
127 |             inputs = [
128 |                 np.array([inputs[key]], dtype=FURIOSA_DTYPE_TO_NUMPY_DTYPE[self.inputs_to_dtype[k]])
129 |                 for k, key in enumerate(self.input_names)
130 |                 if key in inputs
131 |             ]
132 | 
133 |             preds = self.sess.run(inputs)
134 |             if len(preds) == 1:
135 |                 preds = preds[0].numpy()
136 |             all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100)
137 |             all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100)
138 | 
139 |         if self.compute_metrics is not None and all_preds is not None and all_labels is not None:
140 |             metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
141 |         else:
142 |             metrics = {}
143 |         return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=len(dataset))
144 | 
145 | 
146 | IMAGE_CLASSIFICATION_EXAMPLE = r"""
147 |     Example of image classification using `transformers.pipelines`:
148 |     ```python
149 |     >>> from transformers import {processor_class}, pipeline
150 |     >>> from optimum.furiosa import {model_class}
151 | 
152 |     >>> preprocessor = {processor_class}.from_pretrained("{checkpoint}")
153 |     >>> model = {model_class}.from_pretrained("{checkpoint}", export=True, input_shape_dict={{"pixel_values": [1, 3, 224, 224]}}, output_shape_dict={{"logits": [1, 1000]}})
154 |     >>> pipe = pipeline("image-classification", model=model, feature_extractor=preprocessor)
155 |     >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
156 |     >>> outputs = pipe(url)
157 |     ```
158 | """
159 | 
160 | 
161 | @add_start_docstrings(
162 |     """
163 |     FuriosaAI Model with an ImageClassifierOutput for image classification tasks.
164 |     """,
165 |     MODEL_START_DOCSTRING,
166 | )
167 | class FuriosaAIModelForImageClassification(FuriosaAIModel):
168 |     export_feature = "image-classification"
169 |     auto_model_class = AutoModelForImageClassification
170 | 
171 |     def __init__(self, model=None, config=None, **kwargs):
172 |         super().__init__(model, config, **kwargs)
173 |         self.input_names = ["pixel_values"]
174 | 
175 |     @add_start_docstrings_to_model_forward(
176 |         IMAGE_INPUTS_DOCSTRING.format("batch_size, num_channels, height, width")
177 |         + IMAGE_CLASSIFICATION_EXAMPLE.format(
178 |             processor_class=_FEATURE_EXTRACTOR_FOR_DOC,
179 |             model_class="FuriosaAIModelForImageClassification",
180 |             checkpoint="microsoft/resnet-50",
181 |         )
182 |     )
183 |     def forward(
184 |         self,
185 |         pixel_values: Union[torch.Tensor, np.ndarray],
186 |         **kwargs,
187 |     ):
188 |         np_inputs = isinstance(pixel_values, np.ndarray)
189 |         if not np_inputs:
190 |             pixel_values = np.array(pixel_values)
191 | 
192 |         # Run inference
193 |         outputs = self.sess.run(pixel_values)
194 |         logits = torch.from_numpy(outputs[0].numpy()) if not np_inputs else outputs[0].numpy()
195 |         return ImageClassifierOutput(logits=logits)
196 | 
--------------------------------------------------------------------------------
/optimum/furiosa/configuration.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 The HuggingFace Team. All rights reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | from dataclasses import asdict, dataclass
16 | from enum import Enum
17 | from typing import Optional, Sequence, Union
18 | 
19 | import onnx
20 | from datasets import Dataset
21 | 
22 | from furiosa.quantizer import CalibrationMethod, Calibrator
23 | from optimum.configuration_utils import BaseConfig
24 | 
25 | 
26 | DEFAULT_QUANTIZATION_CONFIG = {}
27 | 
28 | 
29 | @dataclass
30 | class CalibrationConfig:
31 |     """
32 |     CalibrationConfig is the configuration class handling all the FuriosaAI parameters related to the calibration
33 |     step of static quantization.
34 | 
35 |     Args:
36 |         dataset_name (`str`):
37 |             The name of the calibration dataset.
38 |         dataset_config_name (`str`):
39 |             The name of the calibration dataset configuration.
40 |         dataset_split (`str`):
41 |             Which split of the dataset is used to perform the calibration step.
42 |         dataset_num_samples (`int`):
43 |             The number of samples composing the calibration dataset.
44 |         method (`CalibrationMethod`):
45 |             The method chosen to calculate the activations quantization parameters using the calibration dataset.
46 |         percentage (`Optional[float]`, defaults to `None`):
47 |             The percentage to use when computing the activations quantization ranges when performing the calibration
48 |             step using the Percentile method.
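
    Example (a minimal sketch; assumes `dataset` is an already-loaded `datasets.Dataset`):
    ```python
    # Build a min-max asymmetric calibration configuration from the dataset metadata
    calibration_config = AutoCalibrationConfig.minmax_asym(dataset)
    ```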
49 |     """
50 | 
51 |     dataset_name: str
52 |     dataset_config_name: str
53 |     dataset_split: str
54 |     dataset_num_samples: int
55 |     method: CalibrationMethod
56 |     percentage: Optional[float] = None
57 | 
58 |     def create_calibrator(
59 |         self,
60 |         model: Union[onnx.ModelProto, bytes],
61 |     ) -> Calibrator:
62 |         return Calibrator(model, self.method, percentage=self.percentage)
63 | 
64 | 
65 | class AutoCalibrationConfig:
66 |     @staticmethod
67 |     def create_calibration_config(dataset: Dataset, method: CalibrationMethod, percentile: float = None):
68 |         return CalibrationConfig(
69 |             dataset_name=dataset.info.builder_name,
70 |             dataset_config_name=dataset.info.config_name,
71 |             dataset_split=str(dataset.split),
72 |             dataset_num_samples=dataset.num_rows,
73 |             method=method,
74 |             percentage=percentile,
75 |         )
76 | 
77 |     @staticmethod
78 |     def minmax_asym(dataset: Dataset) -> CalibrationConfig:
79 |         """
80 |         Args:
81 |             dataset (`Dataset`):
82 |                 The dataset to use when performing the calibration step.
83 | 
84 |         Returns:
85 |             The calibration configuration.
86 |         """
87 |         return AutoCalibrationConfig.create_calibration_config(
88 |             dataset,
89 |             method=CalibrationMethod.MIN_MAX_ASYM,
90 |         )
91 |     @staticmethod
92 |     def minmax_sym(dataset: Dataset) -> CalibrationConfig:
93 |         """
94 |         Args:
95 |             dataset (`Dataset`):
96 |                 The dataset to use when performing the calibration step.
97 | 
98 |         Returns:
99 |             The calibration configuration.
100 |         """
101 |         return AutoCalibrationConfig.create_calibration_config(
102 |             dataset,
103 |             method=CalibrationMethod.MIN_MAX_SYM,
104 |         )
105 | 
106 |     @staticmethod
107 |     def entropy_asym(
108 |         dataset: Dataset,
109 |     ) -> CalibrationConfig:
110 |         """
111 |         Args:
112 |             dataset (`Dataset`):
113 |                 The dataset to use when performing the calibration step.
114 | 
115 |         Returns:
116 |             The calibration configuration.
117 |         """
118 |         return AutoCalibrationConfig.create_calibration_config(
119 |             dataset,
120 |             method=CalibrationMethod.ENTROPY_ASYM,
121 |         )
122 | 
123 |     @staticmethod
124 |     def entropy_sym(
125 |         dataset: Dataset,
126 |     ) -> CalibrationConfig:
127 |         """
128 |         Args:
129 |             dataset (`Dataset`):
130 |                 The dataset to use when performing the calibration step.
131 | 
132 |         Returns:
133 |             The calibration configuration.
134 |         """
135 |         return AutoCalibrationConfig.create_calibration_config(
136 |             dataset,
137 |             method=CalibrationMethod.ENTROPY_SYM,
138 |         )
139 | 
140 |     @staticmethod
141 |     def percentiles_asym(dataset: Dataset, percentile: float = 99.999) -> CalibrationConfig:
142 |         """
143 |         Args:
144 |             dataset (`Dataset`):
145 |                 The dataset to use when performing the calibration step.
146 |             percentile (`float`):
147 |                 The percentile to use when computing the activations quantization ranges.
148 | 
149 |         Returns:
150 |             The calibration configuration.
151 |         """
152 |         return AutoCalibrationConfig.create_calibration_config(
153 |             dataset,
154 |             method=CalibrationMethod.PERCENTILE_ASYM,
155 |             percentile=percentile,
156 |         )
157 | 
158 |     @staticmethod
159 |     def percentiles_sym(dataset: Dataset, percentile: float = 99.999) -> CalibrationConfig:
160 |         """
161 |         Args:
162 |             dataset (`Dataset`):
163 |                 The dataset to use when performing the calibration step.
164 |             percentile (`float`):
165 |                 The percentile to use when computing the activations quantization ranges.
166 | 
167 |         Returns:
168 |             The calibration configuration.
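
        Example (illustrative; the percentile value shown is an assumption):
        ```python
        # Tighter percentile than the 99.999 default
        calibration_config = AutoCalibrationConfig.percentiles_sym(dataset, percentile=99.99)
        ```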
169 |         """
170 |         return AutoCalibrationConfig.create_calibration_config(
171 |             dataset,
172 |             method=CalibrationMethod.PERCENTILE_SYM,
173 |             percentile=percentile,
174 |         )
175 | 
176 |     @staticmethod
177 |     def mse_asym(dataset: Dataset) -> CalibrationConfig:
178 |         """
179 |         Args:
180 |             dataset (`Dataset`):
181 |                 The dataset to use when performing the calibration step.
182 | 
183 |         Returns:
184 |             The calibration configuration.
185 |         """
186 |         return AutoCalibrationConfig.create_calibration_config(
187 |             dataset,
188 |             method=CalibrationMethod.MSE_ASYM,
189 |         )
190 | 
191 |     @staticmethod
192 |     def mse_sym(dataset: Dataset) -> CalibrationConfig:
193 |         """
194 |         Args:
195 |             dataset (`Dataset`):
196 |                 The dataset to use when performing the calibration step.
197 | 
198 |         Returns:
199 |             The calibration configuration.
200 |         """
201 |         return AutoCalibrationConfig.create_calibration_config(
202 |             dataset,
203 |             method=CalibrationMethod.MSE_SYM,
204 |         )
205 | 
206 |     @staticmethod
207 |     def sqnr_asym(dataset: Dataset) -> CalibrationConfig:
208 |         """
209 |         Args:
210 |             dataset (`Dataset`):
211 |                 The dataset to use when performing the calibration step.
212 | 
213 |         Returns:
214 |             The calibration configuration.
215 |         """
216 |         return AutoCalibrationConfig.create_calibration_config(
217 |             dataset,
218 |             method=CalibrationMethod.SQNR_ASYM,
219 |         )
220 | 
221 |     @staticmethod
222 |     def sqnr_sym(dataset: Dataset) -> CalibrationConfig:
223 |         """
224 |         Args:
225 |             dataset (`Dataset`):
226 |                 The dataset to use when performing the calibration step.
227 | 
228 |         Returns:
229 |             The calibration configuration.
230 |         """
231 |         return AutoCalibrationConfig.create_calibration_config(
232 |             dataset,
233 |             method=CalibrationMethod.SQNR_SYM,
234 |         )
235 | 
236 | 
237 | @dataclass
238 | class QuantizationConfig:
239 |     """
240 |     QuantizationConfig is the configuration class handling all the FuriosaAI quantization parameters.
241 | 
242 |     Args:
243 |         with_quantize (`bool`, defaults to `True`):
244 |             Whether to put a Quantize operator at the beginning of the resulting model.
245 |         normalized_pixel_outputs (`Optional[Sequence[int]]`, defaults to `None`):
246 |             A sequence of indices of output tensors in the ONNX model that produce pixel values in a normalized format
247 |             ranging from 0.0 to 1.0. If specified, the corresponding output tensors in the resulting quantized model
248 |             will generate pixel values in an unnormalized format from 0 to 255, represented as unsigned 8-bit integers (uint8).
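
    Example (a minimal sketch):
    ```python
    # Default settings: insert a Quantize operator, keep pixel outputs normalized
    qconfig = QuantizationConfig(with_quantize=True, normalized_pixel_outputs=None)
    ```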
249 | """ 250 | 251 | with_quantize: bool = True 252 | normalized_pixel_outputs: Sequence[int] = None 253 | 254 | 255 | class FuriosaAIConfig(BaseConfig): 256 | CONFIG_NAME = "furiosa_config.json" 257 | FULL_CONFIGURATION_FILE = "furiosa_config.json" 258 | 259 | def __init__( 260 | self, 261 | opset: Optional[int] = None, 262 | quantization: Optional[QuantizationConfig] = None, 263 | calibration: Optional[CalibrationConfig] = None, 264 | **kwargs, 265 | ): 266 | super().__init__() 267 | self.quantization = self.dataclass_to_dict(quantization) 268 | self.calibration = self.dataclass_to_dict(calibration) 269 | self.optimum_version = kwargs.pop("optimum_version", None) 270 | 271 | @staticmethod 272 | def dataclass_to_dict(config) -> dict: 273 | new_config = {} 274 | if config is None: 275 | return new_config 276 | if isinstance(config, dict): 277 | return config 278 | for k, v in asdict(config).items(): 279 | if isinstance(v, Enum): 280 | v = v.name 281 | elif isinstance(v, list): 282 | v = [elem.name if isinstance(elem, Enum) else elem for elem in v] 283 | new_config[k] = v 284 | return new_config 285 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /examples/quantization/image-classification/run_image_classification.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | # Copyright 2023 The HuggingFace Team. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at
8 | #
9 | #     http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | """ Quantizing the library models for image classification."""
18 | # You can also adapt this script to your own image classification task. Pointers for this are left as comments.
19 | import json
20 | import logging
21 | import os
22 | import sys
23 | from dataclasses import dataclass, field
24 | from functools import partial
25 | from pathlib import Path
26 | from typing import Optional
27 | 
28 | import datasets
29 | import numpy as np
30 | import torch
31 | import transformers
32 | from datasets import load_dataset
33 | from evaluate import load
34 | from torchvision.transforms import CenterCrop, Compose, Normalize, Resize, ToTensor
35 | from transformers import AutoConfig, AutoFeatureExtractor, EvalPrediction, HfArgumentParser, TrainingArguments
36 | from transformers.utils.versions import require_version
37 | 
38 | from optimum.furiosa import FuriosaAIModelForImageClassification, FuriosaAIQuantizer
39 | from optimum.furiosa.configuration import AutoCalibrationConfig, QuantizationConfig
40 | from optimum.furiosa.utils import export_model_to_onnx
41 | 
42 | 
43 | logger = logging.getLogger(__name__)
44 | 
45 | require_version("datasets>=2.0.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")
46 | 
47 | 
48 | @dataclass
49 | class DataTrainingArguments:
50 |     """
51 |     Arguments pertaining to what data we are going to input our model for training and eval.
52 | 
53 |     Using `HfArgumentParser` we can turn this class
54 |     into argparse arguments to be able to specify them on
55 |     the command line.
56 |     """
57 | 
58 |     dataset_name: Optional[str] = field(
59 |         default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."}
60 |     )
61 |     dataset_config_name: Optional[str] = field(
62 |         default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
63 |     )
64 |     max_seq_length: int = field(
65 |         default=128,
66 |         metadata={
67 |             "help": "The maximum total input sequence length after tokenization. Sequences longer "
68 |             "than this will be truncated, sequences shorter will be padded."
69 |         },
70 |     )
71 |     overwrite_cache: bool = field(
72 |         default=False, metadata={"help": "Overwrite the cached preprocessed datasets or not."}
73 |     )
74 |     max_eval_samples: Optional[int] = field(
75 |         default=None,
76 |         metadata={
77 |             "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this "
78 |             "value if set."
79 |         },
80 |     )
81 |     max_predict_samples: Optional[int] = field(
82 |         default=None,
83 |         metadata={
84 |             "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this "
85 |             "value if set."
86 |         },
87 |     )
88 |     train_dir: Optional[str] = field(default=None, metadata={"help": "A directory path for the training data."})
89 |     validation_dir: Optional[str] = field(default=None, metadata={"help": "A directory path for the validation data."})
90 | 
91 | 
92 | @dataclass
93 | class ModelArguments:
94 |     """
95 |     Arguments pertaining to which model/config/tokenizer we are going to fine-tune from.
96 |     """
97 | 
98 |     model_name_or_path: str = field(
99 |         metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"}
100 |     )
101 |     cache_dir: Optional[str] = field(
102 |         default=None,
103 |         metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"},
104 |     )
105 |     batch_size: int = field(
106 |         default=1,
107 |         metadata={"help": "The batch size for the model."},
108 |     )
109 |     num_labels: int = field(
110 |         default=3,
111 |         metadata={"help": "The number of labels for the model."},
112 |     )
113 | 
114 | 
115 | @dataclass
116 | class OptimizationArguments:
117 |     """
118 |     Arguments pertaining to what type of optimization we are going to apply on the model.
119 |     """
120 | 
121 |     quantization_approach: str = field(
122 |         default="static",
123 |         metadata={"help": "The quantization approach. Supported approaches are static and dynamic."},
124 |     )
125 |     calibration_method: str = field(
126 |         default="minmax_asym",
127 |         metadata={
128 |             "help": "The method chosen to calculate the activation quantization parameters using the calibration "
129 |             "dataset. Currently supported calibration methods are minmax, entropy and percentile."
130 |         },
131 |     )
132 |     num_calibration_samples: int = field(
133 |         default=100,
134 |         metadata={"help": "Number of examples to use for the calibration step resulting from static quantization."},
135 |     )
136 |     num_calibration_shards: int = field(
137 |         default=1,
138 |         metadata={
139 |             "help": "How many shards to split the calibration dataset into. Useful for the entropy and percentile "
140 |             "calibration method."
141 |         },
142 |     )
143 |     calibration_batch_size: int = field(
144 |         default=1,
145 |         metadata={"help": "The batch size for the calibration step."},
146 |     )
147 |     calibration_histogram_percentile: float = field(
148 |         default=99.999,
149 |         metadata={"help": "The percentile used for the percentile calibration method."},
150 |     )
151 | 
152 | 
153 | def main():
154 |     # We now keep distinct sets of args, for a cleaner separation of concerns.
155 |     parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments, OptimizationArguments))
156 |     if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
157 |         # If we pass only one argument to the script and it's the path to a json file,
158 |         # let's parse it to get our arguments.
159 |         model_args, data_args, training_args, optim_args = parser.parse_json_file(
160 |             json_file=os.path.abspath(sys.argv[1])
161 |         )
162 |     else:
163 |         model_args, data_args, training_args, optim_args = parser.parse_args_into_dataclasses()
164 | 
165 |     # Setup logging
166 |     logging.basicConfig(
167 |         format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
168 |         datefmt="%m/%d/%Y %H:%M:%S",
169 |         handlers=[logging.StreamHandler(sys.stdout)],
170 |     )
171 | 
172 |     log_level = training_args.get_process_log_level()
173 |     logger.setLevel(log_level)
174 |     datasets.utils.logging.set_verbosity(log_level)
175 |     transformers.utils.logging.set_verbosity(log_level)
176 |     transformers.utils.logging.enable_default_handler()
177 |     transformers.utils.logging.enable_explicit_format()
178 | 
179 |     logger.info(f"Optimization with the following parameters {optim_args}")
180 | 
181 |     if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir:
182 |         raise ValueError(
183 |             f"Output directory ({training_args.output_dir}) already exists and is not empty. "
184 |             "Use --overwrite_output_dir to overcome."
185 |         )
186 | 
187 |     # Sanity checks
188 |     if data_args.dataset_name is None and data_args.train_dir is None and data_args.validation_dir is None:
189 |         raise ValueError("Need either a dataset name or a training/validation folder.")
190 | 
191 |     os.makedirs(training_args.output_dir, exist_ok=True)
192 | 
193 |     # Get the datasets: you can either provide your own training and evaluation files (see below)
194 |     # or specify a Dataset from the hub (the dataset will be downloaded automatically from the datasets Hub).
195 |     if data_args.dataset_name is not None:
196 |         # Downloading and loading a dataset from the hub.
197 |         dataset = load_dataset(data_args.dataset_name)
198 |     else:
199 |         data_files = {}
200 |         if data_args.train_dir is not None:
201 |             data_files["train"] = os.path.join(data_args.train_dir, "**")
202 |         if data_args.validation_dir is not None:
203 |             data_files["validation"] = os.path.join(data_args.validation_dir, "**")
204 |         dataset = load_dataset(
205 |             "imagefolder",
206 |             data_files=data_files,
207 |             cache_dir=model_args.cache_dir,
208 |             task="image-classification",
209 |         )
210 |     # See more about loading custom images at
211 |     # https://huggingface.co/docs/datasets/v2.0.0/en/image_process#imagefolder.
212 | 
213 |     labels_column = (
214 |         "labels" if "labels" in dataset["validation"].column_names else dataset["validation"].column_names[1]
215 |     )
216 | 
217 |     feature_extractor = AutoFeatureExtractor.from_pretrained(model_args.model_name_or_path)
218 | 
219 |     # Define torchvision transforms to be applied to each image.
220 |     normalize = Normalize(mean=feature_extractor.image_mean, std=feature_extractor.image_std)
221 |     image_size = feature_extractor.size["shortest_edge"]
222 |     transforms = Compose(
223 |         [
224 |             Resize(image_size),
225 |             CenterCrop(image_size),
226 |             ToTensor(),
227 |             normalize,
228 |         ]
229 |     )
230 | 
231 |     def preprocess_function(example_batch):
232 |         """Apply transforms across a batch."""
233 |         example_batch["pixel_values"] = [
234 |             transforms(image.convert("RGB")).to(torch.float32).numpy() for image in example_batch["image"]
235 |         ]
236 |         return example_batch
237 | 
238 |     metric = load("accuracy")
239 | 
240 |     # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
241 |     # predictions and label_ids field) and has to return a dictionary string to float.
242 | def compute_metrics(p: EvalPrediction): 243 | preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions 244 | preds = np.argmax(preds, axis=1) 245 | 246 | result = metric.compute(predictions=preds, references=p.label_ids) 247 | return result 248 | 249 | # Export the model 250 | export_model_to_onnx( 251 | model_args.model_name_or_path, 252 | save_dir=training_args.output_dir, 253 | input_shape_dict={"pixel_values": [model_args.batch_size, 3, image_size, image_size]}, 254 | output_shape_dict={"logits": [model_args.batch_size, model_args.num_labels]}, 255 | ) 256 | 257 | # Create the quantizer 258 | quantizer = FuriosaAIQuantizer.from_pretrained(training_args.output_dir, file_name="model.onnx") 259 | 260 | # Create the quantization configuration containing all the quantization parameters 261 | qconfig = QuantizationConfig() 262 | 263 | ranges = None 264 | 265 | calibration_dataset = dataset["train"] 266 | if optim_args.num_calibration_samples is not None: 267 | calibration_dataset = calibration_dataset.shuffle(seed=training_args.seed).select( 268 | range(optim_args.num_calibration_samples) 269 | ) 270 | 271 | # all images are loaded in memory, which could prove expensive if num_calibration_samples is large 272 | calibration_dataset = calibration_dataset.map( 273 | partial(preprocess_function), 274 | batched=True, 275 | load_from_cache_file=not data_args.overwrite_cache, 276 | desc="Running preprocessing on calibration dataset", 277 | ) 278 | 279 | # Remove the unnecessary columns of the calibration dataset before the calibration step 280 | calibration_dataset = quantizer.clean_calibration_dataset(calibration_dataset) 281 | 282 | # Create the calibration configuration given the selected calibration method 283 | if optim_args.calibration_method == "percentile_asym": 284 | calibration_config = AutoCalibrationConfig.percentiles_asym( 285 | calibration_dataset, 286 | percentile=optim_args.calibration_histogram_percentile, 287 | ) 288 | else: 289 | calibration_config = AutoCalibrationConfig.minmax_asym(calibration_dataset) 290 | 291 | if not 1 <= optim_args.num_calibration_shards <= len(calibration_dataset): 292 | raise ValueError( 293 | f"Invalid value of number of shards {optim_args.num_calibration_shards} chosen to split the calibration" 294 | f" dataset, should be higher than 0 and lower or equal to the number of samples " 295 | f"{len(calibration_dataset)}." 
296 | ) 297 | 298 | for i in range(optim_args.num_calibration_shards): 299 | shard = calibration_dataset.shard(optim_args.num_calibration_shards, i) 300 | quantizer.partial_fit( 301 | dataset=shard, 302 | calibration_config=calibration_config, 303 | batch_size=optim_args.calibration_batch_size, 304 | ) 305 | ranges = quantizer.compute_ranges() 306 | 307 | # Apply quantization on the model 308 | quantizer.quantize( 309 | save_dir=training_args.output_dir, 310 | calibration_tensors_range=ranges, 311 | quantization_config=qconfig, 312 | ) 313 | 314 | # Evaluation 315 | if training_args.do_eval: 316 | logger.info("*** Evaluate ***") 317 | 318 | model_config = AutoConfig.from_pretrained(model_args.model_name_or_path) 319 | eval_dataset = dataset["validation"] 320 | if data_args.max_eval_samples is not None: 321 | eval_dataset = eval_dataset.shuffle(seed=training_args.seed).select(range(data_args.max_eval_samples)) 322 | 323 | try: 324 | eval_dataset = eval_dataset.align_labels_with_mapping( 325 | label2id=model_config.label2id, label_column=labels_column 326 | ) 327 | except Exception: 328 | logger.warning( 329 | f"\nModel label mapping: {model_config.label2id}" 330 | f"\nDataset label features: {eval_dataset.features[labels_column]}" 331 | f"\nCould not guarantee the model label mapping and the dataset labels match." 332 | f" Evaluation results may suffer from a wrong matching." 333 | ) 334 | 335 | # Set the validation transforms 336 | eval_dataset = eval_dataset.with_transform(preprocess_function) 337 | 338 | furiosa_model = FuriosaAIModelForImageClassification( 339 | Path(training_args.output_dir) / "model_quantized.dfg", 340 | compute_metrics=compute_metrics, 341 | label_names=[labels_column], 342 | ) 343 | outputs = furiosa_model.evaluation_loop(eval_dataset) 344 | # Save metrics 345 | with open(os.path.join(training_args.output_dir, "eval_results.json"), "w") as f: 346 | json.dump(outputs.metrics, f, indent=4, sort_keys=True) 347 | 348 | 349 | if __name__ == "__main__": 350 | main() 351 | -------------------------------------------------------------------------------- /optimum/furiosa/quantization.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import logging 16 | import os 17 | from pathlib import Path 18 | from typing import TYPE_CHECKING, Callable, Dict, Optional, Tuple, Union 19 | 20 | import numpy as np 21 | import onnx 22 | import tqdm 23 | from datasets import Dataset, load_dataset 24 | from transformers import AutoConfig 25 | 26 | from furiosa.optimizer import optimize_model 27 | from furiosa.quantizer import quantize 28 | 29 | from .configuration import CalibrationConfig, FuriosaAIConfig, QuantizationConfig 30 | from .modeling import FuriosaAIModel 31 | from .quantization_base import OptimumQuantizer 32 | 33 | 34 | if TYPE_CHECKING: 35 | from transformers import PretrainedConfig 36 | 37 | LOGGER = logging.getLogger(__name__) 38 | 39 | 40 | class FuriosaAICalibrationDataReader: 41 | __slots__ = ["batch_size", "dataset", "_dataset_iter", "input_datatypes"] 42 | 43 | def __init__(self, dataset: Dataset, input_datatypes, batch_size: int = 1): 44 | if dataset is None: 45 | raise ValueError("Provided dataset is None.") 46 | 47 | if input_datatypes is None: 48 | raise ValueError("Provided input_datatypes is None.") 49 | 50 | if batch_size <= 0: 51 | raise ValueError(f"Provided batch_size should be >= 1 (got: {batch_size}).") 52 | 53 | self.dataset = dataset 54 | self.input_datatypes = input_datatypes 55 | self.batch_size = batch_size 56 | 57 | self._dataset_iter = iter(self.dataset) 58 | 59 | def __len__(self): 60 | return len(self.dataset) // self.batch_size 61 | 62 | def __next__(self): 63 | featurized_samples = None 64 | try: 65 | featurized_samples = [] 66 | for _ in range(self.batch_size): 67 | sample = next(self._dataset_iter) 68 | 69 | input_list = [[] for i in range(len(sample))] 70 | for i, name in enumerate(sample): 71 | input_list[i] += [sample[name]] 72 | input_list = [ 73 | np.array(d, onnx.mapping.TENSOR_TYPE_TO_NP_TYPE[self.input_datatypes[i]]) 74 | for i, d in enumerate(input_list) 75 | ] 76 | 77 | featurized_samples.append(input_list) 78 | 79 | except StopIteration: 80 | raise StopIteration 81 | 82 | if len(featurized_samples) > 0: 83 | return featurized_samples 84 | 85 | raise StopIteration 86 | 87 | def __iter__(self): 88 | return self 89 | 90 | 91 | class FuriosaAIQuantizer(OptimumQuantizer): 92 | """ 93 | Handles the FuriosaAI quantization process for models shared on huggingface.co/models. 94 | """ 95 | 96 | def __init__(self, model_path: Path, config: Optional["PretrainedConfig"] = None): 97 | """ 98 | Args: 99 | model_path (`Path`): 100 | Path to the onnx model files you want to quantize. 101 | config (`Optional[PretrainedConfig]`, *optional*): 102 | The configuration of the model. 103 | """ 104 | super().__init__() 105 | self.model_path = model_path 106 | self.config = config 107 | if self.config is None: 108 | try: 109 | self.config = AutoConfig.from_pretrained(self.model_path.parent) 110 | except OSError: 111 | LOGGER.warning( 112 | f"Could not load the config for {self.model_path} automatically, this might make " 113 | "the quantized model harder to use because it will not be able to be loaded by an FuriosaAIModel without " 114 | "having to specify the configuration explicitly." 115 | ) 116 | self._calibrator = None 117 | self._calibration_config = None 118 | 119 | @classmethod 120 | def from_pretrained( 121 | cls, 122 | model_or_path: Union["FuriosaAIQuantizer", str, Path], 123 | file_name: Optional[str] = None, 124 | ) -> "FuriosaAIQuantizer": 125 | """ 126 | Instantiates a `FuriosaAIQuantizer` from a model path. 
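
        Example (a minimal sketch; the directory path is illustrative):
        ```python
        # "./my_model_directory" is a placeholder for a directory containing model.onnx
        quantizer = FuriosaAIQuantizer.from_pretrained("./my_model_directory", file_name="model.onnx")
        ```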
127 | 
128 |         Args:
129 |             model_or_path (`Union[FuriosaAIModel, str, Path]`):
130 |                 Can be either:
131 |                     - A path to a saved exported ONNX Intermediate Representation (IR) model, e.g., `./my_model_directory/`.
132 |                     - Or a `FuriosaAIModelForXX` class, e.g., `FuriosaAIModelForImageClassification`.
133 |             file_name (`Optional[str]`, *optional*):
134 |                 Overwrites the default model file name from `"model.onnx"` to `file_name`.
135 |                 This allows you to load different model files from the same repository or directory.
136 |         Returns:
137 |             An instance of `FuriosaAIQuantizer`.
138 |         """
139 |         furiosa_quantizer_error_message = "FuriosaAIQuantizer does not support multi-file quantization. Please create separate FuriosaAIQuantizer instances for each model/file, by passing the argument `file_name` to FuriosaAIQuantizer.from_pretrained()."
140 | 
141 |         if isinstance(model_or_path, str):
142 |             model_or_path = Path(model_or_path)
143 | 
144 |         path = None
145 |         if isinstance(model_or_path, Path) and file_name is None:
146 |             onnx_files = list(model_or_path.glob("*.onnx"))
147 |             if len(onnx_files) == 0:
148 |                 raise FileNotFoundError(f"Could not find any model file in {model_or_path}")
149 |             elif len(onnx_files) > 1:
150 |                 raise RuntimeError(
151 |                     f"Found too many ONNX model files in {model_or_path}. {furiosa_quantizer_error_message}"
152 |                 )
153 |             file_name = onnx_files[0].name
154 | 
155 |         if isinstance(model_or_path, FuriosaAIModel):
156 |             if path is None:
157 |                 if isinstance(model_or_path.model, str) and model_or_path.model.endswith(".onnx"):
158 |                     path = Path(model_or_path.model)
159 |                 else:
160 |                     raise ValueError(
161 |                         "Currently, quantization of only ONNX files is supported using the optimum-furiosa repository!"
162 |                     )
163 |         elif os.path.isdir(model_or_path):
164 |             path = Path(model_or_path) / file_name
165 |         else:
166 |             raise ValueError(f"Unable to load model from {model_or_path}.")
167 |         return cls(path)
168 | 
169 |     def fit(
170 |         self,
171 |         dataset: Dataset,
172 |         calibration_config: CalibrationConfig,
173 |         batch_size: int = 1,
174 |     ) -> Dict[str, Tuple[float, float]]:
175 |         """
176 |         Performs the calibration step and computes the quantization ranges.
177 | 
178 |         Args:
179 |             dataset (`Dataset`):
180 |                 The dataset to use when performing the calibration step.
181 |             calibration_config ([`~CalibrationConfig`]):
182 |                 The configuration containing the parameters related to the calibration step.
183 |             batch_size (`int`, *optional*, defaults to 1):
184 |                 The batch size to use when collecting the quantization range values.
185 | 
186 |         Returns:
187 |             The dictionary mapping node names to their quantization ranges.
188 |         """
189 |         # If a dataset is provided, then we are in a static quantization mode
190 |         LOGGER.info(
191 |             f"Using static quantization schema ("
192 |             f"dataset: {calibration_config.dataset_name}, method: {calibration_config.method}"
193 |             f")"
194 |         )
195 | 
196 |         self.partial_fit(
197 |             dataset,
198 |             calibration_config,
199 |             batch_size,
200 |         )
201 |         return self.compute_ranges()
202 | 
203 |     def _load_model_and_optimize(self):
204 |         model = onnx.load(Path(self.model_path).as_posix())
205 |         self.onnx_model = optimize_model(model)
206 | 
207 |     def partial_fit(self, dataset: Dataset, calibration_config: CalibrationConfig, batch_size: int = 1):
208 |         """
209 |         Performs the calibration step and collects the quantization ranges without computing them.
210 | 
211 |         Args:
212 |             dataset (`Dataset`):
213 |                 The dataset to use when performing the calibration step.
214 |             calibration_config (`CalibrationConfig`):
215 |                 The configuration containing the parameters related to the calibration step.
216 |             batch_size (`int`, *optional*, defaults to 1):
217 |                 The batch size to use when collecting the quantization range values.
218 |         """
219 |         self._calibration_config = calibration_config
220 | 
221 |         # If no calibrator, then create one
222 |         if calibration_config.method is not None:
223 |             LOGGER.info(f"Creating calibrator: {calibration_config.method}({calibration_config})")
224 |             self._load_model_and_optimize()
225 | 
226 |             self._calibrator = calibration_config.create_calibrator(
227 |                 model=self.onnx_model,
228 |             )
229 | 
230 |         def get_input_datatypes(model):
231 |             input_datatypes = []
232 | 
233 |             for input in model.graph.input:
234 |                 input_type = input.type.tensor_type.elem_type
235 |                 input_datatypes.extend([input_type])
236 | 
237 |             return input_datatypes
238 | 
239 |         input_datatypes = get_input_datatypes(self.onnx_model)
240 | 
241 |         LOGGER.info("Collecting tensors statistics...")
242 |         reader = FuriosaAICalibrationDataReader(dataset, input_datatypes, batch_size)
243 |         for data in tqdm.tqdm(reader):
244 |             self._calibrator.collect_data(data)
245 | 
246 |     def compute_ranges(self) -> Dict[str, Tuple[float, float]]:
247 |         """
248 |         Computes the quantization ranges.
249 | 
250 |         Returns:
251 |             The dictionary mapping node names to their quantization ranges.
252 |         """
253 |         if self._calibrator is None:
254 |             raise ValueError(
255 |                 "Calibrator is None, please call the `partial_fit` or `fit` method at least once to compute ranges."
256 |             )
257 | 
258 |         LOGGER.info("Computing calibration ranges")
259 |         return self._calibrator.compute_range()
260 | 
261 |     def quantize(
262 |         self,
263 |         quantization_config: QuantizationConfig,
264 |         save_dir: Union[str, Path],
265 |         file_suffix: Optional[str] = "quantized",
266 |         calibration_tensors_range: Optional[Dict[str, Tuple[float, float]]] = None,
267 |     ) -> Path:
268 |         """
269 |         Quantizes a model given the optimization specifications defined in `quantization_config`.
270 | 
271 |         Args:
272 |             quantization_config (`QuantizationConfig`):
273 |                 The configuration containing the parameters related to quantization.
274 |             save_dir (`Union[str, Path]`):
275 |                 The directory where the quantized model should be saved.
276 |             file_suffix (`Optional[str]`, *optional*, defaults to `"quantized"`):
277 |                 The file suffix used to save the quantized model.
278 |             calibration_tensors_range (`Optional[Dict[NodeName, Tuple[float, float]]]`, *optional*):
279 |                 The dictionary mapping node names to their quantization ranges, used and required only when applying
280 |                 static quantization.
281 | 
282 |         Returns:
283 |             The path of the resulting quantized model.
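
        Example (a minimal sketch; assumes `ranges` was returned by `fit` or `compute_ranges`):
        ```python
        qconfig = QuantizationConfig()
        # `ranges` maps node names to (min, max) calibration values
        quantizer.quantize(quantization_config=qconfig, save_dir="quantized", calibration_tensors_range=ranges)
        ```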
284 | """ 285 | 286 | save_dir = Path(save_dir) 287 | save_dir.mkdir(parents=True, exist_ok=True) 288 | 289 | if self.onnx_model is None: 290 | self._load_model_and_optimize() 291 | 292 | LOGGER.info("Quantizing model...") 293 | model_quantized = quantize( 294 | self.onnx_model, 295 | calibration_tensors_range, 296 | with_quantize=quantization_config.with_quantize, 297 | normalized_pixel_outputs=quantization_config.normalized_pixel_outputs, 298 | ) 299 | 300 | suffix = f"_{file_suffix}" if file_suffix else "" 301 | quantized_model_path = save_dir.joinpath(f"{self.model_path.stem}{suffix}").with_suffix(".dfg") 302 | LOGGER.info(f"Saving quantized model at: {save_dir}") 303 | with open(quantized_model_path.as_posix(), "wb") as f: 304 | f.write(bytes(model_quantized)) 305 | 306 | # Create and save the configuration summarizing all the parameters related to quantization 307 | furiosa_config = FuriosaAIConfig(quantization=quantization_config, calibration=self._calibration_config) 308 | furiosa_config.save_pretrained(save_dir) 309 | 310 | if self.config is not None: 311 | self.config.save_pretrained(save_dir) 312 | 313 | return Path(save_dir) 314 | 315 | def get_calibration_dataset( 316 | self, 317 | dataset_name: str, 318 | num_samples: int = 100, 319 | dataset_config_name: Optional[str] = None, 320 | dataset_split: Optional[str] = None, 321 | preprocess_function: Optional[Callable] = None, 322 | preprocess_batch: bool = True, 323 | seed: int = 2016, 324 | use_auth_token: bool = False, 325 | ) -> Dataset: 326 | """ 327 | Creates the calibration `datasets.Dataset` to use for the post-training static quantization calibration step. 328 | 329 | Args: 330 | dataset_name (`str`): 331 | The dataset repository name on the Hugging Face Hub or path to a local directory containing data files 332 | to load to use for the calibration step. 333 | num_samples (`int`, *optional*, defaults to 100): 334 | The maximum number of samples composing the calibration dataset. 335 | dataset_config_name (`Optional[str]`, *optional*): 336 | The name of the dataset configuration. 337 | dataset_split (`Optional[str]`, *optional*): 338 | Which split of the dataset to use to perform the calibration step. 339 | preprocess_function (`Optional[Callable]`, *optional*): 340 | Processing function to apply to each example after loading dataset. 341 | preprocess_batch (`bool`, *optional*, defaults to `True`): 342 | Whether the `preprocess_function` should be batched. 343 | seed (`int`, *optional*, defaults to 2016): 344 | The random seed to use when shuffling the calibration dataset. 345 | use_auth_token (`bool`, *optional*, defaults to `False`): 346 | Whether to use the token generated when running `transformers-cli login` (necessary for some datasets 347 | like ImageNet). 348 | Returns: 349 | The calibration `datasets.Dataset` to use for the post-training static quantization calibration 350 | step. 
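
        Example (illustrative; the dataset name is an assumption):
        ```python
        # "beans" is a placeholder dataset identifier from the Hugging Face Hub
        calibration_dataset = quantizer.get_calibration_dataset(
            "beans", num_samples=100, dataset_split="train"
        )
        ```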
351 | """ 352 | calib_dataset = load_dataset( 353 | dataset_name, 354 | name=dataset_config_name, 355 | split=dataset_split, 356 | use_auth_token=use_auth_token, 357 | ) 358 | 359 | if num_samples is not None: 360 | num_samples = min(num_samples, len(calib_dataset)) 361 | calib_dataset = calib_dataset.shuffle(seed=seed).select(range(num_samples)) 362 | 363 | if preprocess_function is not None: 364 | processed_calib_dataset = calib_dataset.map(preprocess_function, batched=preprocess_batch) 365 | else: 366 | processed_calib_dataset = calib_dataset 367 | 368 | return self.clean_calibration_dataset(processed_calib_dataset) 369 | 370 | def clean_calibration_dataset(self, dataset: Dataset) -> Dataset: 371 | model = onnx.load(self.model_path) 372 | model_inputs = {input.name for input in model.graph.input} 373 | ignored_columns = list(set(dataset.column_names) - model_inputs) 374 | return dataset.remove_columns(ignored_columns) 375 | -------------------------------------------------------------------------------- /optimum/furiosa/modeling_base.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | from pathlib import Path 17 | from shutil import copyfile 18 | from tempfile import TemporaryDirectory 19 | from typing import Dict, Optional, Tuple, Union 20 | 21 | import onnx 22 | from huggingface_hub import hf_hub_download 23 | from transformers import PretrainedConfig 24 | from transformers.file_utils import add_start_docstrings 25 | 26 | # Import Furiosa SDK 27 | from furiosa import optimizer 28 | from furiosa.runtime import session 29 | from furiosa.tools.compiler.api import compile 30 | from optimum.exporters.onnx import main_export 31 | from optimum.modeling_base import OptimizedModel 32 | 33 | from .utils import ( 34 | FURIOSA_ENF_FILE_NAME, 35 | FURIOSA_QUANTIZED_FILE_NAME, 36 | ONNX_WEIGHTS_NAME, 37 | ONNX_WEIGHTS_NAME_STATIC, 38 | maybe_load_preprocessors, 39 | maybe_save_preprocessors, 40 | ) 41 | 42 | 43 | logger = logging.getLogger(__name__) 44 | 45 | 46 | @add_start_docstrings( 47 | """ 48 | Base FuriosaAIModel class. 
49 | """, 50 | ) 51 | class FuriosaAIBaseModel(OptimizedModel): 52 | auto_model_class = None 53 | export_feature = None 54 | 55 | def __init__( 56 | self, 57 | model: Union[bytes, str, Path], 58 | config: PretrainedConfig = None, 59 | device: str = None, 60 | furiosa_config: Optional[Dict[str, str]] = None, 61 | model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, 62 | input_shape_dict: Optional[Dict[str, Tuple[int]]] = None, 63 | output_shape_dict: Optional[Dict[str, Tuple[int]]] = None, 64 | **kwargs, 65 | ): 66 | self.config = config 67 | self.model_save_dir = model_save_dir 68 | self.furiosa_config = furiosa_config 69 | self.preprocessors = kwargs.get("preprocessors", []) 70 | enable_compilation = kwargs.get("compile", True) 71 | 72 | self.model = model 73 | 74 | if enable_compilation: 75 | self.model = self.compile(model, input_shape_dict, output_shape_dict) 76 | 77 | self.create_session() 78 | 79 | def _save_pretrained(self, save_directory: Union[str, Path], file_name: Optional[str] = None, **kwargs): 80 | dst_path = Path(save_directory) / FURIOSA_ENF_FILE_NAME 81 | 82 | if isinstance(self.model, (str, Path)): 83 | copyfile(self.model, dst_path) 84 | else: 85 | with open(dst_path, "wb") as f: 86 | f.write(self.model) 87 | 88 | def create_session(self): 89 | """ 90 | Create a Furiosa runtime session for the model. 91 | 92 | Creates a session object using the Furiosa runtime for executing the model. 93 | 94 | Returns: 95 | None 96 | """ 97 | self.sess = session.create(self.model) 98 | self.input_num = self.sess.input_num 99 | self.inputs_to_dtype = [] 100 | for i in range(self.input_num): 101 | self.inputs_to_dtype.append(self.sess.input(i).dtype) 102 | 103 | @classmethod 104 | def _from_pretrained( 105 | cls, 106 | model_id: Union[str, Path], 107 | config: PretrainedConfig, 108 | use_auth_token: Optional[Union[bool, str, None]] = None, 109 | revision: Optional[Union[str, None]] = None, 110 | force_download: bool = False, 111 | cache_dir: Optional[str] = None, 112 | file_name: Optional[str] = None, 113 | subfolder: str = "", 114 | from_onnx: bool = False, 115 | from_quantized: bool = False, 116 | local_files_only: bool = False, 117 | input_shape_dict: Optional[Dict[str, Tuple[int]]] = None, 118 | output_shape_dict: Optional[Dict[str, Tuple[int]]] = None, 119 | **kwargs, 120 | ): 121 | """ 122 | Loads a model and its configuration file from a directory or the Hugging Face Hub. 123 | 124 | Args: 125 | model_id (Union[str, Path]): 126 | The directory from which to load the model. Can be either: 127 | - The model ID of a pretrained model hosted inside a model repo on huggingface.co. 128 | - The path to a directory containing the model weights. 129 | config (PretrainedConfig): 130 | The configuration object associated with the model. 131 | use_auth_token (Union[bool, str, None], defaults to None): 132 | The token to use as HTTP bearer authorization for remote files. Needed to load models from a private repository. 133 | revision (Union[str, None], defaults to None): 134 | The specific model version to use. It can be a branch name, a tag name, or a commit ID. 135 | force_download (bool, defaults to False): 136 | Whether or not to force the (re-)download of the model weights and configuration files, overriding the cached versions if they exist. 137 | cache_dir (str, defaults to None): 138 | The path to a directory in which a downloaded pretrained model configuration should be cached if the standard cache should not be used. 
139 |             file_name (str, defaults to None):
140 |                 The file name of the model to load. Overwrites the default file name and allows one to load the model with a different name.
141 |             subfolder (str, defaults to ""):
142 |                 The subfolder of the repository in which the model files are located.
143 |             from_onnx (bool, defaults to False):
144 |                 Whether the model is being loaded from an ONNX file.
145 |             from_quantized (bool, defaults to False):
146 |                 Whether the model is being loaded from a quantized file.
147 |             local_files_only (bool, defaults to False):
148 |                 Whether or not to only look at local files (i.e., do not try to download the model).
149 |             input_shape_dict (Dict[str, Tuple[int]], defaults to None):
150 |                 A dictionary specifying the input shapes for dynamic models.
151 |             output_shape_dict (Dict[str, Tuple[int]], defaults to None):
152 |                 A dictionary specifying the output shapes for dynamic models.
153 |             **kwargs:
154 |                 Additional keyword arguments to be passed to the underlying model loading function.
155 | 
156 |         Returns:
157 |             An instance of the model class loaded from the specified directory or Hugging Face Hub.
158 |         """
159 |         if from_onnx:
160 |             default_file_name = ONNX_WEIGHTS_NAME
161 |         elif from_quantized:
162 |             default_file_name = FURIOSA_QUANTIZED_FILE_NAME
163 |         else:
164 |             default_file_name = FURIOSA_ENF_FILE_NAME
165 | 
166 |         file_name = file_name or default_file_name
167 | 
168 |         # Load the model from a local directory
169 |         if Path(model_id).is_dir():
170 |             file_path = Path(model_id) / file_name
171 |             model_save_dir = model_id
172 |             preprocessors = maybe_load_preprocessors(model_id)
173 |         # Download the model from the hub
174 |         else:
175 |             file_path = hf_hub_download(
176 |                 repo_id=model_id,
177 |                 filename=file_name,
178 |                 subfolder=subfolder,
179 |                 use_auth_token=use_auth_token,
180 |                 revision=revision,
181 |                 cache_dir=cache_dir,
182 |                 force_download=force_download,
183 |                 local_files_only=local_files_only,
184 |             )
185 |             model_save_dir = Path(file_path).parent
186 |             preprocessors = maybe_load_preprocessors(model_id, subfolder=subfolder)
187 | 
188 |         model = cls.load_model(file_path, input_shape_dict, output_shape_dict)
189 | 
190 |         return cls(
191 |             model, config=config, model_save_dir=model_save_dir, compile=False, preprocessors=preprocessors, **kwargs
192 |         )
193 | 
194 |     @classmethod
195 |     def _from_transformers(
196 |         cls,
197 |         model_id: str,
198 |         config: PretrainedConfig,
199 |         use_auth_token: Optional[Union[bool, str]] = None,
200 |         revision: Optional[str] = None,
201 |         force_download: bool = False,
202 |         cache_dir: Optional[str] = None,
203 |         subfolder: str = "",
204 |         local_files_only: bool = False,
205 |         task: Optional[str] = None,
206 |         **kwargs,
207 |     ):
208 |         """
209 |         Exports a vanilla Transformers model to the ONNX format using `optimum.exporters.onnx.main_export` and loads the result.
210 | 
211 |         Args:
212 |             model_id (`str` or `Path`):
213 |                 The directory from which to load the model.
214 |                 Can be either:
215 |                     - The model id of a pretrained model hosted inside a model repo on huggingface.co.
216 |                     - The path to a directory containing the model weights.
217 |             task (`str`, *optional*):
218 |                 The task for which to export the model. Defaults to `cls.export_feature` when not provided.
219 |             use_auth_token (`str` or `bool`, *optional*):
220 |                 The token to use as HTTP bearer authorization for remote files. Needed to load models from a private repository.
221 |             revision (`str`, *optional*):
222 |                 The specific model version to use. It can be a branch name, a tag name, or a commit ID.
223 |             kwargs (`Dict`, *optional*):
224 |                 Additional keyword arguments that will be passed to the model during initialization.
225 |         """
226 |         if task is None:
227 |             task = cls.export_feature
228 | 
229 |         save_dir = TemporaryDirectory()
230 |         save_dir_path = Path(save_dir.name)
231 | 
232 |         # Export the model to the ONNX format
233 |         main_export(
234 |             model_name_or_path=model_id,
235 |             output=save_dir_path,
236 |             task=task,
237 |             do_validation=False,
238 |             no_post_process=True,
239 |             subfolder=subfolder,
240 |             revision=revision,
241 |             cache_dir=cache_dir,
242 |             use_auth_token=use_auth_token,
243 |             local_files_only=local_files_only,
244 |             force_download=force_download,
245 |         )
246 | 
247 |         config.save_pretrained(save_dir_path)
248 |         maybe_save_preprocessors(model_id, save_dir_path, src_subfolder=subfolder)
249 | 
250 |         return cls._from_pretrained(
251 |             model_id=save_dir_path,
252 |             config=config,
253 |             from_onnx=True,
254 |             use_auth_token=use_auth_token,
255 |             revision=revision,
256 |             force_download=force_download,
257 |             cache_dir=cache_dir,
258 |             local_files_only=local_files_only,
259 |             **kwargs,
260 |         )
261 | 
262 |     @classmethod
263 |     def load_model(
264 |         cls,
265 |         model_path: Union[str, Path],
266 |         input_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
267 |         output_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
268 |     ):
269 |         """
270 |         Loads and processes a model for use with the Furiosa framework.
271 | 
272 |         Args:
273 |             model_path (Union[str, Path]):
274 |                 The path to the model file.
275 |             input_shape_dict (Dict[str, Tuple[int]], defaults to None):
276 |                 A dictionary specifying the input shapes for dynamic models.
277 |             output_shape_dict (Dict[str, Tuple[int]], defaults to None):
278 |                 A dictionary specifying the output shapes for dynamic models.
279 | 
280 |         Returns:
281 |             If the model is in the 'onnx' or 'dfg' format, the compiled model in the Furiosa binary format is returned.
282 |             If the model is in the 'enf' format, the model path is returned as-is.
283 | 
284 |         Raises:
285 |             ValueError: If the model format is not supported or invalid.
286 |         """
287 |         model_path = Path(model_path)
288 |         if model_path.suffix in (".onnx", ".dfg"):
289 |             compiled_model = cls.compile(model_path, input_shape_dict, output_shape_dict)
290 |             return compiled_model
291 |         if model_path.suffix == ".enf":
292 |             return model_path
293 | 
294 |         raise ValueError("Invalid model format. Supported formats are '.onnx', '.dfg', and '.enf'.")
295 | 
296 |     @classmethod
297 |     def compile(
298 |         cls,
299 |         model: Union[str, Path, bytes],
300 |         input_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
301 |         output_shape_dict: Optional[Dict[str, Tuple[int]]] = None,
302 |     ):
303 |         """
304 |         Compiles the model to the Furiosa binary format.
305 | 
306 |         Args:
307 |             model (Union[str, Path, bytes]):
308 |                 The model to be compiled, either as a path to an '.onnx'/'.dfg' file or as in-memory bytes.
309 |             input_shape_dict (Dict[str, Tuple[int]], defaults to None):
310 |                 A dictionary specifying the input shapes for dynamic models.
311 |             output_shape_dict (Dict[str, Tuple[int]], defaults to None):
312 |                 A dictionary specifying the output shapes for dynamic models.
313 |         Returns:
314 |             The compiled model in the Furiosa binary format.
315 | 
316 |         Raises:
317 |             ValueError: If the model format is not supported or invalid.
318 | """ 319 | if isinstance(model, (str, Path)): 320 | model = cls._reshape(model, input_shape_dict, output_shape_dict) 321 | input_bytes = Path(model).read_bytes() 322 | else: 323 | input_bytes = model 324 | 325 | logger.info("Compiling the model...") 326 | compiled_model = compile(input_bytes, target_ir="enf") 327 | return compiled_model 328 | 329 | @staticmethod 330 | def _check_is_dynamic(model_path: Union[str, Path]): 331 | is_dynamic = False 332 | if Path(model_path).suffix == ".onnx": 333 | model = onnx.load(model_path) 334 | is_dynamic = any(any(dim.dim_param for dim in inp.type.tensor_type.shape.dim) for inp in model.graph.input) 335 | 336 | return is_dynamic 337 | 338 | @staticmethod 339 | def optimize_model(model: onnx.ModelProto) -> Path: 340 | return optimizer.frontend.onnx.optimize_model(model) 341 | 342 | @staticmethod 343 | def _update_inputs_outputs_dims( 344 | model_path: Union[str, Path], 345 | input_shape_dict: Dict[str, Tuple[int]], 346 | output_shape_dict: Dict[str, Tuple[int]], 347 | ) -> onnx.ModelProto: 348 | from onnx import shape_inference 349 | from onnx.tools import update_model_dims 350 | 351 | model = onnx.load(model_path) 352 | 353 | updated_model = update_model_dims.update_inputs_outputs_dims(model, input_shape_dict, output_shape_dict) 354 | return shape_inference.infer_shapes(updated_model) 355 | 356 | @classmethod 357 | def _reshape( 358 | cls, 359 | model_path: Union[str, Path], 360 | input_shape_dict: Dict[str, Tuple[int]], 361 | output_shape_dict: Dict[str, Tuple[int]], 362 | ) -> Union[str, Path]: 363 | """ 364 | Propagates the given input shapes on the model's layers, fixing the input shapes of the model. 365 | 366 | Args: 367 | model_path (Union[str, Path]): 368 | Path to the model. 369 | input_shape_dict (Dict[str, Tuple[int]]): 370 | Input shapes for the model. 371 | output_shape_dict (Dict[str, Tuple[int]]): 372 | Output shapes for the model. 373 | 374 | Returns: 375 | Union[str, Path]: 376 | Path to the model after updating the input shapes. 377 | 378 | Raises: 379 | ValueError: If the model provided has dynamic axes in input/output and no input/output shape is provided. 380 | """ 381 | if isinstance(model_path, (str, Path)) and Path(model_path).suffix == ".onnx": 382 | is_dynamic = cls._check_is_dynamic(model_path) 383 | if is_dynamic: 384 | if input_shape_dict is None or output_shape_dict is None: 385 | raise ValueError( 386 | "The model provided has dynamic axes in input/output. Please provide input and output shapes for compilation." 387 | ) 388 | 389 | model = cls._update_inputs_outputs_dims(model_path, input_shape_dict, output_shape_dict) 390 | optimized_model = cls.optimize_model(model) 391 | 392 | static_model_path = Path(model_path).parent / ONNX_WEIGHTS_NAME_STATIC 393 | onnx.save(optimized_model, static_model_path) 394 | 395 | return static_model_path 396 | 397 | return model_path 398 | 399 | def forward(self, *args, **kwargs): 400 | raise NotImplementedError 401 | --------------------------------------------------------------------------------