├── .gitignore ├── LICENSE ├── README.md ├── apps └── imagenet │ ├── mobilenetv1 │ └── conventional │ │ ├── mbv1_fix_quant_test.yml │ │ ├── mbv1_fix_quant_test_int_model.yml │ │ ├── mbv1_fix_quant_test_int_op_only_on_cpu.yml │ │ ├── mbv1_fix_quant_test_integize_on_gpu.yml │ │ ├── mbv1_fix_quant_train.yml │ │ ├── mbv1_floating_test.yml │ │ └── mbv1_floating_train.yml │ ├── mobilenetv2 │ └── conventional │ │ ├── mbv2_fix_quant_test.yml │ │ ├── mbv2_fix_quant_test_int_model.yml │ │ ├── mbv2_fix_quant_test_int_op_only_on_cpu.yml │ │ ├── mbv2_fix_quant_test_integize_on_gpu.yml │ │ ├── mbv2_fix_quant_train.yml │ │ ├── mbv2_floating_test.yml │ │ └── mbv2_floating_train.yml │ ├── resnet18 │ ├── conventional │ │ ├── res18_fix_quant_test.yml │ │ ├── res18_fix_quant_test_int_model.yml │ │ ├── res18_fix_quant_test_int_op_only.yml │ │ ├── res18_fix_quant_test_integize.yml │ │ ├── res18_fix_quant_train.yml │ │ ├── res18_floating_test.yml │ │ └── res18_floating_train.yml │ └── tiny_finetuning │ │ ├── res18_fix_quant_ptcv_pretrained_test.yml │ │ ├── res18_fix_quant_ptcv_pretrained_test_int_model.yml │ │ ├── res18_fix_quant_ptcv_pretrained_test_int_op_only_on_cpu.yml │ │ ├── res18_fix_quant_ptcv_pretrained_test_integize_on_gpu.yml │ │ └── res18_fix_quant_ptcv_pretrained_train.yml │ └── resnet50 │ └── tiny_finetuning │ ├── res50_fix_quant_nvidia_pretrained_test.yml │ ├── res50_fix_quant_nvidia_pretrained_test_int_model.yml │ ├── res50_fix_quant_nvidia_pretrained_test_int_op_only_on_cpu.yml │ ├── res50_fix_quant_nvidia_pretrained_test_integize_on_gpu.yml │ ├── res50_fix_quant_nvidia_pretrained_train.yml │ ├── res50_fix_quant_ptcv_pretrained_test.yml │ ├── res50_fix_quant_ptcv_pretrained_test_int_model.yml │ ├── res50_fix_quant_ptcv_pretrained_test_int_op_only_on_cpu.yml │ ├── res50_fix_quant_ptcv_pretrained_test_integize_on_gpu.yml │ └── res50_fix_quant_ptcv_pretrained_train.yml ├── distributed_run.sh ├── error_analysis ├── all_results_8bit_signed_False.txt ├── all_results_8bit_signed_True.txt ├── fixed_quant_analysis.py ├── fixed_quant_analysis_all_metrics.py ├── sigma_threshold_vs_fl_8bit_signed_False.pdf ├── sigma_threshold_vs_fl_8bit_signed_True.pdf ├── std_fix_quant_error_analysis_8bit_signed_False.pdf ├── std_fix_quant_error_analysis_8bit_signed_True.pdf ├── std_opt_fl_and_err_vs_sigma_8bit_signed_False.pdf ├── std_opt_fl_and_err_vs_sigma_8bit_signed_True.pdf ├── std_opt_sigma_vs_fl_8bit_signed_False.pdf └── std_opt_sigma_vs_fl_8bit_signed_True.pdf ├── fix_train.py ├── fraclen_visual ├── fraclen_visualizing_mbv2.py ├── fraclen_visualizing_res50.py ├── mbv2_fix_quant.out ├── mobilenetv2_8bit_fraclens.pdf ├── mobilenetv2_8bit_fraclens_wo_title.pdf ├── res50_fix_quant_nvidia_pretrained.out ├── res50_fix_quant_ptcv_pretrained.out ├── resnet50_nvidia_8bit_fraclens.pdf ├── resnet50_nvidia_8bit_fraclens_wo_title.pdf ├── resnet50_ptcv_8bit_fraclens.pdf └── resnet50_ptcv_8bit_fraclens_wo_title.pdf ├── imagenet_classes ├── images └── comparisons.png ├── models ├── fix_mobilenet_v1.py ├── fix_mobilenet_v2.py ├── fix_quant_ops.py └── fix_resnet.py ├── myutils ├── config.py ├── distributed.py ├── export.py └── meters.py ├── requirements.in ├── requirements.txt ├── run.sh └── weight_boxplot ├── mobilenetv2_pytorchcv_eff_weight.pdf ├── mobilenetv2_pytorchcv_eff_weight_wo_title.pdf ├── mobilenetv2_torchvision_eff_weight.pdf ├── mobilenetv2_torchvision_eff_weight_wo_title.pdf └── weight_boxplot.py /.gitignore: -------------------------------------------------------------------------------- 1 | cache 2 | data 3 | exp 4 
| logs 5 | results 6 | .flake8 7 | *.bak 8 | *.pyc 9 | __pycache__ 10 | pretrained* 11 | .vscode 12 | apps_local 13 | .DS_Store* 14 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | For Snap Research F8Net software: 2 | Copyright (c) 2022 Snap Inc. 3 | 4 | All rights reserved. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 9 | 10 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 11 | 12 | --------------------------- LICENSE FOR AdaBits -------------------------------- 13 | The following software may be included in this product: AdaBits. This software is subject to the following license and notice: 14 | 15 | CC 4.0 Attribution-NonCommercial International https://creativecommons.org/licenses/by-nc/4.0/legalcode 16 | 17 | The software is for educational and academic research purposes only. 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## F8Net
Fixed-Point 8-bit Only Multiplication for Network Quantization (ICLR 2022 Oral) 2 | 3 | [OpenReview](https://openreview.net/forum?id=_CfpJazzXT2) | [arXiv](https://arxiv.org/abs/2202.05239) | [PDF](https://arxiv.org/pdf/2202.05239.pdf) | [Model Zoo](#f8net-model-zoo) | [BibTex](#citation) 4 | 5 | 6 | 7 | 8 | 9 | PyTorch implementation of neural network quantization with fixed-point 8-bit only multiplication.
10 | >[F8Net: Fixed-Point 8-bit Only Multiplication for Network Quantization](https://openreview.net/forum?id=_CfpJazzXT2)
11 | >[Qing Jin](https://scholar.google.com/citations?user=X9iggBcAAAAJ&hl=zh-CN)1,2, [Jian Ren](https://alanspike.github.io/)1, [Richard Zhuang](https://www.linkedin.com/in/richard-zhuang-82ba504/)1, [Sumant Hanumante](https://www.linkedin.com/in/sumant-hanumante-3a698123/)1, [Zhengang Li](https://scholar.google.com/citations?user=hH1Oun0AAAAJ&hl=en)2, [Zhiyu Chen](https://vlsi.rice.edu/authors/zhiyu/)3, [Yanzhi Wang](https://coe.northeastern.edu/people/wang-yanzhi/)2, [Kaiyuan Yang](https://vlsi.rice.edu/authors/admin/)3, [Sergey Tulyakov](http://www.stulyakov.com/)1 12 | >1Snap Inc., 2Northeastern University, 3Rice University
13 | >ICLR 2022 Oral. 14 | 15 | 16 |
17 | 18 | Overview 19 | 20 | Neural network quantization enables efficient inference by reducing the precision of weights and inputs. Previous quantization methods can be categorized as simulated quantization, integer-only quantization, and fixed-point quantization, with the former two involving high-precision multiplication with a 32-bit floating-point or integer scaling factor. In contrast, fixed-point models avoid such demanding requirements but have demonstrated inferior performance to the other two methods. In this work, we study the problem of how to train such models. Specifically, we conduct a statistical analysis of the values being quantized and propose to determine the fixed-point format from data during training with a semi-empirical formula. Our method demonstrates that high-precision multiplication is not necessary for quantized models to achieve performance comparable to their full-precision counterparts. 21 |
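To make the fixed-point setting concrete, below is a minimal PyTorch sketch of quantizing a tensor to a `(wordlen, fraclen)` fixed-point format. It is illustrative only (the function name and defaults here are ours); the repo's actual implementation, including choosing `fraclen` from statistics, lives in [`models/fix_quant_ops.py`](/models/fix_quant_ops.py).

```python
import torch

def fix_quant(x, wordlen=8, fraclen=6, signed=True):
    # Illustrative fixed-point quantization:
    # x_q = clip(round(x * 2^fraclen)) / 2^fraclen.
    # E.g. a signed [8, 6] format covers [-2.0, 1.984375] in steps of 1/64.
    scale = 2.0 ** fraclen
    if signed:
        qmin, qmax = -(2 ** (wordlen - 1)), 2 ** (wordlen - 1) - 1
    else:
        qmin, qmax = 0, 2 ** wordlen - 1
    return torch.clamp(torch.round(x * scale), qmin, qmax) / scale
```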
22 | 23 | 24 | ## Getting Started 25 | 26 |
27 | Requirements 28 | 29 | 1. Please check the [requirements](/requirements.txt) and install the required packages. 30 | 31 | 2. Prepare the ImageNet-1k data following the PyTorch [example](https://github.com/pytorch/examples/tree/master/imagenet), and create a softlink named `data` to the ImageNet data path under the current code directory (`ln -s /path/to/imagenet data`). 32 |
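For example, assuming a pip-based environment (the ImageNet path is a placeholder):

```
pip install -r requirements.txt
ln -s /path/to/imagenet data
```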
34 | 35 |
Model Training
36 | 37 |
Conventional training
38 | 39 | * We train the model with the script [distributed_run.sh](/distributed_run.sh) and the command 40 | ``` 41 | bash distributed_run.sh /path/to/yml_file batch_size 42 | ``` 43 | * We set `batch_size=2048` for conventional training of the floating-/fixed-point ResNet18 and MobileNet V1/V2 (see the concrete example after this list). 44 | * Before training, please update the `dataset_dir` and `log_dir` arguments in the yaml files for training the floating-/fixed-point models. 45 | * To train the floating-point model, please use the yaml file `***_floating_train.yml` in the `conventional` subfolder under the corresponding folder of the model. 46 | * To train the fixed-point model, please first train the floating-point model as the initialization, then use the yaml file `***_fix_quant_train.yml` in the `conventional` subfolder under the corresponding folder of the model. Please make sure the argument `fp_pretrained_file` points to the correct path of the corresponding floating-point checkpoint. We also provide our pretrained floating-point models in the [Model Zoo](#f8net-model-zoo) below. 47 |
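For example, to run the conventional pipeline for ResNet18 with the config files shipped in this repo (train the floating-point model first, then the fixed-point model initialized from it):

```
bash distributed_run.sh apps/imagenet/resnet18/conventional/res18_floating_train.yml 2048
bash distributed_run.sh apps/imagenet/resnet18/conventional/res18_fix_quant_train.yml 2048
```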
48 | 49 |
Tiny finetuning
50 | 51 | * We finetune the model with the script [run.sh](/run.sh) and the command 52 | ``` 53 | bash run.sh /path/to/yml_file batch_size 54 | ``` 55 | * We set `batch_size=128` and use one GPU for tiny-finetuning of the fixed-point ResNet18/50 (see the example after this list). 56 | 57 | * Before fine-tuning, please update the `dataset_dir` and `log_dir` arguments in the yaml files for finetuning the fixed-point models. 58 | 59 | * To finetune the fixed-point model, please use the yaml file `***_fix_quant_***_pretrained_train.yml` in the `tiny_finetuning` subfolder under the corresponding folder of the model. For models pretrained with [`PytorchCV`](https://pypi.org/project/pytorchcv/) (Baseline of ResNet18 and Baseline#1 of ResNet50), the floating-point checkpoint is downloaded automatically when the code runs. For the model pretrained by [`Nvidia`](https://catalog.ngc.nvidia.com/orgs/nvidia/models/resnet50_pyt_amp/files) (Baseline#2 of ResNet50), please download the checkpoint first and make sure the argument `nvidia_pretrained_file` points to the correct path of this checkpoint. 60 |
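For example, to tiny-finetune the fixed-point ResNet50 from the PytorchCV baseline with the config shipped in this repo:

```
bash run.sh apps/imagenet/resnet50/tiny_finetuning/res50_fix_quant_ptcv_pretrained_train.yml 128
```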
62 | 63 |
64 | 65 |
66 | Model Testing 67 | 68 | * We test the model with the script [run.sh](/run.sh) and the command 69 | ``` 70 | bash run.sh /path/to/yml_file batch_size 71 | ``` 72 | * We set `batch_size=128` and use one GPU for model testing. 73 | 74 | * Before testing, please update the `dataset_dir` and `log_dir` arguments in the yaml files. Please also update the `integize_file_path` and `int_op_only_file_path` arguments in the yaml files `***_fix_quant_test***_integize.yml` and `***_fix_quant_test***_int_op_only.yml`, respectively, as well as other arguments like `nvidia_pretrained_file` if necessary (even if they are not used during testing). 75 | 76 | * We use the yaml file `***_floating_test.yml` for testing the floating-point model; `***_fix_quant***_test.yml` for testing the fixed-point model with the same setting as during training/tiny-finetuning; `***_fix_quant***_test_int_model.yml` for testing the fixed-point model on GPU, with all quantized weights, biases and inputs implemented with integers (but stored with `float` dtype, as GPU does not support integer operations), while keeping the original modules from training (e.g. with batch normalization layers); `***_fix_quant***_test_integize.yml` for testing the fixed-point model on GPU, with all quantized weights, biases and inputs implemented with integers (again with `float` dtype), using a new equivalent model with only convolution, pooling and fully-connected layers; and `***_fix_quant***_test_int_op_only.yml` for testing the fixed-point model on CPU, with all quantized weights, biases and inputs implemented with integers (with `int` dtype), using a new equivalent model with only convolution, pooling and fully-connected layers (see the example after this list). Note that the accuracies from the four testing files can differ slightly due to numerical error. 77 |
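For example, to evaluate the fixed-point ResNet18 checkpoint under each of the four testing configs shipped in this repo:

```
bash run.sh apps/imagenet/resnet18/conventional/res18_fix_quant_test.yml 128
bash run.sh apps/imagenet/resnet18/conventional/res18_fix_quant_test_int_model.yml 128
bash run.sh apps/imagenet/resnet18/conventional/res18_fix_quant_test_integize.yml 128
bash run.sh apps/imagenet/resnet18/conventional/res18_fix_quant_test_int_op_only.yml 128
```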
79 | 80 | 81 |
81 | Model Export 82 | 83 | 84 | * We export the fixed-point model with integer weights, biases and inputs to run on GPU and CPU during model testing with the `***_fix_quant_test_integize.yml` and `***_fix_quant_test_int_op_only.yml` files, respectively (see the loading sketch below). 85 | 86 | * The exported onnx files are saved to the paths given by the arguments `integize_file_path` and `int_op_only_file_path`. 87 |
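As a quick sanity check, an exported onnx file can be loaded with `onnxruntime` (a hypothetical sketch, not part of this repo; the file name is a placeholder for whatever gets written under `integize_file_path` / `int_op_only_file_path`):

```python
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("exported_model.onnx")  # placeholder path
inp = sess.get_inputs()[0]
# Dummy batch; real inputs must be quantized exactly as during training
# (the int_op_only variant expects integer-valued inputs).
x = np.zeros([d if isinstance(d, int) else 1 for d in inp.shape], dtype=np.float32)
logits = sess.run(None, {inp.name: x})[0]
print(logits.shape)
```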
89 | 90 | 91 | 92 | 93 | ## F8Net Model Zoo 94 | 95 | All checkpoints and onnx files are available **[here](https://drive.google.com/drive/folders/1lYWPj9TB-c50lIxXlYWbCfpF5pSAP0fc?usp=sharing)**. 96 | 97 | **Conventional** 98 | 99 | | Model | Type | Top-1 Acc.a | Checkpoint | 100 | | :--- | :---: | :---: | :---: | 101 | | ResNet18 | FP
8-bit | 70.3
71.0 | [`Res18_32`](https://drive.google.com/file/d/1BxRPKr7SRQmrRdJt1oUrRxzjas65ItQN/view?usp=sharing)
[`Res18_8`](https://drive.google.com/file/d/1U93c7QLHs0Ww_93yY1msbRsghZTGaruG/view?usp=sharing) | 102 | | MobileNet-V1 | FP
8-bit | 72.4
72.8 | [`MBV1_32`](https://drive.google.com/file/d/14zeH0HLUS8UN7RKDyKWMHPKzXKa6mesp/view?usp=sharing)
[`MBV1_8`](https://drive.google.com/file/d/1Q89sIqR2HrCcEOOcLrKl8emcippkT6p3/view?usp=sharing) | 103 | | MobileNet-V2b | FP
8-bit | 72.7
72.6 | [`MBV2b_32`](https://drive.google.com/file/d/1OYz0CkLLQ2JV-X666HxiBVFAbJ3ojWIw/view?usp=sharing)
[`MBV2b_8`](https://drive.google.com/file/d/1YbDKgxHBFrBLhsZ4GJoL5R4sm5L8BT0p/view?usp=sharing) | 104 | 105 | **Tiny Finetuning** 106 | 107 | | Model | Type | Top-1 Acc.a | Checkpoint | 108 | | :--- | :---: | :---: | :---: | 109 | | ResNet18 | FP
8-bit | 73.1
72.3 | `Res18_32p`
[`Res18_8p`](https://drive.google.com/file/d/1L2vziGb5_OCjlA-cAoUk-54jA9BA-spN/view?usp=sharing) | 110 | | ResNet50b (BL#1) | FP
8-bit | 77.6
77.6 | `Res50b_32p`
[`Res50b_8p`](https://drive.google.com/file/d/1YHe7MB4JpG75uo8GMpCxwsVHvAJflXF0/view?usp=sharing) | 111 | | ResNet50b (BL#2) | FP
8-bit | 78.5
78.1 | [`Res50b_32n`](https://catalog.ngc.nvidia.com/orgs/nvidia/models/resnet50_pyt_amp/files)
[`Res50b_8n`](https://drive.google.com/file/d/1WU_ccesykRVKp9ntEDn_mieYTW-wAkkN/view?usp=sharing) | 112 | 113 | a The accuracies are obtained from the inference step during training. Test accuracy for the final exported model might have some small accuracy difference due to numerical error. 114 | 115 | 116 | ## Technical Details 117 | 118 | The main techniques for neural network quantization with 8-bit fixed-point multiplication includes the following: 119 | * Quantized methods/modules including determining fixed-point formats from statistics or by grid-search, fusing convolution and batch normalization layers, and reformulating PACT with fixed-point quantization are implemented in [`models/fix_quant_ops`](/models/fix_quant_ops.py). 120 | * Clipping-level sharing and private fractional length for residual blocks are implemented in the ResNet ([`models/fix_resnet`](/models/fix_resnet.py)) and MobileNet V2 ([`models/fix_mobilenet_v2`](/models/fix_mobilenet_v2.py)). 121 | 122 | 123 | ## Acknowledgement 124 | This repo is based on [AdaBits](https://github.com/deJQK/AdaBits). 125 | 126 | 127 | ## Citation 128 | If our code or models help your work, please cite our paper: 129 | ```bibtex 130 | @inproceedings{ 131 | jin2022fnet, 132 | title={F8Net: Fixed-Point 8-bit Only Multiplication for Network Quantization}, 133 | author={Qing Jin and Jian Ren and Richard Zhuang and Sumant Hanumante and Zhengang Li and Zhiyu Chen and Yanzhi Wang and Kaiyuan Yang and Sergey Tulyakov}, 134 | booktitle={International Conference on Learning Representations}, 135 | year={2022}, 136 | url={https://openreview.net/forum?id=_CfpJazzXT2} 137 | } 138 | ``` 139 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv1/conventional/mbv1_fix_quant_test.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.05 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 55 | # 56 | weight_format: [8, 7] 57 | input_format: [8, 6] 58 | 59 | # 60 | format_type: per_layer #[~, per_model, per_layer, per_channel] 61 | 62 | # 63 | format_from_metric: True 64 | momentum_for_metric: 0.1 65 | metric: std 66 | 67 | # 68 | weight_decay_scheme: all 69 | 70 | # =========================== Override Settings =========================== 71 | quant_bias: False 72 | quant_avgpool: False 73 | pool_fusing: False 74 | int_infer: False #True ## For int model 75 | integize: False #True ## For gpu inference 76 | int_op_only: False ## For cpu inference 77 | 
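## The three flags above select the inference mode described in the README's
## Model Testing section: int_infer builds the integer model; integize
## additionally runs it on GPU (integer values stored with float dtype);
## int_op_only runs it on CPU with integer operations only.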
integize_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/MobileNetV1/8bit 78 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/MobileNetV1/8bit 79 | 80 | test_only: True 81 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/MobileNetV1/8bit/checkpoints/best_model.pt 82 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv1 83 | model: models.fix_mobilenet_v1 84 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv1/conventional/mbv1_fix_quant_test_int_model.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.05 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 55 | # 56 | weight_format: [8, 7] 57 | input_format: [8, 6] 58 | 59 | # 60 | format_type: per_layer #[~, per_model, per_layer, per_channel] 61 | 62 | # 63 | format_from_metric: True 64 | momentum_for_metric: 0.1 65 | metric: std 66 | 67 | # 68 | weight_decay_scheme: all 69 | 70 | # =========================== Override Settings =========================== 71 | quant_bias: True 72 | quant_avgpool: True 73 | pool_fusing: True 74 | int_infer: True ## For int model 75 | integize: False #True ## For gpu inference 76 | int_op_only: False ## For cpu inference 77 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/MobileNetV1/8bit 78 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/MobileNetV1/8bit 79 | 80 | test_only: True 81 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/MobileNetV1/8bit/checkpoints/best_model.pt 82 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv1 83 | model: models.fix_mobilenet_v1 84 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv1/conventional/mbv1_fix_quant_test_int_op_only_on_cpu.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.05 26 | 
lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 55 | # 56 | weight_format: [8, 7] 57 | input_format: [8, 6] 58 | 59 | # 60 | format_type: per_layer #[~, per_model, per_layer, per_channel] 61 | 62 | # 63 | format_from_metric: True 64 | momentum_for_metric: 0.1 65 | metric: std 66 | 67 | # 68 | weight_decay_scheme: all 69 | 70 | # =========================== Override Settings =========================== 71 | quant_bias: True 72 | quant_avgpool: True 73 | pool_fusing: True 74 | int_infer: True ## For int model 75 | integize: False #True ## For gpu inference 76 | int_op_only: True #False ## For cpu inference 77 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/MobileNetV1/8bit 78 | int_op_only_file_path: /path/to/pretrained_models/ImageNet/conventional/MobileNetV1/8bit 79 | 80 | test_only: True 81 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/MobileNetV1/8bit/checkpoints/best_model.pt 82 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv1 83 | model: models.fix_mobilenet_v1 84 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv1/conventional/mbv1_fix_quant_test_integize_on_gpu.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.05 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 55 | # 56 | weight_format: [8, 7] 57 | input_format: [8, 6] 58 | 59 | # 60 | format_type: per_layer #[~, per_model, per_layer, per_channel] 61 | 62 | # 63 | format_from_metric: True 64 | momentum_for_metric: 0.1 65 | metric: std 66 | 67 | # 68 | weight_decay_scheme: all 69 | 70 | # =========================== Override Settings =========================== 71 | quant_bias: True 72 | quant_avgpool: True 73 | pool_fusing: True 74 | int_infer: True 75 | integize: True ## For gpu inference 76 | int_op_only: False ## For cpu inference 77 | integize_file_path: /path/to/pretrained_models/ImageNet/conventional/MobileNetV1/8bit 78 | int_op_only_file_path: '' 
#/path/to/pretrained_models/ImageNet/conventional/MobileNetV1/8bit 79 | 80 | test_only: True 81 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/MobileNetV1/8bit/checkpoints/best_model.pt 82 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv1 83 | model: models.fix_mobilenet_v1 84 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv1/conventional/mbv1_fix_quant_train.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.05 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: True 42 | distributed_all_reduce: True 43 | use_diff_seed: True 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 55 | # 56 | weight_format: [8, 7] 57 | input_format: [8, 6] 58 | 59 | # 60 | format_type: per_layer #[~, per_model, per_layer, per_channel] 61 | 62 | # 63 | format_from_metric: True 64 | momentum_for_metric: 0.1 65 | metric: std 66 | 67 | # 68 | weight_decay_scheme: all 69 | 70 | # =========================== Override Settings =========================== 71 | fp_pretrained_file: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv1/fp32/checkpoints/best_model.pt 72 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv1 73 | model: models.fix_mobilenet_v1 74 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv1/conventional/mbv1_floating_test.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.05 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 55 | # 56 | floating_model: True 57 | 58 
| # 59 | weight_format: [8, 7] 60 | input_format: [8, 6] 61 | 62 | # 63 | format_type: ~ #[~, per_layer, per_channel] 64 | 65 | # 66 | weight_decay_scheme: all 67 | 68 | # =========================== Override Settings =========================== 69 | test_only: True 70 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/MobileNetV1/fp32/checkpoints/best_model.pt 71 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv1 72 | model: models.fix_mobilenet_v1 73 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv1/conventional/mbv1_floating_train.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.05 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: True 42 | distributed_all_reduce: True 43 | use_diff_seed: True 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 55 | # 56 | floating_model: True 57 | 58 | # 59 | weight_format: [8, 7] 60 | input_format: [8, 6] 61 | 62 | # 63 | format_type: ~ #[~, per_layer, per_channel] 64 | 65 | # 66 | weight_decay_scheme: all 67 | 68 | # =========================== Override Settings =========================== 69 | #fp_pretrained_file: /path/to/best_model.pt 70 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv1 71 | model: models.fix_mobilenet_v1 72 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv2/conventional/mbv2_fix_quant_test.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.1 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 
55 | # 56 | weight_format: [8, 7] 57 | input_format: [8, 6] 58 | 59 | # 60 | format_type: per_layer #[~, per_model, per_layer, per_channel] 61 | 62 | # 63 | format_from_metric: True 64 | momentum_for_metric: 0.1 65 | metric: std 66 | 67 | # 68 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 69 | 70 | # 71 | input_fraclen_sharing: False 72 | 73 | # =========================== Override Settings =========================== 74 | quant_bias: False 75 | quant_avgpool: False 76 | pool_fusing: False 77 | int_infer: False #True ## For int model 78 | integize: False #True ## For gpu inference 79 | int_op_only: False ## For cpu inference 80 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/MobileNetV2/8bit 81 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/MobileNetV2/8bit 82 | 83 | test_only: True 84 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/MobileNetV2/8bit/checkpoints/best_model.pt 85 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv2 86 | model: models.fix_mobilenet_v2 87 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv2/conventional/mbv2_fix_quant_test_int_model.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.1 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 55 | # 56 | weight_format: [8, 7] 57 | input_format: [8, 6] 58 | 59 | # 60 | format_type: per_layer #[~, per_model, per_layer, per_channel] 61 | 62 | # 63 | format_from_metric: True 64 | momentum_for_metric: 0.1 65 | metric: std 66 | 67 | # 68 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 69 | 70 | # 71 | input_fraclen_sharing: False 72 | 73 | # =========================== Override Settings =========================== 74 | quant_bias: True 75 | quant_avgpool: True 76 | pool_fusing: True 77 | int_infer: True ## For int model 78 | integize: False #True ## For gpu inference 79 | int_op_only: False ## For cpu inference 80 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/MobileNetV2/8bit 81 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/MobileNetV2/8bit 82 | 83 | test_only: True 84 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/MobileNetV2/8bit/checkpoints/best_model.pt 85 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv2 86 | model: 
models.fix_mobilenet_v2 87 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv2/conventional/mbv2_fix_quant_test_int_op_only_on_cpu.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.1 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 55 | # 56 | weight_format: [8, 7] 57 | input_format: [8, 6] 58 | 59 | # 60 | format_type: per_layer #[~, per_model, per_layer, per_channel] 61 | 62 | # 63 | format_from_metric: True 64 | momentum_for_metric: 0.1 65 | metric: std 66 | 67 | # 68 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 69 | 70 | # 71 | input_fraclen_sharing: False 72 | 73 | # =========================== Override Settings =========================== 74 | quant_bias: True 75 | quant_avgpool: True 76 | pool_fusing: True 77 | int_infer: True ## For int model 78 | integize: False #True ## For gpu inference 79 | int_op_only: True #False ## For cpu inference 80 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/MobileNetV2/8bit 81 | int_op_only_file_path: /path/to/pretrained_models/ImageNet/conventional/MobileNetV2/8bit 82 | 83 | test_only: True 84 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/MobileNetV2/8bit/checkpoints/best_model.pt 85 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv2 86 | model: models.fix_mobilenet_v2 87 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv2/conventional/mbv2_fix_quant_test_integize_on_gpu.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.1 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | 
distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 55 | # 56 | weight_format: [8, 7] 57 | input_format: [8, 6] 58 | 59 | # 60 | format_type: per_layer #[~, per_model, per_layer, per_channel] 61 | 62 | # 63 | format_from_metric: True 64 | momentum_for_metric: 0.1 65 | metric: std 66 | 67 | # 68 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 69 | 70 | # 71 | input_fraclen_sharing: False 72 | 73 | # =========================== Override Settings =========================== 74 | quant_bias: True 75 | quant_avgpool: True 76 | pool_fusing: True 77 | int_infer: True ## For int model 78 | integize: True ## For gpu inference 79 | int_op_only: False ## For cpu inference 80 | integize_file_path: /path/to/pretrained_models/ImageNet/conventional/MobileNetV2/8bit 81 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/MobileNetV2/8bit 82 | 83 | test_only: True 84 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/MobileNetV2/8bit/checkpoints/best_model.pt 85 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv2 86 | model: models.fix_mobilenet_v2 87 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv2/conventional/mbv2_fix_quant_train.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.1 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: True 42 | distributed_all_reduce: True 43 | use_diff_seed: True 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 55 | # 56 | weight_format: [8, 7] 57 | input_format: [8, 6] 58 | 59 | # 60 | format_type: per_layer #[~, per_model, per_layer, per_channel] 61 | 62 | # 63 | format_from_metric: True 64 | momentum_for_metric: 0.1 65 | metric: std 66 | 67 | # 68 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 69 | 70 | # 71 | input_fraclen_sharing: False 72 | 73 | # =========================== Override Settings =========================== 74 | fp_pretrained_file: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv2/fp32/checkpoints/best_model.pt 75 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv2 76 | model: models.fix_mobilenet_v2 77 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv2/conventional/mbv2_floating_test.yml: 
-------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.1 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 55 | # 56 | floating_model: True 57 | 58 | # 59 | weight_format: [8, 7] 60 | input_format: [8, 6] 61 | 62 | # 63 | format_type: ~ #[~, per_layer, per_channel] 64 | 65 | # 66 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 67 | 68 | # 69 | input_fraclen_sharing: True 70 | 71 | # =========================== Override Settings =========================== 72 | test_only: True 73 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/MobileNetV2/fp32/checkpoints/best_model.pt 74 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv2 75 | model: models.fix_mobilenet_v2 76 | -------------------------------------------------------------------------------- /apps/imagenet/mobilenetv2/conventional/mbv2_floating_train.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.1 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: True 42 | distributed_all_reduce: True 43 | use_diff_seed: True 44 | 45 | # 46 | width_divisor: 8 47 | min_width: 1 48 | 49 | # 50 | rescale_forward: True 51 | rescale_forward_conv: False 52 | #normalize: False 53 | rescale_type: constant #[stddev, constant] 54 | 55 | # 56 | floating_model: True 57 | 58 | # 59 | weight_format: [8, 7] 60 | input_format: [8, 6] 61 | 62 | # 63 | format_type: ~ #[~, per_layer, per_channel] 64 | 65 | # 66 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 67 | 68 | # 69 | input_fraclen_sharing: True 70 | 71 | # =========================== Override Settings =========================== 72 | #fp_pretrained_file: 
/path/to/best_model.pt 73 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/mobilenetv2 74 | model: models.fix_mobilenet_v2 75 | -------------------------------------------------------------------------------- /apps/imagenet/resnet18/conventional/res18_fix_quant_test.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.05 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | #normalize: False 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: True 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | # 64 | weight_decay_scheme: all 65 | 66 | # 67 | no_clipping: False 68 | input_fraclen_sharing: False 69 | 70 | # =========================== Override Settings =========================== 71 | quant_bias: False 72 | quant_avgpool: False 73 | pool_fusing: False 74 | int_infer: False #True ## For int model 75 | integize: False #True ## For gpu inference 76 | int_op_only: False ## For cpu inference 77 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/ResNet18/8bit 78 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/ResNet18/8bit 79 | 80 | test_only: True 81 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/ResNet18/8bit/checkpoints/best_model.pt 82 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet18 83 | model: models.fix_resnet 84 | depth: 18 85 | -------------------------------------------------------------------------------- /apps/imagenet/resnet18/conventional/res18_fix_quant_test_int_model.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.05 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | 
rescale_forward: True 47 | rescale_forward_conv: False 48 | #normalize: False 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: True 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | # 64 | weight_decay_scheme: all 65 | 66 | # 67 | no_clipping: False 68 | input_fraclen_sharing: False 69 | 70 | # =========================== Override Settings =========================== 71 | quant_bias: True 72 | quant_avgpool: True 73 | pool_fusing: True 74 | quant_maxpool: True 75 | int_infer: True ## For int model 76 | integize: False ## For gpu inference 77 | int_op_only: False ## For cpu inference 78 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/ResNet18/8bit 79 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/ResNet18/8bit 80 | 81 | test_only: True 82 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/ResNet18/8bit/checkpoints/best_model.pt 83 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet18 84 | model: models.fix_resnet 85 | depth: 18 86 | -------------------------------------------------------------------------------- /apps/imagenet/resnet18/conventional/res18_fix_quant_test_int_op_only.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.05 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | #normalize: False 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: True 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | # 64 | weight_decay_scheme: all 65 | 66 | # 67 | no_clipping: False 68 | input_fraclen_sharing: False 69 | 70 | # =========================== Override Settings =========================== 71 | quant_bias: True 72 | quant_avgpool: True 73 | pool_fusing: True 74 | quant_maxpool: False #True 75 | int_infer: True ## For int model 76 | integize: False ## For gpu inference 77 | int_op_only: True ## For cpu inference 78 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/ResNet18/8bit 79 | int_op_only_file_path: /path/to/pretrained_models/ImageNet/conventional/ResNet18/8bit 80 | 81 | test_only: True 82 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/ResNet18/8bit/checkpoints/best_model.pt 83 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet18 84 | model: models.fix_resnet 85 | depth: 18 86 | 
-------------------------------------------------------------------------------- /apps/imagenet/resnet18/conventional/res18_fix_quant_test_integize.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.05 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | #normalize: False 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: True 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | # 64 | weight_decay_scheme: all 65 | 66 | # 67 | no_clipping: False 68 | input_fraclen_sharing: False 69 | 70 | # =========================== Override Settings =========================== 71 | quant_bias: True 72 | quant_avgpool: True 73 | pool_fusing: True 74 | quant_maxpool: False #True 75 | int_infer: True ## For int model 76 | integize: True ## For gpu inference 77 | int_op_only: False ## For cpu inference 78 | integize_file_path: /path/to/pretrained_models/ImageNet/conventional/ResNet18/8bit 79 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/conventional/ResNet18/8bit 80 | 81 | test_only: True 82 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/ResNet18/8bit/checkpoints/best_model.pt 83 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet18 84 | model: models.fix_resnet 85 | depth: 18 86 | -------------------------------------------------------------------------------- /apps/imagenet/resnet18/conventional/res18_fix_quant_train.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.1 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: True 42 | distributed_all_reduce: True 43 | use_diff_seed: True 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | #normalize: False 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 
| weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: True 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | # 64 | weight_decay_scheme: all 65 | 66 | # 67 | input_fraclen_sharing: False 68 | 69 | # =========================== Override Settings =========================== 70 | fp_pretrained_file: /path/to/checkpoints/F8Net/results/imagenet/resnet18/fp32/checkpoints/best_model.pt 71 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet18 72 | model: models.fix_resnet 73 | depth: 18 74 | -------------------------------------------------------------------------------- /apps/imagenet/resnet18/conventional/res18_floating_test.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.05 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | #normalize: False 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | floating_model: True 53 | 54 | # 55 | weight_format: [8, 7] 56 | input_format: [8, 6] 57 | 58 | # 59 | format_type: ~ #[~, per_layer, per_channel] 60 | 61 | # 62 | weight_decay_scheme: all 63 | 64 | # 65 | input_fraclen_sharing: True 66 | 67 | # =========================== Override Settings =========================== 68 | test_only: True 69 | pretrained_file: /path/to/pretrained_models/ImageNet/conventional/ResNet18/fp32/checkpoints/best_model.pt 70 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet18 71 | model: models.fix_resnet 72 | depth: 18 73 | -------------------------------------------------------------------------------- /apps/imagenet/resnet18/conventional/res18_floating_train.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 150 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.00004 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.05 26 | lr_scheduler: cos_annealing_iter 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: True 42 | distributed_all_reduce: True 43 | use_diff_seed: 
True 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | #normalize: False 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | floating_model: True 53 | 54 | # 55 | weight_format: [8, 7] 56 | input_format: [8, 6] 57 | 58 | # 59 | format_type: ~ #[~, per_layer, per_channel] 60 | 61 | # 62 | weight_decay_scheme: all 63 | 64 | # 65 | input_fraclen_sharing: True 66 | 67 | # =========================== Override Settings =========================== 68 | #fp_pretrained_file: /path/to/best_model.pt 69 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet18 70 | model: models.fix_resnet 71 | depth: 18 72 | -------------------------------------------------------------------------------- /apps/imagenet/resnet18/tiny_finetuning/res18_fix_quant_ptcv_pretrained_test.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.0 26 | lr_scheduler: constant 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | normalize: True 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: True 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | finetune_iters: 100 64 | ptcv_pretrained: True 65 | 66 | no_clipping: True 67 | input_fraclen_sharing: False 68 | 69 | format_grid_search: True 70 | 71 | # 72 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 73 | 74 | # =========================== Override Settings =========================== 75 | quant_bias: False 76 | quant_avgpool: False 77 | pool_fusing: False 78 | quant_maxpool: False 79 | int_infer: False ## For int model 80 | integize: False #True ## For gpu inference 81 | int_op_only: False ## For cpu inference 82 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet18/ptcv_pretrained/8bit 83 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet18/ptcv_pretrained/8bit 84 | print_each_iter: True 85 | 86 | pretrained_file: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet18/ptcv_pretrained/8bit/checkpoints/quant_avgpool_True_pool_fusing_True/best_model.pt 87 | test_only: True 88 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet18 89 | model: models.fix_resnet 90 | depth: 18 91 | -------------------------------------------------------------------------------- /apps/imagenet/resnet18/tiny_finetuning/res18_fix_quant_ptcv_pretrained_test_int_model.yml: -------------------------------------------------------------------------------- 1 | # 
=========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.0 26 | lr_scheduler: constant 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | normalize: True 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: True 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | finetune_iters: 100 64 | ptcv_pretrained: True 65 | 66 | no_clipping: True 67 | input_fraclen_sharing: False 68 | 69 | format_grid_search: True 70 | 71 | # 72 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 73 | 74 | # =========================== Override Settings =========================== 75 | quant_bias: True 76 | quant_avgpool: True 77 | pool_fusing: True 78 | quant_maxpool: True 79 | int_infer: True ## For int model 80 | integize: False #True ## For gpu inference 81 | int_op_only: False ## For cpu inference 82 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet18/ptcv_pretrained/8bit 83 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet18/ptcv_pretrained/8bit 84 | print_each_iter: True 85 | 86 | pretrained_file: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet18/ptcv_pretrained/8bit/checkpoints/quant_avgpool_True_pool_fusing_True/best_model.pt 87 | test_only: True 88 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet18 89 | model: models.fix_resnet 90 | depth: 18 91 | -------------------------------------------------------------------------------- /apps/imagenet/resnet18/tiny_finetuning/res18_fix_quant_ptcv_pretrained_test_int_op_only_on_cpu.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.0 26 | lr_scheduler: constant 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | 
rescale_forward_conv: False 48 | normalize: True 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: True 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | finetune_iters: 100 64 | ptcv_pretrained: True 65 | 66 | no_clipping: True 67 | input_fraclen_sharing: False 68 | 69 | format_grid_search: True 70 | 71 | # 72 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 73 | 74 | # =========================== Override Settings =========================== 75 | quant_bias: True 76 | quant_avgpool: True 77 | pool_fusing: True 78 | quant_maxpool: False #True ## Onnx does not support unfold used in MaxPool 79 | int_infer: True ## For int model 80 | integize: False #True ## For gpu inference 81 | int_op_only: True #False ## For cpu inference 82 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet18/ptcv_pretrained/8bit 83 | int_op_only_file_path: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet18/ptcv_pretrained/8bit 84 | print_each_iter: True 85 | 86 | pretrained_file: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet18/ptcv_pretrained/8bit/checkpoints/quant_avgpool_True_pool_fusing_True/best_model.pt 87 | test_only: True 88 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet18 89 | model: models.fix_resnet 90 | depth: 18 91 | -------------------------------------------------------------------------------- /apps/imagenet/resnet18/tiny_finetuning/res18_fix_quant_ptcv_pretrained_test_integize_on_gpu.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.0 26 | lr_scheduler: constant 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | normalize: True 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: True 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | finetune_iters: 100 64 | ptcv_pretrained: True 65 | 66 | no_clipping: True 67 | input_fraclen_sharing: False 68 | 69 | format_grid_search: True 70 | 71 | # 72 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 73 | 74 | # =========================== Override Settings =========================== 75 | quant_bias: True 76 | quant_avgpool: True 77 | pool_fusing: True 78 | quant_maxpool: False #True ## Onnx does not support unfold used in MaxPool 79 | int_infer: 
True ## For int model 80 | integize: True ## For gpu inference 81 | int_op_only: False ## For cpu inference 82 | integize_file_path: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet18/ptcv_pretrained/8bit 83 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet18/ptcv_pretrained/8bit 84 | print_each_iter: True 85 | 86 | pretrained_file: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet18/ptcv_pretrained/8bit/checkpoints/quant_avgpool_True_pool_fusing_True/best_model.pt 87 | test_only: True 88 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet18 89 | model: models.fix_resnet 90 | depth: 18 91 | -------------------------------------------------------------------------------- /apps/imagenet/resnet18/tiny_finetuning/res18_fix_quant_ptcv_pretrained_train.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.0 26 | lr_scheduler: constant 27 | 28 | # pretrain, resume, test_only 29 | pretrained_dir: '' 30 | pretrained_file: '' 31 | resume: '' 32 | test_only: False 33 | 34 | # 35 | random_seed: 1995 36 | model: '' 37 | reset_parameters: True 38 | 39 | # 40 | distributed: False 41 | distributed_all_reduce: False 42 | use_diff_seed: False 43 | 44 | # 45 | rescale_forward: True 46 | rescale_forward_conv: False 47 | normalize: True 48 | rescale_type: constant #[stddev, constant] 49 | 50 | # 51 | weight_format: [8, 7] 52 | input_format: [8, 6] 53 | 54 | # 55 | format_type: per_layer #[~, per_model, per_layer, per_channel] 56 | 57 | # 58 | format_from_metric: False 59 | momentum_for_metric: 0.1 60 | metric: std 61 | 62 | finetune_iters: 100 63 | ptcv_pretrained: True 64 | 65 | no_clipping: True 66 | input_fraclen_sharing: False 67 | 68 | format_grid_search: True 69 | 70 | # 71 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 72 | 73 | # =========================== Override Settings =========================== 74 | quant_avgpool: False 75 | pool_fusing: False 76 | 77 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet18 78 | model: models.fix_resnet 79 | depth: 18 80 | -------------------------------------------------------------------------------- /apps/imagenet/resnet50/tiny_finetuning/res50_fix_quant_nvidia_pretrained_test.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0 26 | lr_scheduler: constant 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | 
pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | normalize: True 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: False 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | finetune_iters: 500 64 | ptcv_pretrained: False 65 | 66 | no_clipping: True 67 | input_fraclen_sharing: False 68 | 69 | format_grid_search: True 70 | 71 | # 72 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 73 | 74 | nvidia_pretrained: True 75 | nvidia_pretrained_file: '/path/to/nvidia_model/nvidia_resnet50_200821.pth.tar' 76 | 77 | # =========================== Override Settings =========================== 78 | quant_bias: False 79 | quant_avgpool: True 80 | pool_fusing: True 81 | quant_maxpool: False 82 | int_infer: False #True ## For int model 83 | integize: False #True ## For gpu inference 84 | int_op_only: False ## For cpu inference 85 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/nvidia_pretrained/8bit 86 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/nvidia_pretrained/8bit 87 | print_each_iter: True 88 | 89 | pretrained_file: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/nvidia_pretrained/8bit/checkpoints/quant_avgpool_True_pool_fusing_True/best_model.pt 90 | test_only: True 91 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet50 92 | model: models.fix_resnet 93 | depth: 50 94 | -------------------------------------------------------------------------------- /apps/imagenet/resnet50/tiny_finetuning/res50_fix_quant_nvidia_pretrained_test_int_model.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0 26 | lr_scheduler: constant 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | normalize: True 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: False 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | finetune_iters: 500 64 | ptcv_pretrained: False 65 | 66 | no_clipping: True 67 | input_fraclen_sharing: False 68 | 69 | format_grid_search: True 70 | 71 | # 72 | 
weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 73 | 74 | nvidia_pretrained: True 75 | nvidia_pretrained_file: '/path/to/nvidia_model/nvidia_resnet50_200821.pth.tar' 76 | 77 | # =========================== Override Settings =========================== 78 | quant_bias: True 79 | quant_avgpool: True 80 | pool_fusing: True 81 | quant_maxpool: True 82 | int_infer: True ## For int model 83 | integize: False #True ## For gpu inference 84 | int_op_only: False ## For cpu inference 85 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/nvidia_pretrained/8bit 86 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/nvidia_pretrained/8bit 87 | print_each_iter: True 88 | 89 | pretrained_file: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/nvidia_pretrained/8bit/checkpoints/quant_avgpool_True_pool_fusing_True/best_model.pt 90 | test_only: True 91 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet50 92 | model: models.fix_resnet 93 | depth: 50 94 | -------------------------------------------------------------------------------- /apps/imagenet/resnet50/tiny_finetuning/res50_fix_quant_nvidia_pretrained_test_int_op_only_on_cpu.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0 26 | lr_scheduler: constant 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | normalize: True 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: False 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | finetune_iters: 500 64 | ptcv_pretrained: False 65 | 66 | no_clipping: True 67 | input_fraclen_sharing: False 68 | 69 | format_grid_search: True 70 | 71 | # 72 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 73 | 74 | nvidia_pretrained: True 75 | nvidia_pretrained_file: '/path/to/nvidia_model/nvidia_resnet50_200821.pth.tar' 76 | 77 | # =========================== Override Settings =========================== 78 | quant_bias: True 79 | quant_avgpool: True 80 | pool_fusing: True 81 | quant_maxpool: False #True ## Onnx does not support unfold used in MaxPool 82 | int_infer: True ## For int model 83 | integize: False #True ## For gpu inference 84 | int_op_only: True #False ## For cpu inference 85 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/nvidia_pretrained/8bit 86 | int_op_only_file_path: 
/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/nvidia_pretrained/8bit 87 | print_each_iter: True 88 | 89 | pretrained_file: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/nvidia_pretrained/8bit/checkpoints/quant_avgpool_True_pool_fusing_True/best_model.pt 90 | test_only: True 91 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet50 92 | model: models.fix_resnet 93 | depth: 50 94 | -------------------------------------------------------------------------------- /apps/imagenet/resnet50/tiny_finetuning/res50_fix_quant_nvidia_pretrained_test_integize_on_gpu.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0 26 | lr_scheduler: constant 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | normalize: True 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: False 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | finetune_iters: 500 64 | ptcv_pretrained: False 65 | 66 | no_clipping: True 67 | input_fraclen_sharing: False 68 | 69 | format_grid_search: True 70 | 71 | # 72 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 73 | 74 | nvidia_pretrained: True 75 | nvidia_pretrained_file: '/path/to/nvidia_model/nvidia_resnet50_200821.pth.tar' 76 | 77 | # =========================== Override Settings =========================== 78 | quant_bias: True 79 | quant_avgpool: True 80 | pool_fusing: True 81 | quant_maxpool: False #True ## Onnx does not support unfold used in MaxPool 82 | int_infer: True ## For int model 83 | integize: True ## For gpu inference 84 | int_op_only: False ## For cpu inference 85 | integize_file_path: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/nvidia_pretrained/8bit 86 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/nvidia_pretrained/8bit 87 | print_each_iter: True 88 | 89 | pretrained_file: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/nvidia_pretrained/8bit/checkpoints/quant_avgpool_True_pool_fusing_True/best_model.pt 90 | test_only: True 91 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet50 92 | model: models.fix_resnet 93 | depth: 50 94 | -------------------------------------------------------------------------------- /apps/imagenet/resnet50/tiny_finetuning/res50_fix_quant_nvidia_pretrained_train.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings 
=========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.0001 26 | lr_scheduler: constant 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | normalize: True 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: False 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | finetune_iters: 500 64 | ptcv_pretrained: False 65 | 66 | no_clipping: True 67 | input_fraclen_sharing: False 68 | 69 | format_grid_search: True 70 | 71 | # 72 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 73 | 74 | nvidia_pretrained: True 75 | nvidia_pretrained_file: '/path/to/nvidia_model/nvidia_resnet50_200821.pth.tar' 76 | 77 | # =========================== Override Settings =========================== 78 | quant_avgpool: True #False 79 | pool_fusing: True #False 80 | 81 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet50 82 | model: models.fix_resnet 83 | depth: 50 84 | -------------------------------------------------------------------------------- /apps/imagenet/resnet50/tiny_finetuning/res50_fix_quant_ptcv_pretrained_test.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0 26 | lr_scheduler: constant 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | normalize: True 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: True 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | finetune_iters: 500 64 | ptcv_pretrained: True 65 | 66 | no_clipping: True 67 | input_fraclen_sharing: False 68 | 69 | format_grid_search: True 70 | 71 | # 72 | weight_decay_scheme: no_depthwise_no_bn 
#[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 73 | 74 | # =========================== Override Settings =========================== 75 | quant_bias: False 76 | quant_avgpool: True 77 | pool_fusing: True 78 | quant_maxpool: False 79 | int_infer: False #True ## For int model 80 | integize: False #True ## For gpu inference 81 | int_op_only: False ## For cpu inference 82 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/ptcv_pretrained/8bit 83 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/ptcv_pretrained/8bit 84 | print_each_iter: True 85 | 86 | pretrained_file: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/ptcv_pretrained/8bit/checkpoints/quant_avgpool_True_pool_fusing_True/best_model.pt 87 | test_only: True 88 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet50 89 | model: models.fix_resnet 90 | depth: 50 91 | -------------------------------------------------------------------------------- /apps/imagenet/resnet50/tiny_finetuning/res50_fix_quant_ptcv_pretrained_test_int_model.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0 26 | lr_scheduler: constant 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | normalize: True 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: True 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | finetune_iters: 500 64 | ptcv_pretrained: True 65 | 66 | no_clipping: True 67 | input_fraclen_sharing: False 68 | 69 | format_grid_search: True 70 | 71 | # 72 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 73 | 74 | # =========================== Override Settings =========================== 75 | quant_bias: True 76 | quant_avgpool: True 77 | pool_fusing: True 78 | quant_maxpool: True 79 | int_infer: True ## For int model 80 | integize: False #True ## For gpu inference 81 | int_op_only: False ## For cpu inference 82 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/ptcv_pretrained/8bit 83 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/ptcv_pretrained/8bit 84 | print_each_iter: True 85 | 86 | pretrained_file: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/ptcv_pretrained/8bit/checkpoints/quant_avgpool_True_pool_fusing_True/best_model.pt 87 | test_only: True 88 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet50 89 | model: 
models.fix_resnet 90 | depth: 50 91 | -------------------------------------------------------------------------------- /apps/imagenet/resnet50/tiny_finetuning/res50_fix_quant_ptcv_pretrained_test_int_op_only_on_cpu.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0 26 | lr_scheduler: constant 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | normalize: True 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: True 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | finetune_iters: 500 64 | ptcv_pretrained: True 65 | 66 | no_clipping: True 67 | input_fraclen_sharing: False 68 | 69 | format_grid_search: True 70 | 71 | # 72 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 73 | 74 | # =========================== Override Settings =========================== 75 | quant_bias: True 76 | quant_avgpool: True 77 | pool_fusing: True 78 | quant_maxpool: False #True ## Onnx does not support unfold used in MaxPool 79 | int_infer: True ## For int model 80 | integize: False #True ## For gpu inference 81 | int_op_only: True #False ## For cpu inference 82 | integize_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/ptcv_pretrained/8bit 83 | int_op_only_file_path: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/ptcv_pretrained/8bit 84 | print_each_iter: True 85 | 86 | pretrained_file: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/ptcv_pretrained/8bit/checkpoints/quant_avgpool_True_pool_fusing_True/best_model.pt 87 | test_only: True 88 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet50 89 | model: models.fix_resnet 90 | depth: 50 91 | -------------------------------------------------------------------------------- /apps/imagenet/resnet50/tiny_finetuning/res50_fix_quant_ptcv_pretrained_test_integize_on_gpu.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0 26 | lr_scheduler: 
constant 27 | 28 | 29 | # pretrain, resume, test_only 30 | pretrained_dir: '' 31 | pretrained_file: '' 32 | resume: '' 33 | test_only: False 34 | 35 | # 36 | random_seed: 1995 37 | model: '' 38 | reset_parameters: True 39 | 40 | # 41 | distributed: False 42 | distributed_all_reduce: False 43 | use_diff_seed: False 44 | 45 | # 46 | rescale_forward: True 47 | rescale_forward_conv: False 48 | normalize: True 49 | rescale_type: constant #[stddev, constant] 50 | 51 | # 52 | weight_format: [8, 7] 53 | input_format: [8, 6] 54 | 55 | # 56 | format_type: per_layer #[~, per_model, per_layer, per_channel] 57 | 58 | # 59 | format_from_metric: True 60 | momentum_for_metric: 0.1 61 | metric: std 62 | 63 | finetune_iters: 500 64 | ptcv_pretrained: True 65 | 66 | no_clipping: True 67 | input_fraclen_sharing: False 68 | 69 | format_grid_search: True 70 | 71 | # 72 | weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 73 | 74 | # =========================== Override Settings =========================== 75 | quant_bias: True 76 | quant_avgpool: True 77 | pool_fusing: True 78 | quant_maxpool: False #True ## Onnx does not support unfold used in MaxPool 79 | int_infer: True ## For int model 80 | integize: True ## For gpu inference 81 | int_op_only: False ## For cpu inference 82 | integize_file_path: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/ptcv_pretrained/8bit 83 | int_op_only_file_path: '' #/path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/ptcv_pretrained/8bit 84 | print_each_iter: True 85 | 86 | pretrained_file: /path/to/pretrained_models/ImageNet/tiny_finetuning/ResNet50/ptcv_pretrained/8bit/checkpoints/quant_avgpool_True_pool_fusing_True/best_model.pt 87 | test_only: True 88 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet50 89 | model: models.fix_resnet 90 | depth: 50 91 | -------------------------------------------------------------------------------- /apps/imagenet/resnet50/tiny_finetuning/res50_fix_quant_ptcv_pretrained_train.yml: -------------------------------------------------------------------------------- 1 | # =========================== Basic Settings =========================== 2 | # machine info 3 | num_gpus_per_job: 8 # number of gpus each job need 4 | 5 | # data 6 | dataset: imagenet1k 7 | data_transforms: imagenet1k 8 | data_loader: imagenet1k 9 | dataset_dir: data 10 | data_loader_workers: 5 11 | 12 | # info 13 | num_classes: 1000 14 | image_size: 224 15 | topk: [1, 5] 16 | num_epochs: 1 17 | 18 | # optimizer 19 | optimizer: sgd 20 | momentum: 0.9 21 | weight_decay: 0.0001 22 | nesterov: True 23 | 24 | # lr 25 | lr: 0.0001 26 | lr_scheduler: constant 27 | 28 | # pretrain, resume, test_only 29 | pretrained_dir: '' 30 | pretrained_file: '' 31 | resume: '' 32 | test_only: False 33 | 34 | # 35 | random_seed: 1995 36 | model: '' 37 | reset_parameters: True 38 | 39 | # 40 | distributed: False 41 | distributed_all_reduce: False 42 | use_diff_seed: False 43 | 44 | # 45 | rescale_forward: True 46 | rescale_forward_conv: False 47 | normalize: True 48 | rescale_type: constant #[stddev, constant] 49 | 50 | # 51 | weight_format: [8, 7] 52 | input_format: [8, 6] 53 | 54 | # 55 | format_type: per_layer #[~, per_model, per_layer, per_channel] 56 | 57 | # 58 | format_from_metric: False 59 | momentum_for_metric: 0.1 60 | metric: std 61 | 62 | finetune_iters: 500 63 | ptcv_pretrained: True 64 | 65 | no_clipping: True 66 | input_fraclen_sharing: False 67 | 68 | format_grid_search: True 69 | 70 | # 71 | 
weight_decay_scheme: no_depthwise_no_bn #[all, only_no_depthwise, only_no_bn, no_depthwise_no_bn] 72 | 73 | # =========================== Override Settings =========================== 74 | quant_avgpool: True 75 | pool_fusing: True 76 | 77 | log_dir: /path/to/checkpoints/F8Net/results/imagenet/resnet50 78 | model: models.fix_resnet 79 | depth: 50 80 | -------------------------------------------------------------------------------- /distributed_run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cfg=$1 3 | bs=${2:-256} 4 | num_gpus=${3:-8} 5 | num_proc_per_nodes=$(( num_gpus < 8 ? num_gpus : 8 )) 6 | echo "Total batch size: " $bs 7 | echo "No. of processes per node: " $num_proc_per_nodes 8 | if [ ! -f $cfg ]; then 9 | echo "Config not found!" 10 | fi 11 | 12 | RANK=0 python3 -W ignore -m torch.distributed.launch --nproc_per_node=$num_proc_per_nodes fix_train.py app:$cfg bs:$bs 13 | -------------------------------------------------------------------------------- /error_analysis/fixed_quant_analysis.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | from matplotlib.patches import Ellipse, FancyArrowPatch 4 | 5 | import torch 6 | 7 | 8 | def fix_quant(input, wl=8, fl=0, signed=True): 9 | assert wl >= 0 10 | assert fl >= 0 11 | if signed: 12 | assert fl <= wl - 1 13 | else: 14 | assert fl <= wl 15 | assert type(wl) == int 16 | assert type(fl) == int 17 | res = input * (2**fl) 18 | res.round_() 19 | if signed: 20 | bound = 2**(wl - 1) - 1 21 | res.clamp_(max=bound, min=-bound) 22 | else: 23 | bound = 2**wl - 1 24 | res.clamp_(max=bound, min=0) 25 | res.div_(2**fl) 26 | return res 27 | 28 | 29 | def main(): 30 | fl_cmap = { 31 | 0: [0.2, 0.2, 0.2], 32 | 1: [155 / 255, 221 / 255, 239 / 255], 33 | 2: [255 / 255, 102 / 255, 153 / 255], 34 | 3: [189 / 255, 146 / 255, 222 / 255], 35 | 4: [75 / 255, 148 / 255, 255 / 255], 36 | 5: [199 / 255, 225 / 255, 181 / 255], 37 | 6: [241 / 255, 245 / 255, 161 / 255], 38 | 7: [255 / 255, 133 / 255, 133 / 255], 39 | 8: [40 / 255, 240 / 255, 128 / 255] 40 | } 41 | 42 | wl = 8 43 | N = 10000 44 | signed = True 45 | normalize = True 46 | 47 | fig_size = (6, 4) 48 | axes_label_size = 16 49 | text_size = 12 50 | equation_text_size = 20 51 | title_size = 16 52 | legend_size = 8 53 | font_weight = 'normal' 54 | 55 | sigma_list = np.logspace(-3, 3, 1000) 56 | title_dict = {0: 'Unsigned (Rectified Gaussian)', 1: 'Signed (Gaussian)'} 57 | for wl in [8]: 58 | for signed in [True, False]: 59 | fl_list = list(range(wl + 1 - int(signed))) 60 | w_quant_err_sigma = [] 61 | opt_fl = [] 62 | opt_err = [] 63 | for sigma in sigma_list: 64 | w_quant_err_fl = [] 65 | w_rand = torch.randn(N) * sigma 66 | if not signed: 67 | w_rand = torch.relu(w_rand) 68 | for fl in fl_list: 69 | w_quant = fix_quant(w_rand, wl, fl, signed) 70 | error = np.mean((w_rand - w_quant).cpu().numpy()**2)**0.5 71 | if normalize: 72 | error = error / sigma 73 | w_quant_err_fl.append(error) 74 | err_min_idx = np.argmin(w_quant_err_fl) 75 | opt_fl.append(fl_list[err_min_idx]) 76 | opt_err.append(w_quant_err_fl[err_min_idx]) 77 | w_quant_err_sigma.append(w_quant_err_fl) 78 | w_quant_err_sigma = np.array(w_quant_err_sigma) 79 | 80 | ## replace the first zeros by the max 81 | opt_fl = np.array(opt_fl) 82 | idx_last_max = np.nonzero(opt_fl == max(fl_list))[0][-1] 83 | mask_to_change = opt_fl[:idx_last_max] == 0 84 | opt_fl[:idx_last_max][mask_to_change] = max(fl_list) 
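## Note on the replacement above: when sigma is small enough that even the
## largest FL quantizes (almost) every sample to zero, the relative error
## saturates at ~1 for every FL, so np.argmin returns the first entry,
## i.e. FL=0; those spurious zeros are overwritten with the maximum FL,
## which is the consistent choice in the deep-underflow regime.
## Quick hand-checked sanity example for fix_quant() defined above
## (illustrative values, not part of the sweep):
##   fix_quant(torch.tensor([0.3, -1.7]), wl=8, fl=7, signed=True)
##   ## 0.3 * 2**7 = 38.4 -> rounds to 38 -> 38 / 2**7 = 0.2969
##   ## -1.7 * 2**7 = -217.6 -> rounds to -218 -> clamped to -(2**7 - 1) = -127 -> -0.9922
##   ## => tensor([ 0.2969, -0.9922])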
85 | 86 | ## sigma 87 | opt_sigma = [] 88 | fig = plt.figure(figsize=fig_size) 89 | for idx, fl in enumerate(fl_list): 90 | plt.semilogx(sigma_list, 91 | w_quant_err_sigma[:, idx], 92 | label=f'FL={fl}', 93 | c=fl_cmap[fl], 94 | lw=2) 95 | local_min = np.argmin(w_quant_err_sigma[:, idx]) 96 | opt_sigma.append(sigma_list[local_min]) 97 | plt.scatter(sigma_list[local_min], 98 | w_quant_err_sigma[local_min, idx], 99 | color=fl_cmap[fl], 100 | marker='*', 101 | s=60) 102 | plt.gca().annotate('Overflow', 103 | fontsize=text_size, 104 | fontweight=font_weight, 105 | xy=(0.55, 0.87), 106 | xycoords='figure fraction', 107 | xytext=(0.65, 0.86), 108 | textcoords='figure fraction', 109 | arrowprops=dict(arrowstyle="<-", 110 | connectionstyle="arc3")) 111 | plt.gca().annotate('Underflow', 112 | fontsize=text_size, 113 | fontweight=font_weight, 114 | xy=(0.55, 0.87), 115 | xycoords='figure fraction', 116 | xytext=(0.32, 0.86), 117 | textcoords='figure fraction', 118 | arrowprops=dict(arrowstyle="<-", 119 | connectionstyle="arc3")) 120 | plt.legend(ncol=1, 121 | loc='lower right', 122 | prop={ 123 | 'size': legend_size, 124 | 'weight': font_weight 125 | }) 126 | plt.xlabel(r'$\sigma$', 127 | fontsize=axes_label_size, 128 | fontweight=font_weight) 129 | plt.ylabel('Relative Error', 130 | fontsize=axes_label_size, 131 | fontweight=font_weight) 132 | plt.ylim(top=plt.ylim()[1] * 1.02) 133 | plt.setp(plt.gca().get_xticklabels(), 134 | fontsize=axes_label_size, 135 | fontweight=font_weight) 136 | plt.setp(plt.gca().get_yticklabels(), 137 | fontsize=axes_label_size, 138 | fontweight=font_weight) 139 | title = title_dict[int(signed)].split(' ') 140 | title.insert(1, f'{wl}-bit') 141 | title = ' '.join(title) 142 | plt.title(title, fontsize=title_size) 143 | plt.subplots_adjust(bottom=0.15, top=0.9, left=0.15, right=0.95) 144 | plt.savefig( 145 | f'./std_fix_quant_error_analysis_{wl}bit_signed_{signed}.pdf', 146 | dpi=800) 147 | 148 | fig = plt.figure(figsize=fig_size) 149 | plt.semilogy(fl_list, 150 | opt_sigma, 151 | color='b', 152 | lw=2, 153 | ls='--', 154 | marker='o', 155 | markersize=8) 156 | plt.xlabel('Fractional Length (FL)', 157 | fontsize=axes_label_size, 158 | fontweight=font_weight) 159 | plt.ylabel('Optimal ' + r'$\sigma$', 160 | fontsize=axes_label_size, 161 | fontweight=font_weight) 162 | plt.setp(plt.gca().get_xticklabels(), 163 | fontsize=axes_label_size, 164 | fontweight=font_weight) 165 | plt.setp(plt.gca().get_yticklabels(), 166 | fontsize=axes_label_size, 167 | fontweight=font_weight) 168 | title = title_dict[int(signed)].split(' ') 169 | title.insert(1, f'{wl}-bit') 170 | title = ' '.join(title) 171 | plt.title(title, fontsize=title_size, fontweight=font_weight) 172 | plt.subplots_adjust(bottom=0.15, top=0.9, left=0.15, right=0.95) 173 | plt.savefig(f'./std_opt_sigma_vs_fl_{wl}bit_signed_{signed}.pdf', 174 | dpi=800) 175 | 176 | ax1_color = 'b' 177 | ax2_color = 'r' 178 | fig, ax1 = plt.subplots(figsize=fig_size) 179 | lns1 = ax1.semilogx(sigma_list, 180 | opt_fl, 181 | color=ax1_color, 182 | lw=2, 183 | ls='-', 184 | label='Opt. Frac. 
Len.') 185 | if signed: 186 | ell1_x = 0.84 187 | else: 188 | ell1_x = 0.88 189 | ellipse1 = Ellipse(xy=(ell1_x, 0.5), 190 | width=0.04, 191 | height=0.12, 192 | edgecolor=ax1_color, 193 | fc='None', 194 | lw=2, 195 | transform=plt.gca().transAxes) 196 | arrow_connectionstyle = "arc3,rad=.2" 197 | arrow_style = "Simple, tail_width=0.5, head_width=4, head_length=8" 198 | arr1 = FancyArrowPatch((ell1_x, 0.5 - 0.058), 199 | (ell1_x + 0.04 * 2.5, 0.5 - 0.06 * 1.8), 200 | connectionstyle=arrow_connectionstyle, 201 | arrowstyle=arrow_style, 202 | color=ax1_color, 203 | transform=plt.gca().transAxes) 204 | ax1.set_xlabel(r'$\sigma$', 205 | fontsize=axes_label_size, 206 | fontweight=font_weight) 207 | ax1.set_ylabel('Opt. Frac. Len. (' + r'$\mathrm{FL}^*$' + ')', 208 | fontsize=axes_label_size, 209 | fontweight=font_weight) 210 | ax1.yaxis.label.set_color(ax1_color) 211 | ax1.tick_params(axis='y', labelcolor=ax1_color) 212 | plt.setp(ax1.get_xticklabels(), 213 | fontsize=axes_label_size, 214 | fontweight=font_weight) 215 | plt.setp(ax1.get_yticklabels(), 216 | fontsize=axes_label_size, 217 | fontweight=font_weight) 218 | 219 | ax2 = ax1.twinx() 220 | lns2 = ax2.semilogx(sigma_list, 221 | opt_err, 222 | color=ax2_color, 223 | lw=2, 224 | ls='-', 225 | label='Min. Rel. Err.') 226 | if signed: 227 | ell2_x = 0.3 228 | else: 229 | ell2_x = 0.3 230 | ellipse2 = Ellipse(xy=(ell2_x, 0.93), 231 | width=0.04, 232 | height=0.12, 233 | edgecolor=ax2_color, 234 | fc='None', 235 | lw=2, 236 | transform=plt.gca().transAxes) 237 | arrow_connectionstyle = "arc3,rad=-.2" 238 | arrow_style = "Simple, tail_width=0.5, head_width=4, head_length=8" 239 | arr2 = FancyArrowPatch((ell2_x, 0.93 - 0.058), 240 | (ell2_x - 0.04 * 2.5, 0.93 - 0.06 * 1.8), 241 | connectionstyle=arrow_connectionstyle, 242 | arrowstyle=arrow_style, 243 | color=ax2_color, 244 | transform=plt.gca().transAxes) 245 | ax2.set_ylabel('Minimum Relative Error', 246 | fontsize=axes_label_size, 247 | fontweight=font_weight) 248 | ax2.yaxis.label.set_color(ax2_color) 249 | ax2.tick_params(axis='y', labelcolor=ax2_color) 250 | plt.setp(ax2.get_xticklabels(), 251 | fontsize=axes_label_size, 252 | fontweight=font_weight) 253 | plt.setp(ax2.get_yticklabels(), 254 | fontsize=axes_label_size, 255 | fontweight=font_weight) 256 | leg = lns1 + lns2 257 | labs = [l.get_label() for l in leg] 258 | ax1.legend(leg, 259 | labs, 260 | ncol=2, 261 | loc='upper center', 262 | prop={'size': axes_label_size * 0.75}, 263 | bbox_to_anchor=(0.5, -0.25)) 264 | title = title_dict[int(signed)].split(' ') 265 | title.insert(1, f'{wl}-bit') 266 | title = ' '.join(title) 267 | plt.title(title, fontsize=title_size, fontweight=font_weight) 268 | plt.subplots_adjust(bottom=0.27, top=0.9, left=0.1, right=0.87) 269 | plt.savefig( 270 | f'./std_opt_fl_and_err_vs_sigma_{wl}bit_signed_{signed}.pdf', 271 | dpi=800) 272 | 273 | all_result = np.stack( 274 | [np.array(sigma_list), 275 | np.array(opt_err), 276 | np.array(opt_fl)], 277 | axis=1) 278 | np.savetxt( 279 | f'./all_results_{wl}bit_signed_{signed}.txt', 280 | all_result, 281 | header=f'signed={signed}\nsigma\tmae\trms\topt err\t opt fl') 282 | 283 | ## threshold plot 284 | fl_list = np.array(fl_list) 285 | sigma_th_list = [] 286 | for fl in fl_list[1:]: 287 | threshold_idx = np.nonzero(opt_fl == fl - 1)[0][0] 288 | sigma_th_list.append(sigma_list[threshold_idx]) 289 | ## sigma 290 | plt.figure(figsize=fig_size) 291 | plt.scatter(fl_list[1:], 292 | sigma_th_list, 293 | color='b', 294 | marker='o', 295 | s=24, 296 | label='Empirical') 297 
| coeff = 2**np.mean(np.array(fl_list[1:]) + np.log2(sigma_th_list)) 298 | coeff = np.around(coeff, 2) 299 | plt.plot(fl_list[1:], 300 | coeff / 2**fl_list[1:], 301 | color='r', 302 | lw=2, 303 | ls='--', 304 | label='Linear Fitting') 305 | plt.text(0.2, 306 | 0.3, 307 | r'$\sigma=$'.format(coeff, 'fl'), 308 | transform=plt.gca().transAxes, 309 | fontsize=equation_text_size, 310 | fontweight=font_weight) 311 | plt.text(0.31, 312 | 0.3, 313 | r'$\frac{{{}}}{{2^\mathrm{{{}}}}}$'.format(coeff, 'FL'), 314 | transform=plt.gca().transAxes, 315 | fontsize=equation_text_size * 1.2, 316 | fontweight=font_weight) 317 | plt.gca().set_yscale('log') 318 | plt.legend(ncol=2, 319 | loc='upper center', 320 | bbox_to_anchor=(0.5, -0.25), 321 | prop={ 322 | 'weight': font_weight, 323 | 'size': legend_size * 1.5 324 | }) 325 | plt.xlabel('Fractional Length (FL)', 326 | fontsize=axes_label_size, 327 | fontweight=font_weight) 328 | plt.ylabel('Threshold ' + r'$\sigma$', 329 | fontsize=axes_label_size, 330 | fontweight=font_weight) 331 | plt.setp(plt.gca().get_xticklabels(), 332 | fontsize=axes_label_size, 333 | fontweight=font_weight) 334 | plt.setp(plt.gca().get_yticklabels(), 335 | fontsize=axes_label_size, 336 | fontweight=font_weight) 337 | title = title_dict[int(signed)].split(' ') 338 | title.insert(1, f'{wl}-bit') 339 | title = ' '.join(title) 340 | plt.title(title, fontsize=title_size, fontweight=font_weight) 341 | plt.subplots_adjust(bottom=0.27, top=0.9, left=0.15, right=0.95) 342 | plt.savefig(f'./sigma_threshold_vs_fl_{wl}bit_signed_{signed}.pdf', 343 | dpi=800) 344 | 345 | 346 | if __name__ == '__main__': 347 | main() 348 | -------------------------------------------------------------------------------- /error_analysis/sigma_threshold_vs_fl_8bit_signed_False.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/error_analysis/sigma_threshold_vs_fl_8bit_signed_False.pdf -------------------------------------------------------------------------------- /error_analysis/sigma_threshold_vs_fl_8bit_signed_True.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/error_analysis/sigma_threshold_vs_fl_8bit_signed_True.pdf -------------------------------------------------------------------------------- /error_analysis/std_fix_quant_error_analysis_8bit_signed_False.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/error_analysis/std_fix_quant_error_analysis_8bit_signed_False.pdf -------------------------------------------------------------------------------- /error_analysis/std_fix_quant_error_analysis_8bit_signed_True.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/error_analysis/std_fix_quant_error_analysis_8bit_signed_True.pdf -------------------------------------------------------------------------------- /error_analysis/std_opt_fl_and_err_vs_sigma_8bit_signed_False.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/error_analysis/std_opt_fl_and_err_vs_sigma_8bit_signed_False.pdf -------------------------------------------------------------------------------- /error_analysis/std_opt_fl_and_err_vs_sigma_8bit_signed_True.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/error_analysis/std_opt_fl_and_err_vs_sigma_8bit_signed_True.pdf -------------------------------------------------------------------------------- /error_analysis/std_opt_sigma_vs_fl_8bit_signed_False.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/error_analysis/std_opt_sigma_vs_fl_8bit_signed_False.pdf -------------------------------------------------------------------------------- /error_analysis/std_opt_sigma_vs_fl_8bit_signed_True.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/error_analysis/std_opt_sigma_vs_fl_8bit_signed_True.pdf -------------------------------------------------------------------------------- /fraclen_visual/fraclen_visualizing_mbv2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | import matplotlib 5 | matplotlib.use('Agg') 6 | 7 | matplotlib.rcParams['pdf.fonttype'] = 42 8 | matplotlib.rcParams['ps.fonttype'] = 42 9 | 10 | import matplotlib.pyplot as plt 11 | import matplotlib.patches as patches 12 | 13 | import torch 14 | 15 | 16 | def main(): 17 | work_dir = './' 18 | log_file_name = 'mbv2_fix_quant.out' 19 | 20 | depthwise_layer = [ 21 | 'stage_0_layer_0.body.0', 'stage_1_layer_0.body.1', 22 | 'stage_1_layer_1.body.1', 'stage_2_layer_0.body.1', 23 | 'stage_2_layer_1.body.1', 'stage_2_layer_2.body.1', 24 | 'stage_3_layer_0.body.1', 'stage_3_layer_1.body.1', 25 | 'stage_3_layer_2.body.1', 'stage_3_layer_3.body.1', 26 | 'stage_4_layer_0.body.1', 'stage_4_layer_1.body.1', 27 | 'stage_4_layer_2.body.1', 'stage_5_layer_0.body.1', 28 | 'stage_5_layer_1.body.1', 'stage_5_layer_2.body.1', 29 | 'stage_6_layer_0.body.1' 30 | ] 31 | 32 | layer_fraclen_dict = {} 33 | with open(os.path.join(work_dir, log_file_name), 'r') as f: 34 | lines = f.read().splitlines() 35 | lines = [l for l in lines if 'fraclen' in l or 'name' in l] 36 | lines = [ 37 | l for l in lines 38 | if 'setting' not in l and 'model' not in l and 'log' not in l 39 | ] 40 | lines = [l[:-1] for l in lines] 41 | assert len(lines) % 3 == 0 42 | for idx in range(len(lines) // 3): 43 | assert 'layer name' in lines[idx * 3] 44 | assert 'input_fraclen' in lines[idx * 3 + 1] 45 | assert 'weight_fraclen' in lines[idx * 3 + 2] 46 | layer_name = lines[idx * 3][12:] 47 | if layer_name in depthwise_layer: 48 | depthwise = True 49 | else: 50 | depthwise = False 51 | if 'shortcut' in layer_name: 52 | shortcut = True 53 | else: 54 | shortcut = False 55 | if 'classifier' in layer_name: 56 | fc = True 57 | else: 58 | fc = False 59 | if 'body.0' in layer_name and 'stage_0' not in layer_name: 60 | double_side = True 61 | elif 'tail' in layer_name: 62 | double_side = True 63 | else: 64 | double_side = False 65 | input_fraclen = int( 66 | torch.round(eval('torch.' 
+ lines[idx * 3 + 1][15:])).item()) 67 | weight_fraclen = int(np.around(eval(lines[idx * 3 + 2][16:]))) 68 | layer_fraclen_dict[layer_name] = (weight_fraclen, input_fraclen, 69 | depthwise, shortcut, fc, double_side) 70 | 71 | cm = {'b': [80 / 255, 156 / 255, 1.0], 'm': [250 / 255, 0, 101 / 255]} 72 | model_name = {'mobilenetv2': 'MobileNet V2'} 73 | delta = {'mobilenetv2': 6} 74 | bit_cmap = { 75 | 0: [1.0, 1.0, 1.0], 76 | 1: [155 / 255, 221 / 255, 239 / 255], 77 | 2: [255 / 255, 102 / 255, 153 / 255], 78 | 3: [189 / 255, 146 / 255, 222 / 255], 79 | 4: [75 / 255, 148 / 255, 255 / 255], 80 | 5: [199 / 255, 225 / 255, 181 / 255], 81 | 6: [241 / 255, 245 / 255, 161 / 255], 82 | 7: [255 / 255, 133 / 255, 133 / 255], 83 | 8: [40 / 255, 240 / 255, 128 / 255] 84 | } 85 | fig_size = (6, 4) 86 | axes_label_size = 16 87 | text_size = 12 88 | title_size = 16 89 | legend_size = 8 90 | font_weight = 'normal' 91 | for model in ['mobilenetv2']: 92 | if model in ['mobilenetv2']: 93 | barwidth = 1 94 | patch_width = 2 95 | arrow_extension = 3 96 | else: 97 | barwidth = 1 98 | if shortcut: 99 | patch_width = 1 100 | else: 101 | patch_width = .75 102 | arrow_extension = .75 103 | 104 | weight_fraclen_list = [v[0] for v in layer_fraclen_dict.values()] 105 | input_fraclen_list = [-v[1] for v in layer_fraclen_dict.values()] 106 | ## The first layer does not quantize input 107 | input_fraclen_list[0] = -8 108 | depthwise_list = [v[2] for v in layer_fraclen_dict.values()] 109 | fc_list = [v[4] for v in layer_fraclen_dict.values()] 110 | double_side_list = [v[5] for v in layer_fraclen_dict.values()] 111 | plt.figure(figsize=fig_size) 112 | plt.gca().spines['top'].set_visible(False) 113 | plt.gca().spines['bottom'].set_visible(False) 114 | plt.gca().spines['left'].set_visible(False) 115 | plt.gca().spines['right'].set_visible(False) 116 | plt.gca().tick_params(axis='both', which='both', length=0) 117 | ids = np.arange(len(weight_fraclen_list)) 118 | alpha = 0.5 119 | if model in ['mobilenetv2']: 120 | color_w = [ 121 | cm['b'] + [alpha] if depthwise_list[idx] else cm['b'] 122 | for idx in ids 123 | ] 124 | color_a = [ 125 | cm['m'] + [alpha] if depthwise_list[idx] else cm['m'] 126 | for idx in ids 127 | ] 128 | else: 129 | color_w = [cm['b'] for idx in ids] 130 | color_a = [cm['m'] for idx in ids] 131 | plt.bar(ids * barwidth, 132 | weight_fraclen_list, 133 | bottom=0.01, 134 | width=barwidth * 0.8, 135 | color=color_w, 136 | edgecolor='k', 137 | linewidth=0.3) 138 | plt.bar(ids * barwidth, 139 | input_fraclen_list, 140 | bottom=-0.01, 141 | width=barwidth * 0.8, 142 | color=color_a, 143 | edgecolor='k', 144 | linewidth=0.3) 145 | plt.plot(np.arange(-barwidth / 2, 146 | barwidth * len(ids) + barwidth / 2, barwidth), 147 | np.ones(len(ids) + 1) * 2, 148 | ls='--', 149 | lw=0.5, 150 | c='k') 151 | plt.plot(np.arange(-barwidth / 2, 152 | barwidth * len(ids) + barwidth / 2, barwidth), 153 | np.ones(len(ids) + 1) * 4, 154 | ls='--', 155 | lw=0.5, 156 | c='k') 157 | plt.plot(np.arange(-barwidth / 2, 158 | barwidth * len(ids) + barwidth / 2, barwidth), 159 | np.ones(len(ids) + 1) * 6, 160 | ls='--', 161 | lw=0.5, 162 | c='k') 163 | plt.plot(np.arange(-barwidth / 2, 164 | barwidth * len(ids) + barwidth / 2, barwidth), 165 | np.ones(len(ids) + 1) * 8, 166 | ls='--', 167 | lw=0.5, 168 | c='k') 169 | plt.plot(np.arange(-barwidth / 2, 170 | barwidth * len(ids) + barwidth / 2, barwidth), 171 | -np.ones(len(ids) + 1) * 8, 172 | ls='--', 173 | lw=0.5, 174 | c='k') 175 | plt.plot(np.arange(-barwidth / 2, 176 | barwidth * len(ids) 
+ barwidth / 2, barwidth), 177 | -np.ones(len(ids) + 1) * 6, 178 | ls='--', 179 | lw=0.5, 180 | c='k') 181 | plt.plot(np.arange(-barwidth / 2, 182 | barwidth * len(ids) + barwidth / 2, barwidth), 183 | -np.ones(len(ids) + 1) * 4, 184 | ls='--', 185 | lw=0.5, 186 | c='k') 187 | plt.plot(np.arange(-barwidth / 2, 188 | barwidth * len(ids) + barwidth / 2, barwidth), 189 | -np.ones(len(ids) + 1) * 2, 190 | ls='--', 191 | lw=0.5, 192 | c='k') 193 | plt.arrow(-barwidth / 2, 194 | 0, 195 | barwidth * len(ids) + barwidth / 2 + 196 | barwidth * arrow_extension, 197 | 0, 198 | ls='-', 199 | color='k', 200 | width=.005, 201 | head_width=0.15, 202 | head_length=0.1 * arrow_extension) 203 | rect_w_pw = patches.Rectangle((barwidth * len(ids) / 2 * 0, -10), 204 | 1.2 * patch_width, 205 | 1.2, 206 | linewidth=.5, 207 | edgecolor='k', 208 | facecolor=cm['b']) 209 | rect_w_dw = patches.Rectangle((barwidth * len(ids) / 2 * 0, -12), 210 | 1.2 * patch_width, 211 | 1.2, 212 | linewidth=.5, 213 | edgecolor='k', 214 | facecolor=cm['b'] + [alpha]) 215 | rect_a_pw = patches.Rectangle((barwidth * len(ids) / 2 * 1.1, -10), 216 | 1.2 * patch_width, 217 | 1.2, 218 | linewidth=.5, 219 | edgecolor='k', 220 | facecolor=cm['m']) 221 | rect_a_dw = patches.Rectangle((barwidth * len(ids) / 2 * 1.1, -12), 222 | 1.2 * patch_width, 223 | 1.2, 224 | linewidth=.5, 225 | edgecolor='k', 226 | facecolor=cm['m'] + [alpha]) 227 | plt.gca().add_patch(rect_w_pw) 228 | plt.gca().add_patch(rect_a_pw) 229 | if model in ['mobilenetv2']: 230 | plt.gca().add_patch(rect_w_dw) 231 | plt.gca().add_patch(rect_a_dw) 232 | if model in ['mobilenetv2']: 233 | plt.text(barwidth * len(ids) / 2 * 0 + patch_width * 1.6, 234 | -9.65, 235 | '#Weight FL (pointwise)', 236 | fontsize=text_size) 237 | plt.text(barwidth * len(ids) / 2 * 1.1 + patch_width * 1.6, 238 | -9.65, 239 | '#Activation FL (pointwise)', 240 | fontsize=text_size) 241 | plt.text(barwidth * len(ids) / 2 * 0 + patch_width * 1.6, 242 | -11.65, 243 | '#Weight FL (depthwise)', 244 | fontsize=text_size) 245 | plt.text(barwidth * len(ids) / 2 * 1.1 + patch_width * 1.6, 246 | -11.65, 247 | '#Activation FL (depthwise)', 248 | fontsize=text_size) 249 | plt.xticks([]) 250 | plt.yticks(np.arange(-8, 9, 2), np.abs(np.arange(-8, 9, 2))) 251 | plt.xlabel('Layer', fontsize=axes_label_size) 252 | plt.ylabel('Fractional Length', fontsize=axes_label_size) 253 | plt.setp(plt.gca().get_xticklabels(), 254 | fontsize=axes_label_size, 255 | fontweight=font_weight) 256 | plt.setp(plt.gca().get_yticklabels(), 257 | fontsize=axes_label_size, 258 | fontweight=font_weight) 259 | plt.gca().xaxis.set_label_coords(1.0, 0.56) 260 | plt.gca().yaxis.set_label_coords(-0.05, 0.58) 261 | plt.xlim(-0.6, barwidth * len(ids) + 1.2 * barwidth * arrow_extension) 262 | plt.ylim(-12, 8.5) 263 | plt.savefig(f'./{model}_8bit_fraclens_wo_title.pdf', 264 | dpi=300, 265 | bbox_inches='tight') 266 | plt.title(f'Fractional Length vs Layer (8-bit {model_name[model]})', 267 | fontsize=title_size) 268 | plt.savefig(f'./{model}_8bit_fraclens.pdf', 269 | dpi=300, 270 | bbox_inches='tight') 271 | 272 | 273 | if __name__ == '__main__': 274 | main() 275 | -------------------------------------------------------------------------------- /fraclen_visual/fraclen_visualizing_res50.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | import matplotlib 5 | matplotlib.use('Agg') 6 | 7 | matplotlib.rcParams['pdf.fonttype'] = 42 8 | matplotlib.rcParams['ps.fonttype'] = 42 9 | 10 | 
import matplotlib.pyplot as plt 11 | import matplotlib.patches as patches 12 | 13 | import torch 14 | 15 | 16 | def main(): 17 | work_dir = './' 18 | log_file_name_dict = { 19 | 'ptcv': 20 | 'res50_fix_quant_ptcv_pretrained.out', 21 | 'nvidia': 22 | 'res50_fix_quant_nvidia_pretrained.out' 23 | } 24 | for pretrained_method in log_file_name_dict.keys(): 25 | print(f'pretrained with {pretrained_method}') 26 | log_file_name = log_file_name_dict[pretrained_method] 27 | 28 | depthwise_layer = [] 29 | 30 | layer_fraclen_dict = {} 31 | with open(os.path.join(work_dir, log_file_name), 'r') as f: 32 | lines = f.read().splitlines() 33 | lines = [ 34 | l for l in lines 35 | if 'fraclen' in l or l.startswith('layer name') 36 | ] 37 | lines = [ 38 | l for l in lines 39 | if 'setting' not in l and 'model' not in l and 'log' not in l 40 | ] 41 | lines = [l[:-1] for l in lines] 42 | assert len(lines) % 3 == 0 43 | for idx in range(len(lines) // 3): 44 | assert 'layer name' in lines[idx * 3] 45 | assert 'input_fraclen' in lines[idx * 3 + 1] 46 | assert 'weight_fraclen' in lines[idx * 3 + 2] 47 | layer_name = lines[idx * 3][12:] 48 | if 'shortcut' in layer_name: 49 | shortcut = True 50 | else: 51 | shortcut = False 52 | if 'classifier' in layer_name: 53 | fc = True 54 | else: 55 | fc = False 56 | input_fraclen = int( 57 | torch.round(eval('torch.' + lines[idx * 3 + 1][15:])).item()) 58 | weight_fraclen = int(np.around(eval(lines[idx * 3 + 2][16:]))) 59 | layer_fraclen_dict[layer_name] = (weight_fraclen, input_fraclen, 60 | shortcut, fc) 61 | 62 | cm = {'b': [80 / 255, 156 / 255, 1.0], 'm': [250 / 255, 0, 101 / 255]} 63 | model_name = {'resnet50': 'ResNet50'} 64 | bit_cmap = { 65 | 0: [1.0, 1.0, 1.0], 66 | 1: [155 / 255, 221 / 255, 239 / 255], 67 | 2: [255 / 255, 102 / 255, 153 / 255], 68 | 3: [189 / 255, 146 / 255, 222 / 255], 69 | 4: [75 / 255, 148 / 255, 255 / 255], 70 | 5: [199 / 255, 225 / 255, 181 / 255], 71 | 6: [241 / 255, 245 / 255, 161 / 255], 72 | 7: [255 / 255, 133 / 255, 133 / 255], 73 | 8: [40 / 255, 240 / 255, 128 / 255] 74 | } 75 | fig_size = (6, 4) 76 | axes_label_size = 16 77 | text_size = 12 78 | title_size = 16 79 | legend_size = 8 80 | font_weight = 'normal' 81 | for model in ['resnet50']: 82 | barwidth = 1 83 | patch_width = 1.0 84 | arrow_extension = 2.0 85 | 86 | weight_fraclen_list = [v[0] for v in layer_fraclen_dict.values()] 87 | input_fraclen_list = [-v[1] for v in layer_fraclen_dict.values()] 88 | ## The first layer does not quantize input 89 | input_fraclen_list[0] = -8 90 | fc_list = [v[3] for v in layer_fraclen_dict.values()] 91 | plt.figure(figsize=fig_size) 92 | plt.gca().spines['top'].set_visible(False) 93 | plt.gca().spines['bottom'].set_visible(False) 94 | plt.gca().spines['left'].set_visible(False) 95 | plt.gca().spines['right'].set_visible(False) 96 | plt.gca().tick_params(axis='both', which='both', length=0) 97 | ids = np.arange(len(weight_fraclen_list)) 98 | color_w = [cm['b'] for idx in ids] 99 | color_a = [cm['m'] for idx in ids] 100 | plt.bar(ids * barwidth, 101 | weight_fraclen_list, 102 | bottom=0.01, 103 | width=barwidth * 0.8, 104 | color=color_w, 105 | edgecolor='k', 106 | linewidth=0.3) 107 | plt.bar(ids * barwidth, 108 | input_fraclen_list, 109 | bottom=-0.01, 110 | width=barwidth * 0.8, 111 | color=color_a, 112 | edgecolor='k', 113 | linewidth=0.3) 114 | plt.plot(np.arange(-barwidth / 2, 115 | barwidth * len(ids) + barwidth / 2, barwidth), 116 | np.ones(len(ids) + 1) * 2, 117 | ls='--', 118 | lw=0.5, 119 | c='k') 120 | plt.plot(np.arange(-barwidth / 2, 121 | 
barwidth * len(ids) + barwidth / 2, barwidth), 122 | np.ones(len(ids) + 1) * 4, 123 | ls='--', 124 | lw=0.5, 125 | c='k') 126 | plt.plot(np.arange(-barwidth / 2, 127 | barwidth * len(ids) + barwidth / 2, barwidth), 128 | np.ones(len(ids) + 1) * 6, 129 | ls='--', 130 | lw=0.5, 131 | c='k') 132 | plt.plot(np.arange(-barwidth / 2, 133 | barwidth * len(ids) + barwidth / 2, barwidth), 134 | np.ones(len(ids) + 1) * 8, 135 | ls='--', 136 | lw=0.5, 137 | c='k') 138 | plt.plot(np.arange(-barwidth / 2, 139 | barwidth * len(ids) + barwidth / 2, barwidth), 140 | -np.ones(len(ids) + 1) * 8, 141 | ls='--', 142 | lw=0.5, 143 | c='k') 144 | plt.plot(np.arange(-barwidth / 2, 145 | barwidth * len(ids) + barwidth / 2, barwidth), 146 | -np.ones(len(ids) + 1) * 6, 147 | ls='--', 148 | lw=0.5, 149 | c='k') 150 | plt.plot(np.arange(-barwidth / 2, 151 | barwidth * len(ids) + barwidth / 2, barwidth), 152 | -np.ones(len(ids) + 1) * 4, 153 | ls='--', 154 | lw=0.5, 155 | c='k') 156 | plt.plot(np.arange(-barwidth / 2, 157 | barwidth * len(ids) + barwidth / 2, barwidth), 158 | -np.ones(len(ids) + 1) * 2, 159 | ls='--', 160 | lw=0.5, 161 | c='k') 162 | plt.arrow(-barwidth / 2, 163 | 0, 164 | barwidth * len(ids) + barwidth / 2 + 165 | barwidth * arrow_extension, 166 | 0, 167 | ls='-', 168 | color='k', 169 | width=.005, 170 | head_width=0.15, 171 | head_length=0.1 * arrow_extension) 172 | rect_w_pw = patches.Rectangle((barwidth * len(ids) / 2 * 0, -10), 173 | 2.2 * patch_width, 174 | 1.2 * patch_width, 175 | linewidth=.5, 176 | edgecolor='k', 177 | facecolor=cm['b']) 178 | rect_a_pw = patches.Rectangle((barwidth * len(ids) / 2 * 1.0, -10), 179 | 2.2 * patch_width, 180 | 1.2 * patch_width, 181 | linewidth=.5, 182 | edgecolor='k', 183 | facecolor=cm['m']) 184 | plt.gca().add_patch(rect_w_pw) 185 | plt.gca().add_patch(rect_a_pw) 186 | if model in ['resnet50']: 187 | plt.text(barwidth * len(ids) / 2 * 0 + patch_width * 2.6, 188 | -9.75, 189 | '#Weight FL', 190 | fontsize=text_size) 191 | plt.text(barwidth * len(ids) / 2 * 1 + patch_width * 2.6, 192 | -9.75, 193 | '#Activation FL', 194 | fontsize=text_size) 195 | plt.xticks([]) 196 | plt.yticks(np.arange(-8, 9, 2), np.abs(np.arange(-8, 9, 2))) 197 | plt.xlabel('Layer', fontsize=axes_label_size) 198 | plt.ylabel('Fractional Length', fontsize=axes_label_size) 199 | plt.setp(plt.gca().get_xticklabels(), 200 | fontsize=axes_label_size, 201 | fontweight=font_weight) 202 | plt.setp(plt.gca().get_yticklabels(), 203 | fontsize=axes_label_size, 204 | fontweight=font_weight) 205 | plt.gca().xaxis.set_label_coords(1.02, 0.52) 206 | plt.gca().yaxis.set_label_coords(-0.05, 0.58) 207 | plt.xlim(-0.6, 208 | barwidth * len(ids) + 1.2 * barwidth * arrow_extension) 209 | plt.ylim(-10, 8.5) 210 | plt.savefig(f'./{model}_{pretrained_method}_8bit_fraclens_wo_title.pdf', 211 | dpi=300, 212 | bbox_inches='tight') 213 | plt.title( 214 | f'Fractional Length vs Layer (8-bit {model_name[model]})', 215 | fontsize=title_size) 216 | plt.savefig(f'./{model}_{pretrained_method}_8bit_fraclens.pdf', 217 | dpi=300, 218 | bbox_inches='tight') 219 | 220 | 221 | if __name__ == '__main__': 222 | main() 223 | -------------------------------------------------------------------------------- /fraclen_visual/mobilenetv2_8bit_fraclens.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/fraclen_visual/mobilenetv2_8bit_fraclens.pdf 
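Aside: the quantity plotted by the two visualizers above is the fractional length (FL) of an 8-bit fixed-point format, i.e. each tensor value v is stored as an integer n with v ≈ n * 2**(-FL), so a larger FL buys a finer grid at the cost of a smaller representable range. The sketch below is a minimal, hypothetical illustration of this rounding; it is not the repo's fix_quant from models/fix_quant_ops.py, and the helper name toy_fix_quant is invented for this example.

import torch

def toy_fix_quant(x, bits=8, fraclen=4, signed=True):
    # Round x onto the fixed-point grid n * 2**-fraclen, with n an
    # integer saturated to the `bits`-wide signed or unsigned range.
    scale = 2.0 ** fraclen
    n = torch.round(x * scale)
    if signed:
        n = torch.clamp(n, -2 ** (bits - 1), 2 ** (bits - 1) - 1)
    else:
        n = torch.clamp(n, 0, 2 ** bits - 1)
    return n / scale

x = torch.tensor([0.3, 1.7, 30.0])
print(toy_fix_quant(x, fraclen=6))  # fine grid, saturates near 2
print(toy_fix_quant(x, fraclen=2))  # coarse grid, range up to 31.75

With FL=6 a signed 8-bit value tops out at 127/64 ≈ 1.98, so 30.0 saturates; with FL=2 the grid step grows to 0.25 but the range extends to 31.75. That per-layer trade-off between precision and range is what the bar charts above display for weights and activations.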
-------------------------------------------------------------------------------- /fraclen_visual/mobilenetv2_8bit_fraclens_wo_title.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/fraclen_visual/mobilenetv2_8bit_fraclens_wo_title.pdf -------------------------------------------------------------------------------- /fraclen_visual/resnet50_nvidia_8bit_fraclens.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/fraclen_visual/resnet50_nvidia_8bit_fraclens.pdf -------------------------------------------------------------------------------- /fraclen_visual/resnet50_nvidia_8bit_fraclens_wo_title.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/fraclen_visual/resnet50_nvidia_8bit_fraclens_wo_title.pdf -------------------------------------------------------------------------------- /fraclen_visual/resnet50_ptcv_8bit_fraclens.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/fraclen_visual/resnet50_ptcv_8bit_fraclens.pdf -------------------------------------------------------------------------------- /fraclen_visual/resnet50_ptcv_8bit_fraclens_wo_title.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/fraclen_visual/resnet50_ptcv_8bit_fraclens_wo_title.pdf -------------------------------------------------------------------------------- /imagenet_classes: -------------------------------------------------------------------------------- 1 | n01440764 2 | n01443537 3 | n01484850 4 | n01491361 5 | n01494475 6 | n01496331 7 | n01498041 8 | n01514668 9 | n01514859 10 | n01518878 11 | n01530575 12 | n01531178 13 | n01532829 14 | n01534433 15 | n01537544 16 | n01558993 17 | n01560419 18 | n01580077 19 | n01582220 20 | n01592084 21 | n01601694 22 | n01608432 23 | n01614925 24 | n01616318 25 | n01622779 26 | n01629819 27 | n01630670 28 | n01631663 29 | n01632458 30 | n01632777 31 | n01641577 32 | n01644373 33 | n01644900 34 | n01664065 35 | n01665541 36 | n01667114 37 | n01667778 38 | n01669191 39 | n01675722 40 | n01677366 41 | n01682714 42 | n01685808 43 | n01687978 44 | n01688243 45 | n01689811 46 | n01692333 47 | n01693334 48 | n01694178 49 | n01695060 50 | n01697457 51 | n01698640 52 | n01704323 53 | n01728572 54 | n01728920 55 | n01729322 56 | n01729977 57 | n01734418 58 | n01735189 59 | n01737021 60 | n01739381 61 | n01740131 62 | n01742172 63 | n01744401 64 | n01748264 65 | n01749939 66 | n01751748 67 | n01753488 68 | n01755581 69 | n01756291 70 | n01768244 71 | n01770081 72 | n01770393 73 | n01773157 74 | n01773549 75 | n01773797 76 | n01774384 77 | n01774750 78 | n01775062 79 | n01776313 80 | n01784675 81 | n01795545 82 | n01796340 83 | n01797886 84 | n01798484 85 | n01806143 86 | n01806567 87 | n01807496 88 | n01817953 89 | n01818515 90 | n01819313 91 | n01820546 92 | n01824575 93 | n01828970 94 | n01829413 95 | n01833805 96 | n01843065 97 | n01843383 98 | n01847000 99 | n01855032 100 | n01855672 101 | n01860187 102 | n01871265 103 | n01872401 104 | 
n01873310 105 | n01877812 106 | n01882714 107 | n01883070 108 | n01910747 109 | n01914609 110 | n01917289 111 | n01924916 112 | n01930112 113 | n01943899 114 | n01944390 115 | n01945685 116 | n01950731 117 | n01955084 118 | n01968897 119 | n01978287 120 | n01978455 121 | n01980166 122 | n01981276 123 | n01983481 124 | n01984695 125 | n01985128 126 | n01986214 127 | n01990800 128 | n02002556 129 | n02002724 130 | n02006656 131 | n02007558 132 | n02009229 133 | n02009912 134 | n02011460 135 | n02012849 136 | n02013706 137 | n02017213 138 | n02018207 139 | n02018795 140 | n02025239 141 | n02027492 142 | n02028035 143 | n02033041 144 | n02037110 145 | n02051845 146 | n02056570 147 | n02058221 148 | n02066245 149 | n02071294 150 | n02074367 151 | n02077923 152 | n02085620 153 | n02085782 154 | n02085936 155 | n02086079 156 | n02086240 157 | n02086646 158 | n02086910 159 | n02087046 160 | n02087394 161 | n02088094 162 | n02088238 163 | n02088364 164 | n02088466 165 | n02088632 166 | n02089078 167 | n02089867 168 | n02089973 169 | n02090379 170 | n02090622 171 | n02090721 172 | n02091032 173 | n02091134 174 | n02091244 175 | n02091467 176 | n02091635 177 | n02091831 178 | n02092002 179 | n02092339 180 | n02093256 181 | n02093428 182 | n02093647 183 | n02093754 184 | n02093859 185 | n02093991 186 | n02094114 187 | n02094258 188 | n02094433 189 | n02095314 190 | n02095570 191 | n02095889 192 | n02096051 193 | n02096177 194 | n02096294 195 | n02096437 196 | n02096585 197 | n02097047 198 | n02097130 199 | n02097209 200 | n02097298 201 | n02097474 202 | n02097658 203 | n02098105 204 | n02098286 205 | n02098413 206 | n02099267 207 | n02099429 208 | n02099601 209 | n02099712 210 | n02099849 211 | n02100236 212 | n02100583 213 | n02100735 214 | n02100877 215 | n02101006 216 | n02101388 217 | n02101556 218 | n02102040 219 | n02102177 220 | n02102318 221 | n02102480 222 | n02102973 223 | n02104029 224 | n02104365 225 | n02105056 226 | n02105162 227 | n02105251 228 | n02105412 229 | n02105505 230 | n02105641 231 | n02105855 232 | n02106030 233 | n02106166 234 | n02106382 235 | n02106550 236 | n02106662 237 | n02107142 238 | n02107312 239 | n02107574 240 | n02107683 241 | n02107908 242 | n02108000 243 | n02108089 244 | n02108422 245 | n02108551 246 | n02108915 247 | n02109047 248 | n02109525 249 | n02109961 250 | n02110063 251 | n02110185 252 | n02110341 253 | n02110627 254 | n02110806 255 | n02110958 256 | n02111129 257 | n02111277 258 | n02111500 259 | n02111889 260 | n02112018 261 | n02112137 262 | n02112350 263 | n02112706 264 | n02113023 265 | n02113186 266 | n02113624 267 | n02113712 268 | n02113799 269 | n02113978 270 | n02114367 271 | n02114548 272 | n02114712 273 | n02114855 274 | n02115641 275 | n02115913 276 | n02116738 277 | n02117135 278 | n02119022 279 | n02119789 280 | n02120079 281 | n02120505 282 | n02123045 283 | n02123159 284 | n02123394 285 | n02123597 286 | n02124075 287 | n02125311 288 | n02127052 289 | n02128385 290 | n02128757 291 | n02128925 292 | n02129165 293 | n02129604 294 | n02130308 295 | n02132136 296 | n02133161 297 | n02134084 298 | n02134418 299 | n02137549 300 | n02138441 301 | n02165105 302 | n02165456 303 | n02167151 304 | n02168699 305 | n02169497 306 | n02172182 307 | n02174001 308 | n02177972 309 | n02190166 310 | n02206856 311 | n02219486 312 | n02226429 313 | n02229544 314 | n02231487 315 | n02233338 316 | n02236044 317 | n02256656 318 | n02259212 319 | n02264363 320 | n02268443 321 | n02268853 322 | n02276258 323 | n02277742 324 | n02279972 325 | n02280649 326 | 
n02281406 327 | n02281787 328 | n02317335 329 | n02319095 330 | n02321529 331 | n02325366 332 | n02326432 333 | n02328150 334 | n02342885 335 | n02346627 336 | n02356798 337 | n02361337 338 | n02363005 339 | n02364673 340 | n02389026 341 | n02391049 342 | n02395406 343 | n02396427 344 | n02397096 345 | n02398521 346 | n02403003 347 | n02408429 348 | n02410509 349 | n02412080 350 | n02415577 351 | n02417914 352 | n02422106 353 | n02422699 354 | n02423022 355 | n02437312 356 | n02437616 357 | n02441942 358 | n02442845 359 | n02443114 360 | n02443484 361 | n02444819 362 | n02445715 363 | n02447366 364 | n02454379 365 | n02457408 366 | n02480495 367 | n02480855 368 | n02481823 369 | n02483362 370 | n02483708 371 | n02484975 372 | n02486261 373 | n02486410 374 | n02487347 375 | n02488291 376 | n02488702 377 | n02489166 378 | n02490219 379 | n02492035 380 | n02492660 381 | n02493509 382 | n02493793 383 | n02494079 384 | n02497673 385 | n02500267 386 | n02504013 387 | n02504458 388 | n02509815 389 | n02510455 390 | n02514041 391 | n02526121 392 | n02536864 393 | n02606052 394 | n02607072 395 | n02640242 396 | n02641379 397 | n02643566 398 | n02655020 399 | n02666196 400 | n02667093 401 | n02669723 402 | n02672831 403 | n02676566 404 | n02687172 405 | n02690373 406 | n02692877 407 | n02699494 408 | n02701002 409 | n02704792 410 | n02708093 411 | n02727426 412 | n02730930 413 | n02747177 414 | n02749479 415 | n02769748 416 | n02776631 417 | n02777292 418 | n02782093 419 | n02783161 420 | n02786058 421 | n02787622 422 | n02788148 423 | n02790996 424 | n02791124 425 | n02791270 426 | n02793495 427 | n02794156 428 | n02795169 429 | n02797295 430 | n02799071 431 | n02802426 432 | n02804414 433 | n02804610 434 | n02807133 435 | n02808304 436 | n02808440 437 | n02814533 438 | n02814860 439 | n02815834 440 | n02817516 441 | n02823428 442 | n02823750 443 | n02825657 444 | n02834397 445 | n02835271 446 | n02837789 447 | n02840245 448 | n02841315 449 | n02843684 450 | n02859443 451 | n02860847 452 | n02865351 453 | n02869837 454 | n02870880 455 | n02871525 456 | n02877765 457 | n02879718 458 | n02883205 459 | n02892201 460 | n02892767 461 | n02894605 462 | n02895154 463 | n02906734 464 | n02909870 465 | n02910353 466 | n02916936 467 | n02917067 468 | n02927161 469 | n02930766 470 | n02939185 471 | n02948072 472 | n02950826 473 | n02951358 474 | n02951585 475 | n02963159 476 | n02965783 477 | n02966193 478 | n02966687 479 | n02971356 480 | n02974003 481 | n02977058 482 | n02978881 483 | n02979186 484 | n02980441 485 | n02981792 486 | n02988304 487 | n02992211 488 | n02992529 489 | n02999410 490 | n03000134 491 | n03000247 492 | n03000684 493 | n03014705 494 | n03016953 495 | n03017168 496 | n03018349 497 | n03026506 498 | n03028079 499 | n03032252 500 | n03041632 501 | n03042490 502 | n03045698 503 | n03047690 504 | n03062245 505 | n03063599 506 | n03063689 507 | n03065424 508 | n03075370 509 | n03085013 510 | n03089624 511 | n03095699 512 | n03100240 513 | n03109150 514 | n03110669 515 | n03124043 516 | n03124170 517 | n03125729 518 | n03126707 519 | n03127747 520 | n03127925 521 | n03131574 522 | n03133878 523 | n03134739 524 | n03141823 525 | n03146219 526 | n03160309 527 | n03179701 528 | n03180011 529 | n03187595 530 | n03188531 531 | n03196217 532 | n03197337 533 | n03201208 534 | n03207743 535 | n03207941 536 | n03208938 537 | n03216828 538 | n03218198 539 | n03220513 540 | n03223299 541 | n03240683 542 | n03249569 543 | n03250847 544 | n03255030 545 | n03259280 546 | n03271574 547 | n03272010 548 | 
n03272562 549 | n03290653 550 | n03291819 551 | n03297495 552 | n03314780 553 | n03325584 554 | n03337140 555 | n03344393 556 | n03345487 557 | n03347037 558 | n03355925 559 | n03372029 560 | n03376595 561 | n03379051 562 | n03384352 563 | n03388043 564 | n03388183 565 | n03388549 566 | n03393912 567 | n03394916 568 | n03400231 569 | n03404251 570 | n03417042 571 | n03424325 572 | n03425413 573 | n03443371 574 | n03444034 575 | n03445777 576 | n03445924 577 | n03447447 578 | n03447721 579 | n03450230 580 | n03452741 581 | n03457902 582 | n03459775 583 | n03461385 584 | n03467068 585 | n03476684 586 | n03476991 587 | n03478589 588 | n03481172 589 | n03482405 590 | n03483316 591 | n03485407 592 | n03485794 593 | n03492542 594 | n03494278 595 | n03495258 596 | n03496892 597 | n03498962 598 | n03527444 599 | n03529860 600 | n03530642 601 | n03532672 602 | n03534580 603 | n03535780 604 | n03538406 605 | n03544143 606 | n03584254 607 | n03584829 608 | n03590841 609 | n03594734 610 | n03594945 611 | n03595614 612 | n03598930 613 | n03599486 614 | n03602883 615 | n03617480 616 | n03623198 617 | n03627232 618 | n03630383 619 | n03633091 620 | n03637318 621 | n03642806 622 | n03649909 623 | n03657121 624 | n03658185 625 | n03661043 626 | n03662601 627 | n03666591 628 | n03670208 629 | n03673027 630 | n03676483 631 | n03680355 632 | n03690938 633 | n03691459 634 | n03692522 635 | n03697007 636 | n03706229 637 | n03709823 638 | n03710193 639 | n03710637 640 | n03710721 641 | n03717622 642 | n03720891 643 | n03721384 644 | n03724870 645 | n03729826 646 | n03733131 647 | n03733281 648 | n03733805 649 | n03742115 650 | n03743016 651 | n03759954 652 | n03761084 653 | n03763968 654 | n03764736 655 | n03769881 656 | n03770439 657 | n03770679 658 | n03773504 659 | n03775071 660 | n03775546 661 | n03776460 662 | n03777568 663 | n03777754 664 | n03781244 665 | n03782006 666 | n03785016 667 | n03786901 668 | n03787032 669 | n03788195 670 | n03788365 671 | n03791053 672 | n03792782 673 | n03792972 674 | n03793489 675 | n03794056 676 | n03796401 677 | n03803284 678 | n03804744 679 | n03814639 680 | n03814906 681 | n03825788 682 | n03832673 683 | n03837869 684 | n03838899 685 | n03840681 686 | n03841143 687 | n03843555 688 | n03854065 689 | n03857828 690 | n03866082 691 | n03868242 692 | n03868863 693 | n03871628 694 | n03873416 695 | n03874293 696 | n03874599 697 | n03876231 698 | n03877472 699 | n03877845 700 | n03884397 701 | n03887697 702 | n03888257 703 | n03888605 704 | n03891251 705 | n03891332 706 | n03895866 707 | n03899768 708 | n03902125 709 | n03903868 710 | n03908618 711 | n03908714 712 | n03916031 713 | n03920288 714 | n03924679 715 | n03929660 716 | n03929855 717 | n03930313 718 | n03930630 719 | n03933933 720 | n03935335 721 | n03937543 722 | n03938244 723 | n03942813 724 | n03944341 725 | n03947888 726 | n03950228 727 | n03954731 728 | n03956157 729 | n03958227 730 | n03961711 731 | n03967562 732 | n03970156 733 | n03976467 734 | n03976657 735 | n03977966 736 | n03980874 737 | n03982430 738 | n03983396 739 | n03991062 740 | n03992509 741 | n03995372 742 | n03998194 743 | n04004767 744 | n04005630 745 | n04008634 746 | n04009552 747 | n04019541 748 | n04023962 749 | n04026417 750 | n04033901 751 | n04033995 752 | n04037443 753 | n04039381 754 | n04040759 755 | n04041544 756 | n04044716 757 | n04049303 758 | n04065272 759 | n04067472 760 | n04069434 761 | n04070727 762 | n04074963 763 | n04081281 764 | n04086273 765 | n04090263 766 | n04099969 767 | n04111531 768 | n04116512 769 | n04118538 770 | 
n04118776 771 | n04120489 772 | n04125021 773 | n04127249 774 | n04131690 775 | n04133789 776 | n04136333 777 | n04141076 778 | n04141327 779 | n04141975 780 | n04146614 781 | n04147183 782 | n04149813 783 | n04152593 784 | n04153751 785 | n04154565 786 | n04162706 787 | n04179913 788 | n04192698 789 | n04200800 790 | n04201297 791 | n04204238 792 | n04204347 793 | n04208210 794 | n04209133 795 | n04209239 796 | n04228054 797 | n04229816 798 | n04235860 799 | n04238763 800 | n04239074 801 | n04243546 802 | n04251144 803 | n04252077 804 | n04252225 805 | n04254120 806 | n04254680 807 | n04254777 808 | n04258138 809 | n04259630 810 | n04263257 811 | n04264628 812 | n04265275 813 | n04266014 814 | n04270147 815 | n04273569 816 | n04275548 817 | n04277352 818 | n04285008 819 | n04286575 820 | n04296562 821 | n04310018 822 | n04311004 823 | n04311174 824 | n04317175 825 | n04325704 826 | n04326547 827 | n04328186 828 | n04330267 829 | n04332243 830 | n04335435 831 | n04336792 832 | n04344873 833 | n04346328 834 | n04347754 835 | n04350905 836 | n04355338 837 | n04355933 838 | n04356056 839 | n04357314 840 | n04366367 841 | n04367480 842 | n04370456 843 | n04371430 844 | n04371774 845 | n04372370 846 | n04376876 847 | n04380533 848 | n04389033 849 | n04392985 850 | n04398044 851 | n04399382 852 | n04404412 853 | n04409515 854 | n04417672 855 | n04418357 856 | n04423845 857 | n04428191 858 | n04429376 859 | n04435653 860 | n04442312 861 | n04443257 862 | n04447861 863 | n04456115 864 | n04458633 865 | n04461696 866 | n04462240 867 | n04465501 868 | n04467665 869 | n04476259 870 | n04479046 871 | n04482393 872 | n04483307 873 | n04485082 874 | n04486054 875 | n04487081 876 | n04487394 877 | n04493381 878 | n04501370 879 | n04505470 880 | n04507155 881 | n04509417 882 | n04515003 883 | n04517823 884 | n04522168 885 | n04523525 886 | n04525038 887 | n04525305 888 | n04532106 889 | n04532670 890 | n04536866 891 | n04540053 892 | n04542943 893 | n04548280 894 | n04548362 895 | n04550184 896 | n04552348 897 | n04553703 898 | n04554684 899 | n04557648 900 | n04560804 901 | n04562935 902 | n04579145 903 | n04579432 904 | n04584207 905 | n04589890 906 | n04590129 907 | n04591157 908 | n04591713 909 | n04592741 910 | n04596742 911 | n04597913 912 | n04599235 913 | n04604644 914 | n04606251 915 | n04612504 916 | n04613696 917 | n06359193 918 | n06596364 919 | n06785654 920 | n06794110 921 | n06874185 922 | n07248320 923 | n07565083 924 | n07579787 925 | n07583066 926 | n07584110 927 | n07590611 928 | n07613480 929 | n07614500 930 | n07615774 931 | n07684084 932 | n07693725 933 | n07695742 934 | n07697313 935 | n07697537 936 | n07711569 937 | n07714571 938 | n07714990 939 | n07715103 940 | n07716358 941 | n07716906 942 | n07717410 943 | n07717556 944 | n07718472 945 | n07718747 946 | n07720875 947 | n07730033 948 | n07734744 949 | n07742313 950 | n07745940 951 | n07747607 952 | n07749582 953 | n07753113 954 | n07753275 955 | n07753592 956 | n07754684 957 | n07760859 958 | n07768694 959 | n07802026 960 | n07831146 961 | n07836838 962 | n07860988 963 | n07871810 964 | n07873807 965 | n07875152 966 | n07880968 967 | n07892512 968 | n07920052 969 | n07930864 970 | n07932039 971 | n09193705 972 | n09229709 973 | n09246464 974 | n09256479 975 | n09288635 976 | n09332890 977 | n09399592 978 | n09421951 979 | n09428293 980 | n09468604 981 | n09472597 982 | n09835506 983 | n10148035 984 | n10565667 985 | n11879895 986 | n11939491 987 | n12057211 988 | n12144580 989 | n12267677 990 | n12620546 991 | n12768682 992 | 
n12985857 993 | n12998815 994 | n13037406 995 | n13040303 996 | n13044778 997 | n13052670 998 | n13054560 999 | n13133613 1000 | n15075141 1001 | -------------------------------------------------------------------------------- /images/comparisons.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/images/comparisons.png -------------------------------------------------------------------------------- /models/fix_mobilenet_v1.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.modules.utils import _pair 6 | 7 | from .fix_quant_ops import ( 8 | fix_quant, 9 | int_op_only_fix_quant, 10 | FXQAvgPool2d, 11 | ReLUClipFXQConvBN, 12 | ReLUClipFXQLinear, 13 | ) 14 | from myutils.config import FLAGS 15 | 16 | 17 | class IntBlock(nn.Module): 18 | def __init__(self, body): 19 | super(IntBlock, self).__init__() 20 | self.body = body 21 | self.int_op_only = getattr(body, 'int_op_only', False) 22 | 23 | def forward(self, x): 24 | assert getattr(FLAGS, 'int_infer', False) 25 | if getattr(self, 'int_op_only', False): 26 | res = x 27 | for layer_ in self.body: 28 | if isinstance(layer_, nn.Conv2d): 29 | res = int_op_only_fix_quant(res, 8, 30 | layer_.input_fraclen.item(), 31 | res.output_fraclen, 32 | layer_.input_symmetric) 33 | res = layer_(res) 34 | output_fraclen = (layer_.weight_fraclen + 35 | layer_.input_fraclen).item() 36 | setattr(res, 'output_fraclen', output_fraclen) 37 | else: 38 | res = layer_(res) 39 | else: 40 | res = x 41 | for layer_ in self.body: 42 | if isinstance(layer_, nn.Conv2d): 43 | res = (fix_quant(res, 8, layer_.input_fraclen, 1, 44 | layer_.input_symmetric)[0] * 45 | (2**layer_.input_fraclen)).int().float() 46 | res = layer_(res) 47 | res.div_(2**(layer_.weight_fraclen + layer_.input_fraclen)) 48 | else: 49 | res = layer_(res) 50 | return res 51 | 52 | 53 | class DepthwiseSeparableConv(nn.Module): 54 | def __init__(self, inp, outp, stride): 55 | super(DepthwiseSeparableConv, self).__init__() 56 | assert stride in [1, 2] 57 | 58 | l1 = ReLUClipFXQConvBN(inp, inp, 3, stride, 1, groups=inp) 59 | l2 = ReLUClipFXQConvBN(inp, outp, 1, 1, 0) 60 | layers = [ 61 | l1, 62 | l2, 63 | nn.ReLU(inplace=True), 64 | ] 65 | self.body = nn.Sequential(*layers) 66 | self.layer_dict = {} 67 | 68 | def forward(self, x): 69 | return self.body(x) 70 | 71 | def set_following_layer(self, following_layer): 72 | self.layer_dict['following'] = following_layer 73 | self.body[0].set_following_layer(self.body[1]) 74 | self.body[1].set_following_layer(following_layer) 75 | 76 | def get_following_layer(self): 77 | return self.layer_dict['following'] 78 | 79 | def master_child(self): 80 | return self.body[0] 81 | 82 | def int_block(self, avgpool_scale=1.0): 83 | l1 = self.body[0].int_conv() 84 | l2 = self.body[1].int_conv(avgpool_scale=avgpool_scale) 85 | layers = [ 86 | l1, 87 | nn.ReLU(inplace=True), 88 | l2, 89 | nn.ReLU(inplace=True), 90 | ] 91 | body = nn.Sequential(*layers) 92 | return IntBlock(body) 93 | 94 | 95 | class IntModel(nn.Module): 96 | def __init__(self, head, body, classifier, block_setting): 97 | super(IntModel, self).__init__() 98 | 99 | self.block_setting = block_setting 100 | 101 | # head 102 | self.head = head 103 | 104 | # body 105 | for idx, [c, n, s] in enumerate(self.block_setting): 106 | for i in range(n): 107 | setattr(self, 
f'stage_{idx}_layer_{i}', 108 | body[f'stage_{idx}_layer_{i}']) 109 | 110 | if getattr(FLAGS, 'quant_avgpool', False): 111 | self.avgpool = FXQAvgPool2d(7) 112 | else: 113 | self.avgpool = nn.AdaptiveAvgPool2d(1) 114 | 115 | # classifier 116 | self.classifier = classifier 117 | 118 | self.int_op_only = getattr(head, 'int_op_only', False) 119 | 120 | def forward(self, x): 121 | assert getattr(FLAGS, 'int_infer', False) 122 | if getattr(self, 'int_op_only', False): 123 | x = self.head(x) 124 | output_fraclen = (self.head[0].weight_fraclen + 125 | self.head[0].input_fraclen).item() 126 | setattr(x, 'output_fraclen', output_fraclen) 127 | for idx, [_, n, _] in enumerate(self.block_setting): 128 | for i in range(n): 129 | blk = getattr(self, f'stage_{idx}_layer_{i}') 130 | x = blk(x) 131 | if getattr(FLAGS, 'quant_avgpool', False): 132 | x = self.avgpool(x) 133 | output_fraclen = x.output_fraclen 134 | x = x.view(x.size(0), -1) 135 | setattr(x, 'output_fraclen', output_fraclen) 136 | x = int_op_only_fix_quant( 137 | x, 8, self.classifier[0].input_fraclen.item(), 138 | x.output_fraclen, self.classifier[0].input_symmetric) 139 | else: 140 | output_fraclen = x.output_fraclen 141 | x = self.avgpool(x.float()) 142 | x = x.view(x.size(0), -1) 143 | x.div_(2**output_fraclen) 144 | x = (fix_quant(x, 8, self.classifier[0].input_fraclen.float(), 145 | 1, self.classifier[0].input_symmetric)[0] * 146 | (2**self.classifier[0].input_fraclen.float())).int() 147 | x = self.classifier(x).float() 148 | else: 149 | if getattr(FLAGS, 'normalize', False): 150 | x = (fix_quant(x, 8, self.head[0].input_fraclen, 1, 151 | self.head[0].input_symmetric)[0] * 152 | (2**self.head[0].input_fraclen)).int().float() 153 | else: 154 | x = (x * (2**self.head[0].input_fraclen)).int().float() 155 | x = self.head(x) 156 | x.div_(2**(self.head[0].input_fraclen + 157 | self.head[0].weight_fraclen)) 158 | for idx, [_, n, _] in enumerate(self.block_setting): 159 | for i in range(n): 160 | blk = getattr(self, f'stage_{idx}_layer_{i}') 161 | x = blk(x) 162 | x = self.avgpool(x) 163 | x = x.view(x.size(0), -1) 164 | x = (fix_quant(x, 8, self.classifier[0].input_fraclen, 1, 165 | self.classifier[0].input_symmetric)[0] * 166 | (2**self.classifier[0].input_fraclen)).int().float() 167 | x = self.classifier(x) 168 | return x 169 | 170 | 171 | class Model(nn.Module): 172 | def __init__(self, num_classes=1000): 173 | super(Model, self).__init__() 174 | 175 | # setting of inverted residual blocks 176 | self.block_setting = [ 177 | # c, n, s 178 | [64, 1, 1], 179 | [128, 2, 2], 180 | [256, 2, 2], 181 | [512, 6, 2], 182 | [1024, 2, 2], 183 | ] 184 | 185 | # head 186 | channels = 32 187 | first_stride = 2 188 | self.head = nn.Sequential( 189 | ReLUClipFXQConvBN( 190 | 3, 191 | channels, 192 | 3, 193 | first_stride, 194 | 1, 195 | bitw_min=None, 196 | bita_min=8, 197 | weight_only=not getattr(FLAGS, 'normalize', False), 198 | double_side=getattr(FLAGS, 'normalize', False)), 199 | nn.ReLU(inplace=True), 200 | ) 201 | prev_layer = self.head[0] 202 | 203 | # body 204 | for idx, [c, n, s] in enumerate(self.block_setting): 205 | outp = c 206 | for i in range(n): 207 | if i == 0: 208 | layer = DepthwiseSeparableConv(channels, outp, s) 209 | else: 210 | layer = DepthwiseSeparableConv(channels, outp, 1) 211 | setattr(self, 'stage_{}_layer_{}'.format(idx, i), layer) 212 | channels = outp 213 | prev_layer.set_following_layer(layer.master_child()) 214 | prev_layer = layer 215 | 216 | if getattr(FLAGS, 'quant_avgpool', False): 217 | self.avgpool = 
FXQAvgPool2d(7) 218 | if getattr(FLAGS, 'pool_fusing', False): 219 | stage_idx = len(self.block_setting) - 1 220 | layer_idx = self.block_setting[-1][1] - 1 221 | last_conv_layer = getattr( 222 | self, f'stage_{stage_idx}_layer_{layer_idx}') 223 | last_conv_layer.avgpool_scale = self.avgpool.scale 224 | else: 225 | self.avgpool = nn.AdaptiveAvgPool2d(1) 226 | 227 | # classifier 228 | self.classifier = nn.Sequential( 229 | ReLUClipFXQLinear(outp, num_classes, bitw_min=None)) 230 | prev_layer.set_following_layer(self.classifier[0]) 231 | 232 | if FLAGS.reset_parameters: 233 | self.reset_parameters() 234 | 235 | def forward(self, x): 236 | x = self.head(x) 237 | for idx, [_, n, _] in enumerate(self.block_setting): 238 | for i in range(n): 239 | x = getattr(self, 'stage_{}_layer_{}'.format(idx, i))(x) 240 | x = self.avgpool(x) 241 | x = x.view(x.size(0), -1) 242 | x = self.classifier(x) 243 | return x 244 | 245 | def reset_parameters(self): 246 | for m in self.modules(): 247 | if isinstance(m, nn.Conv2d): 248 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 249 | m.weight.data.normal_(0, math.sqrt(2. / n)) 250 | if m.bias is not None: 251 | m.bias.data.zero_() 252 | elif isinstance(m, nn.BatchNorm2d): 253 | if m.weight is not None: 254 | m.weight.data.fill_(1) 255 | if m.bias is not None: 256 | m.bias.data.zero_() 257 | elif isinstance(m, nn.Linear): 258 | n = m.weight.size(1) 259 | m.weight.data.normal_(0, 0.01) 260 | m.bias.data.zero_() 261 | 262 | def int_model(self): 263 | if getattr(FLAGS, 'quant_avgpool', False): 264 | avgpool = FXQAvgPool2d(7) 265 | avgpool_scale = avgpool.scale 266 | else: 267 | avgpool_scale = 1.0 268 | head = self.head 269 | head[0] = head[0].int_conv() 270 | body = { 271 | f'stage_{idx}_layer_{i}': 272 | getattr(self, f'stage_{idx}_layer_{i}').int_block( 273 | avgpool_scale=avgpool_scale if ( 274 | idx == len(self.block_setting) - 1 and i == n - 275 | 1) else 1.0) 276 | for idx, [c, n, s] in enumerate(self.block_setting) 277 | for i in range(n) 278 | } 279 | classifier = self.classifier 280 | classifier[0] = classifier[0].int_fc() 281 | return IntModel(head, body, classifier, self.block_setting) 282 | -------------------------------------------------------------------------------- /models/fix_mobilenet_v2.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .fix_quant_ops import (fix_quant, int_op_only_fix_quant, FXQAvgPool2d, 7 | ReLUClipFXQConvBN, ReLUClipFXQLinear) 8 | from myutils.config import FLAGS 9 | 10 | 11 | class IntBlock(nn.Module): 12 | def __init__(self, body, residual_connection): 13 | super(IntBlock, self).__init__() 14 | self.body = body 15 | self.residual_connection = residual_connection 16 | self.int_op_only = getattr(body, 'int_op_only', False) 17 | 18 | def forward(self, x): 19 | assert getattr(FLAGS, 'int_infer', False) 20 | if getattr(self, 'int_op_only', False): 21 | res = x 22 | for layer_ in self.body: 23 | if isinstance(layer_, nn.Conv2d): 24 | res = int_op_only_fix_quant(res, 8, 25 | layer_.input_fraclen.item(), 26 | res.output_fraclen, 27 | layer_.input_symmetric) 28 | res = layer_(res) 29 | output_fraclen = (layer_.weight_fraclen + 30 | layer_.input_fraclen).item() 31 | setattr(res, 'output_fraclen', output_fraclen) 32 | else: 33 | res = layer_(res) 34 | if self.residual_connection: 35 | res_fraclen = res.output_fraclen 36 | x_fraclen = x.output_fraclen 37 | if res_fraclen >= x_fraclen: 38 | x = x << 
(res_fraclen - x_fraclen) 39 | res += x 40 | res.clamp_(max=(1 << 31) - 1, min=-(1 << 31) + 1) 41 | output_fraclen = res_fraclen 42 | setattr(res, 'output_fraclen', output_fraclen) 43 | else: 44 | res = res << (x_fraclen - res_fraclen) 45 | res += x 46 | res.clamp_(max=(1 << 31) - 1, min=-(1 << 31) + 1) 47 | output_fraclen = x_fraclen 48 | setattr(res, 'output_fraclen', output_fraclen) 49 | else: 50 | res = x 51 | for layer_ in self.body: 52 | if isinstance(layer_, nn.Conv2d): 53 | res = (fix_quant(res, 8, layer_.input_fraclen, 1, 54 | layer_.input_symmetric)[0] * 55 | (2**layer_.input_fraclen)).int().float() 56 | res = layer_(res) 57 | res.div_(2**(layer_.weight_fraclen + layer_.input_fraclen)) 58 | else: 59 | res = layer_(res) 60 | setattr(res, 'output_fraclen', 61 | self.body[-1].weight_fraclen + self.body[-1].input_fraclen) 62 | if self.residual_connection: 63 | res_fraclen = res.output_fraclen 64 | x_fraclen = x.output_fraclen 65 | output_fraclen = max(res_fraclen, x_fraclen) 66 | res = res * 2**output_fraclen 67 | x = x * 2**output_fraclen 68 | res += x 69 | res = torch.clamp(res, max=(1 << 31) - 1, min=-(1 << 31) + 1) 70 | res = res / 2**output_fraclen 71 | setattr(res, 'output_fraclen', output_fraclen) 72 | return res 73 | 74 | 75 | class InvertedResidual(nn.Module): 76 | def __init__(self, 77 | inp, 78 | outp, 79 | stride, 80 | expand_ratio, 81 | double_side=False, 82 | master_layer=None): 83 | super(InvertedResidual, self).__init__() 84 | assert stride in [1, 2] 85 | 86 | expand_inp = inp * expand_ratio 87 | if expand_ratio != 1: 88 | # expand 89 | l0 = ReLUClipFXQConvBN(inp, 90 | expand_inp, 91 | 1, 92 | 1, 93 | 0, 94 | double_side=double_side, 95 | master_layer=master_layer) 96 | # depthwise + project back 97 | l1 = ReLUClipFXQConvBN(expand_inp, 98 | expand_inp, 99 | 3, 100 | stride, 101 | 1, 102 | groups=expand_inp) 103 | l2 = ReLUClipFXQConvBN(expand_inp, outp, 1, 1, 0) 104 | layers = [ 105 | l0, 106 | l1, 107 | l2, 108 | ] 109 | else: 110 | # depthwise + project back 111 | l1 = ReLUClipFXQConvBN(expand_inp, 112 | expand_inp, 113 | 3, 114 | stride, 115 | 1, 116 | groups=expand_inp, 117 | double_side=double_side, 118 | master_layer=master_layer) 119 | l2 = ReLUClipFXQConvBN(expand_inp, outp, 1, 1, 0) 120 | layers = [ 121 | l1, 122 | l2, 123 | ] 124 | self.body = nn.Sequential(*layers) 125 | self.layer_dict = {} 126 | 127 | self.residual_connection = stride == 1 and inp == outp 128 | if self.residual_connection: 129 | self.set_master_layer(self.body[0]) 130 | else: 131 | self.set_master_layer(None) 132 | 133 | def forward(self, x): 134 | res = self.body(x) 135 | if self.residual_connection: 136 | if getattr(FLAGS, 'int_infer', False) and not self.training: 137 | res_fraclen = res.output_fraclen 138 | x_fraclen = x.output_fraclen 139 | output_fraclen = max(res_fraclen, x_fraclen) 140 | res = res * 2**output_fraclen 141 | x = x * 2**output_fraclen 142 | res += x 143 | res = torch.clamp(res, max=(1 << 31) - 1, min=-(1 << 31) + 1) 144 | res = res / 2**output_fraclen 145 | setattr(res, 'output_fraclen', output_fraclen) 146 | else: 147 | res += x 148 | return res 149 | 150 | def set_master_layer(self, master_layer): 151 | self.layer_dict['master'] = master_layer 152 | 153 | def get_master_layer(self): 154 | return self.layer_dict['master'] 155 | 156 | def set_following_layer(self, following_layer): 157 | self.layer_dict['following'] = following_layer 158 | for idx in range(len(self.body) - 1): 159 | self.body[idx].set_following_layer(self.body[idx + 1]) 160 | 
self.body[-1].set_following_layer(following_layer) 161 | 162 | def get_following_layer(self): 163 | return self.layer_dict['following'] 164 | 165 | def master_child(self): 166 | return self.body[0] 167 | 168 | def int_block(self): 169 | layers = [] 170 | layers.append(self.body[0].int_conv()) 171 | for layer_ in self.body[1:]: 172 | layers.append(nn.ReLU(inplace=True)) 173 | layers.append(layer_.int_conv()) 174 | body = nn.Sequential(*layers) 175 | residual_connection = self.residual_connection 176 | return IntBlock(body, residual_connection) 177 | 178 | 179 | class IntModel(nn.Module): 180 | def __init__(self, head, body, tail, classifier, block_setting): 181 | super(IntModel, self).__init__() 182 | 183 | self.block_setting = block_setting 184 | 185 | # head 186 | self.head = head 187 | 188 | # body 189 | for idx, [t, c, n, s] in enumerate(self.block_setting): 190 | for i in range(n): 191 | setattr(self, f'stage_{idx}_layer_{i}', 192 | body[f'stage_{idx}_layer_{i}']) 193 | 194 | # tail 195 | self.tail = tail 196 | 197 | if getattr(FLAGS, 'quant_avgpool', False): 198 | self.avgpool = FXQAvgPool2d(7) 199 | else: 200 | self.avgpool = nn.AdaptiveAvgPool2d(1) 201 | 202 | # classifier 203 | self.classifier = classifier 204 | 205 | self.int_op_only = getattr(head, 'int_op_only', False) 206 | 207 | def forward(self, x): 208 | assert getattr(FLAGS, 'int_infer', False) 209 | if getattr(self, 'int_op_only', False): 210 | x = self.head(x) 211 | output_fraclen = (self.head[0].weight_fraclen + 212 | self.head[0].input_fraclen).item() 213 | setattr(x, 'output_fraclen', output_fraclen) 214 | for idx, [_, _, n, _] in enumerate(self.block_setting): 215 | for i in range(n): 216 | blk = getattr(self, f'stage_{idx}_layer_{i}') 217 | x = blk(x) 218 | x = int_op_only_fix_quant(x, 8, self.tail[0].input_fraclen.item(), 219 | x.output_fraclen, 220 | self.tail[0].input_symmetric) 221 | x = self.tail(x) 222 | output_fraclen = (self.tail[0].weight_fraclen + 223 | self.tail[0].input_fraclen).item() 224 | setattr(x, 'output_fraclen', output_fraclen) 225 | if getattr(FLAGS, 'quant_avgpool', False): 226 | x = self.avgpool(x) 227 | output_fraclen = x.output_fraclen 228 | x = x.view(x.size(0), -1) 229 | setattr(x, 'output_fraclen', output_fraclen) 230 | x = int_op_only_fix_quant( 231 | x, 8, self.classifier[0].input_fraclen.item(), 232 | x.output_fraclen, self.classifier[0].input_symmetric) 233 | else: 234 | output_fraclen = x.output_fraclen 235 | x = self.avgpool(x.float()) 236 | x = x.view(x.size(0), -1) 237 | x.div_(2**output_fraclen) 238 | x = (fix_quant(x, 8, self.classifier[0].input_fraclen.float(), 239 | 1, self.classifier[0].input_symmetric)[0] * 240 | (2**self.classifier[0].input_fraclen.float())).int() 241 | x = self.classifier(x).float() 242 | else: 243 | if getattr(FLAGS, 'normalize', False): 244 | x = (fix_quant(x, 8, self.head[0].input_fraclen, 1, 245 | self.head[0].input_symmetric)[0] * 246 | (2**self.head[0].input_fraclen)).int().float() 247 | else: 248 | x = (x * (2**self.head[0].input_fraclen)).int().float() 249 | for layer_ in self.head: 250 | if hasattr(x, 'output_fraclen'): 251 | output_fraclen = x.output_fraclen 252 | x = layer_(x) 253 | else: 254 | x = layer_(x) 255 | output_fraclen = layer_.weight_fraclen + layer_.input_fraclen 256 | setattr(x, 'output_fraclen', output_fraclen) 257 | x.div_(2**x.output_fraclen) 258 | for idx, [_, _, n, _] in enumerate(self.block_setting): 259 | for i in range(n): 260 | blk = getattr(self, f'stage_{idx}_layer_{i}') 261 | x = blk(x) 262 | x = (fix_quant(x, 8, 
self.tail[0].input_fraclen, 1, 263 | self.tail[0].input_symmetric)[0] * 264 | (2**self.tail[0].input_fraclen)).int().float() 265 | x = self.tail(x) 266 | x.div_(2**(self.tail[0].input_fraclen + 267 | self.tail[0].weight_fraclen)) 268 | x = self.avgpool(x) 269 | x = x.view(x.size(0), -1) 270 | x = (fix_quant(x, 8, self.classifier[0].input_fraclen, 1, 271 | self.classifier[0].input_symmetric)[0] * 272 | (2**self.classifier[0].input_fraclen)).int().float() 273 | x = self.classifier(x) 274 | return x 275 | 276 | 277 | class Model(nn.Module): 278 | def __init__(self, num_classes=1000): 279 | super(Model, self).__init__() 280 | 281 | # setting of inverted residual blocks 282 | self.block_setting = [ 283 | # t, c, n, s 284 | [1, 16, 1, 1], 285 | [6, 24, 2, 2], 286 | [6, 32, 3, 2], 287 | [6, 64, 4, 2], 288 | [6, 96, 3, 1], 289 | [6, 160, 3, 2], 290 | [6, 320, 1, 1], 291 | ] 292 | 293 | # head 294 | channels = 32 295 | first_stride = 2 296 | self.head = nn.Sequential( 297 | ReLUClipFXQConvBN( 298 | 3, 299 | channels, 300 | 3, 301 | first_stride, 302 | 1, 303 | bitw_min=None, 304 | bita_min=8, 305 | weight_only=not getattr(FLAGS, 'normalize', False), 306 | double_side=getattr(FLAGS, 'normalize', False)), 307 | nn.ReLU(inplace=True), 308 | ) 309 | prev_layer = self.head[0] 310 | 311 | double_side = True 312 | # body 313 | master_layer = None 314 | for idx, [t, c, n, s] in enumerate(self.block_setting): 315 | outp = c 316 | for i in range(n): 317 | if i == 0: 318 | layer = InvertedResidual( 319 | channels, 320 | outp, 321 | s, 322 | t, 323 | double_side=double_side if idx != 0 else False, 324 | master_layer=master_layer) 325 | else: 326 | layer = InvertedResidual(channels, 327 | outp, 328 | 1, 329 | t, 330 | double_side=double_side, 331 | master_layer=master_layer) 332 | setattr(self, 'stage_{}_layer_{}'.format(idx, i), layer) 333 | channels = outp 334 | master_layer = layer.get_master_layer() 335 | prev_layer.set_following_layer(layer.master_child()) 336 | prev_layer = layer 337 | 338 | # tail 339 | outp = 1280 340 | self.tail = nn.Sequential( 341 | ReLUClipFXQConvBN(channels, 342 | outp, 343 | 1, 344 | 1, 345 | 0, 346 | double_side=double_side, 347 | master_layer=master_layer), 348 | nn.ReLU(inplace=True), 349 | ) 350 | prev_layer.set_following_layer(self.tail[0]) 351 | prev_layer = self.tail[0] 352 | 353 | if getattr(FLAGS, 'quant_avgpool', False): 354 | self.avgpool = FXQAvgPool2d(7) 355 | if getattr(FLAGS, 'pool_fusing', False): 356 | stage_idx = len(self.block_setting) - 1 357 | layer_idx = self.block_setting[-1][1] - 1 358 | last_conv_layer = self.tail[0] 359 | last_conv_layer.avgpool_scale = self.avgpool.scale 360 | else: 361 | self.avgpool = nn.AdaptiveAvgPool2d(1) 362 | 363 | # classifier 364 | self.classifier = nn.Sequential( 365 | ReLUClipFXQLinear( 366 | outp, 367 | num_classes, 368 | bitw_min=None, 369 | bias=True)) 370 | prev_layer.set_following_layer(self.classifier[0]) 371 | 372 | if FLAGS.reset_parameters: 373 | self.reset_parameters() 374 | 375 | def forward(self, x): 376 | x = self.head(x) 377 | for idx, [_, _, n, _] in enumerate(self.block_setting): 378 | for i in range(n): 379 | x = getattr(self, 'stage_{}_layer_{}'.format(idx, i))(x) 380 | x = self.tail(x) 381 | x = self.avgpool(x) 382 | x = x.view(x.size(0), -1) 383 | x = self.classifier(x) 384 | return x 385 | 386 | def reset_parameters(self): 387 | for m in self.modules(): 388 | if isinstance(m, nn.Conv2d): 389 | n = m.kernel_size[0] * m.kernel_size[ 390 | 1] * m.out_channels 391 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 392 | if m.bias is not None: 393 | m.bias.data.zero_() 394 | elif isinstance(m, nn.BatchNorm2d): 395 | if m.weight is not None: 396 | m.weight.data.fill_(1) 397 | if m.bias is not None: 398 | m.bias.data.zero_() 399 | elif isinstance(m, nn.Linear): 400 | n = m.weight.size(1) 401 | m.weight.data.normal_(0, 0.01) 402 | if m.bias is not None: 403 | m.bias.data.zero_() 404 | 405 | def int_model(self): 406 | if getattr(FLAGS, 'quant_avgpool', False): 407 | avgpool = FXQAvgPool2d(7) 408 | avgpool_scale = avgpool.scale 409 | else: 410 | avgpool_scale = 1.0 411 | head = self.head 412 | head[0] = head[0].int_conv() 413 | body = { 414 | f'stage_{idx}_layer_{i}': 415 | getattr(self, f'stage_{idx}_layer_{i}').int_block() 416 | for idx, [t, c, n, s] in enumerate(self.block_setting) 417 | for i in range(n) 418 | } 419 | tail = self.tail 420 | tail[0] = tail[0].int_conv(avgpool_scale=avgpool_scale) 421 | classifier = self.classifier 422 | classifier[0] = classifier[0].int_fc() 423 | return IntModel(head, body, tail, classifier, self.block_setting) 424 | -------------------------------------------------------------------------------- /models/fix_resnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.modules.utils import _pair 6 | 7 | from .fix_quant_ops import (fix_quant, int_op_only_fix_quant, FXQMaxPool2d, 8 | FXQAvgPool2d, ReLUClipFXQConvBN, ReLUClipFXQLinear) 9 | 10 | from myutils.config import FLAGS 11 | 12 | 13 | class IntBlock(nn.Module): 14 | def __init__(self, body, shortcut=None): 15 | super(IntBlock, self).__init__() 16 | self.body = body 17 | 18 | self.residual_connection = shortcut is None 19 | if not self.residual_connection: 20 | self.shortcut = shortcut 21 | self.post_relu = nn.ReLU(inplace=True) 22 | self.int_op_only = getattr(body, 'int_op_only', False) 23 | 24 | def forward(self, x): 25 | assert getattr(FLAGS, 'int_infer', False) 26 | if getattr(self, 'int_op_only', False): 27 | res = x 28 | for layer_ in self.body: 29 | if isinstance(layer_, nn.Conv2d): 30 | res = int_op_only_fix_quant(res, 8, 31 | layer_.input_fraclen.item(), 32 | res.output_fraclen, 33 | layer_.input_symmetric) 34 | res = layer_(res) 35 | output_fraclen = (layer_.weight_fraclen + 36 | layer_.input_fraclen).item() 37 | setattr(res, 'output_fraclen', output_fraclen) 38 | else: 39 | res = layer_(res) 40 | if self.residual_connection: 41 | res_fraclen = res.output_fraclen 42 | x_fraclen = x.output_fraclen 43 | if res_fraclen > x_fraclen: 44 | x = x << (res_fraclen - x_fraclen) 45 | res += x 46 | res.clamp_(max=(1 << 31) - 1, min=-(1 << 31) + 1) 47 | output_fraclen = res_fraclen 48 | setattr(res, 'output_fraclen', output_fraclen) 49 | else: 50 | res = res << (x_fraclen - res_fraclen) 51 | res += x 52 | res.clamp_(max=(1 << 31) - 1, min=-(1 << 31) + 1) 53 | output_fraclen = x_fraclen 54 | setattr(res, 'output_fraclen', output_fraclen) 55 | else: 56 | x = int_op_only_fix_quant( 57 | x, 8, self.shortcut[0].input_fraclen.item(), 58 | x.output_fraclen, self.shortcut[0].input_symmetric) 59 | x = self.shortcut(x) 60 | output_fraclen = (self.shortcut[-1].weight_fraclen + 61 | self.shortcut[-1].input_fraclen).item() 62 | setattr(x, 'output_fraclen', output_fraclen) 63 | res_fraclen = res.output_fraclen 64 | x_fraclen = x.output_fraclen 65 | if res_fraclen > x_fraclen: 66 | x = x << (res_fraclen - x_fraclen) 67 | res += x 68 | res.clamp_(max=(1 << 31) - 1, min=-(1 << 31) + 1) 69 | 
output_fraclen = res_fraclen 70 | setattr(res, 'output_fraclen', output_fraclen) 71 | else: 72 | res = res << (x_fraclen - res_fraclen) 73 | res += x 74 | res.clamp_(max=(1 << 31) - 1, min=-(1 << 31) + 1) 75 | output_fraclen = x_fraclen 76 | setattr(res, 'output_fraclen', output_fraclen) 77 | res = self.post_relu(res) 78 | else: 79 | res = x 80 | for layer_ in self.body: 81 | if isinstance(layer_, nn.Conv2d): 82 | res = (fix_quant(res, 8, layer_.input_fraclen, 1, 83 | layer_.input_symmetric)[0] * 84 | (2**layer_.input_fraclen)).int().float() 85 | res = layer_(res) 86 | res.div_(2**(layer_.weight_fraclen + layer_.input_fraclen)) 87 | else: 88 | res = layer_(res) 89 | setattr(res, 'output_fraclen', 90 | self.body[-1].weight_fraclen + self.body[-1].input_fraclen) 91 | if self.residual_connection: 92 | res_fraclen = res.output_fraclen 93 | x_fraclen = x.output_fraclen 94 | output_fraclen = max(res_fraclen, x_fraclen) 95 | res = res * 2**output_fraclen 96 | x = x * 2**output_fraclen 97 | res += x 98 | res = torch.clamp(res, max=(1 << 31) - 1, min=-(1 << 31) + 1) 99 | res = res / 2**output_fraclen 100 | else: 101 | x = (fix_quant(x, 8, self.shortcut[0].input_fraclen, 1, 102 | self.shortcut[0].input_symmetric)[0] * 103 | (2**self.shortcut[0].input_fraclen)).int().float() 104 | x = self.shortcut(x) 105 | setattr( 106 | x, 'output_fraclen', self.shortcut[-1].weight_fraclen + 107 | self.shortcut[-1].input_fraclen) 108 | x.div_(2**x.output_fraclen) 109 | res_fraclen = res.output_fraclen 110 | x_fraclen = x.output_fraclen 111 | output_fraclen = max(res_fraclen, x_fraclen) 112 | res = res * 2**output_fraclen 113 | x = x * 2**output_fraclen 114 | res += x 115 | res = torch.clamp(res, max=(1 << 31) - 1, min=-(1 << 31) + 1) 116 | res = res / 2**output_fraclen 117 | res = self.post_relu(res) 118 | setattr(res, 'output_fraclen', output_fraclen) 119 | return res 120 | 121 | 122 | class BasicBlock(nn.Module): 123 | expansion = 1 124 | 125 | def __init__(self, inp, outp, stride, master_layer=None): 126 | super(BasicBlock, self).__init__() 127 | assert stride in [1, 2] 128 | 129 | l1 = ReLUClipFXQConvBN(inp, 130 | outp, 131 | 3, 132 | stride, 133 | 1, 134 | master_layer=master_layer) 135 | l2 = ReLUClipFXQConvBN(outp, outp, 3, 1, 1) 136 | layers = [ 137 | l1, 138 | l2, 139 | ] 140 | self.body = nn.Sequential(*layers) 141 | self.layer_dict = {} 142 | 143 | self.residual_connection = stride == 1 and inp == outp 144 | if not self.residual_connection: 145 | self.shortcut = nn.Sequential( 146 | ReLUClipFXQConvBN(inp, 147 | outp, 148 | 1, 149 | stride=stride, 150 | master_layer=master_layer)) 151 | self.set_master_layer(None) 152 | else: 153 | self.set_master_layer(self.body[0]) 154 | self.post_relu = nn.ReLU(inplace=True) 155 | 156 | def forward(self, x): 157 | res = self.body(x) 158 | if getattr(FLAGS, 'int_infer', False) and not self.training: 159 | if self.residual_connection: 160 | res_fraclen = res.output_fraclen 161 | x_fraclen = x.output_fraclen 162 | output_fraclen = max(res_fraclen, x_fraclen) 163 | res = res * 2**output_fraclen 164 | x = x * 2**output_fraclen 165 | res += x 166 | res = torch.clamp(res, max=(1 << 31) - 1, min=-(1 << 31) + 1) 167 | res = res / 2**output_fraclen 168 | else: 169 | x = self.shortcut(x) 170 | res_fraclen = res.output_fraclen 171 | x_fraclen = x.output_fraclen 172 | output_fraclen = max(res_fraclen, x_fraclen) 173 | res = res * 2**output_fraclen 174 | x = x * 2**output_fraclen 175 | res += x 176 | res = torch.clamp(res, max=(1 << 31) - 1, min=-(1 << 31) + 1) 177 | res = res / 
2**output_fraclen 178 | else: 179 | if self.residual_connection: 180 | res += x 181 | else: 182 | res += self.shortcut(x) 183 | res = self.post_relu(res) 184 | if getattr(FLAGS, 'int_infer', False) and not self.training: 185 | setattr(res, 'output_fraclen', output_fraclen) 186 | return res 187 | 188 | def set_master_layer(self, master_layer): 189 | self.layer_dict['master'] = master_layer 190 | 191 | def get_master_layer(self): 192 | return self.layer_dict['master'] 193 | 194 | def set_following_layer(self, following_layer): 195 | self.layer_dict['following'] = following_layer 196 | self.body[0].set_following_layer(self.body[1]) 197 | self.body[1].set_following_layer(following_layer) 198 | if not self.residual_connection: 199 | self.shortcut[0].set_following_layer(following_layer) 200 | 201 | def get_following_layer(self): 202 | return self.layer_dict['following'] 203 | 204 | def master_child(self): 205 | return self.body[0] 206 | 207 | def int_block(self, avgpool_scale=1.0): 208 | l1 = self.body[0].int_conv() 209 | l2 = self.body[1].int_conv(avgpool_scale=avgpool_scale) 210 | layers = [ 211 | l1, 212 | nn.ReLU(inplace=True), 213 | l2, 214 | ] 215 | body = nn.Sequential(*layers) 216 | 217 | if not self.residual_connection: 218 | shortcut = nn.Sequential(self.shortcut[0].int_conv()) 219 | else: 220 | shortcut = None 221 | return IntBlock(body, shortcut) 222 | 223 | 224 | class Bottleneck(nn.Module): 225 | expansion = 4 226 | 227 | def __init__(self, inp, outp, stride, master_layer=None): 228 | super(Bottleneck, self).__init__() 229 | assert stride in [1, 2] 230 | 231 | midp = outp // 4 232 | l1 = ReLUClipFXQConvBN(inp, midp, 1, 1, 0, master_layer=master_layer) 233 | l2 = ReLUClipFXQConvBN(midp, midp, 3, stride, 1) 234 | l3 = ReLUClipFXQConvBN(midp, outp, 1, 1, 0) 235 | layers = [ 236 | l1, 237 | l2, 238 | l3, 239 | ] 240 | self.body = nn.Sequential(*layers) 241 | self.layer_dict = {} 242 | 243 | self.residual_connection = stride == 1 and inp == outp 244 | if not self.residual_connection: 245 | self.shortcut = nn.Sequential( 246 | ReLUClipFXQConvBN(inp, 247 | outp, 248 | 1, 249 | stride=stride, 250 | master_layer=master_layer)) 251 | self.set_master_layer(None) 252 | else: 253 | self.set_master_layer(self.body[0]) 254 | self.post_relu = nn.ReLU(inplace=True) 255 | 256 | def forward(self, x): 257 | res = self.body(x) 258 | if getattr(FLAGS, 'int_infer', False) and not self.training: 259 | if self.residual_connection: 260 | res_fraclen = res.output_fraclen 261 | x_fraclen = x.output_fraclen 262 | output_fraclen = max(res_fraclen, x_fraclen) 263 | res = res * 2**output_fraclen 264 | x = x * 2**output_fraclen 265 | res += x 266 | res = torch.clamp(res, max=(1 << 31) - 1, min=-(1 << 31) + 1) 267 | res = res / 2**output_fraclen 268 | else: 269 | x = self.shortcut(x) 270 | res_fraclen = res.output_fraclen 271 | x_fraclen = x.output_fraclen 272 | output_fraclen = max(res_fraclen, x_fraclen) 273 | res = res * 2**output_fraclen 274 | x = x * 2**output_fraclen 275 | res += x 276 | res = torch.clamp(res, max=(1 << 31) - 1, min=-(1 << 31) + 1) 277 | res = res / 2**output_fraclen 278 | else: 279 | if self.residual_connection: 280 | res += x 281 | else: 282 | res += self.shortcut(x) 283 | res = self.post_relu(res) 284 | if getattr(FLAGS, 'int_infer', False) and not self.training: 285 | setattr(res, 'output_fraclen', output_fraclen) 286 | return res 287 | 288 | def set_master_layer(self, master_layer): 289 | self.layer_dict['master'] = master_layer 290 | 291 | def get_master_layer(self): 292 | return 
self.layer_dict['master'] 293 | 294 | def set_following_layer(self, following_layer): 295 | self.layer_dict['following'] = following_layer 296 | self.body[0].set_following_layer(self.body[1]) 297 | self.body[1].set_following_layer(self.body[2]) 298 | self.body[2].set_following_layer(following_layer) 299 | if not self.residual_connection: 300 | self.shortcut[0].set_following_layer(following_layer) 301 | 302 | def get_following_layer(self): 303 | return self.layer_dict['following'] 304 | 305 | def master_child(self): 306 | return self.body[0] 307 | 308 | def int_block(self, avgpool_scale=1.0): 309 | l1 = self.body[0].int_conv() 310 | l2 = self.body[1].int_conv() 311 | l3 = self.body[2].int_conv(avgpool_scale=avgpool_scale) 312 | layers = [l1, nn.ReLU(inplace=True), l2, nn.ReLU(inplace=True), l3] 313 | body = nn.Sequential(*layers) 314 | 315 | if not self.residual_connection: 316 | shortcut = nn.Sequential(self.shortcut[0].int_conv()) 317 | else: 318 | shortcut = None 319 | return IntBlock(body, shortcut) 320 | 321 | 322 | class IntModel(nn.Module): 323 | def __init__(self, head, body, classifier, block_setting): 324 | super(IntModel, self).__init__() 325 | 326 | self.block_setting = block_setting 327 | 328 | # head 329 | self.head = head 330 | 331 | if getattr(FLAGS, 'quant_maxpool', False): 332 | self.head[-1] = FXQMaxPool2d(self.head[-1].kernel_size, 333 | self.head[-1].stride, 334 | self.head[-1].padding) 335 | 336 | # body 337 | for idx, n in enumerate(self.block_setting): 338 | for i in range(n): 339 | setattr(self, f'stage_{idx}_layer_{i}', 340 | body[f'stage_{idx}_layer_{i}']) 341 | 342 | if getattr(FLAGS, 'quant_avgpool', False): 343 | self.avgpool = FXQAvgPool2d(7) 344 | else: 345 | self.avgpool = nn.AdaptiveAvgPool2d(1) 346 | 347 | # classifier 348 | self.classifier = classifier 349 | 350 | self.int_op_only = getattr(head, 'int_op_only', False) 351 | 352 | def forward(self, x): 353 | assert getattr(FLAGS, 'int_infer', False) 354 | if getattr(self, 'int_op_only', False): 355 | if getattr(FLAGS, 'quant_maxpool', False): 356 | x = self.head(x) 357 | else: 358 | x = self.head[:-1](x) 359 | x = self.head[-1](x.float()).int() 360 | output_fraclen = (self.head[0].weight_fraclen + 361 | self.head[0].input_fraclen).item() 362 | setattr(x, 'output_fraclen', output_fraclen) 363 | for idx, n in enumerate(self.block_setting): 364 | for i in range(n): 365 | blk = getattr(self, f'stage_{idx}_layer_{i}') 366 | x = blk(x) 367 | if getattr(FLAGS, 'quant_avgpool', False): 368 | x = self.avgpool(x) 369 | output_fraclen = x.output_fraclen 370 | x = x.view(x.size(0), -1) 371 | setattr(x, 'output_fraclen', output_fraclen) 372 | x = int_op_only_fix_quant( 373 | x, 8, self.classifier[0].input_fraclen.item(), 374 | x.output_fraclen, self.classifier[0].input_symmetric) 375 | else: 376 | output_fraclen = x.output_fraclen 377 | x = self.avgpool(x.float()) 378 | x = x.view(x.size(0), -1) 379 | x.div_(2**output_fraclen) 380 | x = (fix_quant(x, 8, self.classifier[0].input_fraclen.float(), 381 | 1, self.classifier[0].input_symmetric)[0] * 382 | (2**self.classifier[0].input_fraclen.float())).int() 383 | x = self.classifier(x).float() 384 | else: 385 | if getattr(FLAGS, 'normalize', False): 386 | x = (fix_quant(x, 8, self.head[0].input_fraclen, 1, 387 | self.head[0].input_symmetric)[0] * 388 | (2**self.head[0].input_fraclen)).int().float() 389 | else: 390 | x = (x * (2**self.head[0].input_fraclen)).int().float() 391 | for layer_ in self.head: 392 | if hasattr(x, 'output_fraclen'): 393 | output_fraclen = 
x.output_fraclen 394 | x = layer_(x) 395 | else: 396 | x = layer_(x) 397 | output_fraclen = layer_.weight_fraclen + layer_.input_fraclen 398 | setattr(x, 'output_fraclen', output_fraclen) 399 | x.div_(2**x.output_fraclen) 400 | for idx, n in enumerate(self.block_setting): 401 | for i in range(n): 402 | blk = getattr(self, f'stage_{idx}_layer_{i}') 403 | x = blk(x) 404 | x = self.avgpool(x) 405 | x = x.view(x.size(0), -1) 406 | x = (fix_quant(x, 8, self.classifier[0].input_fraclen, 1, 407 | self.classifier[0].input_symmetric)[0] * 408 | (2**self.classifier[0].input_fraclen)).int().float() 409 | x = self.classifier(x) 410 | return x 411 | 412 | 413 | class Model(nn.Module): 414 | def __init__(self, num_classes=1000): 415 | super(Model, self).__init__() 416 | 417 | block_type_dict = { 418 | 18: BasicBlock, 419 | 34: BasicBlock, 420 | 50: Bottleneck, 421 | 101: Bottleneck, 422 | 152: Bottleneck 423 | } 424 | block = block_type_dict[FLAGS.depth] 425 | 426 | # head 427 | channels = 64 428 | self.head = nn.Sequential( 429 | ReLUClipFXQConvBN( 430 | 3, 431 | channels, 432 | 7, 433 | 2, 434 | 3, 435 | bitw_min=None, 436 | bita_min=8, 437 | weight_only=not getattr(FLAGS, 'normalize', False), 438 | double_side=getattr(FLAGS, 'normalize', False)), 439 | nn.ReLU(inplace=True), nn.MaxPool2d(3, 2, 1)) 440 | prev_layer = self.head[0] 441 | 442 | # setting of residual blocks 443 | self.block_setting_dict = { 444 | # : [stage1, stage2, stage3, stage4] 445 | 18: [2, 2, 2, 2], 446 | 34: [3, 4, 6, 3], 447 | 50: [3, 4, 6, 3], 448 | 101: [3, 4, 23, 3], 449 | 152: [3, 8, 36, 3], 450 | } 451 | self.block_setting = self.block_setting_dict[FLAGS.depth] 452 | 453 | feats = [64, 128, 256, 512] 454 | 455 | # body 456 | master_layer = None 457 | for idx, n in enumerate(self.block_setting): 458 | outp = feats[idx] * block.expansion 459 | for i in range(n): 460 | if i == 0 and idx != 0: 461 | layer = block(channels, outp, 2, master_layer=master_layer) 462 | else: 463 | layer = block(channels, outp, 1, master_layer=master_layer) 464 | setattr(self, 'stage_{}_layer_{}'.format(idx, i), layer) 465 | channels = outp 466 | master_layer = layer.get_master_layer() 467 | prev_layer.set_following_layer(layer.master_child()) 468 | prev_layer = layer 469 | 470 | if getattr(FLAGS, 'quant_avgpool', False): 471 | self.avgpool = FXQAvgPool2d(7) 472 | if getattr(FLAGS, 'pool_fusing', False): 473 | stage_idx = len(self.block_setting) - 1 474 | layer_idx = self.block_setting[-1] - 1 475 | last_conv_layer = getattr( 476 | self, f'stage_{stage_idx}_layer_{layer_idx}') 477 | last_conv_layer.avgpool_scale = self.avgpool.scale 478 | else: 479 | self.avgpool = nn.AdaptiveAvgPool2d(1) 480 | 481 | # classifier 482 | self.classifier = nn.Sequential( 483 | ReLUClipFXQLinear(outp, num_classes, bitw_min=None)) 484 | prev_layer.set_following_layer(self.classifier[0]) 485 | 486 | if FLAGS.reset_parameters: 487 | self.reset_parameters() 488 | 489 | def forward(self, x): 490 | if getattr(FLAGS, 'int_infer', False) and not self.training: 491 | for layer_ in self.head: 492 | if hasattr(x, 'output_fraclen'): 493 | output_fraclen = x.output_fraclen 494 | x = layer_(x) 495 | else: 496 | x = layer_(x) 497 | output_fraclen = x.output_fraclen 498 | setattr(x, 'output_fraclen', output_fraclen) 499 | else: 500 | x = self.head(x) 501 | for idx, n in enumerate(self.block_setting): 502 | for i in range(n): 503 | x = getattr(self, 'stage_{}_layer_{}'.format(idx, i))(x) 504 | x = self.avgpool(x) 505 | x = x.view(x.size(0), -1) 506 | x = self.classifier(x) 507 | 
return x 508 | 509 | def reset_parameters(self): 510 | for m in self.modules(): 511 | if isinstance(m, nn.Conv2d): 512 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 513 | m.weight.data.normal_(0, math.sqrt(2. / n)) 514 | if m.bias is not None: 515 | m.bias.data.zero_() 516 | elif isinstance(m, nn.BatchNorm2d): 517 | if m.weight is not None: 518 | m.weight.data.fill_(1) 519 | if m.bias is not None: 520 | m.bias.data.zero_() 521 | elif isinstance(m, nn.Linear): 522 | n = m.weight.size(1) 523 | m.weight.data.normal_(0, 0.01) 524 | m.bias.data.zero_() 525 | 526 | def int_model(self): 527 | if getattr(FLAGS, 'quant_avgpool', False): 528 | avgpool = FXQAvgPool2d(7) 529 | avgpool_scale = avgpool.scale 530 | else: 531 | avgpool_scale = 1.0 532 | head = self.head 533 | head[0] = head[0].int_conv() 534 | body = { 535 | f'stage_{idx}_layer_{i}': 536 | getattr(self, f'stage_{idx}_layer_{i}').int_block( 537 | avgpool_scale=avgpool_scale if ( 538 | idx == len(self.block_setting) - 1 and i == n - 539 | 1) else 1.0) 540 | for idx, n in enumerate(self.block_setting) for i in range(n) 541 | } 542 | classifier = self.classifier 543 | classifier[0] = classifier[0].int_fc() 544 | return IntModel(head, body, classifier, self.block_setting) 545 | -------------------------------------------------------------------------------- /myutils/config.py: -------------------------------------------------------------------------------- 1 | """config utilities for yml file.""" 2 | import os 3 | import sys 4 | import yaml 5 | 6 | # singleton 7 | FLAGS = None 8 | 9 | 10 | class LoaderMeta(type): 11 | """Metaclass for supporting `!include`. 12 | """ 13 | def __new__(mcs, __name__, __bases__, __dict__): 14 | """Add include constructor to class.""" 15 | # register the include constructor on the class 16 | cls = super().__new__(mcs, __name__, __bases__, __dict__) 17 | cls.add_constructor('!include', cls.construct_include) 18 | return cls 19 | 20 | 21 | class Loader(yaml.Loader, metaclass=LoaderMeta): 22 | """YAML Loader with `!include` constructor. 23 | """ 24 | def __init__(self, stream): 25 | try: 26 | self._root = os.path.split(stream.name)[0] 27 | except AttributeError: 28 | self._root = os.path.curdir 29 | super().__init__(stream) 30 | 31 | def construct_include(self, node): 32 | """Include file referenced at node.""" 33 | filename = os.path.abspath( 34 | os.path.join(self._root, self.construct_scalar(node))) 35 | extension = os.path.splitext(filename)[1].lstrip('.') 36 | with open(filename, 'r') as f: 37 | if extension in ('yaml', 'yml'): 38 | return yaml.load(f, Loader) 39 | else: 40 | return ''.join(f.readlines()) 41 | 42 | 43 | class AttrDict(dict): 44 | """Dict as attribute trick. 45 | 46 | """ 47 | def __init__(self, *args, **kwargs): 48 | super(AttrDict, self).__init__(*args, **kwargs) 49 | self.__dict__ = self 50 | for key in self.__dict__: 51 | value = self.__dict__[key] 52 | if isinstance(value, dict): 53 | self.__dict__[key] = AttrDict(value) 54 | elif isinstance(value, list): 55 | if value and isinstance(value[0], dict): 56 | self.__dict__[key] = [AttrDict(item) for item in value] 57 | else: 58 | self.__dict__[key] = value 59 | 60 | def yaml(self): 61 | """Convert object to yaml dict and return. 
62 | 63 | """ 64 | yaml_dict = {} 65 | for key in self.__dict__: 66 | value = self.__dict__[key] 67 | if isinstance(value, AttrDict): 68 | yaml_dict[key] = value.yaml() 69 | elif isinstance(value, list): 70 | if value and isinstance(value[0], AttrDict): 71 | new_l = [] 72 | for item in value: 73 | new_l.append(item.yaml()) 74 | yaml_dict[key] = new_l 75 | else: 76 | yaml_dict[key] = value 77 | else: 78 | yaml_dict[key] = value 79 | return yaml_dict 80 | 81 | def __repr__(self): 82 | """Print all variables. 83 | 84 | """ 85 | ret_str = [] 86 | for key in self.__dict__: 87 | value = self.__dict__[key] 88 | if isinstance(value, AttrDict): 89 | ret_str.append('{}:'.format(key)) 90 | child_ret_str = value.__repr__().split('\n') 91 | for item in child_ret_str: 92 | ret_str.append(' ' + item) 93 | elif isinstance(value, list): 94 | if value and isinstance(value[0], AttrDict): 95 | ret_str.append('{}:'.format(key)) 96 | for item in value: 97 | # treat each item as an AttrDict, as above 98 | child_ret_str = item.__repr__().split('\n') 99 | for child_item in child_ret_str: 100 | ret_str.append(' ' + child_item) 101 | else: 102 | ret_str.append('{}: {}'.format(key, value)) 103 | else: 104 | ret_str.append('{}: {}'.format(key, value)) 105 | return '\n'.join(ret_str) 106 | 107 | 108 | class Config(AttrDict): 109 | """Config with yaml file. 110 | 111 | This class is used to configure model hyper-parameters, global constants, 112 | and other settings with a yaml file. All settings in the yaml file will be 113 | automatically logged to file. 114 | 115 | Args: 116 | filename(str): File name. 117 | 118 | Examples: 119 | 120 | yaml file ``model.yml``:: 121 | 122 | NAME: 'neuralgym' 123 | ALPHA: 1.0 124 | DATASET: '/mnt/data/imagenet' 125 | 126 | Usage in .py: 127 | 128 | >>> from neuralgym import Config 129 | >>> config = Config('model.yml') 130 | >>> print(config.NAME) 131 | neuralgym 132 | >>> print(config.ALPHA) 133 | 1.0 134 | >>> print(config.DATASET) 135 | /mnt/data/imagenet 136 | 137 | """ 138 | def __init__(self, filename=None, verbose=False): 139 | assert os.path.exists(filename), 'File {} does not exist.'.format(filename) 140 | try: 141 | with open(filename, 'r') as f: 142 | cfg_dict = yaml.load(f, Loader) 143 | except EnvironmentError: 144 | print('Please check the file with name "{}".'.format(filename)) 145 | super(Config, self).__init__(cfg_dict) 146 | if verbose: 147 | print(' pi.cfg '.center(80, '-')) 148 | print(self.__repr__()) 149 | print(''.center(80, '-')) 150 | 151 | 152 | def app(): 153 | """Load the app config from argv, or via stdin from a subprocess.""" 154 | global FLAGS 155 | if FLAGS is None: 156 | job_yaml_file = None 157 | batch_size = None 158 | for arg in sys.argv: 159 | if arg.startswith('app:'): 160 | job_yaml_file = arg[4:] 161 | if arg.startswith('bs:'): 162 | batch_size = int(arg[3:]) 163 | if job_yaml_file is None: 164 | job_yaml_file = sys.stdin.readline() 165 | FLAGS = Config(job_yaml_file) 166 | if batch_size is not None: 167 | FLAGS.batch_size = batch_size 168 | if FLAGS.batch_size > 256: 169 | FLAGS.warmup_epochs = getattr(FLAGS, 'warmup_epochs', 5) 170 | FLAGS.lr = FLAGS.lr * FLAGS.batch_size / 256 171 | else: 172 | FLAGS.warmup_epochs = 0 173 | return FLAGS 174 | else: 175 | return FLAGS 176 | 177 | 178 | app() 179 | -------------------------------------------------------------------------------- /myutils/distributed.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import os 4 | import functools 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.distributed as 
dist 9 | import torch.multiprocessing as mp 10 | from torch.nn.parallel.scatter_gather import scatter_kwargs 11 | from torch._utils import _flatten_dense_tensors 12 | from torch._utils import _unflatten_dense_tensors 13 | from torch._utils import _take_tensors 14 | 15 | 16 | def init_dist(launcher='pytorch', backend='nccl', **kwargs): 17 | if dist.is_initialized(): 18 | return torch.cuda.current_device() 19 | if mp.get_start_method(allow_none=True) is None: 20 | mp.set_start_method('spawn') 21 | rank = int(os.environ['RANK']) 22 | num_gpus = torch.cuda.device_count() 23 | gpu_id = rank % num_gpus 24 | torch.cuda.set_device(gpu_id) 25 | dist.init_process_group(backend=backend, **kwargs) 26 | return gpu_id 27 | 28 | 29 | def get_rank(): 30 | if dist.is_initialized(): 31 | rank = dist.get_rank() 32 | else: 33 | rank = 0 34 | return rank 35 | 36 | 37 | def get_world_size(): 38 | if dist.is_initialized(): 39 | world_size = dist.get_world_size() 40 | else: 41 | world_size = 1 42 | return world_size 43 | 44 | 45 | def master_only(func): 46 | @functools.wraps(func) 47 | def wrapper(*args, **kwargs): 48 | if get_rank() == 0: 49 | return func(*args, **kwargs) 50 | else: 51 | return None 52 | 53 | return wrapper 54 | 55 | 56 | def is_master(): 57 | """check if current process is the master""" 58 | return get_rank() == 0 59 | 60 | 61 | @master_only 62 | def master_only_print(*args): 63 | """master-only print""" 64 | print(*args) 65 | 66 | 67 | def dist_reduce_tensor(tensor): 68 | """ Reduce to rank 0 """ 69 | world_size = get_world_size() 70 | if world_size < 2: 71 | return tensor 72 | with torch.no_grad(): 73 | dist.reduce(tensor, dst=0) 74 | if get_rank() == 0: 75 | tensor /= world_size 76 | return tensor 77 | 78 | 79 | def dist_all_reduce_tensor(tensor): 80 | """ Reduce to all ranks """ 81 | world_size = get_world_size() 82 | if world_size < 2: 83 | return tensor 84 | with torch.no_grad(): 85 | dist.all_reduce(tensor) 86 | tensor.div_(world_size) 87 | return tensor 88 | 89 | 90 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 91 | if bucket_size_mb > 0: 92 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 93 | buckets = _take_tensors(tensors, bucket_size_bytes) 94 | else: 95 | buckets = OrderedDict() 96 | for tensor in tensors: 97 | tp = tensor.type() 98 | if tp not in buckets: 99 | buckets[tp] = [] 100 | buckets[tp].append(tensor) 101 | buckets = buckets.values() 102 | 103 | for bucket in buckets: 104 | flat_tensors = _flatten_dense_tensors(bucket) 105 | dist.all_reduce(flat_tensors) 106 | flat_tensors.div_(world_size) 107 | for tensor, synced in zip( 108 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 109 | tensor.copy_(synced) 110 | 111 | 112 | def allreduce_grads(model, coalesce=True, bucket_size_mb=-1): 113 | grads = [ 114 | param.grad.data for param in model.parameters() 115 | if param.requires_grad and param.grad is not None 116 | ] 117 | world_size = dist.get_world_size() 118 | if coalesce: 119 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 120 | else: 121 | for tensor in grads: 122 | dist.all_reduce(tensor.div_(world_size)) 123 | 124 | 125 | class AllReduceDistributedDataParallel(nn.Module): 126 | def __init__(self, 127 | module, 128 | dim=0, 129 | broadcast_buffers=True, 130 | bucket_cap_mb=25): 131 | super(AllReduceDistributedDataParallel, self).__init__() 132 | self.module = module 133 | self.dim = dim 134 | self.broadcast_buffers = broadcast_buffers 135 | 136 | self.broadcast_bucket_size = bucket_cap_mb * 1024 * 1024 137 | self._sync_params() 
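A minimal, hedged usage sketch for the reduction helpers defined above (the tensor value and device are illustrative, and a process group is assumed to have been initialized already, e.g. via init_dist()):

    # Average a per-rank scalar across all workers.
    import torch
    loss = torch.tensor([0.5], device='cuda')  # per-rank value (illustrative)
    loss = dist_all_reduce_tensor(loss)  # all-reduce, then divide by world size
    master_only_print('averaged loss:', loss.item())  # prints on rank 0 only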
138 | 139 | def _dist_broadcast_coalesced(self, tensors, buffer_size): 140 | for bucket in _take_tensors(tensors, buffer_size): 141 | flat_tensors = _flatten_dense_tensors(bucket) 142 | dist.broadcast(flat_tensors, 0) 143 | for tensor, synced in zip( 144 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 145 | tensor.copy_(synced) 146 | 147 | def _sync_params(self): 148 | module_states = list(self.module.state_dict().values()) 149 | if len(module_states) > 0: 150 | self._dist_broadcast_coalesced(module_states, 151 | self.broadcast_bucket_size) 152 | if self.broadcast_buffers: 153 | buffers = [b.data for b in self.module.buffers()] 154 | if len(buffers) > 0: 155 | self._dist_broadcast_coalesced(buffers, 156 | self.broadcast_bucket_size) 157 | 158 | def scatter(self, inputs, kwargs, device_ids): 159 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim) 160 | 161 | def forward(self, *inputs, **kwargs): 162 | inputs, kwargs = self.scatter(inputs, kwargs, 163 | [torch.cuda.current_device()]) 164 | return self.module(*inputs[0], **kwargs[0]) 165 | -------------------------------------------------------------------------------- /myutils/export.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def onnx_export(model, data_shape, data_dtype, device, output_file): 5 | print("\nExporting...\n") 6 | batch_size = 1 7 | _, data_channel, data_height, data_width = data_shape 8 | rand_input = torch.randn(batch_size, 9 | data_channel, 10 | data_height, 11 | data_width, 12 | requires_grad=True).to(dtype=data_dtype, 13 | device=device) 14 | torch.onnx.export(model.cpu(), 15 | rand_input.cpu(), 16 | output_file, 17 | export_params=True, 18 | opset_version=11, 19 | do_constant_folding=True, 20 | input_names=['input'], 21 | output_names=['output'], 22 | dynamic_axes={ 23 | 'input': { 24 | 0: 'batch_size' 25 | }, 26 | 'output': { 27 | 0: 'batch_size' 28 | } 29 | }) 30 | model = model.to(device) 31 | print("\nModel exported!\n") 32 | -------------------------------------------------------------------------------- /myutils/meters.py: -------------------------------------------------------------------------------- 1 | class Meter(object): 2 | """Meter keeps track of statistics over steps. 3 | Meters cache values for purposes such as printing average values. 4 | Meters can be flushed to log files (e.g. TensorBoard) regularly. 5 | 6 | Args: 7 | name (str): the name of meter 8 | 9 | """ 10 | def __init__(self, name): 11 | self.name = name 12 | self.steps = 0 13 | self.reset() 14 | 15 | def __repr__(self): 16 | return '{}: {!r}'.format(self.__class__, self.__dict__) 17 | 18 | def reset(self): 19 | self.values = [] 20 | 21 | def cache(self, value, pstep=1): 22 | self.steps += pstep 23 | self.values.append(value) 24 | 25 | def cache_list(self, value_list, pstep=1): 26 | self.steps += pstep 27 | self.values += value_list 28 | 29 | def flush(self, value, reset=True): 30 | pass 31 | 32 | 33 | class ScalarMeter(Meter): 34 | """ScalarMeter records scalars over steps. 35 | 36 | """ 37 | def __init__(self, name): 38 | super(ScalarMeter, self).__init__(name) 39 | 40 | def flush(self, value, step=-1, reset=True): 41 | if reset: 42 | self.reset() 43 | 44 | 45 | def flush_scalar_meters(meters, method='avg'): 46 | """Flush and aggregate a dict of scalar meters, returning the results.""" 47 | results = {} 48 | assert isinstance(meters, dict), "meters should be a dict." 
49 | for name, meter in meters.items(): 50 | if not isinstance(meter, ScalarMeter): 51 | results[name] = meter 52 | continue 53 | if method == 'avg': 54 | value = sum(meter.values) / len(meter.values) 55 | elif method == 'sum': 56 | value = sum(meter.values) 57 | elif method == 'max': 58 | value = max(meter.values) 59 | elif method == 'min': 60 | value = min(meter.values) 61 | else: 62 | raise NotImplementedError( 63 | 'flush method: {} is not yet implemented.'.format(method)) 64 | results[name] = float(format(value, '.3f')) 65 | meter.flush(value) 66 | return results 67 | -------------------------------------------------------------------------------- /requirements.in: -------------------------------------------------------------------------------- 1 | numpy==1.19.5 2 | torch==1.11.0 3 | torchvision==0.12.0 4 | pytorchcv==0.0.67 5 | PyYAML==6.0 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # 2 | # This file is autogenerated by pip-compile with python 3.7 3 | # To update, run: 4 | # 5 | # pip-compile --output-file=requirements.txt requirements.in 6 | # 7 | certifi==2021.10.8 8 | # via requests 9 | charset-normalizer==2.0.12 10 | # via requests 11 | idna==3.3 12 | # via requests 13 | numpy==1.19.5 14 | # via 15 | # -r requirements.in 16 | # pytorchcv 17 | # torchvision 18 | pillow==9.1.0 19 | # via torchvision 20 | pytorchcv==0.0.67 21 | # via -r requirements.in 22 | pyyaml==6.0 23 | # via -r requirements.in 24 | requests==2.27.1 25 | # via 26 | # pytorchcv 27 | # torchvision 28 | torch==1.11.0 29 | # via 30 | # -r requirements.in 31 | # torchvision 32 | torchvision==0.12.0 33 | # via -r requirements.in 34 | typing-extensions==4.2.0 35 | # via 36 | # torch 37 | # torchvision 38 | urllib3==1.26.9 39 | # via requests 40 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cfg=$1 3 | bs=${2:-256} 4 | echo "Total batch size: " $bs 5 | if [ ! -f "$cfg" ]; then 6 | echo "Config not found!"; exit 1
7 | fi 8 | 9 | python3 -W ignore fix_train.py app:$cfg bs:$bs 10 | -------------------------------------------------------------------------------- /weight_boxplot/mobilenetv2_pytorchcv_eff_weight.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/weight_boxplot/mobilenetv2_pytorchcv_eff_weight.pdf -------------------------------------------------------------------------------- /weight_boxplot/mobilenetv2_pytorchcv_eff_weight_wo_title.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/weight_boxplot/mobilenetv2_pytorchcv_eff_weight_wo_title.pdf -------------------------------------------------------------------------------- /weight_boxplot/mobilenetv2_torchvision_eff_weight.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/weight_boxplot/mobilenetv2_torchvision_eff_weight.pdf -------------------------------------------------------------------------------- /weight_boxplot/mobilenetv2_torchvision_eff_weight_wo_title.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snap-research/F8Net/9f8c3549d1f6f801e8db568411388695c486e585/weight_boxplot/mobilenetv2_torchvision_eff_weight_wo_title.pdf -------------------------------------------------------------------------------- /weight_boxplot/weight_boxplot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib 3 | matplotlib.use('Agg') 4 | import matplotlib.pyplot as plt 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torchvision.models as tv_models 9 | from pytorchcv.model_provider import get_model as ptcv_get_model 10 | 11 | 12 | def main(): 13 | fig_size = (6, 4) 14 | axes_label_size = 16 15 | text_size = 12 16 | title_size = 16 17 | legend_size = 8 18 | font_weight = 'normal' 19 | for ckpt in ['torchvision', 'pytorchcv']: 20 | if ckpt == 'torchvision': 21 | net = tv_models.mobilenetv2.mobilenet_v2(pretrained=True) 22 | elif ckpt == 'pytorchcv': 23 | net = ptcv_get_model('mobilenetv2b_w1', pretrained=True) 24 | eff_weight_list = [] 25 | conv_weight = None 26 | fc_weight = None 27 | for n, m in net.named_modules(): 28 | if isinstance(m, nn.Conv2d): 29 | conv_weight = m.weight 30 | elif isinstance(m, nn.BatchNorm2d): 31 | bn_weight = m.weight 32 | bn_var = m.running_var 33 | bn_eps = m.eps 34 | eff_weight = conv_weight * ( 35 | bn_weight / torch.sqrt(bn_var + bn_eps))[:, None, None, 36 | None] 37 | eff_weight_list.append( 38 | eff_weight.detach().cpu().flatten().numpy()) 39 | conv_weight = None 40 | elif isinstance(m, nn.Linear): 41 | eff_weight = m.weight 42 | eff_weight_list.append( 43 | eff_weight.detach().cpu().flatten().numpy()) 44 | conv_weight = None 45 | else: 46 | conv_weight = None 47 | plt.figure(figsize=fig_size) 48 | box_edge_color = 'k' 49 | box_bar_color = 'r' 50 | plt.boxplot(eff_weight_list, 51 | showfliers=False, 52 | boxprops={'color': box_edge_color}, 53 | capprops={'color': box_edge_color}, 54 | whiskerprops={'color': box_edge_color}, 55 | flierprops={'markeredgecolor': box_edge_color}, 56 | medianprops={'color': box_bar_color}) 57 | plt.xticks([]) 58 | plt.xlabel('Layer', 
fontsize=axes_label_size) 59 | plt.ylabel('Effective Weight', fontsize=axes_label_size) 60 | plt.setp(plt.gca().get_xticklabels(), 61 | fontsize=axes_label_size, 62 | fontweight=font_weight) 63 | plt.setp(plt.gca().get_yticklabels(), 64 | fontsize=axes_label_size, 65 | fontweight=font_weight) 66 | plt.savefig(f'./mobilenetv2_{ckpt}_eff_weight_wo_title.pdf', 67 | dpi=300, 68 | bbox_inches='tight') 69 | plt.title('Effective Weight Range (FP MobileNet V2)', 70 | fontsize=title_size) 71 | plt.savefig(f'./mobilenetv2_{ckpt}_eff_weight.pdf', 72 | dpi=300, 73 | bbox_inches='tight') 74 | 75 | 76 | if __name__ == '__main__': 77 | main() 78 | --------------------------------------------------------------------------------
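The effective weights plotted above fold each BatchNorm into its preceding convolution: W_eff = W * gamma / sqrt(running_var + eps). A minimal sketch of the same folding on a toy layer pair (shapes and statistics are illustrative, not taken from the repo):

    # Fold the BN scale into the conv weights, as in weight_boxplot.py.
    import torch
    import torch.nn as nn
    conv = nn.Conv2d(3, 8, 3, bias=False)
    bn = nn.BatchNorm2d(8)
    bn.running_var.fill_(4.0)  # illustrative running statistics
    w_eff = conv.weight * (bn.weight /
                           torch.sqrt(bn.running_var + bn.eps))[:, None, None, None]
    print(w_eff.shape)  # torch.Size([8, 3, 3, 3]); each output channel scaled by ~1/2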