├── .gitignore
├── LICENSE
├── README.md
├── function.py
├── model.py
├── module.py
├── post_training_quantize.py
├── quantization_aware_training.py
└── train.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # Pytorch weight
132 | *.pt
133 | 
134 | # data dir
135 | /data


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # pytorch-quantization-demo
2 | A simple network quantization demo using pytorch from scratch. This is the code for my [tutorial](https://mp.weixin.qq.com/s?__biz=Mzg4ODA3MDkyMA==&mid=2247483692&idx=1&sn=3e28db4881d591f4e6a66c83d4213823&chksm=cf81f74bf8f67e5d0f2a98fd7bf7a91864d14010d88a5ed89120b7b4fcd94fc34789f0d0db9a&token=680347690&lang=zh_CN#rd) about network quantization written in Chinese. 
3 | 
4 | 也欢迎感兴趣的读者关注我的知乎专栏：[大白话模型量化](https://zhuanlan.zhihu.com/c_1258047709686231040)


--------------------------------------------------------------------------------
/function.py:
--------------------------------------------------------------------------------
 1 | from torch.autograd import Function
 2 | import torch
 3 | 
 4 | 
 5 | class FakeQuantize(Function):
 6 | 
 7 |     @staticmethod
 8 |     def forward(ctx, x, qparam):
 9 |         x = qparam.quantize_tensor(x)
10 |         x = qparam.dequantize_tensor(x)
11 |         return x
12 | 
13 |     @staticmethod
14 |     def backward(ctx, grad_output):
15 |         return grad_output, None
16 |     
def interp(x: torch.Tensor, xp: torch.Tensor, fp: torch.Tensor) -> torch.Tensor:
    """Evaluate the piecewise-linear function through ``(xp, fp)`` at ``x``.

    Each element of ``x`` is mapped onto the line segment whose left knot is
    the last ``xp`` entry that is ``<=`` the element. Elements left of the
    first knot or right of the last knot use the first / last segment, so the
    function is extrapolated linearly rather than clamped.

    Args:
        x: Tensor of query points; any shape, including 0-d and empty.
        xp: 1-D tensor of knot positions (at least two entries).
            NOTE(review): the segment lookup counts ``x >= xp`` entries, so
            ``xp`` is presumably expected to be sorted ascending.
        fp: 1-D tensor of function values at ``xp``.

    Returns:
        Tensor with the same shape as ``x`` holding the interpolated values.
    """
    # Work on a flat view so scalar and empty inputs need no special casing.
    flat = x.reshape(-1)

    # Per-segment line coefficients: y = slope * x + intercept.
    slope = (fp[1:] - fp[:-1]) / (xp[1:] - xp[:-1])
    intercept = fp[:-1] - slope.mul(xp[:-1])

    # Number of knots that are <= each query, minus one, is the segment index.
    segment = torch.sum(torch.ge(flat[:, None], xp[None, :]), -1) - 1
    # Out-of-range queries fall back to the first / last segment.
    segment = torch.clamp(segment, 0, slope.shape[-1] - 1)

    out = slope[segment].mul(flat) + intercept[segment]
    return out.reshape(x.shape)


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | from module import *
  6 | 
  7 | 
  8 | class Net(nn.Module):
  9 | 
 10 |     def __init__(self, num_channels=1):
 11 |         super(Net, self).__init__()
 12 |         self.conv1 = nn.Conv2d(num_channels, 40, 3, 1)
 13 |         self.conv2 = nn.Conv2d(40, 40, 3, 1, groups=20)
 14 |         self.fc = nn.Linear(5*5*40, 10)
 15 | 
 16 |     def forward(self, x):
 17 |         x = F.relu(self.conv1(x))
 18 |         x = F.max_pool2d(x, 2, 2)
 19 |         x = F.relu(self.conv2(x))
 20 |         x = F.max_pool2d(x, 2, 2)
 21 |         x = x.view(-1, 5*5*40)
 22 |         x = self.fc(x)
 23 |         return x
 24 | 
 25 |     def quantize(self, num_bits=8):
 26 |         self.qconv1 = QConv2d(self.conv1, qi=True, qo=True, num_bits=num_bits)
 27 |         self.qrelu1 = QReLU()
 28 |         self.qmaxpool2d_1 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
 29 |         self.qconv2 = QConv2d(self.conv2, qi=False, qo=True, num_bits=num_bits)
 30 |         self.qrelu2 = QReLU()
 31 |         self.qmaxpool2d_2 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
 32 |         self.qfc = QLinear(self.fc, qi=False, qo=True, num_bits=num_bits)
 33 | 
 34 |     def quantize_forward(self, x):
 35 |         x = self.qconv1(x)
 36 |         x = self.qrelu1(x)
 37 |         x = self.qmaxpool2d_1(x)
 38 |         x = self.qconv2(x)
 39 |         x = self.qrelu2(x)
 40 |         x = self.qmaxpool2d_2(x)
 41 |         x = x.view(-1, 5*5*40)
 42 |         x = self.qfc(x)
 43 |         return x
 44 | 
 45 |     def freeze(self):
 46 |         self.qconv1.freeze()
 47 |         self.qrelu1.freeze(self.qconv1.qo)
 48 |         self.qmaxpool2d_1.freeze(self.qconv1.qo)
 49 |         self.qconv2.freeze(qi=self.qconv1.qo)
 50 |         self.qrelu2.freeze(self.qconv2.qo)
 51 |         self.qmaxpool2d_2.freeze(self.qconv2.qo)
 52 |         self.qfc.freeze(qi=self.qconv2.qo)
 53 | 
 54 |     def quantize_inference(self, x):
 55 |         qx = self.qconv1.qi.quantize_tensor(x)
 56 |         qx = self.qconv1.quantize_inference(qx)
 57 |         qx = self.qrelu1.quantize_inference(qx)
 58 |         qx = self.qmaxpool2d_1.quantize_inference(qx)
 59 |         qx = self.qconv2.quantize_inference(qx)
 60 |         qx = self.qrelu2.quantize_inference(qx)
 61 |         qx = self.qmaxpool2d_2.quantize_inference(qx)
 62 |         qx = qx.view(-1, 5*5*40)
 63 |         qx = self.qfc.quantize_inference(qx)
 64 |         out = self.qfc.qo.dequantize_tensor(qx)
 65 |         return out
 66 | 
 67 | 
 68 | class NetBN(nn.Module):
 69 | 
 70 |     def __init__(self, num_channels=1):
 71 |         super(NetBN, self).__init__()
 72 |         self.conv1 = nn.Conv2d(num_channels, 40, 3, 1)
 73 |         self.bn1 = nn.BatchNorm2d(40)
 74 |         self.conv2 = nn.Conv2d(40, 40, 3, 1)
 75 |         self.bn2 = nn.BatchNorm2d(40)
 76 |         self.fc = nn.Linear(5 * 5 * 40, 10)
 77 | 
 78 |     def forward(self, x):
 79 |         x = self.conv1(x)
 80 |         x = self.bn1(x)
 81 |         x = F.relu(x)
 82 |         x = F.max_pool2d(x, 2, 2)
 83 |         x = self.conv2(x)
 84 |         x = self.bn2(x)
 85 |         x = F.relu(x)
 86 |         x = F.max_pool2d(x, 2, 2)
 87 |         x = x.view(-1, 5 * 5 * 40)
 88 |         x = self.fc(x)
 89 |         return x
 90 | 
 91 |     def quantize(self, num_bits=8):
 92 |         self.qconv1 = QConvBNReLU(self.conv1, self.bn1, qi=True, qo=True, num_bits=num_bits)
 93 |         self.qmaxpool2d_1 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
 94 |         self.qconv2 = QConvBNReLU(self.conv2, self.bn2, qi=False, qo=True, num_bits=num_bits)
 95 |         self.qmaxpool2d_2 = QMaxPooling2d(kernel_size=2, stride=2, padding=0)
 96 |         self.qfc = QLinear(self.fc, qi=False, qo=True, num_bits=num_bits)
 97 | 
 98 |     def quantize_forward(self, x):
 99 |         x = self.qconv1(x)
100 |         x = self.qmaxpool2d_1(x)
101 |         x = self.qconv2(x)
102 |         x = self.qmaxpool2d_2(x)
103 |         x = x.view(-1, 5*5*40)
104 |         x = self.qfc(x)
105 |         return x
106 | 
107 |     def freeze(self):
108 |         self.qconv1.freeze()
109 |         self.qmaxpool2d_1.freeze(self.qconv1.qo)
110 |         self.qconv2.freeze(qi=self.qconv1.qo)
111 |         self.qmaxpool2d_2.freeze(self.qconv2.qo)
112 |         self.qfc.freeze(qi=self.qconv2.qo)
113 | 
114 |     def quantize_inference(self, x):
115 |         qx = self.qconv1.qi.quantize_tensor(x)
116 |         qx = self.qconv1.quantize_inference(qx)
117 |         qx = self.qmaxpool2d_1.quantize_inference(qx)
118 |         qx = self.qconv2.quantize_inference(qx)
119 |         qx = self.qmaxpool2d_2.quantize_inference(qx)
120 |         qx = qx.view(-1, 5*5*40)
121 | 
122 |         qx = self.qfc.quantize_inference(qx)
123 |         
124 |         out = self.qfc.qo.dequantize_tensor(qx)
125 |         return out
126 | 


--------------------------------------------------------------------------------
/module.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import numpy as np
  3 | 
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | from torch.autograd import Variable
  8 | 
  9 | from function import FakeQuantize, interp
 10 | 
 11 | 
 12 | def calcScaleZeroPoint(min_val, max_val, num_bits=8):
 13 |     qmin = 0.
 14 |     qmax = 2. ** num_bits - 1.
 15 |     scale = (max_val - min_val) / (qmax - qmin)
 16 | 
 17 |     zero_point = qmax - max_val / scale
 18 | 
 19 |     if zero_point < qmin:
 20 |         zero_point = torch.tensor([qmin], dtype=torch.float32).to(min_val.device)
 21 |     elif zero_point > qmax:
 22 |         # zero_point = qmax
 23 |         zero_point = torch.tensor([qmax], dtype=torch.float32).to(max_val.device)
 24 |     
 25 |     zero_point.round_()
 26 | 
 27 |     return scale, zero_point
 28 | 
 29 | def quantize_tensor(x, scale, zero_point, num_bits=8, signed=False):
 30 |     if signed:
 31 |         qmin = - 2. ** (num_bits - 1)
 32 |         qmax = 2. ** (num_bits - 1) - 1
 33 |     else:
 34 |         qmin = 0.
 35 |         qmax = 2. ** num_bits - 1.
 36 |  
 37 |     q_x = zero_point + x / scale
 38 |     q_x.clamp_(qmin, qmax).round_()
 39 |     
 40 |     return q_x
 41 |  
 42 | def dequantize_tensor(q_x, scale, zero_point):
 43 |     return scale * (q_x - zero_point)
 44 | 
 45 | 
 46 | def search(M):
 47 |     P = 7000
 48 |     n = 1
 49 |     while True:
 50 |         Mo = int(round(2 ** n * M))
 51 |         # Mo 
 52 |         approx_result = Mo * P >> n
 53 |         result = int(round(M * P))
 54 |         error = approx_result - result
 55 | 
 56 |         print("n=%d, Mo=%f, approx=%d, result=%d, error=%f" % \
 57 |             (n, Mo, approx_result, result, error))
 58 | 
 59 |         if math.fabs(error) < 1e-9 or n >= 22:
 60 |             return Mo, n
 61 |         n += 1
 62 | 
 63 | 
 64 | class QParam(nn.Module):
 65 | 
 66 |     def __init__(self, num_bits=8):
 67 |         super(QParam, self).__init__()
 68 |         self.num_bits = num_bits
 69 |         scale = torch.tensor([], requires_grad=False)
 70 |         zero_point = torch.tensor([], requires_grad=False)
 71 |         min = torch.tensor([], requires_grad=False)
 72 |         max = torch.tensor([], requires_grad=False)
 73 |         self.register_buffer('scale', scale)
 74 |         self.register_buffer('zero_point', zero_point)
 75 |         self.register_buffer('min', min)
 76 |         self.register_buffer('max', max)
 77 | 
 78 |     def update(self, tensor):
 79 |         if self.max.nelement() == 0 or self.max.data < tensor.max().data:
 80 |             self.max.data = tensor.max().data
 81 |         self.max.clamp_(min=0)
 82 |         
 83 |         if self.min.nelement() == 0 or self.min.data > tensor.min().data:
 84 |             self.min.data = tensor.min().data
 85 |         self.min.clamp_(max=0)
 86 |         
 87 |         self.scale, self.zero_point = calcScaleZeroPoint(self.min, self.max, self.num_bits)
 88 |     
 89 |     def quantize_tensor(self, tensor):
 90 |         return quantize_tensor(tensor, self.scale, self.zero_point, num_bits=self.num_bits)
 91 | 
 92 |     def dequantize_tensor(self, q_x):
 93 |         return dequantize_tensor(q_x, self.scale, self.zero_point)
 94 | 
 95 |     def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs):
 96 |         key_names = ['scale', 'zero_point', 'min', 'max']
 97 |         for key in key_names:
 98 |             value = getattr(self, key)
 99 |             value.data = state_dict[prefix + key].data
100 |             state_dict.pop(prefix + key)
101 | 
102 |     def __str__(self):
103 |         info = 'scale: %.10f ' % self.scale
104 |         info += 'zp: %d ' % self.zero_point
105 |         info += 'min: %.6f ' % self.min
106 |         info += 'max: %.6f' % self.max
107 |         return info
108 | 
109 | 
110 | 
class QModule(nn.Module):
    """Base class for quantized layers.

    Optionally owns an input QParam (``qi``) and an output QParam (``qo``).
    Subclasses override ``freeze`` and must implement
    ``quantize_inference``.
    """

    def __init__(self, qi=True, qo=True, num_bits=8):
        super().__init__()
        # Create only the requested observers, input first, so the
        # attribute is simply absent when a flag is false.
        for attr_name, wanted in (('qi', qi), ('qo', qo)):
            if wanted:
                setattr(self, attr_name, QParam(num_bits=num_bits))

    def freeze(self):
        """Hook for fixing quantization parameters; a no-op in the base class."""
        return None

    def quantize_inference(self, x):
        """Quantized inference entry point; subclasses must override."""
        raise NotImplementedError('quantize_inference should be implemented.')
125 | 
126 | 
class QConv2d(QModule):
    """Quantized wrapper around an ``nn.Conv2d``.

    ``forward`` observes ranges and applies fake quantization to the input,
    weight and output. ``freeze`` converts the wrapped convolution's
    parameters to quantized values and computes the rescale factor ``M``.
    ``quantize_inference`` then runs the convolution on already-quantized
    input.
    """

    def __init__(self, conv_module, qi=True, qo=True, num_bits=8):
        super(QConv2d, self).__init__(qi=qi, qo=qo, num_bits=num_bits)
        self.num_bits = num_bits
        self.conv_module = conv_module
        self.qw = QParam(num_bits=num_bits)
        # Register M as a buffer so it is saved/loaded with the state dict.
        self.register_buffer('M', torch.tensor([], requires_grad=False))

    def freeze(self, qi=None, qo=None):
        """Fix quantization parameters and quantize the wrapped layer in place.

        Args:
            qi: Input QParam. Must be given exactly when the module does not
                already own one.
            qo: Output QParam. Must be given exactly when the module does
                not already own one.

        Raises:
            ValueError: If ``qi``/``qo`` is supplied although the module
                already has one, or is missing although it has none.
        """
        if hasattr(self, 'qi') and qi is not None:
            raise ValueError('qi has been provided in init function.')
        if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi is not existed, should be provided.')

        if hasattr(self, 'qo') and qo is not None:
            raise ValueError('qo has been provided in init function.')
        if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo is not existed, should be provided.')

        if qi is not None:
            self.qi = qi
        if qo is not None:
            self.qo = qo
        # Combined rescale factor applied to the accumulator at inference.
        self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data

        conv = self.conv_module
        # Store the weight as zero-point-shifted quantized values.
        conv.weight.data = self.qw.quantize_tensor(conv.weight.data)
        conv.weight.data = conv.weight.data - self.qw.zero_point

        # A convolution built with bias=False has bias None; there is nothing
        # to quantize then, and forward/quantize_inference already cope.
        if conv.bias is not None:
            conv.bias.data = quantize_tensor(conv.bias.data, scale=self.qi.scale * self.qw.scale,
                                             zero_point=0, num_bits=32, signed=True)

    def forward(self, x):
        """Observe ranges and run the convolution with fake-quantized tensors."""
        if hasattr(self, 'qi'):
            self.qi.update(x)
            x = FakeQuantize.apply(x, self.qi)

        self.qw.update(self.conv_module.weight.data)

        x = F.conv2d(x, FakeQuantize.apply(self.conv_module.weight, self.qw), self.conv_module.bias,
                     stride=self.conv_module.stride,
                     padding=self.conv_module.padding, dilation=self.conv_module.dilation,
                     groups=self.conv_module.groups)

        if hasattr(self, 'qo'):
            self.qo.update(x)
            x = FakeQuantize.apply(x, self.qo)

        return x

    def quantize_inference(self, x):
        """Run the frozen convolution on input quantized with ``qi``.

        Removes the input zero point, convolves with the frozen parameters,
        rescales by ``M``, adds the output zero point and limits the result
        to ``[0, 2**num_bits - 1]``.
        """
        x = x - self.qi.zero_point
        x = self.conv_module(x)
        x = self.M * x
        x.round_()
        x = x + self.qo.zero_point
        x.clamp_(0., 2.**self.num_bits-1.).round_()
        return x
186 | 
187 | 
class QLinear(QModule):
    """Quantized wrapper around an ``nn.Linear``.

    ``forward`` observes ranges and applies fake quantization to the input,
    weight and output. ``freeze`` converts the wrapped layer's parameters to
    quantized values and computes the rescale factor ``M``.
    ``quantize_inference`` then runs the layer on already-quantized input.
    """

    def __init__(self, fc_module, qi=True, qo=True, num_bits=8):
        super(QLinear, self).__init__(qi=qi, qo=qo, num_bits=num_bits)
        self.num_bits = num_bits
        self.fc_module = fc_module
        self.qw = QParam(num_bits=num_bits)
        # Register M as a buffer so it is saved/loaded with the state dict.
        self.register_buffer('M', torch.tensor([], requires_grad=False))

    def freeze(self, qi=None, qo=None):
        """Fix quantization parameters and quantize the wrapped layer in place.

        Args:
            qi: Input QParam. Must be given exactly when the module does not
                already own one.
            qo: Output QParam. Must be given exactly when the module does
                not already own one.

        Raises:
            ValueError: If ``qi``/``qo`` is supplied although the module
                already has one, or is missing although it has none.
        """
        if hasattr(self, 'qi') and qi is not None:
            raise ValueError('qi has been provided in init function.')
        if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi is not existed, should be provided.')

        if hasattr(self, 'qo') and qo is not None:
            raise ValueError('qo has been provided in init function.')
        if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo is not existed, should be provided.')

        if qi is not None:
            self.qi = qi
        if qo is not None:
            self.qo = qo
        # Combined rescale factor applied to the accumulator at inference.
        self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data

        fc = self.fc_module
        # Store the weight as zero-point-shifted quantized values.
        fc.weight.data = self.qw.quantize_tensor(fc.weight.data)
        fc.weight.data = fc.weight.data - self.qw.zero_point

        # A layer built with bias=False has bias None; there is nothing to
        # quantize then, and forward/quantize_inference already cope.
        if fc.bias is not None:
            fc.bias.data = quantize_tensor(fc.bias.data, scale=self.qi.scale * self.qw.scale,
                                           zero_point=0, num_bits=32, signed=True)

    def forward(self, x):
        """Observe ranges and run the linear layer with fake-quantized tensors."""
        if hasattr(self, 'qi'):
            self.qi.update(x)
            x = FakeQuantize.apply(x, self.qi)

        self.qw.update(self.fc_module.weight.data)

        x = F.linear(x, FakeQuantize.apply(self.fc_module.weight, self.qw), self.fc_module.bias)

        if hasattr(self, 'qo'):
            self.qo.update(x)
            x = FakeQuantize.apply(x, self.qo)

        return x

    def quantize_inference(self, x):
        """Run the frozen linear layer on input quantized with ``qi``.

        Removes the input zero point, applies the frozen layer, rescales by
        ``M``, adds the output zero point and limits the result to
        ``[0, 2**num_bits - 1]``.
        """
        x = x - self.qi.zero_point
        x = self.fc_module(x)
        x = self.M * x
        x.round_()
        x = x + self.qo.zero_point
        x.clamp_(0., 2.**self.num_bits-1.).round_()
        return x
243 | 
244 | 
class QReLU(QModule):
    """ReLU with optional fake quantization of its input.

    The constructor arguments are forwarded unchanged to ``QModule``. The
    module only ever looks at ``self.qi``; it has no output parameters of
    its own.
    """

    def __init__(self, qi=False, num_bits=None):
        super(QReLU, self).__init__(qi=qi, num_bits=num_bits)

    def freeze(self, qi=None):
        """Adopt ``qi`` when this module has no input parameters of its own.

        Raises:
            ValueError: If ``qi`` is passed although the module already has
                one, or is omitted although the module has none.
        """
        owns_qi = hasattr(self, 'qi')

        if owns_qi and qi is not None:
            raise ValueError('qi has been provided in init function.')
        if not owns_qi and qi is None:
            raise ValueError('qi is not existed, should be provided.')

        if qi is not None:
            self.qi = qi

    def forward(self, x):
        """Fake-quantize ``x`` when ``self.qi`` exists, then apply ReLU."""
        if hasattr(self, 'qi'):
            self.qi.update(x)
            x = FakeQuantize.apply(x, self.qi)
        return F.relu(x)

    def quantize_inference(self, x):
        """Return a copy of ``x`` with entries below the zero point raised to it.

        The input tensor itself is left untouched.
        """
        clipped = x.clone()
        below_zero_point = clipped < self.qi.zero_point
        clipped[below_zero_point] = self.qi.zero_point
        return clipped
273 | 
class QMaxPooling2d(QModule):
    """2-D max pooling with optional fake quantization of its input.

    Args:
        kernel_size, stride, padding: Passed straight to ``F.max_pool2d``.
        qi, num_bits: Forwarded unchanged to ``QModule.__init__``.
    """

    def __init__(self, kernel_size=3, stride=1, padding=0, qi=False, num_bits=None):
        super(QMaxPooling2d, self).__init__(qi=qi, num_bits=num_bits)
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

    def freeze(self, qi=None):
        """Adopt ``qi`` when this module has no input parameters of its own.

        Raises:
            ValueError: If ``qi`` is passed although the module already has
                one, or is omitted although the module has none.
        """
        owns_qi = hasattr(self, 'qi')

        if owns_qi and qi is not None:
            raise ValueError('qi has been provided in init function.')
        if not owns_qi and qi is None:
            raise ValueError('qi is not existed, should be provided.')

        if qi is not None:
            self.qi = qi

    def forward(self, x):
        """Fake-quantize ``x`` when ``self.qi`` exists, then max-pool it."""
        if hasattr(self, 'qi'):
            self.qi.update(x)
            x = FakeQuantize.apply(x, self.qi)
        return F.max_pool2d(x, self.kernel_size, self.stride, self.padding)

    def quantize_inference(self, x):
        """Max-pool the quantized tensor directly; no rescaling is applied."""
        pooled = F.max_pool2d(x, self.kernel_size, self.stride, self.padding)
        return pooled
301 | 
302 | 
class QConvBNReLU(QModule):
    """Quantization wrapper for a convolution + batch norm + ReLU stack.

    Batch norm is folded into the convolution weight and bias. ``forward``
    fake-quantizes the folded weight, ``freeze`` stores the quantized folded
    weight/bias in ``conv_module``, and ``quantize_inference`` evaluates the
    frozen layer on quantized input.

    Args:
        conv_module: Wrapped convolution; its ``weight``, ``bias``,
            ``stride``, ``padding``, ``dilation``, ``groups`` and
            ``out_channels`` attributes are read.
        bn_module: Wrapped batch norm; its ``affine``, ``weight``, ``bias``,
            ``running_mean``, ``running_var``, ``momentum`` and ``eps``
            attributes are read.
        qi, qo: Forwarded to ``QModule.__init__``. The methods below test
            ``hasattr(self, 'qi')``/``hasattr(self, 'qo')``, so the base
            class presumably creates them only when the flag is true.
        num_bits: Bit width of the weight ``QParam`` and of the clamp range
            used in ``quantize_inference``.
    """

    def __init__(self, conv_module, bn_module, qi=True, qo=True, num_bits=8):
        super(QConvBNReLU, self).__init__(qi=qi, qo=qo, num_bits=num_bits)
        self.num_bits = num_bits
        self.conv_module = conv_module
        self.bn_module = bn_module
        self.qw = QParam(num_bits=num_bits)
        self.qb = QParam(num_bits=32)
        # Register M as a buffer (it is not a trainable parameter).
        self.register_buffer('M', torch.tensor([], requires_grad=False))

    def fold_bn(self, mean, std):
        """Fold batch-norm statistics into the convolution weight and bias.

        Args:
            mean: Per-output-channel mean.
            std: Per-output-channel standard deviation (eps already added).

        Returns:
            Tuple ``(weight, bias)`` of the folded convolution.
        """
        if self.bn_module.affine:
            gamma_ = self.bn_module.weight / std
            if self.conv_module.bias is not None:
                bias = gamma_ * self.conv_module.bias - gamma_ * mean + self.bn_module.bias
            else:
                bias = self.bn_module.bias - gamma_ * mean
        else:
            gamma_ = 1 / std
            if self.conv_module.bias is not None:
                bias = gamma_ * self.conv_module.bias - gamma_ * mean
            else:
                bias = -gamma_ * mean

        # gamma_ holds one factor per output channel, so it has to be reshaped
        # to (out_channels, 1, 1, 1) before scaling the 4-D weight. The
        # non-affine branch used to multiply by the 1-D tensor directly, which
        # broadcasts along the last (kernel width) axis instead.
        weight = self.conv_module.weight * gamma_.view(self.conv_module.out_channels, 1, 1, 1)

        return weight, bias

    def forward(self, x):
        """Run conv+BN+ReLU with fake quantization of input, weight and output.

        In training mode the batch statistics are taken from an extra,
        unquantized convolution pass and the running statistics of
        ``bn_module`` are updated; otherwise the running statistics are used.
        """
        if hasattr(self, 'qi'):
            self.qi.update(x)
            x = FakeQuantize.apply(x, self.qi)

        if self.training:
            y = F.conv2d(x, self.conv_module.weight, self.conv_module.bias,
                         stride=self.conv_module.stride,
                         padding=self.conv_module.padding,
                         dilation=self.conv_module.dilation,
                         groups=self.conv_module.groups)
            y = y.permute(1, 0, 2, 3)  # NCHW -> CNHW
            y = y.contiguous().view(self.conv_module.out_channels, -1)  # CNHW -> C,NHW
            # Statistics are detached: no gradient flows through mean/var.
            mean = y.mean(1).detach()
            var = y.var(1).detach()
            self.bn_module.running_mean = \
                (1 - self.bn_module.momentum) * self.bn_module.running_mean + \
                self.bn_module.momentum * mean
            self.bn_module.running_var = \
                (1 - self.bn_module.momentum) * self.bn_module.running_var + \
                self.bn_module.momentum * var
        else:
            mean = Variable(self.bn_module.running_mean)
            var = Variable(self.bn_module.running_var)

        std = torch.sqrt(var + self.bn_module.eps)

        weight, bias = self.fold_bn(mean, std)

        self.qw.update(weight.data)

        x = F.conv2d(x, FakeQuantize.apply(weight, self.qw), bias,
                     stride=self.conv_module.stride,
                     padding=self.conv_module.padding, dilation=self.conv_module.dilation,
                     groups=self.conv_module.groups)

        x = F.relu(x)

        if hasattr(self, 'qo'):
            self.qo.update(x)
            x = FakeQuantize.apply(x, self.qo)

        return x

    def freeze(self, qi=None, qo=None):
        """Fix the quantization parameters and store the quantized folded layer.

        Args:
            qi: Input quantization parameters; must be given exactly when
                this module has no ``qi`` attribute of its own.
            qo: Output quantization parameters; same rule as ``qi``.

        Raises:
            ValueError: If ``qi``/``qo`` is passed although the module
                already owns one, or is missing although it owns none.
        """
        if hasattr(self, 'qi') and qi is not None:
            raise ValueError('qi has been provided in init function.')
        if not hasattr(self, 'qi') and qi is None:
            raise ValueError('qi is not existed, should be provided.')

        if hasattr(self, 'qo') and qo is not None:
            raise ValueError('qo has been provided in init function.')
        if not hasattr(self, 'qo') and qo is None:
            raise ValueError('qo is not existed, should be provided.')

        if qi is not None:
            self.qi = qi
        if qo is not None:
            self.qo = qo

        # Rescaling factor applied to the accumulator in quantize_inference.
        self.M.data = (self.qw.scale * self.qi.scale / self.qo.scale).data

        std = torch.sqrt(self.bn_module.running_var + self.bn_module.eps)

        weight, bias = self.fold_bn(self.bn_module.running_mean, std)
        # Store the folded weight as (quantized value - zero point).
        self.conv_module.weight.data = self.qw.quantize_tensor(weight.data)
        self.conv_module.weight.data = self.conv_module.weight.data - self.qw.zero_point

        quantized_bias = quantize_tensor(bias, scale=self.qi.scale * self.qw.scale,
                                         zero_point=0, num_bits=32, signed=True)
        if self.conv_module.bias is None:
            # A convolution created with bias=False still needs the folded BN
            # bias at inference time, so attach one instead of failing on
            # ``None.data``.
            self.conv_module.bias = torch.nn.Parameter(quantized_bias.detach(),
                                                       requires_grad=False)
        else:
            self.conv_module.bias.data = quantized_bias

    def quantize_inference(self, x):
        """Evaluate the frozen layer on quantized input ``x``.

        Subtracts the input zero point, applies the wrapped convolution,
        rescales by ``M``, rounds, adds the output zero point and clamps the
        result to ``[0, 2**num_bits - 1]``.
        """
        x = x - self.qi.zero_point
        x = self.conv_module(x)
        x = self.M * x
        x.round_()
        x = x + self.qo.zero_point
        x.clamp_(0., 2.**self.num_bits-1.).round_()
        return x
414 |         
415 | 
class QSigmoid(QModule):
    """Sigmoid with fake quantization and a lookup table for inference.

    Args:
        qi, qo, num_bits: Forwarded to ``QModule.__init__``; ``num_bits``
            also fixes the range covered by the lookup table and the clamp
            range in ``quantize_inference``.
        lut_size: Number of sample points stored in the lookup table.
    """

    def __init__(self, qi=True, qo=True, num_bits=8, lut_size=64):
        super(QSigmoid, self).__init__(qi=qi, qo=qo, num_bits=num_bits)
        self.num_bits = num_bits
        self.lut_size = lut_size

    def forward(self, x):
        """Apply sigmoid, fake-quantizing input/output when qi/qo exist."""
        if hasattr(self, 'qi'):
            self.qi.update(x)
            x = FakeQuantize.apply(x, self.qi)

        activated = torch.sigmoid(x)

        if hasattr(self, 'qo'):
            self.qo.update(activated)
            activated = FakeQuantize.apply(activated, self.qo)

        return activated

    def freeze(self, qi=None, qo=None):
        """Fix the quantization parameters and build the lookup table.

        Registers two buffers: ``lut_qx`` (quantized sample inputs) and
        ``lut_qy`` (the matching quantized sigmoid outputs).

        Raises:
            ValueError: If ``qi``/``qo`` is passed although the module
                already owns one, or is missing although it owns none.
        """
        owns_qi = hasattr(self, 'qi')
        if owns_qi and qi is not None:
            raise ValueError('qi has been provided in init function.')
        if not owns_qi and qi is None:
            raise ValueError('qi is not existed, should be provided.')

        owns_qo = hasattr(self, 'qo')
        if owns_qo and qo is not None:
            raise ValueError('qo has been provided in init function.')
        if not owns_qo and qo is None:
            raise ValueError('qo is not existed, should be provided.')

        if qi is not None:
            self.qi = qi
        if qo is not None:
            self.qo = qo

        # Evenly spaced quantized inputs over [0, 2**num_bits - 1].
        # NOTE(review): the table is stored as uint8, so values above 255
        # cannot be represented -- this looks like it assumes num_bits <= 8.
        sample_points = np.linspace(0, 2 ** self.num_bits - 1, self.lut_size)
        lut_qx = torch.tensor(sample_points, dtype=torch.uint8)

        # Dequantize the samples, evaluate sigmoid, quantize the results.
        real_inputs = self.qi.dequantize_tensor(lut_qx)
        real_outputs = torch.sigmoid(real_inputs)
        lut_qy = self.qo.quantize_tensor(real_outputs)

        self.register_buffer('lut_qy', lut_qy)
        self.register_buffer('lut_qx', lut_qx)

    def quantize_inference(self, x):
        """Interpolate ``x`` in the lookup table, then round and clamp."""
        out = interp(x, self.lut_qx, self.lut_qy)
        out.round_()
        out.clamp_(0., 2.**self.num_bits-1.)
        return out


--------------------------------------------------------------------------------
/post_training_quantize.py:
--------------------------------------------------------------------------------
  1 | from torch.serialization import load
  2 | from model import *
  3 | 
  4 | import argparse
  5 | import torch
  6 | import torch.nn as nn
  7 | import torch.optim as optim
  8 | from torchvision import datasets, transforms
  9 | import os
 10 | import os.path as osp
 11 | 
 12 | 
 13 | def direct_quantize(model, test_loader):
 14 |     for i, (data, target) in enumerate(test_loader, 1):
 15 |         output = model.quantize_forward(data)
 16 |         if i % 500 == 0:
 17 |             break
 18 |     print('direct quantization finish')
 19 | 
 20 | 
 21 | def full_inference(model, test_loader):
 22 |     correct = 0
 23 |     for i, (data, target) in enumerate(test_loader, 1):
 24 |         output = model(data)
 25 |         pred = output.argmax(dim=1, keepdim=True)
 26 |         correct += pred.eq(target.view_as(pred)).sum().item()
 27 |     print('\nTest set: Full Model Accuracy: {:.0f}%\n'.format(100. * correct / len(test_loader.dataset)))
 28 | 
 29 | 
 30 | def quantize_inference(model, test_loader):
 31 |     correct = 0
 32 |     for i, (data, target) in enumerate(test_loader, 1):
 33 |         output = model.quantize_inference(data)
 34 |         pred = output.argmax(dim=1, keepdim=True)
 35 |         correct += pred.eq(target.view_as(pred)).sum().item()
 36 |     print('\nTest set: Quant Model Accuracy: {:.0f}%\n'.format(100. * correct / len(test_loader.dataset)))
 37 | 
 38 | 
 39 | if __name__ == "__main__":
 40 |     batch_size = 64
 41 |     using_bn = True
 42 |     load_quant_model_file = None
 43 |     # load_model_file = None
 44 | 
 45 |     train_loader = torch.utils.data.DataLoader(
 46 |         datasets.MNIST('data', train=True, download=True, 
 47 |                        transform=transforms.Compose([
 48 |                             transforms.ToTensor(),
 49 |                             transforms.Normalize((0.1307,), (0.3081,))
 50 |                        ])),
 51 |         batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True
 52 |     )
 53 | 
 54 |     test_loader = torch.utils.data.DataLoader(
 55 |         datasets.MNIST('data', train=False, transform=transforms.Compose([
 56 |             transforms.ToTensor(),
 57 |             transforms.Normalize((0.1307,), (0.3081,))
 58 |         ])),
 59 |         batch_size=batch_size, shuffle=False, num_workers=1, pin_memory=True
 60 |     )
 61 | 
 62 |     if using_bn:
 63 |         model = NetBN()
 64 |         model.load_state_dict(torch.load('ckpt/mnist_cnnbn.pt', map_location='cpu'))
 65 |         save_file = "ckpt/mnist_cnnbn_ptq.pt"
 66 |     else:
 67 |         model = Net()
 68 |         model.load_state_dict(torch.load('ckpt/mnist_cnn.pt', map_location='cpu'))
 69 |         save_file = "ckpt/mnist_cnn_ptq.pt"
 70 | 
 71 |     model.eval()
 72 |     full_inference(model, test_loader)
 73 | 
 74 |     num_bits = 8
 75 |     model.quantize(num_bits=num_bits)
 76 |     model.eval()
 77 |     print('Quantization bit: %d' % num_bits)
 78 | 
 79 | 
 80 |     if load_quant_model_file is not None:
 81 |         model.load_state_dict(torch.load(load_quant_model_file))
 82 |         print("Successfully load quantized model %s" % load_quant_model_file)
 83 |     
 84 | 
 85 |     direct_quantize(model, train_loader)
 86 | 
 87 |     torch.save(model.state_dict(), save_file)
 88 |     model.freeze()
 89 | 
 90 |     # 测试是否设备转移是否正确
 91 |     # model.cuda()
 92 |     # print(model.qconv1.M.device)
 93 |     # model.cpu()
 94 |     # print(model.qconv1.M.device)
 95 | 
 96 |     quantize_inference(model, test_loader)
 97 | 
 98 |     
 99 | 
100 | 
101 | 
102 |     
103 | 


--------------------------------------------------------------------------------
/quantization_aware_training.py:
--------------------------------------------------------------------------------
  1 | from model import *
  2 | 
  3 | import argparse
  4 | import torch
  5 | import torch.nn as nn
  6 | import torch.optim as optim
  7 | from torchvision import datasets, transforms
  8 | import os
  9 | import os.path as osp
 10 | 
 11 | 
 12 | def quantize_aware_training(model, device, train_loader, optimizer, epoch):
 13 |     lossLayer = torch.nn.CrossEntropyLoss()
 14 |     for batch_idx, (data, target) in enumerate(train_loader, 1):
 15 |         data, target = data.to(device), target.to(device)
 16 |         optimizer.zero_grad()
 17 |         output = model.quantize_forward(data)
 18 |         loss = lossLayer(output, target)
 19 |         loss.backward()
 20 |         optimizer.step()
 21 | 
 22 |         if batch_idx % 50 == 0:
 23 |             print('Quantize Aware Training Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
 24 |                 epoch, batch_idx * len(data), len(train_loader.dataset), loss.item()
 25 |             ))
 26 | 
 27 | 
 28 | def full_inference(model, test_loader):
 29 |     correct = 0
 30 |     for i, (data, target) in enumerate(test_loader, 1):
 31 |         data, target = data.to(device), target.to(device)
 32 |         output = model(data)
 33 |         pred = output.argmax(dim=1, keepdim=True)
 34 |         correct += pred.eq(target.view_as(pred)).sum().item()
 35 |     print('\nTest set: Full Model Accuracy: {:.0f}%\n'.format(100. * correct / len(test_loader.dataset)))
 36 | 
 37 | 
 38 | def quantize_inference(model, test_loader):
 39 |     correct = 0
 40 |     for i, (data, target) in enumerate(test_loader, 1):
 41 |         data, target = data.to(device), target.to(device)
 42 |         output = model.quantize_inference(data)
 43 |         pred = output.argmax(dim=1, keepdim=True)
 44 |         correct += pred.eq(target.view_as(pred)).sum().item()
 45 |     print('\nTest set: Quant Model Accuracy: {:.0f}%\n'.format(100. * correct / len(test_loader.dataset)))
 46 | 
 47 | 
 48 | if __name__ == "__main__":
 49 |     batch_size = 64
 50 |     seed = 1
 51 |     epochs = 3
 52 |     lr = 0.01
 53 |     momentum = 0.5
 54 |     using_bn = True
 55 |     load_quant_model_file = None
 56 | #     load_quant_model_file = "ckpt/mnist_cnnbn_qat.pt"
 57 | 
 58 |     torch.manual_seed(seed)
 59 | 
 60 |     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 61 | 
 62 |     train_loader = torch.utils.data.DataLoader(
 63 |         datasets.MNIST('data', train=True, download=True, 
 64 |                        transform=transforms.Compose([
 65 |                             transforms.ToTensor(),
 66 |                             transforms.Normalize((0.1307,), (0.3081,))
 67 |                        ])),
 68 |         batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=False
 69 |     )
 70 | 
 71 |     test_loader = torch.utils.data.DataLoader(
 72 |         datasets.MNIST('data', train=False, transform=transforms.Compose([
 73 |             transforms.ToTensor(),
 74 |             transforms.Normalize((0.1307,), (0.3081,))
 75 |         ])),
 76 |         batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=False
 77 |     )
 78 | 
 79 |     if using_bn:
 80 |         model = NetBN()
 81 |         model.load_state_dict(torch.load('ckpt/mnist_cnnbn.pt', map_location='cpu'))
 82 |         save_file = "ckpt/mnist_cnnbn_qat.pt"
 83 |     else:
 84 |         model = Net()
 85 |         model.load_state_dict(torch.load('ckpt/mnist_cnn.pt', map_location='cpu'))
 86 |         save_file = "ckpt/mnist_cnn_qat.pt"
 87 |     model.to(device)
 88 | 
 89 |     optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
 90 | 
 91 |     model.eval()
 92 |     
 93 |     full_inference(model, test_loader)
 94 | 
 95 |     num_bits = 8
 96 |     model.quantize(num_bits=num_bits)
 97 |     print('Quantization bit: %d' % num_bits)
 98 | 
 99 |     if load_quant_model_file is not None:
100 |         model.load_state_dict(torch.load(load_quant_model_file))
101 |         print("Successfully load quantized model %s" % load_quant_model_file)
102 | 
103 |     model.train()
104 | 
105 |     for epoch in range(1, epochs + 1):
106 |         quantize_aware_training(model, device, train_loader, optimizer, epoch)
107 | 
108 |     model.eval()
109 |     torch.save(model.state_dict(), save_file)
110 | 
111 |     model.freeze()
112 | 
113 |     quantize_inference(model, test_loader)
114 | 
115 |     
116 | 
117 | 
118 | 
119 |     
120 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
 1 | from model import *
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | import torch.optim as optim
 6 | from torchvision import datasets, transforms
 7 | import os
 8 | import os.path as osp
 9 | 
10 | 
def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one epoch with cross-entropy loss.

    Args:
        model: Module switched to training mode and called on each batch.
        device: Device the batches are moved to.
        train_loader: Iterable of ``(data, target)`` batches; its ``dataset``
            length is only used in the progress message.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number shown in the progress message.
    """
    model.train()
    criterion = torch.nn.CrossEntropyLoss()
    for step, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        # Report progress on batch 0 and every 50th batch after it.
        if step % 50 != 0:
            continue
        seen = step * len(inputs)
        total = len(train_loader.dataset)
        print('Train Epoch: {} [{}/{}]\tLoss: {:.6f}'.format(
            epoch, seen, total, loss.item()
        ))
26 | 
27 | def test(model, device, test_loader):
28 |     model.eval()
29 |     test_loss = 0
30 |     correct = 0
31 |     lossLayer = torch.nn.CrossEntropyLoss(reduction='sum')
32 |     for data, target in test_loader:
33 |         data, target = data.to(device), target.to(device)
34 |         output = model(data)
35 |         test_loss += lossLayer(output, target).item()
36 |         pred = output.argmax(dim=1, keepdim=True)
37 |         correct += pred.eq(target.view_as(pred)).sum().item()
38 |     
39 |     test_loss /= len(test_loader.dataset)
40 | 
41 |     print('\nTest set: Average loss: {:.4f}, Accuracy: {:.0f}%\n'.format(
42 |         test_loss, 100. * correct / len(test_loader.dataset)
43 |     ))
44 | 
45 | 
if __name__ == "__main__":
    # Train the float MNIST model and optionally save its weights.
    batch_size = 64
    test_batch_size = 64
    seed = 1
    epochs = 15
    lr = 0.01
    momentum = 0.5
    save_model = True
    using_bn = True

    torch.manual_seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=True, download=True,
                       transform=transforms.Compose([
                            transforms.ToTensor(),
                            transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True, num_workers=1, pin_memory=True
    )

    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('data', train=False, transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=test_batch_size, shuffle=True, num_workers=1, pin_memory=True
    )

    if using_bn:
        model = NetBN().to(device)
    else:
        model = Net().to(device)

    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    # Evaluate on the test split after every training epoch.
    for epoch in range(1, epochs + 1):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)

    if save_model:
        # exist_ok replaces the separate existence check, which could fail if
        # the directory appeared between the check and the creation.
        os.makedirs('ckpt', exist_ok=True)
        if using_bn:
            torch.save(model.state_dict(), 'ckpt/mnist_cnnbn.pt')
        else:
            torch.save(model.state_dict(), 'ckpt/mnist_cnn.pt')


--------------------------------------------------------------------------------