├── .idea
    ├── misc.xml
    ├── modules.xml
    ├── quantizednn.iml
    └── vcs.xml
├── README.md
├── config
├── data
    ├── WandA_lr0.01_scalar2.5.png
    └── smurf.jpg
├── graffiti
    ├── QConv2D.py
    ├── README.md
    ├── auto_grad.py
    ├── caffe_guidenet_weight_init.py
    ├── diffierent_gpu_grad_backward.py
    ├── float32touint8.py
    ├── get_module_weight.py
    ├── inference_on_image.py
    ├── merge_conv_bn.py
    ├── nowgood.py
    ├── quantize_test.py
    ├── register_forward_hook.py
    ├── stat_parameters.py
    └── weight_distribute.py
├── main.py
├── net
    ├── net_bn_conv_merge.py
    ├── net_bn_conv_merge_quantize.py
    ├── net_quantize_activation.py
    ├── net_quantize_guide.py
    ├── net_quantize_weight.py
    └── simple_net.py
├── quantize
    ├── guided_distance_view.py
    ├── quantize_guided.py
    ├── quantize_method.py
    ├── quantize_module_.py
    └── quantize_old_plan.py
├── requirements.txt
└── utils
    ├── data_loader.py
    ├── meter.py
    ├── train_val.py
    ├── unzip.sh
    └── valprep.sh


/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.5 (pytorch)" project-jdk-type="Python SDK" />
4 | </project>


--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/quantizednn.iml" filepath="$PROJECT_DIR$/.idea/quantizednn.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/.idea/quantizednn.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="jdk" jdkName="Python 3.5 (pytorch)" jdkType="Python SDK" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="projectConfiguration" value="py.test" />
10 |     <option name="PROJECT_TEST_RUNNER" value="py.test" />
11 |   </component>
12 | </module>


--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$" vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## Quantize CNN Model using PyTorch(python3.5)
  2 |  
  3 | Implement [Towards Effective Low-bitwidth Convolutional Neural Networks](https://arxiv.org/abs/1711.00205)
  4 | 
  5 | ```
  6 | @InProceedings{Zhuang_2018_CVPR,
  7 | author = {Zhuang, Bohan and Shen, Chunhua and Tan, Mingkui and Liu, Lingqiao and Reid, Ian},
  8 | title = {Towards Effective Low-Bitwidth Convolutional Neural Networks},
  9 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
 10 | month = {June},
 11 | year = {2018}
 12 | }
 13 | ```
 14 | 
 15 | ### 下载和配置
 16 | 
 17 | ```bash
 18 | git clone https://github.com/nowgood/QuantizeCNNModel.git && cd QuantizeCNNModel
 19 | pip install -r requirements.txt
 20 | echo export PYTHONPATH=$PYTHONPATH:`pwd` >> ~/.bashrc
 21 | source  ~/.bashrc
 22 | ```
 23 | 
 24 | ### 使用方法
 25 | 
 26 | 使用如下命令查看函数使用方法 
 27 | 
 28 | ```
 29 | python main.py -h 
 30 | ```
 31 | 
 32 | 
 33 | 
 34 | 然后使用 tensorboard 查看训练过程
 35 | 
 36 | ```
 37 | # QuantizeCNNModel 目录下
 38 | tensorboard --logdir model/xxx/ 
 39 | ```
 40 | 然后就可以在 `http:localhost:6006` 查看训练的损失值和精确度， 以及每个epoch的在验证集上的精确度
 41 | 
 42 | ![top5](https://github.com/nowgood/QuantizeCNNModel/raw/master/data/WandA_lr0.01_scalar2.5.png)
 43 | 
 44 | ### 训练方法
 45 | 
 46 | 训练模式选择:
 47 | 
 48 |        0: full precision training from scratch
 49 |        1: only quantize weight
 50 |        2. quantize activation using quantized weight to init model
 51 |        3. joint quantize weight and activation from pre-trained imageNet model
 52 |        4. guided quantize weight and activation from pre-trained imageNet model
 53 | 
 54 | 
 55 | ### 量化权重
 56 | 
 57 | 单机多卡训练, 如： 使用 8 个GPU的后 4 个GPU来训练25个epoch
 58 | 
 59 | ```
 60 | CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py \
 61 |     --mode 1 \
 62 |     --workers 16 \
 63 |     --epochs 5 \
 64 |     --batch-size 1024\
 65 |     --device-ids 0 1 2 3 \
 66 |     --lr 0.0001 \
 67 |     --lr-step 2 \
 68 |     --save-dir model/W_lr1e-4_epoch5 \
 69 |     --data /home/user/wangbin/datasets/ILSVRC2012  \
 70 |     |tee  model/W_lr_1e-4_epoch5.log 2>&1
 71 | ``` 
 72 | 
 73 | ```
 74 | 
 75 | CUDA_VISIBLE_DEVICES=4,5,6,7 python main.py \
 76 |     --mode 1 \
 77 |     --workers 16 \
 78 |     --epochs 10 \
 79 |     --batch-size 1024\
 80 |     --device-ids 0 1 2 3 \
 81 |     --lr 0.0001 \
 82 |     --lr-step 4 \
 83 |     --save-dir model/W_lr1e-4_epoch10 \
 84 |     --data /home/user/wangbin/datasets/ILSVRC2012  \
 85 |     |tee  model/W_lr_1e-4_epoch10.log
 86 | ```   
 87 | 
 88 | ### 使用量化权重的参数来初始化量化激活的网络
 89 | 
 90 | ```bash
 91 | CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py \
 92 |     --mode 2 \
 93 |     --workers 16 \
 94 |     --epochs 35 \
 95 |     --batch-size 1024\
 96 |     --device-ids 0 1 2 3 \
 97 |     --lr 0.001 \
 98 |     --weight-quantized model/W_lr1e-4_epoch2/model_best.pth.tar \
 99 |     --save-dir model/AafterW_lr1e-2_epoch35 \
100 |     --data /home/user/wangbin/datasets/ILSVRC2012  \
101 |     |tee  model/AafterW_lr1e-2_epoch35.log
102 | ```
103 | 
104 | **resume**
105 | 
106 | ```bash
107 | CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py \
108 |     --mode 2 \
109 |     --workers 16 \
110 |     --epochs 35 \
111 |     --batch-size 1024\
112 |     --device-ids 0 1 2 3 \
113 |     --lr 0.001 \
114 |     --resume \
115 |     --weight-quantized model/W_lr1e-4_epoch2/model_best.pth.tar \
116 |     --save-dir model/AafterW_lr1e-3_epoch35 \
117 |     --data /home/user/wangbin/datasets/ILSVRC2012  \
118 |     | tee  model/AafterW_lr1e-3_epoch35.log
119 | ```
120 | 
121 | ### 同时量化权重和激活
122 | 
123 | ```
124 | CUDA_VISIBLE_DEVICES=4,5,6,7 python main.py \
125 |     --mode 3 \
126 |     --arch resnet18 \
127 |     --workers 16 \
128 |     --epochs  35 \
129 |     --batch-size 512 \
130 |     --device-ids 0 1 2 3 \
131 |     --lr 0.001 \
132 |     --lr-step 10 \
133 |     --data /home/user/wangbin/datasets/ILSVRC2012  \
134 |     --save-dir model/AandW_lr1e-3_epoch35 \
135 |     | tee AandW_1e-3_epoch35.log
136 | ```
137 | 
138 | ```
139 | CUDA_VISIBLE_DEVICES=3,4,5,6 python main.py \
140 |     --mode 3 \
141 |     --arch resnet18 \
142 |     --workers 16 \
143 |     --epochs  50 \
144 |     --batch-size 512 \
145 |     --device-ids 0 1 2 3 \
146 |     --lr 0.1 \
147 |     --lr-step 15 \
148 |     --data /home/user/wangbin/datasets/ILSVRC2012  \
149 |     --save-dir model/AandW_gemm_lr1e-1_epoch50 \
150 |     | tee AandW_gemm_1e-1_epoch50.log
151 | ```
152 | 
153 | ### 使用 guidance 信号来同时量化权重和激活
154 | 
155 | ```bash
156 | CUDA_VISIBLE_DEVICES=3,4,5,6 python main.py \
157 |     --mode 4 \
158 |     --workers 16 \
159 |     --epochs  35 \
160 |     --batch-size 512 \
161 |     --device-ids 0 1 2 3\
162 |     --balance 0.1 \
163 |     --lr 0.001 \
164 |     --rate 1 \
165 |     --norm 1 \
166 |     --data /home/user/wangbin/datasets/ILSVRC2012  \
167 |     --save-dir /home/user/wangbin/quantizednn/model/guided_balance0.1_lr1e-3_rate1_epoch35 \
168 |     | tee model/guided_balance0.11_lr1e-3_rate1_epoch35.log
169 | ```
170 | 
171 | ```bash
172 | CUDA_VISIBLE_DEVICES=1,2,3 python main.py  \
173 |    --mode 4  \
174 |    --workers 16  \
175 |    --epochs  35  \
176 |    --batch-size 384  \
177 |    --device-ids 0 1 2  \
178 |    --balance 0.1  \
179 |    --lr 0.001  \
180 |    --rate 1   \
181 |    --norm 1  \
182 |    --data /home/user/wangbin/datasets/ILSVRC2012 \
183 |    --resume  \
184 |    --save-dir /home/user/wangbin/quantizednn/model/guided_balance0.1_lr1e-3_rate1_epoch35 \
185 |    | tee model/guided_balance0.1_lr1e-3_rate1_epoch35_resume.log
186 | ```
187 | 
188 | #### view distance
189 | 
190 | ```bash
191 | CUDA_VISIBLE_DEVICES=4,5,6,7 python main.py  \
192 |    --mode 4  \
193 |    --workers 16  \
194 |    --epochs  35  \
195 |    --batch-size 512  \
196 |    --device-ids 0 1 2 3  \
197 |    --balance 0.1  \
198 |    --lr 0.001  \
199 |    --rate 1   \
200 |    --norm 1  \
201 |    --data /home/user/wangbin/datasets/ILSVRC2012 \
202 |    --save-dir /home/user/wangbin/quantizednn/model/guided_balance0.1_lr1e-3_rate1_epoch35_view 
203 | ```


--------------------------------------------------------------------------------
/config:
--------------------------------------------------------------------------------
 1 | ## mode 2
 2 | 
 3 | --mode 2
 4 | --epochs 30
 5 | --batch-size 64
 6 | --device-ids 0
 7 | --lr 0.001
 8 | --weight-quantized /home/wangbin/Desktop/uisee/model_quantize/W_lr1e-4_epoch10/checkpoint.pth.tar
 9 | --save-dir model
10 | --data /media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/ImageNet_ILSVRC2012/ILSVRC2012
11 | 
12 | ## mode 3
13 | 
14 | --mode 2
15 | --epochs 30
16 | --batch-size 64
17 | --device-ids 0
18 | --lr 0.001
19 | --save-dir model
20 | --data /media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/ImageNet_ILSVRC2012/ILSVRC2012
21 | 
22 | ## mode 4
23 | 
24 | --mode 4
25 | --epochs 30
26 | --batch-size 64
27 | --device-ids 0
28 | --balance 100000
29 | --lr 0.001
30 | --save-dir model
31 | --data /media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/ImageNet_ILSVRC2012/ILSVRC2012


--------------------------------------------------------------------------------
/data/WandA_lr0.01_scalar2.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nowgood/QuantizeCNNModel/531fd8d12fdb42f48a91e9378008a1bb86905d6d/data/WandA_lr0.01_scalar2.5.png


--------------------------------------------------------------------------------
/data/smurf.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nowgood/QuantizeCNNModel/531fd8d12fdb42f48a91e9378008a1bb86905d6d/data/smurf.jpg


--------------------------------------------------------------------------------
/graffiti/QConv2D.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | import torch
 3 | import torch.nn as nn
 4 | from quantize.quantize_method import QuantizeWeightOrActivation
 5 | import torch.nn.functional as F
 6 | quantize = QuantizeWeightOrActivation()
 7 | 
 8 | 
 9 | class QConv2D(torch.nn.Conv2d):
10 |     def __init__(self, n_channels, out_channels, kernel_size, stride=1,
11 |                  padding=0, dilation=1, groups=1, bias=True):
12 |         super(QConv2D, self).__init__(n_channels, out_channels, kernel_size, stride,
13 |                                       padding, dilation, groups, bias)
14 |         nn.init.constant_(self.weight, 1)
15 | 
16 |     def forward(self, x):
17 |         qweight = quantize.quantize_weights_bias(self.weight)
18 |         x = F.conv2d(x, qweight)
19 |         return x
20 | 
21 | 
22 | if __name__ == "__main__":
23 |     qconv = QConv2D(1, 1, 3)
24 |     qconv.zero_grad()
25 |     x = torch.ones(1, 1, 3, 3, requires_grad=True).float()
26 |     y = qconv(x)
27 |     y.backward()
28 |     print(qconv.weight.grad)
29 | 
30 |     a = torch.ones(3, 3, requires_grad=True).float()
31 |     w = torch.nn.init.constant_(torch.empty(3, 3, requires_grad=True), 1)
32 |     qw = quantize.quantize_weights_bias(w)
33 | 
34 |     z = (qw * a).sum()
35 |     z.backward()
36 |     print(w.grad)
37 | 
38 |     qa = quantize.quantize_weights_bias(a).sum()
39 |     qa.backward()
40 |     print(a.grad)


--------------------------------------------------------------------------------
/graffiti/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ### Usage: [argparse](http://wiki.jikexueyuan.com/project/explore-python/Standard-Modules/argparse.html) 
 3 | 
 4 | ```
 5 |  每个参数解释如下:
 6 | 
 7 |     name or flags - 选项字符串的名字或者列表，例如 foo 或者 -f, --foo。
 8 |     action - 命令行遇到参数时的动作，默认值是 store。
 9 |     store_const，表示赋值为const；
10 |     append，将遇到的值存储成列表，也就是如果参数重复则会保存多个值;
11 |     append_const，将参数规范中定义的一个值保存到一个列表；
12 |     count，存储遇到的次数；此外，也可以继承 argparse.Action 自定义参数解析；
13 |     nargs - 应该读取的命令行参数个数，可以是具体的数字，或者是?号，当不指定值时对于 Positional argument 使用 default，
14 |             对于 Optional argument 使用 const；
15 |             或者是 * 号，表示 0 或多个参数；
16 |             或者是 + 号表示 1 或多个参数。
17 |     const - action 和 nargs 所需要的常量值。
18 |     default - 不指定参数时的默认值。
19 |     type - 命令行参数应该被转换成的类型。
20 |     choices - 参数可允许的值的一个容器。
21 |     required - 可选参数是否可以省略 (仅针对可选参数)。
22 |     help - 参数的帮助信息，当指定为 argparse.SUPPRESS 时表示不显示该参数的帮助信息.
23 |     metavar - 在 usage 说明中的参数名称，对于必选参数默认就是参数名称，对于可选参数默认是全大写的参数名称.
24 |     dest - 解析后的参数名称，默认情况下，对于可选参数选取最长的名称，中划线转换为下划线.
25 | ```
26 | 
27 | ### Usage imagenet.py 
28 | 
29 | ```
30 | usage: guided.py [-h] [--arch ARCH] [-j N] [--epochs N] [--start-epoch N] [-b N]
31 |                [--lr LR] [--momentum M] [--weight-decay W] [--print-freq N]
32 |                [--resume PATH] [-e] [--pretrained]
33 |                DIR
34 | 
35 | PyTorch ImageNet Training
36 | 
37 | positional arguments:
38 |   DIR                   path to dataset
39 | 
40 | optional arguments:
41 |   -h, --help            show this help message and exit
42 |   --arch ARCH, -a ARCH  model architecture: alexnet | resnet | resnet101 |
43 |                         resnet152 | resnet18 | resnet34 | resnet50 | vgg |
44 |                         vgg11 | vgg11_bn | vgg13 | vgg13_bn | vgg16 | vgg16_bn
45 |                         | vgg19 | vgg19_bn (default: resnet18)
46 |   -j N, --workers N     number of data loading workers (default: 4)
47 |   --epochs N            number of total epochs to run
48 |   --start-epoch N       manual epoch number (useful on restarts)
49 |   -b N, --batch-size N  mini-batch size (default: 256)
50 |   --lr LR, --README.md-rate LR
51 |                         initial README.md rate
52 |   --momentum M          momentum
53 |   --weight-decay W, --wd W
54 |                         weight decay (default: 1e-4)
55 |   --print-freq N, -p N  print frequency (default: 10)
56 |   --resume PATH         path to latest checkpoint (default: none)
57 |   -e, --evaluate        evaluate model on validation set
58 |   --pretrained          use pre-trained model
59 | 
60 | ```
61 | 
62 | ### use pretrained model to initialize your modified model
63 | 
64 | ```
65 | model_dict = your_model.state_dict()
66 | 
67 | pretrained_model = models.__dict__[args.arch](pretrained=True)
68 | pretrained_dict = pretrained_model.state_dict()
69 | 
70 | # 将 pretrained_dict 里不属于 model_dict 的键剔除掉
71 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
72 | 
73 | model_dict.update(pretrained_dict)
74 | your_model.load_state_dict(model_dict)
75 | ```
76 | 
77 | ### how to get nn.DataParallel model filter weight
78 | 
79 | ```python
80 | low_prec_state_dict = low_prec_model.state_dict()
81 | full_prec_state_dict = full_prec_model.state_dict()
82 | low_prec_norm = low_prec_state_dict[qconv1].norm(p=2) + low_prec_state_dict[qlayer4].norm(p=2)
83 | full_prec_norm = full_prec_state_dict[qconv1].norm(p=2) + full_prec_state_dict[qlayer4].norm(p=2)
84 | 
85 | l2 = (low_prec_norm + full_prec_norm) * args.balance
86 | ```
87 | 
88 | ### torch.topk
89 | 
90 | ```
91 | >>> x = torch.arange(1, 6)
92 | >>> x
93 | tensor([ 1.,  2.,  3.,  4.,  5.])
94 | >>> torch.topk(x, 3)
95 | (tensor([ 5.,  4.,  3.]), tensor([ 4,  3,  2]))
96 | ```


--------------------------------------------------------------------------------
/graffiti/auto_grad.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | import torch
  4 | import torch.nn as nn
  5 | from net import simple_net
  6 | import torch.optim as optim
  7 | from quantize.quantize_method import QuantizeWeightOrActivation
  8 | import queue
  9 | 
 10 | qw = QuantizeWeightOrActivation()
 11 | 
 12 | 
 13 | class MyFunction(torch.autograd.Function):
 14 | 
 15 |     @staticmethod
 16 |     def forward(ctx, i):
 17 | 
 18 |         tanh_i = torch.tanh(i)
 19 |         max_w = torch.max(torch.abs(tanh_i)).data
 20 |         out = tanh_i / max_w
 21 |         ctx.save_for_backward(tanh_i, max_w)
 22 |         return out
 23 | 
 24 |     @staticmethod
 25 |     def backward(ctx, grad_outputs):
 26 |         by, bm, = ctx.saved_tensors
 27 |         return grad_outputs*((1-torch.pow(by, 2.0))/bm)
 28 | 
 29 | 
 30 | def modify_weights(weight):
 31 |     fn = MyFunction.apply
 32 |     return fn(weight)
 33 | 
 34 | 
 35 | def weights_update():
 36 |     feature_map = torch.ones(1, 1, 3, 3, requires_grad=True)
 37 |     kernel = nn.Conv2d(1, 1, kernel_size=3, bias=False)
 38 | 
 39 |     # start
 40 |     print("\n自动求导求量化梯度")
 41 |     # w = Variable(kernel.weight.data.clone(), requires_grad=True)
 42 |     w = kernel.weight
 43 |     y = torch.tanh(w)/torch.max(torch.abs(torch.tanh(w)))
 44 |     z = y.sum()
 45 |     z.backward()
 46 |     print(w.grad)
 47 |     kernel.zero_grad()
 48 |     # end
 49 | 
 50 |     print("权重初始化\n", kernel.weight.data, "\n")
 51 | 
 52 |     tanh_w = torch.tanh(kernel.weight)
 53 |     max_w = torch.max(torch.abs(tanh_w))
 54 |     hand_grad = (1 - torch.pow(kernel.weight, 2.0)) / max_w
 55 |     print("手动求梯度\n", hand_grad, "\n")   # 卷积核的面积=3x3=9, y=(x*x).mean(), y'=2x/9
 56 | 
 57 |     # fn_w = modify_weights(kernel.weight)
 58 |     fn_w = qw.quantize_weights_bias(kernel.weight)
 59 |     fn_w.sum().backward()
 60 | 
 61 |     square_weight_grad = kernel.weight.grad.data.clone()
 62 |     print("自动求梯度\n", square_weight_grad, "\n")  # 只需要在原本的梯度上乘以卷积核的面积就好
 63 | 
 64 |     print("量化前权重\n", kernel.weight.data, "\n")
 65 | 
 66 |     # 这种方式没法更新模型的权重, 看 state_dict 函数可以看出, 返回的是一个新建的有序字典,
 67 |     # 更新的其实是新字典, 而不是模型参数, 使用 load_state_dict 方法
 68 |     # kernel.state_dict().update(weight=fn_w)
 69 | 
 70 |     # state_dict = kernel.state_dict()  # 第 1 种方法更新权重
 71 |     # state_dict.update(weight=square)
 72 |     # kernel.load_state_dict(state_dict)
 73 | 
 74 |     # kernel.weight = nn.Parameter(square)  # 第 2 种方法更新权重
 75 | 
 76 |     kernel.weight.data.copy_(fn_w.data)  # 第 3 种方法更新权重
 77 | 
 78 |     print("量化后权重\n", kernel.weight.data, "\n")
 79 | 
 80 |     # 权重的另一个计算图
 81 |     other_graph = kernel(feature_map)
 82 |     other_graph.backward()
 83 | 
 84 |     print("不使用 Module.zer_grad(), 卷积后权重梯度\n", kernel.weight.grad, "\n")
 85 | 
 86 |     kernel.zero_grad()
 87 |     other_graph = kernel(feature_map)
 88 |     other_graph.backward()
 89 | 
 90 |     print("使用 Module.zer_grad(), 卷积后权重梯度\n", kernel.weight.grad, "\n")
 91 |     print("手动计算梯度更新(加法)\n", kernel.weight.grad + square_weight_grad, "\n")
 92 |     print("手动计算梯度更新(乘法)\n", kernel.weight.grad * square_weight_grad, "\n")
 93 | 
 94 | 
 95 | def module_apply():
 96 |     saved_param = queue.Queue()
 97 |     saved_grad = queue.Queue()
 98 | 
 99 |     def info(s):
100 |         print("\n---{}---\n".format(s))
101 | 
102 |         for k, v in net.state_dict().items():
103 |             print(k, v, "\n")
104 |             break
105 | 
106 |     def square(module):
107 |         if type(module) == nn.Conv2d:
108 |             saved_param.put(module.weight.data.clone())  # 第一步, 保存全精度权重
109 |             quantize_w = modify_weights(module.weight)  # 第二步, 量化权重
110 |             quantize_w.sum().backward()
111 |             saved_grad.put(module.weight.grad.data.clone())  # 第三步, 保存量化梯度
112 |             module.weight.data.copy_(quantize_w.data)  # 第四步, 使用量化权重代替全精度权重
113 | 
114 |     def restore(module):
115 |         if type(module) == nn.Conv2d:
116 |             module.weight.data.copy_(saved_param.get())  # 第四步, 使用量化权重代替全精度权重
117 | 
118 |     def update_weight(module):
119 |         if type(module) == nn.Conv2d:
120 |             module.weight.grad.data.mul_(saved_grad.get())  # 第四步, 使用量化权重代替全精度权重
121 | 
122 |     net = simple_net.Net()
123 |     info("初始化权重")
124 | 
125 |     # net.zero_grad()  # optimizer.zero_grad() is enough
126 |     # 网络输入, 输出
127 |     input_ = torch.ones(1, 1, 6, 6, requires_grad=True)
128 |     lable = torch.ones(1, 2)
129 | 
130 |     optimizer = optim.SGD(net.parameters(), lr=1)
131 |     criterion = nn.MSELoss()
132 | 
133 |     print("\n\n")
134 | 
135 |     print(net.state_dict().keys(), "\n")
136 |     print(optimizer.param_groups)
137 |     print(optimizer.state_dict())
138 | 
139 |     print("\n\n")
140 | 
141 |     for _ in range(5):
142 | 
143 |         net.apply(square)
144 |         info("量化权重\n")
145 |         print("net.conv1.weight.grad\n", net.conv1.weight.grad)
146 |         output = net(input_)
147 |         loss = criterion(output, lable)
148 |         optimizer.zero_grad()  # very important!
149 | 
150 |         print("\nnet.conv1.weight.grad after optimizer.zero_grad()\n", net.conv1.weight.grad)
151 | 
152 |         loss.backward()
153 | 
154 |         net.apply(restore)
155 |         info("恢复全精度权重")
156 | 
157 |         net.apply(update_weight)
158 |         print(net.state_dict().keys(), "\n")
159 | 
160 |         optimizer.step()
161 |         info("更新全精度权重")
162 |         print(net.state_dict().keys(), "\n")
163 | 
164 |     torch.save(net.state_dict(), "../model/model_name_changed.pkl")
165 |     xx = torch.load("../model/model_name_changed.pkl")
166 |     print(xx.keys())
167 | 
168 | 
169 | if __name__ == "__main__":
170 |     module_apply()


--------------------------------------------------------------------------------
/graffiti/caffe_guidenet_weight_init.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import caffe
3 | 
4 | net = caffe.Net("/home/wangbin/github/RFCN-FasterRCNN/objectDetection/UISEE-FRCNN-3/model_config/train.prototxt",
5 |                 "/media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/UISEE/"
6 |                 "caffe_models/PVANET/PVANET-LITE/PVANET-LITE.caffemodel", caffe.TEST)\
7 | 
8 | print(type(net.params))


--------------------------------------------------------------------------------
/graffiti/diffierent_gpu_grad_backward.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | 
4 | a = torch.ones(2, 2, requires_grad=True).cuda(1)
5 | b = torch.rand(2, 2, requires_grad=True).cuda(2)
6 | c = a + b
7 | 
8 | print(c)


--------------------------------------------------------------------------------
/graffiti/float32touint8.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from net import net_quantize_guide
 3 | from torchvision import models
 4 | 
 5 | # coding=utf-8
 6 | model = net_quantize_guide.resnet18()
 7 | print(model.state_dict().keys())
 8 | model = models.resnet18(pretrained=True)
 9 | state_dict = model.state_dict()
10 | state_dict = {k: v.to(torch.uint8) for k, v in state_dict.items()}
11 | torch.save(state_dict, "nowgood.pth")
12 | 


--------------------------------------------------------------------------------
/graffiti/get_module_weight.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | import torch
 3 | import torchvision.models as models
 4 | 
 5 | CUDA_VISIBLE_DEVICES = 0, 3
 6 | model = models.resnet18(pretrained=True)
 7 | model = torch.nn.DataParallel(model, [0]).cuda()
 8 | 
 9 | state_dict = model.state_dict()
10 | 
11 | second_last_convlayer_weight = state_dict['module.layer4.1.conv1.weight']
12 | last_convlayer_weight = state_dict['module.layer4.1.conv2.weight']
13 | print(second_last_convlayer_weight)
14 | print(last_convlayer_weight)
15 | print(last_convlayer_weight.norm(p=2))
16 | l1 = torch.norm(last_convlayer_weight, p=2)
17 | print(l1)
18 | 
19 | print(len(list(model.modules())), type(model.modules))
20 | print(state_dict.keys())


--------------------------------------------------------------------------------
/graffiti/inference_on_image.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | import torch
 3 | import torchvision.models as models
 4 | import cv2
 5 | 
 6 | IMG_PATH = "/home/wangbin/PycharmProjects/quantizednn/data/smurf.jpeg"
 7 | 
 8 | 
 9 | def image_read(img_path):
10 |     img = cv2.imread(img_path)
11 |     img = cv2.resize(img, (224, 224))
12 |     img = img.transpose(2, 0, 1)
13 |     img = torch.tensor(img).div(255).sub(0.5).float()
14 |     img = torch.unsqueeze(img, 0)
15 |     return img
16 | 
17 | 
18 | def torch_modules(model_):
19 |     print("module.modules()\n")
20 |     for e in model_.modules():
21 |         print(type(e), e)
22 | 
23 |     print("modules._modules.keys()\n")
24 |     for e in model_._modules.keys():
25 |         print(type(e), e)
26 | 
27 |     print("modules.children.keys()\n")
28 |     for e in model_.children():
29 |         print(type(e), e)
30 | 
31 | 
32 | if __name__ == "__main__":
33 |     image = image_read(IMG_PATH)
34 |     model = models.resnet18(pretrained=True)
35 |     model = torch.nn.DataParallel(model)
36 |     model.eval()
37 | 
38 |     pred = model(image)
39 |     print(pred.size())
40 | 


--------------------------------------------------------------------------------
/graffiti/merge_conv_bn.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | import torch
 3 | from torchvision import models
 4 | import numpy as np
 5 | import os
 6 | from net import net_bn_conv_merge, net_bn_conv_merge_quantize
 7 | from utils.data_loader import load_val_data
 8 | from utils.train_val import validate
 9 | 
10 | epsilon = 1e-5
11 | data = "/media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/ImageNet_ILSVRC2012/ILSVRC2012"
12 | 
13 | model = models.resnet18(pretrained=True)
14 | # merge_model = net_bn_conv_merge.resnet18()
15 | merge_model = net_bn_conv_merge_quantize.resnet18()
16 | state_dict = model.state_dict()
17 | merge_state_dict = merge_model.state_dict()
18 | 
19 | # for name in state_dict:
20 | #     print(name)
21 | 
22 | merge_state_dict.update({"fc.weight": state_dict["fc.weight"],
23 |                         "fc.bias": state_dict["fc.bias"]})
24 | del state_dict["fc.weight"]
25 | del state_dict["fc.bias"]
26 | params = np.array(list(state_dict.keys()))
27 | 
28 | params = params.reshape((-1, 5))
29 | for index in range(params.shape[0]):
30 |     weight = state_dict[params[index][0]]
31 |     gamma = state_dict[params[index][1]]
32 |     beta = state_dict[params[index][2]]
33 |     running_mean = state_dict[params[index][3]]
34 |     running_var = state_dict[params[index][4]]
35 |     delta = gamma/(torch.sqrt(running_var+epsilon))
36 |     weight = weight * delta.view(-1, 1, 1, 1)
37 |     bias = (0-running_mean) * delta + beta
38 |     merge_state_dict.update({params[index][0]: weight,
39 |                              params[index][0][:-6] + "bias": bias})
40 | merge_model.load_state_dict(merge_state_dict)
41 | merge_model_name = "resnet18_merge_bn_conv.pth.tar"
42 | torch.save(merge_model.state_dict(), merge_model_name)
43 | 
44 | """
45 |     conv1.weight
46 |     bn1.weight
47 |     bn1.bias
48 |     bn1.running_mean
49 |     bn1.running_var
50 |     layer1.0.conv1.weight
51 |     layer1.0.bn1.weight
52 |     layer1.0.bn1.bias
53 |     layer1.0.bn1.running_mean
54 |     layer1.0.bn1.running_var
55 | """
56 | 
57 | # print("bn1.weight: \n", len(state_dict["bn1.weight"]), state_dict["bn1.weight"])
58 | # print("bn1.bias: \n", len(state_dict["bn1.bias"]), state_dict["bn1.bias"])
59 | # print("bn1.running_mean: \n", state_dict["bn1.running_mean"])
60 | # print("bn1.running_val: \n", state_dict["bn1.running_var"])
61 | 
62 | val_loader = load_val_data(data)
63 | evaluate = merge_model_name
64 | if os.path.isfile(evaluate):
65 |     print("Loading evaluate model '{}'".format(evaluate))
66 |     checkpoint = torch.load(evaluate)
67 |     merge_model.load_state_dict(checkpoint)
68 |     print("Loaded evaluate model '{}'".format(evaluate))
69 | else:
70 |     print("No evaluate mode found at '{}'".format(evaluate))
71 | 
72 | merge_model.cuda()
73 | merge_model.eval()
74 | criterion = torch.nn.CrossEntropyLoss().cuda()
75 | validate(merge_model, val_loader, criterion)
76 | 


--------------------------------------------------------------------------------
/graffiti/nowgood.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from net import net_quantize_guide
 3 | from torchvision import models
 4 | 
 5 | 
 6 | x = torch.ones(5, 3)
 7 | bias = torch.ones(5, 1)
 8 | bias[0][0] = 4
 9 | bias[3][0] = 3
10 | y = x * bias
11 | print(y)


--------------------------------------------------------------------------------
/graffiti/quantize_test.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | import torch
 3 | from net.simple_net import Net
 4 | from quantize.quantize_method import QuantizeWeightOrActivation
 5 | import torch.nn as nn
 6 | import torch.optim as optim
 7 | import matplotlib.pyplot as plt
 8 | 
 9 | 
10 | def test_quantize_weight():
11 |     qw = QuantizeWeightOrActivation()
12 | 
13 |     net = Net()
14 |     qw.info(net, "初始化权重")
15 | 
16 |     net.apply(qw.quantize)
17 |     qw.info(net, "量化权重")
18 | 
19 |     # 网络输入, 输出
20 |     input_ = torch.ones(1, 1, 6, 6, requires_grad=True)
21 |     lable = torch.ones(1, 2)
22 | 
23 |     optimizer = optim.SGD(net.parameters(), lr=0.01)
24 |     criterion = nn.MSELoss()
25 |     output = net(input_)
26 |     loss = criterion(output, lable)
27 |     optimizer.zero_grad()
28 |     loss.backward()
29 |     print("\nMSE LOSS ", loss, "\n")
30 | 
31 |     net.apply(qw.restore)
32 |     qw.info(net, "恢复全精度权重")
33 | 
34 |     net.apply(qw.update_grad)
35 | 
36 |     print("now")
37 |     optimizer.step()
38 |     qw.info(net, "更新全精度权重")
39 | 
40 | 
41 | def test_quantize_weight_update():
42 |     qw = QuantizeWeightOrActivation()
43 | 
44 |     net = Net()
45 |     input_ = torch.rand(1, 1, 6, 6, requires_grad=True)
46 |     label = torch.ones(1, 2)
47 |     optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum=0.99)
48 |     criterion = nn.MSELoss()
49 |     log = {}
50 |     for step in torch.arange(5000):
51 |         net.apply(qw.quantize)
52 |         output = net(input_)
53 |         loss = criterion(output, label)
54 |         optimizer.zero_grad()
55 |         loss.backward()
56 |         # print("loss ", loss.data)
57 |         net.apply(qw.restore)
58 |         net.apply(qw.update_grad)
59 |         optimizer.step()
60 | 
61 |         log[step] = loss
62 | 
63 |     plt.axis([0, 5000, 0, 0.1])
64 |     plt.plot(log.values(), "r-")
65 |     plt.show()
66 | 
67 | 
68 | if __name__ == "__main__":
69 |     test_quantize_weight_update()


--------------------------------------------------------------------------------
/graffiti/register_forward_hook.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | import torch
 3 | import torchvision.models as models
 4 | import cv2
 5 | 
 6 | IMG_PATH = "/home/wangbin/PycharmProjects/quantizednn/data/smurf.jpeg"
 7 | 
 8 | 
 9 | def image_read(img_path):
10 |     img = cv2.imread(img_path)
11 |     img = cv2.resize(img, (224, 224))
12 |     img = img.transpose(2, 0, 1)
13 |     img = torch.tensor(img).div(255).sub(0.5).float()
14 |     img = torch.unsqueeze(img, 0)
15 |     return img
16 | 
17 | 
18 | def torch_modules(model_):
19 |     print("module.modules()\n")
20 |     for e in model_.modules():
21 |         print(type(e), e)
22 | 
23 |     print("modules._modules.keys()\n")
24 |     for e in model_._modules.keys():
25 |         print(type(e), e)
26 | 
27 |     print("modules.children.keys()\n")
28 |     for e in model_.children():
29 |         print(type(e), e)
30 | 
31 | 
32 | def my_hook(m, i, o):
33 |     fm[0] = (i[0].data.clone())
34 |     fm[1] = (o.data.clone())
35 |     print('m:', type(m))
36 |     print('i:', type(i))
37 |     print('len(i):', len(i))
38 |     print('i[0]:', type(i[0]))
39 |     print('i[0]:', i[0].size())
40 |     print('o:', type(o))
41 |     print()
42 |     print('i[0] shape:', i[0].size())
43 |     print('o shape:', o.size())
44 | 
45 | 
46 | def my_hook2(m, i, o):
47 |     m.register_buffer("layer3", i[0])
48 |     m.register_buffer("layer4", o)
49 | 
50 | 
51 | if __name__ == "__main__":
52 |     image = image_read(IMG_PATH)
53 |     model = models.resnet18(pretrained=True)
54 |     last = model._modules.get("layer4")
55 |     fm = [0, 0]
56 |     hook = last.register_forward_hook(my_hook2)
57 |     model = torch.nn.DataParallel(model)
58 |     model.eval()
59 |     pred = model(image)
60 |     print(model)
61 |     for k, v in model._modules.items():
62 |         print(k, v)
63 | 
64 |     hook.remove()


--------------------------------------------------------------------------------
/graffiti/stat_parameters.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | """
 3 | resnet-18:
 4 |     layer1.0.conv1.weight                    0.003
 5 |     layer1.0.conv2.weight                    0.003
 6 |     layer1.1.conv1.weight                    0.003
 7 |     layer1.1.conv2.weight                    0.003
 8 |     layer2.0.conv1.weight                    0.006
 9 |     layer2.0.conv2.weight                    0.013
10 |     layer2.1.conv1.weight                    0.013
11 |     layer2.1.conv2.weight                    0.013
12 |     layer3.0.conv1.weight                    0.025
13 |     layer3.0.conv2.weight                    0.050
14 |     layer3.0.downsample.0.weight             0.003
15 |     layer3.1.conv1.weight                    0.050
16 |     layer3.1.conv2.weight                    0.050
17 |     layer4.0.conv1.weight                    0.101
18 |     layer4.0.conv2.weight                    0.202
19 |     layer4.0.downsample.0.weight             0.011
20 |     layer4.1.conv1.weight                    0.202
21 |     layer4.1.conv2.weight                    0.202
22 |     fc.weight                                0.044
23 | """
24 | import torchvision.models as models
25 | 
26 | 
27 | def num_features(shape):
28 |     feature = 1
29 |     for dim in shape:
30 |         feature *= dim
31 |     return feature
32 | 
33 | 
34 | def total_parameters(state_dict):
35 |     count = 0
36 |     for value in state_dict.values():
37 |         count += num_features(value.size())
38 |     return count
39 | 
40 | 
41 | if __name__ == "__main__":
42 |     model = models.resnet50()
43 |     total = total_parameters(model.state_dict())
44 |     for k, v in model.state_dict().items():
45 |         rate = num_features(v.size())/total
46 |         if rate > 0.001:
47 |             print("{: <30} {:.3f}".format(k, rate))
48 | 


--------------------------------------------------------------------------------
/graffiti/weight_distribute.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | import torch
  3 | import torchvision.models as models
  4 | from quantize.quantize_method import quantize_weights_bias_tanh
  5 | import numpy as np
  6 | 
  7 | 
  8 | checkpoint = "/home/wangbin/Desktop/uisee/model_quantize/AandW_lr1e-3_step10_epoch35/checkpoint.pth.tar"
  9 | 
 10 | 
 11 | def weight_decay():
 12 | 
 13 |     """
 14 |        random init:
 15 |        l2_loss:            (668.1154)
 16 |        l2_loss * 1e-4:     (0.066812)
 17 | 
 18 |        pre-trained
 19 |        l2_loss:            (517.5516)
 20 |        l2_loss * 1e-4:     (0.051755)
 21 |     """
 22 | 
 23 |     model = models.resnet18()
 24 |     l2_loss = 0
 25 |     for i in model.parameters():
 26 |         l2_loss += i.norm(p=2)
 27 | 
 28 |     print(l2_loss)
 29 |     print(l2_loss * 1e-4)
 30 | 
 31 | 
 32 | def quantize_weight_distribute():
 33 |     model_checkpoint = torch.load(checkpoint)
 34 |     state_dict = model_checkpoint['state_dict']
 35 | 
 36 |     for k, v in state_dict.items():
 37 |         if k == "module.layer1.1.conv2.weight":
 38 |             cnts = [0 for _ in range(26)]
 39 |             v = v.view(-1)
 40 |             print(v)
 41 |             v = (quantize_weights_bias_tanh(v) + 1) / 2 * (256 - 1)
 42 |             print(v.size())
 43 |             for ele in v:
 44 |                 cnts[np.abs(int(ele)//10)] += 1
 45 |             for i in range(26):
 46 |                 print(i, " ", '{:.4f}'.format(cnts[i]/len(v)))
 47 | 
 48 |     # 权值越在深层, 方差越小, 越底层, 分布范围越大, 方差越大
 49 |     """
 50 |     conv4.1_layer
 51 |     0   0.0000
 52 |     1   0.0000
 53 |     2   0.0000
 54 |     3   0.0000
 55 |     4   0.0000
 56 |     5   0.0000
 57 |     6   0.0000
 58 |     7   0.0000
 59 |     8   0.0000
 60 |     9   0.0000
 61 |     10   0.0009
 62 |     11   0.0717
 63 |     12   0.5933
 64 |     13   0.3055
 65 |     14   0.0257
 66 |     15   0.0022
 67 |     16   0.0003
 68 |     17   0.0001
 69 |     18   0.0000
 70 |     19   0.0000
 71 |     20   0.0000
 72 |     21   0.0000
 73 |     22   0.0000
 74 |     23   0.0000
 75 |     24   0.0000
 76 |     25   0.0000
 77 |     """
 78 | 
 79 |     '''
 80 |     conv1.1_layer
 81 |     0   0.0001
 82 |     1   0.0000
 83 |     2   0.0001
 84 |     3   0.0002
 85 |     4   0.0004
 86 |     5   0.0007
 87 |     6   0.0019
 88 |     7   0.0032
 89 |     8   0.0084
 90 |     9   0.0204
 91 |     10   0.0566
 92 |     11   0.1618
 93 |     12   0.3274
 94 |     13   0.2621
 95 |     14   0.1029
 96 |     15   0.0341
 97 |     16   0.0116
 98 |     17   0.0050
 99 |     18   0.0019
100 |     19   0.0005
101 |     20   0.0004
102 |     21   0.0002
103 |     22   0.0001
104 |     23   0.0000
105 |     24   0.0000
106 |     25   0.0000
107 | 
108 |     '''
109 | 
110 | 
111 | if __name__ == "__main__":
112 |     quantize_weight_distribute()


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | 
  3 | """
  4 | 1. 量化权重建议:
  5 |     1). 学习率最高设置为 0.001, 0.0001可以很快的收敛, 最很好的选择, 训练个 2~5个 epoch 就好
  6 | 
  7 | 2. 权重和激活同时量化注意事项:
  8 |     1). 学习率设置不能大于 0.01(学习率最大设置 0.01), 当学习率设置为0.01时, 模型可以很好的微调,
  9 |     2). 当学习率设置为 0.1 时, 训练几十个batch之后, 准确率为 千分之一 和 千分之五
 10 |     3). 学习率设置为 0.01 时,大约 5~8 epoch降低一次学习率(除以10)比较好, 然后训练大约 30~40 epoch就好
 11 |     4). 当学习率设置为 0.001 时, 大约 14~16 epoch 降低一次学习率比较好, 然后训练大约 30~40 epoch就好
 12 | 
 13 | 3. 训练模式(mode):
 14 |        0: full precision training from scratch
 15 |        1: only quantize_tanh weight
 16 |        2. quantize_tanh activation using quantized weight to init model
 17 |        3. joint quantize_tanh weight and activation from pre-trained imageNet model
 18 |        4. guided quantize_tanh weight and activation from pre-trained imageNet model
 19 | 
 20 | """
 21 | 
 22 | import argparse
 23 | import torchvision.models as models
 24 | import warnings
 25 | import random
 26 | import os
 27 | import torch.backends.cudnn as cudnn
 28 | import torch.distributed as dist
 29 | import torch
 30 | import torch.optim
 31 | import torch.utils.data
 32 | import torch.utils.data.distributed
 33 | from utils.train_val import train, save_checkpoint, validate
 34 | from utils.data_loader import load_train_data, load_val_data
 35 | from quantize import quantize_guided
 36 | from quantize.quantize_method import quantize_weights_bias_gemm
 37 | from net import net_quantize_activation, net_quantize_weight
 38 | from tensorboardX import SummaryWriter
 39 | 
 40 | 
 41 | model_names = sorted(name for name in models.__dict__
 42 |                      if name.islower() and not name.startswith("__")
 43 |                      and callable(models.__dict__[name]))
 44 | 
 45 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
 46 | 
 47 | parser.add_argument('--data', metavar='DIR', help='path to dataset', required=True)
 48 | parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18',
 49 |                     choices=model_names,
 50 |                     help='model architecture: ' + ' | '.join(model_names) + ' (default: resnet18)')
 51 | parser.add_argument('--workers', default=16, type=int, metavar='N',  # 修改为电脑cpu支持的线程数
 52 |                     help='number of data loading workers (default: 16)')
 53 | parser.add_argument('--epochs', default=35, type=int, metavar='N',
 54 |                     help='number of total epochs to run')
 55 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
 56 |                     help='manual epoch number (useful on restarts)')
 57 | parser.add_argument('--batch-size', default=128, type=int,
 58 |                     metavar='N', help='mini-batch size (default: 128)')
 59 | 
 60 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
 61 |                     help='momentum')
 62 | parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
 63 |                     metavar='W', help='weight decay (default: 1e-4)')
 64 | parser.add_argument('--resume', action='store_true',
 65 |                     help='resume training using save-dir checkpoint (default: False)')
 66 | # 如果是验证模型, 设置为True就好, 训练时值为False
 67 | parser.add_argument('--evaluate', default='', type=str,
 68 |                     help='evaluate model on validation set')
 69 | parser.add_argument('--world-size', default=1, type=int,
 70 |                     help='number of distributed processes')
 71 | parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
 72 |                     help='url used to set up distributed training')
 73 | parser.add_argument('--dist-backend', default='gloo', type=str,
 74 |                     help='distributed backend')
 75 | parser.add_argument('--seed', default=None, type=int,
 76 |                     help='seed for initializing training. ')
 77 | parser.add_argument('--gpu', default=None, type=int, help='GPU id to use.')
 78 | 
 79 | parser.add_argument('--device-ids', default=[0], type=int, nargs='+',
 80 |                     help='GPU ids to be used e.g 0 1 2 3')
 81 | parser.add_argument('--weight-quantized', default='', type=str, help="quantize_tanh weight model path")
 82 | parser.add_argument('--save-dir', default='model', type=str, help='directory to save trained model', required=True)
 83 | parser.add_argument('--mode', default=3, type=int, help='model quantized mode', required=True)
 84 | # l1 norm balance 设置为1或者0.1比较好, l2 norm balance 设置为100(~0.034) ~ 500 比较好
 85 | parser.add_argument('--norm', default=1, type=int, help='feature map norm, default 1')
 86 | parser.add_argument('--balance', default=100, type=float, help='balancing parameter (default: 100)')
 87 | # 论文中初始学习率 0.001, 每 10 epoch 除以 10, 这在只量化权重时候可以
 88 | # 在同时量化权重和激活时, 当使用0.001时, 我们可以观测到权重的持续上升
 89 | # 或许可以将初始学习率调为 0.01, 甚至 0.1
 90 | # guidance 方法中, 全精度模型的的学习率要小一些, 模型已经训练的很好了, 微调而已
 91 | # 不过来低精度模型的学习率可以调高一点
 92 | parser.add_argument('--lr', default=0.001, type=float,  # 论文中初始学习率 0.001, 每 10 epoch 除以 10
 93 |                     help='initial learning rate')
 94 | parser.add_argument('--rate', default=1, type=int,
 95 |                     help='guide training method, full_lr = low_lr * rate')
 96 | 
 97 | parser.add_argument('--lr-step', default=10, type=int, help='learning rate step scheduler')
 98 | 
 99 | 
100 | args = parser.parse_args()
101 | best_prec1 = 0
102 | 
103 | 
104 | def main():
105 |     global best_prec1
106 |     print("\n"
107 |           "=> arch         {: <20}\n"
108 |           "=> init_lr      {: <20}\n"
109 |           "=> lr-step      {: <20}\n"
110 |           "=> momentum     {: <20}\n"
111 |           "=> weight-decay {: <20}\n"
112 |           "=> batch-size   {: <20}\n"
113 |           "=> balance      {: <20}\n"
114 |           "=> save-dir     {: <20}\n".format(
115 |            args.arch, args.lr, args.lr_step, args.momentum, args.weight_decay,
116 |            args.batch_size, args.balance, args.save_dir))
117 | 
118 |     if args.seed is not None:
119 |         random.seed(args.seed)
120 |         torch.manual_seed(args.seed)
121 |         cudnn.deterministic = True
122 |         warnings.warn('You have chosen to seed training. This will turn on the CUDNN deterministic setting, '
123 |                       'which can slow down your training considerably!, You may see unexpected behavior'
124 |                       ' when restarting from checkpoints.')
125 | 
126 |     # 下面的 warning 可以看出, 如果指定一个 gpu id, 就不会使用多 gpu 训练
127 |     if args.gpu is not None:
128 |         warnings.warn('You have chosen a specific GPU, This will completely disable data parallelism.')
129 | 
130 |     # 多机器训练而不是一机多卡(集群训练模式)
131 |     args.distributed = args.world_size > 1
132 |     if args.distributed:
133 |         dist.init_process_group(backend=args.dist_backend,
134 |                                 init_method=args.dist_url,
135 |                                 world_size=args.world_size)
136 | 
137 |     # 根据训练模式加载训练模型
138 |     if args.mode == 0:
139 |         print("=> training mode {}: full precision training from scratch\n".format(args.mode))
140 |         model = models.__dict__[args.arch]()
141 | 
142 |     elif args.mode == 1:
143 |         print("=> training mode {}: quantize weight only\n".format(args.mode))
144 |         print("=> loading imageNet pre-trained model {}".format(args.arch))
145 |         model = net_quantize_weight.__dict__[args.arch]()
146 |         model_dict = model.state_dict()
147 |         init_model = models.__dict__[args.arch](pretrained=True)
148 |         model_dict.update(init_model.state_dict())
149 |         model.load_state_dict(model_dict)
150 |         print("=> loaded imageNet pre-trained model {}".format(args.arch))
151 | 
152 |     elif args.mode == 2:
153 |         print("=> training mode {}: quantize activation using quantized weight\n".format(args.mode))
154 |         model = net_quantize_activation.__dict__[args.arch]()
155 |         if os.path.isfile(args.weight_quantized):
156 |             print("=> loading weight quantized model '{}'".format(args.weight_quantized))
157 |             model_dict = model.state_dict()
158 |             quantized_model = torch.load(args.weight_quantized)
159 |             init_dict = {}
160 |             for k, v in quantized_model['state_dict'].items():
161 |                 if k in model.state_dict():
162 |                     if k.find("conv") != -1 or k.find("fc") != -1:
163 |                         init_dict[k[7:]] = quantize_weights_bias_gemm(v)
164 |                     else:
165 |                         init_dict[k[7:]] = v
166 | 
167 |             model_dict.update(init_dict)
168 |             model.load_state_dict(model_dict)
169 |             print("=> loaded weight_quantized '{}'".format(args.weight_quantized))
170 |         else:
171 |             warnings.warn("=> no weight quantized model found at '{}'".format(args.weight_quantized))
172 |             return
173 | 
174 |     elif args.mode == 3:
175 |         print("=> training mode {}: quantize weight and activation simultaneously\n".format(args.mode))
176 |         print("=> loading imageNet pre-trained model '{}'".format(args.arch))
177 |         # 使用预训练的ResNet18来初始化同时量化网络权重和激活
178 |         model = net_quantize_activation.__dict__[args.arch]()
179 |         # 获取预训练模型参数
180 |         model_dict = model.state_dict()
181 |         init_model = models.__dict__[args.arch](pretrained=True)
182 |         init_dict = {k: v for k, v in init_model.state_dict().items() if k in model_dict}
183 |         model_dict.update(init_dict)
184 |         model.load_state_dict(model_dict)
185 | 
186 |     elif args.mode == 4:
187 |         print("=> Training mode {}: guided quantize weight and activation "
188 |               "from pre-trained imageNet model {}\n ".format(args.mode, args.arch))
189 | 
190 |         # quantize_guided.guided(args)
191 |         quantize_guided.guided(args)
192 |         return
193 |     else:
194 |         raise Exception("invalid mode, valid mode is 0~4!!")
195 | 
196 |     if args.gpu is not None:  # 指定GPU
197 |         model = model.cuda(args.gpu)
198 |     elif args.distributed:  # 集群训练（多机器）
199 |         model.cuda()
200 |         model = torch.nn.parallel.DistributedDataParallel(model)
201 |     else:  # 单机训练（单卡或者多卡）
202 |         if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
203 |             model.features = torch.nn.DataParallel(model.features)
204 |             model.cuda()
205 |         else:
206 |             # 一机多卡时, 多 GPU 训练, 指定要用到 GPU 的 ids
207 |             """
208 |                list(model.state_dict().keys())[0]
209 |                model 在使用 torch.nn.DataParallel 之前每层的名字, 如 conv1.weight
210 |                model 在使用 torch.nn.DataParallel 之后每层的名字, 如 module.conv1.weight
211 |                如果训练使用并行化, 而验证使用指定GPU的话就会出现问题, 所以需要在指定GPU代码中,添加解决冲突的代码
212 |             """
213 |             model = torch.nn.DataParallel(model, args.device_ids).cuda()
214 | 
215 |     criterion = torch.nn.CrossEntropyLoss().cuda(args.gpu)
216 |     optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
217 |     # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,  weight_decay=args.weight_decay)
218 |     lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_step)
219 | 
220 |     # optionally resume from a checkpoint
221 |     if args.resume:
222 |         print("\n=> resume training from checkpoint")
223 |         checkpoint_filename = os.path.join(args.save_dir, "checkpoint.pth.tar")
224 | 
225 |         if os.path.isfile(checkpoint_filename):
226 |             print("=> loading checkpoint '{}'".format(checkpoint_filename))
227 |             checkpoint = torch.load(checkpoint_filename)
228 |             args.start_epoch = checkpoint['epoch']
229 |             best_prec1 = checkpoint['best_prec1']
230 |             model.load_state_dict(checkpoint['state_dict'])
231 |             optimizer.load_state_dict(checkpoint['optimizer'])
232 |             print("=> loaded checkpoint '{}' (epoch {})"
233 |                   .format(checkpoint_filename, checkpoint['epoch']))
234 |         else:
235 |             print("=> no checkpoint found at '{}'".format(checkpoint_filename))
236 | 
237 |     cudnn.benchmark = True
238 | 
239 |     val_loader = load_val_data(args.data, args.batch_size, args.workers)
240 | 
241 |     if args.evaluate:
242 |         if os.path.isfile(args.evaluate):
243 |             print("Loading evaluate model '{}'".format(args.evaluate))
244 |             checkpoint = torch.load(args.evaluate)
245 |             if "state_dict" in checkpoint.keys():
246 |                 model.load_state_dict(checkpoint['state_dict'])
247 |                 print("epoch: {} ".format(checkpoint['epoch']))
248 |             else:
249 |                 checkpoint = {''.join(("module.", k)): v for k, v in checkpoint.items() if not k.startswith("module")}
250 |                 model.load_state_dict(checkpoint)
251 |             print("Loaded evaluate model '{}'".format(args.evaluate))
252 |         else:
253 |             print("No evaluate mode found at '{}'".format(args.evaluate))
254 |             return
255 |         validate(model, val_loader, criterion, args.gpu)
256 |         return
257 | 
258 |     train_loader, train_sampler = load_train_data(args.data, args.batch_size, args.workers, args.distributed)
259 | 
260 |     summary_writer = SummaryWriter(args.save_dir)
261 |     for epoch in range(args.start_epoch, args.epochs):
262 |         if args.distributed:
263 |             train_sampler.set_epoch(epoch)
264 |         lr_scheduler.step()
265 | 
266 |         # train for one epoch
267 |         train(model, train_loader, criterion, optimizer, args.gpu, epoch, summary_writer)
268 | 
269 |         # evaluate on validation set
270 |         prec1 = validate(model, val_loader, criterion, args.gpu, epoch, summary_writer)
271 | 
272 |         # remember best prec@1 and save checkpoint
273 |         is_best = prec1 > best_prec1
274 |         best_prec1 = max(prec1, best_prec1)
275 |         save_checkpoint({
276 |             'epoch': epoch+1,
277 |             'arch': args.arch,
278 |             'state_dict': model.state_dict(),
279 |             'best_prec1': best_prec1,
280 |             'optimizer': optimizer.state_dict(),
281 |         }, is_best, args.save_dir)
282 | 
283 |     summary_writer.close()
284 | 
285 | 
286 | if __name__ == '__main__':
287 |     main()


--------------------------------------------------------------------------------
/net/net_bn_conv_merge.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | import torch.nn as nn
  3 | import math
  4 | import torch.utils.model_zoo as model_zoo
  5 | 
  6 | """
  7 | 网络修改步骤;
  8 |     1. 将卷积层的 bias 设置为 True
  9 |     2. 将 bn 层删掉
 10 | """
 11 | 
 12 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
 13 |            'resnet152']
 14 | 
 15 | 
 16 | model_urls = {
 17 |     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
 18 |     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
 19 |     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
 20 |     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
 21 |     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
 22 | }
 23 | 
 24 | 
 25 | def conv3x3(in_planes, out_planes, stride=1):
 26 |     """3x3 convolution with padding"""
 27 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
 28 |                      padding=1, bias=True)
 29 | 
 30 | 
 31 | class BasicBlock(nn.Module):
 32 |     expansion = 1
 33 | 
 34 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 35 |         super(BasicBlock, self).__init__()
 36 |         self.conv1 = conv3x3(inplanes, planes, stride)
 37 |         self.relu = nn.ReLU(inplace=True)
 38 |         self.conv2 = conv3x3(planes, planes)
 39 |         self.downsample = downsample
 40 |         self.stride = stride
 41 | 
 42 |     def forward(self, x):
 43 |         residual = x
 44 | 
 45 |         out = self.conv1(x)
 46 |         out = self.relu(out)
 47 | 
 48 |         out = self.conv2(out)
 49 | 
 50 |         if self.downsample is not None:
 51 |             residual = self.downsample(x)
 52 | 
 53 |         out += residual
 54 |         out = self.relu(out)
 55 | 
 56 |         return out
 57 | 
 58 | 
 59 | class Bottleneck(nn.Module):
 60 |     expansion = 4
 61 | 
 62 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 63 |         super(Bottleneck, self).__init__()
 64 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True)
 65 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
 66 |                                padding=1, bias=True)
 67 |         self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=True)
 68 |         self.relu = nn.ReLU(inplace=True)
 69 |         self.downsample = downsample
 70 |         self.stride = stride
 71 | 
 72 |     def forward(self, x):
 73 |         residual = x
 74 | 
 75 |         out = self.conv1(x)
 76 |         out = self.relu(out)
 77 | 
 78 |         out = self.conv2(out)
 79 |         out = self.relu(out)
 80 | 
 81 |         out = self.conv3(out)
 82 | 
 83 |         if self.downsample is not None:
 84 |             residual = self.downsample(x)
 85 | 
 86 |         out += residual
 87 |         out = self.relu(out)
 88 | 
 89 |         return out
 90 | 
 91 | 
 92 | class ResNet(nn.Module):
 93 | 
 94 |     def __init__(self, block, layers, num_classes=1000):
 95 |         self.inplanes = 64
 96 |         super(ResNet, self).__init__()
 97 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
 98 |                                bias=True)
 99 |         self.relu = nn.ReLU(inplace=True)
100 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
101 |         self.layer1 = self._make_layer(block, 64, layers[0])
102 |         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
103 |         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
104 |         self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
105 |         self.avgpool = nn.AvgPool2d(7, stride=1)
106 |         self.fc = nn.Linear(512 * block.expansion, num_classes)
107 | 
108 |         for m in self.modules():
109 |             if isinstance(m, nn.Conv2d):
110 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
111 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
112 |             elif isinstance(m, nn.BatchNorm2d):
113 |                 m.weight.data.fill_(1)
114 |                 m.bias.data.zero_()
115 | 
116 |     def _make_layer(self, block, planes, blocks, stride=1):
117 |         downsample = None
118 |         if stride != 1 or self.inplanes != planes * block.expansion:
119 |             downsample = nn.Sequential(
120 |                 nn.Conv2d(self.inplanes, planes * block.expansion,
121 |                           kernel_size=1, stride=stride, bias=True),
122 |             )
123 | 
124 |         layers = []
125 |         layers.append(block(self.inplanes, planes, stride, downsample))
126 |         self.inplanes = planes * block.expansion
127 |         for i in range(1, blocks):
128 |             layers.append(block(self.inplanes, planes))
129 | 
130 |         return nn.Sequential(*layers)
131 | 
132 |     def forward(self, x):
133 |         x = self.conv1(x)
134 |         x = self.relu(x)
135 |         x = self.maxpool(x)
136 | 
137 |         x = self.layer1(x)
138 |         x = self.layer2(x)
139 |         x = self.layer3(x)
140 |         x = self.layer4(x)
141 | 
142 |         x = self.avgpool(x)
143 |         x = x.view(x.size(0), -1)
144 |         x = self.fc(x)
145 | 
146 |         return x
147 | 
148 | 
149 | def resnet18(pretrained=False, **kwargs):
150 |     """Constructs a ResNet-18 model.
151 | 
152 |     Args:
153 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
154 |     """
155 |     model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
156 |     if pretrained:
157 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
158 |     return model
159 | 
160 | 
161 | def resnet34(pretrained=False, **kwargs):
162 |     """Constructs a ResNet-34 model.
163 | 
164 |     Args:
165 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
166 |     """
167 |     model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
168 |     if pretrained:
169 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
170 |     return model
171 | 
172 | 
173 | def resnet50(pretrained=False, **kwargs):
174 |     """Constructs a ResNet-50 model.
175 | 
176 |     Args:
177 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
178 |     """
179 |     model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
180 |     if pretrained:
181 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
182 |     return model
183 | 
184 | 
185 | def resnet101(pretrained=False, **kwargs):
186 |     """Constructs a ResNet-101 model.
187 | 
188 |     Args:
189 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
190 |     """
191 |     model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
192 |     if pretrained:
193 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
194 |     return model
195 | 
196 | 
197 | def resnet152(pretrained=False, **kwargs):
198 |     """Constructs a ResNet-152 model.
199 | 
200 |     Args:
201 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
202 |     """
203 |     model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
204 |     if pretrained:
205 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
206 |     return model
207 | 


--------------------------------------------------------------------------------
/net/net_bn_conv_merge_quantize.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | """
  3 | 1. 将卷积层, 除了第一层使用 QWConv2D(不量化输出, 不然性能下降10个百分点), 全部使用QWACvon2D
  4 | 2. 线性层全部使用 QWALinear, 线性层所占的参数比例在 resnet18中占据 4.4%, resnet50中占据 8%, 不量化的话会有大约 0.4个百分点的性能提升
  5 | 3. 在全连接层送入 softmax 之前, 加一个标量层, 做 softmax 的软化??
  6 | """
  7 | import torch.nn as nn
  8 | import math
  9 | import torch.utils.model_zoo as model_zoo
 10 | from quantize.quantize_module_ import QWConv2D, QWAConv2D, QWALinear, Scalar
 11 | 
 12 | 
 13 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
 14 |            'resnet152']
 15 | 
 16 | 
 17 | model_urls = {
 18 |     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
 19 |     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
 20 |     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
 21 |     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
 22 |     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
 23 | }
 24 | 
 25 | 
 26 | def conv3x3(in_planes, out_planes, stride=1):
 27 |     """3x3 convolution with padding"""
 28 |     return QWAConv2D(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=True)
 29 | 
 30 | 
 31 | class BasicBlock(nn.Module):
 32 |     expansion = 1
 33 | 
 34 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 35 |         super(BasicBlock, self).__init__()
 36 |         self.conv1 = conv3x3(inplanes, planes, stride)
 37 |         self.relu = nn.ReLU(inplace=True)
 38 |         self.conv2 = conv3x3(planes, planes)
 39 |         self.downsample = downsample
 40 |         self.stride = stride
 41 | 
 42 |     def forward(self, x):
 43 |         residual = x
 44 |         out = self.conv1(x)
 45 |         out = self.relu(out)
 46 | 
 47 |         out = self.conv2(out)
 48 | 
 49 |         if self.downsample is not None:
 50 |             residual = self.downsample(x)
 51 | 
 52 |         out += residual
 53 |         out = self.relu(out)
 54 | 
 55 |         return out
 56 | 
 57 | 
 58 | class Bottleneck(nn.Module):
 59 |     expansion = 4
 60 | 
 61 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 62 |         super(Bottleneck, self).__init__()
 63 |         self.conv1 = QWAConv2D(inplanes, planes, kernel_size=1, bias=True)
 64 |         self.conv2 = QWAConv2D(planes, planes, kernel_size=3, stride=stride,
 65 |                               padding=1, bias=True)
 66 |         self.conv3 = QWAConv2D(planes, planes * 4, kernel_size=1, bias=True)
 67 |         self.relu = nn.ReLU(inplace=True)
 68 |         self.downsample = downsample
 69 |         self.stride = stride
 70 | 
 71 |     def forward(self, x):
 72 |         residual = x
 73 | 
 74 |         out = self.conv1(x)
 75 |         out = self.relu(out)
 76 | 
 77 |         out = self.conv2(out)
 78 |         out = self.relu(out)
 79 | 
 80 |         out = self.conv3(out)
 81 | 
 82 |         if self.downsample is not None:
 83 |             residual = self.downsample(x)
 84 | 
 85 |         out += residual
 86 |         out = self.relu(out)
 87 | 
 88 |         return out
 89 | 
 90 | 
 91 | class ResNet(nn.Module):
 92 | 
 93 |     def __init__(self, qblock, layers, num_classes=1000):
 94 |         self.inplanes = 64
 95 |         super(ResNet, self).__init__()
 96 |         self.conv1 = QWConv2D(3, 64, kernel_size=7, stride=2, padding=3,
 97 |                               bias=True)
 98 |         self.relu = nn.ReLU(inplace=True)
 99 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
100 |         self.layer1 = self._make_layer(qblock, 64, layers[0])
101 |         self.layer2 = self._make_layer(qblock, 128, layers[1], stride=2)
102 |         self.layer3 = self._make_layer(qblock, 256, layers[2], stride=2)
103 |         self.layer4 = self._make_layer(qblock, 512, layers[3], stride=2)
104 |         self.avgpool = nn.AvgPool2d(7, stride=1)
105 |         self.fc = QWALinear(512 * qblock.expansion, num_classes)  # 修改
106 |         self.scalar = Scalar()  # 修改
107 | 
108 |         for m in self.modules():
109 |             if isinstance(m, nn.Conv2d):
110 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
111 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
112 |             elif isinstance(m, nn.BatchNorm2d):
113 |                 m.weight.data.fill_(1)
114 |                 m.bias.data.zero_()
115 | 
116 |     def _make_layer(self, block, planes, blocks, stride=1):
117 |         downsample = None
118 |         if stride != 1 or self.inplanes != planes * block.expansion:
119 |             downsample = nn.Sequential(
120 |                 QWAConv2D(self.inplanes, planes * block.expansion,
121 |                          kernel_size=1, stride=stride, bias=True),
122 |             )
123 | 
124 |         layers = []
125 |         layers.append(block(self.inplanes, planes, stride, downsample))
126 |         self.inplanes = planes * block.expansion
127 |         for i in range(1, blocks):
128 |             layers.append(block(self.inplanes, planes))
129 | 
130 |         return nn.Sequential(*layers)
131 | 
132 |     def forward(self, x):
133 |         x = self.conv1(x)
134 |         x = self.relu(x)
135 |         x = self.maxpool(x)
136 | 
137 |         x = self.layer1(x)
138 |         x = self.layer2(x)
139 |         x = self.layer3(x)
140 |         x = self.layer4(x)
141 | 
142 |         x = self.avgpool(x)
143 |         x = x.view(x.size(0), -1)
144 |         x = self.fc(x)
145 |         x = self.scalar(x)  # 修改
146 | 
147 |         return x
148 | 
149 | 
150 | def resnet18(pretrained=False, **kwargs):
151 |     """Constructs a ResNet-18 model.
152 | 
153 |     Args:
154 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
155 |     """
156 |     model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
157 |     if pretrained:
158 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
159 |     return model
160 | 
161 | 
162 | def resnet34(pretrained=False, **kwargs):
163 |     """Constructs a ResNet-34 model.
164 | 
165 |     Args:
166 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
167 |     """
168 |     model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
169 |     if pretrained:
170 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
171 |     return model
172 | 
173 | 
174 | def resnet50(pretrained=False, **kwargs):
175 |     """Constructs a ResNet-50 model.
176 | 
177 |     Args:
178 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
179 |     """
180 |     model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
181 |     if pretrained:
182 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
183 |     return model
184 | 
185 | 
186 | def resnet101(pretrained=False, **kwargs):
187 |     """Constructs a ResNet-101 model.
188 | 
189 |     Args:
190 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
191 |     """
192 |     model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
193 |     if pretrained:
194 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
195 |     return model
196 | 
197 | 
198 | def resnet152(pretrained=False, **kwargs):
199 |     """Constructs a ResNet-152 model.
200 | 
201 |     Args:
202 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
203 |     """
204 |     model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
205 |     if pretrained:
206 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
207 |     return model
208 | 


--------------------------------------------------------------------------------
/net/net_quantize_activation.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | """
  3 | 1. 将卷积层, 除了第一层使用 QWConv2D(不量化输出, 不然性能下降10个百分点), 全部使用QWACvon2D
  4 | 2. 线性层全部使用 QWALinear, 线性层所占的参数比例在 resnet18中占据 4.4%, resnet50中占据 8%, 不量化的话会有大约 0.4个百分点的性能提升
  5 | 3. 在全连接层送入 softmax 之前, 加一个标量层, 做 softmax 的软化??
  6 | """
  7 | import torch.nn as nn
  8 | import math
  9 | import torch.utils.model_zoo as model_zoo
 10 | from quantize.quantize_module_ import QWConv2D, QWAConv2D, QWALinear, Scalar
 11 | 
 12 | 
 13 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
 14 |            'resnet152']
 15 | 
 16 | 
 17 | model_urls = {
 18 |     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
 19 |     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
 20 |     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
 21 |     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
 22 |     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
 23 | }
 24 | 
 25 | 
 26 | def conv3x3(in_planes, out_planes, stride=1):
 27 |     """3x3 convolution with padding"""
 28 |     return QWAConv2D(in_planes, out_planes, kernel_size=3, stride=stride,
 29 |                     padding=1, bias=False)
 30 | 
 31 | 
 32 | class BasicBlock(nn.Module):
 33 |     expansion = 1
 34 | 
 35 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 36 |         super(BasicBlock, self).__init__()
 37 |         self.conv1 = conv3x3(inplanes, planes, stride)
 38 |         self.bn1 = nn.BatchNorm2d(planes)
 39 |         self.relu = nn.ReLU(inplace=True)
 40 |         self.conv2 = conv3x3(planes, planes)
 41 |         self.bn2 = nn.BatchNorm2d(planes)
 42 |         self.downsample = downsample
 43 |         self.stride = stride
 44 | 
 45 |     def forward(self, x):
 46 |         residual = x
 47 |         out = self.conv1(x)
 48 |         out = self.bn1(out)
 49 |         out = self.relu(out)
 50 | 
 51 |         out = self.conv2(out)
 52 |         out = self.bn2(out)
 53 | 
 54 |         if self.downsample is not None:
 55 |             residual = self.downsample(x)
 56 | 
 57 |         out += residual
 58 |         out = self.relu(out)
 59 | 
 60 |         return out
 61 | 
 62 | 
 63 | class Bottleneck(nn.Module):
 64 |     expansion = 4
 65 | 
 66 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 67 |         super(Bottleneck, self).__init__()
 68 |         self.conv1 = QWAConv2D(inplanes, planes, kernel_size=1, bias=False)
 69 |         self.bn1 = nn.BatchNorm2d(planes)
 70 |         self.conv2 = QWAConv2D(planes, planes, kernel_size=3, stride=stride,
 71 |                               padding=1, bias=False)
 72 |         self.bn2 = nn.BatchNorm2d(planes)
 73 |         self.conv3 = QWAConv2D(planes, planes * 4, kernel_size=1, bias=False)
 74 |         self.bn3 = nn.BatchNorm2d(planes * 4)
 75 |         self.relu = nn.ReLU(inplace=True)
 76 |         self.downsample = downsample
 77 |         self.stride = stride
 78 | 
 79 |     def forward(self, x):
 80 |         residual = x
 81 | 
 82 |         out = self.conv1(x)
 83 |         out = self.bn1(out)
 84 |         out = self.relu(out)
 85 | 
 86 |         out = self.conv2(out)
 87 |         out = self.bn2(out)
 88 |         out = self.relu(out)
 89 | 
 90 |         out = self.conv3(out)
 91 |         out = self.bn3(out)
 92 | 
 93 |         if self.downsample is not None:
 94 |             residual = self.downsample(x)
 95 | 
 96 |         out += residual
 97 |         out = self.relu(out)
 98 | 
 99 |         return out
100 | 
101 | 
102 | class ResNet(nn.Module):
103 | 
104 |     def __init__(self, qblock, layers, num_classes=1000):
105 |         self.inplanes = 64
106 |         super(ResNet, self).__init__()
107 |         self.conv1 = QWConv2D(3, 64, kernel_size=7, stride=2, padding=3,
108 |                               bias=False)
109 |         self.bn1 = nn.BatchNorm2d(64)
110 |         self.relu = nn.ReLU(inplace=True)
111 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
112 |         self.layer1 = self._make_layer(qblock, 64, layers[0])
113 |         self.layer2 = self._make_layer(qblock, 128, layers[1], stride=2)
114 |         self.layer3 = self._make_layer(qblock, 256, layers[2], stride=2)
115 |         self.layer4 = self._make_layer(qblock, 512, layers[3], stride=2)
116 |         self.avgpool = nn.AvgPool2d(7, stride=1)
117 |         self.fc = QWALinear(512 * qblock.expansion, num_classes)  # 修改
118 |         self.scalar = Scalar()  # 修改
119 | 
120 |         for m in self.modules():
121 |             if isinstance(m, nn.Conv2d):
122 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
123 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
124 |             elif isinstance(m, nn.BatchNorm2d):
125 |                 m.weight.data.fill_(1)
126 |                 m.bias.data.zero_()
127 | 
128 |     def _make_layer(self, block, planes, blocks, stride=1):
129 |         downsample = None
130 |         if stride != 1 or self.inplanes != planes * block.expansion:
131 |             downsample = nn.Sequential(
132 |                 QWAConv2D(self.inplanes, planes * block.expansion,
133 |                          kernel_size=1, stride=stride, bias=False),
134 |                 nn.BatchNorm2d(planes * block.expansion),
135 |             )
136 | 
137 |         layers = []
138 |         layers.append(block(self.inplanes, planes, stride, downsample))
139 |         self.inplanes = planes * block.expansion
140 |         for i in range(1, blocks):
141 |             layers.append(block(self.inplanes, planes))
142 | 
143 |         return nn.Sequential(*layers)
144 | 
145 |     def forward(self, x):
146 |         x = self.conv1(x)
147 |         x = self.bn1(x)
148 |         x = self.relu(x)
149 |         x = self.maxpool(x)
150 | 
151 |         x = self.layer1(x)
152 |         x = self.layer2(x)
153 |         x = self.layer3(x)
154 |         x = self.layer4(x)
155 | 
156 |         x = self.avgpool(x)
157 |         x = x.view(x.size(0), -1)
158 |         x = self.fc(x)
159 |         x = self.scalar(x)  # 修改
160 | 
161 |         return x
162 | 
163 | 
164 | def resnet18(pretrained=False, **kwargs):
165 |     """Constructs a ResNet-18 model.
166 | 
167 |     Args:
168 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
169 |     """
170 |     model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
171 |     if pretrained:
172 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
173 |     return model
174 | 
175 | 
176 | def resnet34(pretrained=False, **kwargs):
177 |     """Constructs a ResNet-34 model.
178 | 
179 |     Args:
180 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
181 |     """
182 |     model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
183 |     if pretrained:
184 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
185 |     return model
186 | 
187 | 
188 | def resnet50(pretrained=False, **kwargs):
189 |     """Constructs a ResNet-50 model.
190 | 
191 |     Args:
192 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
193 |     """
194 |     model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
195 |     if pretrained:
196 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
197 |     return model
198 | 
199 | 
200 | def resnet101(pretrained=False, **kwargs):
201 |     """Constructs a ResNet-101 model.
202 | 
203 |     Args:
204 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
205 |     """
206 |     model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
207 |     if pretrained:
208 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
209 |     return model
210 | 
211 | 
212 | def resnet152(pretrained=False, **kwargs):
213 |     """Constructs a ResNet-152 model.
214 | 
215 |     Args:
216 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
217 |     """
218 |     model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
219 |     if pretrained:
220 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
221 |     return model
222 | 


--------------------------------------------------------------------------------
/net/net_quantize_guide.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | import torch.nn as nn
  3 | import math
  4 | import torch.utils.model_zoo as model_zoo
  5 | from quantize.quantize_module_ import QWConv2D, QWAConv2D, QWALinear, Scalar
  6 | from quantize.quantize_method import quantize_activations_gemm
  7 | 
  8 | """
  9 | guide 两个模型一起训练的两种思路,
 10 |     1. 将两个模型分别训练, 然后提取中间层的 feature map, 计算 distance
 11 |     2. 讲这两个模型写成一个网络, 一起训练, 加载参数时, 一起加载, 然后提取训练好的低精度模型的参数
 12 | """
 13 | 
 14 | """
 15 | 1. 将卷积层, 除了第一层使用 QWConv2D(不量化输出, 不然性能下降10个百分点), 全部使用 QWACvon2D
 16 | 2. 线性层全部使用 QWALinear, 线性层所占的参数比例在 resnet18中占据 4.4%, resnet50中占据 8%, 不量化的话会有大约 0.4个百分点的性能提升
 17 | 3. 在全连接层送入 softmax 之前, 加一个标量层, 做 softmax 的软化??
 18 | """
 19 | 
 20 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
 21 |            'resnet152']
 22 | 
 23 | 
 24 | model_urls = {
 25 |     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
 26 |     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
 27 |     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
 28 |     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
 29 |     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
 30 | }
 31 | 
 32 | 
 33 | def qconv3x3(in_planes, out_planes, stride=1):
 34 |     """3x3 convolution with padding"""
 35 |     return QWAConv2D(in_planes, out_planes, kernel_size=3, stride=stride,
 36 |                      padding=1, bias=False)
 37 | 
 38 | 
 39 | def conv3x3(in_planes, out_planes, stride=1):
 40 |     """3x3 convolution with padding"""
 41 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
 42 |                      padding=1, bias=False)
 43 | 
 44 | 
 45 | class ResNet(nn.Module):
 46 | 
 47 |     def __init__(self, block, layers, num_classes=1000):
 48 |         self.inplanes = 64
 49 |         super(ResNet, self).__init__()
 50 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
 51 |                                bias=False)
 52 |         self.bn1 = nn.BatchNorm2d(64)
 53 |         self.relu = nn.ReLU(inplace=True)
 54 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
 55 |         self.layer1 = self._make_layer(block, 64, layers[0])
 56 |         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
 57 |         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
 58 |         self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
 59 |         self.avgpool = nn.AvgPool2d(7, stride=1)
 60 |         self.fc = nn.Linear(512 * block.expansion, num_classes)
 61 | 
 62 |         for m in self.modules():
 63 |             if isinstance(m, nn.Conv2d):
 64 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
 65 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
 66 |             elif isinstance(m, nn.BatchNorm2d):
 67 |                 m.weight.data.fill_(1)
 68 |                 m.bias.data.zero_()
 69 | 
 70 |     def _make_layer(self, block, planes, blocks, stride=1):
 71 |         downsample = None
 72 |         if stride != 1 or self.inplanes != planes * block.expansion:
 73 |             downsample = nn.Sequential(
 74 |                 nn.Conv2d(self.inplanes, planes * block.expansion,
 75 |                           kernel_size=1, stride=stride, bias=False),
 76 |                 nn.BatchNorm2d(planes * block.expansion),
 77 |             )
 78 | 
 79 |         layers = []
 80 |         layers.append(block(self.inplanes, planes, stride, downsample))
 81 |         self.inplanes = planes * block.expansion
 82 |         for i in range(1, blocks):
 83 |             layers.append(block(self.inplanes, planes))
 84 | 
 85 |         return nn.Sequential(*layers)
 86 | 
 87 |     def forward(self, x):
 88 |         x = self.conv1(x)
 89 |         x = self.bn1(x)
 90 |         x = self.relu(x)
 91 |         x = self.maxpool(x)
 92 | 
 93 |         x = self.layer1(x)
 94 |         x = self.layer2(x)
 95 |         x = self.layer3(x)
 96 |         x = self.layer4(x)
 97 | 
 98 |         x = self.avgpool(x)
 99 |         x = x.view(x.size(0), -1)
100 |         x = self.fc(x)
101 | 
102 |         return x
103 | 
104 | 
105 | class BasicBlock(nn.Module):
106 |     expansion = 1
107 | 
108 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
109 |         super(BasicBlock, self).__init__()
110 |         self.conv1 = conv3x3(inplanes, planes, stride)
111 |         self.bn1 = nn.BatchNorm2d(planes)
112 |         self.relu = nn.ReLU(inplace=True)
113 |         self.conv2 = conv3x3(planes, planes)
114 |         self.bn2 = nn.BatchNorm2d(planes)
115 |         self.downsample = downsample
116 |         self.stride = stride
117 | 
118 |     def forward(self, x):
119 |         residual = x
120 | 
121 |         out = self.conv1(x)
122 |         out = self.bn1(out)
123 |         out = self.relu(out)
124 | 
125 |         out = self.conv2(out)
126 |         out = self.bn2(out)
127 | 
128 |         if self.downsample is not None:
129 |             residual = self.downsample(x)
130 | 
131 |         out += residual
132 |         out = self.relu(out)
133 | 
134 |         return out
135 | 
136 | 
137 | class QBasicBlock(nn.Module):
138 |     expansion = 1
139 | 
140 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
141 |         super(QBasicBlock, self).__init__()
142 |         self.conv1 = qconv3x3(inplanes, planes, stride)
143 |         self.bn1 = nn.BatchNorm2d(planes)
144 |         self.relu = nn.ReLU(inplace=True)
145 |         self.conv2 = qconv3x3(planes, planes)
146 |         self.bn2 = nn.BatchNorm2d(planes)
147 |         self.downsample = downsample
148 |         self.stride = stride
149 | 
150 |     def forward(self, x):
151 |         residual = x
152 |         out = self.conv1(x)
153 |         out = self.bn1(out)
154 |         out = self.relu(out)
155 | 
156 |         out = self.conv2(out)
157 |         out = self.bn2(out)
158 | 
159 |         if self.downsample is not None:
160 |             residual = self.downsample(x)
161 | 
162 |         out += residual
163 |         out = self.relu(out)
164 | 
165 |         return out
166 | 
167 | 
168 | class Bottleneck(nn.Module):
169 |     expansion = 4
170 | 
171 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
172 |         super(Bottleneck, self).__init__()
173 |         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
174 |         self.bn1 = nn.BatchNorm2d(planes)
175 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
176 |                                padding=1, bias=False)
177 |         self.bn2 = nn.BatchNorm2d(planes)
178 |         self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
179 |         self.bn3 = nn.BatchNorm2d(planes * 4)
180 |         self.relu = nn.ReLU(inplace=True)
181 |         self.downsample = downsample
182 |         self.stride = stride
183 | 
184 |     def forward(self, x):
185 |         residual = x
186 | 
187 |         out = self.conv1(x)
188 |         out = self.bn1(out)
189 |         out = self.relu(out)
190 | 
191 |         out = self.conv2(out)
192 |         out = self.bn2(out)
193 |         out = self.relu(out)
194 | 
195 |         out = self.conv3(out)
196 |         out = self.bn3(out)
197 | 
198 |         if self.downsample is not None:
199 |             residual = self.downsample(x)
200 | 
201 |         out += residual
202 |         out = self.relu(out)
203 | 
204 |         return out
205 | 
206 | 
207 | class QBottleneck(nn.Module):
208 |     expansion = 4
209 | 
210 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
211 |         super(QBottleneck, self).__init__()
212 |         self.conv1 = QWAConv2D(inplanes, planes, kernel_size=1, bias=False)
213 |         self.bn1 = nn.BatchNorm2d(planes)
214 |         self.conv2 = QWAConv2D(planes, planes, kernel_size=3, stride=stride,
215 |                               padding=1, bias=False)
216 |         self.bn2 = nn.BatchNorm2d(planes)
217 |         self.conv3 = QWAConv2D(planes, planes * 4, kernel_size=1, bias=False)
218 |         self.bn3 = nn.BatchNorm2d(planes * 4)
219 |         self.relu = nn.ReLU(inplace=True)
220 |         self.downsample = downsample
221 |         self.stride = stride
222 | 
223 |     def forward(self, x):
224 |         residual = x
225 | 
226 |         out = self.conv1(x)
227 |         out = self.bn1(out)
228 |         out = self.relu(out)
229 | 
230 |         out = self.conv2(out)
231 |         out = self.bn2(out)
232 |         out = self.relu(out)
233 | 
234 |         out = self.conv3(out)
235 |         out = self.bn3(out)
236 | 
237 |         if self.downsample is not None:
238 |             residual = self.downsample(x)
239 | 
240 |         out += residual
241 |         out = self.relu(out)
242 | 
243 |         return out
244 | 
245 | 
246 | class ResNet(nn.Module):
247 | 
248 |     def __init__(self, qblock, block, layers, num_classes=1000):
249 |         self.inplanes = 64
250 |         super(ResNet, self).__init__()
251 |         self.qconv1 = QWConv2D(3, 64, kernel_size=7, stride=2, padding=3,
252 |                                bias=False)
253 |         self.qbn1 = nn.BatchNorm2d(64)
254 |         self.qrelu = nn.ReLU(inplace=True)
255 |         self.qmaxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
256 |         self.qlayer1 = self._qmake_layer(qblock, 64, layers[0])
257 |         self.qlayer2 = self._qmake_layer(qblock, 128, layers[1], stride=2)
258 |         self.qlayer3 = self._qmake_layer(qblock, 256, layers[2], stride=2)
259 |         self.qlayer4 = self._qmake_layer(qblock, 512, layers[3], stride=2)
260 |         self.qavgpool = nn.AvgPool2d(7, stride=1)
261 |         self.qfc = QWALinear(512 * qblock.expansion, num_classes)  # 修改
262 |         self.scalar = Scalar()  # 修改
263 | 
264 |         # 全精度的 module
265 |         self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
266 |                                bias=False)
267 |         self.bn1 = nn.BatchNorm2d(64)
268 |         self.relu = nn.ReLU(inplace=True)
269 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
270 |         self.layer1 = self._make_layer(block, 64, layers[0])
271 |         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
272 |         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
273 |         self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
274 |         self.avgpool = nn.AvgPool2d(7, stride=1)
275 |         self.fc = nn.Linear(512 * block.expansion, num_classes)
276 | 
277 |         for m in self.modules():
278 |             if isinstance(m, nn.Conv2d):
279 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
280 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
281 |             elif isinstance(m, nn.BatchNorm2d):
282 |                 m.weight.data.fill_(1)
283 |                 m.bias.data.zero_()
284 | 
285 |     def _qmake_layer(self, block, planes, blocks, stride=1):
286 |         downsample = None
287 |         if stride != 1 or self.inplanes != planes * block.expansion:
288 |             downsample = nn.Sequential(
289 |                 QWAConv2D(self.inplanes, planes * block.expansion,
290 |                          kernel_size=1, stride=stride, bias=False),
291 |                 nn.BatchNorm2d(planes * block.expansion),
292 |             )
293 | 
294 |         layers = []
295 |         layers.append(block(self.inplanes, planes, stride, downsample))
296 |         self.inplanes = planes * block.expansion
297 |         for i in range(1, blocks):
298 |             layers.append(block(self.inplanes, planes))
299 | 
300 |         return nn.Sequential(*layers)
301 | 
302 |     def _make_layer(self, block, planes, blocks, stride=1):
303 |         downsample = None
304 |         if stride != 1 or self.inplanes != planes * block.expansion:
305 |             downsample = nn.Sequential(
306 |                 nn.Conv2d(self.inplanes, planes * block.expansion,
307 |                           kernel_size=1, stride=stride, bias=False),
308 |                 nn.BatchNorm2d(planes * block.expansion),
309 |             )
310 | 
311 |         layers = []
312 |         layers.append(block(self.inplanes, planes, stride, downsample))
313 |         self.inplanes = planes * block.expansion
314 |         for i in range(1, blocks):
315 |             layers.append(block(self.inplanes, planes))
316 | 
317 |         return nn.Sequential(*layers)
318 | 
319 |     @staticmethod
320 |     def num_features(self, fm_shape):
321 |         num = 1
322 |         for dim in fm_shape:
323 |             num *= dim
324 |         return num
325 | 
326 |     def forward(self, x):
327 |         qx = x
328 |         qx = self.qconv1(qx)
329 |         qx = self.qbn1(qx)
330 |         qx = self.qrelu(qx)
331 |         qx = self.qmaxpool(qx)
332 | 
333 |         qx = self.qlayer1(qx)
334 |         qx = self.qlayer2(qx)
335 |         ql3 = self.qlayer3(qx)
336 |         ql4 = self.qlayer4(ql3)
337 | 
338 |         qx = self.qavgpool(ql4)
339 |         qx = x.view(qx.size(0), -1)
340 |         qx = self.qfc(qx)
341 |         qx = self.scalar(qx)  # 修改
342 | 
343 |         # 全进度模型
344 |         x = self.conv1(x)
345 |         x = self.bn1(x)
346 |         x = self.relu(x)
347 |         x = self.maxpool(x)
348 | 
349 |         x = self.layer1(x)
350 |         x = self.layer2(x)
351 |         l3 = self.layer3(x)
352 |         l4 = self.layer4(l3)
353 | 
354 |         x = self.avgpool(l4)
355 |         x = x.view(x.size(0), -1)
356 |         x = self.fc(x)
357 | 
358 |         pair_distance = nn.PairwiseDistance(p=1)
359 |         distance = pair_distance(quantize_activations_gemm(ql3), quantize_activations_gemm(l3)) / self.num_features(l3.size()) \
360 |                    + pair_distance(quantize_activations_gemm(ql4), quantize_activations_gemm(l4) / self.num_features(l4.size()))
361 | 
362 |         return qx, x, distance
363 | 
364 | 
365 | def resnet18(pretrained=False, **kwargs):
366 |     """Constructs a ResNet-18 model.
367 | 
368 |     Args:
369 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
370 |     """
371 |     model = ResNet(QBasicBlock, BasicBlock, [2, 2, 2, 2], **kwargs)
372 |     if pretrained:
373 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
374 |     return model
375 | 
376 | 
377 | def resnet34(pretrained=False, **kwargs):
378 |     """Constructs a ResNet-34 model.
379 | 
380 |     Args:
381 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
382 |     """
383 |     model = ResNet(QBasicBlock, [3, 4, 6, 3], **kwargs)
384 |     if pretrained:
385 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
386 |     return model
387 | 
388 | 
389 | def resnet50(pretrained=False, **kwargs):
390 |     """Constructs a ResNet-50 model.
391 | 
392 |     Args:
393 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
394 |     """
395 |     model = ResNet(QBottleneck, [3, 4, 6, 3], **kwargs)
396 |     if pretrained:
397 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
398 |     return model
399 | 
400 | 
401 | def resnet101(pretrained=False, **kwargs):
402 |     """Constructs a ResNet-101 model.
403 | 
404 |     Args:
405 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
406 |     """
407 |     model = ResNet(QBottleneck, [3, 4, 23, 3], **kwargs)
408 |     if pretrained:
409 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
410 |     return model
411 | 
412 | 
413 | def resnet152(pretrained=False, **kwargs):
414 |     """Constructs a ResNet-152 model.
415 | 
416 |     Args:
417 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
418 |     """
419 |     model = ResNet(QBottleneck, [3, 8, 36, 3], **kwargs)
420 |     if pretrained:
421 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
422 |     return model
423 | 


--------------------------------------------------------------------------------
/net/net_quantize_weight.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | """
  3 | 线性层全部使用 QWLinear
  4 | """
  5 | import torch.nn as nn
  6 | import math
  7 | import torch.utils.model_zoo as model_zoo
  8 | from quantize.quantize_module_ import QWConv2D, Scalar, QWLinear
  9 | 
 10 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
 11 |            'resnet152']
 12 | 
 13 | 
 14 | model_urls = {
 15 |     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
 16 |     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
 17 |     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
 18 |     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
 19 |     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
 20 | }
 21 | 
 22 | 
 23 | def conv3x3(in_planes, out_planes, stride=1):
 24 |     """3x3 convolution with padding"""
 25 |     return QWConv2D(in_planes, out_planes, kernel_size=3, stride=stride,
 26 |                     padding=1, bias=False)
 27 | 
 28 | 
 29 | class BasicBlock(nn.Module):
 30 |     expansion = 1
 31 | 
 32 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 33 |         super(BasicBlock, self).__init__()
 34 |         self.conv1 = conv3x3(inplanes, planes, stride)
 35 |         self.bn1 = nn.BatchNorm2d(planes)
 36 |         self.relu = nn.ReLU(inplace=True)
 37 |         self.conv2 = conv3x3(planes, planes)
 38 |         self.bn2 = nn.BatchNorm2d(planes)
 39 |         self.downsample = downsample
 40 |         self.stride = stride
 41 | 
 42 |     def forward(self, x):
 43 |         residual = x
 44 | 
 45 |         out = self.conv1(x)
 46 |         out = self.bn1(out)
 47 |         out = self.relu(out)
 48 | 
 49 |         out = self.conv2(out)
 50 |         out = self.bn2(out)
 51 | 
 52 |         if self.downsample is not None:
 53 |             residual = self.downsample(x)
 54 | 
 55 |         out += residual
 56 |         out = self.relu(out)
 57 | 
 58 |         return out
 59 | 
 60 | 
 61 | class Bottleneck(nn.Module):
 62 |     expansion = 4
 63 | 
 64 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
 65 |         super(Bottleneck, self).__init__()
 66 |         self.conv1 = QWConv2D(inplanes, planes, kernel_size=1, bias=False)
 67 |         self.bn1 = nn.BatchNorm2d(planes)
 68 |         self.conv2 = QWConv2D(planes, planes, kernel_size=3, stride=stride,
 69 |                               padding=1, bias=False)
 70 |         self.bn2 = nn.BatchNorm2d(planes)
 71 |         self.conv3 = QWConv2D(planes, planes * 4, kernel_size=1, bias=False)
 72 |         self.bn3 = nn.BatchNorm2d(planes * 4)
 73 |         self.relu = nn.ReLU(inplace=True)
 74 |         self.downsample = downsample
 75 |         self.stride = stride
 76 | 
 77 |     def forward(self, x):
 78 |         residual = x
 79 | 
 80 |         out = self.conv1(x)
 81 |         out = self.bn1(out)
 82 |         out = self.relu(out)
 83 | 
 84 |         out = self.conv2(out)
 85 |         out = self.bn2(out)
 86 |         out = self.relu(out)
 87 | 
 88 |         out = self.conv3(out)
 89 |         out = self.bn3(out)
 90 | 
 91 |         if self.downsample is not None:
 92 |             residual = self.downsample(x)
 93 | 
 94 |         out += residual
 95 |         out = self.relu(out)
 96 | 
 97 |         return out
 98 | 
 99 | 
100 | class ResNet(nn.Module):
101 | 
102 |     def __init__(self, block, layers, num_classes=1000):
103 |         self.inplanes = 64
104 |         super(ResNet, self).__init__()
105 |         self.conv1 = QWConv2D(3, 64, kernel_size=7, stride=2, padding=3,
106 |                               bias=False)
107 |         self.bn1 = nn.BatchNorm2d(64)
108 |         self.relu = nn.ReLU(inplace=True)
109 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
110 |         self.layer1 = self._make_layer(block, 64, layers[0])
111 |         self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
112 |         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
113 |         self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
114 |         self.avgpool = nn.AvgPool2d(7, stride=1)
115 |         self.fc = QWLinear(512 * block.expansion, num_classes)
116 |         self.scalar = Scalar()
117 | 
118 |         for m in self.modules():
119 |             if isinstance(m, QWConv2D):
120 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
121 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
122 |             elif isinstance(m, nn.BatchNorm2d):
123 |                 m.weight.data.fill_(1)
124 |                 m.bias.data.zero_()
125 | 
126 |     def _make_layer(self, block, planes, blocks, stride=1):
127 |         downsample = None
128 |         if stride != 1 or self.inplanes != planes * block.expansion:
129 |             downsample = nn.Sequential(
130 |                 QWConv2D(self.inplanes, planes * block.expansion,
131 |                          kernel_size=1, stride=stride, bias=False),
132 |                 nn.BatchNorm2d(planes * block.expansion),
133 |             )
134 | 
135 |         layers = []
136 |         layers.append(block(self.inplanes, planes, stride, downsample))
137 |         self.inplanes = planes * block.expansion
138 |         for i in range(1, blocks):
139 |             layers.append(block(self.inplanes, planes))
140 | 
141 |         return nn.Sequential(*layers)
142 | 
143 |     def forward(self, x):
144 |         x = self.conv1(x)
145 |         x = self.bn1(x)
146 |         x = self.relu(x)
147 |         x = self.maxpool(x)
148 | 
149 |         x = self.layer1(x)
150 |         x = self.layer2(x)
151 |         x = self.layer3(x)
152 |         x = self.layer4(x)
153 | 
154 |         x = self.avgpool(x)
155 |         x = x.view(x.size(0), -1)
156 |         x = self.fc(x)
157 |         x = self.scalar(x)
158 | 
159 |         return x
160 | 
161 | 
162 | def resnet18(pretrained=False, **kwargs):
163 |     """Constructs a ResNet-18 model.
164 | 
165 |     Args:
166 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
167 |     """
168 |     model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
169 |     if pretrained:
170 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
171 |     return model
172 | 
173 | 
174 | def resnet34(pretrained=False, **kwargs):
175 |     """Constructs a ResNet-34 model.
176 | 
177 |     Args:
178 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
179 |     """
180 |     model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
181 |     if pretrained:
182 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
183 |     return model
184 | 
185 | 
186 | def resnet50(pretrained=False, **kwargs):
187 |     """Constructs a ResNet-50 model.
188 | 
189 |     Args:
190 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
191 |     """
192 |     model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
193 |     if pretrained:
194 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
195 |     return model
196 | 
197 | 
198 | def resnet101(pretrained=False, **kwargs):
199 |     """Constructs a ResNet-101 model.
200 | 
201 |     Args:
202 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
203 |     """
204 |     model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
205 |     if pretrained:
206 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
207 |     return model
208 | 
209 | 
210 | def resnet152(pretrained=False, **kwargs):
211 |     """Constructs a ResNet-152 model.
212 | 
213 |     Args:
214 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
215 |     """
216 |     model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
217 |     if pretrained:
218 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
219 |     return model
220 | 


--------------------------------------------------------------------------------
/net/simple_net.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | import torch
 3 | import torch.nn as nn
 4 | 
 5 | 
 6 | class Net(nn.Module):
 7 | 
 8 |     def __init__(self):
 9 |         super(Net, self).__init__()
10 |         # 1 input image channel, 6 output channels, 5x5 square convolution
11 |         # kernel
12 |         self.conv1 = nn.Conv2d(1, 1, kernel_size=3, padding=0, stride=1, bias=False)
13 |         self.conv2 = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=1, bias=False)
14 |         # an affine operation: y = Wx + b
15 |         self.fc1 = nn.Linear(16, 2)
16 |         self.relu = nn.ReLU(inplace=True)
17 | 
18 |     def forward(self, x):
19 |         x = self.conv1(x)
20 |         x = self.relu(x)
21 |         x = self.conv2(x)
22 |         x = self.relu(x)
23 |         size = x.size()[1:]  # all dimensions except the batch dimension
24 |         num_features = 1
25 |         for s in size:
26 |             num_features *= s
27 |         x = x.view(-1, num_features)
28 |         x = self.fc1(x)
29 |         return x
30 | 
31 | 
32 | if __name__ == "__main__":
33 |     net = Net()
34 |     print(net)
35 | 


--------------------------------------------------------------------------------
/quantize/guided_distance_view.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | import os
  3 | import torch
  4 | import torch.nn as nn
  5 | import torch.nn.parallel
  6 | import torch.backends.cudnn as cudnn
  7 | import torch.optim
  8 | import torch.utils.data
  9 | import torch.utils.data.distributed
 10 | import torchvision.models as models
 11 | from tensorboardX import SummaryWriter
 12 | from collections import defaultdict
 13 | import time
 14 | 
 15 | from utils.train_val import save_checkpoint, validate
 16 | from utils.data_loader import load_train_data, load_val_data
 17 | from utils.meter import AverageMeter, accuracy
 18 | from quantize.quantize_method import quantize_activations_gemm
 19 | from net import net_quantize_activation
 20 | 
 21 | 
 22 | def guided(args):
 23 |     best_low_prec1 = 0
 24 |     full_prec_feature_map1 = defaultdict(torch.Tensor)
 25 |     full_prec_feature_map2 = defaultdict(torch.Tensor)
 26 |     low_prec_feature_map1 = defaultdict(torch.Tensor)
 27 |     low_prec_feature_map2 = defaultdict(torch.Tensor)
 28 | 
 29 |     def full_prec_hook(module, input, output):
 30 |         # 一定要写成 input[0].data.clone()
 31 |         # 而不能写成 input[0].clone(), 否则报错
 32 |         # RuntimeError: Trying to backward through the graph a second time,
 33 |         # but the buffers have already been freed. Specify retain_graph=True
 34 |         # when calling backward the first time
 35 |         cudaid = int(repr(output.device)[-2])
 36 |         full_prec_feature_map1[cudaid] = input[0].data.clone()
 37 |         full_prec_feature_map2[cudaid] = output.data.clone()
 38 | 
 39 |     def low_prec_hook(module, input, output):
 40 |         cudaid = int(repr(output.device)[-2])
 41 |         low_prec_feature_map1[cudaid] = input[0].data.clone()
 42 |         low_prec_feature_map2[cudaid] = output.data.clone()
 43 | 
 44 |     def gpu_config(model):
 45 |         if args.gpu is not None:  # 指定GPU
 46 |             model = model.cuda(args.gpu)
 47 |         elif args.distributed:  # 集群训练（多机器）
 48 |             model.cuda()
 49 |             model = torch.nn.parallel.DistributedDataParallel(model)
 50 | 
 51 |         else:  # 单机训练（单卡或者多卡）
 52 |             if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
 53 |                 model.features = torch.nn.DataParallel(model.features)
 54 |                 model.cuda()
 55 |             else:
 56 |                 # 一机多卡时, 多 GPU 训练, 指定要用到 GPU 的 ids
 57 |                 model = torch.nn.DataParallel(model, args.device_ids).cuda()
 58 |         return model
 59 | 
 60 |     def guided_train(summary_writer, log_per_epoch=100, print_freq=20):
 61 | 
 62 |         batch_time = AverageMeter()
 63 |         data_time = AverageMeter()
 64 | 
 65 |         low_prec_losses = AverageMeter()
 66 |         low_prec_top1 = AverageMeter()
 67 |         low_prec_top5 = AverageMeter()
 68 |         distance_meter = AverageMeter()
 69 | 
 70 |         # 状态转化为训练
 71 |         low_prec_model.train()
 72 |         full_prec_model.eval()
 73 | 
 74 |         end = time.time()
 75 | 
 76 |         # 用于控制 tensorboard 的显示频率
 77 |         interval = len(train_loader) // log_per_epoch
 78 |         summary_point = [interval * split for split in torch.arange(log_per_epoch)]
 79 | 
 80 |         for i, (input, target) in enumerate(train_loader):
 81 |             # measure checkpoint.pth data loading time
 82 |             data_time.update(time.time() - end)
 83 | 
 84 |             if args.gpu is not None:
 85 |                 input = input.cuda(args.gpu, non_blocking=True)
 86 | 
 87 |             # target 必须要转为 cuda 类型
 88 |             # If ``True`` and the source is in pinned memory(固定内存),
 89 |             # the copy will be asynchronous(异步) with respect to the host
 90 |             target = target.cuda(args.gpu, non_blocking=True)
 91 | 
 92 |             full_prec_feature_map1.clear()
 93 |             low_prec_feature_map1.clear()
 94 |             full_prec_feature_map2.clear()
 95 |             low_prec_feature_map2.clear()
 96 | 
 97 |             # compute low_pre_output
 98 |             low_pre_output = low_prec_model(input)
 99 |             full_pre_output = full_prec_model(input)
100 | 
101 |             """Guided Key Point start"""
102 | 
103 |             # 将 distance 和 feature map放在同一个一gpu上
104 |             distance = torch.tensor([0.0]).cuda(args.gpu, non_blocking=True)
105 |             num_layer3_features = 1
106 |             for dim in full_prec_feature_map1[0].size():
107 |                 num_layer3_features *= dim
108 | 
109 |             num_layer4_features = 1
110 |             for dim in full_prec_feature_map2[0].size():
111 |                 num_layer4_features *= dim
112 | 
113 |             for cudaid in full_prec_feature_map1:
114 |                 # 手动将feature map都搬到同一个 GPU 上
115 |                 full_prec_feature_map1[cudaid] = full_prec_feature_map1[cudaid].cuda(args.gpu, non_blocking=True)
116 |                 low_prec_feature_map1[cudaid] = low_prec_feature_map1[cudaid].cuda(args.gpu, non_blocking=True)
117 |                 full_prec_feature_map2[cudaid] = full_prec_feature_map2[cudaid].cuda(args.gpu, non_blocking=True)
118 |                 low_prec_feature_map2[cudaid] = low_prec_feature_map2[cudaid].cuda(args.gpu, non_blocking=True)
119 | 
120 |             for cudaid in low_prec_feature_map1:
121 |                 """
122 |                 RuntimeError: arguments are located on different GPUs
123 |                 解决方法在于手动将feature map都搬到同一个 GPU 上
124 |                 """
125 |                 layer3 = (quantize_activations_gemm(low_prec_feature_map1[cudaid]) -
126 |                           quantize_activations_gemm(full_prec_feature_map1[cudaid])).norm(p=args.norm) / num_layer3_features
127 |                 layer4 = (quantize_activations_gemm(low_prec_feature_map2[cudaid]) -
128 |                           quantize_activations_gemm(full_prec_feature_map2[cudaid])).norm(p=args.norm) / num_layer4_features
129 |                 distance += (layer3 + layer4) / len(low_prec_feature_map1)
130 | 
131 |             distance *= args.balance
132 | 
133 |             """Guided Key Point end"""
134 | 
135 |             low_prec_loss = criterion(low_pre_output, target)
136 |             low_prec_prec1, low_prec_prec5 = accuracy(low_pre_output, target, topk=(1, 5))
137 | 
138 |             low_prec_losses.update(low_prec_loss.item(), input.size(0))
139 |             low_prec_top1.update(low_prec_prec1[0], input.size(0))
140 |             low_prec_top5.update(low_prec_prec5[0], input.size(0))
141 |             distance_meter.update(distance[0], 1)
142 | 
143 |             # compute gradient and do SGD step
144 |             low_prec_optimizer.zero_grad()
145 |             low_prec_loss.backward()
146 |             low_prec_optimizer.step()
147 | 
148 |             # measure elapsed time
149 |             batch_time.update(time.time() - end)
150 |             end = time.time()
151 | 
152 |             if i % print_freq == 0:
153 | 
154 |                 print('Epoch: [{0}][{1}/{2}]\t'
155 |                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
156 |                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
157 |                       'Loss {low_prec_loss.val:.4f} ({low_prec_loss.avg:.4f})\t'
158 |                       'Prec@1 {low_prec_top1.val:.3f} ({low_prec_top1.avg:.3f})\t'
159 |                       'Prec@5 {low_prec_top5.val:.3f} ({low_prec_top5.avg:.3f}) \t'
160 |                       'distance {distance.val:.3f} ({distance.avg:.3f})'.format(
161 |                         epoch, i, len(train_loader), batch_time=batch_time,
162 |                         data_time=data_time, low_prec_loss=low_prec_losses, low_prec_top1=low_prec_top1,
163 |                         low_prec_top5=low_prec_top5, distance=distance_meter))
164 | 
165 |             if summary_writer is not None and (i in summary_point):
166 |                 step = i / interval + (epoch - 1) * log_per_epoch
167 |                 summary_writer.add_scalar("distance", distance_meter.avg, step)
168 |                 summary_writer.add_scalar("loss/low_prec_loss", low_prec_loss, step)
169 |                 summary_writer.add_scalar("train_low_prec/top-1", low_prec_top1.avg, step)
170 |                 summary_writer.add_scalar("train_low_prec/top-5", low_prec_top5.avg, step)
171 | 
172 |     # 代码用于使用预训练的ResNet18来同时量化网络权重和激活
173 |     print("=> using imageNet pre-trained model '{}'".format(args.arch))
174 |     # 获取预训练模型参数
175 |     full_prec_model = models.__dict__[args.arch](pretrained=True)
176 |     low_prec_model = net_quantize_activation.__dict__[args.arch]()
177 | 
178 |     model_dict = low_prec_model.state_dict()
179 |     imagenet_dict = full_prec_model.state_dict()
180 |     model_dict.update(imagenet_dict)
181 |     low_prec_model.load_state_dict(model_dict)
182 | 
183 |     low_prec_layer4 = low_prec_model._modules.get("layer4")
184 |     full_prec_layer4 = full_prec_model._modules.get("layer4")
185 | 
186 |     hook_low_prec = low_prec_layer4.register_forward_hook(low_prec_hook)
187 |     hook_full_prec = full_prec_layer4.register_forward_hook(full_prec_hook)
188 | 
189 |     low_prec_model = gpu_config(low_prec_model)
190 |     full_prec_model = gpu_config(full_prec_model)
191 | 
192 |     # 定义损失函数和优化器
193 |     criterion = nn.CrossEntropyLoss().cuda(args.gpu)
194 |     low_prec_optimizer = torch.optim.SGD(low_prec_model.parameters(),
195 |                                          args.lr,
196 |                                          momentum=args.momentum,
197 |                                          weight_decay=args.weight_decay)
198 | 
199 |     low_prec_scheduler = torch.optim.lr_scheduler.StepLR(low_prec_optimizer, step_size=args.lr_step, gamma=0.1)
200 | 
201 |     cudnn.benchmark = True
202 | 
203 |     val_loader = load_val_data(args.data, args.batch_size, args.workers)
204 |     train_loader, train_sampler = load_train_data(args.data, args.batch_size, args.workers, args.distributed)
205 | 
206 |     # 加载日志 writer
207 |     writer = SummaryWriter(args.save_dir)
208 | 
209 |     for epoch in range(args.start_epoch, args.epochs+1):
210 |         if args.distributed:
211 |             train_sampler.set_epoch(epoch)
212 | 
213 |         low_prec_scheduler.step()
214 | 
215 |         # train for one epoch
216 |         guided_train(writer)
217 | 
218 |         # evaluate on validation set
219 |         low_prec1 = validate(low_prec_model, val_loader, criterion, args.gpu,
220 |                              epoch, writer, name_prefix='low_prec')
221 | 
222 |         # remember best prec@1 and save low_prec_checkpoint
223 |         is_best_low = low_prec1 > best_low_prec1
224 | 
225 |         best_low_prec1 = max(low_prec1, best_low_prec1)
226 | 
227 |         save_checkpoint({
228 |             'epoch': epoch + 1,
229 |             'arch': args.arch,
230 |             'state_dict': low_prec_model.state_dict(),
231 |             'best_prec1': best_low_prec1,
232 |             'optimizer': low_prec_optimizer.state_dict(),
233 |         }, is_best_low, args.save_dir, name_prefix="low_prec")
234 | 
235 |     # 关闭日志 writer
236 |     writer.close()
237 | 
238 |     # 去掉钩子
239 | 
240 |     hook_full_prec.remove()
241 |     hook_low_prec.remove()
242 | 
243 | 


--------------------------------------------------------------------------------
/quantize/quantize_guided.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | import os
  3 | import torch
  4 | import torch.nn as nn
  5 | import torch.nn.parallel
  6 | import torch.backends.cudnn as cudnn
  7 | import torch.optim
  8 | import torch.utils.data
  9 | import torch.utils.data.distributed
 10 | import torchvision.models as models
 11 | from tensorboardX import SummaryWriter
 12 | from collections import defaultdict
 13 | import time
 14 | import warnings
 15 | 
 16 | from utils.train_val import save_checkpoint, validate
 17 | from utils.data_loader import load_train_data, load_val_data
 18 | from utils.meter import AverageMeter, accuracy
 19 | from quantize.quantize_method import quantize_activations_gemm
 20 | from net import net_quantize_activation
 21 | 
 22 | 
 23 | def guided(args):
 24 |     best_low_prec1 = 0
 25 |     best_full_prec1 = 0
 26 |     full_prec_feature_map1 = defaultdict(torch.Tensor)
 27 |     full_prec_feature_map2 = defaultdict(torch.Tensor)
 28 |     low_prec_feature_map1 = defaultdict(torch.Tensor)
 29 |     low_prec_feature_map2 = defaultdict(torch.Tensor)
 30 | 
 31 |     def full_prec_hook(module, input, output):
 32 |         # 一定要写成 input[0]
 33 |         # 而不能写成 input[0].data.clone(), 否则没法加入反向传播
 34 | 
 35 |         # 而使用直接使用 input[0] 也会有问题, 如下
 36 |         # RuntimeError: Trying to backward through the graph a second time,
 37 |         # but the buffers have already been freed. Specify retain_graph=True
 38 |         # when calling backward the first time
 39 |         # 即 distance_loss 同时参与高精度和低精度的反向传播, 比如先通过低精度的反向传播之后
 40 |         # 该 distance_loss 的计算图被释放, 然后第二次使用的时候, 找不到对应的计算图和相应的参数
 41 |         cudaid = int(repr(output.device)[-2])
 42 |         full_prec_feature_map1[cudaid] = input[0]
 43 |         full_prec_feature_map2[cudaid] = output
 44 | 
 45 |     def low_prec_hook(module, input, output):
 46 |         cudaid = int(repr(output.device)[-2])
 47 |         low_prec_feature_map1[cudaid] = input[0]
 48 |         low_prec_feature_map2[cudaid] = output
 49 | 
 50 |     def gpu_config(model):
 51 |         if args.gpu is not None:  # 指定GPU
 52 |             model = model.cuda(args.gpu)
 53 |         elif args.distributed:  # 集群训练（多机器）
 54 |             model.cuda()
 55 |             model = torch.nn.parallel.DistributedDataParallel(model)
 56 | 
 57 |         else:  # 单机训练（单卡或者多卡）
 58 |             if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
 59 |                 model.features = torch.nn.DataParallel(model.features)
 60 |                 model.cuda()
 61 |             else:
 62 |                 # 一机多卡时, 多 GPU 训练, 指定要用到 GPU 的 ids
 63 |                 model = torch.nn.DataParallel(model, args.device_ids).cuda()
 64 |         return model
 65 | 
 66 |     def guided_train(summary_writer, log_per_epoch=100, print_freq=20):
 67 | 
 68 |         batch_time = AverageMeter()
 69 |         data_time = AverageMeter()
 70 | 
 71 |         low_prec_losses = AverageMeter()
 72 |         low_prec_top1 = AverageMeter()
 73 |         low_prec_top5 = AverageMeter()
 74 | 
 75 |         full_prec_losses = AverageMeter()
 76 |         full_prec_top1 = AverageMeter()
 77 |         full_prec_top5 = AverageMeter()
 78 |         distance_meter = AverageMeter()
 79 | 
 80 |         # 状态转化为训练
 81 |         low_prec_model.train()
 82 |         full_prec_model.train()
 83 | 
 84 |         end = time.time()
 85 | 
 86 |         # 用于控制 tensorboard 的显示频率
 87 |         interval = len(train_loader) // log_per_epoch
 88 |         summary_point = [interval * split for split in torch.arange(log_per_epoch)]
 89 | 
 90 |         for i, (input, target) in enumerate(train_loader):
 91 |             # measure checkpoint.pth data loading time
 92 |             data_time.update(time.time() - end)
 93 | 
 94 |             if args.gpu is not None:
 95 |                 input = input.cuda(args.gpu, non_blocking=True)
 96 | 
 97 |             # target 必须要转为 cuda 类型
 98 |             # If ``True`` and the source is in pinned memory(固定内存),
 99 |             # the copy will be asynchronous(异步) with respect to the host
100 |             target = target.cuda(args.gpu, non_blocking=True)
101 | 
102 |             full_prec_feature_map1.clear()
103 |             low_prec_feature_map1.clear()
104 |             full_prec_feature_map2.clear()
105 |             low_prec_feature_map2.clear()
106 | 
107 |             # compute low_pre_output
108 |             low_pre_output = low_prec_model(input)
109 |             full_pre_output = full_prec_model(input)
110 | 
111 |             """Guided Key Point start"""
112 | 
113 |             # 将 distance 和 feature map放在同一个一gpu上
114 |             distance = torch.tensor([0.0]).cuda(args.gpu, non_blocking=True)
115 |             num_layer3_features = 1
116 |             for dim in full_prec_feature_map1[0].size():
117 |                 num_layer3_features *= dim
118 | 
119 |             num_layer4_features = 1
120 |             for dim in full_prec_feature_map2[0].size():
121 |                 num_layer4_features *= dim
122 | 
123 |             for cudaid in low_prec_feature_map1:
124 | 
125 |                 layer3 = (quantize_activations_gemm(low_prec_feature_map1[cudaid]) -
126 |                           quantize_activations_gemm(full_prec_feature_map1[cudaid])).norm(p=args.norm) / num_layer3_features
127 |                 layer4 = (quantize_activations_gemm(low_prec_feature_map2[cudaid]) -
128 |                           quantize_activations_gemm(full_prec_feature_map2[cudaid])).norm(p=args.norm) / num_layer4_features
129 |                 # RuntimeError: arguments are located on different GPUs
130 |                 # 解决方法在于手动将 feature map 都搬到同一个GPU, Tensor.cuda(args.gpu, non_blocking=True)
131 |                 distance += (layer3 + layer4).cuda(args.gpu, non_blocking=True) / len(low_prec_feature_map1)
132 | 
133 |             distance *= args.balance
134 |             low_prec_loss = criterion(low_pre_output, target) + distance
135 |             full_prec_loss = criterion(full_pre_output, target) + distance
136 | 
137 |             low_prec_prec1, low_prec_prec5 = accuracy(low_pre_output, target, topk=(1, 5))
138 |             full_prec_prec1, full_prec_prec5 = accuracy(full_pre_output, target, topk=(1, 5))
139 | 
140 |             low_prec_losses.update(low_prec_loss.item(), input.size(0))
141 |             low_prec_top1.update(low_prec_prec1[0], input.size(0))
142 |             low_prec_top5.update(low_prec_prec5[0], input.size(0))
143 | 
144 |             full_prec_losses.update(full_prec_loss.item(), input.size(0))
145 |             full_prec_top1.update(full_prec_prec1[0], input.size(0))
146 |             full_prec_top5.update(full_prec_prec5[0], input.size(0))
147 |             distance_meter.update(distance[0], 1)
148 | 
149 |             # compute gradient and do SGD step
150 |             low_prec_optimizer.zero_grad()
151 |             full_prec_optimizer.zero_grad()
152 | 
153 |             low_prec_loss.backward()  # retain_graph=True
154 |             # full_prec_loss.backward()
155 | 
156 |             # 第五步, 使用更新的梯度更新权重
157 |             low_prec_optimizer.step()
158 |             full_prec_optimizer.step()
159 | 
160 |             # measure elapsed time
161 |             batch_time.update(time.time() - end)
162 |             end = time.time()
163 | 
164 |             if i % print_freq == 0:
165 | 
166 |                 print('Epoch: [{0}][{1}/{2}]\t'
167 |                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
168 |                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
169 |                       'Loss {low_prec_loss.val:.4f} ({low_prec_loss.avg:.4f})\t'
170 |                       'Prec@1 {low_prec_top1.val:.3f} ({low_prec_top1.avg:.3f})\t'
171 |                       'Prec@5 {low_prec_top5.val:.3f} ({low_prec_top5.avg:.3f}) \t'
172 |                       'distance {distance.val:.3f} ({distance.avg:.3f})'.format(
173 |                         epoch, i, len(train_loader), batch_time=batch_time,
174 |                         data_time=data_time, low_prec_loss=low_prec_losses, low_prec_top1=low_prec_top1,
175 |                         low_prec_top5=low_prec_top5, distance=distance_meter))
176 | 
177 |                 print('Epoch: [{0}][{1}/{2}]\t'
178 |                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
179 |                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
180 |                       'Loss {full_prec_loss.val:.4f} ({full_prec_loss.avg:.4f})\t'
181 |                       'Prec@1 {full_prec_top1.val:.3f} ({full_prec_top1.avg:.3f})\t'
182 |                       'Prec@5 {full_prec_top5.val:.3f} ({full_prec_top5.avg:.3f})'.format(
183 |                         epoch, i, len(train_loader), batch_time=batch_time,
184 |                         data_time=data_time, full_prec_loss=full_prec_losses, full_prec_top1=full_prec_top1,
185 |                         full_prec_top5=full_prec_top5))
186 | 
187 |             if summary_writer is not None and (i in summary_point):
188 |                 step = i / interval + (epoch - 1) * log_per_epoch
189 |                 summary_writer.add_scalar("distance", distance_meter.avg, step)
190 |                 summary_writer.add_scalar("loss/low_prec_loss", low_prec_loss, step)
191 |                 summary_writer.add_scalar("train_low_prec/top-1", low_prec_top1.avg, step)
192 |                 summary_writer.add_scalar("train_low_prec/top-5", low_prec_top5.avg, step)
193 | 
194 |                 summary_writer.add_scalar("loss/full_prec_loss", full_prec_loss, step)
195 |                 summary_writer.add_scalar("train_full_prec/top-1", full_prec_top1.avg, step)
196 |                 summary_writer.add_scalar("train_full_prec/top-5", full_prec_top5.avg, step)
197 | 
198 |     if args.weight_quantized:
199 |         print("=> using quantize_tanh-weight model '{}'".format(args.arch))
200 |         full_prec_model = models.__dict__[args.arch](pretrained=True)
201 |         low_prec_model = net_quantize_activation.__dict__[args.arch]()
202 |         if os.path.isfile(args.weight_quantized):
203 |             print("=> loading weight_quantized model '{}'".format(args.weight_quantized))
204 |             model_dict = low_prec_model.state_dict()
205 |             quantized_model = torch.load(args.weight_quantized)
206 |             pretrained__dict = {k[7:]: v for k, v in quantized_model['state_dict'].items()
207 |                                 if k in low_prec_model.state_dict()}
208 |             model_dict.update(pretrained__dict)
209 |             low_prec_model.load_state_dict(model_dict)
210 |             print("=> loaded weight_quantized '{}'".format(args.weight_quantized))
211 |         else:
212 |             print("=> no  quantize_tanh-weight model found at '{}'".format(args.weight_quantized))
213 |     else:
214 |         # 代码用于使用预训练的ResNet18来同时量化网络权重和激活
215 |         print("=> using imageNet pre-trained model '{}'".format(args.arch))
216 |         # 获取预训练模型参数
217 |         full_prec_model = models.__dict__[args.arch](pretrained=True)
218 |         low_prec_model = net_quantize_activation.__dict__[args.arch]()
219 | 
220 |         model_dict = low_prec_model.state_dict()
221 |         imagenet_dict = full_prec_model.state_dict()
222 |         model_dict.update(imagenet_dict)
223 |         low_prec_model.load_state_dict(model_dict)
224 | 
225 |     if not args.evaluate:
226 |         low_prec_layer4 = low_prec_model._modules.get("layer4")
227 |         full_prec_layer4 = full_prec_model._modules.get("layer4")
228 | 
229 |         hook_low_prec = low_prec_layer4.register_forward_hook(low_prec_hook)
230 |         hook_full_prec = full_prec_layer4.register_forward_hook(full_prec_hook)
231 | 
232 |     low_prec_model = gpu_config(low_prec_model)
233 |     full_prec_model = gpu_config(full_prec_model)
234 | 
235 |     # 定义损失函数和优化器
236 |     criterion = nn.CrossEntropyLoss().cuda(args.gpu)
237 |     low_prec_optimizer = torch.optim.SGD(low_prec_model.parameters(),
238 |                                          args.lr,
239 |                                          momentum=args.momentum,
240 |                                          weight_decay=args.weight_decay)
241 |     full_prec_optimizer = torch.optim.SGD(low_prec_model.parameters(),
242 |                                           args.lr * args.rate,
243 |                                           momentum=args.momentum,
244 |                                           weight_decay=args.weight_decay)
245 | 
246 |     # 调整学习率
247 |     full_prec_scheduler = torch.optim.lr_scheduler.StepLR(full_prec_optimizer, step_size=args.lr_step, gamma=0.1)
248 |     low_prec_scheduler = torch.optim.lr_scheduler.StepLR(low_prec_optimizer, step_size=args.lr_step, gamma=0.1)
249 | 
250 |     # optionally resume from a checkpoint
251 |     if args.resume:
252 |         full_prec_resume = os.path.join(args.save_dir, "full_prec-checkpoint.pth.tar")
253 |         low_prec_resume = os.path.join(args.save_dir, "low_prec-checkpoint.pth.tar")
254 |         if os.path.isfile(full_prec_resume) and os.path.isfile(low_prec_resume):
255 |             print("=> loading low_prec_checkpoint from '{}' and '{}'".format(full_prec_resume,
256 |                                                                              low_prec_resume))
257 |             full_prec_checkpoint = torch.load(full_prec_resume)
258 |             low_prec_checkpoint = torch.load(low_prec_resume)
259 | 
260 |             args.start_epoch = low_prec_checkpoint['epoch']
261 |             # 模型的最好精度
262 |             best_low_prec1 = low_prec_checkpoint['best_prec1']
263 |             best_full_prec1 = full_prec_checkpoint['best_prec1']
264 | 
265 |             low_prec_model.load_state_dict(low_prec_checkpoint['state_dict'])
266 |             full_prec_model.load_state_dict(full_prec_checkpoint['state_dict'])
267 | 
268 |             low_prec_optimizer.load_state_dict(low_prec_checkpoint['optimizer'])
269 |             full_prec_optimizer.load_state_dict(full_prec_checkpoint['optimizer'])
270 | 
271 |             print("=> loaded low_prec_checkpoint from '{}' and '{}' (epoch {})".format(
272 |                 full_prec_resume, low_prec_model, low_prec_checkpoint['epoch']))
273 |         else:
274 |             warnings.warn("=> no checkpoint found at directory'{}'".format(args.save_dir))
275 | 
276 |     cudnn.benchmark = True
277 | 
278 |     val_loader = load_val_data(args.data, args.batch_size, args.workers)
279 |     train_loader, train_sampler = load_train_data(args.data, args.batch_size, args.workers, args.distributed)
280 | 
281 |     # 加载日志 writer
282 |     writer = SummaryWriter(args.save_dir)
283 | 
284 |     for epoch in range(args.start_epoch, args.epochs):
285 |         if args.distributed:
286 |             train_sampler.set_epoch(epoch)
287 | 
288 |         full_prec_scheduler.step()
289 |         low_prec_scheduler.step()
290 | 
291 |         # train for one epoch
292 |         guided_train(writer)
293 | 
294 |         # evaluate on validation set
295 |         low_prec1 = validate(low_prec_model, val_loader, criterion, args.gpu,
296 |                              epoch, writer, name_prefix='low_prec')
297 |         full_prec1 = validate(full_prec_model, val_loader, criterion, args.gpu,
298 |                               epoch, writer, name_prefix='full_prec')
299 | 
300 |         # remember best prec@1 and save low_prec_checkpoint
301 |         is_best_low = low_prec1 > best_low_prec1
302 |         is_best_full = full_prec1 > best_full_prec1
303 | 
304 |         best_low_prec1 = max(low_prec1, best_low_prec1)
305 |         best_full_prec1 = max(full_prec1, best_full_prec1)
306 | 
307 |         save_checkpoint({
308 |             'epoch': epoch + 1,
309 |             'arch': args.arch,
310 |             'state_dict': low_prec_model.state_dict(),
311 |             'best_prec1': best_low_prec1,
312 |             'optimizer': low_prec_optimizer.state_dict(),
313 |         }, is_best_low, args.save_dir, name_prefix="low_prec")
314 | 
315 |         save_checkpoint({
316 |             'epoch': epoch + 1,
317 |             'arch': args.arch,
318 |             'state_dict': full_prec_model.state_dict(),
319 |             'best_prec1': best_full_prec1,
320 |             'optimizer': full_prec_optimizer.state_dict(),
321 |         }, is_best_full, args.save_dir, name_prefix="full_prec")
322 | 
323 |     # 关闭日志 writer
324 |     writer.close()
325 | 
326 |     # 去掉钩子
327 |     if not args.evaluate:
328 |         hook_full_prec.remove()
329 |         hook_low_prec.remove()
330 | 


--------------------------------------------------------------------------------
/quantize/quantize_method.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | """
 3 | """
 4 | import torch
 5 | import math
 6 | import numpy as np
 7 | 
 8 | 
 9 | # 量化比特
10 | QUANTIZE_BIT = 8
11 | 
12 | 
13 | class QuantizeTanh(torch.autograd.Function):
14 | 
15 |     @staticmethod
16 |     def forward(ctx, i):
17 |         n = math.pow(2.0, QUANTIZE_BIT) - 1
18 |         return torch.round(i * n) / n
19 | 
20 |     @staticmethod
21 |     def backward(ctx, grad_outputs):
22 |         return grad_outputs
23 | 
24 | 
25 | class QuantizeGEMM(torch.autograd.Function):
26 | 
27 |     @staticmethod
28 |     def forward(ctx, i):
29 |         n = math.pow(2.0, QUANTIZE_BIT) - 1
30 |         v_max = torch.max(i)
31 |         v_min = torch.min(i)
32 |         scale = (v_max - v_min)/n
33 |         scale = max(scale, 1e-8)
34 |         zero_point = torch.round(torch.clamp(-v_min/scale, 0, n))
35 |         quantize_val = torch.clamp(torch.round(i/scale + zero_point), 0, n)
36 |         return (quantize_val-zero_point) * scale
37 | 
38 |     @staticmethod
39 |     def backward(ctx, grad_outputs):
40 |         return grad_outputs
41 | 
42 | 
43 | quantize_tanh = QuantizeTanh.apply
44 | quantize_gemm = QuantizeGEMM.apply
45 | 
46 | 
47 | def quantize_weights_bias_tanh(weight):
48 |     tanh_w = torch.tanh(weight)
49 |     """
50 |     torch 关于 y = w/max(|w|) 函数在max(|w|)处梯度行为怪异该如何解释?
51 |     tensor w ([[ 0.1229,  0.2390],
52 |              [ 0.8703,  0.6368]])
53 | 
54 |     tensor y ([[ 0.2873,  0.2873],
55 |              [-0.3296,  0.2873]])
56 |     由于没有搞清楚 torch 在 max(|w|) 处如何处理的, 
57 |     不过, 从上面看出梯度为负数, y = w/max(|w|) w>0时, 梯度为负数, 我认为是不正确的.
58 |     为了便于处理, 这里求梯度过程中, 我们把 max(|w|) 当成一个常量来处理,
59 |     代码中通过 Tensor.data 这样求 max(|w|) 的过程就不会加入到计算图中,
60 |     可以看出, max_abs_w 就是一个一个常量
61 |     """
62 |     max_abs_w = torch.max(torch.abs(tanh_w)).data
63 |     norm_weight = ((tanh_w / max_abs_w) + 1) / 2
64 | 
65 |     return 2 * quantize_tanh(norm_weight) - 1
66 | 
67 | 
68 | def quantize_activations_tanh(activation):
69 |     activation = torch.clamp(activation, 0.0, 1.0)
70 |     return 2 * quantize_tanh(activation) - 1
71 | 
72 | 
73 | def quantize_weights_bias_gemm(weight):
74 |     return quantize_gemm(weight)
75 | 
76 | 
77 | def quantize_activations_gemm(activation):
78 |     return quantize_gemm(activation)
79 | 
80 | 


--------------------------------------------------------------------------------
/quantize/quantize_module_.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | import torch
  3 | import torch.nn as nn
  4 | from quantize.quantize_method import quantize_weights_bias_gemm, quantize_activations_gemm
  5 | import torch.nn.functional as F
  6 | 
  7 | 
  8 | class QWConv2D(torch.nn.Conv2d):
  9 |     def __init__(self, n_channels, out_channels, kernel_size, stride=1,
 10 |                  padding=0, dilation=1, groups=1, bias=True):
 11 |         super(QWConv2D, self).__init__(n_channels, out_channels, kernel_size, stride,
 12 |                                        padding, dilation, groups, bias)
 13 |         # nn.init.xavier_normal_(self.weight, 1)
 14 |         # nn.init.constant_(self.weight, 1)
 15 | 
 16 |     def forward(self, input):
 17 |         """
 18 |         关键在于使用函数 F.conv2d, 而不是使用模块 nn.ConV2d
 19 |         """
 20 |         qweight = quantize_weights_bias_gemm(self.weight)
 21 |         if self.bias is not None:
 22 |             qbias = quantize_weights_bias_gemm(self.bias)
 23 |         else:
 24 |             qbias = None
 25 |         return F.conv2d(input, qweight, qbias, self.stride,
 26 |                         self.padding, self.dilation, self.groups)
 27 | 
 28 | 
 29 | class QWAConv2D(torch.nn.Conv2d):
 30 |     def __init__(self, n_channels, out_channels, kernel_size, stride=1,
 31 |                  padding=0, dilation=1, groups=1, bias=True):
 32 |         super(QWAConv2D, self).__init__(n_channels, out_channels, kernel_size, stride,
 33 |                                        padding, dilation, groups, bias)
 34 |         # nn.init.xavier_normal_(self.weight, 1)
 35 |         # nn.init.constant_(self.weight, 1)
 36 | 
 37 |     def forward(self, input):
 38 |         qweight = quantize_weights_bias_gemm(self.weight)
 39 |         if self.bias is not None:
 40 |             qbias = quantize_weights_bias_gemm(self.bias)
 41 |         else:
 42 |             qbias = None
 43 |         qinput = quantize_activations_gemm(input)
 44 |         return F.conv2d(qinput, qweight, qbias, self.stride,
 45 |                         self.padding, self.dilation, self.groups)
 46 | 
 47 | 
 48 | class QWLinear(nn.Linear):
 49 | 
 50 |     def __init__(self, in_features, out_features, bias=True, num_bits=8, num_bits_weight=None,
 51 |                  num_bits_grad=None, biprecision=False):
 52 |         super(QWLinear, self).__init__(in_features, out_features, bias)
 53 | 
 54 |     def forward(self, input):
 55 |         qweight = quantize_weights_bias_gemm(self.weight)
 56 | 
 57 |         if self.bias is not None:
 58 |             qbias = quantize_weights_bias_gemm(self.bias)
 59 |         else:
 60 |             qbias = None
 61 | 
 62 |         return F.linear(input, qweight, qbias)
 63 | 
 64 | 
 65 | class QWALinear(nn.Linear):
 66 | 
 67 |     def __init__(self, in_features, out_features, bias=True):
 68 |         super(QWALinear, self).__init__(in_features, out_features, bias)
 69 | 
 70 |     def forward(self, input):
 71 |         qinput = quantize_activations_gemm(input)
 72 |         qweight = quantize_weights_bias_gemm(self.weight)
 73 | 
 74 |         if self.bias is not None:
 75 |             qbias = quantize_weights_bias_gemm(self.bias)
 76 |         else:
 77 |             qbias = None
 78 | 
 79 |         return F.linear(qinput, qweight, qbias)
 80 | 
 81 | 
 82 | """
 83 | 论文中 scalar layer 层设计 (多个 GPU )
 84 | """
 85 | 
 86 | 
 87 | class Scalar(nn.Module):
 88 | 
 89 |     def __init__(self):
 90 |         super(Scalar, self).__init__()  # 这一行很重要
 91 |         # 第1种错误
 92 |         # self.scalar = torch.tensor([0.01], requires_grad=True)
 93 |         # RuntimeError: Expected object of type torch.FloatTensor
 94 |         # but found type torch.cuda.FloatTensor for argument
 95 | 
 96 |         # 第2种错误
 97 |         # self.scalar = torch.tensor([0.01], requires_grad=True).cuda()
 98 |         # RuntimeError: arguments are located on different GPUs
 99 | 
100 |         # 第3种错误
101 |         # self.scalar = nn.Parameter(torch.tensor(0.01, requires_grad=True))
102 |         # RuntimeError: slice() cannot be applied to a 0-dim tensor,
103 |         #  而加了方括号正确为 1-dim tensor
104 | 
105 |         # 第4中错误
106 |         #  scalar = nn.Parameter(torch.tensor([0.01], requires_grad=True))
107 |         #  self.register_buffer("scalar", scalar)
108 |         #  scalar没有梯度更新(全是None), register_buffer 用于存储非训练参数, 如bn的平均值存储
109 | 
110 |         # 第1种方法, 可以使用
111 |         # self.scalar = nn.Parameter(torch.tensor([0.01], requires_grad=True))
112 | 
113 |         # 第2种方法, 可以使用
114 |         # scalar = nn.Parameter(torch.tensor([0.01], requires_grad=True))
115 |         # self.register_parameter("scalar", scalar)
116 | 
117 |         # 根据训练经验, 设为 2.5
118 |         self.scalar = nn.Parameter(torch.tensor([1.0], requires_grad=True, dtype=torch.float))
119 | 
120 |     def forward(self, i):
121 |         return self.scalar * i
122 | 
123 | 
124 | if __name__ == "__main__":
125 |     qconv = QWConv2D(1, 1, 3)
126 |     qconv.zero_grad()
127 |     x = torch.ones(1, 1, 3, 3, requires_grad=True).float()
128 |     y = qconv(x)
129 |     y.backward()
130 |     print("QConv2D 权重梯度", qconv.weight.grad)
131 | 
132 |     # 直接求梯度
133 |     a = torch.ones(3, 3, requires_grad=True).float()
134 |     w = nn.init.constant_(torch.empty(3, 3, requires_grad=True), 1)
135 |     qw = quantize_weights_bias_gemm(w)
136 | 
137 |     z = (qw * a).sum()
138 |     z.backward()
139 |     print("求权重梯度", w.grad)
140 | 
141 |     # 验证量化梯度
142 |     qa = quantize_weights_bias_gemm(a).sum()
143 |     qa.backward()
144 |     print("直接求量化权重梯度", a.grad)


--------------------------------------------------------------------------------
/quantize/quantize_old_plan.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | import torch
 3 | import torch.nn as nn
 4 | import queue
 5 | from quantize.quantize_method import QuantizeTanh
 6 | 
 7 | 
 8 | class QuantizeWeightOrActivation(object):
 9 |     def __init__(self):
10 |         self.saved_param = queue.Queue()
11 |         self.saved_grad = queue.Queue()
12 |         self.quantize_fn = QuantizeTanh.apply  # 量化函数
13 | 
14 |     def quantize_weights_bias(self, weight):
15 |         tanh_w = torch.tanh(weight)
16 |         """
17 |         torch 关于 y = w/max(|w|) 函数在max(|w|)处梯度行为怪异该如何解释?
18 |         tensor w ([[ 0.1229,  0.2390],
19 |                  [ 0.8703,  0.6368]])
20 | 
21 |         tensor y ([[ 0.2873,  0.2873],
22 |                  [-0.3296,  0.2873]])
23 |         由于没有搞清楚 torch 在 max(|w|) 处如何处理的, 
24 |         不过, 从上面看出梯度为负数, y = w/max(|w|) w>0时, 梯度为负数, 我认为是不正确的.
25 |         为了便于处理, 这里求梯度过程中, 我们把 max(|w|) 当成一个常量来处理,
26 |         代码中通过 Tensor.data 这样求 max(|w|) 的过程就不会加入到计算图中,
27 |         可以看出, max_abs_w 就是一个一个常量
28 |         """
29 |         max_abs_w = torch.max(torch.abs(tanh_w)).data
30 |         norm_weight = ((tanh_w / max_abs_w) + 1) / 2
31 | 
32 |         return 2 * self.quantize_fn(norm_weight) - 1
33 | 
34 |     def quantize_activations(self, activation):
35 |         activation = torch.clamp(activation, 0.0, 1.0)
36 |         return self.quantize_fn(activation)
37 | 
38 |     def quantize(self, m):
39 |         # isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear)
40 |         if type(m) == nn.Conv2d or type(m) == nn.Linear:
41 |             self.saved_param.put(m.weight.data.clone())  # 第1步, 保存全精度权重
42 |             quantize_w = self.quantize_weights_bias(m.weight)  # 第2步, 量化权重
43 |             quantize_w.sum().backward()
44 |             self.saved_grad.put(m.weight.grad.data.clone())  # 第3步, 保存量化梯度
45 |             m.weight.data.copy_(quantize_w.data)  # 第4步, 使用量化权重代替全精度权重
46 |             # m.zero_grad() # 不需要, 因为后面调用 optimizer.zero_grad() 会把所有 m 的梯度清零
47 | 
48 |         if type(m) == nn.Linear:  # 量化 bias
49 |             self.saved_param.put(m.bias.data.clone())
50 |             quantize_b = self.quantize_weights_bias(m.bias)
51 |             quantize_b.sum().backward()
52 |             self.saved_grad.put(m.bias.grad.data.clone())
53 |             m.bias.data.copy_(quantize_b.data)
54 | 
55 |     def restore(self, m):
56 |         if type(m) == nn.Conv2d or type(m) == nn.Linear:
57 |             m.weight.data.copy_(self.saved_param.get())  # 第5步, 使用全精度权重代替量化权重
58 | 
59 |         if type(m) == nn.Linear:
60 |             m.bias.data.copy_(self.saved_param.get())
61 | 
62 |     def update_grad(self, m):
63 |         if type(m) == nn.Conv2d or type(m) == nn.Linear:
64 |             m.weight.grad.data.mul_(self.saved_grad.get())  # 第6步, 使用量化权重更新全精度权重
65 | 
66 |         if type(m) == nn.Linear:
67 |             m.bias.grad.data.mul_(self.saved_grad.get())
68 | 
69 |     @staticmethod
70 |     def info(net, s):
71 |         print("\n-----------{}--------\n".format(s))
72 |         for k, v in net.state_dict().items():
73 |             print(k, "\n", v)
74 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchvision
3 | tensorboardx
4 | tensorflow
5 | opencv-python
6 | numpy


--------------------------------------------------------------------------------
/utils/data_loader.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | import torch
 3 | from torchvision import transforms
 4 | import os
 5 | from torchvision import datasets
 6 | 
 7 | 
 8 | def load_train_data(data_dir, batch_size, workers, distributed):
 9 | 
10 |     traindir = os.path.join(data_dir, 'train')
11 |     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
12 |                                      std=[0.229, 0.224, 0.225])
13 | 
14 |     # 训练模式， 加载训练数据
15 |     train_dataset = datasets.ImageFolder(
16 |         traindir,
17 |         transforms.Compose([
18 |             transforms.RandomResizedCrop(224),
19 |             transforms.RandomHorizontalFlip(),
20 |             transforms.ToTensor(),
21 |             normalize,
22 |         ]))
23 | 
24 |     if distributed:
25 |         train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
26 |     else:
27 |         train_sampler = None
28 | 
29 |     train_loader = torch.utils.data.DataLoader(
30 |         train_dataset,
31 |         batch_size=batch_size, shuffle=(train_sampler is None),
32 |         num_workers=workers, pin_memory=True,
33 |         sampler=train_sampler)
34 | 
35 |     return train_loader, train_sampler
36 | 
37 | 
38 | def load_val_data(data_dir, batch_size=64, workers=8):
39 |     valdir = os.path.join(data_dir, 'val')
40 |     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
41 |                                      std=[0.229, 0.224, 0.225])
42 | 
43 |     # 加载验证数据
44 |     val_loader = torch.utils.data.DataLoader(
45 |         datasets.ImageFolder(valdir, transforms.Compose([
46 |             transforms.Resize(256),
47 |             transforms.CenterCrop(224),
48 |             transforms.ToTensor(),
49 |             normalize,
50 |         ])),
51 |         batch_size=batch_size, shuffle=False,
52 |         num_workers=workers, pin_memory=True)
53 |     return val_loader
54 | 


--------------------------------------------------------------------------------
/utils/meter.py:
--------------------------------------------------------------------------------
 1 | # coding=utf-8
 2 | import torch
 3 | import logging
 4 | 
 5 | 
 6 | class AverageMeter(object):
 7 |     """Computes and stores the average and current value"""
 8 | 
 9 |     def __init__(self):
10 |         self.val = 0
11 |         self.avg = 0
12 |         self.sum = 0
13 |         self.count = 0
14 | 
15 |     def reset(self):
16 |         self.val = 0
17 |         self.avg = 0
18 |         self.sum = 0
19 |         self.count = 0
20 | 
21 |     def update(self, val, n=1):
22 |         self.val = val
23 |         self.sum += val * n
24 |         self.count += n
25 |         self.avg = self.sum / self.count
26 | 
27 | 
28 | def accuracy(output, target, topk=(1,)):
29 |     """Computes the precision@k for the specified values of k"""
30 |     with torch.no_grad():
31 |         maxk = max(topk)
32 |         batch_size = target.size(0)
33 | 
34 |         # pred: torch.Size([128, 5])
35 |         _, pred = output.topk(maxk, 1, True, True)
36 |         pred = pred.t()  # 转置
37 |         # pred: torch.Size([5, 128])
38 | 
39 |         # batch_size 128 target: torch.Size([128]),
40 |         # 也就是说 target 不是 one-hot 编码, 而是 class id
41 |         target = target.view(1, -1).expand_as(pred)
42 |         # [128] =>view=> [1, 128] =>expand_as[5, 128]=>[5, 128]
43 |         correct = pred.eq(target)  # eq: Computes element-wise equality
44 | 
45 |         res = []
46 |         for k in topk:
47 |             correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
48 |             res.append(correct_k.mul_(100.0 / batch_size))
49 |         return res
50 | 
51 | 
52 | def get_logger(logger_name="nowgood", filename=None, level=logging.INFO):
53 |     logger = logging.getLogger(logger_name)
54 |     logger.setLevel(level)
55 | 
56 |     # 定义handler的输出格式
57 |     formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s')
58 | 
59 |     if filename is not None:
60 |         # 创建一个handler，用于写入日志文件
61 |         fh = logging.FileHandler(filename)
62 |         fh.setLevel(level)
63 |         fh.setFormatter(formatter)
64 |         logger.addHandler(fh)
65 | 
66 |     # 创建一个handler，用于输出到控制台
67 |     ch = logging.StreamHandler()
68 |     ch.setLevel(level)
69 |     ch.setFormatter(formatter)
70 |     # 给logger添加handler
71 |     logger.addHandler(ch)
72 | 
73 |     # disable logger
74 |     # logger.setLevel(logger.disabled)
75 | 
76 |     return logger
77 | 


--------------------------------------------------------------------------------
/utils/train_val.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | import time
  3 | import torch
  4 | import os
  5 | import shutil
  6 | import torch.nn.parallel
  7 | import torch.optim
  8 | import torch.utils.data
  9 | import torch.utils.data.distributed
 10 | from utils.meter import AverageMeter, accuracy
 11 | import torch
 12 | import time
 13 | 
 14 | best_prec1 = 0
 15 | 
 16 | 
 17 | def train(model, train_loader, criterion, optimizer, gpu, epoch=0,
 18 |           summary_writer=None, log_per_epoch=100, print_freq=30):
 19 | 
 20 |     batch_time = AverageMeter()
 21 |     data_time = AverageMeter()
 22 |     losses = AverageMeter()
 23 |     top1 = AverageMeter()
 24 |     top5 = AverageMeter()
 25 | 
 26 |     # switch to train mode
 27 |     model.train()
 28 | 
 29 |     # if not full_precision:
 30 |     #     qw = QuantizeWeightOrActivation()   # 第一步, 创建量化器
 31 |     end = time.time()
 32 | 
 33 |     # 用于控制 tensorboard 的显示频率
 34 |     interval = len(train_loader) // log_per_epoch
 35 |     summary_point = [interval * split for split in torch.arange(log_per_epoch)]
 36 | 
 37 |     for i, (data, target) in enumerate(train_loader):
 38 |         data_time.update(time.time() - end)  # measure checkpoint.pth data loading time
 39 | 
 40 |         if gpu is not None:
 41 |             data = data.cuda(gpu, non_blocking=True)
 42 |         target = target.cuda(gpu, non_blocking=True)
 43 | 
 44 |         # if not full_precision:
 45 |         #     model.apply(qw.quantize_tanh)  # 第二步, 量化权重, 保存全精度权重和量化梯度
 46 | 
 47 |         output = model(data)
 48 |         loss = criterion(output, target)
 49 | 
 50 |         # measure accuracy and record loss
 51 |         prec1, prec5 = accuracy(output, target, topk=(1, 5))
 52 |         losses.update(loss.item(), data.size(0))
 53 |         top1.update(prec1[0], data.size(0))
 54 |         top5.update(prec5[0], data.size(0))
 55 | 
 56 |         # compute gradient and do SGD step
 57 |         optimizer.zero_grad()
 58 |         loss.backward()
 59 | 
 60 |         # if not full_precision:
 61 |         #     model.apply(qw.restore)  # 第三步, 反向传播后, 模型梯度计算后, 恢复全精度权重
 62 |         #     model.apply(qw.update_grad)  # 第四步, 使用之前存储的量化梯度乘上反向传播的梯度
 63 | 
 64 |         optimizer.step()
 65 | 
 66 |         # measure elapsed time
 67 |         batch_time.update(time.time() - end)
 68 |         end = time.time()
 69 | 
 70 |         # 控制台
 71 |         if i % print_freq == 0:
 72 |             print('Epoch: [{0}][{1}/{2}]\t'
 73 |                   'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
 74 |                   'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
 75 |                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
 76 |                   'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
 77 |                   'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
 78 |                    epoch, i, len(train_loader), batch_time=batch_time,
 79 |                    data_time=data_time, loss=losses, top1=top1, top5=top5))
 80 | 
 81 |         if summary_writer and (i in summary_point):
 82 |             step = i//interval + epoch * log_per_epoch
 83 |             summary_writer.add_scalar("loss/train_loss", loss, step)
 84 |             summary_writer.add_scalar("train/top-1", top1.avg, step)
 85 |             summary_writer.add_scalar("train/top-5", top5.avg, step)
 86 | 
 87 | 
 88 | def validate(model, val_loader, criterion, gpu=0, epoch=0, summary_writer=None, name_prefix=None, print_freq=20):
 89 | 
 90 |     batch_time = AverageMeter()
 91 |     losses = AverageMeter()
 92 |     top1 = AverageMeter()
 93 |     top5 = AverageMeter()
 94 | 
 95 |     loss_name = "val/loss"
 96 |     prec1_name = "val/top-1"
 97 |     prec5_name = "val/top-5"
 98 | 
 99 |     if name_prefix is not None:
100 |         name_prefix = ''.join((name_prefix, '-'))
101 |         loss_name = ''.join((name_prefix, loss_name))
102 |         prec1_name = ''.join((name_prefix, prec1_name))
103 |         prec5_name = ''.join((name_prefix, prec5_name))
104 | 
105 |     # 进入 eval 状态
106 |     model.eval()
107 | 
108 |     # if not full_precision:
109 |     #     qw = QuantizeWeightOrActivation()  # 1, 创建量化器
110 |     #     model.apply(qw.quantize_tanh)  # 2, 量化权重, 保存全精度权重和量化梯度
111 | 
112 |     with torch.no_grad():
113 |         start = time.time()
114 |         for i, (data, target) in enumerate(val_loader):
115 |             if gpu is not None:
116 |                 data = data.cuda(gpu, non_blocking=True)
117 | 
118 |             # batch_size 128时, target size 为 torch.Size([128])
119 |             target = target.cuda(gpu, non_blocking=True)
120 |             output = model(data)
121 |             loss = criterion(output, target)
122 | 
123 |             # measure accuracy and record loss
124 |             prec1, prec5 = accuracy(output, target, topk=(1, 5))
125 |             losses.update(loss.item(), data.size(0))
126 |             top1.update(prec1[0], data.size(0))
127 |             top5.update(prec5[0], data.size(0))
128 | 
129 |             # measure elapsed time
130 |             batch_time.update(time.time() - start)
131 |             start = time.time()
132 | 
133 |             if i % print_freq == 0:
134 |                 print('Test: [{0}/{1}]\t'
135 |                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
136 |                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
137 |                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
138 |                       'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
139 |                        i, len(val_loader), batch_time=batch_time,
140 |                        loss=losses, top1=top1, top5=top5))
141 | 
142 |         if summary_writer is not None:
143 |             summary_writer.add_scalar(loss_name, losses.avg, epoch)
144 |             summary_writer.add_scalar(prec1_name, top1.avg, epoch)
145 |             summary_writer.add_scalar(prec5_name, top5.avg, epoch)
146 | 
147 |         print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(top1=top1, top5=top5))
148 | 
149 |     # if not full_precision:
150 |     #     model.apply(qw.restore)  # 第3步, 恢复全精度权重
151 | 
152 |     return top1.avg
153 | 
154 | 
155 | def save_checkpoint(state, is_best, save_dir, name_prefix=None,
156 |                     checkpoint_name='checkpoint.pth.tar',
157 |                     mode_best_name='model_best.pth.tar', ):
158 |     if save_dir:
159 |         if not os.path.exists(save_dir):
160 |             os.makedirs(save_dir)
161 |         print("=> checkpoint directory: {}".format(save_dir))
162 | 
163 |     if name_prefix is not None:
164 |         name_prefix = ''.join((name_prefix, '-'))
165 |     else:
166 |         name_prefix = ''
167 | 
168 |     checkpoint = os.path.join(save_dir, name_prefix + checkpoint_name)
169 |     model_best = os.path.join(save_dir, name_prefix + mode_best_name)
170 | 
171 |     torch.save(state, checkpoint)
172 |     if is_best:
173 |         shutil.copyfile(checkpoint, model_best)


--------------------------------------------------------------------------------
/utils/unzip.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | dir=./
 4 | 
 5 | for x in `ls *.tar`
 6 | 
 7 | do
 8 | 
 9 | filename=`basename $x .tar`
10 | 
11 | mkdir $filename
12 | 
13 | tar -xvf $x -C ./$filename
14 | 
15 | done


--------------------------------------------------------------------------------