├── README.md
├── config
├── data
│   ├── WandA_lr0.01_scalar2.5.png
│   └── smurf.jpg
├── graffiti
│   ├── QConv2D.py
│   ├── README.md
│   ├── auto_grad.py
│   ├── caffe_guidenet_weight_init.py
│   ├── diffierent_gpu_grad_backward.py
│   ├── float32touint8.py
│   ├── get_module_weight.py
│   ├── inference_on_image.py
│   ├── merge_conv_bn.py
│   ├── nowgood.py
│   ├── quantize_test.py
│   ├── register_forward_hook.py
│   ├── stat_parameters.py
│   └── weight_distribute.py
├── main.py
├── net
│   ├── net_bn_conv_merge.py
│   ├── net_bn_conv_merge_quantize.py
│   ├── net_quantize_activation.py
│   ├── net_quantize_guide.py
│   ├── net_quantize_weight.py
│   └── simple_net.py
├── quantize
│   ├── guided_distance_view.py
│   ├── quantize_guided.py
│   ├── quantize_method.py
│   ├── quantize_module_.py
│   └── quantize_old_plan.py
├── requirements.txt
└── utils
    ├── data_loader.py
    ├── meter.py
    ├── train_val.py
    ├── unzip.sh
    └── valprep.sh
/README.md:
--------------------------------------------------------------------------------
1 | ## Quantize CNN Model using PyTorch (Python 3.5)
2 |
3 | An implementation of [Towards Effective Low-bitwidth Convolutional Neural Networks](https://arxiv.org/abs/1711.00205):
4 |
5 | ```
6 | @InProceedings{Zhuang_2018_CVPR,
7 | author = {Zhuang, Bohan and Shen, Chunhua and Tan, Mingkui and Liu, Lingqiao and Reid, Ian},
8 | title = {Towards Effective Low-Bitwidth Convolutional Neural Networks},
9 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
10 | month = {June},
11 | year = {2018}
12 | }
13 | ```
14 |
15 | ### Download and setup
16 |
17 | ```bash
18 | git clone https://github.com/nowgood/QuantizeCNNModel.git && cd QuantizeCNNModel
19 | pip install -r requirements.txt
20 | echo "export PYTHONPATH=\$PYTHONPATH:$(pwd)" >> ~/.bashrc
21 | source ~/.bashrc
22 | ```
23 |
24 | ### Usage
25 |
26 | Run the following command to see all available options:
27 |
28 | ```
29 | python main.py -h
30 | ```
31 |
32 |
33 |
34 | Then monitor training with TensorBoard:
35 |
36 | ```
37 | # run from the QuantizeCNNModel directory
38 | tensorboard --logdir model/xxx/
39 | ```
40 | You can then view the training loss and accuracy, as well as the per-epoch validation accuracy, at `http://localhost:6006`.
41 |
42 | 
43 |
44 | ### Training modes
45 |
46 | Pick a training mode:
47 |
48 | 0: full precision training from scratch
49 | 1: only quantize weight
50 | 2: quantize activation using quantized weight to init model
51 | 3: joint quantize weight and activation from pre-trained ImageNet model
52 | 4: guided quantize weight and activation from pre-trained ImageNet model
53 |
54 |
55 | ### Quantizing weights
56 |
57 | Single-machine multi-GPU training, e.g. with 4 of 8 GPUs:
58 |
59 | ```
60 | CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py \
61 | --mode 1 \
62 | --workers 16 \
63 | --epochs 5 \
64 | --batch-size 1024 \
65 | --device-ids 0 1 2 3 \
66 | --lr 0.0001 \
67 | --lr-step 2 \
68 | --save-dir model/W_lr1e-4_epoch5 \
69 | --data /home/user/wangbin/datasets/ILSVRC2012 \
70 | 2>&1 | tee model/W_lr_1e-4_epoch5.log
71 | ```
72 |
73 | ```
74 |
75 | CUDA_VISIBLE_DEVICES=4,5,6,7 python main.py \
76 | --mode 1 \
77 | --workers 16 \
78 | --epochs 10 \
79 | --batch-size 1024 \
80 | --device-ids 0 1 2 3 \
81 | --lr 0.0001 \
82 | --lr-step 4 \
83 | --save-dir model/W_lr1e-4_epoch10 \
84 | --data /home/user/wangbin/datasets/ILSVRC2012 \
85 | | tee model/W_lr_1e-4_epoch10.log
86 | ```
87 |
88 | ### Initializing the activation-quantization network from the quantized-weight model
89 |
90 | ```bash
91 | CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py \
92 | --mode 2 \
93 | --workers 16 \
94 | --epochs 35 \
95 | --batch-size 1024 \
96 | --device-ids 0 1 2 3 \
97 | --lr 0.001 \
98 | --weight-quantized model/W_lr1e-4_epoch2/model_best.pth.tar \
99 | --save-dir model/AafterW_lr1e-2_epoch35 \
100 | --data /home/user/wangbin/datasets/ILSVRC2012 \
101 | | tee model/AafterW_lr1e-2_epoch35.log
102 | ```
103 |
104 | **Resume**
105 |
106 | ```bash
107 | CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py \
108 | --mode 2 \
109 | --workers 16 \
110 | --epochs 35 \
111 | --batch-size 1024 \
112 | --device-ids 0 1 2 3 \
113 | --lr 0.001 \
114 | --resume \
115 | --weight-quantized model/W_lr1e-4_epoch2/model_best.pth.tar \
116 | --save-dir model/AafterW_lr1e-3_epoch35 \
117 | --data /home/user/wangbin/datasets/ILSVRC2012 \
118 | | tee model/AafterW_lr1e-3_epoch35.log
119 | ```
120 |
121 | ### Quantizing weights and activations jointly
122 |
123 | ```
124 | CUDA_VISIBLE_DEVICES=4,5,6,7 python main.py \
125 | --mode 3 \
126 | --arch resnet18 \
127 | --workers 16 \
128 | --epochs 35 \
129 | --batch-size 512 \
130 | --device-ids 0 1 2 3 \
131 | --lr 0.001 \
132 | --lr-step 10 \
133 | --data /home/user/wangbin/datasets/ILSVRC2012 \
134 | --save-dir model/AandW_lr1e-3_epoch35 \
135 | | tee AandW_1e-3_epoch35.log
136 | ```
137 |
138 | ```
139 | CUDA_VISIBLE_DEVICES=3,4,5,6 python main.py \
140 | --mode 3 \
141 | --arch resnet18 \
142 | --workers 16 \
143 | --epochs 50 \
144 | --batch-size 512 \
145 | --device-ids 0 1 2 3 \
146 | --lr 0.1 \
147 | --lr-step 15 \
148 | --data /home/user/wangbin/datasets/ILSVRC2012 \
149 | --save-dir model/AandW_gemm_lr1e-1_epoch50 \
150 | | tee AandW_gemm_1e-1_epoch50.log
151 | ```
152 |
153 | ### Quantizing weights and activations jointly with a guidance signal
154 |
155 | ```bash
156 | CUDA_VISIBLE_DEVICES=3,4,5,6 python main.py \
157 | --mode 4 \
158 | --workers 16 \
159 | --epochs 35 \
160 | --batch-size 512 \
161 | --device-ids 0 1 2 3 \
162 | --balance 0.1 \
163 | --lr 0.001 \
164 | --rate 1 \
165 | --norm 1 \
166 | --data /home/user/wangbin/datasets/ILSVRC2012 \
167 | --save-dir /home/user/wangbin/quantizednn/model/guided_balance0.1_lr1e-3_rate1_epoch35 \
168 | | tee model/guided_balance0.1_lr1e-3_rate1_epoch35.log
169 | ```
170 |
171 | ```bash
172 | CUDA_VISIBLE_DEVICES=1,2,3 python main.py \
173 | --mode 4 \
174 | --workers 16 \
175 | --epochs 35 \
176 | --batch-size 384 \
177 | --device-ids 0 1 2 \
178 | --balance 0.1 \
179 | --lr 0.001 \
180 | --rate 1 \
181 | --norm 1 \
182 | --data /home/user/wangbin/datasets/ILSVRC2012 \
183 | --resume \
184 | --save-dir /home/user/wangbin/quantizednn/model/guided_balance0.1_lr1e-3_rate1_epoch35 \
185 | | tee model/guided_balance0.1_lr1e-3_rate1_epoch35_resume.log
186 | ```
187 |
188 | #### Viewing the guidance distance
189 |
190 | ```bash
191 | CUDA_VISIBLE_DEVICES=4,5,6,7 python main.py \
192 | --mode 4 \
193 | --workers 16 \
194 | --epochs 35 \
195 | --batch-size 512 \
196 | --device-ids 0 1 2 3 \
197 | --balance 0.1 \
198 | --lr 0.001 \
199 | --rate 1 \
200 | --norm 1 \
201 | --data /home/user/wangbin/datasets/ILSVRC2012 \
202 | --save-dir /home/user/wangbin/quantizednn/model/guided_balance0.1_lr1e-3_rate1_epoch35_view
203 | ```
--------------------------------------------------------------------------------
/config:
--------------------------------------------------------------------------------
1 | ## mode 2
2 |
3 | --mode 2
4 | --epochs 30
5 | --batch-size 64
6 | --device-ids 0
7 | --lr 0.001
8 | --weight-quantized /home/wangbin/Desktop/uisee/model_quantize/W_lr1e-4_epoch10/checkpoint.pth.tar
9 | --save-dir model
10 | --data /media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/ImageNet_ILSVRC2012/ILSVRC2012
11 |
12 | ## mode 3
13 |
14 | --mode 3
15 | --epochs 30
16 | --batch-size 64
17 | --device-ids 0
18 | --lr 0.001
19 | --save-dir model
20 | --data /media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/ImageNet_ILSVRC2012/ILSVRC2012
21 |
22 | ## mode 4
23 |
24 | --mode 4
25 | --epochs 30
26 | --batch-size 64
27 | --device-ids 0
28 | --balance 100000
29 | --lr 0.001
30 | --save-dir model
31 | --data /media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/ImageNet_ILSVRC2012/ILSVRC2012
--------------------------------------------------------------------------------
/data/WandA_lr0.01_scalar2.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nowgood/QuantizeCNNModel/531fd8d12fdb42f48a91e9378008a1bb86905d6d/data/WandA_lr0.01_scalar2.5.png
--------------------------------------------------------------------------------
/data/smurf.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nowgood/QuantizeCNNModel/531fd8d12fdb42f48a91e9378008a1bb86905d6d/data/smurf.jpg
--------------------------------------------------------------------------------
/graffiti/QConv2D.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | import torch.nn as nn
4 | from quantize.quantize_method import QuantizeWeightOrActivation
5 | import torch.nn.functional as F
6 | quantize = QuantizeWeightOrActivation()
7 |
8 |
9 | class QConv2D(torch.nn.Conv2d):
10 | def __init__(self, n_channels, out_channels, kernel_size, stride=1,
11 | padding=0, dilation=1, groups=1, bias=True):
12 | super(QConv2D, self).__init__(n_channels, out_channels, kernel_size, stride,
13 | padding, dilation, groups, bias)
14 | nn.init.constant_(self.weight, 1)
15 |
16 | def forward(self, x):
17 | qweight = quantize.quantize_weights_bias(self.weight)  # quantize the weights on the fly
18 | x = F.conv2d(x, qweight, self.bias, self.stride, self.padding, self.dilation, self.groups)
19 | return x
20 |
21 |
22 | if __name__ == "__main__":
23 | qconv = QConv2D(1, 1, 3)
24 | qconv.zero_grad()
25 | x = torch.ones(1, 1, 3, 3, requires_grad=True).float()
26 | y = qconv(x)
27 | y.backward()
28 | print(qconv.weight.grad)
29 |
30 | a = torch.ones(3, 3, requires_grad=True).float()
31 | w = torch.nn.init.constant_(torch.empty(3, 3, requires_grad=True), 1)
32 | qw = quantize.quantize_weights_bias(w)
33 |
34 | z = (qw * a).sum()
35 | z.backward()
36 | print(w.grad)
37 |
38 | qa = quantize.quantize_weights_bias(a).sum()
39 | qa.backward()
40 | print(a.grad)
--------------------------------------------------------------------------------
/graffiti/README.md:
--------------------------------------------------------------------------------
1 |
2 | ### Usage: [argparse](http://wiki.jikexueyuan.com/project/explore-python/Standard-Modules/argparse.html)
3 |
4 | ```
5 | Each parameter explained (a runnable example is at the end of this file):
6 |
7 | name or flags - the name or a list of option strings, e.g. foo or -f, --foo.
8 | action - what to do when the argument is encountered; the default is store.
9 | store_const: store the value given by const;
10 | append: collect repeated occurrences of the argument into a list;
11 | append_const: append the value defined by const to a list;
12 | count: store how many times the argument occurs; you can also subclass argparse.Action for custom parsing.
13 | nargs - how many command-line arguments to consume: a concrete number, or '?', in which case a positional argument falls back to default
14 | and an optional argument given without a value uses const;
15 | or '*', meaning 0 or more arguments;
16 | or '+', meaning 1 or more arguments.
17 | const - the constant value needed by some action and nargs combinations.
18 | default - the value used when the argument is absent.
19 | type - the type the command-line string should be converted to.
20 | choices - a container of the values the argument may take.
21 | required - whether an optional argument may be omitted (optionals only).
22 | help - the help text for the argument; argparse.SUPPRESS hides it from the help output.
23 | metavar - the argument's name in usage messages; defaults to the name itself for positionals and the upper-cased name for optionals.
24 | dest - the attribute name after parsing; for optionals it defaults to the longest option string with dashes turned into underscores.
25 | ```
26 |
27 | ### Usage: imagenet.py
28 |
29 | ```
30 | usage: guided.py [-h] [--arch ARCH] [-j N] [--epochs N] [--start-epoch N] [-b N]
31 | [--lr LR] [--momentum M] [--weight-decay W] [--print-freq N]
32 | [--resume PATH] [-e] [--pretrained]
33 | DIR
34 |
35 | PyTorch ImageNet Training
36 |
37 | positional arguments:
38 | DIR path to dataset
39 |
40 | optional arguments:
41 | -h, --help show this help message and exit
42 | --arch ARCH, -a ARCH model architecture: alexnet | resnet | resnet101 |
43 | resnet152 | resnet18 | resnet34 | resnet50 | vgg |
44 | vgg11 | vgg11_bn | vgg13 | vgg13_bn | vgg16 | vgg16_bn
45 | | vgg19 | vgg19_bn (default: resnet18)
46 | -j N, --workers N number of data loading workers (default: 4)
47 | --epochs N number of total epochs to run
48 | --start-epoch N manual epoch number (useful on restarts)
49 | -b N, --batch-size N mini-batch size (default: 256)
50 | --lr LR, --learning-rate LR
51 | initial learning rate
52 | --momentum M momentum
53 | --weight-decay W, --wd W
54 | weight decay (default: 1e-4)
55 | --print-freq N, -p N print frequency (default: 10)
56 | --resume PATH path to latest checkpoint (default: none)
57 | -e, --evaluate evaluate model on validation set
58 | --pretrained use pre-trained model
59 |
60 | ```
61 |
62 | ### use pretrained model to initialize your modified model
63 |
64 | ```
65 | model_dict = your_model.state_dict()
66 |
67 | pretrained_model = models.__dict__[args.arch](pretrained=True)
68 | pretrained_dict = pretrained_model.state_dict()
69 |
70 | # drop the keys in pretrained_dict that do not exist in model_dict
71 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
72 |
73 | model_dict.update(pretrained_dict)
74 | your_model.load_state_dict(model_dict)
75 | ```
76 |
77 | ### how to get filter weights from an nn.DataParallel model
78 |
79 | ```python
80 | low_prec_state_dict = low_prec_model.state_dict()
81 | full_prec_state_dict = full_prec_model.state_dict()
82 | low_prec_norm = low_prec_state_dict["module.conv1.weight"].norm(p=2) + low_prec_state_dict["module.layer4.1.conv2.weight"].norm(p=2)  # example keys; DataParallel adds the "module." prefix
83 | full_prec_norm = full_prec_state_dict["module.conv1.weight"].norm(p=2) + full_prec_state_dict["module.layer4.1.conv2.weight"].norm(p=2)
84 |
85 | l2 = (low_prec_norm + full_prec_norm) * args.balance
86 | ```
87 |
88 | ### torch.topk
89 |
90 | ```
91 | >>> x = torch.arange(1, 6)
92 | >>> x
93 | tensor([ 1., 2., 3., 4., 5.])
94 | >>> torch.topk(x, 3)
95 | (tensor([ 5., 4., 3.]), tensor([ 4, 3, 2]))
96 | ```
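97 |
98 | ### argparse quick example
99 |
100 | A minimal runnable sketch of the parameters described above (all names here are invented for illustration):
101 |
102 | ```python
103 | import argparse
104 |
105 | parser = argparse.ArgumentParser(description="argparse demo")
106 | # nargs='+' consumes one or more values; type converts each token
107 | parser.add_argument("--device-ids", type=int, nargs="+", default=[0], help="GPU ids, e.g. 0 1 2 3")
108 | # choices restricts the accepted values; metavar controls the name shown in usage
109 | parser.add_argument("--arch", choices=["resnet18", "resnet50"], default="resnet18", metavar="ARCH")
110 | # store_true creates a boolean flag (False unless the flag is given)
111 | parser.add_argument("--resume", action="store_true")
112 | # count stores how many times the flag occurs, e.g. -vv -> 2
113 | parser.add_argument("-v", "--verbose", action="count", default=0)
114 |
115 | args = parser.parse_args(["--device-ids", "0", "1", "--resume", "-vv"])
116 | print(args.device_ids, args.arch, args.resume, args.verbose)  # [0, 1] resnet18 True 2
117 | ```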
--------------------------------------------------------------------------------
/graffiti/auto_grad.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | import torch
4 | import torch.nn as nn
5 | from net import simple_net
6 | import torch.optim as optim
7 | from quantize.quantize_method import QuantizeWeightOrActivation
8 | import queue
9 |
10 | qw = QuantizeWeightOrActivation()
11 |
12 |
13 | class MyFunction(torch.autograd.Function):
14 |
15 | @staticmethod
16 | def forward(ctx, i):
17 |
18 | tanh_i = torch.tanh(i)
19 | max_w = torch.max(torch.abs(tanh_i)).data
20 | out = tanh_i / max_w
21 | ctx.save_for_backward(tanh_i, max_w)
22 | return out
23 |
24 | @staticmethod
25 | def backward(ctx, grad_outputs):
26 | by, bm = ctx.saved_tensors
27 | return grad_outputs * ((1 - torch.pow(by, 2.0)) / bm)  # d/dw[tanh(w)/m] = (1 - tanh(w)^2)/m, with m treated as a constant
28 |
29 |
30 | def modify_weights(weight):
31 | fn = MyFunction.apply
32 | return fn(weight)
33 |
34 |
35 | def weights_update():
36 | feature_map = torch.ones(1, 1, 3, 3, requires_grad=True)
37 | kernel = nn.Conv2d(1, 1, kernel_size=3, bias=False)
38 |
39 | # start
40 | print("\n自动求导求量化梯度")
41 | # w = Variable(kernel.weight.data.clone(), requires_grad=True)
42 | w = kernel.weight
43 | y = torch.tanh(w)/torch.max(torch.abs(torch.tanh(w)))
44 | z = y.sum()
45 | z.backward()
46 | print(w.grad)
47 | kernel.zero_grad()
48 | # end
49 |
50 | print("权重初始化\n", kernel.weight.data, "\n")
51 |
52 | tanh_w = torch.tanh(kernel.weight)
53 | max_w = torch.max(torch.abs(tanh_w))
54 | hand_grad = (1 - torch.pow(tanh_w, 2.0)) / max_w  # same formula as MyFunction.backward
55 | print("Manual gradient\n", hand_grad, "\n")
56 |
57 | # fn_w = modify_weights(kernel.weight)
58 | fn_w = qw.quantize_weights_bias(kernel.weight)
59 | fn_w.sum().backward()
60 |
61 | square_weight_grad = kernel.weight.grad.data.clone()
62 | print("自动求梯度\n", square_weight_grad, "\n") # 只需要在原本的梯度上乘以卷积核的面积就好
63 |
64 | print("量化前权重\n", kernel.weight.data, "\n")
65 |
66 | # This does not update the module's weights: state_dict() returns a freshly built OrderedDict,
67 | # so update() changes that new dict rather than the parameters; use load_state_dict instead.
68 | # kernel.state_dict().update(weight=fn_w)
69 |
70 | # state_dict = kernel.state_dict() # update method 1: load a modified state_dict
71 | # state_dict.update(weight=square)
72 | # kernel.load_state_dict(state_dict)
73 |
74 | # kernel.weight = nn.Parameter(square) # update method 2: rewrap as a Parameter
75 |
76 | kernel.weight.data.copy_(fn_w.data) # update method 3: in-place copy
77 |
78 | print("量化后权重\n", kernel.weight.data, "\n")
79 |
80 | # another computation graph that uses the weights
81 | other_graph = kernel(feature_map)
82 | other_graph.backward()
83 |
84 | print("不使用 Module.zer_grad(), 卷积后权重梯度\n", kernel.weight.grad, "\n")
85 |
86 | kernel.zero_grad()
87 | other_graph = kernel(feature_map)
88 | other_graph.backward()
89 |
90 | print("使用 Module.zer_grad(), 卷积后权重梯度\n", kernel.weight.grad, "\n")
91 | print("手动计算梯度更新(加法)\n", kernel.weight.grad + square_weight_grad, "\n")
92 | print("手动计算梯度更新(乘法)\n", kernel.weight.grad * square_weight_grad, "\n")
93 |
94 |
95 | def module_apply():
96 | saved_param = queue.Queue()
97 | saved_grad = queue.Queue()
98 |
99 | def info(s):
100 | print("\n---{}---\n".format(s))
101 |
102 | for k, v in net.state_dict().items():
103 | print(k, v, "\n")
104 | break
105 |
106 | def square(module):
107 | if type(module) == nn.Conv2d:
108 | saved_param.put(module.weight.data.clone()) # step 1: save the full-precision weights
109 | quantize_w = modify_weights(module.weight) # step 2: quantize the weights
110 | quantize_w.sum().backward()
111 | saved_grad.put(module.weight.grad.data.clone()) # step 3: save the quantization gradient
112 | module.weight.data.copy_(quantize_w.data) # step 4: replace the full-precision weights with the quantized ones
113 |
114 | def restore(module):
115 | if type(module) == nn.Conv2d:
116 | module.weight.data.copy_(saved_param.get()) # step 5: restore the full-precision weights
117 |
118 | def update_weight(module):
119 | if type(module) == nn.Conv2d:
120 | module.weight.grad.data.mul_(saved_grad.get()) # step 6: chain the loss gradient with the quantization gradient
121 |
122 | net = simple_net.Net()
123 | info("初始化权重")
124 |
125 | # net.zero_grad() # optimizer.zero_grad() is enough
126 | # network input and target
127 | input_ = torch.ones(1, 1, 6, 6, requires_grad=True)
128 | label = torch.ones(1, 2)
129 |
130 | optimizer = optim.SGD(net.parameters(), lr=1)
131 | criterion = nn.MSELoss()
132 |
133 | print("\n\n")
134 |
135 | print(net.state_dict().keys(), "\n")
136 | print(optimizer.param_groups)
137 | print(optimizer.state_dict())
138 |
139 | print("\n\n")
140 |
141 | for _ in range(5):
142 |
143 | net.apply(square)
144 | info("量化权重\n")
145 | print("net.conv1.weight.grad\n", net.conv1.weight.grad)
146 | output = net(input_)
147 | loss = criterion(output, label)
148 | optimizer.zero_grad() # very important!
149 |
150 | print("\nnet.conv1.weight.grad after optimizer.zero_grad()\n", net.conv1.weight.grad)
151 |
152 | loss.backward()
153 |
154 | net.apply(restore)
155 | info("恢复全精度权重")
156 |
157 | net.apply(update_weight)
158 | print(net.state_dict().keys(), "\n")
159 |
160 | optimizer.step()
161 | info("更新全精度权重")
162 | print(net.state_dict().keys(), "\n")
163 |
164 | torch.save(net.state_dict(), "../model/model_name_changed.pkl")
165 | xx = torch.load("../model/model_name_changed.pkl")
166 | print(xx.keys())
167 |
168 |
169 | if __name__ == "__main__":
170 | module_apply()
--------------------------------------------------------------------------------
/graffiti/caffe_guidenet_weight_init.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import caffe
3 |
4 | net = caffe.Net("/home/wangbin/github/RFCN-FasterRCNN/objectDetection/UISEE-FRCNN-3/model_config/train.prototxt",
5 | "/media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/UISEE/"
6 | "caffe_models/PVANET/PVANET-LITE/PVANET-LITE.caffemodel", caffe.TEST)\
7 |
8 | print(type(net.params))
--------------------------------------------------------------------------------
/graffiti/diffierent_gpu_grad_backward.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 |
4 | a = torch.ones(2, 2, requires_grad=True).cuda(1)
5 | b = torch.rand(2, 2, requires_grad=True).cuda(2)
6 | c = a + b  # raises RuntimeError: the operands live on different GPUs
7 |
8 | print(c)
--------------------------------------------------------------------------------
/graffiti/float32touint8.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | from net import net_quantize_guide
4 | from torchvision import models
5 |
6 | model = net_quantize_guide.resnet18()
7 | print(model.state_dict().keys())
8 | model = models.resnet18(pretrained=True)
9 | state_dict = model.state_dict()
10 | state_dict = {k: v.to(torch.uint8) for k, v in state_dict.items()}  # lossy: float values are truncated to uint8
11 | torch.save(state_dict, "nowgood.pth")
12 |
--------------------------------------------------------------------------------
/graffiti/get_module_weight.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import os
3 | import torch
4 | import torchvision.models as models
5 | os.environ["CUDA_VISIBLE_DEVICES"] = "0,3"  # must be set in the environment; a plain Python assignment has no effect
6 | model = models.resnet18(pretrained=True)
7 | model = torch.nn.DataParallel(model, [0]).cuda()
8 |
9 | state_dict = model.state_dict()
10 |
11 | second_last_convlayer_weight = state_dict['module.layer4.1.conv1.weight']
12 | last_convlayer_weight = state_dict['module.layer4.1.conv2.weight']
13 | print(second_last_convlayer_weight)
14 | print(last_convlayer_weight)
15 | print(last_convlayer_weight.norm(p=2))
16 | l2 = torch.norm(last_convlayer_weight, p=2)
17 | print(l2)
18 |
19 | print(len(list(model.modules())), type(model.modules))
20 | print(state_dict.keys())
--------------------------------------------------------------------------------
/graffiti/inference_on_image.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | import torchvision.models as models
4 | import cv2
5 |
6 | IMG_PATH = "/home/wangbin/PycharmProjects/quantizednn/data/smurf.jpg"
7 |
8 |
9 | def image_read(img_path):
10 | img = cv2.imread(img_path)
11 | img = cv2.resize(img, (224, 224))
12 | img = img.transpose(2, 0, 1)
13 | img = torch.tensor(img).div(255).sub(0.5).float()
14 | img = torch.unsqueeze(img, 0)
15 | return img
16 |
17 |
18 | def torch_modules(model_):
19 | print("module.modules()\n")
20 | for e in model_.modules():
21 | print(type(e), e)
22 |
23 | print("modules._modules.keys()\n")
24 | for e in model_._modules.keys():
25 | print(type(e), e)
26 |
27 | print("modules.children.keys()\n")
28 | for e in model_.children():
29 | print(type(e), e)
30 |
31 |
32 | if __name__ == "__main__":
33 | image = image_read(IMG_PATH)
34 | model = models.resnet18(pretrained=True)
35 | model = torch.nn.DataParallel(model)
36 | model.eval()
37 |
38 | pred = model(image)
39 | print(pred.size())
40 |
--------------------------------------------------------------------------------
/graffiti/merge_conv_bn.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | from torchvision import models
4 | import numpy as np
5 | import os
6 | from net import net_bn_conv_merge, net_bn_conv_merge_quantize
7 | from utils.data_loader import load_val_data
8 | from utils.train_val import validate
9 |
10 | epsilon = 1e-5
11 | data = "/media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/ImageNet_ILSVRC2012/ILSVRC2012"
12 |
13 | model = models.resnet18(pretrained=True)
14 | # merge_model = net_bn_conv_merge.resnet18()
15 | merge_model = net_bn_conv_merge_quantize.resnet18()
16 | state_dict = model.state_dict()
17 | merge_state_dict = merge_model.state_dict()
18 |
19 | # for name in state_dict:
20 | # print(name)
21 |
22 | merge_state_dict.update({"fc.weight": state_dict["fc.weight"],
23 | "fc.bias": state_dict["fc.bias"]})
24 | del state_dict["fc.weight"]
25 | del state_dict["fc.bias"]
26 | params = np.array(list(state_dict.keys()))
27 |
28 | params = params.reshape((-1, 5))
29 | for index in range(params.shape[0]):
30 | weight = state_dict[params[index][0]]
31 | gamma = state_dict[params[index][1]]
32 | beta = state_dict[params[index][2]]
33 | running_mean = state_dict[params[index][3]]
34 | running_var = state_dict[params[index][4]]
35 | delta = gamma/(torch.sqrt(running_var+epsilon))  # fold BN into conv: w' = w * gamma / sqrt(var + eps)
36 | weight = weight * delta.view(-1, 1, 1, 1)
37 | bias = (0-running_mean) * delta + beta  # b' = beta - running_mean * gamma / sqrt(var + eps)
38 | merge_state_dict.update({params[index][0]: weight,
39 | params[index][0][:-6] + "bias": bias})
40 | merge_model.load_state_dict(merge_state_dict)
41 | merge_model_name = "resnet18_merge_bn_conv.pth.tar"
42 | torch.save(merge_model.state_dict(), merge_model_name)
43 |
44 | """
45 | conv1.weight
46 | bn1.weight
47 | bn1.bias
48 | bn1.running_mean
49 | bn1.running_var
50 | layer1.0.conv1.weight
51 | layer1.0.bn1.weight
52 | layer1.0.bn1.bias
53 | layer1.0.bn1.running_mean
54 | layer1.0.bn1.running_var
55 | """
56 |
57 | # print("bn1.weight: \n", len(state_dict["bn1.weight"]), state_dict["bn1.weight"])
58 | # print("bn1.bias: \n", len(state_dict["bn1.bias"]), state_dict["bn1.bias"])
59 | # print("bn1.running_mean: \n", state_dict["bn1.running_mean"])
60 | # print("bn1.running_val: \n", state_dict["bn1.running_var"])
61 |
62 | val_loader = load_val_data(data)
63 | evaluate = merge_model_name
64 | if os.path.isfile(evaluate):
65 | print("Loading evaluate model '{}'".format(evaluate))
66 | checkpoint = torch.load(evaluate)
67 | merge_model.load_state_dict(checkpoint)
68 | print("Loaded evaluate model '{}'".format(evaluate))
69 | else:
70 | print("No evaluate mode found at '{}'".format(evaluate))
71 |
72 | merge_model.cuda()
73 | merge_model.eval()
74 | criterion = torch.nn.CrossEntropyLoss().cuda()
75 | validate(merge_model, val_loader, criterion)
76 |
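77 | # Optional sanity check (a sketch, not part of the original script): when using the
78 | # non-quantized merge model (net_bn_conv_merge.resnet18() above), the folded convs
79 | # should reproduce the original conv+BN outputs in eval mode:
80 | model.cuda().eval()
81 | with torch.no_grad():
82 | x = torch.rand(8, 3, 224, 224).cuda()
83 | print("max abs diff:", (model(x) - merge_model(x)).abs().max().item())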
--------------------------------------------------------------------------------
/graffiti/nowgood.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from net import net_quantize_guide
3 | from torchvision import models
4 |
5 |
6 | x = torch.ones(5, 3)
7 | bias = torch.ones(5, 1)
8 | bias[0][0] = 4
9 | bias[3][0] = 3
10 | y = x * bias
11 | print(y)
--------------------------------------------------------------------------------
/graffiti/quantize_test.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | from net.simple_net import Net
4 | from quantize.quantize_method import QuantizeWeightOrActivation
5 | import torch.nn as nn
6 | import torch.optim as optim
7 | import matplotlib.pyplot as plt
8 |
9 |
10 | def test_quantize_weight():
11 | qw = QuantizeWeightOrActivation()
12 |
13 | net = Net()
14 | qw.info(net, "初始化权重")
15 |
16 | net.apply(qw.quantize)
17 | qw.info(net, "量化权重")
18 |
19 | # network input and target
20 | input_ = torch.ones(1, 1, 6, 6, requires_grad=True)
21 | label = torch.ones(1, 2)
22 |
23 | optimizer = optim.SGD(net.parameters(), lr=0.01)
24 | criterion = nn.MSELoss()
25 | output = net(input_)
26 | loss = criterion(output, label)
27 | optimizer.zero_grad()
28 | loss.backward()
29 | print("\nMSE LOSS ", loss, "\n")
30 |
31 | net.apply(qw.restore)
32 | qw.info(net, "恢复全精度权重")
33 |
34 | net.apply(qw.update_grad)
35 |
36 | print("now")
37 | optimizer.step()
38 | qw.info(net, "更新全精度权重")
39 |
40 |
41 | def test_quantize_weight_update():
42 | qw = QuantizeWeightOrActivation()
43 |
44 | net = Net()
45 | input_ = torch.rand(1, 1, 6, 6, requires_grad=True)
46 | label = torch.ones(1, 2)
47 | optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum=0.99)
48 | criterion = nn.MSELoss()
49 | log = {}
50 | for step in range(5000):
51 | net.apply(qw.quantize)     # 1) swap in quantized weights (full-precision copies are saved)
52 | output = net(input_)
53 | loss = criterion(output, label)
54 | optimizer.zero_grad()
55 | loss.backward()            # 2) gradients w.r.t. the quantized weights
56 | # print("loss ", loss.data)
57 | net.apply(qw.restore)      # 3) restore the full-precision weights
58 | net.apply(qw.update_grad)  # 4) fold the quantization gradient into the weight gradient
59 | optimizer.step()           # 5) update the full-precision weights
60 |
61 | log[step] = loss.item()  # store a plain float, not the graph-attached tensor
62 |
63 | plt.axis([0, 5000, 0, 0.1])
64 | plt.plot(log.values(), "r-")
65 | plt.show()
66 |
67 |
68 | if __name__ == "__main__":
69 | test_quantize_weight_update()
--------------------------------------------------------------------------------
/graffiti/register_forward_hook.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | import torchvision.models as models
4 | import cv2
5 |
6 | IMG_PATH = "/home/wangbin/PycharmProjects/quantizednn/data/smurf.jpg"
7 |
8 |
9 | def image_read(img_path):
10 | img = cv2.imread(img_path)
11 | img = cv2.resize(img, (224, 224))
12 | img = img.transpose(2, 0, 1)
13 | img = torch.tensor(img).div(255).sub(0.5).float()
14 | img = torch.unsqueeze(img, 0)
15 | return img
16 |
17 |
18 | def torch_modules(model_):
19 | print("module.modules()\n")
20 | for e in model_.modules():
21 | print(type(e), e)
22 |
23 | print("modules._modules.keys()\n")
24 | for e in model_._modules.keys():
25 | print(type(e), e)
26 |
27 | print("modules.children.keys()\n")
28 | for e in model_.children():
29 | print(type(e), e)
30 |
31 |
32 | def my_hook(m, i, o):
33 | fm[0] = (i[0].data.clone())
34 | fm[1] = (o.data.clone())
35 | print('m:', type(m))
36 | print('i:', type(i))
37 | print('len(i):', len(i))
38 | print('i[0]:', type(i[0]))
39 | print('i[0]:', i[0].size())
40 | print('o:', type(o))
41 | print()
42 | print('i[0] shape:', i[0].size())
43 | print('o shape:', o.size())
44 |
45 |
46 | def my_hook2(m, i, o):
47 | m.register_buffer("layer3", i[0])
48 | m.register_buffer("layer4", o)
49 |
50 |
51 | if __name__ == "__main__":
52 | image = image_read(IMG_PATH)
53 | model = models.resnet18(pretrained=True)
54 | last = model._modules.get("layer4")
55 | fm = [0, 0]
56 | hook = last.register_forward_hook(my_hook2)
57 | model = torch.nn.DataParallel(model)
58 | model.eval()
59 | pred = model(image)
60 | print(model)
61 | for k, v in model._modules.items():
62 | print(k, v)
63 |
64 | hook.remove()
--------------------------------------------------------------------------------
/graffiti/stat_parameters.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | """
3 | resnet-18:
4 | layer1.0.conv1.weight 0.003
5 | layer1.0.conv2.weight 0.003
6 | layer1.1.conv1.weight 0.003
7 | layer1.1.conv2.weight 0.003
8 | layer2.0.conv1.weight 0.006
9 | layer2.0.conv2.weight 0.013
10 | layer2.1.conv1.weight 0.013
11 | layer2.1.conv2.weight 0.013
12 | layer3.0.conv1.weight 0.025
13 | layer3.0.conv2.weight 0.050
14 | layer3.0.downsample.0.weight 0.003
15 | layer3.1.conv1.weight 0.050
16 | layer3.1.conv2.weight 0.050
17 | layer4.0.conv1.weight 0.101
18 | layer4.0.conv2.weight 0.202
19 | layer4.0.downsample.0.weight 0.011
20 | layer4.1.conv1.weight 0.202
21 | layer4.1.conv2.weight 0.202
22 | fc.weight 0.044
23 | """
24 | import torchvision.models as models
25 |
26 |
27 | def num_features(shape):
28 | feature = 1
29 | for dim in shape:
30 | feature *= dim
31 | return feature
32 |
33 |
34 | def total_parameters(state_dict):
35 | count = 0
36 | for value in state_dict.values():
37 | count += num_features(value.size())
38 | return count
39 |
40 |
41 | if __name__ == "__main__":
42 | model = models.resnet50()
43 | total = total_parameters(model.state_dict())
44 | for k, v in model.state_dict().items():
45 | rate = num_features(v.size())/total
46 | if rate > 0.001:
47 | print("{: <30} {:.3f}".format(k, rate))
48 |
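49 | # Note: an equivalent count (a sketch) using the built-in numel():
50 | # total = sum(v.numel() for v in model.state_dict().values())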
--------------------------------------------------------------------------------
/graffiti/weight_distribute.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | import torchvision.models as models
4 | from quantize.quantize_method import quantize_weights_bias_tanh
5 | import numpy as np
6 |
7 |
8 | checkpoint = "/home/wangbin/Desktop/uisee/model_quantize/AandW_lr1e-3_step10_epoch35/checkpoint.pth.tar"
9 |
10 |
11 | def weight_decay():
12 |
13 | """
14 | random init:
15 | l2_loss: (668.1154)
16 | l2_loss * 1e-4: (0.066812)
17 |
18 | pre-trained
19 | l2_loss: (517.5516)
20 | l2_loss * 1e-4: (0.051755)
21 | """
22 |
23 | model = models.resnet18()
24 | l2_loss = 0
25 | for i in model.parameters():
26 | l2_loss += i.norm(p=2)
27 |
28 | print(l2_loss)
29 | print(l2_loss * 1e-4)
30 |
31 |
32 | def quantize_weight_distribute():
33 | model_checkpoint = torch.load(checkpoint)
34 | state_dict = model_checkpoint['state_dict']
35 |
36 | for k, v in state_dict.items():
37 | if k == "module.layer1.1.conv2.weight":
38 | cnts = [0 for _ in range(26)]
39 | v = v.view(-1)
40 | print(v)
41 | v = (quantize_weights_bias_tanh(v) + 1) / 2 * (256 - 1)  # map quantized values from [-1, 1] to [0, 255]
42 | print(v.size())
43 | for ele in v:
44 | cnts[np.abs(int(ele)//10)] += 1
45 | for i in range(26):
46 | print(i, " ", '{:.4f}'.format(cnts[i]/len(v)))
47 |
48 | # The deeper the layer, the smaller the weight variance; shallower layers have a wider, higher-variance distribution
49 | """
50 | conv4.1_layer
51 | 0 0.0000
52 | 1 0.0000
53 | 2 0.0000
54 | 3 0.0000
55 | 4 0.0000
56 | 5 0.0000
57 | 6 0.0000
58 | 7 0.0000
59 | 8 0.0000
60 | 9 0.0000
61 | 10 0.0009
62 | 11 0.0717
63 | 12 0.5933
64 | 13 0.3055
65 | 14 0.0257
66 | 15 0.0022
67 | 16 0.0003
68 | 17 0.0001
69 | 18 0.0000
70 | 19 0.0000
71 | 20 0.0000
72 | 21 0.0000
73 | 22 0.0000
74 | 23 0.0000
75 | 24 0.0000
76 | 25 0.0000
77 | """
78 |
79 | '''
80 | conv1.1_layer
81 | 0 0.0001
82 | 1 0.0000
83 | 2 0.0001
84 | 3 0.0002
85 | 4 0.0004
86 | 5 0.0007
87 | 6 0.0019
88 | 7 0.0032
89 | 8 0.0084
90 | 9 0.0204
91 | 10 0.0566
92 | 11 0.1618
93 | 12 0.3274
94 | 13 0.2621
95 | 14 0.1029
96 | 15 0.0341
97 | 16 0.0116
98 | 17 0.0050
99 | 18 0.0019
100 | 19 0.0005
101 | 20 0.0004
102 | 21 0.0002
103 | 22 0.0001
104 | 23 0.0000
105 | 24 0.0000
106 | 25 0.0000
107 |
108 | '''
109 |
110 |
111 | if __name__ == "__main__":
112 | quantize_weight_distribute()
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | """
4 | 1. 量化权重建议:
5 | 1). 学习率最高设置为 0.001, 0.0001可以很快的收敛, 最很好的选择, 训练个 2~5个 epoch 就好
6 |
7 | 2. 权重和激活同时量化注意事项:
8 | 1). 学习率设置不能大于 0.01(学习率最大设置 0.01), 当学习率设置为0.01时, 模型可以很好的微调,
9 | 2). 当学习率设置为 0.1 时, 训练几十个batch之后, 准确率为 千分之一 和 千分之五
10 | 3). 学习率设置为 0.01 时,大约 5~8 epoch降低一次学习率(除以10)比较好, 然后训练大约 30~40 epoch就好
11 | 4). 当学习率设置为 0.001 时, 大约 14~16 epoch 降低一次学习率比较好, 然后训练大约 30~40 epoch就好
12 |
13 | 3. 训练模式(mode):
14 | 0: full precision training from scratch
15 | 1: only quantize_tanh weight
16 | 2. quantize_tanh activation using quantized weight to init model
17 | 3. joint quantize_tanh weight and activation from pre-trained imageNet model
18 | 4. guided quantize_tanh weight and activation from pre-trained imageNet model
19 |
20 | """
21 |
22 | import argparse
23 | import torchvision.models as models
24 | import warnings
25 | import random
26 | import os
27 | import torch.backends.cudnn as cudnn
28 | import torch.distributed as dist
29 | import torch
30 | import torch.optim
31 | import torch.utils.data
32 | import torch.utils.data.distributed
33 | from utils.train_val import train, save_checkpoint, validate
34 | from utils.data_loader import load_train_data, load_val_data
35 | from quantize import quantize_guided
36 | from quantize.quantize_method import quantize_weights_bias_gemm
37 | from net import net_quantize_activation, net_quantize_weight
38 | from tensorboardX import SummaryWriter
39 |
40 |
41 | model_names = sorted(name for name in models.__dict__
42 | if name.islower() and not name.startswith("__")
43 | and callable(models.__dict__[name]))
44 |
45 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
46 |
47 | parser.add_argument('--data', metavar='DIR', help='path to dataset', required=True)
48 | parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18',
49 | choices=model_names,
50 | help='model architecture: ' + ' | '.join(model_names) + ' (default: resnet18)')
51 | parser.add_argument('--workers', default=16, type=int, metavar='N',  # set to the number of CPU threads available
52 | help='number of data loading workers (default: 16)')
53 | parser.add_argument('--epochs', default=35, type=int, metavar='N',
54 | help='number of total epochs to run')
55 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
56 | help='manual epoch number (useful on restarts)')
57 | parser.add_argument('--batch-size', default=128, type=int,
58 | metavar='N', help='mini-batch size (default: 128)')
59 |
60 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
61 | help='momentum')
62 | parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
63 | metavar='W', help='weight decay (default: 1e-4)')
64 | parser.add_argument('--resume', action='store_true',
65 | help='resume training using save-dir checkpoint (default: False)')
66 | # to evaluate a model, pass its checkpoint path here; leave empty when training
67 | parser.add_argument('--evaluate', default='', type=str,
68 | help='evaluate model on validation set')
69 | parser.add_argument('--world-size', default=1, type=int,
70 | help='number of distributed processes')
71 | parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str,
72 | help='url used to set up distributed training')
73 | parser.add_argument('--dist-backend', default='gloo', type=str,
74 | help='distributed backend')
75 | parser.add_argument('--seed', default=None, type=int,
76 | help='seed for initializing training. ')
77 | parser.add_argument('--gpu', default=None, type=int, help='GPU id to use.')
78 |
79 | parser.add_argument('--device-ids', default=[0], type=int, nargs='+',
80 | help='GPU ids to be used e.g 0 1 2 3')
81 | parser.add_argument('--weight-quantized', default='', type=str, help="quantize_tanh weight model path")
82 | parser.add_argument('--save-dir', default='model', type=str, help='directory to save trained model', required=True)
83 | parser.add_argument('--mode', default=3, type=int, help='model quantized mode', required=True)
84 | # with the L1 norm, a balance of 1 or 0.1 works well; with the L2 norm, roughly 100 (~0.034) to 500
85 | parser.add_argument('--norm', default=1, type=int, help='feature map norm, default 1')
86 | parser.add_argument('--balance', default=100, type=float, help='balancing parameter (default: 100)')
87 | # The paper uses an initial learning rate of 0.001, divided by 10 every 10 epochs; that works when only quantizing weights.
88 | # When quantizing weights and activations jointly at 0.001, we observe the weights growing steadily,
89 | # so the initial learning rate may need to be 0.01 or even 0.1.
90 | # In the guided method the full-precision model is already well trained, so it only needs fine-tuning at a smaller learning rate,
91 | # while the low-precision model's learning rate can be set higher.
92 | parser.add_argument('--lr', default=0.001, type=float,  # paper: initial lr 0.001, divided by 10 every 10 epochs
93 | help='initial learning rate')
94 | parser.add_argument('--rate', default=1, type=int,
95 | help='guide training method, full_lr = low_lr * rate')
96 |
97 | parser.add_argument('--lr-step', default=10, type=int, help='learning rate step scheduler')
98 |
99 |
100 | args = parser.parse_args()
101 | best_prec1 = 0
102 |
103 |
104 | def main():
105 | global best_prec1
106 | print("\n"
107 | "=> arch {: <20}\n"
108 | "=> init_lr {: <20}\n"
109 | "=> lr-step {: <20}\n"
110 | "=> momentum {: <20}\n"
111 | "=> weight-decay {: <20}\n"
112 | "=> batch-size {: <20}\n"
113 | "=> balance {: <20}\n"
114 | "=> save-dir {: <20}\n".format(
115 | args.arch, args.lr, args.lr_step, args.momentum, args.weight_decay,
116 | args.batch_size, args.balance, args.save_dir))
117 |
118 | if args.seed is not None:
119 | random.seed(args.seed)
120 | torch.manual_seed(args.seed)
121 | cudnn.deterministic = True
122 | warnings.warn('You have chosen to seed training. This will turn on the CUDNN deterministic setting, '
123 | 'which can slow down your training considerably! You may see unexpected behavior'
124 | ' when restarting from checkpoints.')
125 |
126 | # as the warning below says, specifying a single GPU id disables multi-GPU training
127 | if args.gpu is not None:
128 | warnings.warn('You have chosen a specific GPU. This will completely disable data parallelism.')
129 |
130 | # multi-machine (cluster) training, as opposed to single-machine multi-GPU
131 | args.distributed = args.world_size > 1
132 | if args.distributed:
133 | dist.init_process_group(backend=args.dist_backend,
134 | init_method=args.dist_url,
135 | world_size=args.world_size)
136 |
137 | # build the model according to the training mode
138 | if args.mode == 0:
139 | print("=> training mode {}: full precision training from scratch\n".format(args.mode))
140 | model = models.__dict__[args.arch]()
141 |
142 | elif args.mode == 1:
143 | print("=> training mode {}: quantize weight only\n".format(args.mode))
144 | print("=> loading imageNet pre-trained model {}".format(args.arch))
145 | model = net_quantize_weight.__dict__[args.arch]()
146 | model_dict = model.state_dict()
147 | init_model = models.__dict__[args.arch](pretrained=True)
148 | model_dict.update(init_model.state_dict())
149 | model.load_state_dict(model_dict)
150 | print("=> loaded imageNet pre-trained model {}".format(args.arch))
151 |
152 | elif args.mode == 2:
153 | print("=> training mode {}: quantize activation using quantized weight\n".format(args.mode))
154 | model = net_quantize_activation.__dict__[args.arch]()
155 | if os.path.isfile(args.weight_quantized):
156 | print("=> loading weight quantized model '{}'".format(args.weight_quantized))
157 | model_dict = model.state_dict()
158 | quantized_model = torch.load(args.weight_quantized)
159 | init_dict = {}
160 | for k, v in quantized_model['state_dict'].items():
161 | if k[7:] in model.state_dict():  # strip the "module." prefix that DataParallel adds when saving
162 | if k.find("conv") != -1 or k.find("fc") != -1:
163 | init_dict[k[7:]] = quantize_weights_bias_gemm(v)
164 | else:
165 | init_dict[k[7:]] = v
166 |
167 | model_dict.update(init_dict)
168 | model.load_state_dict(model_dict)
169 | print("=> loaded weight_quantized '{}'".format(args.weight_quantized))
170 | else:
171 | warnings.warn("=> no weight quantized model found at '{}'".format(args.weight_quantized))
172 | return
173 |
174 | elif args.mode == 3:
175 | print("=> training mode {}: quantize weight and activation simultaneously\n".format(args.mode))
176 | print("=> loading imageNet pre-trained model '{}'".format(args.arch))
177 | # initialize the joint weight/activation quantization network from pre-trained ResNet18
178 | model = net_quantize_activation.__dict__[args.arch]()
179 | # fetch the pre-trained parameters
180 | model_dict = model.state_dict()
181 | init_model = models.__dict__[args.arch](pretrained=True)
182 | init_dict = {k: v for k, v in init_model.state_dict().items() if k in model_dict}
183 | model_dict.update(init_dict)
184 | model.load_state_dict(model_dict)
185 |
186 | elif args.mode == 4:
187 | print("=> Training mode {}: guided quantize weight and activation "
188 | "from pre-trained imageNet model {}\n ".format(args.mode, args.arch))
189 |
191 | quantize_guided.guided(args)
192 | return
193 | else:
194 | raise Exception("invalid mode, valid mode is 0~4!!")
195 |
196 | if args.gpu is not None: # a single specified GPU
197 | model = model.cuda(args.gpu)
198 | elif args.distributed: # cluster (multi-machine) training
199 | model.cuda()
200 | model = torch.nn.parallel.DistributedDataParallel(model)
201 | else: # single machine (one or more GPUs)
202 | if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
203 | model.features = torch.nn.DataParallel(model.features)
204 | model.cuda()
205 | else:
206 | # single machine, multiple GPUs: pass the ids of the GPUs to use
207 | """
208 | list(model.state_dict().keys())[0]
209 | before torch.nn.DataParallel, a layer is named e.g. conv1.weight;
210 | after torch.nn.DataParallel, it becomes module.conv1.weight.
211 | If training runs under DataParallel but validation uses a bare model on one GPU, the key names conflict, so the single-GPU path needs code to resolve the prefix.
212 | """
213 | model = torch.nn.DataParallel(model, args.device_ids).cuda()
214 |
215 | criterion = torch.nn.CrossEntropyLoss().cuda(args.gpu)
216 | optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
217 | # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
218 | lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_step)
219 |
220 | # optionally resume from a checkpoint
221 | if args.resume:
222 | print("\n=> resume training from checkpoint")
223 | checkpoint_filename = os.path.join(args.save_dir, "checkpoint.pth.tar")
224 |
225 | if os.path.isfile(checkpoint_filename):
226 | print("=> loading checkpoint '{}'".format(checkpoint_filename))
227 | checkpoint = torch.load(checkpoint_filename)
228 | args.start_epoch = checkpoint['epoch']
229 | best_prec1 = checkpoint['best_prec1']
230 | model.load_state_dict(checkpoint['state_dict'])
231 | optimizer.load_state_dict(checkpoint['optimizer'])
232 | print("=> loaded checkpoint '{}' (epoch {})"
233 | .format(checkpoint_filename, checkpoint['epoch']))
234 | else:
235 | print("=> no checkpoint found at '{}'".format(checkpoint_filename))
236 |
237 | cudnn.benchmark = True
238 |
239 | val_loader = load_val_data(args.data, args.batch_size, args.workers)
240 |
241 | if args.evaluate:
242 | if os.path.isfile(args.evaluate):
243 | print("Loading evaluate model '{}'".format(args.evaluate))
244 | checkpoint = torch.load(args.evaluate)
245 | if "state_dict" in checkpoint.keys():
246 | model.load_state_dict(checkpoint['state_dict'])
247 | print("epoch: {} ".format(checkpoint['epoch']))
248 | else:
249 | checkpoint = {''.join(("module.", k)): v for k, v in checkpoint.items() if not k.startswith("module")}
250 | model.load_state_dict(checkpoint)
251 | print("Loaded evaluate model '{}'".format(args.evaluate))
252 | else:
253 | print("No evaluate mode found at '{}'".format(args.evaluate))
254 | return
255 | validate(model, val_loader, criterion, args.gpu)
256 | return
257 |
258 | train_loader, train_sampler = load_train_data(args.data, args.batch_size, args.workers, args.distributed)
259 |
260 | summary_writer = SummaryWriter(args.save_dir)
261 | for epoch in range(args.start_epoch, args.epochs):
262 | if args.distributed:
263 | train_sampler.set_epoch(epoch)
264 | lr_scheduler.step()
265 |
266 | # train for one epoch
267 | train(model, train_loader, criterion, optimizer, args.gpu, epoch, summary_writer)
268 |
269 | # evaluate on validation set
270 | prec1 = validate(model, val_loader, criterion, args.gpu, epoch, summary_writer)
271 |
272 | # remember best prec@1 and save checkpoint
273 | is_best = prec1 > best_prec1
274 | best_prec1 = max(prec1, best_prec1)
275 | save_checkpoint({
276 | 'epoch': epoch+1,
277 | 'arch': args.arch,
278 | 'state_dict': model.state_dict(),
279 | 'best_prec1': best_prec1,
280 | 'optimizer': optimizer.state_dict(),
281 | }, is_best, args.save_dir)
282 |
283 | summary_writer.close()
284 |
285 |
286 | if __name__ == '__main__':
287 | main()
--------------------------------------------------------------------------------
/net/net_bn_conv_merge.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch.nn as nn
3 | import math
4 | import torch.utils.model_zoo as model_zoo
5 |
6 | """
7 | 网络修改步骤;
8 | 1. 将卷积层的 bias 设置为 True
9 | 2. 将 bn 层删掉
10 | """
11 |
12 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
13 | 'resnet152']
14 |
15 |
16 | model_urls = {
17 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
18 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
19 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
20 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
21 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
22 | }
23 |
24 |
25 | def conv3x3(in_planes, out_planes, stride=1):
26 | """3x3 convolution with padding"""
27 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
28 | padding=1, bias=True)
29 |
30 |
31 | class BasicBlock(nn.Module):
32 | expansion = 1
33 |
34 | def __init__(self, inplanes, planes, stride=1, downsample=None):
35 | super(BasicBlock, self).__init__()
36 | self.conv1 = conv3x3(inplanes, planes, stride)
37 | self.relu = nn.ReLU(inplace=True)
38 | self.conv2 = conv3x3(planes, planes)
39 | self.downsample = downsample
40 | self.stride = stride
41 |
42 | def forward(self, x):
43 | residual = x
44 |
45 | out = self.conv1(x)
46 | out = self.relu(out)
47 |
48 | out = self.conv2(out)
49 |
50 | if self.downsample is not None:
51 | residual = self.downsample(x)
52 |
53 | out += residual
54 | out = self.relu(out)
55 |
56 | return out
57 |
58 |
59 | class Bottleneck(nn.Module):
60 | expansion = 4
61 |
62 | def __init__(self, inplanes, planes, stride=1, downsample=None):
63 | super(Bottleneck, self).__init__()
64 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True)
65 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
66 | padding=1, bias=True)
67 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=True)
68 | self.relu = nn.ReLU(inplace=True)
69 | self.downsample = downsample
70 | self.stride = stride
71 |
72 | def forward(self, x):
73 | residual = x
74 |
75 | out = self.conv1(x)
76 | out = self.relu(out)
77 |
78 | out = self.conv2(out)
79 | out = self.relu(out)
80 |
81 | out = self.conv3(out)
82 |
83 | if self.downsample is not None:
84 | residual = self.downsample(x)
85 |
86 | out += residual
87 | out = self.relu(out)
88 |
89 | return out
90 |
91 |
92 | class ResNet(nn.Module):
93 |
94 | def __init__(self, block, layers, num_classes=1000):
95 | self.inplanes = 64
96 | super(ResNet, self).__init__()
97 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
98 | bias=True)
99 | self.relu = nn.ReLU(inplace=True)
100 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
101 | self.layer1 = self._make_layer(block, 64, layers[0])
102 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
103 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
104 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
105 | self.avgpool = nn.AvgPool2d(7, stride=1)
106 | self.fc = nn.Linear(512 * block.expansion, num_classes)
107 |
108 | for m in self.modules():
109 | if isinstance(m, nn.Conv2d):
110 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
111 | m.weight.data.normal_(0, math.sqrt(2. / n))
112 | elif isinstance(m, nn.BatchNorm2d):
113 | m.weight.data.fill_(1)
114 | m.bias.data.zero_()
115 |
116 | def _make_layer(self, block, planes, blocks, stride=1):
117 | downsample = None
118 | if stride != 1 or self.inplanes != planes * block.expansion:
119 | downsample = nn.Sequential(
120 | nn.Conv2d(self.inplanes, planes * block.expansion,
121 | kernel_size=1, stride=stride, bias=True),
122 | )
123 |
124 | layers = []
125 | layers.append(block(self.inplanes, planes, stride, downsample))
126 | self.inplanes = planes * block.expansion
127 | for i in range(1, blocks):
128 | layers.append(block(self.inplanes, planes))
129 |
130 | return nn.Sequential(*layers)
131 |
132 | def forward(self, x):
133 | x = self.conv1(x)
134 | x = self.relu(x)
135 | x = self.maxpool(x)
136 |
137 | x = self.layer1(x)
138 | x = self.layer2(x)
139 | x = self.layer3(x)
140 | x = self.layer4(x)
141 |
142 | x = self.avgpool(x)
143 | x = x.view(x.size(0), -1)
144 | x = self.fc(x)
145 |
146 | return x
147 |
148 |
149 | def resnet18(pretrained=False, **kwargs):
150 | """Constructs a ResNet-18 model.
151 |
152 | Args:
153 | pretrained (bool): If True, returns a model pre-trained on ImageNet
154 | """
155 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
156 | if pretrained:
157 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
158 | return model
159 |
160 |
161 | def resnet34(pretrained=False, **kwargs):
162 | """Constructs a ResNet-34 model.
163 |
164 | Args:
165 | pretrained (bool): If True, returns a model pre-trained on ImageNet
166 | """
167 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
168 | if pretrained:
169 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
170 | return model
171 |
172 |
173 | def resnet50(pretrained=False, **kwargs):
174 | """Constructs a ResNet-50 model.
175 |
176 | Args:
177 | pretrained (bool): If True, returns a model pre-trained on ImageNet
178 | """
179 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
180 | if pretrained:
181 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
182 | return model
183 |
184 |
185 | def resnet101(pretrained=False, **kwargs):
186 | """Constructs a ResNet-101 model.
187 |
188 | Args:
189 | pretrained (bool): If True, returns a model pre-trained on ImageNet
190 | """
191 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
192 | if pretrained:
193 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
194 | return model
195 |
196 |
197 | def resnet152(pretrained=False, **kwargs):
198 | """Constructs a ResNet-152 model.
199 |
200 | Args:
201 | pretrained (bool): If True, returns a model pre-trained on ImageNet
202 | """
203 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
204 | if pretrained:
205 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
206 | return model
207 |
--------------------------------------------------------------------------------
/net/net_bn_conv_merge_quantize.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | """
3 | 1. 将卷积层, 除了第一层使用 QWConv2D(不量化输出, 不然性能下降10个百分点), 全部使用QWACvon2D
4 | 2. 线性层全部使用 QWALinear, 线性层所占的参数比例在 resnet18中占据 4.4%, resnet50中占据 8%, 不量化的话会有大约 0.4个百分点的性能提升
5 | 3. 在全连接层送入 softmax 之前, 加一个标量层, 做 softmax 的软化??
6 | """
7 | import torch.nn as nn
8 | import math
9 | import torch.utils.model_zoo as model_zoo
10 | from quantize.quantize_module_ import QWConv2D, QWAConv2D, QWALinear, Scalar
11 |
12 |
13 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
14 | 'resnet152']
15 |
16 |
17 | model_urls = {
18 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
19 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
20 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
21 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
22 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
23 | }
24 |
25 |
26 | def conv3x3(in_planes, out_planes, stride=1):
27 | """3x3 convolution with padding"""
28 | return QWAConv2D(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=True)
29 |
30 |
31 | class BasicBlock(nn.Module):
32 | expansion = 1
33 |
34 | def __init__(self, inplanes, planes, stride=1, downsample=None):
35 | super(BasicBlock, self).__init__()
36 | self.conv1 = conv3x3(inplanes, planes, stride)
37 | self.relu = nn.ReLU(inplace=True)
38 | self.conv2 = conv3x3(planes, planes)
39 | self.downsample = downsample
40 | self.stride = stride
41 |
42 | def forward(self, x):
43 | residual = x
44 | out = self.conv1(x)
45 | out = self.relu(out)
46 |
47 | out = self.conv2(out)
48 |
49 | if self.downsample is not None:
50 | residual = self.downsample(x)
51 |
52 | out += residual
53 | out = self.relu(out)
54 |
55 | return out
56 |
57 |
58 | class Bottleneck(nn.Module):
59 | expansion = 4
60 |
61 | def __init__(self, inplanes, planes, stride=1, downsample=None):
62 | super(Bottleneck, self).__init__()
63 | self.conv1 = QWAConv2D(inplanes, planes, kernel_size=1, bias=True)
64 | self.conv2 = QWAConv2D(planes, planes, kernel_size=3, stride=stride,
65 | padding=1, bias=True)
66 | self.conv3 = QWAConv2D(planes, planes * 4, kernel_size=1, bias=True)
67 | self.relu = nn.ReLU(inplace=True)
68 | self.downsample = downsample
69 | self.stride = stride
70 |
71 | def forward(self, x):
72 | residual = x
73 |
74 | out = self.conv1(x)
75 | out = self.relu(out)
76 |
77 | out = self.conv2(out)
78 | out = self.relu(out)
79 |
80 | out = self.conv3(out)
81 |
82 | if self.downsample is not None:
83 | residual = self.downsample(x)
84 |
85 | out += residual
86 | out = self.relu(out)
87 |
88 | return out
89 |
90 |
91 | class ResNet(nn.Module):
92 |
93 | def __init__(self, qblock, layers, num_classes=1000):
94 | self.inplanes = 64
95 | super(ResNet, self).__init__()
96 | self.conv1 = QWConv2D(3, 64, kernel_size=7, stride=2, padding=3,
97 | bias=True)
98 | self.relu = nn.ReLU(inplace=True)
99 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
100 | self.layer1 = self._make_layer(qblock, 64, layers[0])
101 | self.layer2 = self._make_layer(qblock, 128, layers[1], stride=2)
102 | self.layer3 = self._make_layer(qblock, 256, layers[2], stride=2)
103 | self.layer4 = self._make_layer(qblock, 512, layers[3], stride=2)
104 | self.avgpool = nn.AvgPool2d(7, stride=1)
105 | self.fc = QWALinear(512 * qblock.expansion, num_classes) # modified: quantized linear layer
106 | self.scalar = Scalar() # modified: scalar layer before softmax
107 |
108 | for m in self.modules():
109 | if isinstance(m, nn.Conv2d):
110 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
111 | m.weight.data.normal_(0, math.sqrt(2. / n))
112 | elif isinstance(m, nn.BatchNorm2d):
113 | m.weight.data.fill_(1)
114 | m.bias.data.zero_()
115 |
116 | def _make_layer(self, block, planes, blocks, stride=1):
117 | downsample = None
118 | if stride != 1 or self.inplanes != planes * block.expansion:
119 | downsample = nn.Sequential(
120 | QWAConv2D(self.inplanes, planes * block.expansion,
121 | kernel_size=1, stride=stride, bias=True),
122 | )
123 |
124 | layers = []
125 | layers.append(block(self.inplanes, planes, stride, downsample))
126 | self.inplanes = planes * block.expansion
127 | for i in range(1, blocks):
128 | layers.append(block(self.inplanes, planes))
129 |
130 | return nn.Sequential(*layers)
131 |
132 | def forward(self, x):
133 | x = self.conv1(x)
134 | x = self.relu(x)
135 | x = self.maxpool(x)
136 |
137 | x = self.layer1(x)
138 | x = self.layer2(x)
139 | x = self.layer3(x)
140 | x = self.layer4(x)
141 |
142 | x = self.avgpool(x)
143 | x = x.view(x.size(0), -1)
144 | x = self.fc(x)
145 | x = self.scalar(x) # modified
146 |
147 | return x
148 |
149 |
150 | def resnet18(pretrained=False, **kwargs):
151 | """Constructs a ResNet-18 model.
152 |
153 | Args:
154 | pretrained (bool): If True, returns a model pre-trained on ImageNet
155 | """
156 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
157 | if pretrained:
158 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
159 | return model
160 |
161 |
162 | def resnet34(pretrained=False, **kwargs):
163 | """Constructs a ResNet-34 model.
164 |
165 | Args:
166 | pretrained (bool): If True, returns a model pre-trained on ImageNet
167 | """
168 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
169 | if pretrained:
170 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
171 | return model
172 |
173 |
174 | def resnet50(pretrained=False, **kwargs):
175 | """Constructs a ResNet-50 model.
176 |
177 | Args:
178 | pretrained (bool): If True, returns a model pre-trained on ImageNet
179 | """
180 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
181 | if pretrained:
182 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
183 | return model
184 |
185 |
186 | def resnet101(pretrained=False, **kwargs):
187 | """Constructs a ResNet-101 model.
188 |
189 | Args:
190 | pretrained (bool): If True, returns a model pre-trained on ImageNet
191 | """
192 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
193 | if pretrained:
194 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
195 | return model
196 |
197 |
198 | def resnet152(pretrained=False, **kwargs):
199 | """Constructs a ResNet-152 model.
200 |
201 | Args:
202 | pretrained (bool): If True, returns a model pre-trained on ImageNet
203 | """
204 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
205 | if pretrained:
206 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
207 | return model
208 |
--------------------------------------------------------------------------------
/net/net_quantize_activation.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | """
3 | 1. All conv layers use QWAConv2D except the first, which uses QWConv2D (its activations are left unquantized; quantizing them costs about 10 points of accuracy)
4 | 2. All linear layers use QWALinear. Linear layers hold about 4.4% of the parameters in resnet18 and 8% in resnet50; leaving them unquantized gains roughly 0.4 points (see the parameter-share sketch after this file)
5 | 3. A scalar layer is added before the fully connected output reaches softmax, to soften the softmax (?)
6 | """
7 | import torch.nn as nn
8 | import math
9 | import torch.utils.model_zoo as model_zoo
10 | from quantize.quantize_module_ import QWConv2D, QWAConv2D, QWALinear, Scalar
11 |
12 |
13 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
14 | 'resnet152']
15 |
16 |
17 | model_urls = {
18 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
19 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
20 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
21 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
22 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
23 | }
24 |
25 |
26 | def conv3x3(in_planes, out_planes, stride=1):
27 | """3x3 convolution with padding"""
28 | return QWAConv2D(in_planes, out_planes, kernel_size=3, stride=stride,
29 | padding=1, bias=False)
30 |
31 |
32 | class BasicBlock(nn.Module):
33 | expansion = 1
34 |
35 | def __init__(self, inplanes, planes, stride=1, downsample=None):
36 | super(BasicBlock, self).__init__()
37 | self.conv1 = conv3x3(inplanes, planes, stride)
38 | self.bn1 = nn.BatchNorm2d(planes)
39 | self.relu = nn.ReLU(inplace=True)
40 | self.conv2 = conv3x3(planes, planes)
41 | self.bn2 = nn.BatchNorm2d(planes)
42 | self.downsample = downsample
43 | self.stride = stride
44 |
45 | def forward(self, x):
46 | residual = x
47 | out = self.conv1(x)
48 | out = self.bn1(out)
49 | out = self.relu(out)
50 |
51 | out = self.conv2(out)
52 | out = self.bn2(out)
53 |
54 | if self.downsample is not None:
55 | residual = self.downsample(x)
56 |
57 | out += residual
58 | out = self.relu(out)
59 |
60 | return out
61 |
62 |
63 | class Bottleneck(nn.Module):
64 | expansion = 4
65 |
66 | def __init__(self, inplanes, planes, stride=1, downsample=None):
67 | super(Bottleneck, self).__init__()
68 | self.conv1 = QWAConv2D(inplanes, planes, kernel_size=1, bias=False)
69 | self.bn1 = nn.BatchNorm2d(planes)
70 | self.conv2 = QWAConv2D(planes, planes, kernel_size=3, stride=stride,
71 | padding=1, bias=False)
72 | self.bn2 = nn.BatchNorm2d(planes)
73 | self.conv3 = QWAConv2D(planes, planes * 4, kernel_size=1, bias=False)
74 | self.bn3 = nn.BatchNorm2d(planes * 4)
75 | self.relu = nn.ReLU(inplace=True)
76 | self.downsample = downsample
77 | self.stride = stride
78 |
79 | def forward(self, x):
80 | residual = x
81 |
82 | out = self.conv1(x)
83 | out = self.bn1(out)
84 | out = self.relu(out)
85 |
86 | out = self.conv2(out)
87 | out = self.bn2(out)
88 | out = self.relu(out)
89 |
90 | out = self.conv3(out)
91 | out = self.bn3(out)
92 |
93 | if self.downsample is not None:
94 | residual = self.downsample(x)
95 |
96 | out += residual
97 | out = self.relu(out)
98 |
99 | return out
100 |
101 |
102 | class ResNet(nn.Module):
103 |
104 | def __init__(self, qblock, layers, num_classes=1000):
105 | self.inplanes = 64
106 | super(ResNet, self).__init__()
107 | self.conv1 = QWConv2D(3, 64, kernel_size=7, stride=2, padding=3,
108 | bias=False)
109 | self.bn1 = nn.BatchNorm2d(64)
110 | self.relu = nn.ReLU(inplace=True)
111 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
112 | self.layer1 = self._make_layer(qblock, 64, layers[0])
113 | self.layer2 = self._make_layer(qblock, 128, layers[1], stride=2)
114 | self.layer3 = self._make_layer(qblock, 256, layers[2], stride=2)
115 | self.layer4 = self._make_layer(qblock, 512, layers[3], stride=2)
116 | self.avgpool = nn.AvgPool2d(7, stride=1)
117 | self.fc = QWALinear(512 * qblock.expansion, num_classes) # modified
118 | self.scalar = Scalar() # modified
119 |
120 | for m in self.modules():
121 | if isinstance(m, nn.Conv2d):
122 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
123 | m.weight.data.normal_(0, math.sqrt(2. / n))
124 | elif isinstance(m, nn.BatchNorm2d):
125 | m.weight.data.fill_(1)
126 | m.bias.data.zero_()
127 |
128 | def _make_layer(self, block, planes, blocks, stride=1):
129 | downsample = None
130 | if stride != 1 or self.inplanes != planes * block.expansion:
131 | downsample = nn.Sequential(
132 | QWAConv2D(self.inplanes, planes * block.expansion,
133 | kernel_size=1, stride=stride, bias=False),
134 | nn.BatchNorm2d(planes * block.expansion),
135 | )
136 |
137 | layers = []
138 | layers.append(block(self.inplanes, planes, stride, downsample))
139 | self.inplanes = planes * block.expansion
140 | for i in range(1, blocks):
141 | layers.append(block(self.inplanes, planes))
142 |
143 | return nn.Sequential(*layers)
144 |
145 | def forward(self, x):
146 | x = self.conv1(x)
147 | x = self.bn1(x)
148 | x = self.relu(x)
149 | x = self.maxpool(x)
150 |
151 | x = self.layer1(x)
152 | x = self.layer2(x)
153 | x = self.layer3(x)
154 | x = self.layer4(x)
155 |
156 | x = self.avgpool(x)
157 | x = x.view(x.size(0), -1)
158 | x = self.fc(x)
159 | x = self.scalar(x) # modified
160 |
161 | return x
162 |
163 |
164 | def resnet18(pretrained=False, **kwargs):
165 | """Constructs a ResNet-18 model.
166 |
167 | Args:
168 | pretrained (bool): If True, returns a model pre-trained on ImageNet
169 | """
170 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
171 | if pretrained:
172 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
173 | return model
174 |
175 |
176 | def resnet34(pretrained=False, **kwargs):
177 | """Constructs a ResNet-34 model.
178 |
179 | Args:
180 | pretrained (bool): If True, returns a model pre-trained on ImageNet
181 | """
182 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
183 | if pretrained:
184 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
185 | return model
186 |
187 |
188 | def resnet50(pretrained=False, **kwargs):
189 | """Constructs a ResNet-50 model.
190 |
191 | Args:
192 | pretrained (bool): If True, returns a model pre-trained on ImageNet
193 | """
194 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
195 | if pretrained:
196 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
197 | return model
198 |
199 |
200 | def resnet101(pretrained=False, **kwargs):
201 | """Constructs a ResNet-101 model.
202 |
203 | Args:
204 | pretrained (bool): If True, returns a model pre-trained on ImageNet
205 | """
206 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
207 | if pretrained:
208 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
209 | return model
210 |
211 |
212 | def resnet152(pretrained=False, **kwargs):
213 | """Constructs a ResNet-152 model.
214 |
215 | Args:
216 | pretrained (bool): If True, returns a model pre-trained on ImageNet
217 | """
218 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
219 | if pretrained:
220 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
221 | return model
222 |
--------------------------------------------------------------------------------
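The parameter-share figures quoted in the module docstring above are easy to check. A minimal sketch, assuming only that torchvision is installed, that measures the fully connected layer's share of parameters in the stock ResNets:

```python
# Sketch: fraction of parameters held by the fc layer in stock ResNets.
# The printed shares should roughly match the 4.4% (resnet18) and 8%
# (resnet50) figures quoted in net_quantize_activation.py.
import torchvision.models as models

for arch in ("resnet18", "resnet50"):
    model = models.__dict__[arch]()
    total = sum(p.numel() for p in model.parameters())
    fc = sum(p.numel() for p in model.fc.parameters())
    print("{}: fc holds {:.1%} of {:,} parameters".format(arch, fc / total, total))
```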
/net/net_quantize_guide.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch.nn as nn
3 | import math
4 | import torch.utils.model_zoo as model_zoo
5 | from quantize.quantize_module_ import QWConv2D, QWAConv2D, QWALinear, Scalar
6 | from quantize.quantize_method import quantize_activations_gemm
7 |
8 | """
9 | Two ways to train the two models together for guiding:
10 | 1. Train the two models separately, extract the intermediate feature maps, and compute a distance between them
11 | 2. Write both models into a single network and train them jointly; parameters are loaded together, and the trained low-precision parameters are extracted afterwards (this file takes the second route; a standalone distance sketch follows this file)
12 | """
13 |
14 | """
15 | 1. All conv layers use QWAConv2D except the first, which uses QWConv2D (its activations are left unquantized; quantizing them costs about 10 points of accuracy)
16 | 2. All linear layers use QWALinear. Linear layers hold about 4.4% of the parameters in resnet18 and 8% in resnet50; leaving them unquantized gains roughly 0.4 points
17 | 3. A scalar layer is added before the fully connected output reaches softmax, to soften the softmax (?)
18 | """
19 |
20 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
21 | 'resnet152']
22 |
23 |
24 | model_urls = {
25 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
26 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
27 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
28 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
29 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
30 | }
31 |
32 |
33 | def qconv3x3(in_planes, out_planes, stride=1):
34 | """3x3 convolution with padding"""
35 | return QWAConv2D(in_planes, out_planes, kernel_size=3, stride=stride,
36 | padding=1, bias=False)
37 |
38 |
39 | def conv3x3(in_planes, out_planes, stride=1):
40 | """3x3 convolution with padding"""
41 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
42 | padding=1, bias=False)
43 |
44 |
45 | class ResNet(nn.Module):
46 |
47 | def __init__(self, block, layers, num_classes=1000):
48 | self.inplanes = 64
49 | super(ResNet, self).__init__()
50 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
51 | bias=False)
52 | self.bn1 = nn.BatchNorm2d(64)
53 | self.relu = nn.ReLU(inplace=True)
54 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
55 | self.layer1 = self._make_layer(block, 64, layers[0])
56 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
57 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
58 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
59 | self.avgpool = nn.AvgPool2d(7, stride=1)
60 | self.fc = nn.Linear(512 * block.expansion, num_classes)
61 |
62 | for m in self.modules():
63 | if isinstance(m, nn.Conv2d):
64 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
65 | m.weight.data.normal_(0, math.sqrt(2. / n))
66 | elif isinstance(m, nn.BatchNorm2d):
67 | m.weight.data.fill_(1)
68 | m.bias.data.zero_()
69 |
70 | def _make_layer(self, block, planes, blocks, stride=1):
71 | downsample = None
72 | if stride != 1 or self.inplanes != planes * block.expansion:
73 | downsample = nn.Sequential(
74 | nn.Conv2d(self.inplanes, planes * block.expansion,
75 | kernel_size=1, stride=stride, bias=False),
76 | nn.BatchNorm2d(planes * block.expansion),
77 | )
78 |
79 | layers = []
80 | layers.append(block(self.inplanes, planes, stride, downsample))
81 | self.inplanes = planes * block.expansion
82 | for i in range(1, blocks):
83 | layers.append(block(self.inplanes, planes))
84 |
85 | return nn.Sequential(*layers)
86 |
87 | def forward(self, x):
88 | x = self.conv1(x)
89 | x = self.bn1(x)
90 | x = self.relu(x)
91 | x = self.maxpool(x)
92 |
93 | x = self.layer1(x)
94 | x = self.layer2(x)
95 | x = self.layer3(x)
96 | x = self.layer4(x)
97 |
98 | x = self.avgpool(x)
99 | x = x.view(x.size(0), -1)
100 | x = self.fc(x)
101 |
102 | return x
103 |
104 |
105 | class BasicBlock(nn.Module):
106 | expansion = 1
107 |
108 | def __init__(self, inplanes, planes, stride=1, downsample=None):
109 | super(BasicBlock, self).__init__()
110 | self.conv1 = conv3x3(inplanes, planes, stride)
111 | self.bn1 = nn.BatchNorm2d(planes)
112 | self.relu = nn.ReLU(inplace=True)
113 | self.conv2 = conv3x3(planes, planes)
114 | self.bn2 = nn.BatchNorm2d(planes)
115 | self.downsample = downsample
116 | self.stride = stride
117 |
118 | def forward(self, x):
119 | residual = x
120 |
121 | out = self.conv1(x)
122 | out = self.bn1(out)
123 | out = self.relu(out)
124 |
125 | out = self.conv2(out)
126 | out = self.bn2(out)
127 |
128 | if self.downsample is not None:
129 | residual = self.downsample(x)
130 |
131 | out += residual
132 | out = self.relu(out)
133 |
134 | return out
135 |
136 |
137 | class QBasicBlock(nn.Module):
138 | expansion = 1
139 |
140 | def __init__(self, inplanes, planes, stride=1, downsample=None):
141 | super(QBasicBlock, self).__init__()
142 | self.conv1 = qconv3x3(inplanes, planes, stride)
143 | self.bn1 = nn.BatchNorm2d(planes)
144 | self.relu = nn.ReLU(inplace=True)
145 | self.conv2 = qconv3x3(planes, planes)
146 | self.bn2 = nn.BatchNorm2d(planes)
147 | self.downsample = downsample
148 | self.stride = stride
149 |
150 | def forward(self, x):
151 | residual = x
152 | out = self.conv1(x)
153 | out = self.bn1(out)
154 | out = self.relu(out)
155 |
156 | out = self.conv2(out)
157 | out = self.bn2(out)
158 |
159 | if self.downsample is not None:
160 | residual = self.downsample(x)
161 |
162 | out += residual
163 | out = self.relu(out)
164 |
165 | return out
166 |
167 |
168 | class Bottleneck(nn.Module):
169 | expansion = 4
170 |
171 | def __init__(self, inplanes, planes, stride=1, downsample=None):
172 | super(Bottleneck, self).__init__()
173 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
174 | self.bn1 = nn.BatchNorm2d(planes)
175 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
176 | padding=1, bias=False)
177 | self.bn2 = nn.BatchNorm2d(planes)
178 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
179 | self.bn3 = nn.BatchNorm2d(planes * 4)
180 | self.relu = nn.ReLU(inplace=True)
181 | self.downsample = downsample
182 | self.stride = stride
183 |
184 | def forward(self, x):
185 | residual = x
186 |
187 | out = self.conv1(x)
188 | out = self.bn1(out)
189 | out = self.relu(out)
190 |
191 | out = self.conv2(out)
192 | out = self.bn2(out)
193 | out = self.relu(out)
194 |
195 | out = self.conv3(out)
196 | out = self.bn3(out)
197 |
198 | if self.downsample is not None:
199 | residual = self.downsample(x)
200 |
201 | out += residual
202 | out = self.relu(out)
203 |
204 | return out
205 |
206 |
207 | class QBottleneck(nn.Module):
208 | expansion = 4
209 |
210 | def __init__(self, inplanes, planes, stride=1, downsample=None):
211 | super(QBottleneck, self).__init__()
212 | self.conv1 = QWAConv2D(inplanes, planes, kernel_size=1, bias=False)
213 | self.bn1 = nn.BatchNorm2d(planes)
214 | self.conv2 = QWAConv2D(planes, planes, kernel_size=3, stride=stride,
215 | padding=1, bias=False)
216 | self.bn2 = nn.BatchNorm2d(planes)
217 | self.conv3 = QWAConv2D(planes, planes * 4, kernel_size=1, bias=False)
218 | self.bn3 = nn.BatchNorm2d(planes * 4)
219 | self.relu = nn.ReLU(inplace=True)
220 | self.downsample = downsample
221 | self.stride = stride
222 |
223 | def forward(self, x):
224 | residual = x
225 |
226 | out = self.conv1(x)
227 | out = self.bn1(out)
228 | out = self.relu(out)
229 |
230 | out = self.conv2(out)
231 | out = self.bn2(out)
232 | out = self.relu(out)
233 |
234 | out = self.conv3(out)
235 | out = self.bn3(out)
236 |
237 | if self.downsample is not None:
238 | residual = self.downsample(x)
239 |
240 | out += residual
241 | out = self.relu(out)
242 |
243 | return out
244 |
245 |
246 | class ResNet(nn.Module):
247 |
248 | def __init__(self, qblock, block, layers, num_classes=1000):
249 | self.inplanes = 64
250 | super(ResNet, self).__init__()
251 | self.qconv1 = QWConv2D(3, 64, kernel_size=7, stride=2, padding=3,
252 | bias=False)
253 | self.qbn1 = nn.BatchNorm2d(64)
254 | self.qrelu = nn.ReLU(inplace=True)
255 | self.qmaxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
256 | self.qlayer1 = self._qmake_layer(qblock, 64, layers[0])
257 | self.qlayer2 = self._qmake_layer(qblock, 128, layers[1], stride=2)
258 | self.qlayer3 = self._qmake_layer(qblock, 256, layers[2], stride=2)
259 | self.qlayer4 = self._qmake_layer(qblock, 512, layers[3], stride=2)
260 | self.qavgpool = nn.AvgPool2d(7, stride=1)
261 | self.qfc = QWALinear(512 * qblock.expansion, num_classes) # modified
262 | self.scalar = Scalar() # modified
263 |
264 | # full-precision modules
265 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
266 | bias=False)
267 | self.bn1 = nn.BatchNorm2d(64)
268 | self.relu = nn.ReLU(inplace=True)
269 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
270 | self.layer1 = self._make_layer(block, 64, layers[0])
271 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
272 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
273 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
274 | self.avgpool = nn.AvgPool2d(7, stride=1)
275 | self.fc = nn.Linear(512 * block.expansion, num_classes)
276 |
277 | for m in self.modules():
278 | if isinstance(m, nn.Conv2d):
279 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
280 | m.weight.data.normal_(0, math.sqrt(2. / n))
281 | elif isinstance(m, nn.BatchNorm2d):
282 | m.weight.data.fill_(1)
283 | m.bias.data.zero_()
284 |
285 | def _qmake_layer(self, block, planes, blocks, stride=1):
286 | downsample = None
287 | if stride != 1 or self.inplanes != planes * block.expansion:
288 | downsample = nn.Sequential(
289 | QWAConv2D(self.inplanes, planes * block.expansion,
290 | kernel_size=1, stride=stride, bias=False),
291 | nn.BatchNorm2d(planes * block.expansion),
292 | )
293 |
294 | layers = []
295 | layers.append(block(self.inplanes, planes, stride, downsample))
296 | self.inplanes = planes * block.expansion
297 | for i in range(1, blocks):
298 | layers.append(block(self.inplanes, planes))
299 |
300 | return nn.Sequential(*layers)
301 |
302 | def _make_layer(self, block, planes, blocks, stride=1):
303 | downsample = None
304 | if stride != 1 or self.inplanes != planes * block.expansion:
305 | downsample = nn.Sequential(
306 | nn.Conv2d(self.inplanes, planes * block.expansion,
307 | kernel_size=1, stride=stride, bias=False),
308 | nn.BatchNorm2d(planes * block.expansion),
309 | )
310 |
311 | layers = []
312 | layers.append(block(self.inplanes, planes, stride, downsample))
313 | self.inplanes = planes * block.expansion
314 | for i in range(1, blocks):
315 | layers.append(block(self.inplanes, planes))
316 |
317 | return nn.Sequential(*layers)
318 |
319 | @staticmethod
320 | def num_features(fm_shape):
321 | num = 1
322 | for dim in fm_shape:
323 | num *= dim
324 | return num
325 |
326 | def forward(self, x):
327 | qx = x
328 | qx = self.qconv1(qx)
329 | qx = self.qbn1(qx)
330 | qx = self.qrelu(qx)
331 | qx = self.qmaxpool(qx)
332 |
333 | qx = self.qlayer1(qx)
334 | qx = self.qlayer2(qx)
335 | ql3 = self.qlayer3(qx)
336 | ql4 = self.qlayer4(ql3)
337 |
338 | qx = self.qavgpool(ql4)
339 | qx = qx.view(qx.size(0), -1)
340 | qx = self.qfc(qx)
341 | qx = self.scalar(qx) # modified
342 |
343 | # full-precision model
344 | x = self.conv1(x)
345 | x = self.bn1(x)
346 | x = self.relu(x)
347 | x = self.maxpool(x)
348 |
349 | x = self.layer1(x)
350 | x = self.layer2(x)
351 | l3 = self.layer3(x)
352 | l4 = self.layer4(l3)
353 |
354 | x = self.avgpool(l4)
355 | x = x.view(x.size(0), -1)
356 | x = self.fc(x)
357 |
358 | pair_distance = nn.PairwiseDistance(p=1)
359 | distance = pair_distance(quantize_activations_gemm(ql3), quantize_activations_gemm(l3)) / self.num_features(l3.size()) \
360 | + pair_distance(quantize_activations_gemm(ql4), quantize_activations_gemm(l4)) / self.num_features(l4.size())
361 |
362 | return qx, x, distance
363 |
364 |
365 | def resnet18(pretrained=False, **kwargs):
366 | """Constructs a ResNet-18 model.
367 |
368 | Args:
369 | pretrained (bool): If True, returns a model pre-trained on ImageNet
370 | """
371 | model = ResNet(QBasicBlock, BasicBlock, [2, 2, 2, 2], **kwargs)
372 | if pretrained:
373 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
374 | return model
375 |
376 |
377 | def resnet34(pretrained=False, **kwargs):
378 | """Constructs a ResNet-34 model.
379 |
380 | Args:
381 | pretrained (bool): If True, returns a model pre-trained on ImageNet
382 | """
383 | model = ResNet(QBasicBlock, BasicBlock, [3, 4, 6, 3], **kwargs)
384 | if pretrained:
385 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
386 | return model
387 |
388 |
389 | def resnet50(pretrained=False, **kwargs):
390 | """Constructs a ResNet-50 model.
391 |
392 | Args:
393 | pretrained (bool): If True, returns a model pre-trained on ImageNet
394 | """
395 | model = ResNet(QBottleneck, Bottleneck, [3, 4, 6, 3], **kwargs)
396 | if pretrained:
397 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
398 | return model
399 |
400 |
401 | def resnet101(pretrained=False, **kwargs):
402 | """Constructs a ResNet-101 model.
403 |
404 | Args:
405 | pretrained (bool): If True, returns a model pre-trained on ImageNet
406 | """
407 | model = ResNet(QBottleneck, Bottleneck, [3, 4, 23, 3], **kwargs)
408 | if pretrained:
409 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
410 | return model
411 |
412 |
413 | def resnet152(pretrained=False, **kwargs):
414 | """Constructs a ResNet-152 model.
415 |
416 | Args:
417 | pretrained (bool): If True, returns a model pre-trained on ImageNet
418 | """
419 | model = ResNet(QBottleneck, Bottleneck, [3, 8, 36, 3], **kwargs)
420 | if pretrained:
421 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
422 | return model
423 |
--------------------------------------------------------------------------------
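The distance term in the guided forward pass above normalizes a pairwise distance between quantized feature maps by their element count. A standalone sketch of that computation, with random tensors standing in for the layer-4 feature maps (the shapes here are hypothetical):

```python
# Sketch: the guided distance term in isolation. Random tensors stand in
# for the low- and full-precision layer-4 feature maps.
import torch
from quantize.quantize_method import quantize_activations_gemm

low = torch.randn(8, 512, 7, 7)   # hypothetical low-precision feature map
full = torch.randn(8, 512, 7, 7)  # hypothetical full-precision feature map

# L1 distance between the quantized maps, normalized per element
diff = quantize_activations_gemm(low) - quantize_activations_gemm(full)
distance = diff.norm(p=1) / diff.numel()
print(distance.item())
```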
/net/net_quantize_weight.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | """
3 | All linear layers use QWLinear
4 | """
5 | import torch.nn as nn
6 | import math
7 | import torch.utils.model_zoo as model_zoo
8 | from quantize.quantize_module_ import QWConv2D, Scalar, QWLinear
9 |
10 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
11 | 'resnet152']
12 |
13 |
14 | model_urls = {
15 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
16 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
17 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
18 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
19 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
20 | }
21 |
22 |
23 | def conv3x3(in_planes, out_planes, stride=1):
24 | """3x3 convolution with padding"""
25 | return QWConv2D(in_planes, out_planes, kernel_size=3, stride=stride,
26 | padding=1, bias=False)
27 |
28 |
29 | class BasicBlock(nn.Module):
30 | expansion = 1
31 |
32 | def __init__(self, inplanes, planes, stride=1, downsample=None):
33 | super(BasicBlock, self).__init__()
34 | self.conv1 = conv3x3(inplanes, planes, stride)
35 | self.bn1 = nn.BatchNorm2d(planes)
36 | self.relu = nn.ReLU(inplace=True)
37 | self.conv2 = conv3x3(planes, planes)
38 | self.bn2 = nn.BatchNorm2d(planes)
39 | self.downsample = downsample
40 | self.stride = stride
41 |
42 | def forward(self, x):
43 | residual = x
44 |
45 | out = self.conv1(x)
46 | out = self.bn1(out)
47 | out = self.relu(out)
48 |
49 | out = self.conv2(out)
50 | out = self.bn2(out)
51 |
52 | if self.downsample is not None:
53 | residual = self.downsample(x)
54 |
55 | out += residual
56 | out = self.relu(out)
57 |
58 | return out
59 |
60 |
61 | class Bottleneck(nn.Module):
62 | expansion = 4
63 |
64 | def __init__(self, inplanes, planes, stride=1, downsample=None):
65 | super(Bottleneck, self).__init__()
66 | self.conv1 = QWConv2D(inplanes, planes, kernel_size=1, bias=False)
67 | self.bn1 = nn.BatchNorm2d(planes)
68 | self.conv2 = QWConv2D(planes, planes, kernel_size=3, stride=stride,
69 | padding=1, bias=False)
70 | self.bn2 = nn.BatchNorm2d(planes)
71 | self.conv3 = QWConv2D(planes, planes * 4, kernel_size=1, bias=False)
72 | self.bn3 = nn.BatchNorm2d(planes * 4)
73 | self.relu = nn.ReLU(inplace=True)
74 | self.downsample = downsample
75 | self.stride = stride
76 |
77 | def forward(self, x):
78 | residual = x
79 |
80 | out = self.conv1(x)
81 | out = self.bn1(out)
82 | out = self.relu(out)
83 |
84 | out = self.conv2(out)
85 | out = self.bn2(out)
86 | out = self.relu(out)
87 |
88 | out = self.conv3(out)
89 | out = self.bn3(out)
90 |
91 | if self.downsample is not None:
92 | residual = self.downsample(x)
93 |
94 | out += residual
95 | out = self.relu(out)
96 |
97 | return out
98 |
99 |
100 | class ResNet(nn.Module):
101 |
102 | def __init__(self, block, layers, num_classes=1000):
103 | self.inplanes = 64
104 | super(ResNet, self).__init__()
105 | self.conv1 = QWConv2D(3, 64, kernel_size=7, stride=2, padding=3,
106 | bias=False)
107 | self.bn1 = nn.BatchNorm2d(64)
108 | self.relu = nn.ReLU(inplace=True)
109 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
110 | self.layer1 = self._make_layer(block, 64, layers[0])
111 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
112 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
113 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
114 | self.avgpool = nn.AvgPool2d(7, stride=1)
115 | self.fc = QWLinear(512 * block.expansion, num_classes)
116 | self.scalar = Scalar()
117 |
118 | for m in self.modules():
119 | if isinstance(m, QWConv2D):
120 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
121 | m.weight.data.normal_(0, math.sqrt(2. / n))
122 | elif isinstance(m, nn.BatchNorm2d):
123 | m.weight.data.fill_(1)
124 | m.bias.data.zero_()
125 |
126 | def _make_layer(self, block, planes, blocks, stride=1):
127 | downsample = None
128 | if stride != 1 or self.inplanes != planes * block.expansion:
129 | downsample = nn.Sequential(
130 | QWConv2D(self.inplanes, planes * block.expansion,
131 | kernel_size=1, stride=stride, bias=False),
132 | nn.BatchNorm2d(planes * block.expansion),
133 | )
134 |
135 | layers = []
136 | layers.append(block(self.inplanes, planes, stride, downsample))
137 | self.inplanes = planes * block.expansion
138 | for i in range(1, blocks):
139 | layers.append(block(self.inplanes, planes))
140 |
141 | return nn.Sequential(*layers)
142 |
143 | def forward(self, x):
144 | x = self.conv1(x)
145 | x = self.bn1(x)
146 | x = self.relu(x)
147 | x = self.maxpool(x)
148 |
149 | x = self.layer1(x)
150 | x = self.layer2(x)
151 | x = self.layer3(x)
152 | x = self.layer4(x)
153 |
154 | x = self.avgpool(x)
155 | x = x.view(x.size(0), -1)
156 | x = self.fc(x)
157 | x = self.scalar(x)
158 |
159 | return x
160 |
161 |
162 | def resnet18(pretrained=False, **kwargs):
163 | """Constructs a ResNet-18 model.
164 |
165 | Args:
166 | pretrained (bool): If True, returns a model pre-trained on ImageNet
167 | """
168 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
169 | if pretrained:
170 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
171 | return model
172 |
173 |
174 | def resnet34(pretrained=False, **kwargs):
175 | """Constructs a ResNet-34 model.
176 |
177 | Args:
178 | pretrained (bool): If True, returns a model pre-trained on ImageNet
179 | """
180 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
181 | if pretrained:
182 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
183 | return model
184 |
185 |
186 | def resnet50(pretrained=False, **kwargs):
187 | """Constructs a ResNet-50 model.
188 |
189 | Args:
190 | pretrained (bool): If True, returns a model pre-trained on ImageNet
191 | """
192 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
193 | if pretrained:
194 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
195 | return model
196 |
197 |
198 | def resnet101(pretrained=False, **kwargs):
199 | """Constructs a ResNet-101 model.
200 |
201 | Args:
202 | pretrained (bool): If True, returns a model pre-trained on ImageNet
203 | """
204 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
205 | if pretrained:
206 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
207 | return model
208 |
209 |
210 | def resnet152(pretrained=False, **kwargs):
211 | """Constructs a ResNet-152 model.
212 |
213 | Args:
214 | pretrained (bool): If True, returns a model pre-trained on ImageNet
215 | """
216 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
217 | if pretrained:
218 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
219 | return model
220 |
--------------------------------------------------------------------------------
/net/simple_net.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | import torch.nn as nn
4 |
5 |
6 | class Net(nn.Module):
7 |
8 | def __init__(self):
9 | super(Net, self).__init__()
10 | # 1 input image channel, 1 output channel, 3x3 square convolution
11 | # kernel
12 | self.conv1 = nn.Conv2d(1, 1, kernel_size=3, padding=0, stride=1, bias=False)
13 | self.conv2 = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=1, bias=False)
14 | # an affine operation: y = Wx + b
15 | self.fc1 = nn.Linear(16, 2)
16 | self.relu = nn.ReLU(inplace=True)
17 |
18 | def forward(self, x):
19 | x = self.conv1(x)
20 | x = self.relu(x)
21 | x = self.conv2(x)
22 | x = self.relu(x)
23 | size = x.size()[1:] # all dimensions except the batch dimension
24 | num_features = 1
25 | for s in size:
26 | num_features *= s
27 | x = x.view(-1, num_features)
28 | x = self.fc1(x)
29 | return x
30 |
31 |
32 | if __name__ == "__main__":
33 | net = Net()
34 | print(net)
35 |
--------------------------------------------------------------------------------
/quantize/guided_distance_view.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import os
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.parallel
6 | import torch.backends.cudnn as cudnn
7 | import torch.optim
8 | import torch.utils.data
9 | import torch.utils.data.distributed
10 | import torchvision.models as models
11 | from tensorboardX import SummaryWriter
12 | from collections import defaultdict
13 | import time
14 |
15 | from utils.train_val import save_checkpoint, validate
16 | from utils.data_loader import load_train_data, load_val_data
17 | from utils.meter import AverageMeter, accuracy
18 | from quantize.quantize_method import quantize_activations_gemm
19 | from net import net_quantize_activation
20 |
21 |
22 | def guided(args):
23 | best_low_prec1 = 0
24 | full_prec_feature_map1 = defaultdict(torch.Tensor)
25 | full_prec_feature_map2 = defaultdict(torch.Tensor)
26 | low_prec_feature_map1 = defaultdict(torch.Tensor)
27 | low_prec_feature_map2 = defaultdict(torch.Tensor)
28 |
29 | def full_prec_hook(module, input, output):
30 | # This must be written as input[0].data.clone()
31 | # rather than input[0].clone(); otherwise backward raises:
32 | # RuntimeError: Trying to backward through the graph a second time,
33 | # but the buffers have already been freed. Specify retain_graph=True
34 | # when calling backward the first time
35 | cudaid = output.device.index # more robust than parsing repr(output.device)
36 | full_prec_feature_map1[cudaid] = input[0].data.clone()
37 | full_prec_feature_map2[cudaid] = output.data.clone()
38 |
39 | def low_prec_hook(module, input, output):
40 | cudaid = output.device.index
41 | low_prec_feature_map1[cudaid] = input[0].data.clone()
42 | low_prec_feature_map2[cudaid] = output.data.clone()
43 |
44 | def gpu_config(model):
45 | if args.gpu is not None: # a single, specified GPU
46 | model = model.cuda(args.gpu)
47 | elif args.distributed: # distributed training across machines
48 | model.cuda()
49 | model = torch.nn.parallel.DistributedDataParallel(model)
50 |
51 | else: # single machine, one or more GPUs
52 | if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
53 | model.features = torch.nn.DataParallel(model.features)
54 | model.cuda()
55 | else:
56 | # multi-GPU on one machine: specify the ids of the GPUs to use
57 | model = torch.nn.DataParallel(model, args.device_ids).cuda()
58 | return model
59 |
60 | def guided_train(summary_writer, log_per_epoch=100, print_freq=20):
61 |
62 | batch_time = AverageMeter()
63 | data_time = AverageMeter()
64 |
65 | low_prec_losses = AverageMeter()
66 | low_prec_top1 = AverageMeter()
67 | low_prec_top5 = AverageMeter()
68 | distance_meter = AverageMeter()
69 |
70 | # set the models' train/eval modes
71 | low_prec_model.train()
72 | full_prec_model.eval()
73 |
74 | end = time.time()
75 |
76 | # controls how often results are logged to tensorboard
77 | interval = len(train_loader) // log_per_epoch
78 | summary_point = [interval * split for split in torch.arange(log_per_epoch)]
79 |
80 | for i, (input, target) in enumerate(train_loader):
81 | # measure data loading time
82 | data_time.update(time.time() - end)
83 |
84 | if args.gpu is not None:
85 | input = input.cuda(args.gpu, non_blocking=True)
86 |
87 | # target must be moved to a cuda tensor;
88 | # if ``non_blocking=True`` and the source is in pinned memory,
89 | # the copy will be asynchronous with respect to the host
90 | target = target.cuda(args.gpu, non_blocking=True)
91 |
92 | full_prec_feature_map1.clear()
93 | low_prec_feature_map1.clear()
94 | full_prec_feature_map2.clear()
95 | low_prec_feature_map2.clear()
96 |
97 | # compute low_pre_output
98 | low_pre_output = low_prec_model(input)
99 | full_pre_output = full_prec_model(input)
100 |
101 | """Guided Key Point start"""
102 |
103 | # keep distance and the feature maps on the same GPU
104 | distance = torch.tensor([0.0]).cuda(args.gpu, non_blocking=True)
105 | num_layer3_features = 1
106 | for dim in full_prec_feature_map1[0].size():
107 | num_layer3_features *= dim
108 |
109 | num_layer4_features = 1
110 | for dim in full_prec_feature_map2[0].size():
111 | num_layer4_features *= dim
112 |
113 | for cudaid in full_prec_feature_map1:
114 | # manually move every feature map onto the same GPU
115 | full_prec_feature_map1[cudaid] = full_prec_feature_map1[cudaid].cuda(args.gpu, non_blocking=True)
116 | low_prec_feature_map1[cudaid] = low_prec_feature_map1[cudaid].cuda(args.gpu, non_blocking=True)
117 | full_prec_feature_map2[cudaid] = full_prec_feature_map2[cudaid].cuda(args.gpu, non_blocking=True)
118 | low_prec_feature_map2[cudaid] = low_prec_feature_map2[cudaid].cuda(args.gpu, non_blocking=True)
119 |
120 | for cudaid in low_prec_feature_map1:
121 | """
122 | RuntimeError: arguments are located on different GPUs
123 | the fix is to move every feature map onto the same GPU by hand
124 | """
125 | layer3 = (quantize_activations_gemm(low_prec_feature_map1[cudaid]) -
126 | quantize_activations_gemm(full_prec_feature_map1[cudaid])).norm(p=args.norm) / num_layer3_features
127 | layer4 = (quantize_activations_gemm(low_prec_feature_map2[cudaid]) -
128 | quantize_activations_gemm(full_prec_feature_map2[cudaid])).norm(p=args.norm) / num_layer4_features
129 | distance += (layer3 + layer4) / len(low_prec_feature_map1)
130 |
131 | distance *= args.balance
132 |
133 | """Guided Key Point end"""
134 |
135 | low_prec_loss = criterion(low_pre_output, target)
136 | low_prec_prec1, low_prec_prec5 = accuracy(low_pre_output, target, topk=(1, 5))
137 |
138 | low_prec_losses.update(low_prec_loss.item(), input.size(0))
139 | low_prec_top1.update(low_prec_prec1[0], input.size(0))
140 | low_prec_top5.update(low_prec_prec5[0], input.size(0))
141 | distance_meter.update(distance[0], 1)
142 |
143 | # compute gradient and do SGD step
144 | low_prec_optimizer.zero_grad()
145 | low_prec_loss.backward()
146 | low_prec_optimizer.step()
147 |
148 | # measure elapsed time
149 | batch_time.update(time.time() - end)
150 | end = time.time()
151 |
152 | if i % print_freq == 0:
153 |
154 | print('Epoch: [{0}][{1}/{2}]\t'
155 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
156 | 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
157 | 'Loss {low_prec_loss.val:.4f} ({low_prec_loss.avg:.4f})\t'
158 | 'Prec@1 {low_prec_top1.val:.3f} ({low_prec_top1.avg:.3f})\t'
159 | 'Prec@5 {low_prec_top5.val:.3f} ({low_prec_top5.avg:.3f}) \t'
160 | 'distance {distance.val:.3f} ({distance.avg:.3f})'.format(
161 | epoch, i, len(train_loader), batch_time=batch_time,
162 | data_time=data_time, low_prec_loss=low_prec_losses, low_prec_top1=low_prec_top1,
163 | low_prec_top5=low_prec_top5, distance=distance_meter))
164 |
165 | if summary_writer is not None and (i in summary_point):
166 | step = i / interval + (epoch - 1) * log_per_epoch
167 | summary_writer.add_scalar("distance", distance_meter.avg, step)
168 | summary_writer.add_scalar("loss/low_prec_loss", low_prec_loss, step)
169 | summary_writer.add_scalar("train_low_prec/top-1", low_prec_top1.avg, step)
170 | summary_writer.add_scalar("train_low_prec/top-5", low_prec_top5.avg, step)
171 |
172 | # quantize both weights and activations, starting from an ImageNet pre-trained model
173 | print("=> using imageNet pre-trained model '{}'".format(args.arch))
174 | # fetch the pre-trained parameters
175 | full_prec_model = models.__dict__[args.arch](pretrained=True)
176 | low_prec_model = net_quantize_activation.__dict__[args.arch]()
177 |
178 | model_dict = low_prec_model.state_dict()
179 | imagenet_dict = full_prec_model.state_dict()
180 | model_dict.update(imagenet_dict)
181 | low_prec_model.load_state_dict(model_dict)
182 |
183 | low_prec_layer4 = low_prec_model._modules.get("layer4")
184 | full_prec_layer4 = full_prec_model._modules.get("layer4")
185 |
186 | hook_low_prec = low_prec_layer4.register_forward_hook(low_prec_hook)
187 | hook_full_prec = full_prec_layer4.register_forward_hook(full_prec_hook)
188 |
189 | low_prec_model = gpu_config(low_prec_model)
190 | full_prec_model = gpu_config(full_prec_model)
191 |
192 | # define the loss function and optimizer
193 | criterion = nn.CrossEntropyLoss().cuda(args.gpu)
194 | low_prec_optimizer = torch.optim.SGD(low_prec_model.parameters(),
195 | args.lr,
196 | momentum=args.momentum,
197 | weight_decay=args.weight_decay)
198 |
199 | low_prec_scheduler = torch.optim.lr_scheduler.StepLR(low_prec_optimizer, step_size=args.lr_step, gamma=0.1)
200 |
201 | cudnn.benchmark = True
202 |
203 | val_loader = load_val_data(args.data, args.batch_size, args.workers)
204 | train_loader, train_sampler = load_train_data(args.data, args.batch_size, args.workers, args.distributed)
205 |
206 | # create the tensorboard writer
207 | writer = SummaryWriter(args.save_dir)
208 |
209 | for epoch in range(args.start_epoch, args.epochs+1):
210 | if args.distributed:
211 | train_sampler.set_epoch(epoch)
212 |
213 | low_prec_scheduler.step()
214 |
215 | # train for one epoch
216 | guided_train(writer)
217 |
218 | # evaluate on validation set
219 | low_prec1 = validate(low_prec_model, val_loader, criterion, args.gpu,
220 | epoch, writer, name_prefix='low_prec')
221 |
222 | # remember best prec@1 and save low_prec_checkpoint
223 | is_best_low = low_prec1 > best_low_prec1
224 |
225 | best_low_prec1 = max(low_prec1, best_low_prec1)
226 |
227 | save_checkpoint({
228 | 'epoch': epoch + 1,
229 | 'arch': args.arch,
230 | 'state_dict': low_prec_model.state_dict(),
231 | 'best_prec1': best_low_prec1,
232 | 'optimizer': low_prec_optimizer.state_dict(),
233 | }, is_best_low, args.save_dir, name_prefix="low_prec")
234 |
235 | # close the tensorboard writer
236 | writer.close()
237 |
238 | # remove the hooks
239 |
240 | hook_full_prec.remove()
241 | hook_low_prec.remove()
242 |
243 |
--------------------------------------------------------------------------------
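The hook comments above hinge on one distinction: input[0].data.clone() stores a detached copy (fine for merely viewing the distance, as this file does), while storing the tensor itself keeps it in the autograd graph (needed when the distance joins the loss, as in quantize_guided.py below). A minimal sketch with a hypothetical linear layer:

```python
# Sketch: detached vs. graph-connected forward hooks.
import torch
import torch.nn as nn

captured = {}

def detached_hook(module, inputs, output):
    # .data.clone() detaches: safe for inspection, cannot be backpropagated through
    captured["detached"] = output.data.clone()

def live_hook(module, inputs, output):
    # storing the tensor itself keeps it in the autograd graph
    captured["live"] = output

layer = nn.Linear(4, 4)
layer.register_forward_hook(detached_hook)
layer.register_forward_hook(live_hook)

out = layer(torch.randn(2, 4))
loss = out.sum() + captured["live"].sum()  # the live capture can join the loss
loss.backward()
print(captured["detached"].requires_grad, captured["live"].requires_grad)  # False True
```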
/quantize/quantize_guided.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import os
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.parallel
6 | import torch.backends.cudnn as cudnn
7 | import torch.optim
8 | import torch.utils.data
9 | import torch.utils.data.distributed
10 | import torchvision.models as models
11 | from tensorboardX import SummaryWriter
12 | from collections import defaultdict
13 | import time
14 | import warnings
15 |
16 | from utils.train_val import save_checkpoint, validate
17 | from utils.data_loader import load_train_data, load_val_data
18 | from utils.meter import AverageMeter, accuracy
19 | from quantize.quantize_method import quantize_activations_gemm
20 | from net import net_quantize_activation
21 |
22 |
23 | def guided(args):
24 | best_low_prec1 = 0
25 | best_full_prec1 = 0
26 | full_prec_feature_map1 = defaultdict(torch.Tensor)
27 | full_prec_feature_map2 = defaultdict(torch.Tensor)
28 | low_prec_feature_map1 = defaultdict(torch.Tensor)
29 | low_prec_feature_map2 = defaultdict(torch.Tensor)
30 |
31 | def full_prec_hook(module, input, output):
32 | # Here it must be written as input[0],
33 | # not input[0].data.clone(), otherwise the distance term cannot join backpropagation.
34 |
35 | # Using input[0] directly has its own pitfall:
36 | # RuntimeError: Trying to backward through the graph a second time,
37 | # but the buffers have already been freed. Specify retain_graph=True
38 | # when calling backward the first time
39 | # i.e. distance_loss takes part in both the full- and low-precision backward passes;
40 | # once the low-precision backward frees its graph, the second backward cannot find it (a standalone sketch follows this file)
41 | cudaid = output.device.index # more robust than parsing repr(output.device)
42 | full_prec_feature_map1[cudaid] = input[0]
43 | full_prec_feature_map2[cudaid] = output
44 |
45 | def low_prec_hook(module, input, output):
46 | cudaid = output.device.index
47 | low_prec_feature_map1[cudaid] = input[0]
48 | low_prec_feature_map2[cudaid] = output
49 |
50 | def gpu_config(model):
51 | if args.gpu is not None: # a single, specified GPU
52 | model = model.cuda(args.gpu)
53 | elif args.distributed: # distributed training across machines
54 | model.cuda()
55 | model = torch.nn.parallel.DistributedDataParallel(model)
56 |
57 | else: # single machine, one or more GPUs
58 | if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
59 | model.features = torch.nn.DataParallel(model.features)
60 | model.cuda()
61 | else:
62 | # multi-GPU on one machine: specify the ids of the GPUs to use
63 | model = torch.nn.DataParallel(model, args.device_ids).cuda()
64 | return model
65 |
66 | def guided_train(summary_writer, log_per_epoch=100, print_freq=20):
67 |
68 | batch_time = AverageMeter()
69 | data_time = AverageMeter()
70 |
71 | low_prec_losses = AverageMeter()
72 | low_prec_top1 = AverageMeter()
73 | low_prec_top5 = AverageMeter()
74 |
75 | full_prec_losses = AverageMeter()
76 | full_prec_top1 = AverageMeter()
77 | full_prec_top5 = AverageMeter()
78 | distance_meter = AverageMeter()
79 |
80 | # set both models to training mode
81 | low_prec_model.train()
82 | full_prec_model.train()
83 |
84 | end = time.time()
85 |
86 | # controls how often results are logged to tensorboard
87 | interval = len(train_loader) // log_per_epoch
88 | summary_point = [interval * split for split in torch.arange(log_per_epoch)]
89 |
90 | for i, (input, target) in enumerate(train_loader):
91 | # measure data loading time
92 | data_time.update(time.time() - end)
93 |
94 | if args.gpu is not None:
95 | input = input.cuda(args.gpu, non_blocking=True)
96 |
97 | # target must be moved to a cuda tensor;
98 | # if ``non_blocking=True`` and the source is in pinned memory,
99 | # the copy will be asynchronous with respect to the host
100 | target = target.cuda(args.gpu, non_blocking=True)
101 |
102 | full_prec_feature_map1.clear()
103 | low_prec_feature_map1.clear()
104 | full_prec_feature_map2.clear()
105 | low_prec_feature_map2.clear()
106 |
107 | # compute low_pre_output
108 | low_pre_output = low_prec_model(input)
109 | full_pre_output = full_prec_model(input)
110 |
111 | """Guided Key Point start"""
112 |
113 | # keep distance and the feature maps on the same GPU
114 | distance = torch.tensor([0.0]).cuda(args.gpu, non_blocking=True)
115 | num_layer3_features = 1
116 | for dim in full_prec_feature_map1[0].size():
117 | num_layer3_features *= dim
118 |
119 | num_layer4_features = 1
120 | for dim in full_prec_feature_map2[0].size():
121 | num_layer4_features *= dim
122 |
123 | for cudaid in low_prec_feature_map1:
124 |
125 | layer3 = (quantize_activations_gemm(low_prec_feature_map1[cudaid]) -
126 | quantize_activations_gemm(full_prec_feature_map1[cudaid])).norm(p=args.norm) / num_layer3_features
127 | layer4 = (quantize_activations_gemm(low_prec_feature_map2[cudaid]) -
128 | quantize_activations_gemm(full_prec_feature_map2[cudaid])).norm(p=args.norm) / num_layer4_features
129 | # RuntimeError: arguments are located on different GPUs
130 | # the fix is to move every feature map onto the same GPU: Tensor.cuda(args.gpu, non_blocking=True)
131 | distance += (layer3 + layer4).cuda(args.gpu, non_blocking=True) / len(low_prec_feature_map1)
132 |
133 | distance *= args.balance
134 | low_prec_loss = criterion(low_pre_output, target) + distance
135 | full_prec_loss = criterion(full_pre_output, target) + distance
136 |
137 | low_prec_prec1, low_prec_prec5 = accuracy(low_pre_output, target, topk=(1, 5))
138 | full_prec_prec1, full_prec_prec5 = accuracy(full_pre_output, target, topk=(1, 5))
139 |
140 | low_prec_losses.update(low_prec_loss.item(), input.size(0))
141 | low_prec_top1.update(low_prec_prec1[0], input.size(0))
142 | low_prec_top5.update(low_prec_prec5[0], input.size(0))
143 |
144 | full_prec_losses.update(full_prec_loss.item(), input.size(0))
145 | full_prec_top1.update(full_prec_prec1[0], input.size(0))
146 | full_prec_top5.update(full_prec_prec5[0], input.size(0))
147 | distance_meter.update(distance[0], 1)
148 |
149 | # compute gradient and do SGD step
150 | low_prec_optimizer.zero_grad()
151 | full_prec_optimizer.zero_grad()
152 |
153 | low_prec_loss.backward() # pass retain_graph=True here if full_prec_loss.backward() below is re-enabled
154 | # full_prec_loss.backward()
155 |
156 | # finally, apply the computed gradients
157 | low_prec_optimizer.step()
158 | full_prec_optimizer.step()
159 |
160 | # measure elapsed time
161 | batch_time.update(time.time() - end)
162 | end = time.time()
163 |
164 | if i % print_freq == 0:
165 |
166 | print('Epoch: [{0}][{1}/{2}]\t'
167 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
168 | 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
169 | 'Loss {low_prec_loss.val:.4f} ({low_prec_loss.avg:.4f})\t'
170 | 'Prec@1 {low_prec_top1.val:.3f} ({low_prec_top1.avg:.3f})\t'
171 | 'Prec@5 {low_prec_top5.val:.3f} ({low_prec_top5.avg:.3f}) \t'
172 | 'distance {distance.val:.3f} ({distance.avg:.3f})'.format(
173 | epoch, i, len(train_loader), batch_time=batch_time,
174 | data_time=data_time, low_prec_loss=low_prec_losses, low_prec_top1=low_prec_top1,
175 | low_prec_top5=low_prec_top5, distance=distance_meter))
176 |
177 | print('Epoch: [{0}][{1}/{2}]\t'
178 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
179 | 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
180 | 'Loss {full_prec_loss.val:.4f} ({full_prec_loss.avg:.4f})\t'
181 | 'Prec@1 {full_prec_top1.val:.3f} ({full_prec_top1.avg:.3f})\t'
182 | 'Prec@5 {full_prec_top5.val:.3f} ({full_prec_top5.avg:.3f})'.format(
183 | epoch, i, len(train_loader), batch_time=batch_time,
184 | data_time=data_time, full_prec_loss=full_prec_losses, full_prec_top1=full_prec_top1,
185 | full_prec_top5=full_prec_top5))
186 |
187 | if summary_writer is not None and (i in summary_point):
188 | step = i / interval + (epoch - 1) * log_per_epoch
189 | summary_writer.add_scalar("distance", distance_meter.avg, step)
190 | summary_writer.add_scalar("loss/low_prec_loss", low_prec_loss, step)
191 | summary_writer.add_scalar("train_low_prec/top-1", low_prec_top1.avg, step)
192 | summary_writer.add_scalar("train_low_prec/top-5", low_prec_top5.avg, step)
193 |
194 | summary_writer.add_scalar("loss/full_prec_loss", full_prec_loss, step)
195 | summary_writer.add_scalar("train_full_prec/top-1", full_prec_top1.avg, step)
196 | summary_writer.add_scalar("train_full_prec/top-5", full_prec_top5.avg, step)
197 |
198 | if args.weight_quantized:
199 | print("=> using quantize_tanh-weight model '{}'".format(args.arch))
200 | full_prec_model = models.__dict__[args.arch](pretrained=True)
201 | low_prec_model = net_quantize_activation.__dict__[args.arch]()
202 | if os.path.isfile(args.weight_quantized):
203 | print("=> loading weight_quantized model '{}'".format(args.weight_quantized))
204 | model_dict = low_prec_model.state_dict()
205 | quantized_model = torch.load(args.weight_quantized)
206 | pretrained_dict = {k[7:]: v for k, v in quantized_model['state_dict'].items()
207 | if k[7:] in low_prec_model.state_dict()} # strip the DataParallel "module." prefix
208 | model_dict.update(pretrained_dict)
209 | low_prec_model.load_state_dict(model_dict)
210 | print("=> loaded weight_quantized '{}'".format(args.weight_quantized))
211 | else:
212 | print("=> no quantize_tanh-weight model found at '{}'".format(args.weight_quantized))
213 | else:
214 | # quantize both weights and activations, starting from an ImageNet pre-trained model
215 | print("=> using imageNet pre-trained model '{}'".format(args.arch))
216 | # fetch the pre-trained parameters
217 | full_prec_model = models.__dict__[args.arch](pretrained=True)
218 | low_prec_model = net_quantize_activation.__dict__[args.arch]()
219 |
220 | model_dict = low_prec_model.state_dict()
221 | imagenet_dict = full_prec_model.state_dict()
222 | model_dict.update(imagenet_dict)
223 | low_prec_model.load_state_dict(model_dict)
224 |
225 | if not args.evaluate:
226 | low_prec_layer4 = low_prec_model._modules.get("layer4")
227 | full_prec_layer4 = full_prec_model._modules.get("layer4")
228 |
229 | hook_low_prec = low_prec_layer4.register_forward_hook(low_prec_hook)
230 | hook_full_prec = full_prec_layer4.register_forward_hook(full_prec_hook)
231 |
232 | low_prec_model = gpu_config(low_prec_model)
233 | full_prec_model = gpu_config(full_prec_model)
234 |
235 | # define the loss function and optimizers
236 | criterion = nn.CrossEntropyLoss().cuda(args.gpu)
237 | low_prec_optimizer = torch.optim.SGD(low_prec_model.parameters(),
238 | args.lr,
239 | momentum=args.momentum,
240 | weight_decay=args.weight_decay)
241 | full_prec_optimizer = torch.optim.SGD(full_prec_model.parameters(),
242 | args.lr * args.rate,
243 | momentum=args.momentum,
244 | weight_decay=args.weight_decay)
245 |
246 | # learning-rate schedules
247 | full_prec_scheduler = torch.optim.lr_scheduler.StepLR(full_prec_optimizer, step_size=args.lr_step, gamma=0.1)
248 | low_prec_scheduler = torch.optim.lr_scheduler.StepLR(low_prec_optimizer, step_size=args.lr_step, gamma=0.1)
249 |
250 | # optionally resume from a checkpoint
251 | if args.resume:
252 | full_prec_resume = os.path.join(args.save_dir, "full_prec-checkpoint.pth.tar")
253 | low_prec_resume = os.path.join(args.save_dir, "low_prec-checkpoint.pth.tar")
254 | if os.path.isfile(full_prec_resume) and os.path.isfile(low_prec_resume):
255 | print("=> loading low_prec_checkpoint from '{}' and '{}'".format(full_prec_resume,
256 | low_prec_resume))
257 | full_prec_checkpoint = torch.load(full_prec_resume)
258 | low_prec_checkpoint = torch.load(low_prec_resume)
259 |
260 | args.start_epoch = low_prec_checkpoint['epoch']
261 | # best accuracy reached so far
262 | best_low_prec1 = low_prec_checkpoint['best_prec1']
263 | best_full_prec1 = full_prec_checkpoint['best_prec1']
264 |
265 | low_prec_model.load_state_dict(low_prec_checkpoint['state_dict'])
266 | full_prec_model.load_state_dict(full_prec_checkpoint['state_dict'])
267 |
268 | low_prec_optimizer.load_state_dict(low_prec_checkpoint['optimizer'])
269 | full_prec_optimizer.load_state_dict(full_prec_checkpoint['optimizer'])
270 |
271 | print("=> loaded low_prec_checkpoint from '{}' and '{}' (epoch {})".format(
272 | full_prec_resume, low_prec_resume, low_prec_checkpoint['epoch']))
273 | else:
274 | warnings.warn("=> no checkpoint found at directory '{}'".format(args.save_dir))
275 |
276 | cudnn.benchmark = True
277 |
278 | val_loader = load_val_data(args.data, args.batch_size, args.workers)
279 | train_loader, train_sampler = load_train_data(args.data, args.batch_size, args.workers, args.distributed)
280 |
281 | # create the tensorboard writer
282 | writer = SummaryWriter(args.save_dir)
283 |
284 | for epoch in range(args.start_epoch, args.epochs):
285 | if args.distributed:
286 | train_sampler.set_epoch(epoch)
287 |
288 | full_prec_scheduler.step()
289 | low_prec_scheduler.step()
290 |
291 | # train for one epoch
292 | guided_train(writer)
293 |
294 | # evaluate on validation set
295 | low_prec1 = validate(low_prec_model, val_loader, criterion, args.gpu,
296 | epoch, writer, name_prefix='low_prec')
297 | full_prec1 = validate(full_prec_model, val_loader, criterion, args.gpu,
298 | epoch, writer, name_prefix='full_prec')
299 |
300 | # remember best prec@1 and save low_prec_checkpoint
301 | is_best_low = low_prec1 > best_low_prec1
302 | is_best_full = full_prec1 > best_full_prec1
303 |
304 | best_low_prec1 = max(low_prec1, best_low_prec1)
305 | best_full_prec1 = max(full_prec1, best_full_prec1)
306 |
307 | save_checkpoint({
308 | 'epoch': epoch + 1,
309 | 'arch': args.arch,
310 | 'state_dict': low_prec_model.state_dict(),
311 | 'best_prec1': best_low_prec1,
312 | 'optimizer': low_prec_optimizer.state_dict(),
313 | }, is_best_low, args.save_dir, name_prefix="low_prec")
314 |
315 | save_checkpoint({
316 | 'epoch': epoch + 1,
317 | 'arch': args.arch,
318 | 'state_dict': full_prec_model.state_dict(),
319 | 'best_prec1': best_full_prec1,
320 | 'optimizer': full_prec_optimizer.state_dict(),
321 | }, is_best_full, args.save_dir, name_prefix="full_prec")
322 |
323 | # close the tensorboard writer
324 | writer.close()
325 |
326 | # remove the hooks
327 | if not args.evaluate:
328 | hook_full_prec.remove()
329 | hook_low_prec.remove()
330 |
--------------------------------------------------------------------------------
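The commented-out full_prec_loss.backward() above fails because both losses share the same distance subgraph, and the first backward frees it. A minimal sketch of the failure mode and the retain_graph fix, using plain scalar tensors in place of the two model losses:

```python
# Sketch: two losses sharing a term need retain_graph=True on the first backward.
import torch

w1 = torch.tensor([1.0], requires_grad=True)
w2 = torch.tensor([2.0], requires_grad=True)

shared = (w1 * w2).sum()       # stands in for the shared distance term
loss_a = w1.sum() + shared
loss_b = w2.sum() + shared

loss_a.backward(retain_graph=True)  # keep the shared subgraph alive
loss_b.backward()                   # raises RuntimeError without retain_graph above
print(w1.grad, w2.grad)
```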
/quantize/quantize_method.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | """
3 | """
4 | import torch
5 | import math
6 | import numpy as np
7 |
8 |
9 | # number of quantization bits
10 | QUANTIZE_BIT = 8
11 |
12 |
13 | class QuantizeTanh(torch.autograd.Function):
14 |
15 | @staticmethod
16 | def forward(ctx, i):
17 | n = math.pow(2.0, QUANTIZE_BIT) - 1
18 | return torch.round(i * n) / n
19 |
20 | @staticmethod
21 | def backward(ctx, grad_outputs):
22 | return grad_outputs # straight-through estimator: pass the gradient through unchanged
23 |
24 |
25 | class QuantizeGEMM(torch.autograd.Function):
26 |
27 | @staticmethod
28 | def forward(ctx, i):
29 | n = math.pow(2.0, QUANTIZE_BIT) - 1
30 | v_max = torch.max(i)
31 | v_min = torch.min(i)
32 | scale = (v_max - v_min)/n
33 | scale = max(scale, 1e-8)
34 | zero_point = torch.round(torch.clamp(-v_min/scale, 0, n))
35 | quantize_val = torch.clamp(torch.round(i/scale + zero_point), 0, n)
36 | return (quantize_val-zero_point) * scale
37 |
38 | @staticmethod
39 | def backward(ctx, grad_outputs):
40 | return grad_outputs # straight-through estimator: pass the gradient through unchanged
41 |
42 |
43 | quantize_tanh = QuantizeTanh.apply
44 | quantize_gemm = QuantizeGEMM.apply
45 |
46 |
47 | def quantize_weights_bias_tanh(weight):
48 | tanh_w = torch.tanh(weight)
49 | """
50 | How should torch's odd gradient behaviour of y = w/max(|w|) at the max(|w|) entry be explained?
51 | tensor w ([[ 0.1229, 0.2390],
52 | [ 0.8703, 0.6368]])
53 |
54 | has its gradient come out as ([[ 0.2873, 0.2873],
55 | [-0.3296, 0.2873]])
56 | We have not worked out how torch treats the max(|w|) entry,
57 | but the gradient above is negative where w > 0, which for y = w/max(|w|) we believe is incorrect.
58 | To sidestep this, max(|w|) is treated as a constant while differentiating:
59 | taking Tensor.data keeps the max(|w|) computation out of the autograd graph,
60 | so max_abs_w is simply a constant
61 | """
62 | max_abs_w = torch.max(torch.abs(tanh_w)).data
63 | norm_weight = ((tanh_w / max_abs_w) + 1) / 2
64 |
65 | return 2 * quantize_tanh(norm_weight) - 1
66 |
67 |
68 | def quantize_activations_tanh(activation):
69 | activation = torch.clamp(activation, 0.0, 1.0)
70 | return 2 * quantize_tanh(activation) - 1
71 |
72 |
73 | def quantize_weights_bias_gemm(weight):
74 | return quantize_gemm(weight)
75 |
76 |
77 | def quantize_activations_gemm(activation):
78 | return quantize_gemm(activation)
79 |
80 |
--------------------------------------------------------------------------------
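QuantizeGEMM above is standard affine (asymmetric) quantization: scale = (max - min)/(2^b - 1), a rounded zero point, clamp, then dequantize, with a straight-through backward. A round-trip sketch on a small tensor:

```python
# Sketch: round trip through the 8-bit affine quantizer defined above.
import torch
from quantize.quantize_method import quantize_activations_gemm

x = torch.tensor([-1.0, -0.5, 0.0, 0.4, 1.2])
xq = quantize_activations_gemm(x)

scale = (x.max() - x.min()) / (2 ** 8 - 1)  # the same scale the quantizer derives
print(xq)  # values snapped onto the affine grid
# reconstruction error is on the order of one quantization step
print((x - xq).abs().max().item(), "vs. scale", scale.item())
```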
/quantize/quantize_module_.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | import torch.nn as nn
4 | from quantize.quantize_method import quantize_weights_bias_gemm, quantize_activations_gemm
5 | import torch.nn.functional as F
6 |
7 |
8 | class QWConv2D(torch.nn.Conv2d):
9 | def __init__(self, n_channels, out_channels, kernel_size, stride=1,
10 | padding=0, dilation=1, groups=1, bias=True):
11 | super(QWConv2D, self).__init__(n_channels, out_channels, kernel_size, stride,
12 | padding, dilation, groups, bias)
13 | # nn.init.xavier_normal_(self.weight, 1)
14 | # nn.init.constant_(self.weight, 1)
15 |
16 | def forward(self, input):
17 | """
18 |         The key is to call the functional F.conv2d rather than the nn.Conv2d module, so the weights are re-quantized on every forward pass.
19 | """
20 | qweight = quantize_weights_bias_gemm(self.weight)
21 | if self.bias is not None:
22 | qbias = quantize_weights_bias_gemm(self.bias)
23 | else:
24 | qbias = None
25 | return F.conv2d(input, qweight, qbias, self.stride,
26 | self.padding, self.dilation, self.groups)
27 |
28 |
29 | class QWAConv2D(torch.nn.Conv2d):
30 | def __init__(self, n_channels, out_channels, kernel_size, stride=1,
31 | padding=0, dilation=1, groups=1, bias=True):
32 | super(QWAConv2D, self).__init__(n_channels, out_channels, kernel_size, stride,
33 | padding, dilation, groups, bias)
34 | # nn.init.xavier_normal_(self.weight, 1)
35 | # nn.init.constant_(self.weight, 1)
36 |
37 | def forward(self, input):
38 | qweight = quantize_weights_bias_gemm(self.weight)
39 | if self.bias is not None:
40 | qbias = quantize_weights_bias_gemm(self.bias)
41 | else:
42 | qbias = None
43 | qinput = quantize_activations_gemm(input)
44 | return F.conv2d(qinput, qweight, qbias, self.stride,
45 | self.padding, self.dilation, self.groups)
46 |
47 |
48 | class QWLinear(nn.Linear):
49 |
50 |     def __init__(self, in_features, out_features, bias=True):
51 |         # weights and bias are quantized on the fly in forward()
52 | super(QWLinear, self).__init__(in_features, out_features, bias)
53 |
54 | def forward(self, input):
55 | qweight = quantize_weights_bias_gemm(self.weight)
56 |
57 | if self.bias is not None:
58 | qbias = quantize_weights_bias_gemm(self.bias)
59 | else:
60 | qbias = None
61 |
62 | return F.linear(input, qweight, qbias)
63 |
64 |
65 | class QWALinear(nn.Linear):
66 |
67 | def __init__(self, in_features, out_features, bias=True):
68 | super(QWALinear, self).__init__(in_features, out_features, bias)
69 |
70 | def forward(self, input):
71 | qinput = quantize_activations_gemm(input)
72 | qweight = quantize_weights_bias_gemm(self.weight)
73 |
74 | if self.bias is not None:
75 | qbias = quantize_weights_bias_gemm(self.bias)
76 | else:
77 | qbias = None
78 |
79 | return F.linear(qinput, qweight, qbias)
80 |
81 |
82 | """
83 | The scalar layer from the paper (designed to work across multiple GPUs).
84 | """
85 |
86 |
87 | class Scalar(nn.Module):
88 |
89 | def __init__(self):
90 |         super(Scalar, self).__init__()  # this call is essential
91 |         # Mistake 1:
92 |         # self.scalar = torch.tensor([0.01], requires_grad=True)
93 |         # RuntimeError: Expected object of type torch.FloatTensor
94 |         # but found type torch.cuda.FloatTensor for argument
95 |
96 |         # Mistake 2:
97 |         # self.scalar = torch.tensor([0.01], requires_grad=True).cuda()
98 |         # RuntimeError: arguments are located on different GPUs
99 |
100 |         # Mistake 3:
101 |         # self.scalar = nn.Parameter(torch.tensor(0.01, requires_grad=True))
102 |         # RuntimeError: slice() cannot be applied to a 0-dim tensor;
103 |         # with the square brackets it becomes a 1-dim tensor, which works
104 |
105 |         # Mistake 4:
106 |         # scalar = nn.Parameter(torch.tensor([0.01], requires_grad=True))
107 |         # self.register_buffer("scalar", scalar)
108 |         # scalar gets no gradient updates (always None): register_buffer stores non-trainable state, e.g. BatchNorm running statistics
109 |
110 |         # Working option 1:
111 |         # self.scalar = nn.Parameter(torch.tensor([0.01], requires_grad=True))
112 |
113 |         # Working option 2:
114 |         # scalar = nn.Parameter(torch.tensor([0.01], requires_grad=True))
115 |         # self.register_parameter("scalar", scalar)
116 |
117 |         # based on training experience, 2.5 works well (initialized to 1.0 here)
118 |         self.scalar = nn.Parameter(torch.tensor([1.0], requires_grad=True, dtype=torch.float))
119 |
120 | def forward(self, i):
121 | return self.scalar * i
122 |
123 |
124 | if __name__ == "__main__":
125 |     qconv = QWConv2D(1, 1, 3)
126 |     qconv.zero_grad()
127 |     x = torch.ones(1, 1, 3, 3, requires_grad=True).float()
128 |     y = qconv(x)
129 |     y.backward()
130 |     print("QWConv2D weight gradient:", qconv.weight.grad)
131 |
132 |     # compute the gradient directly
133 |     a = torch.ones(3, 3, requires_grad=True).float()
134 |     w = nn.init.constant_(torch.empty(3, 3, requires_grad=True), 1)
135 |     qw = quantize_weights_bias_gemm(w)
136 |
137 |     z = (qw * a).sum()
138 |     z.backward()
139 |     print("weight gradient:", w.grad)
140 |
141 |     a.grad = None  # verify the quantization gradient; clear the grad accumulated by z.backward() first
142 |     qa = quantize_weights_bias_gemm(a).sum()
143 |     qa.backward()
144 |     print("gradient through the quantizer:", a.grad)
--------------------------------------------------------------------------------
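The modules above are drop-in replacements for `nn.Conv2d` / `nn.Linear` that re-quantize their parameters on every forward pass. A sketch of how one might swap them into an existing network; the `quantize_convs` helper below is hypothetical, not part of this repo:

```python
import torch.nn as nn
from quantize.quantize_module_ import QWConv2D

def quantize_convs(model):
    # recursively rebuild every nn.Conv2d as a weight-quantized QWConv2D
    for name, m in model.named_children():
        if isinstance(m, nn.Conv2d):
            q = QWConv2D(m.in_channels, m.out_channels, m.kernel_size,
                         m.stride, m.padding, m.dilation, m.groups,
                         bias=m.bias is not None)
            q.weight.data.copy_(m.weight.data)   # keep the pre-trained weights
            if m.bias is not None:
                q.bias.data.copy_(m.bias.data)
            setattr(model, name, q)
        else:
            quantize_convs(m)                    # recurse into submodules
    return model
```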
/quantize/quantize_old_plan.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | import torch.nn as nn
4 | import queue
5 | from quantize.quantize_method import QuantizeTanh
6 |
7 |
8 | class QuantizeWeightOrActivation(object):
9 | def __init__(self):
10 | self.saved_param = queue.Queue()
11 | self.saved_grad = queue.Queue()
12 |         self.quantize_fn = QuantizeTanh.apply  # the quantization function
13 |
14 | def quantize_weights_bias(self, weight):
15 | tanh_w = torch.tanh(weight)
16 |         """
17 |         How should torch's odd gradient behavior of y = w/max(|w|) at the max(|w|) entry be explained?
18 |         w:    tensor([[ 0.1229,  0.2390],
19 |                       [ 0.8703,  0.6368]])
20 |         gradient of y.sum() w.r.t. w:
21 |               tensor([[ 0.2873,  0.2873],
22 |                       [-0.3296,  0.2873]])
23 |         It is unclear how torch differentiates through max(|w|), but the gradient
24 |         at the max entry is negative, which looks wrong for w > 0.
25 |         To sidestep this, max(|w|) is treated as a constant during backprop:
26 |         reading it via Tensor.data keeps the max(|w|) computation out of the
27 |         autograd graph, so max_abs_w is simply a constant.
28 |         """
29 | max_abs_w = torch.max(torch.abs(tanh_w)).data
30 | norm_weight = ((tanh_w / max_abs_w) + 1) / 2
31 |
32 | return 2 * self.quantize_fn(norm_weight) - 1
33 |
34 | def quantize_activations(self, activation):
35 | activation = torch.clamp(activation, 0.0, 1.0)
36 | return self.quantize_fn(activation)
37 |
38 | def quantize(self, m):
39 | # isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear)
40 | if type(m) == nn.Conv2d or type(m) == nn.Linear:
41 |             self.saved_param.put(m.weight.data.clone())  # step 1: save the full-precision weights
42 |             quantize_w = self.quantize_weights_bias(m.weight)  # step 2: quantize the weights
43 |             quantize_w.sum().backward()
44 |             self.saved_grad.put(m.weight.grad.data.clone())  # step 3: save the quantization gradient
45 |             m.weight.data.copy_(quantize_w.data)  # step 4: replace the full-precision weights with the quantized ones
46 |             # m.zero_grad()  # not needed: the later optimizer.zero_grad() clears all of m's gradients
47 |
48 |         if type(m) == nn.Linear:  # also quantize the bias
49 | self.saved_param.put(m.bias.data.clone())
50 | quantize_b = self.quantize_weights_bias(m.bias)
51 | quantize_b.sum().backward()
52 | self.saved_grad.put(m.bias.grad.data.clone())
53 | m.bias.data.copy_(quantize_b.data)
54 |
55 | def restore(self, m):
56 | if type(m) == nn.Conv2d or type(m) == nn.Linear:
57 |             m.weight.data.copy_(self.saved_param.get())  # step 5: restore the full-precision weights
58 |
59 | if type(m) == nn.Linear:
60 | m.bias.data.copy_(self.saved_param.get())
61 |
62 | def update_grad(self, m):
63 | if type(m) == nn.Conv2d or type(m) == nn.Linear:
64 |             m.weight.grad.data.mul_(self.saved_grad.get())  # step 6: chain rule - multiply by the saved quantization gradient
65 |
66 | if type(m) == nn.Linear:
67 | m.bias.grad.data.mul_(self.saved_grad.get())
68 |
69 | @staticmethod
70 | def info(net, s):
71 | print("\n-----------{}--------\n".format(s))
72 | for k, v in net.state_dict().items():
73 | print(k, "\n", v)
74 |
--------------------------------------------------------------------------------
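The six numbered steps above are meant to wrap an ordinary training step, mirroring the commented-out calls in `utils/train_val.py`. A minimal sketch, assuming `model`, `criterion`, `optimizer` and a batch `(data, target)` already exist:

```python
from quantize.quantize_old_plan import QuantizeWeightOrActivation

def train_step(model, criterion, optimizer, data, target):
    qw = QuantizeWeightOrActivation()
    model.apply(qw.quantize)       # steps 1-4: swap quantized weights in
    loss = criterion(model(data), target)
    optimizer.zero_grad()          # also clears the grads left by qw.quantize
    loss.backward()                # gradients w.r.t. the quantized weights
    model.apply(qw.restore)        # step 5: swap the full-precision weights back
    model.apply(qw.update_grad)    # step 6: chain rule through the quantizer
    optimizer.step()               # update the full-precision weights
    return loss.item()
```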
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchvision
3 | tensorboardx
4 | tensorflow
5 | opencv-python
6 | numpy
--------------------------------------------------------------------------------
/utils/data_loader.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | from torchvision import transforms
4 | import os
5 | from torchvision import datasets
6 |
7 |
8 | def load_train_data(data_dir, batch_size, workers, distributed):
9 |
10 | traindir = os.path.join(data_dir, 'train')
11 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
12 | std=[0.229, 0.224, 0.225])
13 |
14 |     # training mode: load the training data
15 | train_dataset = datasets.ImageFolder(
16 | traindir,
17 | transforms.Compose([
18 | transforms.RandomResizedCrop(224),
19 | transforms.RandomHorizontalFlip(),
20 | transforms.ToTensor(),
21 | normalize,
22 | ]))
23 |
24 | if distributed:
25 | train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
26 | else:
27 | train_sampler = None
28 |
29 | train_loader = torch.utils.data.DataLoader(
30 | train_dataset,
31 | batch_size=batch_size, shuffle=(train_sampler is None),
32 | num_workers=workers, pin_memory=True,
33 | sampler=train_sampler)
34 |
35 | return train_loader, train_sampler
36 |
37 |
38 | def load_val_data(data_dir, batch_size=64, workers=8):
39 | valdir = os.path.join(data_dir, 'val')
40 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
41 | std=[0.229, 0.224, 0.225])
42 |
43 |     # load the validation data
44 | val_loader = torch.utils.data.DataLoader(
45 | datasets.ImageFolder(valdir, transforms.Compose([
46 | transforms.Resize(256),
47 | transforms.CenterCrop(224),
48 | transforms.ToTensor(),
49 | normalize,
50 | ])),
51 | batch_size=batch_size, shuffle=False,
52 | num_workers=workers, pin_memory=True)
53 | return val_loader
54 |
--------------------------------------------------------------------------------
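Wiring the two loaders into a script is straightforward; the data directory below is a placeholder and assumes the usual ImageNet layout with `train/` and `val/` subdirectories:

```python
from utils.data_loader import load_train_data, load_val_data

DATA_DIR = "/path/to/imagenet"  # placeholder path
train_loader, train_sampler = load_train_data(DATA_DIR, batch_size=256,
                                              workers=8, distributed=False)
val_loader = load_val_data(DATA_DIR, batch_size=64, workers=8)
```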
/utils/meter.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | import logging
4 |
5 |
6 | class AverageMeter(object):
7 | """Computes and stores the average and current value"""
8 |
9 | def __init__(self):
10 | self.val = 0
11 | self.avg = 0
12 | self.sum = 0
13 | self.count = 0
14 |
15 | def reset(self):
16 | self.val = 0
17 | self.avg = 0
18 | self.sum = 0
19 | self.count = 0
20 |
21 | def update(self, val, n=1):
22 | self.val = val
23 | self.sum += val * n
24 | self.count += n
25 | self.avg = self.sum / self.count
26 |
27 |
28 | def accuracy(output, target, topk=(1,)):
29 | """Computes the precision@k for the specified values of k"""
30 | with torch.no_grad():
31 | maxk = max(topk)
32 | batch_size = target.size(0)
33 |
34 |         # pred: torch.Size([128, 5])
35 |         _, pred = output.topk(maxk, 1, True, True)
36 |         pred = pred.t()  # transpose
37 |         # pred: torch.Size([5, 128])
38 |
39 |         # with batch_size 128, target has shape torch.Size([128]);
40 |         # i.e. targets are class ids, not one-hot vectors
41 |         target = target.view(1, -1).expand_as(pred)
42 |         # [128] =>view=> [1, 128] =>expand_as=> [5, 128]
43 | correct = pred.eq(target) # eq: Computes element-wise equality
44 |
45 | res = []
46 | for k in topk:
47 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
48 | res.append(correct_k.mul_(100.0 / batch_size))
49 | return res
50 |
51 |
52 | def get_logger(logger_name="nowgood", filename=None, level=logging.INFO):
53 | logger = logging.getLogger(logger_name)
54 | logger.setLevel(level)
55 |
56 |     # output format for all handlers
57 | formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s')
58 |
59 | if filename is not None:
60 |         # handler that writes to a log file
61 | fh = logging.FileHandler(filename)
62 | fh.setLevel(level)
63 | fh.setFormatter(formatter)
64 | logger.addHandler(fh)
65 |
66 |     # handler that writes to the console
67 | ch = logging.StreamHandler()
68 | ch.setLevel(level)
69 | ch.setFormatter(formatter)
70 |     # attach the handler to the logger
71 | logger.addHandler(ch)
72 |
73 |     # to disable this logger:
74 |     # logger.disabled = True
75 |
76 | return logger
77 |
--------------------------------------------------------------------------------
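A quick sanity check of `accuracy()` on a toy batch (4 samples, 3 classes), illustrating that targets are class ids rather than one-hot vectors:

```python
import torch
from utils.meter import accuracy

logits = torch.tensor([[2.0, 1.0, 0.1],
                       [0.1, 2.0, 1.0],
                       [2.0, 0.1, 1.0],
                       [0.1, 1.0, 2.0]])
target = torch.tensor([0, 1, 2, 2])          # class ids
top1, top2 = accuracy(logits, target, topk=(1, 2))
print(top1.item(), top2.item())              # 75.0 100.0
```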
/utils/train_val.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import time
3 | import torch
4 | import os
5 | import shutil
6 | import torch.nn.parallel
7 | import torch.optim
8 | import torch.utils.data
9 | import torch.utils.data.distributed
10 | from utils.meter import AverageMeter, accuracy
11 |
12 |
13 |
14 | best_prec1 = 0
15 |
16 |
17 | def train(model, train_loader, criterion, optimizer, gpu, epoch=0,
18 | summary_writer=None, log_per_epoch=100, print_freq=30):
19 |
20 | batch_time = AverageMeter()
21 | data_time = AverageMeter()
22 | losses = AverageMeter()
23 | top1 = AverageMeter()
24 | top5 = AverageMeter()
25 |
26 | # switch to train mode
27 | model.train()
28 |
29 | # if not full_precision:
30 |     # qw = QuantizeWeightOrActivation()  # step 1: create the quantizer
31 | end = time.time()
32 |
33 |     # controls how often results are logged to tensorboard
34 |     interval = len(train_loader) // log_per_epoch
35 |     summary_point = [interval * split for split in range(log_per_epoch)]
36 |
37 | for i, (data, target) in enumerate(train_loader):
38 |         data_time.update(time.time() - end)  # measure data loading time
39 |
40 | if gpu is not None:
41 | data = data.cuda(gpu, non_blocking=True)
42 | target = target.cuda(gpu, non_blocking=True)
43 |
44 | # if not full_precision:
45 |         # model.apply(qw.quantize)  # step 2: quantize the weights; save the full-precision weights and quantization gradients
46 |
47 | output = model(data)
48 | loss = criterion(output, target)
49 |
50 | # measure accuracy and record loss
51 | prec1, prec5 = accuracy(output, target, topk=(1, 5))
52 | losses.update(loss.item(), data.size(0))
53 | top1.update(prec1[0], data.size(0))
54 | top5.update(prec5[0], data.size(0))
55 |
56 | # compute gradient and do SGD step
57 | optimizer.zero_grad()
58 | loss.backward()
59 |
60 | # if not full_precision:
61 |         # model.apply(qw.restore)  # step 3: after backprop, restore the full-precision weights
62 |         # model.apply(qw.update_grad)  # step 4: multiply the backprop gradients by the stored quantization gradients
63 |
64 | optimizer.step()
65 |
66 | # measure elapsed time
67 | batch_time.update(time.time() - end)
68 | end = time.time()
69 |
70 | # 控制台
71 | if i % print_freq == 0:
72 | print('Epoch: [{0}][{1}/{2}]\t'
73 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
74 | 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
75 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
76 | 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
77 | 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
78 | epoch, i, len(train_loader), batch_time=batch_time,
79 | data_time=data_time, loss=losses, top1=top1, top5=top5))
80 |
81 | if summary_writer and (i in summary_point):
82 | step = i//interval + epoch * log_per_epoch
83 |             summary_writer.add_scalar("loss/train_loss", loss.item(), step)
84 | summary_writer.add_scalar("train/top-1", top1.avg, step)
85 | summary_writer.add_scalar("train/top-5", top5.avg, step)
86 |
87 |
88 | def validate(model, val_loader, criterion, gpu=0, epoch=0, summary_writer=None, name_prefix=None, print_freq=20):
89 |
90 | batch_time = AverageMeter()
91 | losses = AverageMeter()
92 | top1 = AverageMeter()
93 | top5 = AverageMeter()
94 |
95 | loss_name = "val/loss"
96 | prec1_name = "val/top-1"
97 | prec5_name = "val/top-5"
98 |
99 | if name_prefix is not None:
100 | name_prefix = ''.join((name_prefix, '-'))
101 | loss_name = ''.join((name_prefix, loss_name))
102 | prec1_name = ''.join((name_prefix, prec1_name))
103 | prec5_name = ''.join((name_prefix, prec5_name))
104 |
105 |     # switch to eval mode
106 | model.eval()
107 |
108 | # if not full_precision:
109 |     # qw = QuantizeWeightOrActivation()  # step 1: create the quantizer
110 |     # model.apply(qw.quantize)  # step 2: quantize the weights; save full-precision weights and quantization gradients
111 |
112 | with torch.no_grad():
113 | start = time.time()
114 | for i, (data, target) in enumerate(val_loader):
115 | if gpu is not None:
116 | data = data.cuda(gpu, non_blocking=True)
117 |
118 |             # with batch_size 128, target has shape torch.Size([128])
119 | target = target.cuda(gpu, non_blocking=True)
120 | output = model(data)
121 | loss = criterion(output, target)
122 |
123 | # measure accuracy and record loss
124 | prec1, prec5 = accuracy(output, target, topk=(1, 5))
125 | losses.update(loss.item(), data.size(0))
126 | top1.update(prec1[0], data.size(0))
127 | top5.update(prec5[0], data.size(0))
128 |
129 | # measure elapsed time
130 | batch_time.update(time.time() - start)
131 | start = time.time()
132 |
133 | if i % print_freq == 0:
134 | print('Test: [{0}/{1}]\t'
135 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
136 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
137 | 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
138 | 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
139 | i, len(val_loader), batch_time=batch_time,
140 | loss=losses, top1=top1, top5=top5))
141 |
142 | if summary_writer is not None:
143 | summary_writer.add_scalar(loss_name, losses.avg, epoch)
144 | summary_writer.add_scalar(prec1_name, top1.avg, epoch)
145 | summary_writer.add_scalar(prec5_name, top5.avg, epoch)
146 |
147 | print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(top1=top1, top5=top5))
148 |
149 | # if not full_precision:
150 |     # model.apply(qw.restore)  # step 3: restore the full-precision weights
151 |
152 | return top1.avg
153 |
154 |
155 | def save_checkpoint(state, is_best, save_dir, name_prefix=None,
156 | checkpoint_name='checkpoint.pth.tar',
157 |                     model_best_name='model_best.pth.tar'):
158 | if save_dir:
159 | if not os.path.exists(save_dir):
160 | os.makedirs(save_dir)
161 | print("=> checkpoint directory: {}".format(save_dir))
162 |
163 | if name_prefix is not None:
164 | name_prefix = ''.join((name_prefix, '-'))
165 | else:
166 | name_prefix = ''
167 |
168 | checkpoint = os.path.join(save_dir, name_prefix + checkpoint_name)
169 |     model_best = os.path.join(save_dir, name_prefix + model_best_name)
170 |
171 | torch.save(state, checkpoint)
172 | if is_best:
173 | shutil.copyfile(checkpoint, model_best)
--------------------------------------------------------------------------------
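To resume from a checkpoint written by `save_checkpoint()`, load the saved dict back into the model and optimizer. A sketch: the file name is hypothetical (it corresponds to `save_dir="model"`, `name_prefix="low_prec"`), and it assumes the state dict was saved from an unwrapped model (no `DataParallel` `module.` prefixes):

```python
import torch
import torchvision.models as models

model = models.resnet18()
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

ckpt = torch.load("model/low_prec-checkpoint.pth.tar", map_location="cpu")
model.load_state_dict(ckpt['state_dict'])
optimizer.load_state_dict(ckpt['optimizer'])
start_epoch, best_prec1 = ckpt['epoch'], ckpt['best_prec1']
print("resumed {} at epoch {}".format(ckpt['arch'], start_epoch))
```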
/utils/unzip.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # extract every .tar archive in the current directory into a directory named after it
4 | for x in *.tar
5 | do
6 |     filename=`basename $x .tar`
7 |     mkdir $filename
8 |     tar -xvf $x -C ./$filename
9 | done
--------------------------------------------------------------------------------