├── .idea
│   ├── misc.xml
│   ├── modules.xml
│   ├── quantizednn.iml
│   └── vcs.xml
├── README.md
├── config
├── data
│   ├── WandA_lr0.01_scalar2.5.png
│   └── smurf.jpg
├── graffiti
│   ├── QConv2D.py
│   ├── README.md
│   ├── auto_grad.py
│   ├── caffe_guidenet_weight_init.py
│   ├── diffierent_gpu_grad_backward.py
│   ├── float32touint8.py
│   ├── get_module_weight.py
│   ├── inference_on_image.py
│   ├── merge_conv_bn.py
│   ├── nowgood.py
│   ├── quantize_test.py
│   ├── register_forward_hook.py
│   ├── stat_parameters.py
│   └── weight_distribute.py
├── main.py
├── net
│   ├── net_bn_conv_merge.py
│   ├── net_bn_conv_merge_quantize.py
│   ├── net_quantize_activation.py
│   ├── net_quantize_guide.py
│   ├── net_quantize_weight.py
│   └── simple_net.py
├── quantize
│   ├── guided_distance_view.py
│   ├── quantize_guided.py
│   ├── quantize_method.py
│   ├── quantize_module_.py
│   └── quantize_old_plan.py
├── requirements.txt
└── utils
    ├── data_loader.py
    ├── meter.py
    ├── train_val.py
    ├── unzip.sh
    └── valprep.sh

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Quantize CNN Model using PyTorch (Python 3.5)

Implements [Towards Effective Low-bitwidth Convolutional Neural Networks](https://arxiv.org/abs/1711.00205):

```
@InProceedings{Zhuang_2018_CVPR,
    author = {Zhuang, Bohan and Shen, Chunhua and Tan, Mingkui and Liu, Lingqiao and Reid, Ian},
    title = {Towards Effective Low-Bitwidth Convolutional Neural Networks},
    booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
    month = {June},
    year = {2018}
}
```

### Download and setup

```bash
git clone https://github.com/nowgood/QuantizeCNNModel.git && cd QuantizeCNNModel
pip install -r requirements.txt
echo export PYTHONPATH=$PYTHONPATH:`pwd` >> ~/.bashrc
source ~/.bashrc
```

### Usage

Run the following command to list all command-line options:

```
python main.py -h
```

Then use TensorBoard to monitor training:

```
# from the QuantizeCNNModel directory
tensorboard --logdir model/xxx/
```

The training loss and accuracy, as well as the per-epoch accuracy on the validation set, can then be viewed at `http://localhost:6006`.

![top5](https://github.com/nowgood/QuantizeCNNModel/raw/master/data/WandA_lr0.01_scalar2.5.png)

### Training modes

Choose the training mode with `--mode`:

    0: full-precision training from scratch
    1: quantize weights only
    2: quantize activations, using the quantized weights to initialize the model
    3: jointly quantize weights and activations from a pre-trained ImageNet model
    4: guided joint quantization of weights and activations from a pre-trained ImageNet model
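The quantizers behind these modes live in `quantize/quantize_method.py`. As a rough sketch of the weight quantizer only (a minimal version assuming the paper's tanh normalization followed by k-bit uniform quantization; `quantize_weight_sketch` and `bits` are illustrative names, and the real code backpropagates through the rounding with a straight-through-style estimator):

```python
import torch

def quantize_weight_sketch(w, bits=2):
    # normalize into [-1, 1] with tanh, scaling by the maximum absolute value
    w = torch.tanh(w) / torch.max(torch.abs(torch.tanh(w)))
    # uniform quantization: map to [0, 1], round to 2**bits - 1 levels, map back
    n = float(2 ** bits - 1)
    q = torch.round((w + 1) / 2 * n) / n
    return 2 * q - 1
```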
### Quantize weights

Single-machine, multi-GPU training. For example, the two runs below each use four of the machine's eight GPUs (the first four and the last four) to train for 5 and 10 epochs respectively:

```
CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py \
--mode 1 \
--workers 16 \
--epochs 5 \
--batch-size 1024 \
--device-ids 0 1 2 3 \
--lr 0.0001 \
--lr-step 2 \
--save-dir model/W_lr1e-4_epoch5 \
--data /home/user/wangbin/datasets/ILSVRC2012 \
2>&1 | tee model/W_lr_1e-4_epoch5.log
```

```
CUDA_VISIBLE_DEVICES=4,5,6,7 python main.py \
--mode 1 \
--workers 16 \
--epochs 10 \
--batch-size 1024 \
--device-ids 0 1 2 3 \
--lr 0.0001 \
--lr-step 4 \
--save-dir model/W_lr1e-4_epoch10 \
--data /home/user/wangbin/datasets/ILSVRC2012 \
| tee model/W_lr_1e-4_epoch10.log
```

### Initialize the activation-quantized network from the weight-quantized model

```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py \
--mode 2 \
--workers 16 \
--epochs 35 \
--batch-size 1024 \
--device-ids 0 1 2 3 \
--lr 0.001 \
--weight-quantized model/W_lr1e-4_epoch2/model_best.pth.tar \
--save-dir model/AafterW_lr1e-2_epoch35 \
--data /home/user/wangbin/datasets/ILSVRC2012 \
| tee model/AafterW_lr1e-2_epoch35.log
```

**resume**

```bash
CUDA_VISIBLE_DEVICES=0,1,2,3 python main.py \
--mode 2 \
--workers 16 \
--epochs 35 \
--batch-size 1024 \
--device-ids 0 1 2 3 \
--lr 0.001 \
--resume \
--weight-quantized model/W_lr1e-4_epoch2/model_best.pth.tar \
--save-dir model/AafterW_lr1e-3_epoch35 \
--data /home/user/wangbin/datasets/ILSVRC2012 \
| tee model/AafterW_lr1e-3_epoch35.log
```

### Quantize weights and activations simultaneously

```
CUDA_VISIBLE_DEVICES=4,5,6,7 python main.py \
--mode 3 \
--arch resnet18 \
--workers 16 \
--epochs 35 \
--batch-size 512 \
--device-ids 0 1 2 3 \
--lr 0.001 \
--lr-step 10 \
--data /home/user/wangbin/datasets/ILSVRC2012 \
--save-dir model/AandW_lr1e-3_epoch35 \
| tee AandW_1e-3_epoch35.log
```

```
CUDA_VISIBLE_DEVICES=3,4,5,6 python main.py \
--mode 3 \
--arch resnet18 \
--workers 16 \
--epochs 50 \
--batch-size 512 \
--device-ids 0 1 2 3 \
--lr 0.1 \
--lr-step 15 \
--data /home/user/wangbin/datasets/ILSVRC2012 \
--save-dir model/AandW_gemm_lr1e-1_epoch50 \
| tee AandW_gemm_1e-1_epoch50.log
```

### Quantize weights and activations simultaneously with a guidance signal

```bash
CUDA_VISIBLE_DEVICES=3,4,5,6 python main.py \
--mode 4 \
--workers 16 \
--epochs 35 \
--batch-size 512 \
--device-ids 0 1 2 3 \
--balance 0.1 \
--lr 0.001 \
--rate 1 \
--norm 1 \
--data /home/user/wangbin/datasets/ILSVRC2012 \
--save-dir /home/user/wangbin/quantizednn/model/guided_balance0.1_lr1e-3_rate1_epoch35 \
| tee model/guided_balance0.1_lr1e-3_rate1_epoch35.log
```

```bash
CUDA_VISIBLE_DEVICES=1,2,3 python main.py \
--mode 4 \
--workers 16 \
--epochs 35 \
--batch-size 384 \
--device-ids 0 1 2 \
--balance 0.1 \
--lr 0.001 \
--rate 1 \
--norm 1 \
--data /home/user/wangbin/datasets/ILSVRC2012 \
--resume \
--save-dir /home/user/wangbin/quantizednn/model/guided_balance0.1_lr1e-3_rate1_epoch35 \
| tee model/guided_balance0.1_lr1e-3_rate1_epoch35_resume.log
```
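In mode 4 the guidance term couples the full-precision and low-precision networks through a distance between their feature maps. Schematically (a sketch only, with `--balance` as the weighting factor and `--norm` selecting the L1 or L2 distance; see `quantize/quantize_guided.py` for the actual objective):

```
loss_low  = CE(low_prec(x),  y) + balance * distance_norm(f_low, f_full)
loss_full = CE(full_prec(x), y) + balance * distance_norm(f_low, f_full)
```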
#### View distance

```bash
CUDA_VISIBLE_DEVICES=4,5,6,7 python main.py \
--mode 4 \
--workers 16 \
--epochs 35 \
--batch-size 512 \
--device-ids 0 1 2 3 \
--balance 0.1 \
--lr 0.001 \
--rate 1 \
--norm 1 \
--data /home/user/wangbin/datasets/ILSVRC2012 \
--save-dir /home/user/wangbin/quantizednn/model/guided_balance0.1_lr1e-3_rate1_epoch35_view
```

--------------------------------------------------------------------------------
/config:
--------------------------------------------------------------------------------
## mode 2

--mode 2
--epochs 30
--batch-size 64
--device-ids 0
--lr 0.001
--weight-quantized /home/wangbin/Desktop/uisee/model_quantize/W_lr1e-4_epoch10/checkpoint.pth.tar
--save-dir model
--data /media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/ImageNet_ILSVRC2012/ILSVRC2012

## mode 3

--mode 3
--epochs 30
--batch-size 64
--device-ids 0
--lr 0.001
--save-dir model
--data /media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/ImageNet_ILSVRC2012/ILSVRC2012

## mode 4

--mode 4
--epochs 30
--batch-size 64
--device-ids 0
--balance 100000
--lr 0.001
--save-dir model
--data /media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/ImageNet_ILSVRC2012/ILSVRC2012

--------------------------------------------------------------------------------
/data/WandA_lr0.01_scalar2.5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nowgood/QuantizeCNNModel/531fd8d12fdb42f48a91e9378008a1bb86905d6d/data/WandA_lr0.01_scalar2.5.png

--------------------------------------------------------------------------------
/data/smurf.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nowgood/QuantizeCNNModel/531fd8d12fdb42f48a91e9378008a1bb86905d6d/data/smurf.jpg

--------------------------------------------------------------------------------
/graffiti/QConv2D.py:
--------------------------------------------------------------------------------
# coding=utf-8
import torch
import torch.nn as nn
import torch.nn.functional as F
from quantize.quantize_method import QuantizeWeightOrActivation

quantize = QuantizeWeightOrActivation()


class QConv2D(torch.nn.Conv2d):
    def __init__(self, n_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True):
        super(QConv2D, self).__init__(n_channels, out_channels, kernel_size, stride,
                                      padding, dilation, groups, bias)
        nn.init.constant_(self.weight, 1)

    def forward(self, x):
        # quantize the weights on the fly, then convolve; forward the layer's
        # own bias/stride/padding/dilation/groups instead of silently dropping them
        qweight = quantize.quantize_weights_bias(self.weight)
        return F.conv2d(x, qweight, self.bias, self.stride,
                        self.padding, self.dilation, self.groups)


if __name__ == "__main__":
    qconv = QConv2D(1, 1, 3)
    qconv.zero_grad()
    x = torch.ones(1, 1, 3, 3, requires_grad=True).float()
    y = qconv(x)
    y.backward()  # y is a single element (3x3 input, 3x3 kernel, no padding)
    print(qconv.weight.grad)

    a = torch.ones(3, 3, requires_grad=True).float()
    w = torch.nn.init.constant_(torch.empty(3, 3, requires_grad=True), 1)
    qw = quantize.quantize_weights_bias(w)

    z = (qw * a).sum()
    z.backward()
    print(w.grad)

    qa = quantize.quantize_weights_bias(a).sum()
    qa.backward()
    print(a.grad)
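    # Added sanity check (assumption: quantize_weights_bias normalizes with tanh
    # and a max-scale, so every quantized weight should land in [-1, 1]):
    qweight = quantize.quantize_weights_bias(qconv.weight)
    print("max |quantized weight|:", qweight.abs().max().item())  # expect <= 1.0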
--------------------------------------------------------------------------------
/graffiti/README.md:
--------------------------------------------------------------------------------

### Usage: [argparse](http://wiki.jikexueyuan.com/project/explore-python/Standard-Modules/argparse.html)

```
Each parameter is explained below:

name or flags - the option's name or a list of names, e.g. foo or -f, --foo.
action - what to do when the argument is encountered on the command line; the default is store.
    store_const: store the value given by const;
    append: collect repeated occurrences of the argument into a list;
    append_const: append a value defined in the argument spec to a list;
    count: store the number of occurrences; you can also subclass argparse.Action
    to implement custom argument parsing.
nargs - how many command-line arguments to consume: a concrete number; or '?', in which case
    a positional argument falls back to default and an optional argument to const when
    no value is given; or '*' for zero or more arguments; or '+' for one or more.
const - the constant value required by some action and nargs combinations.
default - the value used when the argument is absent.
type - the type the command-line argument should be converted to.
choices - a container of the values allowed for the argument.
required - whether an optional argument may be omitted (optional arguments only).
help - the argument's help text; argparse.SUPPRESS hides the argument from help output.
metavar - the argument's name in usage messages; defaults to the argument name itself for
    positional arguments and to the upper-cased name for optional arguments.
dest - the attribute name after parsing; by default the longest option name, with
    dashes converted to underscores.
```

### Usage imagenet.py

```
usage: guided.py [-h] [--arch ARCH] [-j N] [--epochs N] [--start-epoch N] [-b N]
                 [--lr LR] [--momentum M] [--weight-decay W] [--print-freq N]
                 [--resume PATH] [-e] [--pretrained]
                 DIR

PyTorch ImageNet Training

positional arguments:
  DIR                   path to dataset

optional arguments:
  -h, --help            show this help message and exit
  --arch ARCH, -a ARCH  model architecture: alexnet | resnet | resnet101 |
                        resnet152 | resnet18 | resnet34 | resnet50 | vgg |
                        vgg11 | vgg11_bn | vgg13 | vgg13_bn | vgg16 | vgg16_bn
                        | vgg19 | vgg19_bn (default: resnet18)
  -j N, --workers N     number of data loading workers (default: 4)
  --epochs N            number of total epochs to run
  --start-epoch N       manual epoch number (useful on restarts)
  -b N, --batch-size N  mini-batch size (default: 256)
  --lr LR, --learning-rate LR
                        initial learning rate
  --momentum M          momentum
  --weight-decay W, --wd W
                        weight decay (default: 1e-4)
  --print-freq N, -p N  print frequency (default: 10)
  --resume PATH         path to latest checkpoint (default: none)
  -e, --evaluate        evaluate model on validation set
  --pretrained          use pre-trained model

```

### use pretrained model to initialize your modified model

```
model_dict = your_model.state_dict()

pretrained_model = models.__dict__[args.arch](pretrained=True)
pretrained_dict = pretrained_model.state_dict()

# drop the keys in pretrained_dict that do not exist in model_dict
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}

model_dict.update(pretrained_dict)
your_model.load_state_dict(model_dict)
```

### how to get nn.DataParallel model filter weight

```python
low_prec_state_dict = low_prec_model.state_dict()
full_prec_state_dict = full_prec_model.state_dict()
low_prec_norm = low_prec_state_dict[qconv1].norm(p=2) + low_prec_state_dict[qlayer4].norm(p=2)
full_prec_norm = full_prec_state_dict[qconv1].norm(p=2) + full_prec_state_dict[qlayer4].norm(p=2)

l2 = (low_prec_norm + full_prec_norm) * args.balance
```

### torch.topk

```
>>> x = torch.arange(1, 6)
>>> x
tensor([ 1.,  2.,  3.,  4.,  5.])
>>> torch.topk(x, 3)
(tensor([ 5.,  4.,  3.]), tensor([ 4,  3,  2]))
```
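Building on that, a sketch of how `torch.topk` is typically used to compute the top-k accuracy reported during training; the shapes are assumptions (`output` is `(N, C)` logits, `target` is `(N,)` class indices), and `topk_accuracy` is an illustrative name:

```python
import torch

def topk_accuracy(output, target, k=5):
    _, pred = output.topk(k, dim=1)        # (N, k) indices of the k best classes
    correct = pred.eq(target.view(-1, 1))  # broadcast compare against (N, 1) labels
    return correct.any(dim=1).float().mean().item()

logits = torch.randn(8, 1000)
labels = torch.randint(0, 1000, (8,))
print(topk_accuracy(logits, labels, k=5))
```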
-------------------------------------------------------------------------------- /graffiti/auto_grad.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | import torch 4 | import torch.nn as nn 5 | from net import simple_net 6 | import torch.optim as optim 7 | from quantize.quantize_method import QuantizeWeightOrActivation 8 | import queue 9 | 10 | qw = QuantizeWeightOrActivation() 11 | 12 | 13 | class MyFunction(torch.autograd.Function): 14 | 15 | @staticmethod 16 | def forward(ctx, i): 17 | 18 | tanh_i = torch.tanh(i) 19 | max_w = torch.max(torch.abs(tanh_i)).data 20 | out = tanh_i / max_w 21 | ctx.save_for_backward(tanh_i, max_w) 22 | return out 23 | 24 | @staticmethod 25 | def backward(ctx, grad_outputs): 26 | by, bm, = ctx.saved_tensors 27 | return grad_outputs*((1-torch.pow(by, 2.0))/bm) 28 | 29 | 30 | def modify_weights(weight): 31 | fn = MyFunction.apply 32 | return fn(weight) 33 | 34 | 35 | def weights_update(): 36 | feature_map = torch.ones(1, 1, 3, 3, requires_grad=True) 37 | kernel = nn.Conv2d(1, 1, kernel_size=3, bias=False) 38 | 39 | # start 40 | print("\n自动求导求量化梯度") 41 | # w = Variable(kernel.weight.data.clone(), requires_grad=True) 42 | w = kernel.weight 43 | y = torch.tanh(w)/torch.max(torch.abs(torch.tanh(w))) 44 | z = y.sum() 45 | z.backward() 46 | print(w.grad) 47 | kernel.zero_grad() 48 | # end 49 | 50 | print("权重初始化\n", kernel.weight.data, "\n") 51 | 52 | tanh_w = torch.tanh(kernel.weight) 53 | max_w = torch.max(torch.abs(tanh_w)) 54 | hand_grad = (1 - torch.pow(kernel.weight, 2.0)) / max_w 55 | print("手动求梯度\n", hand_grad, "\n") # 卷积核的面积=3x3=9, y=(x*x).mean(), y'=2x/9 56 | 57 | # fn_w = modify_weights(kernel.weight) 58 | fn_w = qw.quantize_weights_bias(kernel.weight) 59 | fn_w.sum().backward() 60 | 61 | square_weight_grad = kernel.weight.grad.data.clone() 62 | print("自动求梯度\n", square_weight_grad, "\n") # 只需要在原本的梯度上乘以卷积核的面积就好 63 | 64 | print("量化前权重\n", kernel.weight.data, "\n") 65 | 66 | # 这种方式没法更新模型的权重, 看 state_dict 函数可以看出, 返回的是一个新建的有序字典, 67 | # 更新的其实是新字典, 而不是模型参数, 使用 load_state_dict 方法 68 | # kernel.state_dict().update(weight=fn_w) 69 | 70 | # state_dict = kernel.state_dict() # 第 1 种方法更新权重 71 | # state_dict.update(weight=square) 72 | # kernel.load_state_dict(state_dict) 73 | 74 | # kernel.weight = nn.Parameter(square) # 第 2 种方法更新权重 75 | 76 | kernel.weight.data.copy_(fn_w.data) # 第 3 种方法更新权重 77 | 78 | print("量化后权重\n", kernel.weight.data, "\n") 79 | 80 | # 权重的另一个计算图 81 | other_graph = kernel(feature_map) 82 | other_graph.backward() 83 | 84 | print("不使用 Module.zer_grad(), 卷积后权重梯度\n", kernel.weight.grad, "\n") 85 | 86 | kernel.zero_grad() 87 | other_graph = kernel(feature_map) 88 | other_graph.backward() 89 | 90 | print("使用 Module.zer_grad(), 卷积后权重梯度\n", kernel.weight.grad, "\n") 91 | print("手动计算梯度更新(加法)\n", kernel.weight.grad + square_weight_grad, "\n") 92 | print("手动计算梯度更新(乘法)\n", kernel.weight.grad * square_weight_grad, "\n") 93 | 94 | 95 | def module_apply(): 96 | saved_param = queue.Queue() 97 | saved_grad = queue.Queue() 98 | 99 | def info(s): 100 | print("\n---{}---\n".format(s)) 101 | 102 | for k, v in net.state_dict().items(): 103 | print(k, v, "\n") 104 | break 105 | 106 | def square(module): 107 | if type(module) == nn.Conv2d: 108 | saved_param.put(module.weight.data.clone()) # 第一步, 保存全精度权重 109 | quantize_w = modify_weights(module.weight) # 第二步, 量化权重 110 | quantize_w.sum().backward() 111 | saved_grad.put(module.weight.grad.data.clone()) # 第三步, 保存量化梯度 112 | module.weight.data.copy_(quantize_w.data) # 
第四步, 使用量化权重代替全精度权重 113 | 114 | def restore(module): 115 | if type(module) == nn.Conv2d: 116 | module.weight.data.copy_(saved_param.get()) # 第四步, 使用量化权重代替全精度权重 117 | 118 | def update_weight(module): 119 | if type(module) == nn.Conv2d: 120 | module.weight.grad.data.mul_(saved_grad.get()) # 第四步, 使用量化权重代替全精度权重 121 | 122 | net = simple_net.Net() 123 | info("初始化权重") 124 | 125 | # net.zero_grad() # optimizer.zero_grad() is enough 126 | # 网络输入, 输出 127 | input_ = torch.ones(1, 1, 6, 6, requires_grad=True) 128 | lable = torch.ones(1, 2) 129 | 130 | optimizer = optim.SGD(net.parameters(), lr=1) 131 | criterion = nn.MSELoss() 132 | 133 | print("\n\n") 134 | 135 | print(net.state_dict().keys(), "\n") 136 | print(optimizer.param_groups) 137 | print(optimizer.state_dict()) 138 | 139 | print("\n\n") 140 | 141 | for _ in range(5): 142 | 143 | net.apply(square) 144 | info("量化权重\n") 145 | print("net.conv1.weight.grad\n", net.conv1.weight.grad) 146 | output = net(input_) 147 | loss = criterion(output, lable) 148 | optimizer.zero_grad() # very important! 149 | 150 | print("\nnet.conv1.weight.grad after optimizer.zero_grad()\n", net.conv1.weight.grad) 151 | 152 | loss.backward() 153 | 154 | net.apply(restore) 155 | info("恢复全精度权重") 156 | 157 | net.apply(update_weight) 158 | print(net.state_dict().keys(), "\n") 159 | 160 | optimizer.step() 161 | info("更新全精度权重") 162 | print(net.state_dict().keys(), "\n") 163 | 164 | torch.save(net.state_dict(), "../model/model_name_changed.pkl") 165 | xx = torch.load("../model/model_name_changed.pkl") 166 | print(xx.keys()) 167 | 168 | 169 | if __name__ == "__main__": 170 | module_apply() -------------------------------------------------------------------------------- /graffiti/caffe_guidenet_weight_init.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import caffe 3 | 4 | net = caffe.Net("/home/wangbin/github/RFCN-FasterRCNN/objectDetection/UISEE-FRCNN-3/model_config/train.prototxt", 5 | "/media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/UISEE/" 6 | "caffe_models/PVANET/PVANET-LITE/PVANET-LITE.caffemodel", caffe.TEST)\ 7 | 8 | print(type(net.params)) -------------------------------------------------------------------------------- /graffiti/diffierent_gpu_grad_backward.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import torch 3 | 4 | a = torch.ones(2, 2, requires_grad=True).cuda(1) 5 | b = torch.rand(2, 2, requires_grad=True).cuda(2) 6 | c = a + b 7 | 8 | print(c) -------------------------------------------------------------------------------- /graffiti/float32touint8.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from net import net_quantize_guide 3 | from torchvision import models 4 | 5 | # coding=utf-8 6 | model = net_quantize_guide.resnet18() 7 | print(model.state_dict().keys()) 8 | model = models.resnet18(pretrained=True) 9 | state_dict = model.state_dict() 10 | state_dict = {k: v.to(torch.uint8) for k, v in state_dict.items()} 11 | torch.save(state_dict, "nowgood.pth") 12 | -------------------------------------------------------------------------------- /graffiti/get_module_weight.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import torch 3 | import torchvision.models as models 4 | 5 | CUDA_VISIBLE_DEVICES = 0, 3 6 | model = models.resnet18(pretrained=True) 7 | model = torch.nn.DataParallel(model, [0]).cuda() 8 | 9 | state_dict = 
model.state_dict() 10 | 11 | second_last_convlayer_weight = state_dict['module.layer4.1.conv1.weight'] 12 | last_convlayer_weight = state_dict['module.layer4.1.conv2.weight'] 13 | print(second_last_convlayer_weight) 14 | print(last_convlayer_weight) 15 | print(last_convlayer_weight.norm(p=2)) 16 | l1 = torch.norm(last_convlayer_weight, p=2) 17 | print(l1) 18 | 19 | print(len(list(model.modules())), type(model.modules)) 20 | print(state_dict.keys()) -------------------------------------------------------------------------------- /graffiti/inference_on_image.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import torch 3 | import torchvision.models as models 4 | import cv2 5 | 6 | IMG_PATH = "/home/wangbin/PycharmProjects/quantizednn/data/smurf.jpeg" 7 | 8 | 9 | def image_read(img_path): 10 | img = cv2.imread(img_path) 11 | img = cv2.resize(img, (224, 224)) 12 | img = img.transpose(2, 0, 1) 13 | img = torch.tensor(img).div(255).sub(0.5).float() 14 | img = torch.unsqueeze(img, 0) 15 | return img 16 | 17 | 18 | def torch_modules(model_): 19 | print("module.modules()\n") 20 | for e in model_.modules(): 21 | print(type(e), e) 22 | 23 | print("modules._modules.keys()\n") 24 | for e in model_._modules.keys(): 25 | print(type(e), e) 26 | 27 | print("modules.children.keys()\n") 28 | for e in model_.children(): 29 | print(type(e), e) 30 | 31 | 32 | if __name__ == "__main__": 33 | image = image_read(IMG_PATH) 34 | model = models.resnet18(pretrained=True) 35 | model = torch.nn.DataParallel(model) 36 | model.eval() 37 | 38 | pred = model(image) 39 | print(pred.size()) 40 | -------------------------------------------------------------------------------- /graffiti/merge_conv_bn.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import torch 3 | from torchvision import models 4 | import numpy as np 5 | import os 6 | from net import net_bn_conv_merge, net_bn_conv_merge_quantize 7 | from utils.data_loader import load_val_data 8 | from utils.train_val import validate 9 | 10 | epsilon = 1e-5 11 | data = "/media/wangbin/8057840b-9a1e-48c9-aa84-d353a6ba1090/ImageNet_ILSVRC2012/ILSVRC2012" 12 | 13 | model = models.resnet18(pretrained=True) 14 | # merge_model = net_bn_conv_merge.resnet18() 15 | merge_model = net_bn_conv_merge_quantize.resnet18() 16 | state_dict = model.state_dict() 17 | merge_state_dict = merge_model.state_dict() 18 | 19 | # for name in state_dict: 20 | # print(name) 21 | 22 | merge_state_dict.update({"fc.weight": state_dict["fc.weight"], 23 | "fc.bias": state_dict["fc.bias"]}) 24 | del state_dict["fc.weight"] 25 | del state_dict["fc.bias"] 26 | params = np.array(list(state_dict.keys())) 27 | 28 | params = params.reshape((-1, 5)) 29 | for index in range(params.shape[0]): 30 | weight = state_dict[params[index][0]] 31 | gamma = state_dict[params[index][1]] 32 | beta = state_dict[params[index][2]] 33 | running_mean = state_dict[params[index][3]] 34 | running_var = state_dict[params[index][4]] 35 | delta = gamma/(torch.sqrt(running_var+epsilon)) 36 | weight = weight * delta.view(-1, 1, 1, 1) 37 | bias = (0-running_mean) * delta + beta 38 | merge_state_dict.update({params[index][0]: weight, 39 | params[index][0][:-6] + "bias": bias}) 40 | merge_model.load_state_dict(merge_state_dict) 41 | merge_model_name = "resnet18_merge_bn_conv.pth.tar" 42 | torch.save(merge_model.state_dict(), merge_model_name) 43 | 44 | """ 45 | conv1.weight 46 | bn1.weight 47 | bn1.bias 48 | bn1.running_mean 49 | 
bn1.running_var 50 | layer1.0.conv1.weight 51 | layer1.0.bn1.weight 52 | layer1.0.bn1.bias 53 | layer1.0.bn1.running_mean 54 | layer1.0.bn1.running_var 55 | """ 56 | 57 | # print("bn1.weight: \n", len(state_dict["bn1.weight"]), state_dict["bn1.weight"]) 58 | # print("bn1.bias: \n", len(state_dict["bn1.bias"]), state_dict["bn1.bias"]) 59 | # print("bn1.running_mean: \n", state_dict["bn1.running_mean"]) 60 | # print("bn1.running_val: \n", state_dict["bn1.running_var"]) 61 | 62 | val_loader = load_val_data(data) 63 | evaluate = merge_model_name 64 | if os.path.isfile(evaluate): 65 | print("Loading evaluate model '{}'".format(evaluate)) 66 | checkpoint = torch.load(evaluate) 67 | merge_model.load_state_dict(checkpoint) 68 | print("Loaded evaluate model '{}'".format(evaluate)) 69 | else: 70 | print("No evaluate mode found at '{}'".format(evaluate)) 71 | 72 | merge_model.cuda() 73 | merge_model.eval() 74 | criterion = torch.nn.CrossEntropyLoss().cuda() 75 | validate(merge_model, val_loader, criterion) 76 | -------------------------------------------------------------------------------- /graffiti/nowgood.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from net import net_quantize_guide 3 | from torchvision import models 4 | 5 | 6 | x = torch.ones(5, 3) 7 | bias = torch.ones(5, 1) 8 | bias[0][0] = 4 9 | bias[3][0] = 3 10 | y = x * bias 11 | print(y) -------------------------------------------------------------------------------- /graffiti/quantize_test.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import torch 3 | from net.simple_net import Net 4 | from quantize.quantize_method import QuantizeWeightOrActivation 5 | import torch.nn as nn 6 | import torch.optim as optim 7 | import matplotlib.pyplot as plt 8 | 9 | 10 | def test_quantize_weight(): 11 | qw = QuantizeWeightOrActivation() 12 | 13 | net = Net() 14 | qw.info(net, "初始化权重") 15 | 16 | net.apply(qw.quantize) 17 | qw.info(net, "量化权重") 18 | 19 | # 网络输入, 输出 20 | input_ = torch.ones(1, 1, 6, 6, requires_grad=True) 21 | lable = torch.ones(1, 2) 22 | 23 | optimizer = optim.SGD(net.parameters(), lr=0.01) 24 | criterion = nn.MSELoss() 25 | output = net(input_) 26 | loss = criterion(output, lable) 27 | optimizer.zero_grad() 28 | loss.backward() 29 | print("\nMSE LOSS ", loss, "\n") 30 | 31 | net.apply(qw.restore) 32 | qw.info(net, "恢复全精度权重") 33 | 34 | net.apply(qw.update_grad) 35 | 36 | print("now") 37 | optimizer.step() 38 | qw.info(net, "更新全精度权重") 39 | 40 | 41 | def test_quantize_weight_update(): 42 | qw = QuantizeWeightOrActivation() 43 | 44 | net = Net() 45 | input_ = torch.rand(1, 1, 6, 6, requires_grad=True) 46 | label = torch.ones(1, 2) 47 | optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum=0.99) 48 | criterion = nn.MSELoss() 49 | log = {} 50 | for step in torch.arange(5000): 51 | net.apply(qw.quantize) 52 | output = net(input_) 53 | loss = criterion(output, label) 54 | optimizer.zero_grad() 55 | loss.backward() 56 | # print("loss ", loss.data) 57 | net.apply(qw.restore) 58 | net.apply(qw.update_grad) 59 | optimizer.step() 60 | 61 | log[step] = loss 62 | 63 | plt.axis([0, 5000, 0, 0.1]) 64 | plt.plot(log.values(), "r-") 65 | plt.show() 66 | 67 | 68 | if __name__ == "__main__": 69 | test_quantize_weight_update() -------------------------------------------------------------------------------- /graffiti/register_forward_hook.py: -------------------------------------------------------------------------------- 1 | # 
coding=utf-8 2 | import torch 3 | import torchvision.models as models 4 | import cv2 5 | 6 | IMG_PATH = "/home/wangbin/PycharmProjects/quantizednn/data/smurf.jpeg" 7 | 8 | 9 | def image_read(img_path): 10 | img = cv2.imread(img_path) 11 | img = cv2.resize(img, (224, 224)) 12 | img = img.transpose(2, 0, 1) 13 | img = torch.tensor(img).div(255).sub(0.5).float() 14 | img = torch.unsqueeze(img, 0) 15 | return img 16 | 17 | 18 | def torch_modules(model_): 19 | print("module.modules()\n") 20 | for e in model_.modules(): 21 | print(type(e), e) 22 | 23 | print("modules._modules.keys()\n") 24 | for e in model_._modules.keys(): 25 | print(type(e), e) 26 | 27 | print("modules.children.keys()\n") 28 | for e in model_.children(): 29 | print(type(e), e) 30 | 31 | 32 | def my_hook(m, i, o): 33 | fm[0] = (i[0].data.clone()) 34 | fm[1] = (o.data.clone()) 35 | print('m:', type(m)) 36 | print('i:', type(i)) 37 | print('len(i):', len(i)) 38 | print('i[0]:', type(i[0])) 39 | print('i[0]:', i[0].size()) 40 | print('o:', type(o)) 41 | print() 42 | print('i[0] shape:', i[0].size()) 43 | print('o shape:', o.size()) 44 | 45 | 46 | def my_hook2(m, i, o): 47 | m.register_buffer("layer3", i[0]) 48 | m.register_buffer("layer4", o) 49 | 50 | 51 | if __name__ == "__main__": 52 | image = image_read(IMG_PATH) 53 | model = models.resnet18(pretrained=True) 54 | last = model._modules.get("layer4") 55 | fm = [0, 0] 56 | hook = last.register_forward_hook(my_hook2) 57 | model = torch.nn.DataParallel(model) 58 | model.eval() 59 | pred = model(image) 60 | print(model) 61 | for k, v in model._modules.items(): 62 | print(k, v) 63 | 64 | hook.remove() -------------------------------------------------------------------------------- /graffiti/stat_parameters.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | resnet-18: 4 | layer1.0.conv1.weight 0.003 5 | layer1.0.conv2.weight 0.003 6 | layer1.1.conv1.weight 0.003 7 | layer1.1.conv2.weight 0.003 8 | layer2.0.conv1.weight 0.006 9 | layer2.0.conv2.weight 0.013 10 | layer2.1.conv1.weight 0.013 11 | layer2.1.conv2.weight 0.013 12 | layer3.0.conv1.weight 0.025 13 | layer3.0.conv2.weight 0.050 14 | layer3.0.downsample.0.weight 0.003 15 | layer3.1.conv1.weight 0.050 16 | layer3.1.conv2.weight 0.050 17 | layer4.0.conv1.weight 0.101 18 | layer4.0.conv2.weight 0.202 19 | layer4.0.downsample.0.weight 0.011 20 | layer4.1.conv1.weight 0.202 21 | layer4.1.conv2.weight 0.202 22 | fc.weight 0.044 23 | """ 24 | import torchvision.models as models 25 | 26 | 27 | def num_features(shape): 28 | feature = 1 29 | for dim in shape: 30 | feature *= dim 31 | return feature 32 | 33 | 34 | def total_parameters(state_dict): 35 | count = 0 36 | for value in state_dict.values(): 37 | count += num_features(value.size()) 38 | return count 39 | 40 | 41 | if __name__ == "__main__": 42 | model = models.resnet50() 43 | total = total_parameters(model.state_dict()) 44 | for k, v in model.state_dict().items(): 45 | rate = num_features(v.size())/total 46 | if rate > 0.001: 47 | print("{: <30} {:.3f}".format(k, rate)) 48 | -------------------------------------------------------------------------------- /graffiti/weight_distribute.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import torch 3 | import torchvision.models as models 4 | from quantize.quantize_method import quantize_weights_bias_tanh 5 | import numpy as np 6 | 7 | 8 | checkpoint = 
"/home/wangbin/Desktop/uisee/model_quantize/AandW_lr1e-3_step10_epoch35/checkpoint.pth.tar" 9 | 10 | 11 | def weight_decay(): 12 | 13 | """ 14 | random init: 15 | l2_loss: (668.1154) 16 | l2_loss * 1e-4: (0.066812) 17 | 18 | pre-trained 19 | l2_loss: (517.5516) 20 | l2_loss * 1e-4: (0.051755) 21 | """ 22 | 23 | model = models.resnet18() 24 | l2_loss = 0 25 | for i in model.parameters(): 26 | l2_loss += i.norm(p=2) 27 | 28 | print(l2_loss) 29 | print(l2_loss * 1e-4) 30 | 31 | 32 | def quantize_weight_distribute(): 33 | model_checkpoint = torch.load(checkpoint) 34 | state_dict = model_checkpoint['state_dict'] 35 | 36 | for k, v in state_dict.items(): 37 | if k == "module.layer1.1.conv2.weight": 38 | cnts = [0 for _ in range(26)] 39 | v = v.view(-1) 40 | print(v) 41 | v = (quantize_weights_bias_tanh(v) + 1) / 2 * (256 - 1) 42 | print(v.size()) 43 | for ele in v: 44 | cnts[np.abs(int(ele)//10)] += 1 45 | for i in range(26): 46 | print(i, " ", '{:.4f}'.format(cnts[i]/len(v))) 47 | 48 | # 权值越在深层, 方差越小, 越底层, 分布范围越大, 方差越大 49 | """ 50 | conv4.1_layer 51 | 0 0.0000 52 | 1 0.0000 53 | 2 0.0000 54 | 3 0.0000 55 | 4 0.0000 56 | 5 0.0000 57 | 6 0.0000 58 | 7 0.0000 59 | 8 0.0000 60 | 9 0.0000 61 | 10 0.0009 62 | 11 0.0717 63 | 12 0.5933 64 | 13 0.3055 65 | 14 0.0257 66 | 15 0.0022 67 | 16 0.0003 68 | 17 0.0001 69 | 18 0.0000 70 | 19 0.0000 71 | 20 0.0000 72 | 21 0.0000 73 | 22 0.0000 74 | 23 0.0000 75 | 24 0.0000 76 | 25 0.0000 77 | """ 78 | 79 | ''' 80 | conv1.1_layer 81 | 0 0.0001 82 | 1 0.0000 83 | 2 0.0001 84 | 3 0.0002 85 | 4 0.0004 86 | 5 0.0007 87 | 6 0.0019 88 | 7 0.0032 89 | 8 0.0084 90 | 9 0.0204 91 | 10 0.0566 92 | 11 0.1618 93 | 12 0.3274 94 | 13 0.2621 95 | 14 0.1029 96 | 15 0.0341 97 | 16 0.0116 98 | 17 0.0050 99 | 18 0.0019 100 | 19 0.0005 101 | 20 0.0004 102 | 21 0.0002 103 | 22 0.0001 104 | 23 0.0000 105 | 24 0.0000 106 | 25 0.0000 107 | 108 | ''' 109 | 110 | 111 | if __name__ == "__main__": 112 | quantize_weight_distribute() -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | 3 | """ 4 | 1. 量化权重建议: 5 | 1). 学习率最高设置为 0.001, 0.0001可以很快的收敛, 最很好的选择, 训练个 2~5个 epoch 就好 6 | 7 | 2. 权重和激活同时量化注意事项: 8 | 1). 学习率设置不能大于 0.01(学习率最大设置 0.01), 当学习率设置为0.01时, 模型可以很好的微调, 9 | 2). 当学习率设置为 0.1 时, 训练几十个batch之后, 准确率为 千分之一 和 千分之五 10 | 3). 学习率设置为 0.01 时,大约 5~8 epoch降低一次学习率(除以10)比较好, 然后训练大约 30~40 epoch就好 11 | 4). 当学习率设置为 0.001 时, 大约 14~16 epoch 降低一次学习率比较好, 然后训练大约 30~40 epoch就好 12 | 13 | 3. 训练模式(mode): 14 | 0: full precision training from scratch 15 | 1: only quantize_tanh weight 16 | 2. quantize_tanh activation using quantized weight to init model 17 | 3. joint quantize_tanh weight and activation from pre-trained imageNet model 18 | 4. 
guided quantize_tanh weight and activation from pre-trained imageNet model 19 | 20 | """ 21 | 22 | import argparse 23 | import torchvision.models as models 24 | import warnings 25 | import random 26 | import os 27 | import torch.backends.cudnn as cudnn 28 | import torch.distributed as dist 29 | import torch 30 | import torch.optim 31 | import torch.utils.data 32 | import torch.utils.data.distributed 33 | from utils.train_val import train, save_checkpoint, validate 34 | from utils.data_loader import load_train_data, load_val_data 35 | from quantize import quantize_guided 36 | from quantize.quantize_method import quantize_weights_bias_gemm 37 | from net import net_quantize_activation, net_quantize_weight 38 | from tensorboardX import SummaryWriter 39 | 40 | 41 | model_names = sorted(name for name in models.__dict__ 42 | if name.islower() and not name.startswith("__") 43 | and callable(models.__dict__[name])) 44 | 45 | parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') 46 | 47 | parser.add_argument('--data', metavar='DIR', help='path to dataset', required=True) 48 | parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18', 49 | choices=model_names, 50 | help='model architecture: ' + ' | '.join(model_names) + ' (default: resnet18)') 51 | parser.add_argument('--workers', default=16, type=int, metavar='N', # 修改为电脑cpu支持的线程数 52 | help='number of data loading workers (default: 16)') 53 | parser.add_argument('--epochs', default=35, type=int, metavar='N', 54 | help='number of total epochs to run') 55 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 56 | help='manual epoch number (useful on restarts)') 57 | parser.add_argument('--batch-size', default=128, type=int, 58 | metavar='N', help='mini-batch size (default: 128)') 59 | 60 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 61 | help='momentum') 62 | parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, 63 | metavar='W', help='weight decay (default: 1e-4)') 64 | parser.add_argument('--resume', action='store_true', 65 | help='resume training using save-dir checkpoint (default: False)') 66 | # 如果是验证模型, 设置为True就好, 训练时值为False 67 | parser.add_argument('--evaluate', default='', type=str, 68 | help='evaluate model on validation set') 69 | parser.add_argument('--world-size', default=1, type=int, 70 | help='number of distributed processes') 71 | parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, 72 | help='url used to set up distributed training') 73 | parser.add_argument('--dist-backend', default='gloo', type=str, 74 | help='distributed backend') 75 | parser.add_argument('--seed', default=None, type=int, 76 | help='seed for initializing training. 
') 77 | parser.add_argument('--gpu', default=None, type=int, help='GPU id to use.') 78 | 79 | parser.add_argument('--device-ids', default=[0], type=int, nargs='+', 80 | help='GPU ids to be used e.g 0 1 2 3') 81 | parser.add_argument('--weight-quantized', default='', type=str, help="quantize_tanh weight model path") 82 | parser.add_argument('--save-dir', default='model', type=str, help='directory to save trained model', required=True) 83 | parser.add_argument('--mode', default=3, type=int, help='model quantized mode', required=True) 84 | # l1 norm balance 设置为1或者0.1比较好, l2 norm balance 设置为100(~0.034) ~ 500 比较好 85 | parser.add_argument('--norm', default=1, type=int, help='feature map norm, default 1') 86 | parser.add_argument('--balance', default=100, type=float, help='balancing parameter (default: 100)') 87 | # 论文中初始学习率 0.001, 每 10 epoch 除以 10, 这在只量化权重时候可以 88 | # 在同时量化权重和激活时, 当使用0.001时, 我们可以观测到权重的持续上升 89 | # 或许可以将初始学习率调为 0.01, 甚至 0.1 90 | # guidance 方法中, 全精度模型的的学习率要小一些, 模型已经训练的很好了, 微调而已 91 | # 不过来低精度模型的学习率可以调高一点 92 | parser.add_argument('--lr', default=0.001, type=float, # 论文中初始学习率 0.001, 每 10 epoch 除以 10 93 | help='initial learning rate') 94 | parser.add_argument('--rate', default=1, type=int, 95 | help='guide training method, full_lr = low_lr * rate') 96 | 97 | parser.add_argument('--lr-step', default=10, type=int, help='learning rate step scheduler') 98 | 99 | 100 | args = parser.parse_args() 101 | best_prec1 = 0 102 | 103 | 104 | def main(): 105 | global best_prec1 106 | print("\n" 107 | "=> arch {: <20}\n" 108 | "=> init_lr {: <20}\n" 109 | "=> lr-step {: <20}\n" 110 | "=> momentum {: <20}\n" 111 | "=> weight-decay {: <20}\n" 112 | "=> batch-size {: <20}\n" 113 | "=> balance {: <20}\n" 114 | "=> save-dir {: <20}\n".format( 115 | args.arch, args.lr, args.lr_step, args.momentum, args.weight_decay, 116 | args.batch_size, args.balance, args.save_dir)) 117 | 118 | if args.seed is not None: 119 | random.seed(args.seed) 120 | torch.manual_seed(args.seed) 121 | cudnn.deterministic = True 122 | warnings.warn('You have chosen to seed training. 
This will turn on the CUDNN deterministic setting, ' 123 | 'which can slow down your training considerably!, You may see unexpected behavior' 124 | ' when restarting from checkpoints.') 125 | 126 | # 下面的 warning 可以看出, 如果指定一个 gpu id, 就不会使用多 gpu 训练 127 | if args.gpu is not None: 128 | warnings.warn('You have chosen a specific GPU, This will completely disable data parallelism.') 129 | 130 | # 多机器训练而不是一机多卡(集群训练模式) 131 | args.distributed = args.world_size > 1 132 | if args.distributed: 133 | dist.init_process_group(backend=args.dist_backend, 134 | init_method=args.dist_url, 135 | world_size=args.world_size) 136 | 137 | # 根据训练模式加载训练模型 138 | if args.mode == 0: 139 | print("=> training mode {}: full precision training from scratch\n".format(args.mode)) 140 | model = models.__dict__[args.arch]() 141 | 142 | elif args.mode == 1: 143 | print("=> training mode {}: quantize weight only\n".format(args.mode)) 144 | print("=> loading imageNet pre-trained model {}".format(args.arch)) 145 | model = net_quantize_weight.__dict__[args.arch]() 146 | model_dict = model.state_dict() 147 | init_model = models.__dict__[args.arch](pretrained=True) 148 | model_dict.update(init_model.state_dict()) 149 | model.load_state_dict(model_dict) 150 | print("=> loaded imageNet pre-trained model {}".format(args.arch)) 151 | 152 | elif args.mode == 2: 153 | print("=> training mode {}: quantize activation using quantized weight\n".format(args.mode)) 154 | model = net_quantize_activation.__dict__[args.arch]() 155 | if os.path.isfile(args.weight_quantized): 156 | print("=> loading weight quantized model '{}'".format(args.weight_quantized)) 157 | model_dict = model.state_dict() 158 | quantized_model = torch.load(args.weight_quantized) 159 | init_dict = {} 160 | for k, v in quantized_model['state_dict'].items(): 161 | if k in model.state_dict(): 162 | if k.find("conv") != -1 or k.find("fc") != -1: 163 | init_dict[k[7:]] = quantize_weights_bias_gemm(v) 164 | else: 165 | init_dict[k[7:]] = v 166 | 167 | model_dict.update(init_dict) 168 | model.load_state_dict(model_dict) 169 | print("=> loaded weight_quantized '{}'".format(args.weight_quantized)) 170 | else: 171 | warnings.warn("=> no weight quantized model found at '{}'".format(args.weight_quantized)) 172 | return 173 | 174 | elif args.mode == 3: 175 | print("=> training mode {}: quantize weight and activation simultaneously\n".format(args.mode)) 176 | print("=> loading imageNet pre-trained model '{}'".format(args.arch)) 177 | # 使用预训练的ResNet18来初始化同时量化网络权重和激活 178 | model = net_quantize_activation.__dict__[args.arch]() 179 | # 获取预训练模型参数 180 | model_dict = model.state_dict() 181 | init_model = models.__dict__[args.arch](pretrained=True) 182 | init_dict = {k: v for k, v in init_model.state_dict().items() if k in model_dict} 183 | model_dict.update(init_dict) 184 | model.load_state_dict(model_dict) 185 | 186 | elif args.mode == 4: 187 | print("=> Training mode {}: guided quantize weight and activation " 188 | "from pre-trained imageNet model {}\n ".format(args.mode, args.arch)) 189 | 190 | # quantize_guided.guided(args) 191 | quantize_guided.guided(args) 192 | return 193 | else: 194 | raise Exception("invalid mode, valid mode is 0~4!!") 195 | 196 | if args.gpu is not None: # 指定GPU 197 | model = model.cuda(args.gpu) 198 | elif args.distributed: # 集群训练(多机器) 199 | model.cuda() 200 | model = torch.nn.parallel.DistributedDataParallel(model) 201 | else: # 单机训练(单卡或者多卡) 202 | if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): 203 | model.features = 
torch.nn.DataParallel(model.features) 204 | model.cuda() 205 | else: 206 | # 一机多卡时, 多 GPU 训练, 指定要用到 GPU 的 ids 207 | """ 208 | list(model.state_dict().keys())[0] 209 | model 在使用 torch.nn.DataParallel 之前每层的名字, 如 conv1.weight 210 | model 在使用 torch.nn.DataParallel 之后每层的名字, 如 module.conv1.weight 211 | 如果训练使用并行化, 而验证使用指定GPU的话就会出现问题, 所以需要在指定GPU代码中,添加解决冲突的代码 212 | """ 213 | model = torch.nn.DataParallel(model, args.device_ids).cuda() 214 | 215 | criterion = torch.nn.CrossEntropyLoss().cuda(args.gpu) 216 | optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay) 217 | # optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 218 | lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_step) 219 | 220 | # optionally resume from a checkpoint 221 | if args.resume: 222 | print("\n=> resume training from checkpoint") 223 | checkpoint_filename = os.path.join(args.save_dir, "checkpoint.pth.tar") 224 | 225 | if os.path.isfile(checkpoint_filename): 226 | print("=> loading checkpoint '{}'".format(checkpoint_filename)) 227 | checkpoint = torch.load(checkpoint_filename) 228 | args.start_epoch = checkpoint['epoch'] 229 | best_prec1 = checkpoint['best_prec1'] 230 | model.load_state_dict(checkpoint['state_dict']) 231 | optimizer.load_state_dict(checkpoint['optimizer']) 232 | print("=> loaded checkpoint '{}' (epoch {})" 233 | .format(checkpoint_filename, checkpoint['epoch'])) 234 | else: 235 | print("=> no checkpoint found at '{}'".format(checkpoint_filename)) 236 | 237 | cudnn.benchmark = True 238 | 239 | val_loader = load_val_data(args.data, args.batch_size, args.workers) 240 | 241 | if args.evaluate: 242 | if os.path.isfile(args.evaluate): 243 | print("Loading evaluate model '{}'".format(args.evaluate)) 244 | checkpoint = torch.load(args.evaluate) 245 | if "state_dict" in checkpoint.keys(): 246 | model.load_state_dict(checkpoint['state_dict']) 247 | print("epoch: {} ".format(checkpoint['epoch'])) 248 | else: 249 | checkpoint = {''.join(("module.", k)): v for k, v in checkpoint.items() if not k.startswith("module")} 250 | model.load_state_dict(checkpoint) 251 | print("Loaded evaluate model '{}'".format(args.evaluate)) 252 | else: 253 | print("No evaluate mode found at '{}'".format(args.evaluate)) 254 | return 255 | validate(model, val_loader, criterion, args.gpu) 256 | return 257 | 258 | train_loader, train_sampler = load_train_data(args.data, args.batch_size, args.workers, args.distributed) 259 | 260 | summary_writer = SummaryWriter(args.save_dir) 261 | for epoch in range(args.start_epoch, args.epochs): 262 | if args.distributed: 263 | train_sampler.set_epoch(epoch) 264 | lr_scheduler.step() 265 | 266 | # train for one epoch 267 | train(model, train_loader, criterion, optimizer, args.gpu, epoch, summary_writer) 268 | 269 | # evaluate on validation set 270 | prec1 = validate(model, val_loader, criterion, args.gpu, epoch, summary_writer) 271 | 272 | # remember best prec@1 and save checkpoint 273 | is_best = prec1 > best_prec1 274 | best_prec1 = max(prec1, best_prec1) 275 | save_checkpoint({ 276 | 'epoch': epoch+1, 277 | 'arch': args.arch, 278 | 'state_dict': model.state_dict(), 279 | 'best_prec1': best_prec1, 280 | 'optimizer': optimizer.state_dict(), 281 | }, is_best, args.save_dir) 282 | 283 | summary_writer.close() 284 | 285 | 286 | if __name__ == '__main__': 287 | main() -------------------------------------------------------------------------------- /net/net_bn_conv_merge.py: 
-------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import torch.nn as nn 3 | import math 4 | import torch.utils.model_zoo as model_zoo 5 | 6 | """ 7 | 网络修改步骤; 8 | 1. 将卷积层的 bias 设置为 True 9 | 2. 将 bn 层删掉 10 | """ 11 | 12 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 13 | 'resnet152'] 14 | 15 | 16 | model_urls = { 17 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 18 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 19 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 20 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 21 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 22 | } 23 | 24 | 25 | def conv3x3(in_planes, out_planes, stride=1): 26 | """3x3 convolution with padding""" 27 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 28 | padding=1, bias=True) 29 | 30 | 31 | class BasicBlock(nn.Module): 32 | expansion = 1 33 | 34 | def __init__(self, inplanes, planes, stride=1, downsample=None): 35 | super(BasicBlock, self).__init__() 36 | self.conv1 = conv3x3(inplanes, planes, stride) 37 | self.relu = nn.ReLU(inplace=True) 38 | self.conv2 = conv3x3(planes, planes) 39 | self.downsample = downsample 40 | self.stride = stride 41 | 42 | def forward(self, x): 43 | residual = x 44 | 45 | out = self.conv1(x) 46 | out = self.relu(out) 47 | 48 | out = self.conv2(out) 49 | 50 | if self.downsample is not None: 51 | residual = self.downsample(x) 52 | 53 | out += residual 54 | out = self.relu(out) 55 | 56 | return out 57 | 58 | 59 | class Bottleneck(nn.Module): 60 | expansion = 4 61 | 62 | def __init__(self, inplanes, planes, stride=1, downsample=None): 63 | super(Bottleneck, self).__init__() 64 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=True) 65 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 66 | padding=1, bias=True) 67 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=True) 68 | self.relu = nn.ReLU(inplace=True) 69 | self.downsample = downsample 70 | self.stride = stride 71 | 72 | def forward(self, x): 73 | residual = x 74 | 75 | out = self.conv1(x) 76 | out = self.relu(out) 77 | 78 | out = self.conv2(out) 79 | out = self.relu(out) 80 | 81 | out = self.conv3(out) 82 | 83 | if self.downsample is not None: 84 | residual = self.downsample(x) 85 | 86 | out += residual 87 | out = self.relu(out) 88 | 89 | return out 90 | 91 | 92 | class ResNet(nn.Module): 93 | 94 | def __init__(self, block, layers, num_classes=1000): 95 | self.inplanes = 64 96 | super(ResNet, self).__init__() 97 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 98 | bias=True) 99 | self.relu = nn.ReLU(inplace=True) 100 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 101 | self.layer1 = self._make_layer(block, 64, layers[0]) 102 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 103 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 104 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 105 | self.avgpool = nn.AvgPool2d(7, stride=1) 106 | self.fc = nn.Linear(512 * block.expansion, num_classes) 107 | 108 | for m in self.modules(): 109 | if isinstance(m, nn.Conv2d): 110 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 111 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 112 | elif isinstance(m, nn.BatchNorm2d): 113 | m.weight.data.fill_(1) 114 | m.bias.data.zero_() 115 | 116 | def _make_layer(self, block, planes, blocks, stride=1): 117 | downsample = None 118 | if stride != 1 or self.inplanes != planes * block.expansion: 119 | downsample = nn.Sequential( 120 | nn.Conv2d(self.inplanes, planes * block.expansion, 121 | kernel_size=1, stride=stride, bias=True), 122 | ) 123 | 124 | layers = [] 125 | layers.append(block(self.inplanes, planes, stride, downsample)) 126 | self.inplanes = planes * block.expansion 127 | for i in range(1, blocks): 128 | layers.append(block(self.inplanes, planes)) 129 | 130 | return nn.Sequential(*layers) 131 | 132 | def forward(self, x): 133 | x = self.conv1(x) 134 | x = self.relu(x) 135 | x = self.maxpool(x) 136 | 137 | x = self.layer1(x) 138 | x = self.layer2(x) 139 | x = self.layer3(x) 140 | x = self.layer4(x) 141 | 142 | x = self.avgpool(x) 143 | x = x.view(x.size(0), -1) 144 | x = self.fc(x) 145 | 146 | return x 147 | 148 | 149 | def resnet18(pretrained=False, **kwargs): 150 | """Constructs a ResNet-18 model. 151 | 152 | Args: 153 | pretrained (bool): If True, returns a model pre-trained on ImageNet 154 | """ 155 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 156 | if pretrained: 157 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 158 | return model 159 | 160 | 161 | def resnet34(pretrained=False, **kwargs): 162 | """Constructs a ResNet-34 model. 163 | 164 | Args: 165 | pretrained (bool): If True, returns a model pre-trained on ImageNet 166 | """ 167 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 168 | if pretrained: 169 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 170 | return model 171 | 172 | 173 | def resnet50(pretrained=False, **kwargs): 174 | """Constructs a ResNet-50 model. 175 | 176 | Args: 177 | pretrained (bool): If True, returns a model pre-trained on ImageNet 178 | """ 179 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 180 | if pretrained: 181 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 182 | return model 183 | 184 | 185 | def resnet101(pretrained=False, **kwargs): 186 | """Constructs a ResNet-101 model. 187 | 188 | Args: 189 | pretrained (bool): If True, returns a model pre-trained on ImageNet 190 | """ 191 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 192 | if pretrained: 193 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 194 | return model 195 | 196 | 197 | def resnet152(pretrained=False, **kwargs): 198 | """Constructs a ResNet-152 model. 199 | 200 | Args: 201 | pretrained (bool): If True, returns a model pre-trained on ImageNet 202 | """ 203 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 204 | if pretrained: 205 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 206 | return model 207 | -------------------------------------------------------------------------------- /net/net_bn_conv_merge_quantize.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | 1. 将卷积层, 除了第一层使用 QWConv2D(不量化输出, 不然性能下降10个百分点), 全部使用QWACvon2D 4 | 2. 线性层全部使用 QWALinear, 线性层所占的参数比例在 resnet18中占据 4.4%, resnet50中占据 8%, 不量化的话会有大约 0.4个百分点的性能提升 5 | 3. 在全连接层送入 softmax 之前, 加一个标量层, 做 softmax 的软化?? 
6 | """ 7 | import torch.nn as nn 8 | import math 9 | import torch.utils.model_zoo as model_zoo 10 | from quantize.quantize_module_ import QWConv2D, QWAConv2D, QWALinear, Scalar 11 | 12 | 13 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 14 | 'resnet152'] 15 | 16 | 17 | model_urls = { 18 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 19 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 20 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 21 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 22 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 23 | } 24 | 25 | 26 | def conv3x3(in_planes, out_planes, stride=1): 27 | """3x3 convolution with padding""" 28 | return QWAConv2D(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=True) 29 | 30 | 31 | class BasicBlock(nn.Module): 32 | expansion = 1 33 | 34 | def __init__(self, inplanes, planes, stride=1, downsample=None): 35 | super(BasicBlock, self).__init__() 36 | self.conv1 = conv3x3(inplanes, planes, stride) 37 | self.relu = nn.ReLU(inplace=True) 38 | self.conv2 = conv3x3(planes, planes) 39 | self.downsample = downsample 40 | self.stride = stride 41 | 42 | def forward(self, x): 43 | residual = x 44 | out = self.conv1(x) 45 | out = self.relu(out) 46 | 47 | out = self.conv2(out) 48 | 49 | if self.downsample is not None: 50 | residual = self.downsample(x) 51 | 52 | out += residual 53 | out = self.relu(out) 54 | 55 | return out 56 | 57 | 58 | class Bottleneck(nn.Module): 59 | expansion = 4 60 | 61 | def __init__(self, inplanes, planes, stride=1, downsample=None): 62 | super(Bottleneck, self).__init__() 63 | self.conv1 = QWAConv2D(inplanes, planes, kernel_size=1, bias=True) 64 | self.conv2 = QWAConv2D(planes, planes, kernel_size=3, stride=stride, 65 | padding=1, bias=True) 66 | self.conv3 = QWAConv2D(planes, planes * 4, kernel_size=1, bias=True) 67 | self.relu = nn.ReLU(inplace=True) 68 | self.downsample = downsample 69 | self.stride = stride 70 | 71 | def forward(self, x): 72 | residual = x 73 | 74 | out = self.conv1(x) 75 | out = self.relu(out) 76 | 77 | out = self.conv2(out) 78 | out = self.relu(out) 79 | 80 | out = self.conv3(out) 81 | 82 | if self.downsample is not None: 83 | residual = self.downsample(x) 84 | 85 | out += residual 86 | out = self.relu(out) 87 | 88 | return out 89 | 90 | 91 | class ResNet(nn.Module): 92 | 93 | def __init__(self, qblock, layers, num_classes=1000): 94 | self.inplanes = 64 95 | super(ResNet, self).__init__() 96 | self.conv1 = QWConv2D(3, 64, kernel_size=7, stride=2, padding=3, 97 | bias=True) 98 | self.relu = nn.ReLU(inplace=True) 99 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 100 | self.layer1 = self._make_layer(qblock, 64, layers[0]) 101 | self.layer2 = self._make_layer(qblock, 128, layers[1], stride=2) 102 | self.layer3 = self._make_layer(qblock, 256, layers[2], stride=2) 103 | self.layer4 = self._make_layer(qblock, 512, layers[3], stride=2) 104 | self.avgpool = nn.AvgPool2d(7, stride=1) 105 | self.fc = QWALinear(512 * qblock.expansion, num_classes) # 修改 106 | self.scalar = Scalar() # 修改 107 | 108 | for m in self.modules(): 109 | if isinstance(m, nn.Conv2d): 110 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 111 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 112 | elif isinstance(m, nn.BatchNorm2d): 113 | m.weight.data.fill_(1) 114 | m.bias.data.zero_() 115 | 116 | def _make_layer(self, block, planes, blocks, stride=1): 117 | downsample = None 118 | if stride != 1 or self.inplanes != planes * block.expansion: 119 | downsample = nn.Sequential( 120 | QWAConv2D(self.inplanes, planes * block.expansion, 121 | kernel_size=1, stride=stride, bias=True), 122 | ) 123 | 124 | layers = [] 125 | layers.append(block(self.inplanes, planes, stride, downsample)) 126 | self.inplanes = planes * block.expansion 127 | for i in range(1, blocks): 128 | layers.append(block(self.inplanes, planes)) 129 | 130 | return nn.Sequential(*layers) 131 | 132 | def forward(self, x): 133 | x = self.conv1(x) 134 | x = self.relu(x) 135 | x = self.maxpool(x) 136 | 137 | x = self.layer1(x) 138 | x = self.layer2(x) 139 | x = self.layer3(x) 140 | x = self.layer4(x) 141 | 142 | x = self.avgpool(x) 143 | x = x.view(x.size(0), -1) 144 | x = self.fc(x) 145 | x = self.scalar(x) # 修改 146 | 147 | return x 148 | 149 | 150 | def resnet18(pretrained=False, **kwargs): 151 | """Constructs a ResNet-18 model. 152 | 153 | Args: 154 | pretrained (bool): If True, returns a model pre-trained on ImageNet 155 | """ 156 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 157 | if pretrained: 158 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 159 | return model 160 | 161 | 162 | def resnet34(pretrained=False, **kwargs): 163 | """Constructs a ResNet-34 model. 164 | 165 | Args: 166 | pretrained (bool): If True, returns a model pre-trained on ImageNet 167 | """ 168 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 169 | if pretrained: 170 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 171 | return model 172 | 173 | 174 | def resnet50(pretrained=False, **kwargs): 175 | """Constructs a ResNet-50 model. 176 | 177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 181 | if pretrained: 182 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 183 | return model 184 | 185 | 186 | def resnet101(pretrained=False, **kwargs): 187 | """Constructs a ResNet-101 model. 188 | 189 | Args: 190 | pretrained (bool): If True, returns a model pre-trained on ImageNet 191 | """ 192 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 193 | if pretrained: 194 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 195 | return model 196 | 197 | 198 | def resnet152(pretrained=False, **kwargs): 199 | """Constructs a ResNet-152 model. 200 | 201 | Args: 202 | pretrained (bool): If True, returns a model pre-trained on ImageNet 203 | """ 204 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 205 | if pretrained: 206 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 207 | return model 208 | -------------------------------------------------------------------------------- /net/net_quantize_activation.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | """ 3 | 1. 将卷积层, 除了第一层使用 QWConv2D(不量化输出, 不然性能下降10个百分点), 全部使用QWACvon2D 4 | 2. 线性层全部使用 QWALinear, 线性层所占的参数比例在 resnet18中占据 4.4%, resnet50中占据 8%, 不量化的话会有大约 0.4个百分点的性能提升 5 | 3. 在全连接层送入 softmax 之前, 加一个标量层, 做 softmax 的软化?? 
6 | """ 7 | import torch.nn as nn 8 | import math 9 | import torch.utils.model_zoo as model_zoo 10 | from quantize.quantize_module_ import QWConv2D, QWAConv2D, QWALinear, Scalar 11 | 12 | 13 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 14 | 'resnet152'] 15 | 16 | 17 | model_urls = { 18 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 19 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 20 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 21 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 22 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 23 | } 24 | 25 | 26 | def conv3x3(in_planes, out_planes, stride=1): 27 | """3x3 convolution with padding""" 28 | return QWAConv2D(in_planes, out_planes, kernel_size=3, stride=stride, 29 | padding=1, bias=False) 30 | 31 | 32 | class BasicBlock(nn.Module): 33 | expansion = 1 34 | 35 | def __init__(self, inplanes, planes, stride=1, downsample=None): 36 | super(BasicBlock, self).__init__() 37 | self.conv1 = conv3x3(inplanes, planes, stride) 38 | self.bn1 = nn.BatchNorm2d(planes) 39 | self.relu = nn.ReLU(inplace=True) 40 | self.conv2 = conv3x3(planes, planes) 41 | self.bn2 = nn.BatchNorm2d(planes) 42 | self.downsample = downsample 43 | self.stride = stride 44 | 45 | def forward(self, x): 46 | residual = x 47 | out = self.conv1(x) 48 | out = self.bn1(out) 49 | out = self.relu(out) 50 | 51 | out = self.conv2(out) 52 | out = self.bn2(out) 53 | 54 | if self.downsample is not None: 55 | residual = self.downsample(x) 56 | 57 | out += residual 58 | out = self.relu(out) 59 | 60 | return out 61 | 62 | 63 | class Bottleneck(nn.Module): 64 | expansion = 4 65 | 66 | def __init__(self, inplanes, planes, stride=1, downsample=None): 67 | super(Bottleneck, self).__init__() 68 | self.conv1 = QWAConv2D(inplanes, planes, kernel_size=1, bias=False) 69 | self.bn1 = nn.BatchNorm2d(planes) 70 | self.conv2 = QWAConv2D(planes, planes, kernel_size=3, stride=stride, 71 | padding=1, bias=False) 72 | self.bn2 = nn.BatchNorm2d(planes) 73 | self.conv3 = QWAConv2D(planes, planes * 4, kernel_size=1, bias=False) 74 | self.bn3 = nn.BatchNorm2d(planes * 4) 75 | self.relu = nn.ReLU(inplace=True) 76 | self.downsample = downsample 77 | self.stride = stride 78 | 79 | def forward(self, x): 80 | residual = x 81 | 82 | out = self.conv1(x) 83 | out = self.bn1(out) 84 | out = self.relu(out) 85 | 86 | out = self.conv2(out) 87 | out = self.bn2(out) 88 | out = self.relu(out) 89 | 90 | out = self.conv3(out) 91 | out = self.bn3(out) 92 | 93 | if self.downsample is not None: 94 | residual = self.downsample(x) 95 | 96 | out += residual 97 | out = self.relu(out) 98 | 99 | return out 100 | 101 | 102 | class ResNet(nn.Module): 103 | 104 | def __init__(self, qblock, layers, num_classes=1000): 105 | self.inplanes = 64 106 | super(ResNet, self).__init__() 107 | self.conv1 = QWConv2D(3, 64, kernel_size=7, stride=2, padding=3, 108 | bias=False) 109 | self.bn1 = nn.BatchNorm2d(64) 110 | self.relu = nn.ReLU(inplace=True) 111 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 112 | self.layer1 = self._make_layer(qblock, 64, layers[0]) 113 | self.layer2 = self._make_layer(qblock, 128, layers[1], stride=2) 114 | self.layer3 = self._make_layer(qblock, 256, layers[2], stride=2) 115 | self.layer4 = self._make_layer(qblock, 512, layers[3], stride=2) 116 | self.avgpool = nn.AvgPool2d(7, stride=1) 117 | self.fc = QWALinear(512 * qblock.expansion, 
num_classes) # 修改 118 | self.scalar = Scalar() # 修改 119 | 120 | for m in self.modules(): 121 | if isinstance(m, nn.Conv2d): 122 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 123 | m.weight.data.normal_(0, math.sqrt(2. / n)) 124 | elif isinstance(m, nn.BatchNorm2d): 125 | m.weight.data.fill_(1) 126 | m.bias.data.zero_() 127 | 128 | def _make_layer(self, block, planes, blocks, stride=1): 129 | downsample = None 130 | if stride != 1 or self.inplanes != planes * block.expansion: 131 | downsample = nn.Sequential( 132 | QWAConv2D(self.inplanes, planes * block.expansion, 133 | kernel_size=1, stride=stride, bias=False), 134 | nn.BatchNorm2d(planes * block.expansion), 135 | ) 136 | 137 | layers = [] 138 | layers.append(block(self.inplanes, planes, stride, downsample)) 139 | self.inplanes = planes * block.expansion 140 | for i in range(1, blocks): 141 | layers.append(block(self.inplanes, planes)) 142 | 143 | return nn.Sequential(*layers) 144 | 145 | def forward(self, x): 146 | x = self.conv1(x) 147 | x = self.bn1(x) 148 | x = self.relu(x) 149 | x = self.maxpool(x) 150 | 151 | x = self.layer1(x) 152 | x = self.layer2(x) 153 | x = self.layer3(x) 154 | x = self.layer4(x) 155 | 156 | x = self.avgpool(x) 157 | x = x.view(x.size(0), -1) 158 | x = self.fc(x) 159 | x = self.scalar(x) # 修改 160 | 161 | return x 162 | 163 | 164 | def resnet18(pretrained=False, **kwargs): 165 | """Constructs a ResNet-18 model. 166 | 167 | Args: 168 | pretrained (bool): If True, returns a model pre-trained on ImageNet 169 | """ 170 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 171 | if pretrained: 172 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 173 | return model 174 | 175 | 176 | def resnet34(pretrained=False, **kwargs): 177 | """Constructs a ResNet-34 model. 178 | 179 | Args: 180 | pretrained (bool): If True, returns a model pre-trained on ImageNet 181 | """ 182 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 183 | if pretrained: 184 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 185 | return model 186 | 187 | 188 | def resnet50(pretrained=False, **kwargs): 189 | """Constructs a ResNet-50 model. 190 | 191 | Args: 192 | pretrained (bool): If True, returns a model pre-trained on ImageNet 193 | """ 194 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 195 | if pretrained: 196 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 197 | return model 198 | 199 | 200 | def resnet101(pretrained=False, **kwargs): 201 | """Constructs a ResNet-101 model. 202 | 203 | Args: 204 | pretrained (bool): If True, returns a model pre-trained on ImageNet 205 | """ 206 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 207 | if pretrained: 208 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 209 | return model 210 | 211 | 212 | def resnet152(pretrained=False, **kwargs): 213 | """Constructs a ResNet-152 model. 
214 | 215 | Args: 216 | pretrained (bool): If True, returns a model pre-trained on ImageNet 217 | """ 218 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 219 | if pretrained: 220 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 221 | return model 222 | -------------------------------------------------------------------------------- /net/net_quantize_guide.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import torch.nn as nn 3 | import math 4 | import torch.utils.model_zoo as model_zoo 5 | from quantize.quantize_module_ import QWConv2D, QWAConv2D, QWALinear, Scalar 6 | from quantize.quantize_method import quantize_activations_gemm 7 | 8 | """ 9 | guide 两个模型一起训练的两种思路, 10 | 1. 将两个模型分别训练, 然后提取中间层的 feature map, 计算 distance 11 | 2. 讲这两个模型写成一个网络, 一起训练, 加载参数时, 一起加载, 然后提取训练好的低精度模型的参数 12 | """ 13 | 14 | """ 15 | 1. 将卷积层, 除了第一层使用 QWConv2D(不量化输出, 不然性能下降10个百分点), 全部使用 QWACvon2D 16 | 2. 线性层全部使用 QWALinear, 线性层所占的参数比例在 resnet18中占据 4.4%, resnet50中占据 8%, 不量化的话会有大约 0.4个百分点的性能提升 17 | 3. 在全连接层送入 softmax 之前, 加一个标量层, 做 softmax 的软化?? 18 | """ 19 | 20 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 21 | 'resnet152'] 22 | 23 | 24 | model_urls = { 25 | 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 26 | 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', 27 | 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth', 28 | 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth', 29 | 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth', 30 | } 31 | 32 | 33 | def qconv3x3(in_planes, out_planes, stride=1): 34 | """3x3 convolution with padding""" 35 | return QWAConv2D(in_planes, out_planes, kernel_size=3, stride=stride, 36 | padding=1, bias=False) 37 | 38 | 39 | def conv3x3(in_planes, out_planes, stride=1): 40 | """3x3 convolution with padding""" 41 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 42 | padding=1, bias=False) 43 | 44 | 45 | class ResNet(nn.Module): 46 | 47 | def __init__(self, block, layers, num_classes=1000): 48 | self.inplanes = 64 49 | super(ResNet, self).__init__() 50 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 51 | bias=False) 52 | self.bn1 = nn.BatchNorm2d(64) 53 | self.relu = nn.ReLU(inplace=True) 54 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 55 | self.layer1 = self._make_layer(block, 64, layers[0]) 56 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 57 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 58 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 59 | self.avgpool = nn.AvgPool2d(7, stride=1) 60 | self.fc = nn.Linear(512 * block.expansion, num_classes) 61 | 62 | for m in self.modules(): 63 | if isinstance(m, nn.Conv2d): 64 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 65 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 66 | elif isinstance(m, nn.BatchNorm2d): 67 | m.weight.data.fill_(1) 68 | m.bias.data.zero_() 69 | 70 | def _make_layer(self, block, planes, blocks, stride=1): 71 | downsample = None 72 | if stride != 1 or self.inplanes != planes * block.expansion: 73 | downsample = nn.Sequential( 74 | nn.Conv2d(self.inplanes, planes * block.expansion, 75 | kernel_size=1, stride=stride, bias=False), 76 | nn.BatchNorm2d(planes * block.expansion), 77 | ) 78 | 79 | layers = [] 80 | layers.append(block(self.inplanes, planes, stride, downsample)) 81 | self.inplanes = planes * block.expansion 82 | for i in range(1, blocks): 83 | layers.append(block(self.inplanes, planes)) 84 | 85 | return nn.Sequential(*layers) 86 | 87 | def forward(self, x): 88 | x = self.conv1(x) 89 | x = self.bn1(x) 90 | x = self.relu(x) 91 | x = self.maxpool(x) 92 | 93 | x = self.layer1(x) 94 | x = self.layer2(x) 95 | x = self.layer3(x) 96 | x = self.layer4(x) 97 | 98 | x = self.avgpool(x) 99 | x = x.view(x.size(0), -1) 100 | x = self.fc(x) 101 | 102 | return x 103 | 104 | 105 | class BasicBlock(nn.Module): 106 | expansion = 1 107 | 108 | def __init__(self, inplanes, planes, stride=1, downsample=None): 109 | super(BasicBlock, self).__init__() 110 | self.conv1 = conv3x3(inplanes, planes, stride) 111 | self.bn1 = nn.BatchNorm2d(planes) 112 | self.relu = nn.ReLU(inplace=True) 113 | self.conv2 = conv3x3(planes, planes) 114 | self.bn2 = nn.BatchNorm2d(planes) 115 | self.downsample = downsample 116 | self.stride = stride 117 | 118 | def forward(self, x): 119 | residual = x 120 | 121 | out = self.conv1(x) 122 | out = self.bn1(out) 123 | out = self.relu(out) 124 | 125 | out = self.conv2(out) 126 | out = self.bn2(out) 127 | 128 | if self.downsample is not None: 129 | residual = self.downsample(x) 130 | 131 | out += residual 132 | out = self.relu(out) 133 | 134 | return out 135 | 136 | 137 | class QBasicBlock(nn.Module): 138 | expansion = 1 139 | 140 | def __init__(self, inplanes, planes, stride=1, downsample=None): 141 | super(QBasicBlock, self).__init__() 142 | self.conv1 = qconv3x3(inplanes, planes, stride) 143 | self.bn1 = nn.BatchNorm2d(planes) 144 | self.relu = nn.ReLU(inplace=True) 145 | self.conv2 = qconv3x3(planes, planes) 146 | self.bn2 = nn.BatchNorm2d(planes) 147 | self.downsample = downsample 148 | self.stride = stride 149 | 150 | def forward(self, x): 151 | residual = x 152 | out = self.conv1(x) 153 | out = self.bn1(out) 154 | out = self.relu(out) 155 | 156 | out = self.conv2(out) 157 | out = self.bn2(out) 158 | 159 | if self.downsample is not None: 160 | residual = self.downsample(x) 161 | 162 | out += residual 163 | out = self.relu(out) 164 | 165 | return out 166 | 167 | 168 | class Bottleneck(nn.Module): 169 | expansion = 4 170 | 171 | def __init__(self, inplanes, planes, stride=1, downsample=None): 172 | super(Bottleneck, self).__init__() 173 | self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) 174 | self.bn1 = nn.BatchNorm2d(planes) 175 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, 176 | padding=1, bias=False) 177 | self.bn2 = nn.BatchNorm2d(planes) 178 | self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) 179 | self.bn3 = nn.BatchNorm2d(planes * 4) 180 | self.relu = nn.ReLU(inplace=True) 181 | self.downsample = downsample 182 | self.stride = stride 183 | 184 | def forward(self, x): 185 | residual = x 186 | 187 | out = self.conv1(x) 188 | out = self.bn1(out) 189 | out = self.relu(out) 190 | 191 | out = self.conv2(out) 192 | out = self.bn2(out) 193 | out = 
self.relu(out) 194 | 195 | out = self.conv3(out) 196 | out = self.bn3(out) 197 | 198 | if self.downsample is not None: 199 | residual = self.downsample(x) 200 | 201 | out += residual 202 | out = self.relu(out) 203 | 204 | return out 205 | 206 | 207 | class QBottleneck(nn.Module): 208 | expansion = 4 209 | 210 | def __init__(self, inplanes, planes, stride=1, downsample=None): 211 | super(QBottleneck, self).__init__() 212 | self.conv1 = QWAConv2D(inplanes, planes, kernel_size=1, bias=False) 213 | self.bn1 = nn.BatchNorm2d(planes) 214 | self.conv2 = QWAConv2D(planes, planes, kernel_size=3, stride=stride, 215 | padding=1, bias=False) 216 | self.bn2 = nn.BatchNorm2d(planes) 217 | self.conv3 = QWAConv2D(planes, planes * 4, kernel_size=1, bias=False) 218 | self.bn3 = nn.BatchNorm2d(planes * 4) 219 | self.relu = nn.ReLU(inplace=True) 220 | self.downsample = downsample 221 | self.stride = stride 222 | 223 | def forward(self, x): 224 | residual = x 225 | 226 | out = self.conv1(x) 227 | out = self.bn1(out) 228 | out = self.relu(out) 229 | 230 | out = self.conv2(out) 231 | out = self.bn2(out) 232 | out = self.relu(out) 233 | 234 | out = self.conv3(out) 235 | out = self.bn3(out) 236 | 237 | if self.downsample is not None: 238 | residual = self.downsample(x) 239 | 240 | out += residual 241 | out = self.relu(out) 242 | 243 | return out 244 | 245 | 246 | class ResNet(nn.Module): 247 | 248 | def __init__(self, qblock, block, layers, num_classes=1000): 249 | self.inplanes = 64 250 | super(ResNet, self).__init__() 251 | self.qconv1 = QWConv2D(3, 64, kernel_size=7, stride=2, padding=3, 252 | bias=False) 253 | self.qbn1 = nn.BatchNorm2d(64) 254 | self.qrelu = nn.ReLU(inplace=True) 255 | self.qmaxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 256 | self.qlayer1 = self._qmake_layer(qblock, 64, layers[0]) 257 | self.qlayer2 = self._qmake_layer(qblock, 128, layers[1], stride=2) 258 | self.qlayer3 = self._qmake_layer(qblock, 256, layers[2], stride=2) 259 | self.qlayer4 = self._qmake_layer(qblock, 512, layers[3], stride=2) 260 | self.qavgpool = nn.AvgPool2d(7, stride=1) 261 | self.qfc = QWALinear(512 * qblock.expansion, num_classes) # 修改 262 | self.scalar = Scalar() # 修改 263 | 264 | # 全精度的 module 265 | self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, 266 | bias=False) 267 | self.bn1 = nn.BatchNorm2d(64) 268 | self.relu = nn.ReLU(inplace=True) 269 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 270 | self.layer1 = self._make_layer(block, 64, layers[0]) 271 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 272 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 273 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 274 | self.avgpool = nn.AvgPool2d(7, stride=1) 275 | self.fc = nn.Linear(512 * block.expansion, num_classes) 276 | 277 | for m in self.modules(): 278 | if isinstance(m, nn.Conv2d): 279 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 280 | m.weight.data.normal_(0, math.sqrt(2. 
/ n))
281 |             elif isinstance(m, nn.BatchNorm2d):
282 |                 m.weight.data.fill_(1)
283 |                 m.bias.data.zero_()
284 | 
285 |     def _qmake_layer(self, block, planes, blocks, stride=1):
286 |         downsample = None
287 |         if stride != 1 or self.inplanes != planes * block.expansion:
288 |             downsample = nn.Sequential(
289 |                 QWAConv2D(self.inplanes, planes * block.expansion,
290 |                           kernel_size=1, stride=stride, bias=False),
291 |                 nn.BatchNorm2d(planes * block.expansion),
292 |             )
293 | 
294 |         layers = []
295 |         layers.append(block(self.inplanes, planes, stride, downsample))
296 |         self.inplanes = planes * block.expansion
297 |         for i in range(1, blocks):
298 |             layers.append(block(self.inplanes, planes))
299 | 
300 |         return nn.Sequential(*layers)
301 | 
302 |     def _make_layer(self, block, planes, blocks, stride=1):
303 |         downsample = None
304 |         if stride != 1 or self.inplanes != planes * block.expansion:
305 |             downsample = nn.Sequential(
306 |                 nn.Conv2d(self.inplanes, planes * block.expansion,
307 |                           kernel_size=1, stride=stride, bias=False),
308 |                 nn.BatchNorm2d(planes * block.expansion),
309 |             )
310 | 
311 |         layers = []
312 |         layers.append(block(self.inplanes, planes, stride, downsample))
313 |         self.inplanes = planes * block.expansion
314 |         for i in range(1, blocks):
315 |             layers.append(block(self.inplanes, planes))
316 | 
317 |         return nn.Sequential(*layers)
318 | 
319 |     @staticmethod
320 |     def num_features(fm_shape):
321 |         num = 1
322 |         for dim in fm_shape:
323 |             num *= dim
324 |         return num
325 | 
326 |     def forward(self, x):
327 |         qx = x
328 |         qx = self.qconv1(qx)
329 |         qx = self.qbn1(qx)
330 |         qx = self.qrelu(qx)
331 |         qx = self.qmaxpool(qx)
332 | 
333 |         qx = self.qlayer1(qx)
334 |         qx = self.qlayer2(qx)
335 |         ql3 = self.qlayer3(qx)
336 |         ql4 = self.qlayer4(ql3)
337 | 
338 |         qx = self.qavgpool(ql4)
339 |         qx = qx.view(qx.size(0), -1)
340 |         qx = self.qfc(qx)
341 |         qx = self.scalar(qx)  # modified: scalar layer before softmax
342 | 
343 |         # full-precision branch
344 |         x = self.conv1(x)
345 |         x = self.bn1(x)
346 |         x = self.relu(x)
347 |         x = self.maxpool(x)
348 | 
349 |         x = self.layer1(x)
350 |         x = self.layer2(x)
351 |         l3 = self.layer3(x)
352 |         l4 = self.layer4(l3)
353 | 
354 |         x = self.avgpool(l4)
355 |         x = x.view(x.size(0), -1)
356 |         x = self.fc(x)
357 | 
358 |         pair_distance = nn.PairwiseDistance(p=1)
359 |         distance = pair_distance(quantize_activations_gemm(ql3), quantize_activations_gemm(l3)) / self.num_features(l3.size()) \
360 |             + pair_distance(quantize_activations_gemm(ql4), quantize_activations_gemm(l4)) / self.num_features(l4.size())
361 | 
362 |         return qx, x, distance
363 | 
364 | 
365 | def resnet18(pretrained=False, **kwargs):
366 |     """Constructs a ResNet-18 model.
367 | 
368 |     Args:
369 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
370 |     """
371 |     model = ResNet(QBasicBlock, BasicBlock, [2, 2, 2, 2], **kwargs)
372 |     if pretrained:
373 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
374 |     return model
375 | 
376 | 
377 | def resnet34(pretrained=False, **kwargs):
378 |     """Constructs a ResNet-34 model.
379 | 
380 |     Args:
381 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
382 |     """
383 |     model = ResNet(QBasicBlock, BasicBlock, [3, 4, 6, 3], **kwargs)
384 |     if pretrained:
385 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
386 |     return model
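This forward pass returns the quantized-branch logits, the full-precision logits, and the guidance distance in one call, which corresponds to the second of the two training schemes sketched in this file's header docstring. A rough illustration of how a training step could consume that triple; the loss combination, learning rate, and `balance` value here are assumptions for the sketch, not this repo's `main.py` logic (`--balance` is the weighting flag exposed elsewhere in the README):

```python
import torch
import torch.nn as nn
from net import net_quantize_guide

model = net_quantize_guide.resnet18().cuda()
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
balance = 0.1  # guidance weight, cf. the --balance flag

def train_step(images, targets):
    qlogits, logits, distance = model(images)
    # cross-entropy on both branches plus the weighted feature-map distance
    loss = (criterion(qlogits, targets) + criterion(logits, targets)
            + balance * distance.sum())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()
```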
387 | 
388 | 
389 | def resnet50(pretrained=False, **kwargs):
390 |     """Constructs a ResNet-50 model.
391 | 
392 |     Args:
393 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
394 |     """
395 |     model = ResNet(QBottleneck, Bottleneck, [3, 4, 6, 3], **kwargs)
396 |     if pretrained:
397 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
398 |     return model
399 | 
400 | 
401 | def resnet101(pretrained=False, **kwargs):
402 |     """Constructs a ResNet-101 model.
403 | 
404 |     Args:
405 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
406 |     """
407 |     model = ResNet(QBottleneck, Bottleneck, [3, 4, 23, 3], **kwargs)
408 |     if pretrained:
409 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
410 |     return model
411 | 
412 | 
413 | def resnet152(pretrained=False, **kwargs):
414 |     """Constructs a ResNet-152 model.
415 | 
416 |     Args:
417 |         pretrained (bool): If True, returns a model pre-trained on ImageNet
418 |     """
419 |     model = ResNet(QBottleneck, Bottleneck, [3, 8, 36, 3], **kwargs)
420 |     if pretrained:
421 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
422 |     return model
423 | 
--------------------------------------------------------------------------------
/net/net_quantize_weight.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | """
3 | All linear layers use QWLinear
4 | """
5 | import torch.nn as nn
6 | import math
7 | import torch.utils.model_zoo as model_zoo
8 | from quantize.quantize_module_ import QWConv2D, Scalar, QWLinear
9 | 
10 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
11 |            'resnet152']
12 | 
13 | 
14 | model_urls = {
15 |     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
16 |     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
17 |     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
18 |     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
19 |     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
20 | }
21 | 
22 | 
23 | def conv3x3(in_planes, out_planes, stride=1):
24 |     """3x3 convolution with padding"""
25 |     return QWConv2D(in_planes, out_planes, kernel_size=3, stride=stride,
26 |                     padding=1, bias=False)
27 | 
28 | 
29 | class BasicBlock(nn.Module):
30 |     expansion = 1
31 | 
32 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
33 |         super(BasicBlock, self).__init__()
34 |         self.conv1 = conv3x3(inplanes, planes, stride)
35 |         self.bn1 = nn.BatchNorm2d(planes)
36 |         self.relu = nn.ReLU(inplace=True)
37 |         self.conv2 = conv3x3(planes, planes)
38 |         self.bn2 = nn.BatchNorm2d(planes)
39 |         self.downsample = downsample
40 |         self.stride = stride
41 | 
42 |     def forward(self, x):
43 |         residual = x
44 | 
45 |         out = self.conv1(x)
46 |         out = self.bn1(out)
47 |         out = self.relu(out)
48 | 
49 |         out = self.conv2(out)
50 |         out = self.bn2(out)
51 | 
52 |         if self.downsample is not None:
53 |             residual = self.downsample(x)
54 | 
55 |         out += residual
56 |         out = self.relu(out)
57 | 
58 |         return out
59 | 
60 | 
61 | class Bottleneck(nn.Module):
62 |     expansion = 4
63 | 
64 |     def __init__(self, inplanes, planes, stride=1, downsample=None):
65 |         super(Bottleneck, self).__init__()
66 |         self.conv1 = QWConv2D(inplanes, planes, kernel_size=1, bias=False)
67 |         self.bn1 = nn.BatchNorm2d(planes)
68 |         self.conv2 = QWConv2D(planes, planes, kernel_size=3, stride=stride,
69 |                               padding=1, bias=False)
70 |         self.bn2 = nn.BatchNorm2d(planes)
71 |         self.conv3 = QWConv2D(planes, planes * 4, kernel_size=1, bias=False)
72 |         self.bn3 = nn.BatchNorm2d(planes * 4)
73 |         self.relu = nn.ReLU(inplace=True)
74 | 
self.downsample = downsample 75 | self.stride = stride 76 | 77 | def forward(self, x): 78 | residual = x 79 | 80 | out = self.conv1(x) 81 | out = self.bn1(out) 82 | out = self.relu(out) 83 | 84 | out = self.conv2(out) 85 | out = self.bn2(out) 86 | out = self.relu(out) 87 | 88 | out = self.conv3(out) 89 | out = self.bn3(out) 90 | 91 | if self.downsample is not None: 92 | residual = self.downsample(x) 93 | 94 | out += residual 95 | out = self.relu(out) 96 | 97 | return out 98 | 99 | 100 | class ResNet(nn.Module): 101 | 102 | def __init__(self, block, layers, num_classes=1000): 103 | self.inplanes = 64 104 | super(ResNet, self).__init__() 105 | self.conv1 = QWConv2D(3, 64, kernel_size=7, stride=2, padding=3, 106 | bias=False) 107 | self.bn1 = nn.BatchNorm2d(64) 108 | self.relu = nn.ReLU(inplace=True) 109 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 110 | self.layer1 = self._make_layer(block, 64, layers[0]) 111 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2) 112 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2) 113 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2) 114 | self.avgpool = nn.AvgPool2d(7, stride=1) 115 | self.fc = QWLinear(512 * block.expansion, num_classes) 116 | self.scalar = Scalar() 117 | 118 | for m in self.modules(): 119 | if isinstance(m, QWConv2D): 120 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 121 | m.weight.data.normal_(0, math.sqrt(2. / n)) 122 | elif isinstance(m, nn.BatchNorm2d): 123 | m.weight.data.fill_(1) 124 | m.bias.data.zero_() 125 | 126 | def _make_layer(self, block, planes, blocks, stride=1): 127 | downsample = None 128 | if stride != 1 or self.inplanes != planes * block.expansion: 129 | downsample = nn.Sequential( 130 | QWConv2D(self.inplanes, planes * block.expansion, 131 | kernel_size=1, stride=stride, bias=False), 132 | nn.BatchNorm2d(planes * block.expansion), 133 | ) 134 | 135 | layers = [] 136 | layers.append(block(self.inplanes, planes, stride, downsample)) 137 | self.inplanes = planes * block.expansion 138 | for i in range(1, blocks): 139 | layers.append(block(self.inplanes, planes)) 140 | 141 | return nn.Sequential(*layers) 142 | 143 | def forward(self, x): 144 | x = self.conv1(x) 145 | x = self.bn1(x) 146 | x = self.relu(x) 147 | x = self.maxpool(x) 148 | 149 | x = self.layer1(x) 150 | x = self.layer2(x) 151 | x = self.layer3(x) 152 | x = self.layer4(x) 153 | 154 | x = self.avgpool(x) 155 | x = x.view(x.size(0), -1) 156 | x = self.fc(x) 157 | x = self.scalar(x) 158 | 159 | return x 160 | 161 | 162 | def resnet18(pretrained=False, **kwargs): 163 | """Constructs a ResNet-18 model. 164 | 165 | Args: 166 | pretrained (bool): If True, returns a model pre-trained on ImageNet 167 | """ 168 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 169 | if pretrained: 170 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 171 | return model 172 | 173 | 174 | def resnet34(pretrained=False, **kwargs): 175 | """Constructs a ResNet-34 model. 176 | 177 | Args: 178 | pretrained (bool): If True, returns a model pre-trained on ImageNet 179 | """ 180 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 181 | if pretrained: 182 | model.load_state_dict(model_zoo.load_url(model_urls['resnet34'])) 183 | return model 184 | 185 | 186 | def resnet50(pretrained=False, **kwargs): 187 | """Constructs a ResNet-50 model. 
188 | 189 | Args: 190 | pretrained (bool): If True, returns a model pre-trained on ImageNet 191 | """ 192 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 193 | if pretrained: 194 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 195 | return model 196 | 197 | 198 | def resnet101(pretrained=False, **kwargs): 199 | """Constructs a ResNet-101 model. 200 | 201 | Args: 202 | pretrained (bool): If True, returns a model pre-trained on ImageNet 203 | """ 204 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 205 | if pretrained: 206 | model.load_state_dict(model_zoo.load_url(model_urls['resnet101'])) 207 | return model 208 | 209 | 210 | def resnet152(pretrained=False, **kwargs): 211 | """Constructs a ResNet-152 model. 212 | 213 | Args: 214 | pretrained (bool): If True, returns a model pre-trained on ImageNet 215 | """ 216 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 217 | if pretrained: 218 | model.load_state_dict(model_zoo.load_url(model_urls['resnet152'])) 219 | return model 220 | -------------------------------------------------------------------------------- /net/simple_net.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class Net(nn.Module): 7 | 8 | def __init__(self): 9 | super(Net, self).__init__() 10 | # 1 input image channel, 6 output channels, 5x5 square convolution 11 | # kernel 12 | self.conv1 = nn.Conv2d(1, 1, kernel_size=3, padding=0, stride=1, bias=False) 13 | self.conv2 = nn.Conv2d(1, 1, kernel_size=3, padding=1, stride=1, bias=False) 14 | # an affine operation: y = Wx + b 15 | self.fc1 = nn.Linear(16, 2) 16 | self.relu = nn.ReLU(inplace=True) 17 | 18 | def forward(self, x): 19 | x = self.conv1(x) 20 | x = self.relu(x) 21 | x = self.conv2(x) 22 | x = self.relu(x) 23 | size = x.size()[1:] # all dimensions except the batch dimension 24 | num_features = 1 25 | for s in size: 26 | num_features *= s 27 | x = x.view(-1, num_features) 28 | x = self.fc1(x) 29 | return x 30 | 31 | 32 | if __name__ == "__main__": 33 | net = Net() 34 | print(net) 35 | -------------------------------------------------------------------------------- /quantize/guided_distance_view.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import os 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.parallel 6 | import torch.backends.cudnn as cudnn 7 | import torch.optim 8 | import torch.utils.data 9 | import torch.utils.data.distributed 10 | import torchvision.models as models 11 | from tensorboardX import SummaryWriter 12 | from collections import defaultdict 13 | import time 14 | 15 | from utils.train_val import save_checkpoint, validate 16 | from utils.data_loader import load_train_data, load_val_data 17 | from utils.meter import AverageMeter, accuracy 18 | from quantize.quantize_method import quantize_activations_gemm 19 | from net import net_quantize_activation 20 | 21 | 22 | def guided(args): 23 | best_low_prec1 = 0 24 | full_prec_feature_map1 = defaultdict(torch.Tensor) 25 | full_prec_feature_map2 = defaultdict(torch.Tensor) 26 | low_prec_feature_map1 = defaultdict(torch.Tensor) 27 | low_prec_feature_map2 = defaultdict(torch.Tensor) 28 | 29 | def full_prec_hook(module, input, output): 30 | # 一定要写成 input[0].data.clone() 31 | # 而不能写成 input[0].clone(), 否则报错 32 | # RuntimeError: Trying to backward through the graph a second time, 33 | # but the buffers have already been freed. 
Specify retain_graph=True 34 | # when calling backward the first time 35 | cudaid = int(repr(output.device)[-2]) 36 | full_prec_feature_map1[cudaid] = input[0].data.clone() 37 | full_prec_feature_map2[cudaid] = output.data.clone() 38 | 39 | def low_prec_hook(module, input, output): 40 | cudaid = int(repr(output.device)[-2]) 41 | low_prec_feature_map1[cudaid] = input[0].data.clone() 42 | low_prec_feature_map2[cudaid] = output.data.clone() 43 | 44 | def gpu_config(model): 45 | if args.gpu is not None: # 指定GPU 46 | model = model.cuda(args.gpu) 47 | elif args.distributed: # 集群训练(多机器) 48 | model.cuda() 49 | model = torch.nn.parallel.DistributedDataParallel(model) 50 | 51 | else: # 单机训练(单卡或者多卡) 52 | if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): 53 | model.features = torch.nn.DataParallel(model.features) 54 | model.cuda() 55 | else: 56 | # 一机多卡时, 多 GPU 训练, 指定要用到 GPU 的 ids 57 | model = torch.nn.DataParallel(model, args.device_ids).cuda() 58 | return model 59 | 60 | def guided_train(summary_writer, log_per_epoch=100, print_freq=20): 61 | 62 | batch_time = AverageMeter() 63 | data_time = AverageMeter() 64 | 65 | low_prec_losses = AverageMeter() 66 | low_prec_top1 = AverageMeter() 67 | low_prec_top5 = AverageMeter() 68 | distance_meter = AverageMeter() 69 | 70 | # 状态转化为训练 71 | low_prec_model.train() 72 | full_prec_model.eval() 73 | 74 | end = time.time() 75 | 76 | # 用于控制 tensorboard 的显示频率 77 | interval = len(train_loader) // log_per_epoch 78 | summary_point = [interval * split for split in torch.arange(log_per_epoch)] 79 | 80 | for i, (input, target) in enumerate(train_loader): 81 | # measure checkpoint.pth data loading time 82 | data_time.update(time.time() - end) 83 | 84 | if args.gpu is not None: 85 | input = input.cuda(args.gpu, non_blocking=True) 86 | 87 | # target 必须要转为 cuda 类型 88 | # If ``True`` and the source is in pinned memory(固定内存), 89 | # the copy will be asynchronous(异步) with respect to the host 90 | target = target.cuda(args.gpu, non_blocking=True) 91 | 92 | full_prec_feature_map1.clear() 93 | low_prec_feature_map1.clear() 94 | full_prec_feature_map2.clear() 95 | low_prec_feature_map2.clear() 96 | 97 | # compute low_pre_output 98 | low_pre_output = low_prec_model(input) 99 | full_pre_output = full_prec_model(input) 100 | 101 | """Guided Key Point start""" 102 | 103 | # 将 distance 和 feature map放在同一个一gpu上 104 | distance = torch.tensor([0.0]).cuda(args.gpu, non_blocking=True) 105 | num_layer3_features = 1 106 | for dim in full_prec_feature_map1[0].size(): 107 | num_layer3_features *= dim 108 | 109 | num_layer4_features = 1 110 | for dim in full_prec_feature_map2[0].size(): 111 | num_layer4_features *= dim 112 | 113 | for cudaid in full_prec_feature_map1: 114 | # 手动将feature map都搬到同一个 GPU 上 115 | full_prec_feature_map1[cudaid] = full_prec_feature_map1[cudaid].cuda(args.gpu, non_blocking=True) 116 | low_prec_feature_map1[cudaid] = low_prec_feature_map1[cudaid].cuda(args.gpu, non_blocking=True) 117 | full_prec_feature_map2[cudaid] = full_prec_feature_map2[cudaid].cuda(args.gpu, non_blocking=True) 118 | low_prec_feature_map2[cudaid] = low_prec_feature_map2[cudaid].cuda(args.gpu, non_blocking=True) 119 | 120 | for cudaid in low_prec_feature_map1: 121 | """ 122 | RuntimeError: arguments are located on different GPUs 123 | 解决方法在于手动将feature map都搬到同一个 GPU 上 124 | """ 125 | layer3 = (quantize_activations_gemm(low_prec_feature_map1[cudaid]) - 126 | quantize_activations_gemm(full_prec_feature_map1[cudaid])).norm(p=args.norm) / num_layer3_features 127 | layer4 = 
(quantize_activations_gemm(low_prec_feature_map2[cudaid]) - 128 | quantize_activations_gemm(full_prec_feature_map2[cudaid])).norm(p=args.norm) / num_layer4_features 129 | distance += (layer3 + layer4) / len(low_prec_feature_map1) 130 | 131 | distance *= args.balance 132 | 133 | """Guided Key Point end""" 134 | 135 | low_prec_loss = criterion(low_pre_output, target) 136 | low_prec_prec1, low_prec_prec5 = accuracy(low_pre_output, target, topk=(1, 5)) 137 | 138 | low_prec_losses.update(low_prec_loss.item(), input.size(0)) 139 | low_prec_top1.update(low_prec_prec1[0], input.size(0)) 140 | low_prec_top5.update(low_prec_prec5[0], input.size(0)) 141 | distance_meter.update(distance[0], 1) 142 | 143 | # compute gradient and do SGD step 144 | low_prec_optimizer.zero_grad() 145 | low_prec_loss.backward() 146 | low_prec_optimizer.step() 147 | 148 | # measure elapsed time 149 | batch_time.update(time.time() - end) 150 | end = time.time() 151 | 152 | if i % print_freq == 0: 153 | 154 | print('Epoch: [{0}][{1}/{2}]\t' 155 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 156 | 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 157 | 'Loss {low_prec_loss.val:.4f} ({low_prec_loss.avg:.4f})\t' 158 | 'Prec@1 {low_prec_top1.val:.3f} ({low_prec_top1.avg:.3f})\t' 159 | 'Prec@5 {low_prec_top5.val:.3f} ({low_prec_top5.avg:.3f}) \t' 160 | 'distance {distance.val:.3f} ({distance.avg:.3f})'.format( 161 | epoch, i, len(train_loader), batch_time=batch_time, 162 | data_time=data_time, low_prec_loss=low_prec_losses, low_prec_top1=low_prec_top1, 163 | low_prec_top5=low_prec_top5, distance=distance_meter)) 164 | 165 | if summary_writer is not None and (i in summary_point): 166 | step = i / interval + (epoch - 1) * log_per_epoch 167 | summary_writer.add_scalar("distance", distance_meter.avg, step) 168 | summary_writer.add_scalar("loss/low_prec_loss", low_prec_loss, step) 169 | summary_writer.add_scalar("train_low_prec/top-1", low_prec_top1.avg, step) 170 | summary_writer.add_scalar("train_low_prec/top-5", low_prec_top5.avg, step) 171 | 172 | # 代码用于使用预训练的ResNet18来同时量化网络权重和激活 173 | print("=> using imageNet pre-trained model '{}'".format(args.arch)) 174 | # 获取预训练模型参数 175 | full_prec_model = models.__dict__[args.arch](pretrained=True) 176 | low_prec_model = net_quantize_activation.__dict__[args.arch]() 177 | 178 | model_dict = low_prec_model.state_dict() 179 | imagenet_dict = full_prec_model.state_dict() 180 | model_dict.update(imagenet_dict) 181 | low_prec_model.load_state_dict(model_dict) 182 | 183 | low_prec_layer4 = low_prec_model._modules.get("layer4") 184 | full_prec_layer4 = full_prec_model._modules.get("layer4") 185 | 186 | hook_low_prec = low_prec_layer4.register_forward_hook(low_prec_hook) 187 | hook_full_prec = full_prec_layer4.register_forward_hook(full_prec_hook) 188 | 189 | low_prec_model = gpu_config(low_prec_model) 190 | full_prec_model = gpu_config(full_prec_model) 191 | 192 | # 定义损失函数和优化器 193 | criterion = nn.CrossEntropyLoss().cuda(args.gpu) 194 | low_prec_optimizer = torch.optim.SGD(low_prec_model.parameters(), 195 | args.lr, 196 | momentum=args.momentum, 197 | weight_decay=args.weight_decay) 198 | 199 | low_prec_scheduler = torch.optim.lr_scheduler.StepLR(low_prec_optimizer, step_size=args.lr_step, gamma=0.1) 200 | 201 | cudnn.benchmark = True 202 | 203 | val_loader = load_val_data(args.data, args.batch_size, args.workers) 204 | train_loader, train_sampler = load_train_data(args.data, args.batch_size, args.workers, args.distributed) 205 | 206 | # 加载日志 writer 207 | writer = 
SummaryWriter(args.save_dir) 208 | 209 | for epoch in range(args.start_epoch, args.epochs+1): 210 | if args.distributed: 211 | train_sampler.set_epoch(epoch) 212 | 213 | low_prec_scheduler.step() 214 | 215 | # train for one epoch 216 | guided_train(writer) 217 | 218 | # evaluate on validation set 219 | low_prec1 = validate(low_prec_model, val_loader, criterion, args.gpu, 220 | epoch, writer, name_prefix='low_prec') 221 | 222 | # remember best prec@1 and save low_prec_checkpoint 223 | is_best_low = low_prec1 > best_low_prec1 224 | 225 | best_low_prec1 = max(low_prec1, best_low_prec1) 226 | 227 | save_checkpoint({ 228 | 'epoch': epoch + 1, 229 | 'arch': args.arch, 230 | 'state_dict': low_prec_model.state_dict(), 231 | 'best_prec1': best_low_prec1, 232 | 'optimizer': low_prec_optimizer.state_dict(), 233 | }, is_best_low, args.save_dir, name_prefix="low_prec") 234 | 235 | # 关闭日志 writer 236 | writer.close() 237 | 238 | # 去掉钩子 239 | 240 | hook_full_prec.remove() 241 | hook_low_prec.remove() 242 | 243 | -------------------------------------------------------------------------------- /quantize/quantize_guided.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import os 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.parallel 6 | import torch.backends.cudnn as cudnn 7 | import torch.optim 8 | import torch.utils.data 9 | import torch.utils.data.distributed 10 | import torchvision.models as models 11 | from tensorboardX import SummaryWriter 12 | from collections import defaultdict 13 | import time 14 | import warnings 15 | 16 | from utils.train_val import save_checkpoint, validate 17 | from utils.data_loader import load_train_data, load_val_data 18 | from utils.meter import AverageMeter, accuracy 19 | from quantize.quantize_method import quantize_activations_gemm 20 | from net import net_quantize_activation 21 | 22 | 23 | def guided(args): 24 | best_low_prec1 = 0 25 | best_full_prec1 = 0 26 | full_prec_feature_map1 = defaultdict(torch.Tensor) 27 | full_prec_feature_map2 = defaultdict(torch.Tensor) 28 | low_prec_feature_map1 = defaultdict(torch.Tensor) 29 | low_prec_feature_map2 = defaultdict(torch.Tensor) 30 | 31 | def full_prec_hook(module, input, output): 32 | # 一定要写成 input[0] 33 | # 而不能写成 input[0].data.clone(), 否则没法加入反向传播 34 | 35 | # 而使用直接使用 input[0] 也会有问题, 如下 36 | # RuntimeError: Trying to backward through the graph a second time, 37 | # but the buffers have already been freed. 
Specify retain_graph=True 38 | # when calling backward the first time 39 | # 即 distance_loss 同时参与高精度和低精度的反向传播, 比如先通过低精度的反向传播之后 40 | # 该 distance_loss 的计算图被释放, 然后第二次使用的时候, 找不到对应的计算图和相应的参数 41 | cudaid = int(repr(output.device)[-2]) 42 | full_prec_feature_map1[cudaid] = input[0] 43 | full_prec_feature_map2[cudaid] = output 44 | 45 | def low_prec_hook(module, input, output): 46 | cudaid = int(repr(output.device)[-2]) 47 | low_prec_feature_map1[cudaid] = input[0] 48 | low_prec_feature_map2[cudaid] = output 49 | 50 | def gpu_config(model): 51 | if args.gpu is not None: # 指定GPU 52 | model = model.cuda(args.gpu) 53 | elif args.distributed: # 集群训练(多机器) 54 | model.cuda() 55 | model = torch.nn.parallel.DistributedDataParallel(model) 56 | 57 | else: # 单机训练(单卡或者多卡) 58 | if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): 59 | model.features = torch.nn.DataParallel(model.features) 60 | model.cuda() 61 | else: 62 | # 一机多卡时, 多 GPU 训练, 指定要用到 GPU 的 ids 63 | model = torch.nn.DataParallel(model, args.device_ids).cuda() 64 | return model 65 | 66 | def guided_train(summary_writer, log_per_epoch=100, print_freq=20): 67 | 68 | batch_time = AverageMeter() 69 | data_time = AverageMeter() 70 | 71 | low_prec_losses = AverageMeter() 72 | low_prec_top1 = AverageMeter() 73 | low_prec_top5 = AverageMeter() 74 | 75 | full_prec_losses = AverageMeter() 76 | full_prec_top1 = AverageMeter() 77 | full_prec_top5 = AverageMeter() 78 | distance_meter = AverageMeter() 79 | 80 | # 状态转化为训练 81 | low_prec_model.train() 82 | full_prec_model.train() 83 | 84 | end = time.time() 85 | 86 | # 用于控制 tensorboard 的显示频率 87 | interval = len(train_loader) // log_per_epoch 88 | summary_point = [interval * split for split in torch.arange(log_per_epoch)] 89 | 90 | for i, (input, target) in enumerate(train_loader): 91 | # measure checkpoint.pth data loading time 92 | data_time.update(time.time() - end) 93 | 94 | if args.gpu is not None: 95 | input = input.cuda(args.gpu, non_blocking=True) 96 | 97 | # target 必须要转为 cuda 类型 98 | # If ``True`` and the source is in pinned memory(固定内存), 99 | # the copy will be asynchronous(异步) with respect to the host 100 | target = target.cuda(args.gpu, non_blocking=True) 101 | 102 | full_prec_feature_map1.clear() 103 | low_prec_feature_map1.clear() 104 | full_prec_feature_map2.clear() 105 | low_prec_feature_map2.clear() 106 | 107 | # compute low_pre_output 108 | low_pre_output = low_prec_model(input) 109 | full_pre_output = full_prec_model(input) 110 | 111 | """Guided Key Point start""" 112 | 113 | # 将 distance 和 feature map放在同一个一gpu上 114 | distance = torch.tensor([0.0]).cuda(args.gpu, non_blocking=True) 115 | num_layer3_features = 1 116 | for dim in full_prec_feature_map1[0].size(): 117 | num_layer3_features *= dim 118 | 119 | num_layer4_features = 1 120 | for dim in full_prec_feature_map2[0].size(): 121 | num_layer4_features *= dim 122 | 123 | for cudaid in low_prec_feature_map1: 124 | 125 | layer3 = (quantize_activations_gemm(low_prec_feature_map1[cudaid]) - 126 | quantize_activations_gemm(full_prec_feature_map1[cudaid])).norm(p=args.norm) / num_layer3_features 127 | layer4 = (quantize_activations_gemm(low_prec_feature_map2[cudaid]) - 128 | quantize_activations_gemm(full_prec_feature_map2[cudaid])).norm(p=args.norm) / num_layer4_features 129 | # RuntimeError: arguments are located on different GPUs 130 | # 解决方法在于手动将 feature map 都搬到同一个GPU, Tensor.cuda(args.gpu, non_blocking=True) 131 | distance += (layer3 + layer4).cuda(args.gpu, non_blocking=True) / len(low_prec_feature_map1) 132 | 133 | distance *= 
args.balance
134 |             low_prec_loss = criterion(low_pre_output, target) + distance
135 |             full_prec_loss = criterion(full_pre_output, target) + distance
136 | 
137 |             low_prec_prec1, low_prec_prec5 = accuracy(low_pre_output, target, topk=(1, 5))
138 |             full_prec_prec1, full_prec_prec5 = accuracy(full_pre_output, target, topk=(1, 5))
139 | 
140 |             low_prec_losses.update(low_prec_loss.item(), input.size(0))
141 |             low_prec_top1.update(low_prec_prec1[0], input.size(0))
142 |             low_prec_top5.update(low_prec_prec5[0], input.size(0))
143 | 
144 |             full_prec_losses.update(full_prec_loss.item(), input.size(0))
145 |             full_prec_top1.update(full_prec_prec1[0], input.size(0))
146 |             full_prec_top5.update(full_prec_prec5[0], input.size(0))
147 |             distance_meter.update(distance[0], 1)
148 | 
149 |             # compute gradient and do SGD step
150 |             low_prec_optimizer.zero_grad()
151 |             full_prec_optimizer.zero_grad()
152 | 
153 |             low_prec_loss.backward()  # retain_graph=True would be required to also run full_prec_loss.backward()
154 |             # full_prec_loss.backward()
155 | 
156 |             # apply the updates with the freshly computed gradients
157 |             low_prec_optimizer.step()
158 |             full_prec_optimizer.step()
159 | 
160 |             # measure elapsed time
161 |             batch_time.update(time.time() - end)
162 |             end = time.time()
163 | 
164 |             if i % print_freq == 0:
165 | 
166 |                 print('Epoch: [{0}][{1}/{2}]\t'
167 |                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
168 |                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
169 |                       'Loss {low_prec_loss.val:.4f} ({low_prec_loss.avg:.4f})\t'
170 |                       'Prec@1 {low_prec_top1.val:.3f} ({low_prec_top1.avg:.3f})\t'
171 |                       'Prec@5 {low_prec_top5.val:.3f} ({low_prec_top5.avg:.3f}) \t'
172 |                       'distance {distance.val:.3f} ({distance.avg:.3f})'.format(
173 |                        epoch, i, len(train_loader), batch_time=batch_time,
174 |                        data_time=data_time, low_prec_loss=low_prec_losses, low_prec_top1=low_prec_top1,
175 |                        low_prec_top5=low_prec_top5, distance=distance_meter))
176 | 
177 |                 print('Epoch: [{0}][{1}/{2}]\t'
178 |                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
179 |                       'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
180 |                       'Loss {full_prec_loss.val:.4f} ({full_prec_loss.avg:.4f})\t'
181 |                       'Prec@1 {full_prec_top1.val:.3f} ({full_prec_top1.avg:.3f})\t'
182 |                       'Prec@5 {full_prec_top5.val:.3f} ({full_prec_top5.avg:.3f})'.format(
183 |                        epoch, i, len(train_loader), batch_time=batch_time,
184 |                        data_time=data_time, full_prec_loss=full_prec_losses, full_prec_top1=full_prec_top1,
185 |                        full_prec_top5=full_prec_top5))
186 | 
187 |             if summary_writer is not None and (i in summary_point):
188 |                 step = i / interval + (epoch - 1) * log_per_epoch
189 |                 summary_writer.add_scalar("distance", distance_meter.avg, step)
190 |                 summary_writer.add_scalar("loss/low_prec_loss", low_prec_loss, step)
191 |                 summary_writer.add_scalar("train_low_prec/top-1", low_prec_top1.avg, step)
192 |                 summary_writer.add_scalar("train_low_prec/top-5", low_prec_top5.avg, step)
193 | 
194 |                 summary_writer.add_scalar("loss/full_prec_loss", full_prec_loss, step)
195 |                 summary_writer.add_scalar("train_full_prec/top-1", full_prec_top1.avg, step)
196 |                 summary_writer.add_scalar("train_full_prec/top-5", full_prec_top5.avg, step)
197 | 
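The commented-out `full_prec_loss.backward()` above is the "backward through the graph a second time" trap described in the hook comments: `distance` is part of both losses, so a second backward would need the shared subgraph kept alive. A tiny standalone illustration of the two ways out, using toy tensors rather than this repo's models:

```python
import torch

w = torch.tensor([1.0], requires_grad=True)
shared = (3 * w).sum()   # stands in for the distance term
loss_a = shared + 1.0
loss_b = shared + 2.0

# option 1: keep the shared graph alive for the second backward
loss_a.backward(retain_graph=True)
loss_b.backward()        # fine; gradients accumulate in w.grad

# option 2 (what this file effectively does): backpropagate only one
# loss, so the other branch is driven by the shared distance term alone
```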
198 |     if args.weight_quantized:
199 |         print("=> using quantize_tanh-weight model '{}'".format(args.arch))
200 |         full_prec_model = models.__dict__[args.arch](pretrained=True)
201 |         low_prec_model = net_quantize_activation.__dict__[args.arch]()
202 |         if os.path.isfile(args.weight_quantized):
203 |             print("=> loading weight_quantized model '{}'".format(args.weight_quantized))
204 |             model_dict = low_prec_model.state_dict()
205 |             quantized_model = torch.load(args.weight_quantized)
206 |             pretrained_dict = {k[7:]: v for k, v in quantized_model['state_dict'].items()
207 |                                if k in low_prec_model.state_dict()}
208 |             model_dict.update(pretrained_dict)
209 |             low_prec_model.load_state_dict(model_dict)
210 |             print("=> loaded weight_quantized '{}'".format(args.weight_quantized))
211 |         else:
212 |             print("=> no quantize_tanh-weight model found at '{}'".format(args.weight_quantized))
213 |     else:
214 |         # quantize weights and activations simultaneously, starting from a pre-trained ResNet
215 |         print("=> using ImageNet pre-trained model '{}'".format(args.arch))
216 |         # fetch the pre-trained parameters
217 |         full_prec_model = models.__dict__[args.arch](pretrained=True)
218 |         low_prec_model = net_quantize_activation.__dict__[args.arch]()
219 | 
220 |         model_dict = low_prec_model.state_dict()
221 |         imagenet_dict = full_prec_model.state_dict()
222 |         model_dict.update(imagenet_dict)
223 |         low_prec_model.load_state_dict(model_dict)
224 | 
225 |     if not args.evaluate:
226 |         low_prec_layer4 = low_prec_model._modules.get("layer4")
227 |         full_prec_layer4 = full_prec_model._modules.get("layer4")
228 | 
229 |         hook_low_prec = low_prec_layer4.register_forward_hook(low_prec_hook)
230 |         hook_full_prec = full_prec_layer4.register_forward_hook(full_prec_hook)
231 | 
232 |     low_prec_model = gpu_config(low_prec_model)
233 |     full_prec_model = gpu_config(full_prec_model)
234 | 
235 |     # loss function and optimizers
236 |     criterion = nn.CrossEntropyLoss().cuda(args.gpu)
237 |     low_prec_optimizer = torch.optim.SGD(low_prec_model.parameters(),
238 |                                          args.lr,
239 |                                          momentum=args.momentum,
240 |                                          weight_decay=args.weight_decay)
241 |     full_prec_optimizer = torch.optim.SGD(full_prec_model.parameters(),
242 |                                           args.lr * args.rate,
243 |                                           momentum=args.momentum,
244 |                                           weight_decay=args.weight_decay)
245 | 
246 |     # learning-rate schedules
247 |     full_prec_scheduler = torch.optim.lr_scheduler.StepLR(full_prec_optimizer, step_size=args.lr_step, gamma=0.1)
248 |     low_prec_scheduler = torch.optim.lr_scheduler.StepLR(low_prec_optimizer, step_size=args.lr_step, gamma=0.1)
249 | 
250 |     # optionally resume from a checkpoint
251 |     if args.resume:
252 |         full_prec_resume = os.path.join(args.save_dir, "full_prec-checkpoint.pth.tar")
253 |         low_prec_resume = os.path.join(args.save_dir, "low_prec-checkpoint.pth.tar")
254 |         if os.path.isfile(full_prec_resume) and os.path.isfile(low_prec_resume):
255 |             print("=> loading low_prec_checkpoint from '{}' and '{}'".format(full_prec_resume,
256 |                                                                              low_prec_resume))
257 |             full_prec_checkpoint = torch.load(full_prec_resume)
258 |             low_prec_checkpoint = torch.load(low_prec_resume)
259 | 
260 |             args.start_epoch = low_prec_checkpoint['epoch']
261 |             # best accuracy reached so far
262 |             best_low_prec1 = low_prec_checkpoint['best_prec1']
263 |             best_full_prec1 = full_prec_checkpoint['best_prec1']
264 | 
265 |             low_prec_model.load_state_dict(low_prec_checkpoint['state_dict'])
266 |             full_prec_model.load_state_dict(full_prec_checkpoint['state_dict'])
267 | 
268 |             low_prec_optimizer.load_state_dict(low_prec_checkpoint['optimizer'])
269 |             full_prec_optimizer.load_state_dict(full_prec_checkpoint['optimizer'])
270 | 
271 |             print("=> loaded checkpoints from '{}' and '{}' (epoch {})".format(
272 |                 full_prec_resume, low_prec_resume, low_prec_checkpoint['epoch']))
273 |         else:
274 |             warnings.warn("=> no checkpoint found at directory '{}'".format(args.save_dir))
275 | 
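One detail worth calling out in the checkpoint loading above: `k[7:]` strips the `'module.'` prefix that `torch.nn.DataParallel` prepends to every state-dict key when a wrapped model is saved, so the keys match the bare (unwrapped) model. Illustration with made-up keys:

```python
# keys as saved from a DataParallel-wrapped model
ckpt_keys = ["module.conv1.weight", "module.layer1.0.bn1.running_mean"]

stripped = [k[7:] for k in ckpt_keys]  # len("module.") == 7
print(stripped)  # ['conv1.weight', 'layer1.0.bn1.running_mean']
```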
276 |     cudnn.benchmark = True
277 | 
278 |     val_loader = load_val_data(args.data, args.batch_size, args.workers)
279 |     train_loader, train_sampler = load_train_data(args.data, args.batch_size, args.workers, args.distributed)
280 | 
281 |     # create the tensorboard log writer
282 |     writer = SummaryWriter(args.save_dir)
283 | 
284 |     for epoch in range(args.start_epoch, args.epochs):
285 |         if args.distributed:
286 |             train_sampler.set_epoch(epoch)
287 | 
288 |         full_prec_scheduler.step()
289 |         low_prec_scheduler.step()
290 | 
291 |         # train for one epoch
292 |         guided_train(writer)
293 | 
294 |         # evaluate on validation set
295 |         low_prec1 = validate(low_prec_model, val_loader, criterion, args.gpu,
296 |                              epoch, writer, name_prefix='low_prec')
297 |         full_prec1 = validate(full_prec_model, val_loader, criterion, args.gpu,
298 |                               epoch, writer, name_prefix='full_prec')
299 | 
300 |         # remember best prec@1 and save checkpoints
301 |         is_best_low = low_prec1 > best_low_prec1
302 |         is_best_full = full_prec1 > best_full_prec1
303 | 
304 |         best_low_prec1 = max(low_prec1, best_low_prec1)
305 |         best_full_prec1 = max(full_prec1, best_full_prec1)
306 | 
307 |         save_checkpoint({
308 |             'epoch': epoch + 1,
309 |             'arch': args.arch,
310 |             'state_dict': low_prec_model.state_dict(),
311 |             'best_prec1': best_low_prec1,
312 |             'optimizer': low_prec_optimizer.state_dict(),
313 |         }, is_best_low, args.save_dir, name_prefix="low_prec")
314 | 
315 |         save_checkpoint({
316 |             'epoch': epoch + 1,
317 |             'arch': args.arch,
318 |             'state_dict': full_prec_model.state_dict(),
319 |             'best_prec1': best_full_prec1,
320 |             'optimizer': full_prec_optimizer.state_dict(),
321 |         }, is_best_full, args.save_dir, name_prefix="full_prec")
322 | 
323 |     # close the log writer
324 |     writer.close()
325 | 
326 |     # remove the hooks
327 |     if not args.evaluate:
328 |         hook_full_prec.remove()
329 |         hook_low_prec.remove()
330 | 
--------------------------------------------------------------------------------
/quantize/quantize_method.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | """Tanh-style and GEMM-style quantization functions with straight-through gradients.
3 | """
4 | import torch
5 | import math
6 | import numpy as np
7 | 
8 | 
9 | # number of quantization bits
10 | QUANTIZE_BIT = 8
11 | 
12 | 
13 | class QuantizeTanh(torch.autograd.Function):
14 | 
15 |     @staticmethod
16 |     def forward(ctx, i):
17 |         n = math.pow(2.0, QUANTIZE_BIT) - 1
18 |         return torch.round(i * n) / n
19 | 
20 |     @staticmethod
21 |     def backward(ctx, grad_outputs):
22 |         return grad_outputs  # straight-through estimator: the gradient skips the rounding
23 | 
24 | 
25 | class QuantizeGEMM(torch.autograd.Function):
26 | 
27 |     @staticmethod
28 |     def forward(ctx, i):
29 |         n = math.pow(2.0, QUANTIZE_BIT) - 1
30 |         v_max = torch.max(i)
31 |         v_min = torch.min(i)
32 |         scale = (v_max - v_min)/n
33 |         scale = max(scale, 1e-8)
34 |         zero_point = torch.round(torch.clamp(-v_min/scale, 0, n))
35 |         quantize_val = torch.clamp(torch.round(i/scale + zero_point), 0, n)
36 |         return (quantize_val-zero_point) * scale
37 | 
38 |     @staticmethod
39 |     def backward(ctx, grad_outputs):
40 |         return grad_outputs  # straight-through estimator
41 | 
42 | 
43 | quantize_tanh = QuantizeTanh.apply
44 | quantize_gemm = QuantizeGEMM.apply
45 | 
46 | 
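`QuantizeGEMM` is standard asymmetric uniform quantization: the tensor's observed range is mapped onto the integer levels {0, …, 2^QUANTIZE_BIT − 1} via a scale and zero-point and then mapped back, while `backward` passes gradients through untouched (a straight-through estimator). A worked 8-bit example; printed values are approximate:

```python
import torch
from quantize.quantize_method import quantize_gemm

x = torch.tensor([-1.0, 0.5, 2.0])
# n = 255, scale = (2.0 - (-1.0)) / 255 ≈ 0.01176, zero_point = round(1.0 / scale) = 85
# -1.0 -> level 0   -> (0 - 85)   * scale = -1.0
#  0.5 -> level 128 -> (128 - 85) * scale ≈ 0.5059
#  2.0 -> level 255 -> (255 - 85) * scale = 2.0
print(quantize_gemm(x))   # tensor([-1.0000,  0.5059,  2.0000])

x.requires_grad_(True)
quantize_gemm(x).sum().backward()
print(x.grad)             # tensor([1., 1., 1.]) -- the rounding is ignored in backward
```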
47 | def quantize_weights_bias_tanh(weight):
48 |     tanh_w = torch.tanh(weight)
49 |     """
50 |     How should torch's odd gradient behavior of y = w/max(|w|) at the max(|w|) element be explained?
51 |     tensor w ([[ 0.1229,  0.2390],
52 |                [ 0.8703,  0.6368]])
53 | 
54 |     gradient of y ([[ 0.2873,  0.2873],
55 |                     [-0.3296,  0.2873]])
56 |     It is not clear how torch handles the gradient exactly at the max(|w|) element;
57 |     still, the gradient above is negative, and for y = w/max(|w|) with w > 0 a negative gradient looks wrong.
58 |     To keep things simple, we treat max(|w|) as a constant when computing gradients:
59 |     going through Tensor.data keeps the max(|w|) computation out of the autograd graph,
60 |     so max_abs_w is just a constant.
61 |     """
62 |     max_abs_w = torch.max(torch.abs(tanh_w)).data
63 |     norm_weight = ((tanh_w / max_abs_w) + 1) / 2
64 | 
65 |     return 2 * quantize_tanh(norm_weight) - 1
66 | 
67 | 
68 | def quantize_activations_tanh(activation):
69 |     activation = torch.clamp(activation, 0.0, 1.0)
70 |     return 2 * quantize_tanh(activation) - 1
71 | 
72 | 
73 | def quantize_weights_bias_gemm(weight):
74 |     return quantize_gemm(weight)
75 | 
76 | 
77 | def quantize_activations_gemm(activation):
78 |     return quantize_gemm(activation)
79 | 
--------------------------------------------------------------------------------
/quantize/quantize_module_.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | import torch.nn as nn
4 | from quantize.quantize_method import quantize_weights_bias_gemm, quantize_activations_gemm
5 | import torch.nn.functional as F
6 | 
7 | 
8 | class QWConv2D(torch.nn.Conv2d):
9 |     def __init__(self, n_channels, out_channels, kernel_size, stride=1,
10 |                  padding=0, dilation=1, groups=1, bias=True):
11 |         super(QWConv2D, self).__init__(n_channels, out_channels, kernel_size, stride,
12 |                                        padding, dilation, groups, bias)
13 |         # nn.init.xavier_normal_(self.weight, 1)
14 |         # nn.init.constant_(self.weight, 1)
15 | 
16 |     def forward(self, input):
17 |         """
18 |         The key is to call the function F.conv2d rather than the module nn.Conv2d, so the freshly quantized weight is used on every forward pass
19 |         """
20 |         qweight = quantize_weights_bias_gemm(self.weight)
21 |         if self.bias is not None:
22 |             qbias = quantize_weights_bias_gemm(self.bias)
23 |         else:
24 |             qbias = None
25 |         return F.conv2d(input, qweight, qbias, self.stride,
26 |                         self.padding, self.dilation, self.groups)
27 | 
28 | 
29 | class QWAConv2D(torch.nn.Conv2d):
30 |     def __init__(self, n_channels, out_channels, kernel_size, stride=1,
31 |                  padding=0, dilation=1, groups=1, bias=True):
32 |         super(QWAConv2D, self).__init__(n_channels, out_channels, kernel_size, stride,
33 |                                         padding, dilation, groups, bias)
34 |         # nn.init.xavier_normal_(self.weight, 1)
35 |         # nn.init.constant_(self.weight, 1)
36 | 
37 |     def forward(self, input):
38 |         qweight = quantize_weights_bias_gemm(self.weight)
39 |         if self.bias is not None:
40 |             qbias = quantize_weights_bias_gemm(self.bias)
41 |         else:
42 |             qbias = None
43 |         qinput = quantize_activations_gemm(input)
44 |         return F.conv2d(qinput, qweight, qbias, self.stride,
45 |                         self.padding, self.dilation, self.groups)
46 | 
47 | 
48 | class QWLinear(nn.Linear):
49 | 
50 |     def __init__(self, in_features, out_features, bias=True, num_bits=8, num_bits_weight=None,
51 |                  num_bits_grad=None, biprecision=False):
52 |         super(QWLinear, self).__init__(in_features, out_features, bias)
53 | 
54 |     def forward(self, input):
55 |         qweight = quantize_weights_bias_gemm(self.weight)
56 | 
57 |         if self.bias is not None:
58 |             qbias = quantize_weights_bias_gemm(self.bias)
59 |         else:
60 |             qbias = None
61 | 
62 |         return F.linear(input, qweight, qbias)
63 | 
64 | 
65 | class QWALinear(nn.Linear):
66 | 
67 |     def __init__(self, in_features, out_features, bias=True):
68 |         super(QWALinear, self).__init__(in_features, out_features, bias)
69 | 
70 |     def forward(self, input):
71 |         qinput = quantize_activations_gemm(input)
72 |         qweight = quantize_weights_bias_gemm(self.weight)
73 | 
74 |         if self.bias is not None:
75 |             qbias = quantize_weights_bias_gemm(self.bias)
76 |         else:
77 |             qbias = None
78 | 
79 |         return F.linear(qinput, qweight, qbias)
80 | 
81 | 
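These modules are drop-in replacements for `nn.Conv2d` and `nn.Linear`: the master weights stay in full precision, only the copies used in the forward pass are quantized, and the straight-through backward routes the gradient onto the full-precision master weights. A minimal sketch; the shapes are chosen purely for illustration:

```python
import torch
from quantize.quantize_module_ import QWAConv2D, QWALinear

conv = QWAConv2D(3, 8, kernel_size=3, padding=1, bias=False)
fc = QWALinear(8, 10)

x = torch.randn(2, 3, 4, 4)
out = fc(conv(x).mean(dim=(2, 3)))  # runs on quantized weights and activations
out.sum().backward()

print(conv.weight.grad.shape)  # torch.Size([8, 3, 3, 3]) -- gradient on the master weight
```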
) 84 | """ 85 | 86 | 87 | class Scalar(nn.Module): 88 | 89 | def __init__(self): 90 | super(Scalar, self).__init__() # 这一行很重要 91 | # 第1种错误 92 | # self.scalar = torch.tensor([0.01], requires_grad=True) 93 | # RuntimeError: Expected object of type torch.FloatTensor 94 | # but found type torch.cuda.FloatTensor for argument 95 | 96 | # 第2种错误 97 | # self.scalar = torch.tensor([0.01], requires_grad=True).cuda() 98 | # RuntimeError: arguments are located on different GPUs 99 | 100 | # 第3种错误 101 | # self.scalar = nn.Parameter(torch.tensor(0.01, requires_grad=True)) 102 | # RuntimeError: slice() cannot be applied to a 0-dim tensor, 103 | # 而加了方括号正确为 1-dim tensor 104 | 105 | # 第4中错误 106 | # scalar = nn.Parameter(torch.tensor([0.01], requires_grad=True)) 107 | # self.register_buffer("scalar", scalar) 108 | # scalar没有梯度更新(全是None), register_buffer 用于存储非训练参数, 如bn的平均值存储 109 | 110 | # 第1种方法, 可以使用 111 | # self.scalar = nn.Parameter(torch.tensor([0.01], requires_grad=True)) 112 | 113 | # 第2种方法, 可以使用 114 | # scalar = nn.Parameter(torch.tensor([0.01], requires_grad=True)) 115 | # self.register_parameter("scalar", scalar) 116 | 117 | # 根据训练经验, 设为 2.5 118 | self.scalar = nn.Parameter(torch.tensor([1.0], requires_grad=True, dtype=torch.float)) 119 | 120 | def forward(self, i): 121 | return self.scalar * i 122 | 123 | 124 | if __name__ == "__main__": 125 | qconv = QWConv2D(1, 1, 3) 126 | qconv.zero_grad() 127 | x = torch.ones(1, 1, 3, 3, requires_grad=True).float() 128 | y = qconv(x) 129 | y.backward() 130 | print("QConv2D 权重梯度", qconv.weight.grad) 131 | 132 | # 直接求梯度 133 | a = torch.ones(3, 3, requires_grad=True).float() 134 | w = nn.init.constant_(torch.empty(3, 3, requires_grad=True), 1) 135 | qw = quantize_weights_bias_gemm(w) 136 | 137 | z = (qw * a).sum() 138 | z.backward() 139 | print("求权重梯度", w.grad) 140 | 141 | # 验证量化梯度 142 | qa = quantize_weights_bias_gemm(a).sum() 143 | qa.backward() 144 | print("直接求量化权重梯度", a.grad) -------------------------------------------------------------------------------- /quantize/quantize_old_plan.py: -------------------------------------------------------------------------------- 1 | # coding=utf-8 2 | import torch 3 | import torch.nn as nn 4 | import queue 5 | from quantize.quantize_method import QuantizeTanh 6 | 7 | 8 | class QuantizeWeightOrActivation(object): 9 | def __init__(self): 10 | self.saved_param = queue.Queue() 11 | self.saved_grad = queue.Queue() 12 | self.quantize_fn = QuantizeTanh.apply # 量化函数 13 | 14 | def quantize_weights_bias(self, weight): 15 | tanh_w = torch.tanh(weight) 16 | """ 17 | torch 关于 y = w/max(|w|) 函数在max(|w|)处梯度行为怪异该如何解释? 18 | tensor w ([[ 0.1229, 0.2390], 19 | [ 0.8703, 0.6368]]) 20 | 21 | tensor y ([[ 0.2873, 0.2873], 22 | [-0.3296, 0.2873]]) 23 | 由于没有搞清楚 torch 在 max(|w|) 处如何处理的, 24 | 不过, 从上面看出梯度为负数, y = w/max(|w|) w>0时, 梯度为负数, 我认为是不正确的. 
--------------------------------------------------------------------------------
/quantize/quantize_old_plan.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | import torch.nn as nn
4 | import queue
5 | from quantize.quantize_method import QuantizeTanh
6 | 
7 | 
8 | class QuantizeWeightOrActivation(object):
9 |     def __init__(self):
10 |         self.saved_param = queue.Queue()
11 |         self.saved_grad = queue.Queue()
12 |         self.quantize_fn = QuantizeTanh.apply  # quantization function
13 | 
14 |     def quantize_weights_bias(self, weight):
15 |         tanh_w = torch.tanh(weight)
16 |         """
17 |         How should we explain torch's odd gradient behaviour of y = w/max(|w|) at max(|w|)?
18 |         tensor w ([[ 0.1229,  0.2390],
19 |                    [ 0.8703,  0.6368]])
20 | 
21 |         tensor y ([[ 0.2873,  0.2873],
22 |                    [-0.3296,  0.2873]])
23 |         We have not pinned down how torch handles the max(|w|) entry, but the output
24 |         above shows a negative gradient; for y = w/max(|w|) with w > 0, a negative
25 |         gradient looks wrong to us.
26 |         To keep things simple, we treat max(|w|) as a constant when computing gradients.
27 |         Reading it through Tensor.data keeps the max(|w|) computation out of the autograd graph,
28 |         so max_abs_w is effectively a constant.
29 |         """
30 |         max_abs_w = torch.max(torch.abs(tanh_w)).data
31 |         norm_weight = ((tanh_w / max_abs_w) + 1) / 2
32 | 
33 |         return 2 * self.quantize_fn(norm_weight) - 1
34 | 
35 |     def quantize_activations(self, activation):
36 |         activation = torch.clamp(activation, 0.0, 1.0)
37 |         return self.quantize_fn(activation)
38 | 
39 |     def quantize(self, m):
40 |         # isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear)
41 |         if type(m) == nn.Conv2d or type(m) == nn.Linear:
42 |             self.saved_param.put(m.weight.data.clone())        # step 1: save the full-precision weights
43 |             quantize_w = self.quantize_weights_bias(m.weight)  # step 2: quantize the weights
44 |             quantize_w.sum().backward()
45 |             self.saved_grad.put(m.weight.grad.data.clone())    # step 3: save the quantization gradient
46 |             m.weight.data.copy_(quantize_w.data)               # step 4: swap the quantized weights in
47 |             # m.zero_grad() is unnecessary: the later optimizer.zero_grad() clears every module's gradients
48 | 
49 |         if type(m) == nn.Linear:  # quantize the bias as well
50 |             self.saved_param.put(m.bias.data.clone())
51 |             quantize_b = self.quantize_weights_bias(m.bias)
52 |             quantize_b.sum().backward()
53 |             self.saved_grad.put(m.bias.grad.data.clone())
54 |             m.bias.data.copy_(quantize_b.data)
55 | 
56 |     def restore(self, m):
57 |         if type(m) == nn.Conv2d or type(m) == nn.Linear:
58 |             m.weight.data.copy_(self.saved_param.get())  # step 5: put the full-precision weights back
59 | 
60 |         if type(m) == nn.Linear:
61 |             m.bias.data.copy_(self.saved_param.get())
62 | 
63 |     def update_grad(self, m):
64 |         if type(m) == nn.Conv2d or type(m) == nn.Linear:
65 |             m.weight.grad.data.mul_(self.saved_grad.get())  # step 6: chain rule, fold the quantization gradient into the weight gradient
66 | 
67 |         if type(m) == nn.Linear:
68 |             m.bias.grad.data.mul_(self.saved_grad.get())
69 | 
70 |     @staticmethod
71 |     def info(net, s):
72 |         print("\n-----------{}--------\n".format(s))
73 |         for k, v in net.state_dict().items():
74 |             print(k, "\n", v)
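75 | 
76 | 
77 | # A condensed sketch (added for illustration) of how this class is meant to be used in a
78 | # training step; it mirrors the commented-out calls in utils/train_val.py. The function
79 | # name and argument list are assumptions, not part of the original pipeline.
80 | def quantized_training_step(model, criterion, optimizer, data, target):
81 |     qw = QuantizeWeightOrActivation()
82 |     model.apply(qw.quantize)     # save full-precision weights, swap in quantized ones
83 |     loss = criterion(model(data), target)
84 |     optimizer.zero_grad()
85 |     loss.backward()              # gradients w.r.t. the quantized weights
86 |     model.apply(qw.restore)      # put the full-precision weights back
87 |     model.apply(qw.update_grad)  # chain rule: multiply in the saved quantization gradient
88 |     optimizer.step()             # update the full-precision weights
89 |     return loss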
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | torchvision
3 | tensorboardx
4 | tensorflow
5 | opencv-python
6 | numpy
--------------------------------------------------------------------------------
/utils/data_loader.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | from torchvision import transforms
4 | import os
5 | from torchvision import datasets
6 | 
7 | 
8 | def load_train_data(data_dir, batch_size, workers, distributed):
9 | 
10 |     traindir = os.path.join(data_dir, 'train')
11 |     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
12 |                                      std=[0.229, 0.224, 0.225])
13 | 
14 |     # training mode: load the training set
15 |     train_dataset = datasets.ImageFolder(
16 |         traindir,
17 |         transforms.Compose([
18 |             transforms.RandomResizedCrop(224),
19 |             transforms.RandomHorizontalFlip(),
20 |             transforms.ToTensor(),
21 |             normalize,
22 |         ]))
23 | 
24 |     if distributed:
25 |         train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
26 |     else:
27 |         train_sampler = None
28 | 
29 |     train_loader = torch.utils.data.DataLoader(
30 |         train_dataset,
31 |         batch_size=batch_size, shuffle=(train_sampler is None),
32 |         num_workers=workers, pin_memory=True,
33 |         sampler=train_sampler)
34 | 
35 |     return train_loader, train_sampler
36 | 
37 | 
38 | def load_val_data(data_dir, batch_size=64, workers=8):
39 |     valdir = os.path.join(data_dir, 'val')
40 |     normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
41 |                                      std=[0.229, 0.224, 0.225])
42 | 
43 |     # load the validation set
44 |     val_loader = torch.utils.data.DataLoader(
45 |         datasets.ImageFolder(valdir, transforms.Compose([
46 |             transforms.Resize(256),
47 |             transforms.CenterCrop(224),
48 |             transforms.ToTensor(),
49 |             normalize,
50 |         ])),
51 |         batch_size=batch_size, shuffle=False,
52 |         num_workers=workers, pin_memory=True)
53 |     return val_loader
--------------------------------------------------------------------------------
/utils/meter.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import torch
3 | import logging
4 | 
5 | 
6 | class AverageMeter(object):
7 |     """Computes and stores the average and current value"""
8 | 
9 |     def __init__(self):
10 |         self.val = 0
11 |         self.avg = 0
12 |         self.sum = 0
13 |         self.count = 0
14 | 
15 |     def reset(self):
16 |         self.val = 0
17 |         self.avg = 0
18 |         self.sum = 0
19 |         self.count = 0
20 | 
21 |     def update(self, val, n=1):
22 |         self.val = val
23 |         self.sum += val * n
24 |         self.count += n
25 |         self.avg = self.sum / self.count
26 | 
27 | 
28 | def accuracy(output, target, topk=(1,)):
29 |     """Computes the precision@k for the specified values of k"""
30 |     with torch.no_grad():
31 |         maxk = max(topk)
32 |         batch_size = target.size(0)
33 | 
34 |         # pred: torch.Size([128, 5])
35 |         _, pred = output.topk(maxk, 1, True, True)
36 |         pred = pred.t()  # transpose
37 |         # pred: torch.Size([5, 128])
38 | 
39 |         # with batch_size 128, target has size torch.Size([128]);
40 |         # that is, target holds class ids, not one-hot vectors
41 |         target = target.view(1, -1).expand_as(pred)
42 |         # [128] =>view=> [1, 128] =>expand_as([5, 128])=> [5, 128]
43 |         correct = pred.eq(target)  # eq: computes element-wise equality
44 | 
45 |         res = []
46 |         for k in topk:
47 |             correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
48 |             res.append(correct_k.mul_(100.0 / batch_size))
49 |         return res
50 | 
51 | 
52 | def get_logger(logger_name="nowgood", filename=None, level=logging.INFO):
53 |     logger = logging.getLogger(logger_name)
54 |     logger.setLevel(level)
55 | 
56 |     # define the output format shared by the handlers
57 |     formatter = logging.Formatter('%(asctime)s %(levelname)s: %(message)s')
58 | 
59 |     if filename is not None:
60 |         # a handler that writes to a log file
61 |         fh = logging.FileHandler(filename)
62 |         fh.setLevel(level)
63 |         fh.setFormatter(formatter)
64 |         logger.addHandler(fh)
65 | 
66 |     # a handler that writes to the console
67 |     ch = logging.StreamHandler()
68 |     ch.setLevel(level)
69 |     ch.setFormatter(formatter)
70 |     # attach the handler to the logger
71 |     logger.addHandler(ch)
72 | 
73 |     # to disable the logger:
74 |     # logger.disabled = True
75 | 
76 |     return logger
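77 | 
78 | 
79 | # Added sanity check (illustrative): top-1/top-5 accuracy on random logits;
80 | # with 1000 classes both numbers should hover near chance level.
81 | if __name__ == "__main__":
82 |     logits = torch.randn(128, 1000)          # a fake batch of ImageNet-sized outputs
83 |     labels = torch.randint(0, 1000, (128,))  # class ids, not one-hot
84 |     top1, top5 = accuracy(logits, labels, topk=(1, 5))
85 |     print("top-1 {:.2f}%, top-5 {:.2f}%".format(top1.item(), top5.item()))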
--------------------------------------------------------------------------------
/utils/train_val.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | import time
3 | import torch
4 | import os
5 | import shutil
6 | import torch.nn.parallel
7 | import torch.optim
8 | import torch.utils.data
9 | import torch.utils.data.distributed
10 | from utils.meter import AverageMeter, accuracy
11 | 
12 | best_prec1 = 0
13 | 
14 | 
15 | def train(model, train_loader, criterion, optimizer, gpu, epoch=0,
16 |           summary_writer=None, log_per_epoch=100, print_freq=30):
17 | 
18 |     batch_time = AverageMeter()
19 |     data_time = AverageMeter()
20 |     losses = AverageMeter()
21 |     top1 = AverageMeter()
22 |     top5 = AverageMeter()
23 | 
24 |     # switch to train mode
25 |     model.train()
26 | 
27 |     # if not full_precision:
28 |     #     qw = QuantizeWeightOrActivation()  # step 1: create the quantizer
29 |     end = time.time()
30 | 
31 |     # controls how often results are written to tensorboard
32 |     interval = len(train_loader) // log_per_epoch
33 |     summary_point = [interval * split for split in range(log_per_epoch)]
34 | 
35 |     for i, (data, target) in enumerate(train_loader):
36 |         data_time.update(time.time() - end)  # measure data loading time
37 | 
38 |         if gpu is not None:
39 |             data = data.cuda(gpu, non_blocking=True)
40 |             target = target.cuda(gpu, non_blocking=True)
41 | 
42 |         # if not full_precision:
43 |         #     model.apply(qw.quantize)  # step 2: quantize weights, saving the full-precision weights and the quantization gradient
44 | 
45 |         output = model(data)
46 |         loss = criterion(output, target)
47 | 
48 |         # measure accuracy and record loss
49 |         prec1, prec5 = accuracy(output, target, topk=(1, 5))
50 |         losses.update(loss.item(), data.size(0))
51 |         top1.update(prec1[0], data.size(0))
52 |         top5.update(prec5[0], data.size(0))
53 | 
54 |         # compute gradient and do SGD step
55 |         optimizer.zero_grad()
56 |         loss.backward()
57 | 
58 |         # if not full_precision:
59 |         #     model.apply(qw.restore)      # step 3: after backprop, restore the full-precision weights
60 |         #     model.apply(qw.update_grad)  # step 4: multiply the backpropagated gradient by the stored quantization gradient
61 | 
62 |         optimizer.step()
63 | 
64 |         # measure elapsed time
65 |         batch_time.update(time.time() - end)
66 |         end = time.time()
67 | 
68 |         # console logging
69 |         if i % print_freq == 0:
70 |             print('Epoch: [{0}][{1}/{2}]\t'
71 |                   'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
72 |                   'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
73 |                   'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
74 |                   'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
75 |                   'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
76 |                       epoch, i, len(train_loader), batch_time=batch_time,
77 |                       data_time=data_time, loss=losses, top1=top1, top5=top5))
78 | 
79 |         if summary_writer and (i in summary_point):
80 |             step = i // interval + epoch * log_per_epoch
81 |             summary_writer.add_scalar("loss/train_loss", loss.item(), step)
82 |             summary_writer.add_scalar("train/top-1", top1.avg, step)
83 |             summary_writer.add_scalar("train/top-5", top5.avg, step)
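84 | 
85 | 
86 | # A condensed sketch (added for illustration) of how train/validate/save_checkpoint are
87 | # typically wired together by a training entry point such as main.py; the function name
88 | # and argument list here are assumptions, not the repository's actual API.
89 | def run_epochs(model, train_loader, val_loader, criterion, optimizer, epochs,
90 |                gpu=0, summary_writer=None, save_dir="model/example"):
91 |     global best_prec1
92 |     for epoch in range(epochs):
93 |         train(model, train_loader, criterion, optimizer, gpu, epoch, summary_writer)
94 |         prec1 = validate(model, val_loader, criterion, gpu, epoch, summary_writer)
95 |         is_best = prec1 > best_prec1
96 |         best_prec1 = max(prec1, best_prec1)
97 |         save_checkpoint({"epoch": epoch + 1,
98 |                          "state_dict": model.state_dict(),
99 |                          "best_prec1": best_prec1}, is_best, save_dir)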
100 | 
101 | 
102 | def validate(model, val_loader, criterion, gpu=0, epoch=0, summary_writer=None,
103 |              name_prefix=None, print_freq=20):
104 | 
105 |     batch_time = AverageMeter()
106 |     losses = AverageMeter()
107 |     top1 = AverageMeter()
108 |     top5 = AverageMeter()
109 | 
110 |     loss_name = "val/loss"
111 |     prec1_name = "val/top-1"
112 |     prec5_name = "val/top-5"
113 | 
114 |     if name_prefix is not None:
115 |         name_prefix = ''.join((name_prefix, '-'))
116 |         loss_name = ''.join((name_prefix, loss_name))
117 |         prec1_name = ''.join((name_prefix, prec1_name))
118 |         prec5_name = ''.join((name_prefix, prec5_name))
119 | 
120 |     # switch to eval mode
121 |     model.eval()
122 | 
123 |     # if not full_precision:
124 |     #     qw = QuantizeWeightOrActivation()  # step 1: create the quantizer
125 |     #     model.apply(qw.quantize)           # step 2: quantize weights, saving the full-precision weights and gradients
126 | 
127 |     with torch.no_grad():
128 |         start = time.time()
129 |         for i, (data, target) in enumerate(val_loader):
130 |             if gpu is not None:
131 |                 data = data.cuda(gpu, non_blocking=True)
132 | 
133 |                 # with batch_size 128, target has size torch.Size([128])
134 |                 target = target.cuda(gpu, non_blocking=True)
135 |             output = model(data)
136 |             loss = criterion(output, target)
137 | 
138 |             # measure accuracy and record loss
139 |             prec1, prec5 = accuracy(output, target, topk=(1, 5))
140 |             losses.update(loss.item(), data.size(0))
141 |             top1.update(prec1[0], data.size(0))
142 |             top5.update(prec5[0], data.size(0))
143 | 
144 |             # measure elapsed time
145 |             batch_time.update(time.time() - start)
146 |             start = time.time()
147 | 
148 |             if i % print_freq == 0:
149 |                 print('Test: [{0}/{1}]\t'
150 |                       'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
151 |                       'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
152 |                       'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
153 |                       'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
154 |                           i, len(val_loader), batch_time=batch_time,
155 |                           loss=losses, top1=top1, top5=top5))
156 | 
157 |     if summary_writer is not None:
158 |         summary_writer.add_scalar(loss_name, losses.avg, epoch)
159 |         summary_writer.add_scalar(prec1_name, top1.avg, epoch)
160 |         summary_writer.add_scalar(prec5_name, top5.avg, epoch)
161 | 
162 |     print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(top1=top1, top5=top5))
163 | 
164 |     # if not full_precision:
165 |     #     model.apply(qw.restore)  # step 3: restore the full-precision weights
166 | 
167 |     return top1.avg
168 | 
169 | 
170 | def save_checkpoint(state, is_best, save_dir, name_prefix=None,
171 |                     checkpoint_name='checkpoint.pth.tar',
172 |                     model_best_name='model_best.pth.tar'):
173 |     if save_dir and not os.path.exists(save_dir):
174 |         os.makedirs(save_dir)
175 |         print("=> checkpoint directory: {}".format(save_dir))
176 | 
177 |     if name_prefix is not None:
178 |         name_prefix = ''.join((name_prefix, '-'))
179 |     else:
180 |         name_prefix = ''
181 | 
182 |     checkpoint = os.path.join(save_dir, name_prefix + checkpoint_name)
183 |     model_best = os.path.join(save_dir, name_prefix + model_best_name)
184 | 
185 |     torch.save(state, checkpoint)
186 |     if is_best:
187 |         shutil.copyfile(checkpoint, model_best)
--------------------------------------------------------------------------------
/utils/unzip.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # unpack every .tar archive in the current directory into a folder of the same name
4 | for x in *.tar
5 | do
6 |     filename=$(basename "$x" .tar)
7 |     mkdir -p "$filename"
8 |     tar -xvf "$x" -C "./$filename"
9 | done
--------------------------------------------------------------------------------