├── README.md
├── experiments
│   └── cifar10
│       ├── densenet100bc
│       │   └── config.yaml
│       ├── lenet
│       │   └── config.yaml
│       ├── preresnet20
│       │   └── config.yaml
│       └── resnext29_16x64d
│           └── config.yaml
├── models
│   ├── __init__.py
│   ├── alexnet.py
│   ├── bam.py
│   ├── cbam.py
│   ├── cbam_resnext.py
│   ├── densenet.py
│   ├── lenet.py
│   ├── mobilenetv1.py
│   ├── mobilenetv2.py
│   ├── preresnet.py
│   ├── resnet.py
│   ├── resnext.py
│   ├── senet.py
│   └── vgg.py
├── train.py
├── train_visdom.py
└── utils.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# pytorch_image_classifier_tutorial
This repository uses PyTorch to implement popular CNN architectures on the CIFAR dataset. The reference papers are listed below:

## Architecture
* (lenet) [LeNet-5, convolutional neural networks](http://yann.lecun.com/exdb/lenet/)
* (alexnet) [ImageNet Classification with Deep Convolutional Neural Networks](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks)
* (vgg) [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
* (resnet) [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
* (preresnet) [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027)
* (resnext) [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431)
* (densenet) [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.06993)
* (senet) [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507)
* (bam) [BAM: Bottleneck Attention Module](https://arxiv.org/abs/1807.06514)
* (cbam) [CBAM: Convolutional Block Attention Module](https://arxiv.org/abs/1807.06521)
* (genet) [Gather-Excite: Exploiting Feature Context in Convolutional Neural Networks](https://arxiv.org/abs/1810.12348)
* (sknet) [Selective Kernel Networks](https://arxiv.org/abs/1903.06586)
* (mobilenetV1) [MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications](https://arxiv.org/abs/1704.04861?context=cs)
* (mobilenetV2) [MobileNetV2: Inverted Residuals and Linear Bottlenecks](https://arxiv.org/abs/1801.04381)

## Regularization
* (shake-shake) [Shake-Shake regularization](https://arxiv.org/abs/1705.07485)
* (cutout) [Improved Regularization of Convolutional Neural Networks with Cutout](https://arxiv.org/abs/1708.04552)
* (mixup) [mixup: Beyond Empirical Risk Minimization](https://arxiv.org/abs/1710.09412)

## Learning Rate Scheduler
* (cos_lr) [SGDR: Stochastic Gradient Descent with Warm Restarts](https://arxiv.org/abs/1608.03983)
* (htd_lr) [Stochastic Gradient Descent with Hyperbolic-Tangent Decay on Classification](https://arxiv.org/abs/1806.01593)

## Requirements
* Python >= 3.5
* PyTorch 0.4 or 1.0
* Tensorboard (if you want to use tensorboard for visualization)
* pyyaml, easydict, tensorboardX

## Usage
Run the following commands to train:
```
## 1 GPU for lenet
python -u train.py --work-path ./experiments/cifar10/lenet
## resume from checkpoint
python -u train.py --work-path ./experiments/cifar10/lenet --resume
## 2 GPUs for preresnet20
CUDA_VISIBLE_DEVICES=0,1 python -u train.py --work-path ./experiments/cifar10/preresnet20
## 4 GPUs for densenet100bc
CUDA_VISIBLE_DEVICES=0,1,2,3 python -u train.py --work-path ./experiments/cifar10/densenet100bc
```
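For scripted use, the `models` package exposes a `get_model` helper that resolves the `architecture` field of a config to the matching constructor. A minimal sketch (the `EasyDict` wrapping mirrors what `train.py` does with the YAML files under `experiments/`):

```python
from easydict import EasyDict
from models import get_model

# Only the two fields that get_model reads (see models/__init__.py).
config = EasyDict({'architecture': 'lenet', 'num_classes': 10})
net = get_model(config)  # equivalent to models.lenet(10)
```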

Feel free to contact me if you have any suggestions or questions. Issues are welcome;
create a PR if you find any bugs or want to contribute. :smile:


--------------------------------------------------------------------------------
/experiments/cifar10/densenet100bc/config.yaml:
--------------------------------------------------------------------------------
#net architecture
architecture: densenet100bc

#log and checkpoint
data_path: ./data
ckpt_path: ./
ckpt_name: densenet100bc

#datasets
num_classes: 10
dataset: cifar10

#training parameters
use_gpu: True
input_size: 32
epochs: 300
batch_size: 64
test_batch: 200
eval_freq: 2
workers: 4

#optimizer
optimize:
  momentum: 0.9
  weight_decay: 0.0001
  nesterov: True

#regularization
mixup: False
mixup_alpha: 0.4

augmentation:
  normalize: True
  random_crop: True
  random_horizontal_flip: True
  cutout: False
  holes: 1
  length: 8

#learning rate scheduler
lr_scheduler:
  type: STEP
  base_lr: 0.1
  lr_epochs: [150,225]
  lr_mults: 0.1
  min_lr: 0.0
  lower_bound: -6.0
  upper_bound: 3.0
--------------------------------------------------------------------------------
/experiments/cifar10/lenet/config.yaml:
--------------------------------------------------------------------------------
#net architecture
architecture: lenet

#log and checkpoint
data_path: ./data
ckpt_path: .
ckpt_name: lenet

#datasets
num_classes: 10
dataset: cifar10

#training parameters
use_gpu: True
input_size: 32
epochs: 250
batch_size: 128
test_batch: 200
eval_freq: 2
workers: 4

#optimizer
optimize:
  momentum: 0.9
  weight_decay: 0.0001
  nesterov: True

#regularization
mixup: False
mixup_alpha: 0.4

augmentation:
  normalize: True
  random_crop: True
  random_horizontal_flip: True
  cutout: False
  holes: 1
  length: 8

#learning rate scheduler
lr_scheduler:
  #type: STEP or COSINE or HTD
  type: STEP
  base_lr: 0.1
  #only for STEP
  lr_epochs: [100,150,200]
  lr_mults: 0.1
  #for HTD and COSINE
  min_lr: 0.0
  #only for HTD
  lower_bound: -6.0
  upper_bound: 3.0
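The lr_scheduler block above selects one of three decay rules: STEP, COSINE (SGDR-style), or HTD (hyperbolic-tangent decay, controlled by lower_bound/upper_bound). The actual logic lives in adjust_learning_rate in utils.py, which is not part of this listing; the sketch below is a hypothetical reading of the config keys, consistent with the cited papers, not the repository's exact code:

import math

def lr_at_epoch(cfg, epoch):
    # Hypothetical helper mirroring the config keys; utils.py may differ in detail.
    s = cfg.lr_scheduler
    if s.type == 'STEP':
        # decay base_lr by lr_mults at every milestone in lr_epochs
        n = sum(1 for e in s.lr_epochs if epoch >= e)
        return s.base_lr * (s.lr_mults ** n)
    t = epoch / cfg.epochs  # training progress in [0, 1)
    if s.type == 'COSINE':
        return s.min_lr + (s.base_lr - s.min_lr) * (1 + math.cos(math.pi * t)) / 2
    if s.type == 'HTD':
        return s.min_lr + (s.base_lr - s.min_lr) * \
            (1 - math.tanh(s.lower_bound + (s.upper_bound - s.lower_bound) * t)) / 2
    raise ValueError('unknown lr_scheduler type: %s' % s.type)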
--------------------------------------------------------------------------------
/experiments/cifar10/preresnet20/config.yaml:
--------------------------------------------------------------------------------
# net architecture
architecture: preresnet20

# log and checkpoint
data_path: ./data
ckpt_path: ./
ckpt_name: preresnet20

# datasets
num_classes: 10
dataset: cifar10

# training parameters
use_gpu: True
input_size: 32
epochs: 250
batch_size: 128
test_batch: 200
eval_freq: 2
workers: 4

# optimizer
optimize:
  momentum: 0.9
  weight_decay: 0.0001
  nesterov: True

# regularization
mixup: False
mixup_alpha: 0.4

augmentation:
  normalize: True
  random_crop: True
  random_horizontal_flip: True
  cutout: False
  holes: 1
  length: 8

# learning rate scheduler
lr_scheduler:
  #type: STEP or COSINE or HTD
  type: STEP
  base_lr: 0.1
  #only for STEP
  lr_epochs: [100, 150, 200]
  lr_mults: 0.1
  #for HTD and COSINE
  min_lr: 0.0
  #only for HTD
  lower_bound: -6.0
  upper_bound: 3.0
--------------------------------------------------------------------------------
/experiments/cifar10/resnext29_16x64d/config.yaml:
--------------------------------------------------------------------------------
# net architecture
architecture: resnext29_16x64d

# log and checkpoint
data_path: ./data
ckpt_path: ./
ckpt_name: resnext29_16x64d

# datasets
num_classes: 10
dataset: cifar10

# training parameters
use_gpu: True
input_size: 32
epochs: 300
batch_size: 128
test_batch: 200
eval_freq: 2
workers: 4

# optimizer
optimize:
  momentum: 0.9
  weight_decay: 0.0005
  nesterov: True

# regularization
mixup: False
mixup_alpha: 0.4

augmentation:
  normalize: True
  random_crop: True
  random_horizontal_flip: True
  cutout: False
  holes: 1
  length: 8

# learning rate scheduler
lr_scheduler:
  #type: STEP or COSINE or HTD
  type: STEP
  base_lr: 0.1
  #only for STEP
  lr_epochs: [150, 225]
  lr_mults: 0.1
  # for HTD and COSINE
  min_lr: 0.0
  # only for HTD
  lower_bound: -6.0
  upper_bound: 3.0
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8 -*-
from .lenet import *
from .alexnet import *
from .vgg import *
from .resnet import *
from .preresnet import *
from .densenet import *
from .resnext import *
from .senet import *
from .cbam_resnext import *
from .mobilenetv1 import *
from .mobilenetv2 import *

def get_model(config):
    return globals()[config.architecture](config.num_classes)
--------------------------------------------------------------------------------
/models/alexnet.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8-*-
import torch.nn as nn

__all__ = ['alexnet']

class AlexNet(nn.Module):
    def __init__(self, num_classes):
        super(AlexNet, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.fc = nn.Linear(256, num_classes)

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

def alexnet(num_classes):
    return AlexNet(num_classes=num_classes)
--------------------------------------------------------------------------------
/models/bam.py:
--------------------------------------------------------------------------------
import torch
import math
import torch.nn as nn
import torch.nn.functional as F

class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)

class ChannelGate(nn.Module):
    def __init__(self, gate_channel, reduction_ratio=16, num_layers=1):
        super(ChannelGate, self).__init__()
        self.gate_c = nn.Sequential()
        # after avg_pool
        self.gate_c.add_module('flatten', Flatten())
        gate_channels = [gate_channel]
        gate_channels += [gate_channel // reduction_ratio] * num_layers
        gate_channels += [gate_channel]
        for i in range(len(gate_channels) - 2):
            # fc->bn->relu
            self.gate_c.add_module('gate_c_fc_%d'%i, nn.Linear(gate_channels[i], gate_channels[i+1]))
            # BatchNorm1d: after Flatten + Linear the tensor is 2-D (N, C)
            self.gate_c.add_module('gate_c_bn_%d'%(i+1), nn.BatchNorm1d(gate_channels[i+1]))
            self.gate_c.add_module('gate_c_relu_%d'%(i+1), nn.ReLU())
        # final_fc
        self.gate_c.add_module('gate_c_fc_final', nn.Linear(gate_channels[-2], gate_channels[-1]))

    def forward(self, in_tensor):
        # global avg pool
        avg_pool = F.avg_pool2d(in_tensor, in_tensor.size(2), stride=in_tensor.size(2))
        # C*H*W -> C*1*1 -> C*H*W
        return self.gate_c(avg_pool).unsqueeze(2).unsqueeze(3).expand_as(in_tensor)

class SpatialGate(nn.Module):
    # dilation value and reduction ratio; the paper sets d = 4, r = 16
    def __init__(self, gate_channel, reduction_ratio=16, dilation_conv_num=2, dilation_val=4):
        super(SpatialGate, self).__init__()
        self.gate_s = nn.Sequential()
        # 1x1 + (3x3)*2 + 1x1
        self.gate_s.add_module('gate_s_conv_reduce0', nn.Conv2d(gate_channel, gate_channel // reduction_ratio, kernel_size=1))
        self.gate_s.add_module('gate_s_bn_reduce0', nn.BatchNorm2d(gate_channel // reduction_ratio))
        self.gate_s.add_module('gate_s_relu_reduce0', nn.ReLU())
        for i in range(dilation_conv_num):
            self.gate_s.add_module('gate_s_conv_di_%d' % i, nn.Conv2d(gate_channel // reduction_ratio, gate_channel // reduction_ratio,
                                                                      kernel_size=3, padding=dilation_val, dilation=dilation_val))
            self.gate_s.add_module('gate_s_bn_di_%d' % i, nn.BatchNorm2d(gate_channel // reduction_ratio))
            self.gate_s.add_module('gate_s_relu_di_%d' % i, nn.ReLU())
        self.gate_s.add_module('gate_s_conv_final', nn.Conv2d(gate_channel // reduction_ratio, 1, kernel_size=1))  # 1*H*W

    def forward(self, in_tensor):
        return self.gate_s(in_tensor).expand_as(in_tensor)

class BAM(nn.Module):
    def __init__(self, gate_channel):
        super(BAM, self).__init__()
        self.channel_att = ChannelGate(gate_channel)
        self.spatial_att = SpatialGate(gate_channel)

    def forward(self, in_tensor):
        att = 1 + torch.sigmoid(self.channel_att(in_tensor) * self.spatial_att(in_tensor))
        return att * in_tensor
--------------------------------------------------------------------------------
/models/cbam.py:
--------------------------------------------------------------------------------
import torch
import math
import torch.nn as nn
import torch.nn.functional as F

class BasicConv(nn.Module):
    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):
        super(BasicConv, self).__init__()
        self.out_planes = out_planes
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias)
        self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x

class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)

class ChannelGate(nn.Module):
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']):
        super(ChannelGate, self).__init__()
        self.gate_channels = gate_channels
        self.mlp = nn.Sequential(
            Flatten(),
            nn.Linear(gate_channels, gate_channels // reduction_ratio),
            nn.ReLU(),
            nn.Linear(gate_channels // reduction_ratio, gate_channels)
        )
        self.pool_types = pool_types

    def forward(self, x):
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type == 'avg':
                # avg_pool2d(input, kernel_size, stride=None, padding=0, ceil_mode=False, count_include_pad=True)
                avg_pool = F.avg_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp(avg_pool)
            elif pool_type == 'max':
                max_pool = F.max_pool2d(x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp(max_pool)
            elif pool_type == 'lp':
                lp_pool = F.lp_pool2d(x, 2, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp(lp_pool)
            elif pool_type == 'lse':
                # LSE pool
                lse_pool = logsumexp_2d(x)
                channel_att_raw = self.mlp(lse_pool)

            if channel_att_sum is None:
                channel_att_sum = channel_att_raw
            else:
                channel_att_sum = channel_att_sum + channel_att_raw
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        return x * scale

def logsumexp_2d(tensor):
    tensor_flatten = tensor.view(tensor.size(0), tensor.size(1), -1)
    s, _ = torch.max(tensor_flatten, dim=2, keepdim=True)
    outputs = s + (tensor_flatten - s).exp().sum(dim=2, keepdim=True).log()
    return outputs

class ChannelPool(nn.Module):
    def forward(self, x):
        return torch.cat((torch.max(x, 1)[0].unsqueeze(1), torch.mean(x, 1).unsqueeze(1)), dim=1)

class SpatialGate(nn.Module):
    def __init__(self):
        super(SpatialGate, self).__init__()
        kernel_size = 7
        self.compress = ChannelPool()
        self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size - 1) // 2, relu=False)

    def forward(self, x):
        x_compress = self.compress(x)
        x_out = self.spatial(x_compress)
        scale = torch.sigmoid(x_out)
        return x * scale

class CBAM(nn.Module):
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):
        super(CBAM, self).__init__()
        self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.SpatialGate = SpatialGate()

    def forward(self, x):
        x_out = self.ChannelGate(x)
        if not self.no_spatial:
            x_out = self.SpatialGate(x_out)
        return x_out
--------------------------------------------------------------------------------
/models/cbam_resnext.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8-*-
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


__all__ = ['cbam_resnext29_8x64d', 'cbam_resnext29_16x64d']


class BasicConv(nn.Module):
    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size,
                              stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias)
        self.bn = nn.BatchNorm2d(out_planes, eps=1e-5,
                                 momentum=0.01, affine=True) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x


class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)


class ChannelGate(nn.Module):
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']):
        super(ChannelGate, self).__init__()
        self.gate_channels = gate_channels
        self.mlp = nn.Sequential(
            Flatten(),
            nn.Linear(gate_channels, gate_channels // reduction_ratio),
            nn.ReLU(),
            nn.Linear(gate_channels // reduction_ratio, gate_channels)
        )
        self.pool_types = pool_types

    def forward(self, x):
        channel_att_sum = None
        for pool_type in self.pool_types:
            if pool_type == 'avg':
                avg_pool = F.avg_pool2d(
                    x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp(avg_pool)
            elif pool_type == 'max':
                max_pool = F.max_pool2d(
                    x, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp(max_pool)
            elif pool_type == 'lp':
                lp_pool = F.lp_pool2d(
                    x, 2, (x.size(2), x.size(3)), stride=(x.size(2), x.size(3)))
                channel_att_raw = self.mlp(lp_pool)
            elif pool_type == 'lse':
                # LSE pool only
                lse_pool = logsumexp_2d(x)
                channel_att_raw = self.mlp(lse_pool)

            if channel_att_sum is None:
                channel_att_sum = channel_att_raw
            else:
                channel_att_sum = channel_att_sum + channel_att_raw

        scale = torch.sigmoid(channel_att_sum).unsqueeze(
            2).unsqueeze(3).expand_as(x)
        return x * scale


def logsumexp_2d(tensor):
    tensor_flatten = tensor.view(tensor.size(0), tensor.size(1), -1)
    s, _ = torch.max(tensor_flatten, dim=2, keepdim=True)
    outputs = s + (tensor_flatten - s).exp().sum(dim=2, keepdim=True).log()
    return outputs


class ChannelPool(nn.Module):
    def forward(self, x):
        return torch.cat((torch.max(x, 1)[0].unsqueeze(1), torch.mean(x, 1).unsqueeze(1)), dim=1)


class SpatialGate(nn.Module):
    def __init__(self):
        super(SpatialGate, self).__init__()
        kernel_size = 7
        self.compress = ChannelPool()
        self.spatial = BasicConv(2, 1, kernel_size, stride=1, padding=(
            kernel_size-1) // 2, relu=False)

    def forward(self, x):
        x_compress = self.compress(x)
        x_out = self.spatial(x_compress)
        scale = torch.sigmoid(x_out)  # broadcasting
        return x * scale


class CBAM(nn.Module):
    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):
        super(CBAM, self).__init__()
        self.ChannelGate = ChannelGate(
            gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.SpatialGate = SpatialGate()

    def forward(self, x):
        x_out = self.ChannelGate(x)
        if not self.no_spatial:
            x_out = self.SpatialGate(x_out)
        return x_out


class Bottleneck(nn.Module):

    def __init__(self, in_channels, out_channels, stride, cardinality, base_width, expansion):

        super(Bottleneck, self).__init__()
        width_ratio = out_channels / (expansion * 64.)
        D = cardinality * int(base_width * width_ratio)

        self.relu = nn.ReLU(inplace=True)
        self.cbam_module = CBAM(out_channels)

        self.conv_reduce = nn.Conv2d(
            in_channels, D, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn_reduce = nn.BatchNorm2d(D)
        self.conv_conv = nn.Conv2d(
            D, D, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
        self.bn = nn.BatchNorm2d(D)
        self.conv_expand = nn.Conv2d(
            D, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn_expand = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if in_channels != out_channels:
            self.shortcut.add_module('shortcut_conv',
                                     nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0,
                                               bias=False))
            self.shortcut.add_module(
                'shortcut_bn', nn.BatchNorm2d(out_channels))

    def forward(self, x):
        out = self.conv_reduce.forward(x)
        out = self.relu(self.bn_reduce.forward(out))
        out = self.conv_conv.forward(out)
        out = self.relu(self.bn.forward(out))
        out = self.conv_expand.forward(out)
        out = self.bn_expand.forward(out)

        residual = self.shortcut.forward(x)

        out = self.cbam_module(out) + residual
        out = self.relu(out)
        return out


class SeResNeXt(nn.Module):
    def __init__(self, cardinality, depth, num_classes, base_width, expansion=4):
        super(SeResNeXt, self).__init__()
        self.cardinality = cardinality
        self.depth = depth
        self.block_depth = (self.depth - 2) // 9
        self.base_width = base_width
        self.expansion = expansion
        self.num_classes = num_classes
        self.output_size = 64
        self.stages = [64, 64 * self.expansion, 128 *
                       self.expansion, 256 * self.expansion]

        self.conv_1_3x3 = nn.Conv2d(3, 64, 3, 1, 1, bias=False)
        self.bn_1 = nn.BatchNorm2d(64)
        self.stage_1 = self.block('stage_1', self.stages[0], self.stages[1], 1)
        self.stage_2 = self.block('stage_2', self.stages[1], self.stages[2], 2)
        self.stage_3 = self.block('stage_3', self.stages[2], self.stages[3], 2)
        self.fc = nn.Linear(self.stages[3], num_classes)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight.data)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def block(self, name, in_channels, out_channels, pool_stride=2):
        block = nn.Sequential()
        for bottleneck in range(self.block_depth):
            name_ = '%s_bottleneck_%d' % (name, bottleneck)
            if bottleneck == 0:
                block.add_module(name_, Bottleneck(in_channels, out_channels, pool_stride, self.cardinality,
                                                   self.base_width, self.expansion))
            else:
                block.add_module(name_,
                                 Bottleneck(out_channels, out_channels, 1, self.cardinality, self.base_width,
                                            self.expansion))
        return block

    def forward(self, x):
        x = self.conv_1_3x3.forward(x)
        x = F.relu(self.bn_1.forward(x), inplace=True)
        x = self.stage_1.forward(x)
        x = self.stage_2.forward(x)
        x = self.stage_3.forward(x)
        x = F.avg_pool2d(x, 8, 1)
        x = x.view(-1, self.stages[3])
        return self.fc(x)


def cbam_resnext29_8x64d(num_classes):
    return SeResNeXt(cardinality=8, depth=29, num_classes=num_classes, base_width=64)


def cbam_resnext29_16x64d(num_classes):
    return SeResNeXt(cardinality=16, depth=29, num_classes=num_classes, base_width=64)
--------------------------------------------------------------------------------
/models/densenet.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8-*-
import math

import torch
import torch.nn as nn
import torch.nn.functional as F

__all__ = ['densenet100bc','densenet190bc']

class Bottleneck(nn.Module):
    def __init__(self, in_planes, growth_rate):
        super(Bottleneck,self).__init__()
        self.bn_1 = nn.BatchNorm2d(in_planes)
        self.conv_1 = nn.Conv2d(in_planes,growth_rate*4,kernel_size=1,bias=False)  # growth_rate controls how many feature maps each layer produces
        self.bn_2 = nn.BatchNorm2d(growth_rate*4)
        self.conv_2 = nn.Conv2d(growth_rate*4,growth_rate,kernel_size=3,padding=1,bias=False)

    def forward(self,x):
        out = self.conv_1(F.relu(self.bn_1(x)))
        out = self.conv_2(F.relu(self.bn_2(out)))
        out = torch.cat([out,x], 1)
        return out

# The transition stage typically halves the output channels (reduction=0.5)
class Transition(nn.Module):
    def __init__(self,in_planes,out_planes):
        super(Transition, self).__init__()
        self.bn = nn.BatchNorm2d(in_planes)
        self.conv = nn.Conv2d(in_planes,out_planes,kernel_size=1,bias=False)

    def forward(self,x):
        out = self.conv(F.relu(self.bn(x)))
        out = F.avg_pool2d(out,2)
        return out

class DenseNet(nn.Module):
    def __init__(self, block, depth, growth_rate=12, reduction=0.5, num_classes=10):
        super(DenseNet,self).__init__()
        self.growth_rate = growth_rate

        nblocks = (depth - 4)//6  # number of Bottleneck blocks in each dense block
        num_planes = 2*growth_rate  # initially twice the growth_rate; grows as dense blocks concatenate features
        self.conv1 = nn.Conv2d(3,num_planes,kernel_size=3,padding=1,bias=False)

        self.dense1 = self._make_dense_layers(block,num_planes,nblocks)
        num_planes += nblocks*growth_rate  # every layer also receives the earlier feature maps as input, so the channel count keeps growing
        out_planes = int(math.floor(num_planes*reduction))  # math.floor() rounds down; this halves the channel count
        self.tran_1 = Transition(num_planes,out_planes)
        num_planes = out_planes

        self.dense2 = self._make_dense_layers(block,num_planes,nblocks)
        num_planes += nblocks*growth_rate
        out_planes = int(math.floor(num_planes*reduction))
        self.tran_2 = Transition(num_planes,out_planes)
        num_planes = out_planes

        self.dense3 = self._make_dense_layers(block,num_planes,nblocks)
        num_planes += nblocks*growth_rate

        self.bn = nn.BatchNorm2d(num_planes)
        self.fc = nn.Linear(num_planes,num_classes)

        for m in self.modules():
            if isinstance(m,nn.Conv2d):
                nn.init.kaiming_normal_(m.weight.data)
            elif isinstance(m,nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_dense_layers(self,block,in_planes,nblock):
        layers = []
        for _ in range(nblock):
            layers.append(block(in_planes,self.growth_rate))
            in_planes += self.growth_rate
        return nn.Sequential(*layers)

    def forward(self,x):
        out = self.conv1(x)
        out = self.tran_1(self.dense1(out))
        out = self.tran_2(self.dense2(out))
        out = self.dense3(out)
        out = F.avg_pool2d(F.relu(self.bn(out)),8)
        out = out.view(out.size(0),-1)
        out = self.fc(out)
        return out

def densenet100bc(num_classes):
    return DenseNet(Bottleneck,depth=100,growth_rate=12,num_classes=num_classes)

def densenet190bc(num_classes):
    return DenseNet(Bottleneck,depth=190,growth_rate=40,num_classes=num_classes)
--------------------------------------------------------------------------------
/models/lenet.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8-*-

import torch.nn as nn
import torch.nn.functional as F

__all__ = ['lenet']

class LeNet(nn.Module):
    def __init__(self,num_classes=0):
        super(LeNet,self).__init__()
        self.conv1 = nn.Conv2d(3,6,5)
        self.conv2 = nn.Conv2d(6,16,5)
        self.fc_1 = nn.Linear(16*5*5,120)
        self.fc_2 = nn.Linear(120,84)
        self.fc_3 = nn.Linear(84,num_classes)

    def forward(self,x):
        out = F.relu(self.conv1(x))
        out = F.max_pool2d(out,2)
        out = F.relu(self.conv2(out))
        out = F.max_pool2d(out,2)
        out = out.view(out.size(0),-1)
        out = F.relu(self.fc_1(out))
        out = F.relu(self.fc_2(out))
        out = self.fc_3(out)
        return out

def lenet(num_classes):
    return LeNet(num_classes=num_classes)
--------------------------------------------------------------------------------
/models/mobilenetv1.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8-*-
import torch
import torch.nn as nn

class dw_conv(nn.Module):
    # Depthwise convolution; currently slow to train in PyTorch
    def __init__(self, in_dim, out_dim, stride):
        super(dw_conv, self).__init__()
        self.dw_conv_k3 = nn.Conv2d(in_dim, out_dim, kernel_size=3, stride=stride, groups=in_dim, bias=False)
        self.bn = nn.BatchNorm2d(out_dim)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.dw_conv_k3(x)
        x = self.bn(x)
        x = self.relu(x)
        return x

class point_conv(nn.Module):
    # Pointwise (1x1) convolution
    def __init__(self, in_dim, out_dim):
        super(point_conv, self).__init__()
        self.p_conv_k1 = nn.Conv2d(in_dim, out_dim, kernel_size=1, bias=False)
        self.bn = nn.BatchNorm2d(out_dim)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.p_conv_k1(x)
        x = self.bn(x)
        x = self.relu(x)
        return x

class MobileNets(nn.Module):
    def __init__(self, num_classes):
        super(MobileNets, self).__init__()
        self.num_classes = num_classes
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1),
            nn.ReLU(inplace=True),
            dw_conv(32, 32, 1),
            point_conv(32, 64),
            dw_conv(64, 64, 1),
            point_conv(64, 128),
            dw_conv(128, 128, 1),
            point_conv(128, 128),
            dw_conv(128, 128, 1),
            point_conv(128, 256),
            dw_conv(256, 256, 1),
            point_conv(256, 256),
            dw_conv(256, 256, 1),
            point_conv(256, 512),
            dw_conv(512, 512, 1),
            point_conv(512, 512),
            dw_conv(512, 512, 1),
            point_conv(512, 512),
            dw_conv(512, 512, 1),
            point_conv(512, 512),
            dw_conv(512, 512, 1),
            point_conv(512, 512),
            dw_conv(512, 512, 1),
            point_conv(512, 512),
            dw_conv(512, 512, 1),
            point_conv(512, 1024),
            dw_conv(1024, 1024, 1),
            point_conv(1024, 1024),
            nn.AvgPool2d(4),
        )
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.features(x)
        x = x.view(-1, 1024)
        x = self.fc(x)
        return x

def mobilenet(num_classes):
    return MobileNets(num_classes=num_classes)
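The dw_conv/point_conv pair above is the depthwise-separable factorization from the MobileNet paper: a per-channel 3x3 convolution followed by a 1x1 channel mixer. A quick parameter count (pure illustration, independent of the repository) shows the saving over a standard 3x3 convolution:

import torch.nn as nn

def n_params(module):
    return sum(p.numel() for p in module.parameters())

standard = nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False)
depthwise = nn.Conv2d(64, 64, kernel_size=3, padding=1, groups=64, bias=False)
pointwise = nn.Conv2d(64, 128, kernel_size=1, bias=False)

print(n_params(standard))                         # 73728 = 64*128*3*3
print(n_params(depthwise) + n_params(pointwise))  # 576 + 8192 = 8768, roughly 8.4x fewer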
--------------------------------------------------------------------------------
/models/mobilenetv2.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8-*-
import torch.nn as nn
import math

__all__ = ['MobileNetV2', 'mobilenetv2_19']

#conv1x1->Dwise3x3->conv1x1
class Bottleneck(nn.Module):
    def __init__(self, inplanes, planes, stride=1, downsample=None, expansion=1):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, inplanes*expansion, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(inplanes*expansion)
        self.conv2 = nn.Conv2d(inplanes*expansion, inplanes*expansion, kernel_size=3, stride=stride, padding=1,
                               bias=False, groups=inplanes*expansion)
        self.bn2 = nn.BatchNorm2d(inplanes*expansion)
        self.conv3 = nn.Conv2d(inplanes*expansion, planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class MobileNetV2(nn.Module):
    def __init__(self, block, layers, num_classes=1000):
        self.inplanes = 32
        super(MobileNetV2, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(32)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 16, layers[0], stride=1, expansion=1)
        self.layer2 = self._make_layer(block, 24, layers[1], stride=2, expansion=6)
        self.layer3 = self._make_layer(block, 32, layers[2], stride=2, expansion=6)
        self.layer4 = self._make_layer(block, 64, layers[3], stride=2, expansion=6)
        self.layer5 = self._make_layer(block, 96, layers[4], stride=1, expansion=6)
        self.layer6 = self._make_layer(block, 160, layers[5], stride=2, expansion=6)
        self.layer7 = self._make_layer(block, 320, layers[6], stride=1, expansion=6)

        self.conv8 = nn.Conv2d(320, 1280, kernel_size=1, stride=1, bias=False)
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.conv9 = nn.Conv2d(1280, num_classes, kernel_size=1, stride=1, bias=False)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2./n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride, expansion):
        downsample = nn.Sequential(
            nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(planes),
        )

        layers = []
        layers.append(block(self.inplanes, planes, stride=stride, downsample=downsample, expansion=expansion))
        self.inplanes = planes
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes, expansion=expansion))
        return nn.Sequential(*layers)

    def forward(self, x):
        # NOTE: assumed forward pass (the original listing omitted one); it follows the layers defined above:
        # stem -> 7 bottleneck stages -> 1x1 conv, pool, 1x1 classifier conv
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer7(x)
        x = self.conv8(x)
        x = self.avgpool(x)  # AvgPool2d(7) assumes ImageNet-sized 224x224 inputs
        x = self.conv9(x)
        return x.view(x.size(0), -1)

def mobilenetv2_19(num_classes):
    return MobileNetV2(Bottleneck, [1, 2, 3, 4, 3, 3, 1], num_classes=num_classes)
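A quick shape check for the MobileNetV2 variant above. Note that it targets ImageNet-sized inputs, and the check relies on the forward pass restored in the file above:

import torch
from models.mobilenetv2 import mobilenetv2_19

net = mobilenetv2_19(num_classes=1000)
x = torch.randn(1, 3, 224, 224)  # five stride-2 stages: 224 -> 112 -> 56 -> 28 -> 14 -> 7
print(net(x).shape)              # torch.Size([1, 1000])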
--------------------------------------------------------------------------------
/models/preresnet.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8 -*-

import torch.nn as nn

__all__ = ['preresnet20', 'preresnet32', 'preresnet44',
           'preresnet56', 'preresnet110', 'preresnet1202']

def conv3x3(in_planes, out_planes, stride=1):
    # 3x3 convolution with padding
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.bn_1 = nn.BatchNorm2d(inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.conv_1 = conv3x3(inplanes, planes, stride)
        self.bn_2 = nn.BatchNorm2d(planes)
        self.conv_2 = conv3x3(planes, planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        # bn->relu->conv->bn->relu->conv
        out = self.bn_1(x)
        out = self.relu(out)
        out = self.conv_1(out)

        out = self.bn_2(out)
        out = self.relu(out)
        out = self.conv_2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual

        return out

class Bottleneck(nn.Module):
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.bn_1 = nn.BatchNorm2d(inplanes)
        self.conv_1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn_2 = nn.BatchNorm2d(planes)
        self.conv_2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn_3 = nn.BatchNorm2d(planes)
        self.conv_3 = nn.Conv2d(planes, planes*4, kernel_size=1, bias=False)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.bn_1(x)
        out = self.relu(out)
        out = self.conv_1(out)

        out = self.bn_2(out)
        out = self.relu(out)
        out = self.conv_2(out)

        out = self.bn_3(out)
        out = self.relu(out)
        out = self.conv_3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual

        return out

class PreResNet(nn.Module):

    def __init__(self, depth, num_classes=1000, block_name='BasicBlock'):
        super(PreResNet, self).__init__()
        # Model type specifies number of layers for the CIFAR-10 model
        if block_name.lower() == 'basicblock':
            assert (depth - 2) % 6 == 0, 'When using BasicBlock, depth should be 6n+2, e.g. 20, 32, 44, 56, 110, 1202'
            n = (depth - 2) // 6
            block = BasicBlock
        elif block_name.lower() == 'bottleneck':
            assert (depth - 2) % 9 == 0, 'When using Bottleneck, depth should be 9n+2, e.g. 20, 29, 47, 56, 110, 1199'
            n = (depth - 2) // 9
            block = Bottleneck
        else:
            raise ValueError('block_name should be BasicBlock or Bottleneck')

        self.inplanes = 16
        self.conv_1 = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False)
        self.layer1 = self._make_layer(block, 16, n)
        self.layer2 = self._make_layer(block, 32, n, stride=2)
        self.layer3 = self._make_layer(block, 64, n, stride=2)
        self.bn = nn.BatchNorm2d(64 * block.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.avgpool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight.data)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes*block.expansion, kernel_size=1, stride=stride, bias=False)
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv_1(x)  #32*32

        x = self.layer1(x)  #32*32
        x = self.layer2(x)  #16*16
        x = self.layer3(x)  #8*8
        x = self.bn(x)
        x = self.relu(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

def preresnet20(num_classes):
    return PreResNet(depth=20, num_classes=num_classes)

def preresnet32(num_classes):
    return PreResNet(depth=32, num_classes=num_classes)

def preresnet44(num_classes):
    return PreResNet(depth=44, num_classes=num_classes)

def preresnet56(num_classes):
    return PreResNet(depth=56, num_classes=num_classes)

def preresnet110(num_classes):
    return PreResNet(depth=110, num_classes=num_classes)

def preresnet1202(num_classes):
    return PreResNet(depth=1202, num_classes=num_classes)
--------------------------------------------------------------------------------
/models/resnet.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8-*-
import math

import torch
import torch.nn as nn
import torch.nn.functional as F

__all__ = ['resnet20', 'resnet32', 'resnet44', 'resnet56', 'resnet110', 'resnet1202']

def conv3x3(in_planes, out_planes, stride):
    return nn.Conv2d(in_planes, out_planes, stride=stride, kernel_size=3, padding=1, bias=False)

class BasicBlock(nn.Module):
    expansion = 1
    def __init__(self, in_planes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv_1 = conv3x3(in_planes, planes, stride)
        self.bn_1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv_2 = conv3x3(planes, planes)
        self.bn_2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv_1(x)
        out = self.bn_1(out)
        out = self.relu(out)

        out = self.conv_2(out)
        out = self.bn_2(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)
        return out

class Bottleneck(nn.Module):
    expansion = 4
    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv_1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn_1 = nn.BatchNorm2d(planes)
        self.conv_2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn_2 = nn.BatchNorm2d(planes)
        self.conv_3 = nn.Conv2d(planes, planes*4, kernel_size=1, bias=False)
        self.bn_3 = nn.BatchNorm2d(planes*4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv_1(x)
        out = self.bn_1(out)
        out = self.relu(out)

        out = self.conv_2(out)
        out = self.bn_2(out)
        out = self.relu(out)

        out = self.conv_3(out)
        out = self.bn_3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out

class ResNet(nn.Module):
    def __init__(self, depth, num_classes, block_name='BasicBlock'):
        super(ResNet, self).__init__()
        if block_name == 'BasicBlock':
            assert (depth - 2) % 6 == 0, 'depth should be 6n+2, e.g. 20, 32, 44, 56, 110, 1202'
            n = (depth - 2) // 6
            block = BasicBlock
        elif block_name == 'Bottleneck':
            assert (depth - 2) % 9 == 0, 'depth should be 9n+2, e.g. 20, 29, 47, 56, 110, 1199'
            n = (depth - 2) // 9
            block = Bottleneck
        else:
            raise ValueError('block_name should be BasicBlock or Bottleneck')

        self.inplanes = 16
        self.conv_1 = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False)
        self.bn_1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.stage_1 = self._make_layer(block, 16, n)
        self.stage_2 = self._make_layer(block, 32, n, stride=2)
        self.stage_3 = self._make_layer(block, 64, n, stride=2)
        self.avgpool = nn.AvgPool2d(8)
        self.fc = nn.Linear(64 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                #nn.init.xavier_normal_(m.weight.data)
                nn.init.kaiming_normal_(m.weight.data)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes*block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes*block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes*block.expansion),
            )
        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv_1(x)
        x = self.bn_1(x)
        x = self.relu(x)  #32*32

        x = self.stage_1(x)  #32*32
        x = self.stage_2(x)  #16*16
        x = self.stage_3(x)  #8*8

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

def resnet20(num_classes):
    return ResNet(depth=20, num_classes=num_classes)

def resnet32(num_classes):
    return ResNet(depth=32, num_classes=num_classes)

def resnet44(num_classes):
    return ResNet(depth=44, num_classes=num_classes)

def resnet56(num_classes):
    return ResNet(depth=56, num_classes=num_classes)

def resnet110(num_classes):
    return ResNet(depth=110, num_classes=num_classes)

def resnet1202(num_classes):
    return ResNet(depth=1202, num_classes=num_classes)
--------------------------------------------------------------------------------
/models/resnext.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8-*-
import torch.nn as nn
import torch.nn.functional as F


__all__ = ['resnext29_8x64d', 'resnext29_16x64d']


class Bottleneck(nn.Module):

    def __init__(self, in_channels, out_channels, stride, cardinality, base_width, expansion):
        """ Constructor
        Args:
            in_channels: input channel dimensionality
            out_channels: output channel dimensionality
            stride: conv stride. Replaces pooling layer.
            cardinality: num of convolution groups.
            base_width: base number of channels in each group.
            expansion: factor to reduce the input dimensionality before convolution.
        """
        super(Bottleneck, self).__init__()
        width_ratio = out_channels / (expansion * 64.)
        D = cardinality * int(base_width * width_ratio)

        self.relu = nn.ReLU(inplace=True)

        self.conv_reduce = nn.Conv2d(
            in_channels, D, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn_reduce = nn.BatchNorm2d(D)
        self.conv_conv = nn.Conv2d(
            D, D, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
        self.bn = nn.BatchNorm2d(D)
        self.conv_expand = nn.Conv2d(
            D, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn_expand = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if in_channels != out_channels:
            self.shortcut.add_module('shortcut_conv',
                                     nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0,
                                               bias=False))
            self.shortcut.add_module(
                'shortcut_bn', nn.BatchNorm2d(out_channels))

    def forward(self, x):
        out = self.conv_reduce.forward(x)
        out = self.relu(self.bn_reduce.forward(out))
        out = self.conv_conv.forward(out)
        out = self.relu(self.bn.forward(out))
        out = self.conv_expand.forward(out)
        out = self.bn_expand.forward(out)
        residual = self.shortcut.forward(x)
        return self.relu(residual + out)


class ResNeXt(nn.Module):
    """
    ResNext optimized for the Cifar dataset, as specified in
    https://arxiv.org/pdf/1611.05431.pdf
    """

    def __init__(self, cardinality, depth, num_classes, base_width, expansion=4):
        """ Constructor
        Args:
            cardinality: number of convolution groups.
            depth: number of layers.
            num_classes: number of classes
            base_width: base number of channels in each group.
            expansion: factor to adjust the channel dimensionality
        """
        super(ResNeXt, self).__init__()
        self.cardinality = cardinality
        self.depth = depth
        self.block_depth = (self.depth - 2) // 9
        self.base_width = base_width
        self.expansion = expansion
        self.num_classes = num_classes
        self.output_size = 64
        self.stages = [64, 64 * self.expansion, 128 *
                       self.expansion, 256 * self.expansion]

        self.conv_1_3x3 = nn.Conv2d(3, 64, 3, 1, 1, bias=False)
        self.bn_1 = nn.BatchNorm2d(64)
        self.stage_1 = self.block('stage_1', self.stages[0], self.stages[1], 1)
        self.stage_2 = self.block('stage_2', self.stages[1], self.stages[2], 2)
        self.stage_3 = self.block('stage_3', self.stages[2], self.stages[3], 2)
        self.fc = nn.Linear(self.stages[3], num_classes)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight.data)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def block(self, name, in_channels, out_channels, pool_stride=2):
        """ Stack n bottleneck modules where n is inferred from the depth of the network.
        Args:
            name: string name of the current block.
            in_channels: number of input channels
            out_channels: number of output channels
            pool_stride: factor to reduce the spatial dimensionality in the first bottleneck of the block.
        Returns: a Module consisting of n sequential bottlenecks.
        """
        block = nn.Sequential()
        for bottleneck in range(self.block_depth):
            name_ = '%s_bottleneck_%d' % (name, bottleneck)
            if bottleneck == 0:
                block.add_module(name_, Bottleneck(in_channels, out_channels, pool_stride, self.cardinality,
                                                   self.base_width, self.expansion))
            else:
                block.add_module(name_,
                                 Bottleneck(out_channels, out_channels, 1, self.cardinality, self.base_width,
                                            self.expansion))
        return block

    def forward(self, x):
        x = self.conv_1_3x3.forward(x)
        x = F.relu(self.bn_1.forward(x), inplace=True)
        x = self.stage_1.forward(x)
        x = self.stage_2.forward(x)
        x = self.stage_3.forward(x)
        x = F.avg_pool2d(x, 8, 1)
        x = x.view(-1, self.stages[3])
        return self.fc(x)


def resnext29_8x64d(num_classes):
    return ResNeXt(cardinality=8, depth=29, num_classes=num_classes, base_width=64)


def resnext29_16x64d(num_classes):
    return ResNeXt(cardinality=16, depth=29, num_classes=num_classes, base_width=64)
--------------------------------------------------------------------------------
/models/senet.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8 -*-

import torch.nn as nn
import torch.nn.functional as F

__all__ = ['se_resnext29_8x64d', 'se_resnext29_16x64d']

class SEModule(nn.Module):

    def __init__(self, channels, reduction=16):
        super(SEModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc_1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc_2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        original = x
        x = self.avg_pool(x)
        x = self.fc_1(x)
        x = self.relu(x)
        x = self.fc_2(x)
        x = self.sigmoid(x)
        return original * x

class Bottleneck(nn.Module):
    def __init__(self, in_channels, out_channels, stride, cardinality, base_width, expansion):
        super(Bottleneck, self).__init__()
        width_ratio = out_channels // (expansion * 64)
        D = cardinality * int(base_width * width_ratio)

        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels)

        self.conv_reduce = nn.Conv2d(in_channels, D, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn_reduce = nn.BatchNorm2d(D)
        self.conv_conv = nn.Conv2d(D, D, kernel_size=3, stride=stride, padding=1, groups=cardinality, bias=False)
        self.bn = nn.BatchNorm2d(D)
        self.conv_expand = nn.Conv2d(D, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
        self.bn_expand = nn.BatchNorm2d(out_channels)

        self.shortcut = nn.Sequential()
        if in_channels != out_channels:
            self.shortcut.add_module('shortcut_conv',
                                     nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False))
            self.shortcut.add_module('shortcut_bn', nn.BatchNorm2d(out_channels))

    def forward(self, x):
        out = self.conv_reduce.forward(x)
        out = self.relu(self.bn_reduce.forward(out))
        out = self.conv_conv.forward(out)
        out = self.relu(self.bn.forward(out))
        out = self.conv_expand.forward(out)
        out = self.bn_expand.forward(out)

        residual = self.shortcut.forward(x)

        out = self.se_module(out) + residual
        out = self.relu(out)

        return out

class SeResNeXt(nn.Module):
    def __init__(self, cardinality, depth, num_classes, base_width, expansion=4):
        super(SeResNeXt, self).__init__()
        self.cardinality = cardinality
        self.depth = depth
        self.block_depth = (self.depth - 2) // 9
        self.base_width = base_width
        self.expansion = expansion
        self.num_classes = num_classes
        self.output_size = 64
        self.stages = [64, 64 * self.expansion, 128 * self.expansion, 256 * self.expansion]

        self.conv_1_3x3 = nn.Conv2d(3, 64, 3, 1, 1, bias=False)
        self.bn_1 = nn.BatchNorm2d(64)

        self.stage_1 = self.block('stage_1', self.stages[0], self.stages[1], 1)
        self.stage_2 = self.block('stage_2', self.stages[1], self.stages[2], 2)
        self.stage_3 = self.block('stage_3', self.stages[2], self.stages[3], 2)
        self.fc = nn.Linear(self.stages[3], num_classes)
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight.data)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def block(self, name, in_channels, out_channels, pool_stride=2):
        block = nn.Sequential()
        for bottleneck in range(self.block_depth):
            name_ = '%s_bottleneck_%d' % (name, bottleneck)
            if bottleneck == 0:
                block.add_module(name_, Bottleneck(in_channels, out_channels, pool_stride, self.cardinality, self.base_width, self.expansion))
            else:
                block.add_module(name_, Bottleneck(out_channels, out_channels, 1, self.cardinality, self.base_width, self.expansion))

        return block

    def forward(self, x):
        x = self.conv_1_3x3.forward(x)
        x = F.relu(self.bn_1.forward(x), inplace=True)
        x = self.stage_1.forward(x)
        x = self.stage_2.forward(x)
        x = self.stage_3.forward(x)
        x = F.avg_pool2d(x, 8, 1)
        x = x.view(-1, self.stages[3])
        return self.fc(x)

def se_resnext29_8x64d(num_classes):
    return SeResNeXt(cardinality=8, depth=29, num_classes=num_classes, base_width=64)


def se_resnext29_16x64d(num_classes):
    return SeResNeXt(cardinality=16, depth=29, num_classes=num_classes, base_width=64)
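The SEModule above implements squeeze-and-excitation: global average pooling squeezes each channel to a scalar, a bottleneck MLP (channels -> channels/reduction -> channels) produces per-channel gates in (0, 1), and the input is rescaled channel-wise. A self-contained check that the output preserves the input shape:

import torch
from models.senet import SEModule

se = SEModule(channels=64, reduction=16)  # bottleneck: 64 -> 4 -> 64
x = torch.randn(2, 64, 8, 8)
print(se(x).shape)  # torch.Size([2, 64, 8, 8]); each channel scaled by its learned gate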
--------------------------------------------------------------------------------
/models/vgg.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8-*-
import math

import torch.nn as nn


__all__ = ['vgg11', 'vgg13', 'vgg16', 'vgg19']

cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):

    def __init__(self, features, num_classes=10):
        super(VGG, self).__init__()
        self.features = features
        self.classifier = nn.Linear(512, num_classes)
        self._initialize_weights()

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight.data)
                if m.bias is not None:
                    m.bias.data.zero_()
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.weight.data.normal_(0, 0.01)
                m.bias.data.zero_()


def make_layers(cfg, batch_norm=False):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=1)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)


def vgg11(num_classes):
    return VGG(make_layers(cfg['A'], batch_norm=True), num_classes)


def vgg13(num_classes):
    return VGG(make_layers(cfg['B'], batch_norm=True), num_classes)


def vgg16(num_classes):
    return VGG(make_layers(cfg['D'], batch_norm=True), num_classes)


def vgg19(num_classes):
    return VGG(make_layers(cfg['E'], batch_norm=True), num_classes)
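Each entry of cfg above is either an output-channel count (expanded by make_layers into conv3x3 -> BatchNorm -> ReLU when batch_norm=True) or 'M' for a 2x2 max-pool. Five 'M' entries halve CIFAR's 32x32 inputs down to the 1x1x512 tensor that the Linear(512, num_classes) classifier expects:

import torch
from models.vgg import make_layers, cfg

features = make_layers(cfg['A'], batch_norm=True)  # the vgg11 feature stack
x = torch.randn(1, 3, 32, 32)
print(features(x).shape)  # torch.Size([1, 512, 1, 1]): 32 -> 16 -> 8 -> 4 -> 2 -> 1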
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8-*-
import argparse
import logging
import yaml
import time
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.backends.cudnn as cudnn

from tensorboardX import SummaryWriter

from easydict import EasyDict
from models import *
from utils import *

parser = argparse.ArgumentParser(description="Pytorch_Image_classifier_tutorial")
parser.add_argument('--work-path', required=True, type=str)
parser.add_argument('--resume', action='store_true', help='resume from checkpoint')

args = parser.parse_args()
logger = Logger(log_file_name=args.work_path + '/log.txt', log_level=logging.DEBUG,
                logger_name='CIFAR').get_log()


def train(train_loader, net, criterion, optimizer, epoch, device):
    global writer  # SummaryWriter instance, created in main()

    start = time.time()
    net.train()

    train_loss = 0
    correct = 0
    total = 0
    logger.info(" === Epoch: [{}/{}] === ".format(epoch + 1, config.epochs))
    for batch_index, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        if config.mixup:
            inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, config.mixup_alpha, device)
            outputs = net(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        else:
            outputs = net(inputs)
            loss = criterion(outputs, targets)

        # zero the gradient buffers
        optimizer.zero_grad()
        # backward
        loss.backward()
        # update weights
        optimizer.step()

        # accumulate loss and accuracy
        train_loss += loss.item()
        # outputs.max(1) returns (values, indices); the indices are the predicted labels
        # (see https://cloud.tencent.com/developer/article/1433941)
        _, predicted = outputs.max(1)
        total += targets.size(0)
        if config.mixup:
            # a mixed sample counts as lam of label a and (1 - lam) of label b
            correct += (lam * predicted.eq(targets_a).sum().item()
                        + (1 - lam) * predicted.eq(targets_b).sum().item())
        else:
            correct += predicted.eq(targets).sum().item()

        if (batch_index + 1) % 100 == 0:
            logger.info("   == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}".format(
                batch_index + 1, len(train_loader), train_loss / (batch_index + 1), 100.0 * correct / total,
                get_current_lr(optimizer)))
    # end-of-epoch summary
    logger.info("   == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}".format(
        batch_index + 1, len(train_loader), train_loss / (batch_index + 1), 100.0 * correct / total,
        get_current_lr(optimizer)))

    end = time.time()
    logger.info("   == cost time: {:.4f}s".format(end - start))
    train_loss = train_loss / (batch_index + 1)
    train_acc = correct / total

    writer.add_scalar('train_loss', train_loss, epoch)
    writer.add_scalar('train_acc', train_acc, epoch)

    return train_loss, train_acc


# validation
def test(test_loader, net, criterion, optimizer, epoch, device):
    global best_prec, writer

    net.eval()

    test_loss = 0
    correct = 0
    total = 0

    logger.info(" === Validate [{}/{}] ===".format(epoch + 1, config.epochs))

    with torch.no_grad():
        for batch_index, (inputs, targets) in enumerate(test_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()

    logger.info("   == test loss: {:.3f} | test acc: {:6.3f}%".format(
        test_loss / (batch_index + 1), 100.0 * correct / total))
    test_loss = test_loss / (batch_index + 1)
    test_acc = correct / total
    writer.add_scalar('test_loss', test_loss, epoch)
    writer.add_scalar('test_acc', test_acc, epoch)

    # save checkpoint
    acc = 100.0 * correct / total
    state = {
        'state_dict': net.state_dict(),
        'best_prec': best_prec,
        'last_epoch': epoch,
        'optimizer': optimizer.state_dict(),
    }
    is_best = acc > best_prec
    save_checkpoint(state, is_best, args.work_path + '/' + config.ckpt_name)
    if is_best:
        best_prec = acc


def main():
    global args, config, last_epoch, best_prec, writer
    writer = SummaryWriter(logdir=args.work_path + '/event')

    # read config from the yaml file (explicit loader: yaml.load without one is deprecated)
    with open(args.work_path + '/config.yaml') as f:
        config = yaml.load(f, Loader=yaml.SafeLoader)

    # convert to EasyDict so the values can be accessed as attributes
    config = EasyDict(config)
    logger.info(config)

    # define net
    net = get_model(config)
    logger.info(net)
    logger.info(" == total parameters: " + str(count_parameters(net)))

    # GPU or CPU
    device = 'cuda' if config.use_gpu else 'cpu'
    # data parallel for multiple GPUs
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
    net.to(device)

    # define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), config.lr_scheduler.base_lr,
                                momentum=config.optimize.momentum,
                                weight_decay=config.optimize.weight_decay,
                                nesterov=config.optimize.nesterov)

    # resume from a checkpoint
    last_epoch = -1
    best_prec = 0
    if args.work_path:
        ckpt_file_name = args.work_path + '/' + config.ckpt_name + '.pth.tar'
        if args.resume:
            best_prec, last_epoch = load_checkpoint(ckpt_file_name, net, optimizer)

    # load training data, apply data augmentation and build the data loaders
    transform_train = transforms.Compose(data_augmentation(config))
    transform_test = transforms.Compose(data_augmentation(config, is_train=False))

    train_loader, test_loader = get_data_loader(transform_train, transform_test, config)

    # start training
    logger.info(" ======= start training ======= ")
    for epoch in range(last_epoch + 1, config.epochs):
        lr = adjust_learning_rate(optimizer, epoch, config)
        writer.add_scalar('learning_rate', lr, epoch)
        train(train_loader, net, criterion, optimizer, epoch, device)
        if epoch == 0 or (epoch + 1) % config.eval_freq == 0 or epoch == config.epochs - 1:
            test(test_loader, net, criterion, optimizer, epoch, device)
    logger.info(" ======= Training Finished. best_test_acc: {:.3f}% ======= ".format(best_prec))


if __name__ == "__main__":
    main()
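The training entry point above leans on EasyDict so the YAML config can be read with attribute access instead of dictionary lookups. A minimal sketch of that plumbing (not repository code; `yaml.safe_load` is the shorthand equivalent of the explicit SafeLoader used above):

import yaml
from easydict import EasyDict

with open('./experiments/cifar10/lenet/config.yaml') as f:
    config = EasyDict(yaml.safe_load(f))

# nested keys become chained attributes
print(config.architecture)       # the model name the factory resolves
print(config.lr_scheduler.type)  # which branch of adjust_learning_rate runs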
--------------------------------------------------------------------------------
/train_visdom.py:
--------------------------------------------------------------------------------
# -*-coding:utf-8-*-
import argparse
import logging
import yaml
import time
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.backends.cudnn as cudnn

from visdom import Visdom

from easydict import EasyDict
from models import *
from utils import *

parser = argparse.ArgumentParser(description="Pytorch_Image_classifier_tutorial")
parser.add_argument('--work-path', required=True, type=str)
parser.add_argument('--resume', action='store_true', help='resume from checkpoint')

args = parser.parse_args()
logger = Logger(log_file_name=args.work_path + '/log.txt', log_level=logging.DEBUG,
                logger_name='CIFAR').get_log()


def train(train_loader, net, criterion, optimizer, epoch, device):
    global visdom  # Visdom client, created in main()

    start = time.time()
    net.train()

    train_loss = 0
    correct = 0
    total = 0
    logger.info(" === Epoch: [{}/{}] === ".format(epoch + 1, config.epochs))
    step = 0
    for batch_index, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        if config.mixup:
            inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, config.mixup_alpha, device)
            outputs = net(inputs)
            loss = mixup_criterion(criterion, outputs, targets_a, targets_b, lam)
        else:
            outputs = net(inputs)
            loss = criterion(outputs, targets)
        step += 1
        # zero the gradient buffers
        optimizer.zero_grad()
        # backward
        loss.backward()
        # update weights
        optimizer.step()

        # accumulate loss and accuracy
        train_loss += loss.item()
        # outputs.max(1) returns (values, indices); the indices are the predicted labels
        _, predicted = outputs.max(1)
        total += targets.size(0)
        if config.mixup:
            correct += (lam * predicted.eq(targets_a).sum().item()
                        + (1 - lam) * predicted.eq(targets_b).sum().item())
        else:
            correct += predicted.eq(targets).sum().item()
        # append running average loss and accuracy to the 'train' window created in main()
        visdom.line([[train_loss / (batch_index + 1), 100.0 * correct / total]],
                    [step], win='train', update='append')
        if (batch_index + 1) % 100 == 0:
            logger.info("   == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}".format(
                batch_index + 1, len(train_loader), train_loss / (batch_index + 1), 100.0 * correct / total,
                get_current_lr(optimizer)))
    # end-of-epoch summary
    logger.info("   == step: [{:3}/{}], train loss: {:.3f} | train acc: {:6.3f}% | lr: {:.6f}".format(
        batch_index + 1, len(train_loader), train_loss / (batch_index + 1), 100.0 * correct / total,
        get_current_lr(optimizer)))

    end = time.time()
    logger.info("   == cost time: {:.4f}s".format(end - start))
    train_loss = train_loss / (batch_index + 1)
    train_acc = correct / total

    return train_loss, train_acc


# validation
def test(test_loader, net, criterion, optimizer, epoch, device):
    global best_prec, visdom

    net.eval()

    test_loss = 0
    correct = 0
    total = 0

    logger.info(" === Validate [{}/{}] ===".format(epoch + 1, config.epochs))

    step = 0
    with torch.no_grad():
        for batch_index, (inputs, targets) in enumerate(test_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            step += 1
            # running average loss/accuracy, plus the current batch and its predictions
            visdom.line([[test_loss / (batch_index + 1), 100.0 * correct / total]],
                        [step], win='test', update='append')
            visdom.images(inputs.cpu(), win='x')
            visdom.text(str(predicted.detach().cpu().numpy()), win='pred', opts=dict(title='pred'))

    logger.info("   == test loss: {:.3f} | test acc: {:6.3f}%".format(
        test_loss / (batch_index + 1), 100.0 * correct / total))
    test_loss = test_loss / (batch_index + 1)
    test_acc = correct / total

    # save checkpoint
    acc = 100.0 * correct / total
    state = {
        'state_dict': net.state_dict(),
        'best_prec': best_prec,
        'last_epoch': epoch,
        'optimizer': optimizer.state_dict(),
    }
    is_best = acc > best_prec
    save_checkpoint(state, is_best, args.work_path + '/' + config.ckpt_name)
    if is_best:
        best_prec = acc


def main():
    global args, config, last_epoch, best_prec, visdom

    visdom = Visdom()
    # create the line-plot windows up front; train()/test() append to them
    visdom.line([[0.0, 0.0]], [0.], win='train', opts=dict(title='train loss&acc.', legend=['loss', 'acc']))
    visdom.line([[0.0, 0.0]], [0.], win='test', opts=dict(title='test loss&acc.', legend=['loss', 'acc']))

    # read config from the yaml file
    with open(args.work_path + '/config.yaml') as f:
        config = yaml.load(f, Loader=yaml.SafeLoader)

    # convert to EasyDict so the values can be accessed as attributes
    config = EasyDict(config)
    logger.info(config)

    # define net
    net = get_model(config)
    logger.info(net)
    logger.info(" == total parameters: " + str(count_parameters(net)))

    # GPU or CPU
    device = 'cuda' if config.use_gpu else 'cpu'
    # data parallel for multiple GPUs
    if device == 'cuda':
        net = torch.nn.DataParallel(net)
        cudnn.benchmark = True
    net.to(device)

    # define loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), config.lr_scheduler.base_lr,
                                momentum=config.optimize.momentum,
                                weight_decay=config.optimize.weight_decay,
                                nesterov=config.optimize.nesterov)

    # resume from a checkpoint
    last_epoch = -1
    best_prec = 0
    if args.work_path:
        ckpt_file_name = args.work_path + '/' + config.ckpt_name + '.pth.tar'
        if args.resume:
            best_prec, last_epoch = load_checkpoint(ckpt_file_name, net, optimizer)

    # load training data, apply data augmentation and build the data loaders
    transform_train = transforms.Compose(data_augmentation(config))
    transform_test = transforms.Compose(data_augmentation(config, is_train=False))

    train_loader, test_loader = get_data_loader(transform_train, transform_test, config)

    # start training
    logger.info(" ======= start training ======= ")
    for epoch in range(last_epoch + 1, config.epochs):
        adjust_learning_rate(optimizer, epoch, config)
        train(train_loader, net, criterion, optimizer, epoch, device)
        if epoch == 0 or (epoch + 1) % config.eval_freq == 0 or epoch == config.epochs - 1:
            test(test_loader, net, criterion, optimizer, epoch, device)
    logger.info(" ======= Training Finished. best_test_acc: {:.3f}% ======= ".format(best_prec))


if __name__ == "__main__":
    main()
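train_visdom.py assumes a visdom server is already running (start one with `python -m visdom.server`; the client connects to localhost:8097 by default). Its create-then-append window pattern, reduced to a standalone sketch with made-up values:

from visdom import Visdom

viz = Visdom()  # connects to http://localhost:8097
# create a two-series window once...
viz.line([[0.0, 0.0]], [0.], win='demo', opts=dict(title='loss & acc', legend=['loss', 'acc']))
# ...then append one point per step
for step in range(1, 4):
    viz.line([[1.0 / step, 30.0 * step]], [step], win='demo', update='append')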
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
# -*- coding:utf-8 -*-
import os
import math
import shutil  # file operations that os lacks: move, copy, archive, (un)compress
import logging
import numpy as np

import torch
import torch.utils.data as Data
import torchvision
import torchvision.transforms as transforms


# logging setup following https://www.cnblogs.com/CJOKER/p/8295272.html
class Logger(object):
    def __init__(self, log_file_name, log_level, logger_name):
        # step 1: create a logger
        self.__logger = logging.getLogger(logger_name)
        self.__logger.setLevel(log_level)
        # step 2: create handlers (file and console)
        file_handler = logging.FileHandler(log_file_name)
        console_handler = logging.StreamHandler()
        # step 3: define the output format of the handlers
        formatter = logging.Formatter(
            '[%(asctime)s]-[%(filename)s line:%(lineno)d]:%(message)s '
        )
        file_handler.setFormatter(formatter)
        console_handler.setFormatter(formatter)
        # step 4: attach the handlers to the logger
        self.__logger.addHandler(file_handler)
        self.__logger.addHandler(console_handler)

    def get_log(self):
        return self.__logger


def count_parameters(model):
    # numel() returns the number of elements in a tensor
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def data_augmentation(config, is_train=True):
    aug = []
    if is_train:
        # random crop
        if config.augmentation.random_crop:
            aug.append(transforms.RandomCrop(config.input_size, padding=4))
        # horizontal flip
        if config.augmentation.random_horizontal_flip:
            aug.append(transforms.RandomHorizontalFlip())
    aug.append(transforms.ToTensor())
    # normalize: (x - mean) / std, per channel
    if config.augmentation.normalize:
        if config.dataset == 'cifar10':
            aug.append(transforms.Normalize(
                (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
            ))
        else:  # cifar100
            aug.append(transforms.Normalize(
                (0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)
            ))

    if is_train and config.augmentation.cutout:
        # cutout
        aug.append(Cutout(n_holes=config.augmentation.holes,
                          length=config.augmentation.length))
    return aug


class Cutout(object):
    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        h = img.size(1)
        w = img.size(2)

        mask = np.ones((h, w), np.float32)

        for n in range(self.n_holes):
            # pick a random centre and zero out a length x length square around it
            y = np.random.randint(h)
            x = np.random.randint(w)

            # np.clip() limits values to the range [a_min, a_max];
            # '//' is integer (floor) division, unlike the float division '/'
            y1 = np.clip(y - self.length // 2, 0, h)
            y2 = np.clip(y + self.length // 2, 0, h)
            x1 = np.clip(x - self.length // 2, 0, w)
            x2 = np.clip(x + self.length // 2, 0, w)

            mask[y1:y2, x1:x2] = 0
        mask = torch.from_numpy(mask)
        mask = mask.expand_as(img)  # broadcast the (h, w) mask to the shape of img
        img = img * mask

        return img


def save_checkpoint(state, is_best, filename):
    torch.save(state, filename + '.pth.tar')
    if is_best:
        shutil.copyfile(filename + '.pth.tar', filename + '_best.pth.tar')


def load_checkpoint(path, model, optimizer=None):
    if os.path.isfile(path):
        logging.info(" === loading checkpoint '{}' ===".format(path))

        checkpoint = torch.load(path)
        model.load_state_dict(checkpoint['state_dict'], strict=False)

        if optimizer is not None:
            best_prec = checkpoint['best_prec']
            last_epoch = checkpoint['last_epoch']
            optimizer.load_state_dict(checkpoint['optimizer'])
            logging.info(" === load state dict done. also loaded optimizer from checkpoint '{}' (epoch {})".format(
                path, last_epoch + 1))
            return best_prec, last_epoch


def get_data_loader(transform_train, transform_test, config):
    assert config.dataset in ('cifar10', 'cifar100')
    if config.dataset == 'cifar10':
        trainset = torchvision.datasets.CIFAR10(
            root=config.data_path, train=True, transform=transform_train, download=True
        )
        testset = torchvision.datasets.CIFAR10(
            root=config.data_path, train=False, transform=transform_test, download=True)
    else:
        trainset = torchvision.datasets.CIFAR100(
            root=config.data_path, train=True, transform=transform_train, download=True
        )
        testset = torchvision.datasets.CIFAR100(
            root=config.data_path, train=False, transform=transform_test, download=True
        )

    train_loader = Data.DataLoader(
        trainset, batch_size=config.batch_size, shuffle=True, num_workers=config.workers)
    # no need to shuffle the test set
    test_loader = Data.DataLoader(
        testset, batch_size=config.test_batch, shuffle=False, num_workers=config.workers)
    return train_loader, test_loader


def mixup_data(x, y, alpha, device):
    '''Returns mixed inputs, pairs of targets, and the mixing coefficient lambda'''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    index = torch.randperm(batch_size).to(device)  # a random permutation of 0..batch_size-1

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)


def get_current_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']


def adjust_learning_rate(optimizer, epoch, config):
    lr = get_current_lr(optimizer)
    if config.lr_scheduler.type == 'STEP':
        if epoch in config.lr_scheduler.lr_epochs:
            lr *= config.lr_scheduler.lr_mults
    elif config.lr_scheduler.type == 'COSINE':
        ratio = epoch / config.epochs
        lr = config.lr_scheduler.min_lr + \
            (config.lr_scheduler.base_lr - config.lr_scheduler.min_lr) * \
            (1.0 + math.cos(math.pi * ratio)) / 2.0
    elif config.lr_scheduler.type == 'HTD':
        ratio = epoch / config.epochs
        lr = config.lr_scheduler.min_lr + \
            (config.lr_scheduler.base_lr - config.lr_scheduler.min_lr) * \
            (1.0 - math.tanh(config.lr_scheduler.lower_bound +
                             (config.lr_scheduler.upper_bound - config.lr_scheduler.lower_bound) * ratio)) / 2.0
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr
--------------------------------------------------------------------------------
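For intuition about the COSINE and HTD branches of adjust_learning_rate, here is a standalone sketch that evaluates both formulas by hand. It is illustrative only, not repository code; the constants (base_lr=0.1, min_lr=0.0, 250 epochs, HTD bounds [-6, 3]) are plugged in just to show the shapes of the two curves:

import math

base_lr, min_lr, epochs = 0.1, 0.0, 250
L, U = -6.0, 3.0  # HTD lower/upper bounds

for epoch in (0, 125, 249):
    ratio = epoch / epochs
    cosine = min_lr + (base_lr - min_lr) * (1.0 + math.cos(math.pi * ratio)) / 2.0
    htd = min_lr + (base_lr - min_lr) * (1.0 - math.tanh(L + (U - L) * ratio)) / 2.0
    print(epoch, round(cosine, 4), round(htd, 4))

# cosine halves the lr at mid-training (0.05 at epoch 125), while HTD stays near
# base_lr much longer (about 0.095 at epoch 125) before decaying sharply to min_lr.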