├── README.md
├── data
│   └── readme.md
├── get_ap.py
├── logs
│   └── README.md
├── nets
│   ├── __pycache__
│   │   └── resnet50.cpython-36.pyc
│   └── resnet50.py
├── prediction.py
├── test_net.py
└── train.py
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# ResNet50: A PyTorch Implementation of the Residual Network (徐宝文)

# Performance
| Training dataset | Weights file | Test dataset | Accuracy |
| :---: | :---: | :---: | :---: |
| MNIST-train | resnet50_mnist.pth | MNIST-test | 99.64% |

# Requirements
torch==1.7.1

# Downloads
## a. Model weights
The resnet50_mnist.pth weights needed for training can be downloaded from Baidu Netdisk or Google Drive.
**Baidu Netdisk:**
Link: https://pan.baidu.com/s/1apl5kspxGvjg4y6hLjSktQ?pwd=4mlv
Extraction code: 4mlv

**Google Drive:**
[https://drive.google.com/file/d/1rFNsKgbUWKfp533Znsu0Jwz3XhQSxksM/view?usp=sharing](https://drive.google.com/file/d/1rFNsKgbUWKfp533Znsu0Jwz3XhQSxksM/view?usp=sharing)

## b. MNIST dataset
**Baidu Netdisk:**
Link: [https://pan.baidu.com/s/1MYMs_axknMm2g5Ou-cWmgQ](https://pan.baidu.com/s/1MYMs_axknMm2g5Ou-cWmgQ)
Extraction code: 8ce2

# Prediction steps
1. Download the pretrained model, or train one yourself by following the training steps below.
2. In prediction.py, change PATH so that it points to your trained model:
```python
PATH = './logs/resnet50-mnist.pth'
```
3. Run prediction.py and enter the number of images to predict per batch.
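
For reference, the three steps above amount to loading the state dict and running one forward pass. The sketch below is illustrative only and assumes the checkpoint sits at `./logs/resnet50-mnist.pth` and that MNIST has already been placed in `data/`:
```python
# Minimal prediction sketch (illustrative; the paths are assumptions, see above).
import torch
from torchvision import datasets, transforms
from nets.resnet50 import ResNet, Bottleneck

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=10)
model.load_state_dict(torch.load('./logs/resnet50-mnist.pth', map_location=device))
model.to(device).eval()

test_set = datasets.MNIST(root='data/', train=False, transform=transforms.ToTensor())
image, label = test_set[0]                         # one 1x28x28 tensor and its label

with torch.no_grad():
    logits = model(image.unsqueeze(0).to(device))  # add the batch dimension
print('predicted:', logits.argmax(dim=1).item(), '| ground truth:', label)
```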
# Training steps
1. Training uses the MNIST dataset, which is downloaded automatically through the torchvision API (the download code is already in place).
2. If the automatic download is slow, download the dataset from Baidu Netdisk instead and place the files in the data folder.
3. Run train.py to start training.

# Reference
[https://github.com/bubbliiiing/faster-rcnn-pytorch.git](https://github.com/bubbliiiing/faster-rcnn-pytorch.git)
--------------------------------------------------------------------------------
/data/readme.md:
--------------------------------------------------------------------------------
Data files go here.

--------------------------------------------------------------------------------
/get_ap.py:
--------------------------------------------------------------------------------
import os

import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

from nets.resnet50 import ResNet, Bottleneck

# Evaluate every checkpoint stored in ./logs on the MNIST test set and print
# its top-1 accuracy (despite the file name, plain accuracy is computed, not AP).
Batch_size = 128

root = './logs'
file_dir = os.listdir(root)
model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=10)

test_dataset = datasets.MNIST(root='data/', train=False,
                              transform=transforms.ToTensor(), download=False)
gen_test = DataLoader(dataset=test_dataset, batch_size=Batch_size, shuffle=False)

for file in file_dir:
    # Skip anything that is not a checkpoint (e.g. logs/README.md).
    if not file.endswith('.pth'):
        continue
    PATH = os.path.join(root, file)

    model.load_state_dict(torch.load(PATH))
    model = model.cuda()
    model.eval()

    test_correct = 0
    with torch.no_grad():
        for inputs, labels in gen_test:
            inputs, labels = inputs.cuda(), labels.cuda()
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            test_correct += torch.sum(predicted == labels)
    print(file)
    print("correct: %.3f%%" % (100 * test_correct / len(test_dataset)))
--------------------------------------------------------------------------------
/logs/README.md:
--------------------------------------------------------------------------------
# Weight files go here
--------------------------------------------------------------------------------
/nets/__pycache__/resnet50.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wangyunjeff/ResNet50-MNIST-pytorch/fb2fa21f0e22fac710b8596c068ee98d00275a07/nets/__pycache__/resnet50.cpython-36.pyc
--------------------------------------------------------------------------------
/nets/resnet50.py:
--------------------------------------------------------------------------------
import math

import torch.nn as nn


class Bottleneck(nn.Module):
    '''
    A bottleneck block built from three convolutions:
        conv1 - 1x1, reduces the number of channels
        conv2 - 3x3, extracts features
        conv3 - 1x1, expands the number of channels
    This structure extracts features well, allows a deeper network, and keeps
    the parameter count down.
    '''

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)

        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)

        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        '''
        The residual-block structure.

        ResNet50 is stacked out of two kinds of blocks, the Conv Block and the
        Identity Block. The only difference between them is whether the
        shortcut branch contains a convolution.

        The Identity Block is the plain residual block: its shortcut has no
        convolution, so the input is added to the output directly.
        The Conv Block adds a convolution plus BatchNorm to its shortcut; by
        choosing the stride and the number of channels of that convolution it
        changes the spatial size and width of the network.

        In other words:
        - the Identity Block keeps input and output dimensions equal, so it can
          be chained to deepen the network;
        - the Conv Block changes the dimensions, so it cannot be chained and is
          used to switch the network to a new dimension.

        :param x: input tensor
        :return: output tensor of the block
        '''
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    def __init__(self, block, layers, num_classes=10):
        # -----------------------------------#
        #   The input is a 28,28,1 MNIST image
        # -----------------------------------#
        self.inplanes = 64
        super(ResNet, self).__init__()

        # 28,28,1 -> 30,30,64
        self.conv1 = nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=2, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)

        # 30,30,64 -> 15,15,64
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        # 15,15,64 -> 15,15,256
        self.layer1 = self._make_layer(block, 64, layers[0])
        # 15,15,256 -> 8,8,512
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        # 8,8,512 -> 4,4,1024
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        # 4,4,1024 -> 2,2,2048 (layer4 is also used as the classifier part in resnet50() below)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # 2,2,2048 -> 1,1,2048
        self.avgpool = nn.AvgPool2d(2)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        '''
        Builds one stage: a Conv Block followed by (blocks - 1) Identity Blocks.
        :param block: the Bottleneck class defined above, i.e. the basic residual block of ResNet50
        :param planes: base number of output channels of the stage
        :param blocks: how many residual blocks the stage contains
        :param stride: stride of the first block of the stage
        :return: the stacked Conv Block and Identity Blocks as an nn.Sequential
        '''
        downsample = None
        # -------------------------------------------------------------------#
        #   When the stage has to halve the spatial size or change the number
        #   of channels, the shortcut needs a downsample branch, i.e. the
        #   first block of the stage is a Conv Block.
        # -------------------------------------------------------------------#
        if stride != 1 or self.inplanes != planes * block.expansion:  # block.expansion = 4
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )
        layers = []  # stacks the Conv Block and the Identity Blocks
        # add the Conv Block
        layers.append(block(self.inplanes, planes, stride, downsample))
        # the number of input channels has changed, so update inplanes
        self.inplanes = planes * block.expansion
        # add the remaining Identity Blocks
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


def resnet50():
    model = ResNet(Bottleneck, [3, 4, 6, 3])
    # ----------------------------------------------------------------------------#
    #   Feature-extraction part: conv1 through model.layer3.
    # ----------------------------------------------------------------------------#
    features = list([model.conv1, model.bn1, model.relu, model.maxpool, model.layer1, model.layer2, model.layer3])
    # ----------------------------------------------------------------------------#
    #   Classification part: model.layer4 and model.avgpool.
    # ----------------------------------------------------------------------------#
    classifier = list([model.layer4, model.avgpool])

    features = nn.Sequential(*features)
    classifier = nn.Sequential(*classifier)
    return features, classifier
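
The stage-by-stage shapes noted in the comments above are easy to verify with a throwaway forward pass on an MNIST-sized dummy batch. This is a quick sanity-check sketch, not part of the repository:
```python
import torch
from nets.resnet50 import ResNet, Bottleneck

# Dummy batch: 4 grayscale images of 28x28 pixels, as produced by the MNIST loader.
model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=10)
model.eval()

with torch.no_grad():
    logits = model(torch.randn(4, 1, 28, 28))

print(logits.shape)  # expected: torch.Size([4, 10])
```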
--------------------------------------------------------------------------------
/prediction.py:
--------------------------------------------------------------------------------
import time

import cv2
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

from nets.resnet50 import ResNet, Bottleneck

# Path to the trained weights; change this to your own checkpoint.
PATH = './logs/resnet50-mnist.pth'

Batch_Size = int(input('Number of handwritten-digit images per prediction: '))
model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=10)
model.load_state_dict(torch.load(PATH))
model = model.cuda()
model.eval()
test_dataset = datasets.MNIST(root='data/', train=False,
                              transform=transforms.ToTensor(), download=False)
gen_test = DataLoader(dataset=test_dataset, batch_size=Batch_Size, shuffle=True)

while True:
    # Draw a random batch and tile it into a single image for display.
    images, labels = next(iter(gen_test))
    img = torchvision.utils.make_grid(images, nrow=Batch_Size)
    img_array = img.numpy().transpose(1, 2, 0)

    start_time = time.time()
    with torch.no_grad():
        outputs = model(images.cuda())
    _, predicted = torch.max(outputs, 1)
    end_time = time.time()

    print('Prediction time:', end_time - start_time)
    print('Predicted digits:', predicted.cpu().numpy())

    # Press any key in the image window to run the next batch.
    cv2.imshow('img', img_array)
    cv2.waitKey(0)
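
prediction.py opens an OpenCV window, which will fail on a headless machine such as a remote server with no display. On such machines the tiled batch can be written to disk with torchvision instead of shown with cv2. A minimal sketch of that variation, again assuming MNIST already sits in `data/`:
```python
# Headless alternative to prediction.py's display step: save the tiled batch
# to a PNG instead of calling cv2.imshow()/cv2.waitKey().
import torchvision
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

batch_size = 8
test_dataset = datasets.MNIST(root='data/', train=False,
                              transform=transforms.ToTensor(), download=False)
loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

images, labels = next(iter(loader))
torchvision.utils.save_image(images, 'prediction_preview.png', nrow=batch_size)
print('saved prediction_preview.png with labels', labels.numpy())
```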
--------------------------------------------------------------------------------
/test_net.py:
--------------------------------------------------------------------------------
import torch
from torchsummary import summary

from nets.resnet50 import ResNet, Bottleneck

if __name__ == "__main__":
    # device decides whether the network runs on the GPU or the CPU.
    model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=10)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    summary(model, input_size=(1, 28, 28))
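
test_net.py depends on the third-party torchsummary package. If it is not installed, a rough equivalent of the summary footer (the parameter counts) can be obtained with plain PyTorch. A minimal sketch, assuming only the repository's own nets/resnet50.py:
```python
from nets.resnet50 import ResNet, Bottleneck

model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=10)

# Parameter counts, similar to the totals printed at the end of summary().
total = sum(p.numel() for p in model.parameters())
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'total parameters:     {total:,}')
print(f'trainable parameters: {trainable:,}')
```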
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from tqdm import tqdm

from nets.resnet50 import Bottleneck, ResNet


def get_lr(optimizer):
    # Return the learning rate of the first parameter group.
    for param_group in optimizer.param_groups:
        return param_group['lr']


def fit_one_epoch(net, softmaxloss, epoch, epoch_size, epoch_size_val, gen, gen_test, Epoch, cuda):
    # NOTE: optimizer and test_dataset are module-level names defined in the
    # __main__ block below.
    total_loss = 0

    # Switch back to training mode (eval mode is set for validation below).
    net.train()
    with tqdm(total=epoch_size, desc='Epoch{}/{}'.format(epoch + 1, Epoch), postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            images, targets = batch[0], batch[1]
            if cuda:
                images = images.cuda()
                targets = targets.cuda()

            # ----------------------#
            #   Zero the gradients
            # ----------------------#
            optimizer.zero_grad()
            # ----------------------#
            #   Forward pass
            # ----------------------#
            outputs = net(images)
            # ----------------------#
            #   Compute the loss
            # ----------------------#
            loss = softmaxloss(outputs, targets)
            # ----------------------#
            #   Backward pass
            # ----------------------#
            loss.backward()
            optimizer.step()
            # .item() keeps a plain float so the graph of each batch is freed.
            total_loss += loss.item()

            pbar.set_postfix(**{'total_loss': float(total_loss / (iteration + 1)),
                                'lr': get_lr(optimizer)})
            pbar.update(1)

    net.eval()
    print('\nStart test')
    test_correct = 0
    with tqdm(total=epoch_size_val, desc='Epoch{}/{}'.format(epoch + 1, Epoch), postfix=dict, mininterval=0.3) as pbar:
        with torch.no_grad():
            for iteration, batch in enumerate(gen_test):
                images, targets = batch[0], batch[1]
                if cuda:
                    images = images.cuda()
                    targets = targets.cuda()
                outputs = net(images)
                _, predicted = torch.max(outputs, 1)
                test_correct += torch.sum(predicted == targets)
                pbar.set_postfix(**{'test acc': float(100 * test_correct / len(test_dataset))})
                pbar.update(1)
    torch.save(net.state_dict(), 'logs/Epoch{}-Total_Loss{}.pth'.format(epoch + 1, total_loss / (iteration + 1)))


if __name__ == '__main__':
    # ----------------------------#
    #   Whether to use CUDA
    #   Set this to False if no GPU is available
    # ----------------------------#
    cuda = True
    # ----------------------------#
    #   Whether to start from the pretrained model
    # ----------------------------#
    pre_train = True
    # ----------------------------#
    #   Whether to use cosine-annealing learning-rate decay
    # ----------------------------#
    CosineLR = True

    # ----------------------------#
    #   Hyperparameters
    #   lr: learning rate
    #   Batch_size: batch size
    # ----------------------------#
    lr = 1e-3
    Batch_size = 512
    Init_Epoch = 0
    Fin_Epoch = 100

    # Build the model
    model = ResNet(Bottleneck, [3, 4, 6, 3], num_classes=10)
    if pre_train:
        model_path = 'logs/resnet50-mnist.pth'
        model.load_state_dict(torch.load(model_path))
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    train_dataset = datasets.MNIST(root='data/', train=True,
                                   transform=transforms.ToTensor(), download=True)
    test_dataset = datasets.MNIST(root='data/', train=False,
                                  transform=transforms.ToTensor(), download=False)

    gen = DataLoader(dataset=train_dataset, batch_size=Batch_size, shuffle=True, num_workers=0)
    gen_test = DataLoader(dataset=test_dataset, batch_size=Batch_size // 2, shuffle=True, num_workers=0)

    epoch_size = len(gen)
    epoch_size_val = len(gen_test)

    softmax_loss = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr)

    if CosineLR:
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-10)
    else:
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.92)

    for epoch in range(Init_Epoch, Fin_Epoch):
        fit_one_epoch(net=model, softmaxloss=softmax_loss, epoch=epoch, epoch_size=epoch_size,
                      epoch_size_val=epoch_size_val, gen=gen, gen_test=gen_test, Epoch=Fin_Epoch, cuda=cuda)
        lr_scheduler.step()
--------------------------------------------------------------------------------
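
The CosineLR flag in train.py switches between cosine annealing (T_max=5, eta_min=1e-10) and a per-epoch StepLR decay. How the learning rate evolves under either schedule can be previewed in isolation; a small sketch with a throwaway parameter, not part of the repository:
```python
import torch

# A single dummy parameter is enough to build an optimizer for the preview.
param = torch.nn.Parameter(torch.zeros(1))
optimizer = torch.optim.Adam([param], lr=1e-3)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=5, eta_min=1e-10)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.92)  # the non-cosine option

for epoch in range(10):
    print(epoch, optimizer.param_groups[0]['lr'])  # the value get_lr() would report
    optimizer.step()   # step the optimizer before the scheduler (PyTorch >= 1.1 ordering)
    scheduler.step()
```
With T_max=5 the rate falls from 1e-3 to roughly eta_min over five epochs and then climbs back up, repeating over the 100 training epochs, whereas StepLR with gamma=0.92 decays monotonically by 8% per epoch.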