# Repository layout:
# ├── CNN.py
# ├── README.md
# ├── resnet.py
# └── work.py

# /CNN.py:
# --------------------------------------------------------------------------------
import time

import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small two-convolution CNN for two-class classification of RGB images.

    The fully connected head is sized for 3x224x224 inputs; spatial sizes go
    224 -> 220 (conv1) -> 110 (pool) -> 106 (conv2) -> 53 (pool).
    """

    # Flattened feature count after the second pooling layer: 50 channels
    # of 53x53 activations (valid only for 224x224 inputs).
    _FLAT_FEATURES = 53 * 53 * 50

    def __init__(self):
        super(Net, self).__init__()
        # nn.Conv2d(in_channels, out_channels, kernel_size, stride):
        # 3 input channels (RGB), 20 kernels of size 5x5, stride 1.
        self.conv1 = nn.Conv2d(3, 20, 5, 1)
        # in_channels equals the previous layer's out_channels (20); 50 kernels.
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        # nn.Linear(in_features, out_features); in_features depends on the
        # input image size (see _FLAT_FEATURES).
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 500)
        # Two output logits, one per class.
        self.fc2 = nn.Linear(500, 2)

    def forward(self, x):
        """Return raw class logits of shape (N, 2) for input of shape (N, 3, 224, 224)."""
        x = F.relu(self.conv1(x))    # (N, 20, 220, 220)
        x = F.max_pool2d(x, 2, 2)    # (N, 20, 110, 110)
        x = F.relu(self.conv2(x))    # (N, 50, 106, 106)
        x = F.max_pool2d(x, 2, 2)    # (N, 50, 53, 53)
        # Flatten per sample so a wrong input size fails loudly in fc1 instead
        # of silently changing the batch dimension.
        x = torch.flatten(x, 1)      # (N, 53*53*50)
        x = F.relu(self.fc1(x))      # (N, 500)
        x = self.fc2(x)              # (N, 2)
        # No softmax here: nn.CrossEntropyLoss expects raw logits.
        return x


def train(model, train_loader, optimizer, epoch, log_interval=1):
    """Train ``model`` for one epoch and print progress.

    :param model: network to train
    :param train_loader: DataLoader yielding (data, target) batches
    :param optimizer: optimizer bound to ``model``'s parameters
    :param epoch: epoch number, used only for logging
    :param log_interval: print a progress line every ``log_interval`` batches
    """
    model.train()
    criterion = nn.CrossEntropyLoss()  # built once, not once per batch
    total_samples = len(train_loader.dataset)
    total_batches = len(train_loader)
    running_corrects = 0
    seen = 0  # samples processed so far; correct even for a short last batch
    for batch_idx, (data, target) in enumerate(train_loader):
        # Force gradient tracking on even if the caller disabled it.
        with torch.autograd.set_grad_enabled(True):
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

        preds = outputs.argmax(dim=1)  # index of the largest logit per sample
        running_corrects += torch.sum(preds.view(-1) == target.view(-1)).item()
        seen += target.size(0)
        if batch_idx % log_interval == 0:
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tACC: {}/{}".format(
                epoch,
                seen,
                total_samples,
                100. * (batch_idx + 1) / total_batches,
                loss.item(),
                running_corrects,
                seen
            ))


def test(model, test_loader, loss):
    """Evaluate ``model`` and append the average per-sample loss to ``loss``.

    :param model: network to evaluate
    :param test_loader: DataLoader yielding (data, target) batches
    :param loss: list that receives the average loss of this evaluation
    """
    model.eval()  # evaluation mode
    # reduction='sum' adds up per-sample losses so that dividing by the
    # dataset size below yields a true per-sample average (the default
    # 'mean' would make the result too small by about the batch size).
    loss_function = nn.CrossEntropyLoss(reduction='sum')
    test_loss = 0
    correct = 0
    since = time.time()
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            test_loss += loss_function(output, target).item()
            # keepdim=True gives shape (N, 1); target is (N,), so reshape it
            # to match before comparing.
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    num_samples = len(test_loader.dataset)
    test_loss /= num_samples
    loss.append(test_loss)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, num_samples,
        100. * correct / num_samples))
    end = time.time()
    print(end - since)


# --------------------------------------------------------------------------------
# /README.md:
# --------------------------------------------------------------------------------
# # Binary image classification with neural networks using the PyTorch framework
# ## Problem description
# This experiment solves a binary classification problem with several
# algorithms, training the networks on two datasets.
# ## Datasets
# Dataset 1:
# Texture image dataset. Source: a public texture dataset.
# Baidu Cloud download: https://pan.baidu.com/s/1NiFmTGCkTSCyNMLhsUln3w
# extraction code: z6hy
# After downloading, unpack the images into the program's current directory.
# Dataset 2:
# Cat/dog image dataset. Source: Baidu datasets.
# Download: https://pan.baidu.com/s/1B01mUaodlPwdxBU-dLjkEg
# extraction code: dp74
# After downloading, unpack the images into the program's current directory.
# ## Code
# The code uses PyTorch to build a CNN (convolution + fully connected layers)
# and a ResNet. Both networks are trained and tested on the data so their
# performance can be compared.
# CNN.py: defines the CNN model plus its training and test functions
# resnet.py: defines the ResNet model plus its training and test functions
# work.py: main program; calls the functions above to train and test
# ## Running
# Download the datasets into the program's working directory and unpack them.
# Install PyTorch. This program uses the CPU build; see https://pytorch.org/
# for the install command. To use the GPU build, modify the code.
# Run work.py


# --------------------------------------------------------------------------------
# /resnet.py:
# --------------------------------------------------------------------------------
import copy
import os
import shutil
import time
from PIL import Image
import torch
import torch.nn as nn
import torchvision
from torchvision import models
from torchvision import transforms


def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when feature extracting.

    :param model: model whose parameters may be frozen
    :param feature_extracting: if true, set ``requires_grad = False`` on all parameters
    :return: None
    """
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False


def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a model with a fresh classification head.

    :param model_name: architecture name; only "resnet" (ResNet-18) is supported
    :param num_classes: number of output classes of the new head
    :param feature_extract: if true, freeze the backbone and train only the head
    :param use_pretrained: load pretrained weights for the backbone
    :return: (model, expected input image size)
    :raises ValueError: if ``model_name`` is not supported
    """
    if model_name != "resnet":
        # Previously fell through to an UnboundLocalError on return.
        raise ValueError("Unsupported model_name: {!r}".format(model_name))

    model_ft = models.resnet18(pretrained=use_pretrained)
    # Freeze first: the replacement head created below is a new layer and
    # therefore keeps requires_grad=True.
    set_parameter_requires_grad(model_ft, feature_extract)
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, num_classes)
    input_size = 224
    return model_ft, input_size


def train_model(model, dataloaders, criterion, optimizer, num_epochs=5, log_interval=20):
    """Train ``model`` and keep the weights with the best validation accuracy.

    Validation runs every ``log_interval`` training batches.

    :param model: network to train
    :param dataloaders: dict with 'train' and 'val' DataLoaders
    :param criterion: loss function taking (outputs, labels)
    :param optimizer: optimizer over the trainable parameters
    :param num_epochs: number of passes over the training set
    :param log_interval: log and validate every ``log_interval`` batches
    :return: (model loaded with the best weights, list of validation accuracies)
    """
    since = time.time()
    val_acc_history = []
    best_acc = 0.
    # Start from the current weights so the final load_state_dict is always
    # defined, even if validation accuracy never exceeds 0.
    best_model_wts = copy.deepcopy(model.state_dict())
    phase = 'train'
    train_loader = dataloaders[phase]
    num_train = len(train_loader.dataset)
    num_batches = len(train_loader)

    for epoch in range(num_epochs):
        print("Epoch {}/{}".format(epoch, num_epochs - 1))
        print("-" * 10)

        running_loss = 0.
        running_corrects = 0
        seen = 0  # samples processed this epoch (no hard-coded batch size)
        model.train()
        for batch_id, (inputs, labels) in enumerate(train_loader):
            with torch.autograd.set_grad_enabled(True):
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

            preds = outputs.argmax(dim=1)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds.view(-1) == labels.view(-1)).item()
            seen += inputs.size(0)

            if batch_id % log_interval == 0:
                print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tAcc: {}/{}".format(
                    epoch,
                    seen,
                    num_train,
                    100. * (batch_id + 1) / num_batches,
                    loss.item(),
                    running_corrects,
                    seen
                ))
                _, val_acc = test_model(model, dataloaders, criterion, epoch)
                if val_acc > best_acc:
                    best_acc = val_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                val_acc_history.append(val_acc)
                # test_model() switches to eval mode; switch back, otherwise
                # the rest of training runs with BatchNorm in inference mode.
                model.train()

        epoch_loss = running_loss / num_train
        epoch_acc = running_corrects / num_train
        print("{} Loss: {} Acc: {}".format(phase, epoch_loss, epoch_acc))
        print()

    time_elapsed = time.time() - since
    print("Training complete in {}m {}s".format(time_elapsed // 60, time_elapsed % 60))
    print("Best val Acc: {}".format(best_acc))

    model.load_state_dict(best_model_wts)
    return model, val_acc_history


def test_model(model, dataloaders, criterion, epoch):
    """Evaluate ``model`` on ``dataloaders['val']``.

    Leaves the model in eval mode; callers that continue training must call
    ``model.train()`` afterwards.

    :param model: network to evaluate
    :param dataloaders: dict containing a 'val' DataLoader
    :param criterion: loss function taking (outputs, labels)
    :param epoch: current epoch (unused; kept for interface compatibility)
    :return: (average loss per sample, accuracy in [0, 1])
    """
    running_loss = 0.
    running_corrects = 0.
    model.eval()
    since = time.time()
    val_loader = dataloaders['val']
    with torch.no_grad():
        for inputs, labels in val_loader:
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            preds = outputs.argmax(dim=1)
            # criterion is assumed to return the batch mean, so weight it by
            # the batch size before averaging over the dataset.
            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds.view(-1) == labels.view(-1)).item()

    num_val = len(val_loader.dataset)
    epoch_loss = running_loss / num_val
    epoch_acc = running_corrects / num_val
    time_elapsed = time.time() - since
    print("Validation complete in {}m {}s".format(time_elapsed // 60, time_elapsed % 60))
    print("{} Loss: {} Acc: {}".format('val', epoch_loss, epoch_acc))
    return epoch_loss, epoch_acc


def work_model(model, data_dir, input_size):
    """Classify the images in ``<data_dir>/test`` with a trained model.

    Each image is copied into ``<data_dir>/result/Dog`` or
    ``<data_dir>/result/Cat``. An existing result directory is deleted first.

    :param model: trained model
    :param data_dir: project data directory
    :param input_size: side length the images are resized/cropped to
    :return: None
    """
    result_path = os.path.join(data_dir, 'result')
    if os.path.exists(result_path):
        shutil.rmtree(result_path)
    dog_dir = os.path.join(result_path, 'Dog')
    cat_dir = os.path.join(result_path, 'Cat')
    os.mkdir(result_path)
    os.mkdir(dog_dir)
    os.mkdir(cat_dir)

    test_path = os.path.join(data_dir, 'test')
    model.eval()  # inference: freeze BatchNorm statistics
    with torch.no_grad():
        for name in os.listdir(test_path):
            image_name = os.path.join(test_path, name)
            if not os.path.isfile(image_name):
                continue  # skip sub-directories
            image = loader(image_name, input_size)
            outputs = model(image)
            pred = outputs.argmax(dim=1).item()
            # NOTE(review): assumes class index 1 is "Dog", which holds when
            # the training folders are named Cat/Dog (ImageFolder sorts class
            # names alphabetically) -- confirm against the dataset layout.
            target_dir = dog_dir if pred == 1 else cat_dir
            shutil.copyfile(image_name, os.path.join(target_dir, name))


def loader(image_name, input_size):
    """Load an image file as a normalized float tensor of shape (1, 3, H, W).

    :param image_name: path of the image to load
    :param input_size: side length for the resize and center crop
    :return: image tensor with a leading batch dimension
    """
    # Same preprocessing as the validation pipeline (ImageNet mean/std).
    transform = torchvision.transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # The context manager closes the underlying file handle.
    with Image.open(image_name) as img:
        image = transform(img.convert('RGB')).unsqueeze(0)
    return image.to(torch.float)


if __name__ == '__main__':
    # Smoke test: load the first file found in the test directory.
    _demo_dir = "./Dog_Cat/test/"
    for _demo_name in sorted(os.listdir(_demo_dir)):
        _demo_path = os.path.join(_demo_dir, _demo_name)
        if os.path.isfile(_demo_path):
            print(loader(_demo_path, 224))
            break

# --------------------------------------------------------------------------------
# /work.py:
# --------------------------------------------------------------------------------
import os
import time

import matplotlib.pyplot as plt
import torch
import torch.optim as optim
import torchvision
from torch import nn
from torchvision import datasets, transforms

import CNN
import resnet


def show(xlabel, ylabel, y):
    """Plot ``y`` against its index.

    :param xlabel: x-axis label
    :param ylabel: y-axis label (also used as the legend entry)
    :param y: iterable of values to plot
    :return: None
    """
    values = list(y)
    # The label gives plt.legend() an artist to show.
    plt.plot(range(len(values)), values, label=ylabel)

    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title('performance')
    plt.legend()
    plt.show()


# Run configuration.
data_dir = "./Dog_Cat"        # directory containing train/, val/ and test/
batch_size = 32               # samples per gradient step
input_size = 224              # input side length (ResNet default is 224)
device = torch.device("cpu")  # unused: everything runs on the CPU
lr = 0.001                    # learning rate; has a large effect on results
momentum = 0.9
function = "CNN"              # which pipeline to run: "CNN" or "resnet"

# ImageNet channel statistics used for input normalization.
_NORM_MEAN = [0.485, 0.456, 0.406]
_NORM_STD = [0.229, 0.224, 0.225]


def _build_dataloaders(size):
    """Return {'train': loader, 'val': loader} over ImageFolder datasets in ``data_dir``."""
    data_transforms = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(_NORM_MEAN, _NORM_STD)
        ]),
        "val": transforms.Compose([
            transforms.Resize(size),
            transforms.CenterCrop(size),
            transforms.ToTensor(),
            transforms.Normalize(_NORM_MEAN, _NORM_STD)
        ]),
    }
    image_datasets = {
        split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
        for split in ('train', 'val')
    }
    # Only the training split needs shuffling.
    return {
        split: torch.utils.data.DataLoader(
            image_datasets[split], batch_size=batch_size,
            shuffle=(split == 'train'), num_workers=0)
        for split in ('train', 'val')
    }


def _run_resnet():
    """Fine-tune ResNet-18, plot validation accuracy, then classify the test images."""
    model_name = "resnet"
    num_classes = 2
    feature_extract = True
    num_epochs = 2
    model, size = resnet.initialize_model(model_name, num_classes, feature_extract,
                                          use_pretrained=True)

    print("Params to learn:")
    # Collect every trainable parameter; this is defined whether or not
    # feature_extract is set (when it is not, all parameters are trainable).
    params_to_update = []
    for name, param in model.named_parameters():
        if param.requires_grad:
            params_to_update.append(param)
            print("\t", name)

    optimizer_ft = optim.SGD(params_to_update, lr=lr, momentum=momentum)
    criterion = nn.CrossEntropyLoss()
    print(model)

    print("Initializing Datasets and Dataloaders...")
    dataloaders_dict = _build_dataloaders(size)

    # Train the model.
    model, acc = resnet.train_model(model, dataloaders_dict, criterion, optimizer_ft, num_epochs)

    # One accuracy value is recorded per validation run, not per epoch.
    show('validation step', 'Accuracy', acc)
    # Inference: the images to classify must be in 'Dog_Cat/test'.
    resnet.work_model(model, data_dir, size)


def _run_cnn():
    """Train the small CNN, evaluate every second epoch, then plot the test loss."""
    model = CNN.Net()
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    loaders = _build_dataloaders(input_size)
    train_loader, test_loader = loaders['train'], loaders['val']

    epochs = 10
    loss = []
    t1 = time.time()
    for epoch in range(1, epochs + 1):
        CNN.train(model, train_loader, optimizer, epoch)
        print(time.time() - t1)  # seconds elapsed since training started
        if epoch % 2 == 0:
            CNN.test(model, test_loader, loss)
    print(time.time() - t1)
    show('epoch', 'loss', loss)


def main():
    """Print the framework version, seed the RNG and run the selected pipeline."""
    print("TorchVision Version: ", torchvision.__version__)
    # Seed for the CPU random number generator.
    torch.manual_seed(53113)

    if function == 'resnet':
        _run_resnet()
    elif function == "CNN":
        _run_cnn()
    else:
        raise ValueError("Unknown function: {!r}".format(function))


if __name__ == '__main__':
    main()

# --------------------------------------------------------------------------------