├── BP_NetWork
│   ├── function1_BPnet.py
│   └── function2_BPnet.py
├── LeNet-5
│   ├── LeNet.py
│   └── net.pth
├── README.md
├── ResNet-18
│   ├── ResNet.py
│   ├── net.pth
│   ├── test.py
│   └── trian.py
├── SelectiveSearch
│   ├── SelectiveSearch.py
│   ├── main.py
│   └── selectiveSearchDraft.pdf
└── read.txt

--------------------------------------------------------------------------------
/BP_NetWork/function1_BPnet.py:
--------------------------------------------------------------------------------
import numpy as np
import matplotlib.pyplot as plt

# Network structure
class BPNet:
    # Constructor
    def __init__(self, layers, learning_rate=0.1, act="sigmoid"):
        self.W = []
        self.layers = layers
        self.learning_rate = learning_rate  # learning rate
        self.act = act  # activation function
        # Initialize the weights.
        # The "- 2" excludes the final hidden-to-output weight matrix.
        for i in np.arange(0, len(layers) - 2):
            # Weights from the input layer into and between the hidden layers.
            # The "+ 1" accounts for the bias term, which adds one dimension.
            weight = np.random.rand(layers[i] + 1, layers[i + 1] + 1)
            self.W.append(weight)
        # Append the last weight matrix, from the final hidden layer to the output layer
        self.W.append(np.random.rand(layers[-2] + 1, layers[-1]))

    # Activation function
    def act_func(self, x):
        if self.act == "sigmoid":
            return 1.0 / (1 + np.exp(-x))
        elif self.act == "tanh":
            return np.tanh(x)

    # Derivative of the activation function.
    # Note: x here is the *activated output*, not the pre-activation value.
    def act_derivative(self, x):
        if self.act == "sigmoid":
            return x * (1 - x)
        elif self.act == "tanh":
            # x is already tanh(net), so the derivative is 1 - x^2
            return 1 - x * x

    # Mean squared error
    def MSE_loss(self, inputs, targetValue):
        targetValue = np.atleast_2d(targetValue)
        predictions = self.test(inputs)
        loss = 0.5 * np.sum((predictions - targetValue) ** 2)
        return loss

    # Back-propagation via the chain rule
    def back_propagation(self, x, y):
        # np.atleast_2d() views the input as an array with at least two dimensions
        forward_list = [np.atleast_2d(x)]
        # Forward pass: compute the network output under the current weights
        for layer in np.arange(0, len(self.W)):
            net = forward_list[layer].dot(self.W[layer])
            out = self.act_func(net)
            forward_list.append(out)
        error = forward_list[-1] - y
        D = [error * self.act_derivative(forward_list[-1])]
        for layer in np.arange(len(forward_list) - 2, 0, -1):
            delta = D[-1].dot(self.W[layer].T)
            delta = delta * self.act_derivative(forward_list[layer])
            D.append(delta)
        D = D[::-1]
        # Update the weights W
        for layer in np.arange(0, len(self.W)):
            self.W[layer] += -self.learning_rate * forward_list[layer].T.dot(D[layer])

    # Training; epochs is the number of iterations
    def train(self, inputs, y, epochs=100000):
        # Append the bias term to the inputs; np.c_ concatenates along columns
        inputs = np.c_[inputs, np.ones((inputs.shape[0]))]
        losses = []
        time = 0
        # Back-propagate through each layer, then update W
        for epoch in np.arange(0, epochs):
            for (x, target) in zip(inputs, y):
                self.back_propagation(x, target)  # update the weights
            # Report training progress and compute the loss
            if epoch == 0 or (epoch + 1) % 1000 == 0:
                loss = self.MSE_loss(inputs, y)
                losses.append(loss)
                print("epoch={}, loss={:.3f}".format(epoch + 1, loss))
                if loss <= 0.005:
                    time = epoch
                    break
        return losses, time

    # Inference
    def test(self, inputs):
        predict = np.atleast_2d(inputs)
        # Append the bias term when the input does not already carry it
        if len(predict[-1]) == self.layers[0]:
            predict = np.c_[predict, np.ones((predict.shape[0]))]
        # Plain forward pass through every layer
        for layer in np.arange(0, len(self.W)):
            predict = self.act_func(predict.dot(self.W[layer]))
        return predict


# Fit XOR with a three-layer network
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])
net = BPNet([2, 2, 1], learning_rate=0.5)
losses, time = net.train(X, y)
for (x, target) in zip(X, y):
    outputs = net.test(x)[0][0]
    label = 1 if outputs > 0.5 else 0
    print("data={}, ground truth={}, raw prediction={:.2f}, classified as={}"
          .format(x, target[0], outputs, label))
print("Trained weights\n", net.W)
plt.figure()
plt.plot(np.arange(0, len(losses)), losses)
plt.xlabel("epoch")
plt.ylabel("loss")
plt.show()
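
# --- Numerical gradient check (sketch) ---------------------------------------
# A quick way to verify the chain-rule code in back_propagation is a central-
# difference check: nudge one weight and watch the MSE loss move. The helper
# below is an illustrative addition, not part of the original script.
def numerical_gradient(net, inputs, targets, layer, i, j, eps=1e-5):
    # Central difference of MSE_loss with respect to a single weight entry
    net.W[layer][i, j] += eps
    loss_plus = net.MSE_loss(inputs, targets)
    net.W[layer][i, j] -= 2 * eps
    loss_minus = net.MSE_loss(inputs, targets)
    net.W[layer][i, j] += eps  # restore the weight
    return (loss_plus - loss_minus) / (2 * eps)

# After training converges, this derivative should be small
print("numerical dL/dW[0][0,0]:", numerical_gradient(net, X, y, 0, 0, 0))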
--------------------------------------------------------------------------------
/BP_NetWork/function2_BPnet.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import math

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

neurons = 30
# Build the network
BPNet = nn.Sequential(
    nn.Linear(1, neurons),
    nn.Tanh(),
    nn.Linear(neurons, 1),
).to(device)
# Optimizer
optimizer = torch.optim.SGD(BPNet.parameters(), lr=0.1)
loss_func = nn.MSELoss()
# Data: y = 1/sin(x) + 1/cos(x)
X = np.linspace(-math.pi / 2, math.pi / 2, 50)
X = np.reshape(X, (50, 1))
Y = 1 / np.sin(X) + 1 / np.cos(X)
# Clamp the singularities near x = 0 and x = ±pi/2 so the targets stay bounded
for i in range(len(Y)):
    if abs(Y[i]) > 100:
        Y[i] = 10
x = torch.tensor(X).float().to(device)
y = torch.tensor(Y).float().to(device)
losses = []
for epoch in range(20001):
    out = BPNet(x)
    loss = loss_func(out, y)  # compute the error
    optimizer.zero_grad()     # clear gradients
    loss.backward()
    optimizer.step()
    if epoch % 1000 == 0:
        print(epoch, loss.item())
        losses.append(loss.item())
# Test the result
yTest = BPNet(x).to("cpu").detach().numpy()
print(yTest)
plt.figure()
plt.plot(X, yTest, color='green')
plt.title('Curve')
plt.plot(X, Y, color="red")
plt.figure()
plt.plot(np.arange(0, len(losses)), losses)
plt.title("Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.show()
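
# --- Generalization check (sketch) --------------------------------------------
# The net above is only ever evaluated on its own training grid. An
# illustrative addition (not part of the original script): sample points
# between the training abscissae and compare against the same clamped target.
X_dense = np.linspace(-math.pi / 2 + 0.05, math.pi / 2 - 0.05, 200).reshape(-1, 1)
Y_dense = 1 / np.sin(X_dense) + 1 / np.cos(X_dense)
Y_dense[np.abs(Y_dense) > 100] = 10  # same clamping rule as the training data
with torch.no_grad():
    pred_dense = BPNet(torch.tensor(X_dense).float().to(device)).cpu().numpy()
print("mean abs error on unseen points:", np.abs(pred_dense - Y_dense).mean())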
--------------------------------------------------------------------------------
/LeNet-5/LeNet.py:
--------------------------------------------------------------------------------
import torch
import torchvision
import torch.nn as nn
from torchvision import datasets, transforms
from torch import optim
import time

# Training device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Batch size
batch_size = 100

class LeNet(nn.Module):
    def __init__(self):
        super(LeNet, self).__init__()
        self.conv = nn.Sequential(
            # Convolution layer C1: the 28*28 input is padded to 32*32,
            # and the 5*5 kernel brings it back to 28*28
            nn.Conv2d(in_channels=1, out_channels=6,
                      kernel_size=5, padding=2),
            nn.Sigmoid(),
            # Pooling layer S2: 28*28 -> 14*14
            nn.MaxPool2d(kernel_size=2, stride=2),
            # Convolution layer C3: 14*14 -> 10*10
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.Sigmoid(),
            # Pooling layer S4: 10*10 -> 5*5
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.fullconnect = nn.Sequential(
            nn.Linear(16 * 5 * 5, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10)
        )

    def forward(self, inputs):
        output = self.conv(inputs)
        # Flatten the feature maps before the fully connected layers
        output = self.fullconnect(output.view(inputs.shape[0], -1))
        return output


# Instantiate a fresh model:
# net = LeNet()
# ...or load the previously trained model
net = torch.load("net.pth")
net.to(device)
# Print the model
print(net)
# Input transforms
transform = transforms.Compose([
    transforms.ToTensor(),                 # convert to Tensor
    transforms.Normalize((0.5,), (0.5,)),  # normalize
])
# The return values are MNIST dataset objects
train_dataset = torchvision.datasets.MNIST(
    root='./mnist', train=True, transform=transform, download=False)
test_dataset = torchvision.datasets.MNIST(
    root='./mnist', train=False, transform=transform, download=False)

# Data loaders; each batch is a pair: images of shape (100, 1, 28, 28) and 100 labels
trainloader = torch.utils.data.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
testloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

def main():
    # Cross-entropy loss
    criterion = nn.CrossEntropyLoss()
    # Optimizer
    optimizer = optim.Adam(net.parameters(), lr=0.01)
    running_loss = 0.0  # initialize the loss
    loss_list = []
    for epoch in range(5):
        start = time.time()
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader, 0):
            net.train()
            # Move the batch to the device
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Clear gradients
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            # Update parameters
            optimizer.step()
            running_loss += loss.item()
            loss_list.append(loss.item())
        print("loss:", running_loss, "\n")

        net.eval()
        with torch.no_grad():
            # The test set needs no gradients; torch.no_grad() saves compute
            total_correct = 0  # number of correct predictions
            total_num = 0      # total number of samples
            for inputs, labels in testloader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                outputs = net(inputs)
                # Take the class with the highest score
                pred = outputs.argmax(dim=1)
                # torch.eq compares element-wise; summing counts the matches
                total_correct += torch.eq(labels, pred).float().sum().item()
                total_num += inputs.size(0)
            acc = total_correct / total_num
            print('Test accuracy:', (acc * 100), "\n")
        end = time.time()
        print("Training time:", end - start, "\n")
main()
# Save the model
torch.save(net, "net.pth")
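
# --- Single-image inference (sketch) -------------------------------------------
# An illustrative helper, not part of the original script; the PIL usage and
# the example file name are assumptions.
from PIL import Image

def predict_digit(image_path):
    # Grayscale, 28x28, then the same normalization as the MNIST pipeline
    img = Image.open(image_path).convert("L").resize((28, 28))
    tensor = transform(img).unsqueeze(0).to(device)  # shape (1, 1, 28, 28)
    net.eval()
    with torch.no_grad():
        return net(tensor).argmax(dim=1).item()

# Example: print(predict_digit("digit.png"))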
--------------------------------------------------------------------------------
/LeNet-5/net.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sun-roc/Deep_Learning_in_Action/bb7f254b11eef8f94fe98d6d6cc89781e103111e/LeNet-5/net.pth
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Deep_Learning_in_Action
--------------------------------------------------------------------------------
/ResNet-18/ResNet.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn

# Residual block
class BasicBlock(nn.Module):
    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        # Only the first convolution may use stride = 2, halving the feature map
        self.conv1 = nn.Conv2d(in_channels=in_channel,
                               out_channels=out_channel,
                               kernel_size=3,
                               stride=stride,
                               padding=1,
                               bias=False)
        # Batch normalization
        self.bn1 = nn.BatchNorm2d(out_channel)
        # ReLU activation; inplace=True modifies the tensor coming out of
        # nn.Conv2d in place, which saves memory
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(in_channels=out_channel,
                               out_channels=out_channel,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += identity
        out = self.relu(out)

        return out


class ResNet(nn.Module):
    def __init__(self, block=BasicBlock, num_classes=10):
        super(ResNet, self).__init__()
        # padding and stride are chosen to halve the input image: 224 -> 112
        self.in_channel = 64
        self.conv1 = nn.Conv2d(in_channels=3,
                               out_channels=64,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        # 112 -> 56
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # maxpool has already halved the size, so layer1 keeps stride 1
        self.layer1 = self.make_layer(block, 64)
        self.layer2 = self.make_layer(block, 128, stride=2)
        self.layer3 = self.make_layer(block, 256, stride=2)
        self.layer4 = self.make_layer(block, 512, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)
        # Weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def make_layer(self, block, channel, stride=1):
        downsample = None
        # Every stage except the first needs a downsampling shortcut
        if stride != 1:
            downsample = nn.Sequential(
                nn.Conv2d(self.in_channel,
                          channel,
                          kernel_size=1,
                          stride=stride,
                          bias=False), nn.BatchNorm2d(channel))
        bk1 = block(self.in_channel,
                    channel,
                    downsample=downsample,
                    stride=stride)
        self.in_channel = channel
        bk2 = block(self.in_channel, channel)
        bk = nn.Sequential(bk1, bk2)
        return bk

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x


if __name__ == '__main__':
    net = ResNet()
    print(net)
--------------------------------------------------------------------------------
/ResNet-18/net.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sun-roc/Deep_Learning_in_Action/bb7f254b11eef8f94fe98d6d6cc89781e103111e/ResNet-18/net.pth
--------------------------------------------------------------------------------
/ResNet-18/test.py:
--------------------------------------------------------------------------------
import torch
from torchvision import transforms
from ResNet import ResNet
import matplotlib.pyplot as plt
from PIL import Image

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
name = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Load the trained model
model = torch.load("net.pth")
model.to(device)
model.eval()

transform = transforms.Compose([
    transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])
for i in range(2):
    im = Image.open('{}.jpg'.format(i))
    plt.imshow(im)
    plt.show()
    im = transform(im).to(device)
    im = torch.reshape(im, (1, 3, 112, 112))
    with torch.no_grad():
        predict = model(im)
    pred = predict.argmax(dim=1)
    print(name[pred.item()])
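
# --- Checkpoint format (sketch) -------------------------------------------------
# torch.save(model, ...) pickles the whole module, so loading requires the
# exact class definition and module path used at save time. Saving the
# state_dict is the more portable pattern. Illustrative sketch (the file name
# "net_state.pth" is an assumption):
# torch.save(model.state_dict(), "net_state.pth")
# model2 = ResNet().to(device)
# model2.load_state_dict(torch.load("net_state.pth", map_location=device))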
--------------------------------------------------------------------------------
/ResNet-18/trian.py:
--------------------------------------------------------------------------------
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch import nn, optim
import numpy as np
from ResNet import ResNet

# Training device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
path = "train_out.txt"

def train():
    batch_size = 100
    # Training set
    cifar_train = datasets.CIFAR10(
        root='cifar',
        train=True,
        transform=transforms.Compose([
            transforms.Resize((112, 112)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]))
    cifar_train = DataLoader(cifar_train,
                             batch_size=batch_size,
                             shuffle=True,
                             num_workers=0)

    # Test set
    cifar_test = datasets.CIFAR10(
        root='cifar',
        train=False,
        transform=transforms.Compose([
            transforms.Resize((112, 112)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]))
    cifar_test = DataLoader(cifar_test,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=0)

    # Define the model: train ResNet from scratch...
    # model = ResNet()
    # ...or resume from the saved checkpoint
    model = torch.load("net.pth")
    model.to(device)

    # Loss function and optimizer
    criteon = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), 0.001)

    # Train the network
    for epoch in range(5):
        f = open(path, "a+")
        model.train()  # training mode
        loss_sum = []
        for batchidx, (data, label) in enumerate(cifar_train):
            data = data.to(device)
            label = label.to(device)
            predict = model(data)
            loss = criteon(predict, label)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_sum.append(loss.item())
            if batchidx % 20 == 0:
                print(batchidx, loss.item())

        print("Epoch_num: ", epoch, ' training-mean-loss:', np.mean(loss_sum), file=f)

        model.eval()  # evaluation mode
        with torch.no_grad():
            total_correct = 0  # number of correct predictions
            total_num = 0
            for data, label in cifar_test:
                data = data.to(device)
                label = label.to(device)
                predict = model(data)

                pred = predict.argmax(dim=1)
                correct = torch.eq(pred, label).float().sum().item()
                total_correct += correct
                total_num += data.size(0)
            acc = total_correct / total_num
            print("Epoch_num: ", epoch, 'test_acc:', acc, file=f)
        torch.save(model, "net.pth")
        f.close()

if __name__ == '__main__':
    train()
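
# --- Learning-rate schedule (sketch) --------------------------------------------
# Adam runs here with a fixed lr of 0.001; decaying it between epochs often
# helps on CIFAR-10. An illustrative addition (the step_size/gamma values are
# assumptions), to be placed next to the optimizer inside train():
# scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)
# ...and call scheduler.step() once per epoch, after the evaluation loop.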
--------------------------------------------------------------------------------
/SelectiveSearch/SelectiveSearch.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import division

import skimage.io
import skimage.data
import skimage.feature
import skimage.color
import skimage.transform
import skimage.util
import skimage.segmentation
import numpy as np

# Main routine -- Selective Search
# scale: how strongly the initial segmentation clusters. Larger values mean
#        more clustering, fewer splits, and larger sub-regions. Default 1.
# sigma: the image is Gaussian-filtered before segmentation to remove noise;
#        sigma is the size of the Gaussian kernel. Default 0.8.
# min_size: minimum number of pixels in a region; the segmentation stops
#        merging below this size. Default 20.

# The main flow of the function:
# Each round, pick the pair of regions with the highest similarity (say
# regions 100 and 101) and merge them into a new region (say 200). Then
# compute the similarity of region 200 against every neighbour of 100 and of
# 101, and add those entries to the similarity set S. Repeat until S is
# empty; at that point only one region remains, and its pixel count is close
# to that of the whole image, so no further merge is possible and the
# procedure ends.

def selective_search(
        im_orig, scale=1.0, sigma=0.8, min_size=50):
    """
    Returns
    -------
    img : ndarray
        image carrying region labels;
        the label is stored as the fourth value of each pixel [r, g, b, (region)]
    regions : list of dicts
        [
            {
                'rect': (left, top, width, height),
                'labels': [...]
            },
            ...
        ]
    """
    # Raise when the image is not three-channel
    assert im_orig.shape[2] == 3, "input is not a three-channel image"
    # Step 1: run felzenszwalb segmentation to obtain the smallest regions.
    # The region label is stored in the pixel's fourth dimension [r, g, b, (region)]
    img = add_region_channel(im_orig, scale, sigma, min_size)

    # Image size, e.g. 512 * 512
    img_size = img.shape[0] * img.shape[1]

    # Step 2: extract the features of the initial regions.
    # Each entry holds min_x, min_y, max_x, max_y, labels, size, hist_c, hist_t
    R = get_regions(img)

    # Step 3: extract neighbouring regions.
    # Returns a list of (a, b) pairs, where a and b are the dict items of two
    # regions whose bounding boxes overlap
    neighbours = get_region_neighbours(R)

    S = {}  # similarity set

    # Step 4: measure similarities.
    # ai, bi are region labels; ar, br are the corresponding region dicts
    for (ai, ar), (bi, br) in neighbours:
        S[(ai, bi)] = sum__sim(ar, br, img_size)  # one similarity value per pair (ai, bi)

    # Step 5: iteratively merge the most similar regions
    while S != {}:
        # Step 5.1: take the pair with the highest similarity
        i, j = sorted(S.items(), key=lambda i: i[1])[-1][0]

        # Step 5.2: merge the two most similar regions under a fresh key
        t = max(R.keys()) + 1.0
        R[t] = merge_regions(R[i], R[j])

        # Step 5.3: drop every similarity entry involving the merged regions
        key_to_delete = []
        for k, v in list(S.items()):
            if (i in k) or (j in k):
                key_to_delete.append(k)
        for k in key_to_delete:
            del S[k]

        # Step 5.4: compute the similarity of the new region against the
        # neighbours of the regions it replaced, and update the similarity set
        for k in [a for a in key_to_delete if a != (i, j)]:
            n = k[1] if k[0] in (i, j) else k[0]
            S[(t, n)] = sum__sim(R[t], R[n], img_size)

    # Step 6: collect the proposal boxes [left, top, w, h] from all regions in R
    regions = []
    for k, r in list(R.items()):
        regions.append({
            'rect': (
                r['min_x'], r['min_y'],
                r['max_x'] - r['min_x'], r['max_y'] - r['min_y']),
            'size': r['size'],
            'labels': r['labels']
        })

    return img, regions

# 1. Generate the initial region set with the felzenszwalb segmentation
#    algorithm. Every region gets a label, merged into the image as a channel.
def add_region_channel(im, scale, sigma, min_size):
    """
    return: an H*W*4 image (512*512*4 for a 512*512 input)
    """
    # Compute Felzenszwalb's efficient graph-based image segmentation;
    # im_mask assigns a label to every pixel
    im_mask = skimage.segmentation.felzenszwalb(
        skimage.util.img_as_float(im), scale=scale, sigma=sigma,
        min_size=min_size)
    add_channel = np.zeros(im.shape[:2])[:, :, np.newaxis]
    # Merge the mask in as the fourth channel of the image
    im = np.append(im, add_channel, axis=2)
    im[:, :, 3] = im_mask

    return im
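
# A quick way to inspect what add_region_channel produces (illustrative,
# commented out; not part of the original file):
# im4 = add_region_channel(skimage.data.astronaut(), scale=500, sigma=0.9, min_size=10)
# print(im4.shape)                     # (512, 512, 4): RGB plus the label channel
# print(len(np.unique(im4[:, :, 3])))  # number of initial felzenszwalb regions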
# 2. Extract the regions with their size, colour and texture features
def get_regions(img):
    """
    Extract the regions of the image, with size, colour and texture features.
    return: dict of regions keyed by label, each holding
            min_x, min_y, max_x, max_y, labels, size, hist_c, hist_t
    """
    R = {}  # candidate region dict, keyed by region label

    # Convert RGB to HSV colour space
    hsv = skimage.color.rgb2hsv(img[:, :, :3])

    # Compute each region's bounding-box corners
    for y, i in enumerate(img):  # img pixels are (r, g, b, l)
        for x, (r, g, b, l) in enumerate(i):  # l is the region label
            # Add the bounding box of every segmented region to the dict
            if l not in R:
                R[l] = {
                    "min_x": 0xffff, "min_y": 0xffff,  # start min at the largest, max at the smallest
                    "max_x": 0, "max_y": 0, "labels": [l]}
            # Update the bounds
            if R[l]["min_x"] > x:  # a new x smaller than the current minimum
                R[l]["min_x"] = x  # becomes the new minimum
            if R[l]["min_y"] > y:
                R[l]["min_y"] = y
            if R[l]["max_x"] < x:
                R[l]["max_x"] = x
            if R[l]["max_y"] < y:
                R[l]["max_y"] = y

    # Compute the texture gradient of the whole image
    tex_grad = LBP_texture(img)

    # Compute the colour histogram of each region;
    # k is the label, v holds that region's min_x, max_x, min_y, max_y
    for k, v in list(R.items()):
        masked_pixels = hsv[:, :, :][img[:, :, 3] == k]  # h, s, v values of all pixels with label k
        R[k]["size"] = len(masked_pixels)  # number of pixels in region k
        R[k]["hist_c"] = get_color_hist(masked_pixels)  # colour histogram
        R[k]["hist_t"] = get_texture_hist(tex_grad[:, :][img[:, :, 3] == k])  # texture histogram
    # size, hist_c and hist_t are now filled in
    return R

# 3. Histograms.
# Colour histogram: convert to HSV and histogram each channel with bins=25,
# so each region's colour histogram has 25 * 3 = 75 intervals.
# Texture: the paper takes Gaussian derivatives (variance 1) in 8 directions
# and histograms the statistics with bins=10, i.e. 8 * 3 * 10 = 240 intervals
# in RGB space. Here LBP features are used to build the histogram instead;
# the rest is the same.
def get_color_hist(img):
    """
    Compute the colour histogram of the input region.
    return: histogram of length BINS * COLOUR_CHANNELS(3)
    """
    # Number of bins
    BINS = 25
    hist = np.array([])
    # Each colour channel in turn
    for colour_channel in (0, 1, 2):
        # Pull this channel's value out of every pixel of the region, so c is
        # one-dimensional with the same length as img
        c = img[:, colour_channel]
        # np.histogram counts how many values fall into each of BINS intervals
        # over the given (min, max) range; np.concatenate then appends this
        # channel's counts to the overall histogram
        hist = np.concatenate(
            [hist] + [np.histogram(c, BINS, (0.0, 255.0))[0]])
    # L1 normalization, giving the three-channel colour histogram
    hist = hist / len(img)
    return hist

# Compute the texture gradient
def LBP_texture(img):
    """
    Use LBP (local binary patterns) to compute the texture gradient of the
    whole image and extract texture features.
    return: H*W*4 array (LBP codes in the first three channels)
    """
    ret = np.zeros((img.shape[0], img.shape[1], img.shape[2]))
    for colour_channel in (0, 1, 2):
        ret[:, :, colour_channel] = skimage.feature.local_binary_pattern(
            img[:, :, colour_channel], 8, 1.0)

    return ret

# Compute the texture histogram
def get_texture_hist(img):
    """
    Compute the texture histogram of each region.
    Output size: BINS * ORIENTATIONS * COLOUR_CHANNELS(3)
    """
    BINS = 10
    hist = np.array([])
    for colour_channel in (0, 1, 2):
        # Mask by the colour channel
        fd = img[:, colour_channel]
        # Histogram each direction and append it to the result
        hist = np.concatenate([hist] + [np.histogram(fd, BINS, (0.0, 1.0))[0]])
    # Normalize
    hist = hist / len(img)
    return hist
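
# The colour/texture similarities below are histogram intersections: the sum
# of element-wise minima of two L1-normalized histograms. A tiny worked
# example (illustrative values):
#   h1 = [0.5, 0.3, 0.2], h2 = [0.4, 0.4, 0.2]
#   intersection = min(0.5, 0.4) + min(0.3, 0.4) + min(0.2, 0.2) = 0.9
# Identical histograms give 1.0; disjoint ones give 0.0.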
# 4. Extract neighbouring regions: two regions are neighbours when their
#    bounding rectangles overlap.
#    regions: R, which records min_x, min_y, max_x, max_y, size, hist_c,
#    hist_t for every label of the image
def get_region_neighbours(regions):
    # Check whether rectangles a and b overlap
    def intersect(a, b):
        if (a["min_x"] < b["min_x"] < a["max_x"]
                and a["min_y"] < b["min_y"] < a["max_y"]) or (
            a["min_x"] < b["max_x"] < a["max_x"]
                and a["min_y"] < b["max_y"] < a["max_y"]) or (
            a["min_x"] < b["min_x"] < a["max_x"]
                and a["min_y"] < b["max_y"] < a["max_y"]) or (
            a["min_x"] < b["max_x"] < a["max_x"]
                and a["min_y"] < b["min_y"] < a["max_y"]):
            return True
        return False

    # items() yields each label's info
    R_list = list(regions.items())

    neighbours = []
    for cur, a in enumerate(R_list[:-1]):
        for b in R_list[cur + 1:]:
            if intersect(a[1], b[1]):
                neighbours.append((a, b))

    return neighbours

# 5. Similarity of two regions.
# The paper considers four similarities -- colour, texture, size and fill --
# and the final similarity is their sum.
def cal_color_sim(r1, r2):
    """
    Colour similarity of two regions: the histogram intersection.
    zip pairs the histograms element by element, e.g.
    zip([1, 2, 3], [4, 5, 6]) -> [(1, 4), (2, 5), (3, 6)]
    """
    zipped = zip(r1["hist_c"], r2["hist_c"])
    min_list = [min(a, b) for a, b in zipped]
    S_color = sum(min_list)
    return S_color


def cal_texture_sim(r1, r2):
    """
    Texture similarity of two regions
    """
    zipped = zip(r1["hist_t"], r2["hist_t"])
    S_texture = sum([min(a, b) for a, b in zipped])
    return S_texture


def cal_size_sim(r1, r2, img_size):
    """
    Size similarity of two regions
    """
    S_size = 1.0 - (r1["size"] + r2["size"]) / img_size
    return S_size


def cal_fill_sim(r1, r2, img_size):
    """
    Fill similarity of two regions
    """
    Bx = (max(r1["max_x"], r2["max_x"]) - min(r1["min_x"], r2["min_x"]))
    By = (max(r1["max_y"], r2["max_y"]) - min(r1["min_y"], r2["min_y"]))
    # Smallest rectangle that contains both regions
    BBsize = Bx * By
    S_fill = 1.0 - (BBsize - r1["size"] - r2["size"]) / img_size
    return S_fill


def sum__sim(r1, r2, img_size):
    """
    Overall similarity of two regions: the sum of the four components
    """
    S_similar = (cal_color_sim(r1, r2) + cal_texture_sim(r1, r2)
                 + cal_size_sim(r1, r2, img_size) + cal_fill_sim(r1, r2, img_size))
    return S_similar

# 6. Merge two highly similar regions
def merge_regions(r1, r2):
    """
    input: two entries of the region dict R, i.e. two regions
    output: the merged region, to be stored under a new key
    """
    new_size = r1["size"] + r2["size"]
    # The merged region dict; histograms are combined as size-weighted averages
    rt = {
        "min_x": min(r1["min_x"], r2["min_x"]),
        "min_y": min(r1["min_y"], r2["min_y"]),
        "max_x": max(r1["max_x"], r2["max_x"]),
        "max_y": max(r1["max_y"], r2["max_y"]),
        "size": new_size,
        "hist_c": (
            r1["hist_c"] * r1["size"] + r2["hist_c"] * r2["size"]) / new_size,
        "hist_t": (
            r1["hist_t"] * r1["size"] + r2["hist_t"] * r2["size"]) / new_size,
        "labels": r1["labels"] + r2["labels"]
    }
    return rt


if __name__ == "__main__":
    img = skimage.data.astronaut()
    selective_search(img, scale=500, sigma=0.9, min_size=10)
--------------------------------------------------------------------------------
/SelectiveSearch/main.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
from __future__ import (
    division,
    print_function,
)

import skimage.data
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import SelectiveSearch as selectivesearch  # the local module above
import time


def main():
    # Load a test image; the astronaut image is 512*512*3 (RGB)
    # img = skimage.data.astronaut()
    # img = skimage.data.hubble_deep_field()
    img = skimage.data.chelsea()
    img_lbl, regions = selectivesearch.selective_search(img, scale=500, sigma=0.8, min_size=100)
    # regions is a list of dicts, one per region (keys: rect, size, labels)
    temp = set()  # set() creates an unordered collection without duplicates
    for i in range(img_lbl.shape[0]):
        for j in range(img_lbl.shape[1]):
            # temp collects every region label
            temp.add(img_lbl[i, j, 3])

    print("initial candidate regions:", len(temp))     # e.g. 286
    print("selective-search regions:", len(regions))   # e.g. 570
    # Create a new set holding the surviving proposal boxes
    region_rect = set()
    for i, r in enumerate(regions):
        x, y, w, h = r['rect']
        if r['size'] < 1000:
            continue
        # Optionally drop distorted boxes, keeping only roughly square ones:
        # if w / h > 1.3 or h / w > 1.3:
        #     continue
        region_rect.add(r['rect'])

    # Draw the rectangles on the original image.
    # One 6*6 subplot: fig is the new figure, ax controls the subplot
    fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6))
    ax.imshow(img)
    i = 1
    for x, y, w, h in region_rect:
        rect = mpatches.Rectangle(
            (x, y), w, h, fill=False, edgecolor='blue', linewidth=2)
        ax.add_patch(rect)
        i += 1

    plt.show()

if __name__ == "__main__":
    start_time = time.time()
    main()
    end_time = time.time()
    run_time = end_time - start_time
    print("run time =", run_time, "s")
--------------------------------------------------------------------------------
/SelectiveSearch/selectiveSearchDraft.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sun-roc/Deep_Learning_in_Action/bb7f254b11eef8f94fe98d6d6cc89781e103111e/SelectiveSearch/selectiveSearchDraft.pdf
--------------------------------------------------------------------------------
/read.txt:
--------------------------------------------------------------------------------
## Deep Learning in Action

1. #### Function approximation with a BP neural network

   Build a basic BP (back-propagation) network and use a three-layer network to approximate functions, including XOR and y = 1/sin(x) + 1/cos(x).

2. #### MNIST character recognition with LeNet-5

   Implement the network structure in code, using a layered mapping scheme.

3. #### CIFAR-10 image classification with ResNet-18

   Build the network, perform image classification, and optimize the training process.

4. #### Candidate-region selection with Selective Search

   Find the regions of an image where objects are likely to be, shrinking the candidate set and speeding up training.
--------------------------------------------------------------------------------