├── .gitignore
├── README.md
├── checkpoints
    └── log.txt
├── data.py
├── model.py
├── my_arguments.py
├── prepare_data.py
├── prepare_test.py
├── test.py
├── train.py
└── util.py


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | *.py[cop]


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # README
 2 | 
A PyTorch implementation of the YOLOv1 object detector.
 4 | 
 5 | 
 6 | **Required:**
 7 | 
 8 | * pytorch 1.1.0
 9 | * torchvision
10 | * numpy > 1.16.2
11 | * opencv 3.4.1
12 | * VOC2012 Dataset
13 | 
14 | 
15 | 
**What this repo can do now**
17 | * train with VOC2012 Dataset
18 | 


--------------------------------------------------------------------------------
/checkpoints/log.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/johnwingit/YOLOV1_Pytorch/7620e186abb05e1d6fd3db597983f51ab4769541/checkpoints/log.txt


--------------------------------------------------------------------------------
/data.py:
--------------------------------------------------------------------------------
 1 | from torch.utils.data import Dataset, DataLoader
 2 | import numpy as np
 3 | import os
 4 | import random
 5 | import torch
 6 | from PIL import Image
 7 | import torchvision.transforms as transforms
 8 | 
 9 | 
class MyDataset(Dataset):
    """Dataset backed by an image-path list (``<mode>.txt``) and a label matrix (``<mode>.csv``)."""

    def __init__(self, dataset_dir, seed=None, mode="train", train_val_ratio=0.9, trans=None):
        """
        :param dataset_dir: directory that contains ``<mode>.txt`` and ``<mode>.csv``
        :param seed: value passed to ``random.seed``; drawn at random when None
        :param mode: "train", "val", or any other name (e.g. "test").
            "val" reads the same files as "train" and keeps the tail of the split.
        :param train_val_ratio: fraction of the train file used for mode="train";
            the remainder is used for mode="val"
        :param trans: preprocessing/augmentation callable applied to each PIL image

        Steps:
        1. read the label matrix from the .csv file into ``self.label``
        2. read the image paths from the .txt file into ``self.img_list``
        3. store in ``self.use_ids`` the sample indices belonging to the requested split
        4. keep the transform for ``__getitem__``

        NOTE(review): the sample order is never shuffled here, so the train/val
        split is positional and does not depend on ``seed``; the seed only
        reseeds the global ``random`` module.
        """
        if seed is None:
            seed = random.randint(0, 65536)
        random.seed(seed)
        self.dataset_dir = dataset_dir
        self.mode = mode
        # "val" is carved out of the train files, so it reads train.txt / train.csv.
        file_stem = "train" if mode == "val" else mode
        img_list_txt = os.path.join(dataset_dir, file_stem + ".txt")  # list of image paths
        label_csv = os.path.join(dataset_dir, file_stem + ".csv")  # label matrix, one row per image
        # ndmin=2 keeps a single-sample file as shape (1, n) so row indexing still works.
        self.label = np.loadtxt(label_csv, ndmin=2)
        # Skip blank lines so that image indices stay aligned with label rows.
        with open(img_list_txt, 'r') as f:
            self.img_list = [line.strip() for line in f if line.strip()]

        # Positional split: the first `num_train` samples are train, the rest are val.
        self.num_all_data = len(self.img_list)
        all_ids = list(range(self.num_all_data))
        num_train = int(train_val_ratio * self.num_all_data)
        if self.mode == "train":
            self.use_ids = all_ids[:num_train]
        elif self.mode == "val":
            self.use_ids = all_ids[num_train:]
        else:
            self.use_ids = all_ids

        # Transform applied in __getitem__ (None -> plain ToTensor).
        self.trans = trans

    def __len__(self):
        """Return the number of samples in the selected split."""
        return len(self.use_ids)

    def __getitem__(self, item):
        """
        Return ``(img, label)`` for the ``item``-th sample of the selected split.

        ``img`` is the transformed image (a tensor when the default ``ToTensor``
        transform is used); ``label`` is the matching row of the label matrix as
        a ``torch.tensor``.
        """
        sample_id = self.use_ids[item]
        label = torch.tensor(self.label[sample_id, :])
        # Force 3 channels, matching how test.py opens images for inference.
        img = Image.open(self.img_list[sample_id]).convert('RGB')
        if self.trans is None:
            trans = transforms.Compose([
                transforms.ToTensor(),
            ])
        else:
            trans = self.trans
        img = trans(img)  # preprocessing & augmentation
        return img, label
82 | 
if __name__ == '__main__':
    # Debug helper: pull samples one at a time to check that loading works.
    dataset_dir = r"C:\Users\Owen\Desktop\VOCdevkit\VOC2012\voc2012_forYolov1"
    dataloader = DataLoader(MyDataset(dataset_dir), 1)
    for _ in enumerate(dataloader):
        input("press enter to continue")


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | """
  2 | time:2021.4.26
  3 | author:Jylyy
  4 | 
  5 | """
  6 | import torch
  7 | import torch.nn as nn
  8 | import torchvision.models as tvmodel
  9 | from prepare_data import GL_CLASSES, GL_NUMBBOX, GL_NUMGRID
 10 | from util import calculate_iou
 11 | 
 12 | 
 13 | class MyNet(nn.Module):
 14 |     """
 15 |     @ 网络实际名称
 16 |     为了和后续接口对齐，此处类名固定为MyNet，具体是什么网络可以写在注释里。
 17 |     """
 18 |     def __init__(self):
 19 |         """
 20 |         :param args: 构建网络所需要的参数
 21 | 
 22 |         TODO:
 23 |         在__init__()函数里，将网络框架搭好，并存在self里
 24 |         """
 25 |         super(MyNet, self).__init__()
 26 |         resnet = tvmodel.resnet34(pretrained=True)  # 调用torchvision里的resnet34预训练模型
 27 |         resnet_out_channel = resnet.fc.in_features  # 记录resnet全连接层之前的网络输出通道数，方便连入后续卷积网络中
 28 |         self.resnet = nn.Sequential(*list(resnet.children())[:-2])  # 去除resnet的最后两层
 29 |         # 以下是YOLOv1的最后四个卷积层
 30 |         self.Conv_layers = nn.Sequential(
 31 |             nn.Conv2d(resnet_out_channel, 1024, 3, padding=1),
 32 |             nn.BatchNorm2d(1024),  # 为了加快训练，这里增加了BN层，原论文里YOLOv1是没有的
 33 |             nn.LeakyReLU(inplace=True),
 34 |             nn.Conv2d(1024, 1024, 3, stride=2, padding=1),
 35 |             nn.BatchNorm2d(1024),
 36 |             nn.LeakyReLU(inplace=True),
 37 |             nn.Conv2d(1024, 1024, 3, padding=1),
 38 |             nn.BatchNorm2d(1024),
 39 |             nn.LeakyReLU(inplace=True),
 40 |             nn.Conv2d(1024, 1024, 3, padding=1),
 41 |             nn.BatchNorm2d(1024),
 42 |             nn.LeakyReLU(inplace=True),
 43 |         )
 44 |         # 以下是YOLOv1的最后2个全连接层
 45 |         self.Conn_layers = nn.Sequential(
 46 |             nn.Linear(GL_NUMGRID * GL_NUMGRID * 1024, 4096),
 47 |             nn.LeakyReLU(inplace=True),
 48 |             nn.Linear(4096, GL_NUMGRID * GL_NUMGRID * (5*GL_NUMBBOX+len(GL_CLASSES))),
 49 |             nn.Sigmoid()  # 增加sigmoid函数是为了将输出全部映射到(0,1)之间，因为如果出现负数或太大的数，后续计算loss会很麻烦
 50 |         )
 51 | 
 52 | 
 53 |     def forward(self, inputs):
 54 |         """
 55 |         :param inputs:  输入网络的张量
 56 |         :return:  输出网络的结果
 57 | 
 58 |         TODO
 59 |         根据网络的结构，完成网络的前向传播计算。
 60 |         如果网络有多条分支，可以用self储存需要在别的地方使用的中间张量。
 61 |         如果网络有多个输出，需要将多个输出按后续inference的需求打包输出
 62 |         """
 63 |         x = self.resnet(inputs)
 64 |         x = self.Conv_layers(x)
 65 |         x = x.view(x.size()[0], -1)
 66 |         x = self.Conn_layers(x)
 67 |         self.pred = x.reshape(-1, (5 * GL_NUMBBOX + len(GL_CLASSES)), GL_NUMGRID, GL_NUMGRID)  # 记住最后要reshape一下输出数据
 68 |         return self.pred
 69 | 
 70 | 
 71 |     def calculate_loss(self, preds,labels):
 72 |         """
 73 |         TODO: 根据labels和self.outputs计算训练loss
 74 |         :param labels: (bs, n), 对应训练数据的样本标签
 75 |         :return: loss数值
 76 |         """
 77 |         self.pred = preds.double()
 78 |         labels = labels.double()
 79 |         num_gridx, num_gridy = GL_NUMGRID, GL_NUMGRID  # 划分网格数量
 80 |         noobj_confi_loss = 0.  # 不含目标的网格损失(只有置信度损失)
 81 |         coor_loss = 0.  # 含有目标的bbox的坐标损失
 82 |         obj_confi_loss = 0.  # 含有目标的bbox的置信度损失
 83 |         class_loss = 0.  # 含有目标的网格的类别损失
 84 |         n_batch = labels.size()[0]  # batchsize的大小
 85 |         # 可以考虑用矩阵运算进行优化，提高速度，为了准确起见，这里还是用循环
 86 |         for i in range(n_batch):  # batchsize循环
 87 |             for n in range(num_gridx):  # x方向网格循环
 88 |                 for m in range(num_gridy):  # y方向网格循环
 89 |                     if labels[i, 4, m, n] == 1:  # 如果包含物体
 90 |                         # 将数据(px,py,w,h)转换为(x1,y1,x2,y2)
 91 |                         # 先将px,py转换为cx,cy，即相对网格的位置转换为标准化后实际的bbox中心位置cx,xy
 92 |                         # 然后再利用(cx-w/2,cy-h/2,cx+w/2,cy+h/2)转换为xyxy形式，用于计算iou
 93 |                         bbox1_pred_xyxy = ((self.pred[i, 0, m, n] + n) / num_gridx - self.pred[i, 2, m, n] / 2,
 94 |                                            (self.pred[i, 1, m, n] + m) / num_gridy - self.pred[i, 3, m, n] / 2,
 95 |                                            (self.pred[i, 0, m, n] + n) / num_gridx + self.pred[i, 2, m, n] / 2,
 96 |                                            (self.pred[i, 1, m, n] + m) / num_gridy + self.pred[i, 3, m, n] / 2)
 97 |                         bbox2_pred_xyxy = ((self.pred[i, 5, m, n] + n) / num_gridx - self.pred[i, 7, m, n] / 2,
 98 |                                            (self.pred[i, 6, m, n] + m) / num_gridy - self.pred[i, 8, m, n] / 2,
 99 |                                            (self.pred[i, 5, m, n] + n) / num_gridx + self.pred[i, 7, m, n] / 2,
100 |                                            (self.pred[i, 6, m, n] + m) / num_gridy + self.pred[i, 8, m, n] / 2)
101 |                         bbox_gt_xyxy = ((labels[i, 0, m, n] + n) / num_gridx - labels[i, 2, m, n] / 2,
102 |                                         (labels[i, 1, m, n] + m) / num_gridy - labels[i, 3, m, n] / 2,
103 |                                         (labels[i, 0, m, n] + n) / num_gridx + labels[i, 2, m, n] / 2,
104 |                                         (labels[i, 1, m, n] + m) / num_gridy + labels[i, 3, m, n] / 2)
105 |                         iou1 = calculate_iou(bbox1_pred_xyxy, bbox_gt_xyxy)
106 |                         iou2 = calculate_iou(bbox2_pred_xyxy, bbox_gt_xyxy)
107 |                         # 选择iou大的bbox作为负责物体
108 |                         if iou1 >= iou2:
109 |                             coor_loss = coor_loss + 5 * (torch.sum((self.pred[i, 0:2, m, n] - labels[i, 0:2, m, n]) ** 2) \
110 |                                         + 5*torch.sum((self.pred[i, 2:4, m, n].sqrt() - labels[i, 2:4, m, n].sqrt()) ** 2))
111 |                             obj_confi_loss = obj_confi_loss + (self.pred[i, 4, m, n] - iou1) ** 2
112 |                             # iou比较小的bbox不负责预测物体，因此confidence loss算在noobj中，注意，对于标签的置信度应该是iou2
113 |                             noobj_confi_loss = noobj_confi_loss + 0.5 * ((self.pred[i, 9, m, n] - iou2) ** 2)
114 |                         else:
115 |                             coor_loss = coor_loss + 5 * (torch.sum((self.pred[i, 5:7, m, n] - labels[i, 5:7, m, n]) ** 2) \
116 |                                         + 5*torch.sum((self.pred[i, 7:9, m, n].sqrt() - labels[i, 7:9, m, n].sqrt()) ** 2))
117 |                             obj_confi_loss = obj_confi_loss + (self.pred[i, 9, m, n] - iou2) ** 2
118 |                             # iou比较小的bbox不负责预测物体，因此confidence loss算在noobj中,注意，对于标签的置信度应该是iou1
119 |                             noobj_confi_loss = noobj_confi_loss + 0.5 * ((self.pred[i, 4, m, n] - iou1) ** 2)
120 |                         class_loss = class_loss + torch.sum((self.pred[i, 10:, m, n] - labels[i, 10:, m, n]) ** 2)
121 |                     else:  # 如果不包含物体
122 |                         noobj_confi_loss = noobj_confi_loss + 0.5 * torch.sum(self.pred[i, [4, 9], m, n] ** 2)
123 | 
124 |         loss = coor_loss + obj_confi_loss + noobj_confi_loss + class_loss
125 |         # 此处可以写代码验证一下loss的大致计算是否正确，这个要验证起来比较麻烦，比较简洁的办法是，将输入的pred置为全1矩阵，再进行误差检查，会直观很多。
126 |         return loss / n_batch
127 | 
128 |     def calculate_metric(self, preds, labels):
129 |         """
130 |         TODO: 根据preds和labels，以及指定的评价方法计算网络效果得分， 网络validation时使用
131 |         :param preds: 预测数据
132 |         :param labels: 预测数据对应的样本标签
133 |         :return: 评估得分metric
134 |         """
135 |         preds = preds.double()
136 |         labels = labels[:, :(self.n_points*2)]
137 |         l2_distance = torch.mean(torch.sum((preds-labels)**2, dim=1))
138 |         return l2_distance
139 | 
140 | 
if __name__ == '__main__':
    # Smoke test: push a dummy batch through the network and compute the loss.
    x = torch.zeros(5,3,448,448)
    net = MyNet()
    print(net)
    a = net(x)
    labels = torch.zeros(5, 30, 7, 7)
    # calculate_loss takes (preds, labels); the predictions must be passed explicitly.
    loss = net.calculate_loss(a, labels)
    print(loss)
    print(a.shape)
151 | 


--------------------------------------------------------------------------------
/my_arguments.py:
--------------------------------------------------------------------------------
 1 | """
 2 | time:2021.4.26
 3 | author:Jylyy
 4 | 
 5 | """
 6 | 
 7 | import argparse
 8 | import torch
 9 | 
class Args(object):
    """
    Command-line option interface.

    Call exactly one of ``set_train_args`` / ``set_test_args`` and then read the
    parsed namespace through ``get_opts``.
    """
    def __init__(self):
        self.parser = argparse.ArgumentParser()

    def _resolve_device(self):
        """Reconcile ``--use_GPU`` / ``--GPU_id`` with what the machine offers.

        The GPU is used only when it was requested AND CUDA is available, so the
        flag and the printed message always describe the device actually used.
        """
        cuda_ok = torch.cuda.is_available()
        if cuda_ok and self.opts.GPU_id is None:
            self.opts.GPU_id = torch.cuda.current_device()
        self.opts.use_GPU = bool(self.opts.use_GPU and cuda_ok)
        if self.opts.use_GPU:
            print("use GPU %d to train." % (self.opts.GPU_id))
        else:
            print("use CPU to train.")

    def set_train_args(self):
        """Register and parse the options for training."""
        self.parser.add_argument("--batch_size", type=int, default=1)
        self.parser.add_argument("--lr", type=float, default=0.0001, help="learning rate")
        self.parser.add_argument("--weight_decay", type=float, default=1e-4)
        self.parser.add_argument("--epoch", type=int, default=60, help="number of end epoch")
        self.parser.add_argument("--start_epoch", type=int, default=1, help="number of start epoch")
        self.parser.add_argument("--use_GPU", action="store_true", help="identify whether to use gpu")
        self.parser.add_argument("--GPU_id", type=int, default=None, help="device id")
        self.parser.add_argument("--dataset_dir", type=str, default=r"D:\wangpan\VOC\VOCdevkit\VOC2012\voc2012_forYolov1")
        self.parser.add_argument("--checkpoints_dir", type=str, default="./checkpoints")
        self.parser.add_argument("--print_freq", type=int, default=10,
                            help="print training information frequency (per n iteration)")
        self.parser.add_argument("--save_freq", type=int, default=20, help="save model frequency (per n epoch)")
        self.parser.add_argument("--num_workers", type=int, default=0, help="use n threads to read data")
        # NOTE: this default points at a machine-specific checkpoint path.
        self.parser.add_argument("--pretrain", type=str, default=r"C:\Users\Owen\Desktop\YOLOv1-from-scratch-master\checkpoints\epoch60.tar", help="pretrain model path")
        self.parser.add_argument("--random_seed", type=int, default=0, help="random seed for split dataset")

        self.opts = self.parser.parse_args()
        self._resolve_device()

    def set_test_args(self):
        """Register and parse the options for inference."""
        self.parser.add_argument("--batch_size", type=int, default=1)
        self.parser.add_argument("--use_GPU", action="store_true", help="identify whether to use gpu")
        self.parser.add_argument("--GPU_id", type=int, default=None, help="device id")
        self.parser.add_argument("--dataset_dir", type=str, default=r"D:\wangpan\VOC\VOCdevkit\VOC2012\voc2012_forYolov1\testimg")
        self.parser.add_argument("--weight_path", type=str,
                             default=r"checkpoints\epoch60.tar",
                             help="load path for model weight")
        self.opts = self.parser.parse_args()
        self._resolve_device()

    def get_opts(self):
        """Return the namespace produced by the last ``set_*_args`` call."""
        return self.opts


--------------------------------------------------------------------------------
/prepare_data.py:
--------------------------------------------------------------------------------
  1 | """
  2 | time:2021.4.26
  3 | author:Jylyy
  4 | 
  5 | """
  6 | 
  7 | """
  8 | 数据准备，将数据处理为两个文件，一个是train.csv,另一个是train.txt。同理也会有test.csv, test.txt
  9 | train.csv: 每一行是一张图片的标签，具体储存情况根据不同任务的需求自行设定
 10 | train.txt: 每一行是图片的路径，该文件每行的图片和train.csv的每一行标注应该是一一对应的
 11 | 另外，根据需要将图片稍微离线处理一下，比如将原图片裁剪出训练使用的图片(resize成训练要求大小)后，保存在自定义文件夹中，train.txt里的路径应与自定义文件夹相同
 12 | """
 13 | import xml.etree.ElementTree as ET
 14 | import numpy as np
 15 | import cv2
 16 | import random
 17 | import os
 18 | 
 19 | 
# Class names; a class id is the index of its name in this list.
GL_CLASSES = ['person', 'bird', 'cat', 'cow', 'dog', 'horse', 'sheep',
           'aeroplane', 'bicycle', 'boat', 'bus', 'car', 'motorbike', 'train',
           'bottle', 'chair', 'diningtable', 'pottedplant', 'sofa', 'tvmonitor']

GL_NUMBBOX = 2   # number of bounding boxes per grid cell
GL_NUMGRID = 7   # number of grid cells per axis

STATIC_DATASET_PATH = r'D:\wangpan\VOC\VOCdevkit\VOC2012'
STATIC_DEBUG = False  # enables the debug drawing/preview code
 29 | 
 30 | 
 31 | def convert(size, box):
 32 |     """将bbox的左上角点、右下角点坐标的格式，转换为bbox中心点+bbox的w,h的格式
 33 |     并进行归一化"""
 34 |     dw = 1. / size[0]
 35 |     dh = 1. / size[1]
 36 |     x = (box[0] + box[1]) / 2.0
 37 |     y = (box[2] + box[3]) / 2.0
 38 |     w = box[1] - box[0]
 39 |     h = box[3] - box[2]
 40 |     x = x * dw
 41 |     w = w * dw
 42 |     y = y * dh
 43 |     h = h * dh
 44 |     return (x, y, w, h)
 45 | 
 46 | 
 47 | def convert_annotation(anno_dir, image_id, labels_dir):
 48 |     """把图像image_id的xml文件转换为目标检测的label文件(txt)：(class,x,y,w,h)
 49 |     其中包含物体的类别，bbox的左上角点坐标以及bbox的宽、高
 50 |     并将四个物理量归一化"""
 51 |     in_file = open(os.path.join(anno_dir, 'Annotations/%s' % (image_id)))
 52 |     image_id = image_id.split('.')[0]
 53 |     tree = ET.parse(in_file)
 54 |     root = tree.getroot()
 55 |     size = root.find('size')
 56 |     w = int(size.find('width').text)
 57 |     h = int(size.find('height').text)
 58 | 
 59 |     for obj in root.iter('object'):
 60 |         difficult = obj.find('difficult').text
 61 |         cls = obj.find('name').text
 62 |         if cls not in GL_CLASSES or int(difficult) == 1:
 63 |             continue
 64 |         cls_id = GL_CLASSES.index(cls)
 65 |         xmlbox = obj.find('bndbox')
 66 |         points = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
 67 |              float(xmlbox.find('ymax').text))
 68 |         bb = convert((w, h), points)   #返回(x,y,w,h)
 69 |         with open(os.path.join(labels_dir, '%s.txt' % (image_id)), 'a') as out_file:
 70 |             out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
 71 | 
 72 | 
 73 | def make_label_txt(anno_dir, labels_dir):
 74 |     """在labels文件夹下创建image_id.txt，对应每个image_id.xml提取出的bbox信息"""
 75 |     filenames = os.listdir(os.path.join(anno_dir,'Annotations'))
 76 |     for file in filenames:
 77 |         convert_annotation(anno_dir, file, labels_dir)
 78 | 
 79 | 
 80 | def img_augument(img_dir, save_img_dir, labels_dir):
 81 |     imgs_list = [x.split('.')[0]+".jpg" for x in os.listdir(labels_dir)]
 82 |     for img_name in imgs_list:
 83 |         print("process %s"%os.path.join(img_dir, img_name))
 84 |         img = cv2.imread(os.path.join(img_dir, img_name))
 85 |         h, w = img.shape[0:2]
 86 |         input_size = 448  # 输入YOLOv1网络的图像尺寸为448x448
 87 |         # 因为数据集内原始图像的尺寸是不定的，所以需要进行适当的padding，将原始图像padding成宽高一致的正方形
 88 |         # 然后再将Padding后的正方形图像缩放成448x448
 89 |         padw, padh = 0, 0  # 要记录宽高方向的padding具体数值，因为padding之后需要调整bbox的位置信息
 90 |         if h > w:
 91 |             padw = (h - w) // 2
 92 |             img = np.pad(img, ((0, 0), (padw, padw), (0, 0)), 'constant', constant_values=0)
 93 |         elif w > h:
 94 |             padh = (w - h) // 2
 95 |             img = np.pad(img, ((padh, padh), (0, 0), (0, 0)), 'constant', constant_values=0)
 96 |         img = cv2.resize(img, (input_size, input_size))
 97 |         cv2.imwrite(os.path.join(save_img_dir, img_name), img)
 98 |         # 读取图像对应的bbox信息，按1维的方式储存，每5个元素表示一个bbox的(cls,xc,yc,w,h)
 99 |         with open(os.path.join(labels_dir,img_name.split('.')[0] + ".txt"), 'r') as f:
100 |             bbox = f.read().split('\n')
101 |         bbox = [x.split() for x in bbox]
102 |         bbox = [float(x) for y in bbox for x in y]
103 |         if len(bbox) % 5 != 0:
104 |             raise ValueError("File:"
105 |                              + os.path.join(labels_dir,img_name.split('.')[0] + ".txt") + "——bbox Extraction Error!")
106 | 
107 |         # 根据padding、图像增广等操作，将原始的bbox数据转换为修改后图像的bbox数据
108 |         if padw != 0:
109 |             for i in range(len(bbox) // 5):
110 |                 bbox[i * 5 + 1] = (bbox[i * 5 + 1] * w + padw) / h
111 |                 bbox[i * 5 + 3] = (bbox[i * 5 + 3] * w) / h
112 |                 if STATIC_DEBUG:
113 |                     cv2.rectangle(img, (int(bbox[1] * input_size - bbox[3] * input_size / 2),
114 |                                         int(bbox[2] * input_size - bbox[4] * input_size / 2)),
115 |                                   (int(bbox[1] * input_size + bbox[3] * input_size / 2),
116 |                                    int(bbox[2] * input_size + bbox[4] * input_size / 2)), (0, 0, 255))
117 |         elif padh != 0:
118 |             for i in range(len(bbox) // 5):
119 |                 bbox[i * 5 + 2] = (bbox[i * 5 + 2] * h + padh) / w
120 |                 bbox[i * 5 + 4] = (bbox[i * 5 + 4] * h) / w
121 |                 if STATIC_DEBUG:
122 |                     cv2.rectangle(img, (int(bbox[1] * input_size - bbox[3] * input_size / 2),
123 |                                         int(bbox[2] * input_size - bbox[4] * input_size / 2)),
124 |                                   (int(bbox[1] * input_size + bbox[3] * input_size / 2),
125 |                                    int(bbox[2] * input_size + bbox[4] * input_size / 2)), (0, 0, 255))
126 |         # 此处可以写代码验证一下，查看padding后修改的bbox数值是否正确，在原图中画出bbox检验
127 |         if STATIC_DEBUG:
128 |             cv2.imshow("bbox-%d"%int(bbox[0]), img)
129 |             cv2.waitKey(0)
130 |         with open(os.path.join(labels_dir, img_name.split('.')[0] + ".txt"), 'w') as f:
131 |             for i in range(len(bbox) // 5):
132 |                 bbox = [str(x) for x in bbox[i*5:(i*5+5)]]
133 |                 str_context = " ".join(bbox)+'\n'
134 |                 f.write(str_context)
135 | 
136 | 
def convert_bbox2labels(bbox):
    """Convert a flat list of (cls, xc, yc, w, h) boxes into a YOLOv1 training label.

    :param bbox: flat sequence whose length is a multiple of 5; xc, yc, w, h are
        normalized to the image size
    :return: array of shape (1, GL_NUMGRID * GL_NUMGRID * (5*GL_NUMBBOX+len(GL_CLASSES)))

    Inside the label the centre is stored relative to its grid cell (px, py)
    while w and h stay image-relative. A cell describes one object only: when
    several objects fall into the same cell, the last one wins.
    """
    gridsize = 1.0/GL_NUMGRID
    cls_offset = 5 * GL_NUMBBOX  # class scores start after the box slots
    labels = np.zeros((GL_NUMGRID, GL_NUMGRID, cls_offset + len(GL_CLASSES)))
    for i in range(len(bbox)//5):
        cls_id, xc, yc, w, h = bbox[i*5:i*5+5]
        # Cell containing the box centre. Clamp so that a centre lying exactly on
        # the right/bottom border (xc == 1.0) stays inside the grid.
        gridx = min(max(int(xc // gridsize), 0), GL_NUMGRID - 1)  # column
        gridy = min(max(int(yc // gridsize), 0), GL_NUMGRID - 1)  # row
        # (centre - top-left corner of the cell) / cell size => offset inside the cell
        gridpx = xc / gridsize - gridx
        gridpy = yc / gridsize - gridy
        # Every box slot of the responsible cell gets the same target, confidence 1.
        box_target = np.array([gridpx, gridpy, w, h, 1])
        for b in range(GL_NUMBBOX):
            labels[gridy, gridx, 5*b:5*b+5] = box_target
        # Clear any class left by an object that was just overwritten.
        labels[gridy, gridx, cls_offset:] = 0
        labels[gridy, gridx, cls_offset+int(cls_id)] = 1

    labels = labels.reshape(1, -1)
    return labels
155 | 
156 | 
def _write_split(ids, imgs_list, save_img_dir, labels_dir, txt_path, csv_path):
    """Write the image list (``txt_path``) and label matrix (``csv_path``) for one split."""
    label_len = GL_NUMGRID*GL_NUMGRID*(5*GL_NUMBBOX+len(GL_CLASSES))
    label_rows = np.zeros((len(ids), label_len), dtype=np.float32)
    with open(txt_path, 'w') as list_file:
        for row, img_idx in enumerate(ids):
            img_name = imgs_list[img_idx]
            list_file.write(os.path.join(save_img_dir, img_name)+'\n')
            with open(os.path.join(labels_dir, "%s.txt"%img_name.split('.')[0]), 'r') as f:
                bbox = [float(x) for x in f.read().split()]
            label_rows[row, :] = convert_bbox2labels(bbox)
    np.savetxt(csv_path, label_rows)


def create_csv_txt(img_dir, anno_dir, save_root_dir, train_val_ratio=0.9, padding=10, debug=False):
    """
    Prepare the processed images plus train/test list and label files.

    :param img_dir: folder with the original images
    :param anno_dir: dataset root; ``labels`` and ``voc2012_forYolov1/img`` are created below it
    :param save_root_dir: folder that receives train.txt/train.csv and test.txt/test.csv
    :param train_val_ratio: fraction of the images assigned to the train files
    :param padding: unused, kept for interface compatibility
    :param debug: unused, kept for interface compatibility

    Label extraction and image processing are each skipped when their output
    folder already exists. The split uses ``random.shuffle``, so seed the
    ``random`` module beforehand for a reproducible split.
    """
    labels_dir = os.path.join(anno_dir, "labels")
    if not os.path.exists(labels_dir):
        os.mkdir(labels_dir)
        make_label_txt(anno_dir, labels_dir)
        print("labels done.")
    save_img_dir = os.path.join(os.path.join(anno_dir, "voc2012_forYolov1"), "img")
    if not os.path.exists(save_img_dir):
        os.makedirs(save_img_dir)
        img_augument(img_dir, save_img_dir, labels_dir)

    imgs_list = os.listdir(save_img_dir)
    n_trainval = len(imgs_list)
    shuffle_id = list(range(n_trainval))
    random.shuffle(shuffle_id)
    n_train = int(n_trainval*train_val_ratio)

    _write_split(shuffle_id[:n_train], imgs_list, save_img_dir, labels_dir,
                 os.path.join(save_root_dir, "train.txt"),
                 os.path.join(save_root_dir, "train.csv"))
    print("Create %d train data." % (n_train))

    _write_split(shuffle_id[n_train:], imgs_list, save_img_dir, labels_dir,
                 os.path.join(save_root_dir, "test.txt"),
                 os.path.join(save_root_dir, "test.csv"))
    print("Create %d test data." % (n_trainval-n_train))
202 | 
203 | 
if __name__ == '__main__':
    # Fixed seed so that the train/test split is reproducible.
    random.seed(0)
    np.set_printoptions(threshold=np.inf)
    img_dir = os.path.join(STATIC_DATASET_PATH, "JPEGImages")  # original images
    save_dir = os.path.join(STATIC_DATASET_PATH, "voc2012_forYolov1")  # processed images + labels
    if not os.path.exists(save_dir):
        os.mkdir(save_dir)
    # One pass per annotation root (currently only the dataset root itself).
    for anno_dir in [STATIC_DATASET_PATH]:
        create_csv_txt(img_dir, anno_dir, save_dir, debug=False)
215 | 


--------------------------------------------------------------------------------
/prepare_test.py:
--------------------------------------------------------------------------------
 1 | """
 2 | time:2021.4.26
 3 | author:Jylyy
 4 | 
 5 | """
 6 | 
 7 | """
 8 | 将预处理后的图片中测试集的图片存放到testimg文件夹用来测试
 9 | """
10 | 
11 | import os, random, glob
12 | from shutil import copyfile
13 | 
# NOTE(review): the module docstring talks about test-set images, but the list
# read here is train.txt — confirm which list is intended.
imgtxt_dir = r'D:\wangpan\VOC\VOCdevkit\VOC2012\voc2012_forYolov1\train.txt'
img_dir = r'D:\wangpan\VOC\VOCdevkit\VOC2012\voc2012_forYolov1\img'
save_testimg = r'D:\wangpan\VOC\VOCdevkit\VOC2012\voc2012_forYolov1\testimg'
choseImg = []


# All '.jpg' files under the image folder (currently not used below).
imageList1 = glob.glob(os.path.join(img_dir, '*.jpg'))

# Collect the base names of every listed image that exists on disk. Iterating
# over the file visits every line: the old read-ahead loop skipped the first
# entry and stopped at the first blank line.
with open(imgtxt_dir, "r") as f:
    for line in f:
        line = line.strip()
        if line and os.path.exists(line):
            choseImg.append(os.path.basename(line))

# Make sure the destination exists before copying into it.
os.makedirs(save_testimg, exist_ok=True)
for i in choseImg:
    copyfile(os.path.join(img_dir, i), os.path.join(save_testimg, i))
36 | 
37 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
 1 | """
 2 | time:2021.4.26
 3 | author:Jylyy
 4 | 
 5 | """
 6 | import os
 7 | from my_arguments import Args
 8 | import torch
 9 | from torch.utils.data import DataLoader
10 | 
11 | from model import MyNet
12 | from data import MyDataset
13 | from util import labels2bbox
14 | from prepare_data import GL_CLASSES
15 | import torchvision.transforms as transforms
16 | from PIL import Image
17 | import cv2
18 | 
19 | 
COLOR = [(255,0,0),(255,125,0),(255,255,0),(255,0,125),(255,0,250),
         (255,125,125),(255,125,250),(125,125,0),(0,255,125),(255,0,0),
         (0,0,255),(125,0,255),(0,125,255),(0,255,255),(125,125,255),
         (0,255,0),(125,255,125),(255,255,255),(100,100,100),(0,0,0),]  # bbox colors for the 20 classes (indexed by class id); can be customized
24 | 
25 | 
class TestInterface(object):
    """
    Inference interface.
    main(): run the network on every image of the dataset folder and show the detections
    """
    def __init__(self, opts):
        """
        :param opts: parsed command-line options (see Args.set_test_args)
        """
        self.opts = opts
        print("=======================Start inferring.=======================")

    def main(self):
        """
        Inference flow:
        1. list the images in ``opts.dataset_dir``
        2. load the serialized model from ``opts.weight_path``
        3. run every image through the model
        4. decode the output with labels2bbox and draw the boxes
        """
        opts = self.opts
        img_list = os.listdir(opts.dataset_dir)
        trans = transforms.Compose([
            transforms.ToTensor(),
        ])
        # Load onto the CPU first so that checkpoints saved on a GPU also open on
        # CPU-only machines; the model is moved to the GPU below when requested.
        model = torch.load(opts.weight_path, map_location="cpu")
        if opts.use_GPU:
            model.to(opts.GPU_id)
        # Inference mode: BatchNorm uses its running statistics rather than the
        # statistics of the single image in the batch.
        model.eval()
        with torch.no_grad():  # no gradients are needed for inference
            for img_name in img_list:
                img_path = os.path.join(opts.dataset_dir, img_name)
                img = Image.open(img_path).convert('RGB')
                img = trans(img)
                img = torch.unsqueeze(img, dim=0)
                print(img_name, img.shape)
                if opts.use_GPU:
                    img = img.to(opts.GPU_id)
                preds = torch.squeeze(model(img), dim=0).detach().cpu()
                preds = preds.permute(1,2,0)  # channel-first -> channel-last for labels2bbox
                bbox = labels2bbox(preds)
                draw_img = cv2.imread(img_path)
                self.draw_bbox(draw_img, bbox)

    def draw_bbox(self, img, bbox):
        """
        Draw the bounding boxes on the image and display it.

        :param img: image to draw on
        :param bbox: shape (n, 6); 0:4 is (x1, y1, x2, y2) relative to the image
            size, 4 is the confidence, 5 is the class id
        """
        h, w = img.shape[0:2]
        n = bbox.shape[0]
        for i in range(n):
            confidence = bbox[i, 4]
            if confidence<0.2:  # skip low-confidence detections
                continue
            p1 = (int(w * bbox[i, 0]), int(h * bbox[i, 1]))
            p2 = (int(w * bbox[i, 2]), int(h * bbox[i, 3]))
            cls_id = int(bbox[i, 5])
            cls_name = GL_CLASSES[cls_id]
            print(cls_name, p1, p2)
            cv2.rectangle(img, p1, p2, COLOR[cls_id])
            cv2.putText(img, cls_name, p1, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
            cv2.putText(img, str(confidence), (p1[0],p1[1]-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
        cv2.imshow("bbox", img)
        cv2.waitKey(0)
88 | 
89 | 
if __name__ == '__main__':
    # Inference entry point: parse the command-line options, then run the test interface.
    args = Args()
    args.set_test_args()
    TestInterface(args.get_opts()).main()


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | """
  2 | time:2021.4.26
  3 | author:Jylyy
  4 | 
  5 | """
  6 | 
  7 | import os
  8 | import datetime
  9 | import time
 10 | import torch
 11 | from torch.utils.data import DataLoader
 12 | 
 13 | from model import MyNet
 14 | from data import MyDataset
 15 | from my_arguments import Args
 16 | from prepare_data import GL_CLASSES, GL_NUMBBOX, GL_NUMGRID
 17 | from util import labels2bbox
 18 | 
 19 | 
 20 | class TrainInterface(object):
 21 |     """
 22 |     网络训练接口，
 23 |     __train(): 训练过程函数
 24 |     __validate(): 验证过程函数
 25 |     __save_model(): 保存模型函数
 26 |     main(): 训练网络主函数
 27 |     """
 28 |     #初始化
 29 |     def __init__(self, opts):
 30 |         """
 31 |         :param opts: 命令行参数
 32 |         """
 33 |         self.opts = opts
 34 |         print("=======================Start training.=======================")
 35 | 
 36 |     #训练
 37 |     @staticmethod
 38 |     def __train(model, train_loader, optimizer, epoch, num_train, opts):
 39 |         """
 40 |         完成一个epoch的训练
 41 |         :param model: torch.nn.Module, 需要训练的网络
 42 |         :param train_loader: torch.utils.data.Dataset, 训练数据集对应的类
 43 |         :param optimizer: torch.optim.Optimizer, 优化网络参数的优化器
 44 |         :param epoch: int, 表明当前训练的是第几个epoch
 45 |         :param num_train: int, 训练集数量
 46 |         :param opts: 命令行参数
 47 |         """
 48 |         model.train()
 49 |         device = opts.GPU_id
 50 |         avg_metric = 0.  # 平均评价指标
 51 |         avg_loss = 0.  # 平均损失数值
 52 |         losssum = 0.
 53 |         # log_file是保存网络训练过程信息的文件，网络训练信息会以追加的形式打印在log.txt里，不会覆盖原有log文件
 54 |         log_file = open(os.path.join(opts.checkpoints_dir, "log.txt"), "a+")
 55 |         localtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")  # 打印训练时间
 56 |         log_file.write(localtime)
 57 |         log_file.write("\n======================training epoch %d======================\n"%epoch)
 58 |         for i,(imgs, labels) in enumerate(train_loader):
 59 |             labels1 = labels.view(opts.batch_size, GL_NUMGRID, GL_NUMGRID, -1)
 60 |             labels1 = labels1.permute(0,3,1,2)
 61 |             if opts.use_GPU:
 62 |                 imgs = imgs.to(device)
 63 |                 labels1 = labels1.to(device)
 64 |             preds = model(imgs)  # 前向传播
 65 |             loss = model.calculate_loss(preds,labels1)  # 计算损失
 66 |             optimizer.zero_grad()  # 梯度清零
 67 |             loss.backward()  # 反向传播
 68 |             optimizer.step()  # 优化网络参数
 69 |             # metric = model.calculate_metric(preds, labels)  # 计算评价指标
 70 |             # avg_metric = (avg_metric*i+metric)/(i+1)
 71 |             #avg_loss = (avg_loss*i+loss.item())/(i+1)
 72 |             losssum += loss.item()
 73 |             #if i % opts.print_freq == 0:  # 根据打印频率输出log信息和训练信息
 74 |             # if (t+1) % 20 == 0:
 75 |         print("Epoch %d/%d | training loss = %.3f, avg_loss = %.3f" %
 76 |                       (epoch, opts.epoch, loss.item(), losssum/num_train))
 77 |         log_file.write("Epoch %d/%d | training loss = %.3f, avg_loss = %.3f\n" %
 78 |                   (epoch, opts.epoch, loss.item(), losssum/num_train))
 79 |         log_file.flush()
 80 |         log_file.close()
 81 | 
 82 |     @staticmethod
 83 |     def __validate(model, val_loader, epoch, num_val, opts):
 84 |         """
 85 |         完成一个epoch训练后的验证任务
 86 |         :param model: torch.nn.Module, 需要训练的网络
 87 |         :param _loader: torch.utils.data.Dataset, 验证数据集对应的类
 88 |         :param epoch: int, 表明当前训练的是第几个epoch
 89 |         :param num_val: int, 验证集数量
 90 |         :param opts: 命令行参数
 91 |         """
 92 |         model.eval()
 93 |         log_file = open(os.path.join(opts.checkpoints_dir, "log.txt"), "a+")
 94 |         log_file.write("======================validate epoch %d======================\n"%epoch)
 95 |         preds = None
 96 |         gts = None
 97 |         avg_metric = 0.
 98 |         with torch.no_grad():  # 加上这个可以减少在validation过程时的显存占用，提高代码的显存利用率
 99 |             for i,(imgs, labels) in enumerate(val_loader):
100 |                 if opts.use_GPU:
101 |                     imgs = imgs.to(opts.GPU_id)
102 |                 pred = model(imgs).cpu().squeeze(dim=0).permute(1,2,0)
103 |                 pred_bbox = labels2bbox(pred)  # 将网络输出经过NMS后转换为shape为(-1, 6)的bbox
104 |             metric = model.calculate_metric(preds, gts)
105 |             print("Evaluation of validation result: average L2 distance = %.5f"%(metric))
106 |             log_file.write("Evaluation of validation result: average L2 distance = %.5f\n"%(metric))
107 |             log_file.flush()
108 |             log_file.close()
109 |         return metric
110 | 
111 |     @staticmethod
112 |     def __save_model(model, epoch, opts):
113 |         """
114 |         保存第epoch个网络的参数
115 |         :param model: torch.nn.Module, 需要训练的网络
116 |         :param epoch: int, 表明当前训练的是第几个epoch
117 |         :param opts: 命令行参数
118 |         """
119 |         model_name = "epoch%d.tar" % epoch
120 |         save_dir = os.path.join(opts.checkpoints_dir, model_name)
121 |         torch.save(model, save_dir)
122 | 
123 | 
124 |     def main(self):
125 |         """
126 |         训练接口主函数，完成整个训练流程
127 |         1. 创建训练集和验证集的DataLoader类
128 |         2. 初始化带训练的网络
129 |         3. 选择合适的优化器
130 |         4. 训练并验证指定个epoch，保存其中评价指标最好的模型，并打印训练过程信息
131 |         5. TODO: 可视化训练过程信息
132 |         """
133 |         opts = self.opts
134 |         if not os.path.exists(opts.checkpoints_dir):
135 |             os.mkdir(opts.checkpoints_dir)
136 |         random_seed = opts.random_seed
137 |         train_dataset = MyDataset(opts.dataset_dir, seed=random_seed, mode="train", train_val_ratio=0.9)
138 |         val_dataset = MyDataset(opts.dataset_dir, seed=random_seed, mode="val", train_val_ratio=0.9)
139 |         train_loader = DataLoader(train_dataset, opts.batch_size, shuffle=False, num_workers=0,drop_last=True)
140 |         val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=opts.num_workers)
141 |         num_train = len(train_dataset)
142 |         num_val = len(val_dataset)
143 | 
144 |         if opts.pretrain is None:
145 |             model = MyNet()
146 |         else:
147 |             model = torch.load(opts.pretrain)
148 |         if opts.use_GPU:
149 |             model.to(opts.GPU_id)
150 |         optimizer = torch.optim.SGD(model.parameters(), lr=opts.lr, momentum=0.9, weight_decay=opts.weight_decay)
151 |         # optimizer = torch.optim.Adam(model.parameters(), lr=opts.lr, weight_decay=opts.weight_decay)
152 | 
153 |         best_metric=1000000
154 |         for e in range(opts.start_epoch+60, opts.epoch+31):
155 |             t = time.time()
156 |             self.__train(model, train_loader, optimizer, e, num_train, opts)
157 |             t2 = time.time()
158 |             print("Training consumes %.2f second\n" % (t2-t))
159 |             with open(os.path.join(opts.checkpoints_dir, "log.txt"), "a+") as log_file:
160 |                 log_file.write("Training consumes %.2f second\n" % (t2-t))
161 |             if e % opts.save_freq==0 or e == opts.epoch+1:
162 |                 # t = time.time()
163 |                 # metric = self.__validate(model, val_loader, e, num_val, opts)
164 |                 # t2 = time.time()
165 |                 # print("Validation consumes %.2f second\n" % (t2 - t))
166 |                 # with open(os.path.join(opts.checkpoints_dir, "log.txt"), "a+") as log_file:
167 |                 #     log_file.write("Validation consumes %.2f second\n" % (t2 - t))
168 |                 # if best_metric>metric:
169 |                 #     best_metric = metric
170 |                 #     print("Epoch %d is now the best epoch with metric %.4f\n"%(e, best_metric))
171 |                 #     with open(os.path.join(opts.checkpoints_dir, "log.txt"), "a+") as log_file:
172 |                 #         log_file.write("Epoch %d is now the best epoch with metric %.4f\n"%(e, best_metric))
173 |                 self.__save_model(model, e, opts)
174 | 
175 | 
if __name__ == '__main__':
    # Entry point for training the network.
    cli_args = Args()
    cli_args.set_train_args()  # set up the training command-line arguments
    # Build the training interface from the parsed options and run it.
    TrainInterface(cli_args.get_opts()).main()
182 | 


--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
  1 | """
  2 | time:2021.4.26
  3 | author:Jylyy
  4 | 
  5 | """
  6 | 
  7 | """
  8 | 该文件里提供一些项目需要的功能性函数/类
  9 | """
 10 | import torch
 11 | import torch.nn as nn
 12 | import numpy as np
 13 | 
 14 | 
 15 | class DepthwiseConv(nn.Module):
 16 |     """
 17 |     深度可分离卷积层
 18 |     """
 19 |     def __init__(self, in_channels, out_channels, kernel_size, stride, padding=0, bias=True):
 20 |         super(DepthwiseConv, self).__init__()
 21 |         self.layers = nn.Sequential(
 22 |             nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, stride=stride,
 23 |                       padding=padding, groups=in_channels, bias=bias),
 24 |             nn.BatchNorm2d(num_features=in_channels),
 25 |             nn.ReLU(),
 26 |             nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, bias=bias),
 27 |             nn.BatchNorm2d(num_features=out_channels),
 28 |             nn.ReLU()
 29 |         )
 30 | 
 31 |     def forward(self, inputs):
 32 |         outputs = self.layers(inputs)
 33 |         return outputs
 34 | 
 35 | 
 36 | class InvertedBottleneck(nn.Module):
 37 |     """
 38 |     MobileNet v2 的InvertedBottleneck
 39 |     """
 40 |     def __init__(self, in_channels, out_channels, kernel_size, stride, t_factor, padding=0, bias=True):
 41 |         super(InvertedBottleneck, self).__init__()
 42 |         mid_channels = t_factor*in_channels
 43 |         self.layers = nn.Sequential(
 44 |             nn.Conv2d(in_channels=in_channels, out_channels=mid_channels, kernel_size=1, bias=bias),
 45 |             nn.BatchNorm2d(num_features=mid_channels),
 46 |             nn.ReLU(),
 47 |             nn.Conv2d(in_channels=mid_channels, out_channels=mid_channels, kernel_size=kernel_size, stride=stride,
 48 |                       padding=padding, bias=bias),
 49 |             nn.BatchNorm2d(num_features=mid_channels),
 50 |             nn.ReLU(),
 51 |             nn.Conv2d(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, bias=bias)
 52 |         )
 53 | 
 54 |     def forward(self, inputs):
 55 |         outputs = self.layers(inputs)
 56 |         return outputs
 57 | 
 58 | class Flatten(nn.Module):
 59 |     """
 60 |     将三维张量拉平的网络层
 61 |     (n,c,h,w) -> (n, c*h*w)
 62 |     """
 63 |     def __init__(self):
 64 |         super(Flatten, self).__init__()
 65 | 
 66 |     def forward(self, x):
 67 |         n_samples = x.shape[0]
 68 |         x = x.reshape(n_samples, -1)
 69 |         return x
 70 | 
 71 | 
 72 | def calculate_iou(bbox1, bbox2):
 73 |     """计算bbox1=(x1,y1,x2,y2)和bbox2=(x3,y3,x4,y4)两个bbox的iou"""
 74 |     if bbox1[2]<=bbox1[0] or bbox1[3]<=bbox1[1] or bbox2[2]<=bbox2[0] or bbox2[3]<=bbox2[1]:
 75 |         return 0  # 如果bbox1或bbox2没有面积，或者输入错误，直接返回0
 76 | 
 77 |     intersect_bbox = [0., 0., 0., 0.]  # bbox1和bbox2的重合区域的(x1,y1,x2,y2)
 78 | 
 79 |     intersect_bbox[0] = max(bbox1[0],bbox2[0])
 80 |     intersect_bbox[1] = max(bbox1[1],bbox2[1])
 81 |     intersect_bbox[2] = min(bbox1[2],bbox2[2])
 82 |     intersect_bbox[3] = min(bbox1[3],bbox2[3])
 83 | 
 84 |     w = max(intersect_bbox[2] - intersect_bbox[0], 0)
 85 |     h = max(intersect_bbox[3] - intersect_bbox[1], 0)
 86 |     area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])  # bbox1面积
 87 |     area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])  # bbox2面积
 88 |     area_intersect = w * h  # 交集面积
 89 |     iou = area_intersect / (area1 + area2 - area_intersect + 1e-6)  # 防止除0
 90 |     # print(bbox1,bbox2)
 91 |     # print(intersect_bbox)
 92 |     # input()
 93 |     return iou
 94 | 
 95 | 
 96 | # 注意检查一下输入数据的格式，到底是xywh还是xyxy
 97 | def labels2bbox(matrix):
 98 |     """
 99 |     将网络输出的7*7*30的数据转换为bbox的(98,25)的格式，然后再将NMS处理后的结果返回
100 |     :param matrix: 注意，输入的数据中，bbox坐标的格式是(px,py,w,h)，需要转换为(x1,y1,x2,y2)的格式再输入NMS
101 |     :return: 返回NMS处理后的结果,bboxes.shape = (-1, 6), 0:4是(x1,y1,x2,y2), 4是conf， 5是cls
102 |     """
103 |     if matrix.size()[0:2]!=(7,7):
104 |         raise ValueError("Error: Wrong labels size: ", matrix.size(), " != (7,7)")
105 |     matrix = matrix.numpy()
106 |     bboxes = np.zeros((98, 6))
107 |     # 先把7*7*30的数据转变为bbox的(98,25)的格式，其中，bbox信息格式从(px,py,w,h)转换为(x1,y1,x2,y2),方便计算iou
108 |     matrix = matrix.reshape(49,-1)
109 |     bbox = matrix[:, :10].reshape(98, 5)
110 |     r_grid = np.array(list(range(7)))
111 |     r_grid = np.repeat(r_grid, repeats=14, axis=0)  # [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...]
112 |     c_grid = np.array(list(range(7)))
113 |     c_grid = np.repeat(c_grid, repeats=2, axis=0)[np.newaxis, :]
114 |     c_grid = np.repeat(c_grid, repeats=7, axis=0).reshape(-1)  # [0 0 1 1 2 2 3 3 4 4 5 5 6 6 0 0 1 1 2 2 3 3 4 4 5 5 6 6...]
115 |     bboxes[:, 0] = np.maximum((bbox[:, 0] + c_grid) / 7.0 - bbox[:, 2] / 2.0, 0)
116 |     bboxes[:, 1] = np.maximum((bbox[:, 1] + r_grid) / 7.0 - bbox[:, 3] / 2.0, 0)
117 |     bboxes[:, 2] = np.minimum((bbox[:, 0] + c_grid) / 7.0 + bbox[:, 2] / 2.0, 1)
118 |     bboxes[:, 3] = np.minimum((bbox[:, 1] + r_grid) / 7.0 + bbox[:, 3] / 2.0, 1)
119 |     bboxes[:, 4] = bbox[:, 4]
120 |     cls = np.argmax(matrix[:, 10:], axis=1)
121 |     cls = np.repeat(cls, repeats=2, axis=0)
122 |     bboxes[:, 5] = cls
123 |     # 对所有98个bbox执行NMS算法，清理cls-specific confidence score较低以及iou重合度过高的bbox
124 |     keepid = nms_multi_cls(bboxes, thresh=0.1, n_cls=20)
125 |     ids = []
126 |     for x in keepid:
127 |         ids = ids + list(x)
128 |     ids = sorted(ids)
129 |     return bboxes[ids, :]
130 | 
131 | 
def nms_1cls(dets, thresh, offset=0.0):
    """
    Single-class non-maximum suppression.

    Boxes are visited in descending score order; every remaining box whose
    IoU with the current best box exceeds ``thresh`` is discarded.

    :param dets: ndarray of shape (n, 5); dets[i, 0:4] are the box corners
                 (x1, y1, x2, y2) and dets[i, 4] is the confidence score
    :param thresh: IoU threshold above which a lower-scoring box is suppressed
    :param offset: value added to every width/height. The default 0 is
                   correct for continuous coordinates such as the [0, 1]
                   normalised boxes produced by labels2bbox; pass 1 to get
                   the inclusive integer-pixel convention (x2 - x1 + 1)
    :return: list of row indices into ``dets`` that are kept, ordered by
             descending score
    """
    x1 = dets[:, 0]
    y1 = dets[:, 1]
    x2 = dets[:, 2]
    y2 = dets[:, 3]
    scores = dets[:, 4]

    areas = (x2 - x1 + offset) * (y2 - y1 + offset)
    # Indices sorted by descending confidence.
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        # The best remaining box is always kept.
        i = order[0]
        keep.append(i)
        rest = order[1:]

        # Corners of the overlap between box i and every remaining box.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # Overlap area is 0 when the boxes do not intersect.
        w = np.maximum(0.0, xx2 - xx1 + offset)
        h = np.maximum(0.0, yy2 - yy1 + offset)
        inter = w * h

        # Guard the denominator so zero-area boxes give IoU 0 instead of NaN.
        union = areas[i] + areas[rest] - inter
        iou = inter / np.maximum(union, 1e-12)

        # Continue with the boxes that do not overlap box i too much.
        order = rest[iou <= thresh]
    return keep
173 | 
174 | 
def nms_multi_cls(dets, thresh, n_cls):
    """
    Multi-class NMS: run single-class NMS separately for every class index.

    :param dets: ndarray of shape (n, 6); dets[i, 0:4] are the box
                 coordinates, dets[i, 4] is the confidence score and
                 dets[i, 5] is the class index
    :param thresh: threshold forwarded to the single-class NMS
    :param n_cls: total number of classes
    :return: list of length n_cls; entry c holds the row indices of ``dets``
             kept for class c (an empty list when the class has no box)
    """
    keeps_index = []
    for cls_id in range(n_cls):
        cls_mask = dets[:, 5] == cls_id
        # Row numbers in ``dets`` of the boxes belonging to this class.
        rows = np.where(cls_mask)[0]
        if rows.shape[0] == 0:
            keeps_index.append([])
            continue
        cls_dets = dets[cls_mask, 0:5]
        # nms_1cls returns positions inside cls_dets; map them back to dets.
        kept_local = nms_1cls(cls_dets, thresh)
        keeps_index.append(rows[kept_local])
    return keeps_index
193 | 
194 | 
if __name__ == '__main__':
    # Smoke test: push a random 7x7x30 tensor through labels2bbox.
    sample = torch.randn((7, 7, 30))
    print(sample)
    labels2bbox(sample)


--------------------------------------------------------------------------------