├── .gitignore ├── README.md ├── checkpoints └── log.txt ├── data.py ├── model.py ├── my_arguments.py ├── prepare_data.py ├── prepare_test.py ├── test.py ├── train.py └── util.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.py[cop] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # README 2 | 3 | implementation of YOLOv1 4 | 5 | 6 | **Required:** 7 | 8 | * pytorch 1.1.0 9 | * torchvision 10 | * numpy > 1.16.2 11 | * opencv 3.4.1 12 | * VOC2012 Dataset 13 | 14 | 15 | 16 | **What can this repo do now** 17 | * train with VOC2012 Dataset 18 | -------------------------------------------------------------------------------- /checkpoints/log.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/johnwingit/YOLOV1_Pytorch/7620e186abb05e1d6fd3db597983f51ab4769541/checkpoints/log.txt -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import Dataset, DataLoader 2 | import numpy as np 3 | import os 4 | import random 5 | import torch 6 | from PIL import Image 7 | import torchvision.transforms as transforms 8 | 9 | 10 | class MyDataset(Dataset): 11 | def __init__(self, dataset_dir, seed=None, mode="train", train_val_ratio=0.9, trans=None): 12 | """ 13 | :param dataset_dir: 数据所在文件夹 14 | :param seed: 打乱数据所用的随机数种子 15 | :param mode: 数据模式,"train", "val", "test" 16 | :param train_val_ratio: 训练时,训练集:验证集的比例 17 | :param trans: 数据预处理函数 18 | 19 | TODO: 20 | 1. 读取储存图片路径的.txt文件,并保存在self.img_list中 21 | 2. 读取储存样本标签的.csv文件,并保存在self.label中 22 | 3. 如果mode="train", 将数据集拆分为训练集和验证集,用self.use_ids来保存对应数据集的样本序号。 23 | 注意,mode="train"和"val"时,必须传入随机数种子,且两者必须相同 24 | 4. 保存传入的数据增广函数 25 | """ 26 | if seed is None: 27 | seed = random.randint(0, 65536) 28 | random.seed(seed) 29 | self.dataset_dir = dataset_dir 30 | self.mode = mode 31 | if mode=="val": 32 | mode = "train" 33 | img_list_txt = os.path.join(dataset_dir, mode+".txt") # 储存图片位置的列表 34 | label_csv = os.path.join(dataset_dir, mode+".csv") # 储存标签的数组文件 35 | self.img_list = [] 36 | self.label = np.loadtxt(label_csv) # 读取标签数组文件 37 | # 读取图片位置文件 38 | with open(img_list_txt, 'r') as f: 39 | for line in f.readlines(): 40 | self.img_list.append(line.strip()) 41 | # 在mode=train或val时, 将数据进行切分 42 | # 注意在mode="val"时,传入的随机种子seed要和mode="train"相同 43 | self.num_all_data = len(self.img_list) 44 | all_ids = list(range(self.num_all_data)) 45 | num_train = int(train_val_ratio*self.num_all_data) 46 | if self.mode == "train": 47 | self.use_ids = all_ids[:num_train] 48 | elif self.mode == "val": 49 | self.use_ids = all_ids[num_train:] 50 | else: 51 | self.use_ids = all_ids 52 | 53 | # 储存数据增广函数 54 | self.trans = trans 55 | 56 | def __len__(self): 57 | """获取数据集数量""" 58 | return len(self.use_ids) 59 | 60 | def __getitem__(self, item): 61 | """ 62 | TODO: 63 | 1. 按顺序依次取出第item个训练数据img及其对应的样本标签label 64 | 2. 图像数据要进行预处理,并最终转换为(c, h, w)的维度,同时转换为torch.tensor 65 | 3. 样本标签要按需要转换为指定格式的torch.tensor 66 | """ 67 | id = self.use_ids[item] 68 | label = torch.tensor(self.label[id, :]) 69 | img_path = self.img_list[id] 70 | img = Image.open(img_path) 71 | if self.trans is None: 72 | trans = transforms.Compose([ 73 | # transforms.Resize((112,112)), 74 | transforms.ToTensor(), 75 | ]) 76 | else: 77 | trans = self.trans 78 | img = trans(img) # 图像预处理&数据增广 79 | # transforms.ToPILImage()(img).show() # for debug 80 | # print(label) 81 | return img, label 82 | 83 | if __name__ == '__main__': 84 | # 调试用,依次取出数据看看是否正确 85 | dataset_dir = r"C:\Users\Owen\Desktop\VOCdevkit\VOC2012\voc2012_forYolov1" 86 | dataset = MyDataset(dataset_dir) 87 | dataloader = DataLoader(dataset, 1) 88 | for i in enumerate(dataloader): 89 | input("press enter to continue") -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | """ 2 | time:2021.4.26 3 | author:Jylyy 4 | 5 | """ 6 | import torch 7 | import torch.nn as nn 8 | import torchvision.models as tvmodel 9 | from prepare_data import GL_CLASSES, GL_NUMBBOX, GL_NUMGRID 10 | from util import calculate_iou 11 | 12 | 13 | class MyNet(nn.Module): 14 | """ 15 | @ 网络实际名称 16 | 为了和后续接口对齐,此处类名固定为MyNet,具体是什么网络可以写在注释里。 17 | """ 18 | def __init__(self): 19 | """ 20 | :param args: 构建网络所需要的参数 21 | 22 | TODO: 23 | 在__init__()函数里,将网络框架搭好,并存在self里 24 | """ 25 | super(MyNet, self).__init__() 26 | resnet = tvmodel.resnet34(pretrained=True) # 调用torchvision里的resnet34预训练模型 27 | resnet_out_channel = resnet.fc.in_features # 记录resnet全连接层之前的网络输出通道数,方便连入后续卷积网络中 28 | self.resnet = nn.Sequential(*list(resnet.children())[:-2]) # 去除resnet的最后两层 29 | # 以下是YOLOv1的最后四个卷积层 30 | self.Conv_layers = nn.Sequential( 31 | nn.Conv2d(resnet_out_channel, 1024, 3, padding=1), 32 | nn.BatchNorm2d(1024), # 为了加快训练,这里增加了BN层,原论文里YOLOv1是没有的 33 | nn.LeakyReLU(inplace=True), 34 | nn.Conv2d(1024, 1024, 3, stride=2, padding=1), 35 | nn.BatchNorm2d(1024), 36 | nn.LeakyReLU(inplace=True), 37 | nn.Conv2d(1024, 1024, 3, padding=1), 38 | nn.BatchNorm2d(1024), 39 | nn.LeakyReLU(inplace=True), 40 | nn.Conv2d(1024, 1024, 3, padding=1), 41 | nn.BatchNorm2d(1024), 42 | nn.LeakyReLU(inplace=True), 43 | ) 44 | # 以下是YOLOv1的最后2个全连接层 45 | self.Conn_layers = nn.Sequential( 46 | nn.Linear(GL_NUMGRID * GL_NUMGRID * 1024, 4096), 47 | nn.LeakyReLU(inplace=True), 48 | nn.Linear(4096, GL_NUMGRID * GL_NUMGRID * (5*GL_NUMBBOX+len(GL_CLASSES))), 49 | nn.Sigmoid() # 增加sigmoid函数是为了将输出全部映射到(0,1)之间,因为如果出现负数或太大的数,后续计算loss会很麻烦 50 | ) 51 | 52 | 53 | def forward(self, inputs): 54 | """ 55 | :param inputs: 输入网络的张量 56 | :return: 输出网络的结果 57 | 58 | TODO 59 | 根据网络的结构,完成网络的前向传播计算。 60 | 如果网络有多条分支,可以用self储存需要在别的地方使用的中间张量。 61 | 如果网络有多个输出,需要将多个输出按后续inference的需求打包输出 62 | """ 63 | x = self.resnet(inputs) 64 | x = self.Conv_layers(x) 65 | x = x.view(x.size()[0], -1) 66 | x = self.Conn_layers(x) 67 | self.pred = x.reshape(-1, (5 * GL_NUMBBOX + len(GL_CLASSES)), GL_NUMGRID, GL_NUMGRID) # 记住最后要reshape一下输出数据 68 | return self.pred 69 | 70 | 71 | def calculate_loss(self, preds,labels): 72 | """ 73 | TODO: 根据labels和self.outputs计算训练loss 74 | :param labels: (bs, n), 对应训练数据的样本标签 75 | :return: loss数值 76 | """ 77 | self.pred = preds.double() 78 | labels = labels.double() 79 | num_gridx, num_gridy = GL_NUMGRID, GL_NUMGRID # 划分网格数量 80 | noobj_confi_loss = 0. # 不含目标的网格损失(只有置信度损失) 81 | coor_loss = 0. # 含有目标的bbox的坐标损失 82 | obj_confi_loss = 0. # 含有目标的bbox的置信度损失 83 | class_loss = 0. # 含有目标的网格的类别损失 84 | n_batch = labels.size()[0] # batchsize的大小 85 | # 可以考虑用矩阵运算进行优化,提高速度,为了准确起见,这里还是用循环 86 | for i in range(n_batch): # batchsize循环 87 | for n in range(num_gridx): # x方向网格循环 88 | for m in range(num_gridy): # y方向网格循环 89 | if labels[i, 4, m, n] == 1: # 如果包含物体 90 | # 将数据(px,py,w,h)转换为(x1,y1,x2,y2) 91 | # 先将px,py转换为cx,cy,即相对网格的位置转换为标准化后实际的bbox中心位置cx,xy 92 | # 然后再利用(cx-w/2,cy-h/2,cx+w/2,cy+h/2)转换为xyxy形式,用于计算iou 93 | bbox1_pred_xyxy = ((self.pred[i, 0, m, n] + n) / num_gridx - self.pred[i, 2, m, n] / 2, 94 | (self.pred[i, 1, m, n] + m) / num_gridy - self.pred[i, 3, m, n] / 2, 95 | (self.pred[i, 0, m, n] + n) / num_gridx + self.pred[i, 2, m, n] / 2, 96 | (self.pred[i, 1, m, n] + m) / num_gridy + self.pred[i, 3, m, n] / 2) 97 | bbox2_pred_xyxy = ((self.pred[i, 5, m, n] + n) / num_gridx - self.pred[i, 7, m, n] / 2, 98 | (self.pred[i, 6, m, n] + m) / num_gridy - self.pred[i, 8, m, n] / 2, 99 | (self.pred[i, 5, m, n] + n) / num_gridx + self.pred[i, 7, m, n] / 2, 100 | (self.pred[i, 6, m, n] + m) / num_gridy + self.pred[i, 8, m, n] / 2) 101 | bbox_gt_xyxy = ((labels[i, 0, m, n] + n) / num_gridx - labels[i, 2, m, n] / 2, 102 | (labels[i, 1, m, n] + m) / num_gridy - labels[i, 3, m, n] / 2, 103 | (labels[i, 0, m, n] + n) / num_gridx + labels[i, 2, m, n] / 2, 104 | (labels[i, 1, m, n] + m) / num_gridy + labels[i, 3, m, n] / 2) 105 | iou1 = calculate_iou(bbox1_pred_xyxy, bbox_gt_xyxy) 106 | iou2 = calculate_iou(bbox2_pred_xyxy, bbox_gt_xyxy) 107 | # 选择iou大的bbox作为负责物体 108 | if iou1 >= iou2: 109 | coor_loss = coor_loss + 5 * (torch.sum((self.pred[i, 0:2, m, n] - labels[i, 0:2, m, n]) ** 2) \ 110 | + 5*torch.sum((self.pred[i, 2:4, m, n].sqrt() - labels[i, 2:4, m, n].sqrt()) ** 2)) 111 | obj_confi_loss = obj_confi_loss + (self.pred[i, 4, m, n] - iou1) ** 2 112 | # iou比较小的bbox不负责预测物体,因此confidence loss算在noobj中,注意,对于标签的置信度应该是iou2 113 | noobj_confi_loss = noobj_confi_loss + 0.5 * ((self.pred[i, 9, m, n] - iou2) ** 2) 114 | else: 115 | coor_loss = coor_loss + 5 * (torch.sum((self.pred[i, 5:7, m, n] - labels[i, 5:7, m, n]) ** 2) \ 116 | + 5*torch.sum((self.pred[i, 7:9, m, n].sqrt() - labels[i, 7:9, m, n].sqrt()) ** 2)) 117 | obj_confi_loss = obj_confi_loss + (self.pred[i, 9, m, n] - iou2) ** 2 118 | # iou比较小的bbox不负责预测物体,因此confidence loss算在noobj中,注意,对于标签的置信度应该是iou1 119 | noobj_confi_loss = noobj_confi_loss + 0.5 * ((self.pred[i, 4, m, n] - iou1) ** 2) 120 | class_loss = class_loss + torch.sum((self.pred[i, 10:, m, n] - labels[i, 10:, m, n]) ** 2) 121 | else: # 如果不包含物体 122 | noobj_confi_loss = noobj_confi_loss + 0.5 * torch.sum(self.pred[i, [4, 9], m, n] ** 2) 123 | 124 | loss = coor_loss + obj_confi_loss + noobj_confi_loss + class_loss 125 | # 此处可以写代码验证一下loss的大致计算是否正确,这个要验证起来比较麻烦,比较简洁的办法是,将输入的pred置为全1矩阵,再进行误差检查,会直观很多。 126 | return loss / n_batch 127 | 128 | def calculate_metric(self, preds, labels): 129 | """ 130 | TODO: 根据preds和labels,以及指定的评价方法计算网络效果得分, 网络validation时使用 131 | :param preds: 预测数据 132 | :param labels: 预测数据对应的样本标签 133 | :return: 评估得分metric 134 | """ 135 | preds = preds.double() 136 | labels = labels[:, :(self.n_points*2)] 137 | l2_distance = torch.mean(torch.sum((preds-labels)**2, dim=1)) 138 | return l2_distance 139 | 140 | 141 | if __name__ == '__main__': 142 | # 自定义输入张量,验证网络可以正常跑通,并计算loss,调试用 143 | x = torch.zeros(5,3,448,448) 144 | net = MyNet() 145 | print(net) 146 | a = net(x) 147 | labels = torch.zeros(5, 30, 7, 7) 148 | loss = net.calculate_loss(labels) 149 | print(loss) 150 | print(a.shape) 151 | -------------------------------------------------------------------------------- /my_arguments.py: -------------------------------------------------------------------------------- 1 | """ 2 | time:2021.4.26 3 | author:Jylyy 4 | 5 | """ 6 | 7 | import argparse 8 | import torch 9 | 10 | class Args(object): 11 | """ 12 | 设置命令行参数的接口 13 | """ 14 | def __init__(self): 15 | self.parser = argparse.ArgumentParser() 16 | 17 | def set_train_args(self): 18 | """options for train""" 19 | self.parser.add_argument("--batch_size", type=int, default=1) 20 | self.parser.add_argument("--lr", type=float, default=0.0001, help="learning rate") 21 | self.parser.add_argument("--weight_decay", type=float, default=1e-4) 22 | self.parser.add_argument("--epoch", type=int, default=60, help="number of end epoch") 23 | self.parser.add_argument("--start_epoch", type=int, default=1, help="number of start epoch") 24 | self.parser.add_argument("--use_GPU", action="store_true", help="identify whether to use gpu") 25 | self.parser.add_argument("--GPU_id", type=int, default=None, help="device id") 26 | self.parser.add_argument("--dataset_dir", type=str, default=r"D:\wangpan\VOC\VOCdevkit\VOC2012\voc2012_forYolov1") 27 | self.parser.add_argument("--checkpoints_dir", type=str, default="./checkpoints") 28 | self.parser.add_argument("--print_freq", type=int, default=10, 29 | help="print training information frequency (per n iteration)") 30 | self.parser.add_argument("--save_freq", type=int, default=20, help="save model frequency (per n epoch)") 31 | self.parser.add_argument("--num_workers", type=int, default=0, help="use n threads to read data") 32 | self.parser.add_argument("--pretrain", type=str, default=r"C:\Users\Owen\Desktop\YOLOv1-from-scratch-master\checkpoints\epoch60.tar", help="pretrain model path") 33 | #self.parser.add_argument("--pretrain", type=str,default=None) 34 | self.parser.add_argument("--random_seed", type=int, default=0, help="random seed for split dataset") 35 | 36 | self.opts = self.parser.parse_args() 37 | 38 | if torch.cuda.is_available(): 39 | self.opts.use_GPU = False 40 | self.opts.GPU_id = torch.cuda.current_device() 41 | print("use GPU %d to train." % (self.opts.GPU_id)) 42 | else: 43 | print("use CPU to train.") 44 | 45 | def set_test_args(self): 46 | """options for inference""" 47 | self.parser.add_argument("--batch_size", type=int, default=1) 48 | self.parser.add_argument("--use_GPU", action="store_true", help="identify whether to use gpu") 49 | self.parser.add_argument("--GPU_id", type=int, default=None, help="device id") 50 | self.parser.add_argument("--dataset_dir", type=str, default=r"D:\wangpan\VOC\VOCdevkit\VOC2012\voc2012_forYolov1\testimg") 51 | self.parser.add_argument("--weight_path", type=str, 52 | default=r"checkpoints\epoch60.tar", 53 | help="load path for model weight") 54 | self.opts = self.parser.parse_args() 55 | if torch.cuda.is_available(): 56 | self.opts.use_GPU = False 57 | self.opts.GPU_id = torch.cuda.current_device() 58 | print("use GPU %d to train." % (self.opts.GPU_id)) 59 | else: 60 | print("use CPU to train.") 61 | 62 | def get_opts(self): 63 | return self.opts -------------------------------------------------------------------------------- /prepare_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | time:2021.4.26 3 | author:Jylyy 4 | 5 | """ 6 | 7 | """ 8 | 数据准备,将数据处理为两个文件,一个是train.csv,另一个是train.txt。同理也会有test.csv, test.txt 9 | train.csv: 每一行是一张图片的标签,具体储存情况根据不同任务的需求自行设定 10 | train.txt: 每一行是图片的路径,该文件每行的图片和train.csv的每一行标注应该是一一对应的 11 | 另外,根据需要将图片稍微离线处理一下,比如将原图片裁剪出训练使用的图片(resize成训练要求大小)后,保存在自定义文件夹中,train.txt里的路径应与自定义文件夹相同 12 | """ 13 | import xml.etree.ElementTree as ET 14 | import numpy as np 15 | import cv2 16 | import random 17 | import os 18 | 19 | 20 | GL_CLASSES = ['person', 'bird', 'cat', 'cow', 'dog', 'horse', 'sheep', 21 | 'aeroplane', 'bicycle', 'boat', 'bus', 'car', 'motorbike', 'train', 22 | 'bottle', 'chair', 'diningtable', 'pottedplant', 'sofa', 'tvmonitor'] 23 | 24 | GL_NUMBBOX = 2 #bbox数量 25 | GL_NUMGRID = 7 #grid cell数量 26 | 27 | STATIC_DATASET_PATH = r'D:\wangpan\VOC\VOCdevkit\VOC2012' 28 | STATIC_DEBUG = False # 调试用 29 | 30 | 31 | def convert(size, box): 32 | """将bbox的左上角点、右下角点坐标的格式,转换为bbox中心点+bbox的w,h的格式 33 | 并进行归一化""" 34 | dw = 1. / size[0] 35 | dh = 1. / size[1] 36 | x = (box[0] + box[1]) / 2.0 37 | y = (box[2] + box[3]) / 2.0 38 | w = box[1] - box[0] 39 | h = box[3] - box[2] 40 | x = x * dw 41 | w = w * dw 42 | y = y * dh 43 | h = h * dh 44 | return (x, y, w, h) 45 | 46 | 47 | def convert_annotation(anno_dir, image_id, labels_dir): 48 | """把图像image_id的xml文件转换为目标检测的label文件(txt):(class,x,y,w,h) 49 | 其中包含物体的类别,bbox的左上角点坐标以及bbox的宽、高 50 | 并将四个物理量归一化""" 51 | in_file = open(os.path.join(anno_dir, 'Annotations/%s' % (image_id))) 52 | image_id = image_id.split('.')[0] 53 | tree = ET.parse(in_file) 54 | root = tree.getroot() 55 | size = root.find('size') 56 | w = int(size.find('width').text) 57 | h = int(size.find('height').text) 58 | 59 | for obj in root.iter('object'): 60 | difficult = obj.find('difficult').text 61 | cls = obj.find('name').text 62 | if cls not in GL_CLASSES or int(difficult) == 1: 63 | continue 64 | cls_id = GL_CLASSES.index(cls) 65 | xmlbox = obj.find('bndbox') 66 | points = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), 67 | float(xmlbox.find('ymax').text)) 68 | bb = convert((w, h), points) #返回(x,y,w,h) 69 | with open(os.path.join(labels_dir, '%s.txt' % (image_id)), 'a') as out_file: 70 | out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 71 | 72 | 73 | def make_label_txt(anno_dir, labels_dir): 74 | """在labels文件夹下创建image_id.txt,对应每个image_id.xml提取出的bbox信息""" 75 | filenames = os.listdir(os.path.join(anno_dir,'Annotations')) 76 | for file in filenames: 77 | convert_annotation(anno_dir, file, labels_dir) 78 | 79 | 80 | def img_augument(img_dir, save_img_dir, labels_dir): 81 | imgs_list = [x.split('.')[0]+".jpg" for x in os.listdir(labels_dir)] 82 | for img_name in imgs_list: 83 | print("process %s"%os.path.join(img_dir, img_name)) 84 | img = cv2.imread(os.path.join(img_dir, img_name)) 85 | h, w = img.shape[0:2] 86 | input_size = 448 # 输入YOLOv1网络的图像尺寸为448x448 87 | # 因为数据集内原始图像的尺寸是不定的,所以需要进行适当的padding,将原始图像padding成宽高一致的正方形 88 | # 然后再将Padding后的正方形图像缩放成448x448 89 | padw, padh = 0, 0 # 要记录宽高方向的padding具体数值,因为padding之后需要调整bbox的位置信息 90 | if h > w: 91 | padw = (h - w) // 2 92 | img = np.pad(img, ((0, 0), (padw, padw), (0, 0)), 'constant', constant_values=0) 93 | elif w > h: 94 | padh = (w - h) // 2 95 | img = np.pad(img, ((padh, padh), (0, 0), (0, 0)), 'constant', constant_values=0) 96 | img = cv2.resize(img, (input_size, input_size)) 97 | cv2.imwrite(os.path.join(save_img_dir, img_name), img) 98 | # 读取图像对应的bbox信息,按1维的方式储存,每5个元素表示一个bbox的(cls,xc,yc,w,h) 99 | with open(os.path.join(labels_dir,img_name.split('.')[0] + ".txt"), 'r') as f: 100 | bbox = f.read().split('\n') 101 | bbox = [x.split() for x in bbox] 102 | bbox = [float(x) for y in bbox for x in y] 103 | if len(bbox) % 5 != 0: 104 | raise ValueError("File:" 105 | + os.path.join(labels_dir,img_name.split('.')[0] + ".txt") + "——bbox Extraction Error!") 106 | 107 | # 根据padding、图像增广等操作,将原始的bbox数据转换为修改后图像的bbox数据 108 | if padw != 0: 109 | for i in range(len(bbox) // 5): 110 | bbox[i * 5 + 1] = (bbox[i * 5 + 1] * w + padw) / h 111 | bbox[i * 5 + 3] = (bbox[i * 5 + 3] * w) / h 112 | if STATIC_DEBUG: 113 | cv2.rectangle(img, (int(bbox[1] * input_size - bbox[3] * input_size / 2), 114 | int(bbox[2] * input_size - bbox[4] * input_size / 2)), 115 | (int(bbox[1] * input_size + bbox[3] * input_size / 2), 116 | int(bbox[2] * input_size + bbox[4] * input_size / 2)), (0, 0, 255)) 117 | elif padh != 0: 118 | for i in range(len(bbox) // 5): 119 | bbox[i * 5 + 2] = (bbox[i * 5 + 2] * h + padh) / w 120 | bbox[i * 5 + 4] = (bbox[i * 5 + 4] * h) / w 121 | if STATIC_DEBUG: 122 | cv2.rectangle(img, (int(bbox[1] * input_size - bbox[3] * input_size / 2), 123 | int(bbox[2] * input_size - bbox[4] * input_size / 2)), 124 | (int(bbox[1] * input_size + bbox[3] * input_size / 2), 125 | int(bbox[2] * input_size + bbox[4] * input_size / 2)), (0, 0, 255)) 126 | # 此处可以写代码验证一下,查看padding后修改的bbox数值是否正确,在原图中画出bbox检验 127 | if STATIC_DEBUG: 128 | cv2.imshow("bbox-%d"%int(bbox[0]), img) 129 | cv2.waitKey(0) 130 | with open(os.path.join(labels_dir, img_name.split('.')[0] + ".txt"), 'w') as f: 131 | for i in range(len(bbox) // 5): 132 | bbox = [str(x) for x in bbox[i*5:(i*5+5)]] 133 | str_context = " ".join(bbox)+'\n' 134 | f.write(str_context) 135 | 136 | 137 | def convert_bbox2labels(bbox): 138 | """将bbox的(cls,x,y,w,h)数据转换为训练时方便计算Loss的数据形式(7,7,5*B+cls_num) 139 | 注意,输入的bbox的信息是(xc,yc,w,h)格式的,转换为labels后,bbox的信息转换为了(px,py,w,h)格式""" 140 | gridsize = 1.0/GL_NUMGRID 141 | labels = np.zeros((7,7,5*GL_NUMBBOX+len(GL_CLASSES))) # 注意,此处需要根据不同数据集的类别个数进行修改 142 | for i in range(len(bbox)//5): 143 | gridx = int(bbox[i*5+1] // gridsize) # 当前bbox中心落在第gridx个网格,列 144 | gridy = int(bbox[i*5+2] // gridsize) # 当前bbox中心落在第gridy个网格,行 145 | # (bbox中心坐标 - 网格左上角点的坐标)/网格大小 ==> bbox中心点的相对位置 146 | gridpx = bbox[i * 5 + 1] / gridsize - gridx 147 | gridpy = bbox[i * 5 + 2] / gridsize - gridy 148 | # 将第gridy行,gridx列的网格设置为负责当前ground truth的预测,置信度和对应类别概率均置为1 !!!!!!!!出现错误 149 | labels[gridy, gridx, 0:5] = np.array([gridpx, gridpy, bbox[i * 5 + 3], bbox[i * 5 + 4], 1]) 150 | labels[gridy, gridx, 5:10] = np.array([gridpx, gridpy, bbox[i * 5 + 3], bbox[i * 5 + 4], 1]) 151 | labels[gridy, gridx, 10+int(bbox[i*5])] = 1 152 | 153 | labels = labels.reshape(1, -1) 154 | return labels 155 | 156 | 157 | def create_csv_txt(img_dir, anno_dir, save_root_dir, train_val_ratio=0.9, padding=10, debug=False): 158 | """ 159 | TODO: 160 | 将img_dir文件夹内的图片按实际需要处理后,存入save_dir 161 | 最终得到图片文件夹及所有图片对应的标注(train.csv/test.csv)和图片列表文件(train.txt, test.txt) 162 | """ 163 | labels_dir = os.path.join(anno_dir, "labels") 164 | if not os.path.exists(labels_dir): 165 | os.mkdir(labels_dir) 166 | make_label_txt(anno_dir, labels_dir) 167 | print("labels done.") 168 | save_img_dir = os.path.join(os.path.join(anno_dir, "voc2012_forYolov1"), "img") 169 | if not os.path.exists(save_img_dir): 170 | os.mkdir(save_img_dir) 171 | img_augument(img_dir, save_img_dir, labels_dir) 172 | imgs_list = os.listdir(save_img_dir) 173 | n_trainval = len(imgs_list) 174 | shuffle_id = list(range(n_trainval)) 175 | random.shuffle(shuffle_id) 176 | n_train = int(n_trainval*train_val_ratio) 177 | train_id = shuffle_id[:n_train] 178 | test_id = shuffle_id[n_train:] 179 | traintxt = open(os.path.join(save_root_dir, "train.txt"), 'w') 180 | traincsv = np.zeros((n_train, GL_NUMGRID*GL_NUMGRID*(5*GL_NUMBBOX+len(GL_CLASSES))),dtype=np.float32) 181 | for i,id in enumerate(train_id): 182 | img_name = imgs_list[id] 183 | img_path = os.path.join(save_img_dir, img_name)+'\n' 184 | traintxt.write(img_path) 185 | with open(os.path.join(labels_dir,"%s.txt"%img_name.split('.')[0]), 'r') as f: 186 | bbox = [float(x) for x in f.read().split()] 187 | traincsv[i,:] = convert_bbox2labels(bbox) 188 | np.savetxt(os.path.join(save_root_dir, "train.csv"), traincsv) 189 | print("Create %d train data." % (n_train)) 190 | 191 | testtxt = open(os.path.join(save_root_dir, "test.txt"), 'w') 192 | testcsv = np.zeros((n_trainval - n_train, GL_NUMGRID*GL_NUMGRID*(5*GL_NUMBBOX+len(GL_CLASSES))),dtype=np.float32) 193 | for i,id in enumerate(test_id): 194 | img_name = imgs_list[id] 195 | img_path = os.path.join(save_img_dir, img_name)+'\n' 196 | testtxt.write(img_path) 197 | with open(os.path.join(labels_dir,"%s.txt"%img_name.split('.')[0]), 'r') as f: 198 | bbox = [float(x) for x in f.read().split()] 199 | testcsv[i,:] = convert_bbox2labels(bbox) 200 | np.savetxt(os.path.join(save_root_dir, "test.csv"), testcsv) 201 | print("Create %d test data." % (n_trainval-n_train)) 202 | 203 | 204 | if __name__ == '__main__': 205 | random.seed(0) 206 | np.set_printoptions(threshold=np.inf) 207 | img_dir = os.path.join(STATIC_DATASET_PATH, "JPEGImages") # 原始图像文件夹 208 | anno_dirs = [STATIC_DATASET_PATH] # 标注文件 209 | save_dir = os.path.join(STATIC_DATASET_PATH, "voc2012_forYolov1") # 保存处理后的数据(图片+标签)的文件夹 210 | if not os.path.exists(save_dir): 211 | os.mkdir(save_dir) 212 | # 分别处理 213 | for anno_dir in anno_dirs: 214 | create_csv_txt(img_dir, anno_dir, save_dir, debug=False) 215 | -------------------------------------------------------------------------------- /prepare_test.py: -------------------------------------------------------------------------------- 1 | """ 2 | time:2021.4.26 3 | author:Jylyy 4 | 5 | """ 6 | 7 | """ 8 | 将预处理后的图片中测试集的图片存放到testimg文件夹用来测试 9 | """ 10 | 11 | import os, random, glob 12 | from shutil import copyfile 13 | 14 | imgtxt_dir = r'D:\wangpan\VOC\VOCdevkit\VOC2012\voc2012_forYolov1\train.txt' 15 | img_dir = r'D:\wangpan\VOC\VOCdevkit\VOC2012\voc2012_forYolov1\img' 16 | save_testimg = r'D:\wangpan\VOC\VOCdevkit\VOC2012\voc2012_forYolov1\testimg' 17 | choseImg = [] 18 | 19 | 20 | #通过glob.glob来获取原始路径下,所有'.jpg'文件 21 | imageList1 = glob.glob(os.path.join(img_dir, '*.jpg')) 22 | 23 | f = open(imgtxt_dir,"r") #设置文件对象 24 | line = f.readline() 25 | line = line[:-1] 26 | while line: #直到读取完文件 27 | line = f.readline().strip() #读取一行文件,包括换行符 28 | if os.path.exists(line): 29 | choseImg.append(os.path.basename(line)) 30 | 31 | for i in choseImg: 32 | # 将随机选中的jpg文件遍历复制到目标文件夹中 33 | copyfile(img_dir + '/' + i, save_testimg + '/' + i) 34 | 35 | f.close() #关闭文件 36 | 37 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | """ 2 | time:2021.4.26 3 | author:Jylyy 4 | 5 | """ 6 | import os 7 | from my_arguments import Args 8 | import torch 9 | from torch.utils.data import DataLoader 10 | 11 | from model import MyNet 12 | from data import MyDataset 13 | from util import labels2bbox 14 | from prepare_data import GL_CLASSES 15 | import torchvision.transforms as transforms 16 | from PIL import Image 17 | import cv2 18 | 19 | 20 | COLOR = [(255,0,0),(255,125,0),(255,255,0),(255,0,125),(255,0,250), 21 | (255,125,125),(255,125,250),(125,125,0),(0,255,125),(255,0,0), 22 | (0,0,255),(125,0,255),(0,125,255),(0,255,255),(125,125,255), 23 | (0,255,0),(125,255,125),(255,255,255),(100,100,100),(0,0,0),] # 用来标识20个类别的bbox颜色,可自行设定 24 | 25 | 26 | class TestInterface(object): 27 | """ 28 | 网络测试接口, 29 | main(): 网络测试主函数 30 | """ 31 | def __init__(self, opts): 32 | self.opts = opts 33 | print("=======================Start inferring.=======================") 34 | 35 | def main(self): 36 | """ 37 | 具体测试流程根据不同项目有较大区别,需要自行编写代码,主要流程如下: 38 | 1. 获取命令行参数 39 | 2. 获取测试集 40 | 3. 加载网络模型 41 | 4. 用网络模型对测试集进行测试,得到测试结果 42 | 5. 根据不同项目,计算测试集的评价指标, 或者可视化测试结果 43 | """ 44 | opts = self.opts 45 | img_list = os.listdir(opts.dataset_dir) 46 | trans = transforms.Compose([ 47 | # transforms.Resize((112, 112)), 48 | transforms.ToTensor(), 49 | ]) 50 | model = torch.load(opts.weight_path) 51 | if opts.use_GPU: 52 | model.to(opts.GPU_id) 53 | for img_name in img_list: 54 | img_path = os.path.join(opts.dataset_dir, img_name) 55 | img = Image.open(img_path).convert('RGB') 56 | img = trans(img) 57 | img = torch.unsqueeze(img, dim=0) 58 | print(img_name, img.shape) 59 | if opts.use_GPU: 60 | img = img.to(opts.GPU_id) 61 | preds = torch.squeeze(model(img), dim=0).detach().cpu() 62 | preds = preds.permute(1,2,0) 63 | bbox = labels2bbox(preds) 64 | draw_img = cv2.imread(img_path) 65 | self.draw_bbox(draw_img, bbox) 66 | 67 | def draw_bbox(self, img, bbox): 68 | """ 69 | 根据bbox的信息在图像上绘制bounding box 70 | :param img: 绘制bbox的图像 71 | :param bbox: 是(n,6)的尺寸,0:4是(x1,y1,x2,y2), 4是conf, 5是cls 72 | """ 73 | h, w = img.shape[0:2] 74 | n = bbox.shape[0] 75 | for i in range(n): 76 | confidence = bbox[i, 4] 77 | if confidence<0.2: 78 | continue 79 | p1 = (int(w * bbox[i, 0]), int(h * bbox[i, 1])) 80 | p2 = (int(w * bbox[i, 2]), int(h * bbox[i, 3])) 81 | cls_name = GL_CLASSES[int(bbox[i, 5])] 82 | print(cls_name, p1, p2) 83 | cv2.rectangle(img, p1, p2, COLOR[int(bbox[i, 5])]) 84 | cv2.putText(img, cls_name, p1, cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255)) 85 | cv2.putText(img, str(confidence), (p1[0],p1[1]-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255)) 86 | cv2.imshow("bbox", img) 87 | cv2.waitKey(0) 88 | 89 | 90 | if __name__ == '__main__': 91 | # 网络测试代码 92 | args = Args() 93 | args.set_test_args() # 获取命令行参数 94 | test_interface = TestInterface(args.get_opts()) 95 | test_interface.main() # 调用测试接口 -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | """ 2 | time:2021.4.26 3 | author:Jylyy 4 | 5 | """ 6 | 7 | import os 8 | import datetime 9 | import time 10 | import torch 11 | from torch.utils.data import DataLoader 12 | 13 | from model import MyNet 14 | from data import MyDataset 15 | from my_arguments import Args 16 | from prepare_data import GL_CLASSES, GL_NUMBBOX, GL_NUMGRID 17 | from util import labels2bbox 18 | 19 | 20 | class TrainInterface(object): 21 | """ 22 | 网络训练接口, 23 | __train(): 训练过程函数 24 | __validate(): 验证过程函数 25 | __save_model(): 保存模型函数 26 | main(): 训练网络主函数 27 | """ 28 | #初始化 29 | def __init__(self, opts): 30 | """ 31 | :param opts: 命令行参数 32 | """ 33 | self.opts = opts 34 | print("=======================Start training.=======================") 35 | 36 | #训练 37 | @staticmethod 38 | def __train(model, train_loader, optimizer, epoch, num_train, opts): 39 | """ 40 | 完成一个epoch的训练 41 | :param model: torch.nn.Module, 需要训练的网络 42 | :param train_loader: torch.utils.data.Dataset, 训练数据集对应的类 43 | :param optimizer: torch.optim.Optimizer, 优化网络参数的优化器 44 | :param epoch: int, 表明当前训练的是第几个epoch 45 | :param num_train: int, 训练集数量 46 | :param opts: 命令行参数 47 | """ 48 | model.train() 49 | device = opts.GPU_id 50 | avg_metric = 0. # 平均评价指标 51 | avg_loss = 0. # 平均损失数值 52 | losssum = 0. 53 | # log_file是保存网络训练过程信息的文件,网络训练信息会以追加的形式打印在log.txt里,不会覆盖原有log文件 54 | log_file = open(os.path.join(opts.checkpoints_dir, "log.txt"), "a+") 55 | localtime = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") # 打印训练时间 56 | log_file.write(localtime) 57 | log_file.write("\n======================training epoch %d======================\n"%epoch) 58 | for i,(imgs, labels) in enumerate(train_loader): 59 | labels1 = labels.view(opts.batch_size, GL_NUMGRID, GL_NUMGRID, -1) 60 | labels1 = labels1.permute(0,3,1,2) 61 | if opts.use_GPU: 62 | imgs = imgs.to(device) 63 | labels1 = labels1.to(device) 64 | preds = model(imgs) # 前向传播 65 | loss = model.calculate_loss(preds,labels1) # 计算损失 66 | optimizer.zero_grad() # 梯度清零 67 | loss.backward() # 反向传播 68 | optimizer.step() # 优化网络参数 69 | # metric = model.calculate_metric(preds, labels) # 计算评价指标 70 | # avg_metric = (avg_metric*i+metric)/(i+1) 71 | #avg_loss = (avg_loss*i+loss.item())/(i+1) 72 | losssum += loss.item() 73 | #if i % opts.print_freq == 0: # 根据打印频率输出log信息和训练信息 74 | # if (t+1) % 20 == 0: 75 | print("Epoch %d/%d | training loss = %.3f, avg_loss = %.3f" % 76 | (epoch, opts.epoch, loss.item(), losssum/num_train)) 77 | log_file.write("Epoch %d/%d | training loss = %.3f, avg_loss = %.3f\n" % 78 | (epoch, opts.epoch, loss.item(), losssum/num_train)) 79 | log_file.flush() 80 | log_file.close() 81 | 82 | @staticmethod 83 | def __validate(model, val_loader, epoch, num_val, opts): 84 | """ 85 | 完成一个epoch训练后的验证任务 86 | :param model: torch.nn.Module, 需要训练的网络 87 | :param _loader: torch.utils.data.Dataset, 验证数据集对应的类 88 | :param epoch: int, 表明当前训练的是第几个epoch 89 | :param num_val: int, 验证集数量 90 | :param opts: 命令行参数 91 | """ 92 | model.eval() 93 | log_file = open(os.path.join(opts.checkpoints_dir, "log.txt"), "a+") 94 | log_file.write("======================validate epoch %d======================\n"%epoch) 95 | preds = None 96 | gts = None 97 | avg_metric = 0. 98 | with torch.no_grad(): # 加上这个可以减少在validation过程时的显存占用,提高代码的显存利用率 99 | for i,(imgs, labels) in enumerate(val_loader): 100 | if opts.use_GPU: 101 | imgs = imgs.to(opts.GPU_id) 102 | pred = model(imgs).cpu().squeeze(dim=0).permute(1,2,0) 103 | pred_bbox = labels2bbox(pred) # 将网络输出经过NMS后转换为shape为(-1, 6)的bbox 104 | metric = model.calculate_metric(preds, gts) 105 | print("Evaluation of validation result: average L2 distance = %.5f"%(metric)) 106 | log_file.write("Evaluation of validation result: average L2 distance = %.5f\n"%(metric)) 107 | log_file.flush() 108 | log_file.close() 109 | return metric 110 | 111 | @staticmethod 112 | def __save_model(model, epoch, opts): 113 | """ 114 | 保存第epoch个网络的参数 115 | :param model: torch.nn.Module, 需要训练的网络 116 | :param epoch: int, 表明当前训练的是第几个epoch 117 | :param opts: 命令行参数 118 | """ 119 | model_name = "epoch%d.tar" % epoch 120 | save_dir = os.path.join(opts.checkpoints_dir, model_name) 121 | torch.save(model, save_dir) 122 | 123 | 124 | def main(self): 125 | """ 126 | 训练接口主函数,完成整个训练流程 127 | 1. 创建训练集和验证集的DataLoader类 128 | 2. 初始化带训练的网络 129 | 3. 选择合适的优化器 130 | 4. 训练并验证指定个epoch,保存其中评价指标最好的模型,并打印训练过程信息 131 | 5. TODO: 可视化训练过程信息 132 | """ 133 | opts = self.opts 134 | if not os.path.exists(opts.checkpoints_dir): 135 | os.mkdir(opts.checkpoints_dir) 136 | random_seed = opts.random_seed 137 | train_dataset = MyDataset(opts.dataset_dir, seed=random_seed, mode="train", train_val_ratio=0.9) 138 | val_dataset = MyDataset(opts.dataset_dir, seed=random_seed, mode="val", train_val_ratio=0.9) 139 | train_loader = DataLoader(train_dataset, opts.batch_size, shuffle=False, num_workers=0,drop_last=True) 140 | val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=opts.num_workers) 141 | num_train = len(train_dataset) 142 | num_val = len(val_dataset) 143 | 144 | if opts.pretrain is None: 145 | model = MyNet() 146 | else: 147 | model = torch.load(opts.pretrain) 148 | if opts.use_GPU: 149 | model.to(opts.GPU_id) 150 | optimizer = torch.optim.SGD(model.parameters(), lr=opts.lr, momentum=0.9, weight_decay=opts.weight_decay) 151 | # optimizer = torch.optim.Adam(model.parameters(), lr=opts.lr, weight_decay=opts.weight_decay) 152 | 153 | best_metric=1000000 154 | for e in range(opts.start_epoch+60, opts.epoch+31): 155 | t = time.time() 156 | self.__train(model, train_loader, optimizer, e, num_train, opts) 157 | t2 = time.time() 158 | print("Training consumes %.2f second\n" % (t2-t)) 159 | with open(os.path.join(opts.checkpoints_dir, "log.txt"), "a+") as log_file: 160 | log_file.write("Training consumes %.2f second\n" % (t2-t)) 161 | if e % opts.save_freq==0 or e == opts.epoch+1: 162 | # t = time.time() 163 | # metric = self.__validate(model, val_loader, e, num_val, opts) 164 | # t2 = time.time() 165 | # print("Validation consumes %.2f second\n" % (t2 - t)) 166 | # with open(os.path.join(opts.checkpoints_dir, "log.txt"), "a+") as log_file: 167 | # log_file.write("Validation consumes %.2f second\n" % (t2 - t)) 168 | # if best_metric>metric: 169 | # best_metric = metric 170 | # print("Epoch %d is now the best epoch with metric %.4f\n"%(e, best_metric)) 171 | # with open(os.path.join(opts.checkpoints_dir, "log.txt"), "a+") as log_file: 172 | # log_file.write("Epoch %d is now the best epoch with metric %.4f\n"%(e, best_metric)) 173 | self.__save_model(model, e, opts) 174 | 175 | 176 | if __name__ == '__main__': 177 | # 训练网络代码 178 | args = Args() 179 | args.set_train_args() # 获取命令行参数 180 | train_interface = TrainInterface(args.get_opts()) 181 | train_interface.main() # 调用训练接口 182 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | """ 2 | time:2021.4.26 3 | author:Jylyy 4 | 5 | """ 6 | 7 | """ 8 | 该文件里提供一些项目需要的功能性函数/类 9 | """ 10 | import torch 11 | import torch.nn as nn 12 | import numpy as np 13 | 14 | 15 | class DepthwiseConv(nn.Module): 16 | """ 17 | 深度可分离卷积层 18 | """ 19 | def __init__(self, in_channels, out_channels, kernel_size, stride, padding=0, bias=True): 20 | super(DepthwiseConv, self).__init__() 21 | self.layers = nn.Sequential( 22 | nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, stride=stride, 23 | padding=padding, groups=in_channels, bias=bias), 24 | nn.BatchNorm2d(num_features=in_channels), 25 | nn.ReLU(), 26 | nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1, bias=bias), 27 | nn.BatchNorm2d(num_features=out_channels), 28 | nn.ReLU() 29 | ) 30 | 31 | def forward(self, inputs): 32 | outputs = self.layers(inputs) 33 | return outputs 34 | 35 | 36 | class InvertedBottleneck(nn.Module): 37 | """ 38 | MobileNet v2 的InvertedBottleneck 39 | """ 40 | def __init__(self, in_channels, out_channels, kernel_size, stride, t_factor, padding=0, bias=True): 41 | super(InvertedBottleneck, self).__init__() 42 | mid_channels = t_factor*in_channels 43 | self.layers = nn.Sequential( 44 | nn.Conv2d(in_channels=in_channels, out_channels=mid_channels, kernel_size=1, bias=bias), 45 | nn.BatchNorm2d(num_features=mid_channels), 46 | nn.ReLU(), 47 | nn.Conv2d(in_channels=mid_channels, out_channels=mid_channels, kernel_size=kernel_size, stride=stride, 48 | padding=padding, bias=bias), 49 | nn.BatchNorm2d(num_features=mid_channels), 50 | nn.ReLU(), 51 | nn.Conv2d(in_channels=mid_channels, out_channels=out_channels, kernel_size=1, bias=bias) 52 | ) 53 | 54 | def forward(self, inputs): 55 | outputs = self.layers(inputs) 56 | return outputs 57 | 58 | class Flatten(nn.Module): 59 | """ 60 | 将三维张量拉平的网络层 61 | (n,c,h,w) -> (n, c*h*w) 62 | """ 63 | def __init__(self): 64 | super(Flatten, self).__init__() 65 | 66 | def forward(self, x): 67 | n_samples = x.shape[0] 68 | x = x.reshape(n_samples, -1) 69 | return x 70 | 71 | 72 | def calculate_iou(bbox1, bbox2): 73 | """计算bbox1=(x1,y1,x2,y2)和bbox2=(x3,y3,x4,y4)两个bbox的iou""" 74 | if bbox1[2]<=bbox1[0] or bbox1[3]<=bbox1[1] or bbox2[2]<=bbox2[0] or bbox2[3]<=bbox2[1]: 75 | return 0 # 如果bbox1或bbox2没有面积,或者输入错误,直接返回0 76 | 77 | intersect_bbox = [0., 0., 0., 0.] # bbox1和bbox2的重合区域的(x1,y1,x2,y2) 78 | 79 | intersect_bbox[0] = max(bbox1[0],bbox2[0]) 80 | intersect_bbox[1] = max(bbox1[1],bbox2[1]) 81 | intersect_bbox[2] = min(bbox1[2],bbox2[2]) 82 | intersect_bbox[3] = min(bbox1[3],bbox2[3]) 83 | 84 | w = max(intersect_bbox[2] - intersect_bbox[0], 0) 85 | h = max(intersect_bbox[3] - intersect_bbox[1], 0) 86 | area1 = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1]) # bbox1面积 87 | area2 = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1]) # bbox2面积 88 | area_intersect = w * h # 交集面积 89 | iou = area_intersect / (area1 + area2 - area_intersect + 1e-6) # 防止除0 90 | # print(bbox1,bbox2) 91 | # print(intersect_bbox) 92 | # input() 93 | return iou 94 | 95 | 96 | # 注意检查一下输入数据的格式,到底是xywh还是xyxy 97 | def labels2bbox(matrix): 98 | """ 99 | 将网络输出的7*7*30的数据转换为bbox的(98,25)的格式,然后再将NMS处理后的结果返回 100 | :param matrix: 注意,输入的数据中,bbox坐标的格式是(px,py,w,h),需要转换为(x1,y1,x2,y2)的格式再输入NMS 101 | :return: 返回NMS处理后的结果,bboxes.shape = (-1, 6), 0:4是(x1,y1,x2,y2), 4是conf, 5是cls 102 | """ 103 | if matrix.size()[0:2]!=(7,7): 104 | raise ValueError("Error: Wrong labels size: ", matrix.size(), " != (7,7)") 105 | matrix = matrix.numpy() 106 | bboxes = np.zeros((98, 6)) 107 | # 先把7*7*30的数据转变为bbox的(98,25)的格式,其中,bbox信息格式从(px,py,w,h)转换为(x1,y1,x2,y2),方便计算iou 108 | matrix = matrix.reshape(49,-1) 109 | bbox = matrix[:, :10].reshape(98, 5) 110 | r_grid = np.array(list(range(7))) 111 | r_grid = np.repeat(r_grid, repeats=14, axis=0) # [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ...] 112 | c_grid = np.array(list(range(7))) 113 | c_grid = np.repeat(c_grid, repeats=2, axis=0)[np.newaxis, :] 114 | c_grid = np.repeat(c_grid, repeats=7, axis=0).reshape(-1) # [0 0 1 1 2 2 3 3 4 4 5 5 6 6 0 0 1 1 2 2 3 3 4 4 5 5 6 6...] 115 | bboxes[:, 0] = np.maximum((bbox[:, 0] + c_grid) / 7.0 - bbox[:, 2] / 2.0, 0) 116 | bboxes[:, 1] = np.maximum((bbox[:, 1] + r_grid) / 7.0 - bbox[:, 3] / 2.0, 0) 117 | bboxes[:, 2] = np.minimum((bbox[:, 0] + c_grid) / 7.0 + bbox[:, 2] / 2.0, 1) 118 | bboxes[:, 3] = np.minimum((bbox[:, 1] + r_grid) / 7.0 + bbox[:, 3] / 2.0, 1) 119 | bboxes[:, 4] = bbox[:, 4] 120 | cls = np.argmax(matrix[:, 10:], axis=1) 121 | cls = np.repeat(cls, repeats=2, axis=0) 122 | bboxes[:, 5] = cls 123 | # 对所有98个bbox执行NMS算法,清理cls-specific confidence score较低以及iou重合度过高的bbox 124 | keepid = nms_multi_cls(bboxes, thresh=0.1, n_cls=20) 125 | ids = [] 126 | for x in keepid: 127 | ids = ids + list(x) 128 | ids = sorted(ids) 129 | return bboxes[ids, :] 130 | 131 | 132 | def nms_1cls(dets, thresh): 133 | """ 134 | 单类别NMS 135 | :param dets: ndarray,nx5,dets[i,0:4]分别是bbox坐标;dets[i,4]是置信度score 136 | :param thresh: NMS算法设置的iou阈值 137 | """ 138 | # 从检测结果dets中获得x1,y1,x2,y2和scores的值 139 | x1 = dets[:, 0] 140 | y1 = dets[:, 1] 141 | x2 = dets[:, 2] 142 | y2 = dets[:, 3] 143 | scores = dets[:, 4] 144 | 145 | # 计算每个检测框的面积 146 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 147 | # 按照置信度score的值降序排序的下标序列 148 | order = scores.argsort()[::-1] 149 | 150 | # keep用来保存最后保留的检测框的下标 151 | keep = [] 152 | while order.size > 0: 153 | # 当前置信度最高bbox的index 154 | i = order[0] 155 | # 添加当前剩余检测框中得分最高的index到keep中 156 | keep.append(i) 157 | # 得到此bbox和剩余其他bbox的相交区域,左上角和右下角 158 | xx1 = np.maximum(x1[i], x1[order[1:]]) 159 | yy1 = np.maximum(y1[i], y1[order[1:]]) 160 | xx2 = np.minimum(x2[i], x2[order[1:]]) 161 | yy2 = np.minimum(y2[i], y2[order[1:]]) 162 | 163 | # 计算相交的面积,不重叠时面积为0 164 | w = np.maximum(0.0, xx2 - xx1 + 1) 165 | h = np.maximum(0.0, yy2 - yy1 + 1) 166 | inter = w * h 167 | # 计算IoU:重叠面积/(面积1+面积2-重叠面积) 168 | iou = inter / (areas[i] + areas[order[1:]] - inter) 169 | # 保留IoU小于阈值的bbox 170 | inds = np.where(iou <= thresh)[0] 171 | order = order[inds+1] 172 | return keep 173 | 174 | 175 | def nms_multi_cls(dets, thresh, n_cls): 176 | """ 177 | 多类别的NMS算法 178 | :param dets:ndarray,nx6,dets[i,0:4]是bbox坐标;dets[i,4]是置信度score;dets[i,5]是类别序号; 179 | :param thresh: NMS算法的阈值; 180 | :param n_cls: 是类别总数 181 | """ 182 | # 储存结果的列表,keeps_index[i]表示第i类保留下来的bbox下标list 183 | keeps_index = [] 184 | for i in range(n_cls): 185 | order_i = np.where(dets[:,5]==i)[0] 186 | det = dets[dets[:, 5] == i, 0:5] 187 | if det.shape[0] == 0: 188 | keeps_index.append([]) 189 | continue 190 | keep = nms_1cls(det, thresh) 191 | keeps_index.append(order_i[keep]) 192 | return keeps_index 193 | 194 | 195 | if __name__ == '__main__': 196 | a = torch.randn((7,7,30)) 197 | print(a) 198 | labels2bbox(a) --------------------------------------------------------------------------------