├── images
│   ├── kite-pj.jpg
│   ├── kite-7-final.jpg
│   └── loss_analyse_result.png
├── make_yolo_dataset
│   ├── helmet_to_yolo.py
│   ├── show_yolo_label.py
│   ├── show_voc_xml.py
│   ├── YOLO_VOC2007
│   │   ├── voc_label.py
│   │   └── extract_person.py
│   ├── YOLO_VOC2007_2012
│   │   ├── voc_label.py
│   │   ├── extract_person_2012.py
│   │   └── extract_person_2007.py
│   ├── coco_to_yolo.py
│   └── xml2json.py
├── yolo_loss_analyse
│   └── analyse.py
├── OpenCV_make.md
├── yolo_compute_mAP_on_VOC2007
│   ├── reval_voc_py3.py
│   └── voc_eval_py3.py
├── README.md
└── LICENSE

/images/kite-pj.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pascal1129/yolo_person_detect/HEAD/images/kite-pj.jpg
--------------------------------------------------------------------------------
/images/kite-7-final.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pascal1129/yolo_person_detect/HEAD/images/kite-7-final.jpg
--------------------------------------------------------------------------------
/images/loss_analyse_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pascal1129/yolo_person_detect/HEAD/images/loss_analyse_result.png
--------------------------------------------------------------------------------
/make_yolo_dataset/helmet_to_yolo.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pandas as pd
 3 | import numpy as np
 4 | from PIL import Image
 5 | 
 6 | def convert(size, box):
 7 |     dw = 1./(size[0])
 8 |     dh = 1./(size[1])
 9 |     x = (box[0] + box[1])/2.0 - 1
10 |     y = (box[2] + box[3])/2.0 - 1
11 |     w = box[1] - box[0]
12 |     h = box[3] - box[2]
13 |     x = x*dw
14 |     w = w*dw
15 |     y = y*dh
16 |     h = h*dh
17 |     return (x,y,w,h)
18 | 
19 | 
20 | if __name__ == '__main__':
21 |     df = pd.read_csv('./train_labels.csv')
22 |     print('Original shape: ',df.shape)
23 |     df = df.dropna(axis=0)
24 |     labels = np.array(df)
25 |     num_of_labels = labels.shape[0]
26 |     print('Preprocessed: ',num_of_labels)
27 | 
28 |     image_ids = []
29 | 
30 |     for index in range(num_of_labels):
31 |         image_path = './JPEGImages/'+ labels[index,0]
32 |         img = Image.open(image_path)
33 |         w = img.size[0]
34 |         h = img.size[1]
35 | 
36 |         xmin = int(labels[index,1].split(' ')[0])
37 |         ymin = int(labels[index,1].split(' ')[1])
38 |         xmax = int(labels[index,1].split(' ')[2])
39 |         ymax = int(labels[index,1].split(' ')[3])
40 | 
41 |         b = (float(xmin), float(xmax), float(ymin), float(ymax))
42 |         bb = convert((w,h), b)
43 | 
44 |         cls_id = 0
45 | 
46 |         image_id = labels[index,0][:-4]
47 |         with open('./labels/' + image_id + '.txt', 'a+') as out_file:
48 |             out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
49 | 
50 | 
51 |         if image_id not in image_ids:
52 |             image_ids.append(image_id)
53 | 
54 |     print('labels created')
55 | 
56 |     with open('./train.txt','w') as f:
57 |         wd = os.getcwd()
58 |         for image_id in image_ids:
59 |             f.write('%s/JPEGImages/%s.jpg\n'%(wd, image_id))
60 |     # the with-block closes the file automatically
61 | 
62 |     print('train.txt created')
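A quick sanity check of `convert()` above, with made-up numbers (a sketch; it assumes `convert()` from helmet_to_yolo.py is in scope):

```python
# Minimal sketch (hypothetical values): VOC-style (xmin, xmax, ymin, ymax)
# in, YOLO-style normalized (x_center, y_center, w, h) out.
size = (100, 100)                 # image width, height
box = (10.0, 50.0, 20.0, 60.0)    # xmin, xmax, ymin, ymax
x, y, w, h = convert(size, box)
print(x, y, w, h)                 # 0.29 0.39 0.4 0.4
```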
["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 10 | 11 | 12 | def convert_annotation(year, image_id): 13 | 14 | im_path = ('VOCdevkit/VOC%s/JPEGImages/%s.jpg'%(year, image_id)) 15 | im = Image.open(im_path) 16 | 17 | im_w = int(im.size[0]) 18 | im_h = int(im.size[1]) 19 | 20 | draw = ImageDraw.Draw(im) 21 | 22 | 23 | label_path = ('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id)) 24 | 25 | with open(label_path) as file: 26 | line = file.readline() 27 | while line: 28 | cls = int(line.split(' ')[0]) 29 | x = float(line.split(' ')[1]) 30 | y = float(line.split(' ')[2]) 31 | w = float(line.split(' ')[3]) 32 | h = float(line.split(' ')[4].split('\n')[0]) 33 | 34 | # print(cls,x,y,w,h) 35 | 36 | real_w = im_w * w 37 | real_h = im_h * h 38 | x1 = (( x*im_w + 1.0 ) * 2.0 - real_w )/2.0 39 | x2 = (( x*im_w + 1.0 ) * 2.0 + real_w )/2.0 40 | y1 = (( y*im_h + 1.0 ) * 2.0 - real_h )/2.0 41 | y2 = (( y*im_h + 1.0 ) * 2.0 + real_h )/2.0 42 | 43 | draw.rectangle([x1,y1,x2,y2], outline='red', fill=None) 44 | # 防止越界 45 | if y1-15>=10: 46 | draw.text([x1,y1-15],classes[cls],"black") 47 | else: 48 | draw.text([x1,y1],classes[cls],"black") 49 | line = file.readline() 50 | im.show() 51 | im.save('tmp/'+image_id+'.jpg') 52 | 53 | 54 | if __name__ == '__main__': 55 | convert_annotation('2012','2010_001107') 56 | 57 | # with open('./VOCdevkit/VOC2012/ImageSets/Main/train.txt') as file: 58 | # line = file.readline() 59 | # while line: 60 | # convert_annotation('2012',line[:-1]) 61 | # line = file.readline() 62 | # break 63 | # file.close 64 | -------------------------------------------------------------------------------- /yolo_loss_analyse/analyse.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | # %matplotlib inline 5 | 6 | # 提取训练log,去除不可解析的log后,使log文件格式化,生成新的log文件供可视化工具绘图 7 | def extract_log(log_origin, log_new, key_word): 8 | f_origin = open(log_origin) 9 | f_new = open(log_new, 'w') 10 | for line in f_origin: 11 | # 去除多gpu的同步log 12 | if 'Syncing' in line: 13 | continue 14 | # 去除除零错误的log 15 | if 'nan' in line: 16 | continue 17 | if key_word in line: 18 | f_new.write(line.replace(':',',')) 19 | f_new.close() 20 | f_origin.close() 21 | 22 | 23 | def get_loss(file_path): 24 | result = pd.read_csv(file_path, error_bad_lines=False, names=['batch', 'loss', 'avg', 'rate', 'seconds', 'images']) 25 | result['avg']=result['avg'].str.split(' ').str.get(1) 26 | print(result.head()) 27 | print(result.tail()) 28 | 29 | result['batch']=pd.to_numeric(result['batch']) 30 | # result['loss']=pd.to_numeric(result['loss']) 31 | result['avg']=pd.to_numeric(result['avg']) 32 | x = result['batch'].values 33 | y = result['avg'].values 34 | return(x,y) 35 | 36 | def draw_loss(): 37 | x1,y1 = get_loss('train6-loss.txt') 38 | x2,y2 = get_loss('train6.1-loss.txt') 39 | 40 | # 绘制曲线 41 | plt.plot(x1, y1, color='blue') 42 | plt.plot(x2, y2, color='orange') 43 | #设置坐标轴范围 44 | plt.xlim((0,51000)) 45 | plt.ylim((0,2)) 46 | # 设置坐标轴、图片名称 47 | plt.xlabel('batch') 48 | plt.ylabel('avg_loss') 49 | plt.title('loss') 50 | # 设置图例 51 | plt.legend(["train1","train2"], loc="upper right") 52 | plt.show() 53 | 54 | 55 | 56 | 57 | 58 | if __name__ == '__main__': 59 | # extract_log('../../train/train_log/train7.txt','loss/train7-loss.txt','images') 60 | 61 | 62 | # draw_loss() 63 | x1,y1 = 
--------------------------------------------------------------------------------
/yolo_loss_analyse/analyse.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | # %matplotlib inline
 5 | 
 6 | # extract the training log: drop the unparsable lines and write a normalized log file for plotting
 7 | def extract_log(log_origin, log_new, key_word):
 8 |     f_origin = open(log_origin)
 9 |     f_new = open(log_new, 'w')
10 |     for line in f_origin:
11 |         # skip the multi-GPU sync lines
12 |         if 'Syncing' in line:
13 |             continue
14 |         # skip lines containing nan (divide-by-zero)
15 |         if 'nan' in line:
16 |             continue
17 |         if key_word in line:
18 |             f_new.write(line.replace(':',','))
19 |     f_new.close()
20 |     f_origin.close()
21 | 
22 | 
23 | def get_loss(file_path):
24 |     result = pd.read_csv(file_path, error_bad_lines=False, names=['batch', 'loss', 'avg', 'rate', 'seconds', 'images'])
25 |     result['avg']=result['avg'].str.split(' ').str.get(1)
26 |     print(result.head())
27 |     print(result.tail())
28 | 
29 |     result['batch']=pd.to_numeric(result['batch'])
30 |     # result['loss']=pd.to_numeric(result['loss'])
31 |     result['avg']=pd.to_numeric(result['avg'])
32 |     x = result['batch'].values
33 |     y = result['avg'].values
34 |     return(x,y)
35 | 
36 | def draw_loss():
37 |     x1,y1 = get_loss('train6-loss.txt')
38 |     x2,y2 = get_loss('train6.1-loss.txt')
39 | 
40 |     # plot the curves
41 |     plt.plot(x1, y1, color='blue')
42 |     plt.plot(x2, y2, color='orange')
43 |     # set the axis ranges
44 |     plt.xlim((0,51000))
45 |     plt.ylim((0,2))
46 |     # set axis labels and the title
47 |     plt.xlabel('batch')
48 |     plt.ylabel('avg_loss')
49 |     plt.title('loss')
50 |     # set the legend
51 |     plt.legend(["train1","train2"], loc="upper right")
52 |     plt.show()
53 | 
54 | 
55 | 
56 | 
57 | 
58 | if __name__ == '__main__':
59 |     # extract_log('../../train/train_log/train7.txt','loss/train7-loss.txt','images')
60 | 
61 | 
62 |     # draw_loss()
63 |     x1,y1 = get_loss('loss/train7-loss.txt')
64 |     # x4,y4 = get_loss('train7.1-loss.txt')
65 | 
66 |     # plot the curves
67 |     plt.plot(x1, y1, color='red')
68 |     # plt.plot(x4, y4, color='blue')
69 |     # set the axis ranges
70 |     plt.xlim((1,80200))
71 |     plt.ylim((0,2.5))
72 |     # set axis labels and the title
73 |     plt.xlabel('batch')
74 |     plt.ylabel('avg_loss')
75 |     plt.title('loss')
76 |     # set the legend
77 |     # plt.legend(["train6: COCO+VOC","train6.1: COCO+VOC,upsample"], loc="upper right")
78 |     plt.legend(["train7: COCO+VOC, upsample, 8w iters, AB"], loc="upper right")
79 | 
80 |     plt.savefig('result.png')
81 |     plt.show()
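`extract_log()`/`get_loss()` assume darknet's per-batch log line. A representative (made-up) line and what the parser does with it:

```python
# A made-up darknet training log line of the kind extract_log() keeps:
line = '9798: 1.234567, 1.123456 avg, 0.001000 rate, 3.250000 seconds, 627072 images\n'
# extract_log() turns the ':' into ',' so read_csv can split it into the
# batch / loss / avg / rate / seconds / images columns:
fields = line.replace(':', ',').split(',')
avg = fields[2].split(' ')[1]   # ' 1.123456 avg' -> '1.123456'
print(fields[0], avg)           # 9798 1.123456
```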
--------------------------------------------------------------------------------
/make_yolo_dataset/show_voc_xml.py:
--------------------------------------------------------------------------------
 1 | import xml.etree.ElementTree as ET
 2 | import pickle
 3 | import os
 4 | from os import listdir, getcwd
 5 | from os.path import join
 6 | from PIL import Image, ImageFont, ImageDraw
 7 | 
 8 | classes = ['person']
 9 | # classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
10 | 
11 | 
12 | def convert_annotation(year, image_id):
13 | 
14 |     im_path = ('VOCdevkit/VOC%s/JPEGImages/%s.jpg'%(year, image_id))
15 |     im = Image.open(im_path)
16 |     xml_path = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
17 |     tree=ET.parse(xml_path)
18 |     root = tree.getroot()
19 |     size = root.find('size')
20 |     w = int(size.find('width').text)
21 |     h = int(size.find('height').text)
22 | 
23 |     draw = ImageDraw.Draw(im)
24 |     # print(w,h)
25 |     for obj in root.iter('object'):
26 |         difficult = obj.find('difficult').text
27 |         cls = obj.find('name').text
28 |         if cls not in classes:
29 |             continue
30 |         cls_id = classes.index(cls)
31 |         xmlbox = obj.find('bndbox')
32 |         x1,x2,y1,y2 = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
33 |         if int(difficult)==0:
34 |             draw.rectangle([x1,y1,x2,y2], outline='blue', fill=None)
35 |         else:
36 |             draw.rectangle([x1,y1,x2,y2], outline='red', fill=None)
37 | 
38 |         # keep the label text inside the image
39 |         if y1-15>=10:
40 |             draw.text([x1,y1-15],classes[cls_id],"black")
41 |         else:
42 |             draw.text([x1,y1],classes[cls_id],"black")
43 | 
44 |     # im.show()
45 |     im.save('voc2007_with_person/'+image_id+'.jpg')
46 | 
47 | 
48 | if __name__ == '__main__':
49 |     with open('./VOCdevkit/VOC2012/ImageSets/Main/train.txt') as file:
50 |         line = file.readline()
51 |         while line:
52 |             convert_annotation('2012',line[:-1])
53 |             line = file.readline()
54 |             # break
55 |     # the with-block closes the file automatically
56 | 
57 | 
58 | # bad:2010_006507,2010_006104,2010_006097,2010_006158,
59 | # many persons:2010_004439,2010_004597,
60 | # small objects
61 | # difficult
--------------------------------------------------------------------------------
/make_yolo_dataset/YOLO_VOC2007/voc_label.py:
--------------------------------------------------------------------------------
 1 | import xml.etree.ElementTree as ET
 2 | import pickle
 3 | import os
 4 | from os import listdir, getcwd
 5 | from os.path import join
 6 | 
 7 | sets=[('2007', 'train'), ('2007', 'test')]
 8 | 
 9 | classes = ['person']
10 | 
11 | 
12 | def convert(size, box):
13 |     dw = 1./(size[0])
14 |     dh = 1./(size[1])
15 |     x = (box[0] + box[1])/2.0 - 1
16 |     y = (box[2] + box[3])/2.0 - 1
17 |     w = box[1] - box[0]
18 |     h = box[3] - box[2]
19 |     x = x*dw
20 |     w = w*dw
21 |     y = y*dh
22 |     h = h*dh
23 |     return (x,y,w,h)
24 | 
25 | def convert_annotation(year, image_id):
26 |     in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
27 |     out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w')
28 |     tree=ET.parse(in_file)
29 |     root = tree.getroot()
30 |     size = root.find('size')
31 |     w = int(size.find('width').text)
32 |     h = int(size.find('height').text)
33 | 
34 |     for obj in root.iter('object'):
35 |         # difficult = obj.find('difficult').text
36 |         cls = obj.find('name').text
37 |         # if cls not in classes or int(difficult)==1:
38 |         # keep the difficult samples as well
39 |         if cls not in classes:
40 |             continue
41 |         cls_id = classes.index(cls)
42 |         xmlbox = obj.find('bndbox')
43 |         b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
44 |         bb = convert((w,h), b)
45 |         out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
46 | 
47 | wd = getcwd()
48 | 
49 | for year, image_set in sets:
50 |     if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
51 |         os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
52 |     image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
53 |     list_file = open('%s_%s.txt'%(year, image_set), 'w')
54 |     for image_id in image_ids:
55 |         list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id))
56 |         convert_annotation(year, image_id)
57 |     list_file.close()
58 | 
59 | # os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt")
60 | # os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt")
--------------------------------------------------------------------------------
/make_yolo_dataset/YOLO_VOC2007_2012/voc_label.py:
--------------------------------------------------------------------------------
 1 | import xml.etree.ElementTree as ET
 2 | import pickle
 3 | import os
 4 | from os import listdir, getcwd
 5 | from os.path import join
 6 | 
 7 | sets=[('2007', 'train'), ('2007', 'test'), ('2012', 'train')]
 8 | classes = ['person']
 9 | 
10 | 
11 | def convert(size, box):
12 |     dw = 1./(size[0])
13 |     dh = 1./(size[1])
14 |     x = (box[0] + box[1])/2.0 - 1
15 |     y = (box[2] + box[3])/2.0 - 1
16 |     w = box[1] - box[0]
17 |     h = box[3] - box[2]
18 |     x = x*dw
19 |     w = w*dw
20 |     y = y*dh
21 |     h = h*dh
22 |     return (x,y,w,h)
23 | 
24 | def convert_annotation(year, image_id):
25 |     in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
26 |     out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w')
27 |     tree=ET.parse(in_file)
28 |     root = tree.getroot()
29 |     size = root.find('size')
30 |     w = int(size.find('width').text)
31 |     h = int(size.find('height').text)
32 | 
33 |     for obj in root.iter('object'):
34 |         # difficult = obj.find('difficult').text
35 |         cls = obj.find('name').text
36 |         # if cls not in classes or int(difficult)==1:
37 |         # keep the difficult samples as well
38 |         if cls not in classes:
39 |             continue
40 |         cls_id = classes.index(cls)
41 |         xmlbox = obj.find('bndbox')
42 |         b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
43 |         bb = convert((w,h), b)
44 |         out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
45 | 
46 | wd = getcwd()
47 | 
48 | for year, image_set in sets:
49 |     if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
50 |         os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
51 |     image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
52 |     list_file = open('%s_%s.txt'%(year, image_set), 'w')
53 |     for image_id in image_ids:
54 |         list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id))
55 |         convert_annotation(year, image_id)
56 |     list_file.close()
57 | 
58 | # os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt")
59 | # os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt")
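Both voc_label.py scripts emit one text file per image. For reference, a (made-up) `labels/<image_id>.txt`, consistent with what `convert()` produces:

```python
# What a generated labels/<image_id>.txt looks like (made-up numbers):
# one object per line, "class_id x_center y_center width height",
# all coordinates normalized to [0, 1].
example = """0 0.502 0.463 0.418 0.831
0 0.121 0.587 0.090 0.250"""
for obj in example.splitlines():
    cls_id, x, y, w, h = obj.split()
    print(cls_id, float(x), float(y), float(w), float(h))
```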
--------------------------------------------------------------------------------
/make_yolo_dataset/YOLO_VOC2007_2012/extract_person_2012.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | 
 4 | # paths of the person-class trainval/test label lists
 5 | VOC2012_trainval = 'VOCdevkit/VOC2012/ImageSets/Main/person_trainval.txt'
 6 | # VOC2012_test = 'VOCdevkit/VOC2012/ImageSets/Main/person_test.txt'
 7 | # folder that holds the label lists
 8 | VOC2012_label = 'VOCdevkit/VOC2012/ImageSets/Main/'
 9 | 
10 | # image and annotation paths
11 | VOC2012_images = 'VOCdevkit/VOC2012/JPEGImages'
12 | VOC2012_annotations = 'VOCdevkit/VOC2012/Annotations'
13 | 
14 | # unused folders to delete
15 | FOLDER_TO_DELETE = ['VOCdevkit/VOC2012/SegmentationClass', 'VOCdevkit/VOC2012/SegmentationObject', 'VOCdevkit/VOC2012/ImageSets/Layout', 'VOCdevkit/VOC2012/ImageSets/Segmentation','VOCdevkit/VOC2012/ImageSets/Action']
16 | 
17 | # IDs of the images that contain the person class
18 | train_person_index = []
19 | # test_person_index = []
20 | 
21 | def rm_unnecessary_files():
22 |     # delete the unused folders
23 |     for file in FOLDER_TO_DELETE:
24 |         if os.path.exists(file):
25 |             shutil.rmtree(file)
26 |     # remove the unused and stale label files
27 |     for file in os.listdir(VOC2012_label):
28 |         if 'person' not in file:
29 |             os.remove(os.path.join(VOC2012_label, file))
30 |     print('[0] remove unnecessary files done')
31 | 
32 | def get_index(dataset_path):
33 |     person_index = []
34 |     with open(dataset_path,'r') as f:
35 |         line = f.readline()
36 |         # loop until the end of the file
37 |         while line:
38 |             # each line ends with '\n' and the flag is the last field, so checking the third-to-last character is enough
39 |             if line[-3] != '-':
40 |                 index = line.split(' ')[0]
41 |                 # add the ID only if it is not in the list yet
42 |                 if index not in person_index:
43 |                     person_index.append(index)
44 |             line = f.readline()
45 |     # the with-block closes the file automatically
46 |     # sort, for convenience later
47 |     person_index.sort()
48 |     print('[1] extract pics: %d'%(len(person_index)))
49 |     return person_index
50 | 
51 | 
52 | def write_txt(person_index, label_path):
53 |     # write all IDs of this split into one txt file
54 |     with open(os.path.join(VOC2012_label, label_path),'w') as f:
55 |         for index in person_index:
56 |             line = index + '\n'
57 |             f.write(line)
58 |     # the with-block closes the file automatically
59 |     print('    write labels into txt finished')
60 | 
61 | 
62 | if __name__ == '__main__':
63 |     rm_unnecessary_files()                              # delete the unused folders
64 |     train_person_index = get_index(VOC2012_trainval)    # collect the person images in trainval
65 |     write_txt(train_person_index, 'train.txt')
66 |     print('[2] All is done!')
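`get_index()` relies on the layout of VOC's per-class ImageSets files; a (made-up) excerpt of `person_trainval.txt` and how the check above filters it:

```python
# Excerpt (made-up IDs) of an ImageSets/Main/person_*.txt file:
# "<image_id> <flag>", flag 1 = contains person, -1 = does not,
# 0 = difficult only. get_index() keeps a line when line[-3] != '-'.
sample = ['2008_000003 -1\n', '2008_000008  1\n', '2008_000015  0\n']
kept = [l.split(' ')[0] for l in sample if l[-3] != '-']
print(kept)   # ['2008_000008', '2008_000015']
```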
--------------------------------------------------------------------------------
/make_yolo_dataset/YOLO_VOC2007/extract_person.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | 
 4 | # paths of the person-class trainval/test label lists
 5 | VOC2007_trainval = 'VOCdevkit/VOC2007/ImageSets/Main/person_trainval.txt'
 6 | VOC2007_test = 'VOCdevkit/VOC2007/ImageSets/Main/person_test.txt'
 7 | # folder that holds the label lists
 8 | VOC2007_label = 'VOCdevkit/VOC2007/ImageSets/Main/'
 9 | 
10 | # image and annotation paths
11 | VOC2007_images = 'VOCdevkit/VOC2007/JPEGImages'
12 | VOC2007_annotations = 'VOCdevkit/VOC2007/Annotations'
13 | 
14 | # unused folders to delete
15 | FOLDER_TO_DELETE = ['VOCdevkit/VOC2007/SegmentationClass', 'VOCdevkit/VOC2007/SegmentationObject', 'VOCdevkit/VOC2007/ImageSets/Layout', 'VOCdevkit/VOC2007/ImageSets/Segmentation']
16 | 
17 | # IDs of the images that contain the person class
18 | train_person_index = []
19 | test_person_index = []
20 | 
21 | def rm_unnecessary_files():
22 |     # delete the unused folders
23 |     for file in FOLDER_TO_DELETE:
24 |         if os.path.exists(file):
25 |             shutil.rmtree(file)
26 |     # remove the unused and stale label files
27 |     for file in os.listdir(VOC2007_label):
28 |         if 'person' not in file:
29 |             os.remove(os.path.join(VOC2007_label, file))
30 |     print('[0] remove unnecessary files done')
31 | 
32 | def get_index(dataset_path):
33 |     person_index = []
34 |     with open(dataset_path,'r') as f:
35 |         line = f.readline()
36 |         # loop until the end of the file
37 |         while line:
38 |             # each line ends with '\n' and the flag is the last field, so checking the third-to-last character is enough
39 |             if line[-3] != '-':
40 |                 index = line.split(' ')[0]
41 |                 # add the ID only if it is not in the list yet
42 |                 if index not in person_index:
43 |                     person_index.append(index)
44 |             line = f.readline()
45 |     # the with-block closes the file automatically
46 |     # sort, for convenience later
47 |     person_index.sort()
48 |     print('[1] extract pics: %d'%(len(person_index)))
49 |     return person_index
50 | 
51 | 
52 | def write_txt(person_index, label_path):
53 |     # write all IDs of this split into one txt file
54 |     with open(os.path.join(VOC2007_label, label_path),'w') as f:
55 |         for index in person_index:
56 |             line = index + '\n'
57 |             f.write(line)
58 |     # the with-block closes the file automatically
59 |     print('    write labels into txt finished')
60 | 
61 | 
62 | if __name__ == '__main__':
63 |     rm_unnecessary_files()                              # delete the unused folders
64 |     train_person_index = get_index(VOC2007_trainval)    # collect the person images in trainval
65 |     write_txt(train_person_index, 'train.txt')
66 | 
67 |     test_person_index = get_index(VOC2007_test)         # collect the person images in the test set
68 |     write_txt(test_person_index, 'test.txt')
69 | 
70 |     print('[2] All is done!')
--------------------------------------------------------------------------------
/make_yolo_dataset/YOLO_VOC2007_2012/extract_person_2007.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | 
 4 | # paths of the person-class trainval/test label lists
 5 | VOC2007_trainval = 'VOCdevkit/VOC2007/ImageSets/Main/person_trainval.txt'
 6 | VOC2007_test = 'VOCdevkit/VOC2007/ImageSets/Main/person_test.txt'
 7 | # folder that holds the label lists
 8 | VOC2007_label = 'VOCdevkit/VOC2007/ImageSets/Main/'
 9 | 
10 | # image and annotation paths
11 | VOC2007_images = 'VOCdevkit/VOC2007/JPEGImages'
12 | VOC2007_annotations = 'VOCdevkit/VOC2007/Annotations'
13 | 
14 | # unused folders to delete
15 | FOLDER_TO_DELETE = ['VOCdevkit/VOC2007/SegmentationClass', 'VOCdevkit/VOC2007/SegmentationObject', 'VOCdevkit/VOC2007/ImageSets/Layout', 'VOCdevkit/VOC2007/ImageSets/Segmentation']
16 | 
17 | # IDs of the images that contain the person class
18 | train_person_index = []
19 | test_person_index = []
20 | 
21 | def rm_unnecessary_files():
22 |     # delete the unused folders
23 |     for file in FOLDER_TO_DELETE:
24 |         if os.path.exists(file):
25 |             shutil.rmtree(file)
26 |     # remove the unused and stale label files
27 |     for file in os.listdir(VOC2007_label):
28 |         if 'person' not in file:
29 |             os.remove(os.path.join(VOC2007_label, file))
30 |     print('[0] remove unnecessary files done')
31 | 
32 | def get_index(dataset_path):
33 |     person_index = []
34 |     with open(dataset_path,'r') as f:
35 |         line = f.readline()
36 |         # loop until the end of the file
37 |         while line:
38 |             # each line ends with '\n' and the flag is the last field, so checking the third-to-last character is enough
39 |             if line[-3] != '-':
40 |                 index = line.split(' ')[0]
41 |                 # add the ID only if it is not in the list yet
42 |                 if index not in person_index:
43 |                     person_index.append(index)
44 |             line = f.readline()
45 |     # the with-block closes the file automatically
46 |     # sort, for convenience later
47 |     person_index.sort()
48 |     print('[1] extract pics: %d'%(len(person_index)))
49 |     return person_index
50 | 
51 | 
52 | def write_txt(person_index, label_path):
53 |     # write all IDs of this split into one txt file
54 |     with open(os.path.join(VOC2007_label, label_path),'w') as f:
55 |         for index in person_index:
56 |             line = index + '\n'
57 |             f.write(line)
58 |     # the with-block closes the file automatically
59 |     print('    write labels into txt finished')
60 | 
61 | 
62 | if __name__ == '__main__':
63 |     rm_unnecessary_files()                              # delete the unused folders
64 |     train_person_index = get_index(VOC2007_trainval)    # collect the person images in trainval
65 |     write_txt(train_person_index, 'train.txt')
66 | 
67 |     test_person_index = get_index(VOC2007_test)         # collect the person images in the test set
68 |     write_txt(test_person_index, 'test.txt')
69 | 
70 |     print('[2] All is done!')
--------------------------------------------------------------------------------
/OpenCV_make.md:
--------------------------------------------------------------------------------
  1 | ## 0.0 Building OpenCV on Linux and calling a specific version
  2 | 
  3 | On a shared multi-user server, several OpenCV versions may coexist, which makes building other OpenCV-dependent code unpredictable. This document describes how to build OpenCV on Ubuntu and select the version to use through environment variables.
  4 | 
  5 | 
  6 | 
  7 | ## 0.1 CMake and Make, the short version
  8 | 
  9 | The normal build pipeline: source files (.c) --> compiled into object files (.o) --> linked into an executable (.exe)
 10 | 
 11 | With many source files, the make tool batch-compiles them. A makefile is a rule file written in advance that make follows for the batch build. The cmake tool can generate all kinds of makefiles or project files, driven by a CMakeLists file.
 12 | 
 13 | ![Cmake](https://img-blog.csdn.net/20160521170837135)
 14 | 
 15 | For more, see [CMake与Make最简单直接的理解 - CSDN博客](https://blog.csdn.net/zgrjkflmkyc/article/details/51471229)
 16 | 
 17 | 
 18 | 
 19 | ## 1. Download the OpenCV source
 20 | 
 21 | Get the source from the [OpenCV website](https://opencv.org/);
 22 | 
 23 | This walkthrough builds version 3.4.1 as the example. Pick the version with care: **3.4.1 is not compatible with YOLO, while 3.4.0 is (verified)**
 24 | 
 25 | Download address:
 26 | 
 27 | 
 28 | 
 29 | ## 2. Extract
 30 | 
 31 | ```
 32 | tar -zxvf opencv-3.4.1.tar.gz
 33 | ```
 34 | 
 35 | 
 36 | 
 37 | ## 3. Configure with cmake
 38 | 
 39 | ```
 40 | cd opencv-3.4.1
 41 | mkdir build
 42 | cd build
 43 | 
 44 | # note: the following is a single command; in some terminals it wraps and only looks like two lines
 45 | cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/home/pascal/software/opencv341 ..
 46 | ```
 47 | 
 48 | Expected result:
 49 | 
 50 | ```
 51 | -- Configuring done
 52 | -- Generating done
 53 | -- Build files have been written to: /home/pascal/downloads/opencv-3.4.1/build
 54 | ```
 55 | 
 56 | 
 57 | 
 58 | ## 4. Build with make
 59 | 
 60 | ```
 61 | make
 62 | ```
 63 | 
 64 | This step takes a while. The server CPU has 6 cores, so `make -j12` speeds it up; a common rule of thumb is twice the number of CPU cores.
 65 | 
 66 | To check how many cores the CPU has, see [Linux 如何查看主机的cpu总个数和总内存_百度经验](https://jingyan.baidu.com/article/63f2362848492a0209ab3d49.html) and [make太慢了,加快编译速度的方法 make -j - CSDN博客](https://blog.csdn.net/gonghuihuihui/article/details/79091762)
 67 | 
 68 | The following output indicates a successful build
 69 | 
 70 | [![FF0Umj.md.png](https://s1.ax1x.com/2018/11/24/FF0Umj.md.png)](https://imgchr.com/i/FF0Umj)
 71 | 
 72 | 
 73 | 
 74 | ## 5. Install
 75 | 
 76 | ```
 77 | make install
 78 | ```
 79 | 
 80 | Result:
 81 | 
 82 | [![FF0spT.md.png](https://s1.ax1x.com/2018/11/24/FF0spT.md.png)](https://imgchr.com/i/FF0spT)
 83 | 
 84 | 
 85 | 
 86 | ## 6. Update environment variables to select the OpenCV version
 87 | 
 88 | Add the new OpenCV paths to the .bashrc file:
 89 | 
 90 | ```
 91 | export PKG_CONFIG_PATH=/home/pascal/software/opencv341/lib/pkgconfig
 92 | export LD_LIBRARY_PATH=/home/pascal/software/opencv341/lib
 93 | ```
 94 | 
 95 | Reload .bashrc:
 96 | 
 97 | ```
 98 | source ~/.bashrc
 99 | ```
100 | 
101 | References:
102 | 
103 | Main workflow: [Linux下面安装OpenCV3.3.0 - CSDN博客](https://blog.csdn.net/u013685902/article/details/78695094)
104 | 
105 | Environment variable setup: [ubuntu下opencv2.4.10 和 opencv 3.10 多版本切换问题 - CSDN博客](
106 | https://blog.csdn.net/kekong0713/article/details/53688131)
107 | 
108 | Overall workflow (note that its step 5 "release" and step 7 "cd .." are wrong): [ubuntu12.04 64位安装opencv-2.4.9 - CSDN博客](https://blog.csdn.net/dengshuai_super/article/details/51394118)
109 | 
110 | 
111 | 
112 | ## 7. Verify: check the OpenCV version
113 | 
114 | pkg-config lists information about a library, such as its install path and header locations. Common commands:
115 | 
116 | ```
117 | # header files
118 | pkg-config --cflags opencv
119 | 
120 | # library files
121 | pkg-config --libs opencv
122 | 
123 | # library version
124 | pkg-config --modversion opencv
125 | ```
126 | 
127 | After the reconfiguration, the OpenCV settings on the server changed as follows:
128 | 
129 | ```
130 | version:   3.1.0 --> 3.4.1
131 | libraries: /usr/local/lib --> /home/pascal/software/opencv341/lib
132 | headers:   /usr/local/include/opencv /usr/local/include -->
133 |            /home/pascal/software/opencv341/include/opencv /home/pascal/software/opencv341/include
134 | ```
135 | 
136 | See: [pkg-config 详解 - CSDN博客](https://blog.csdn.net/newchenxf/article/details/51750239)
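One quick way to confirm that builds now pick up the selected version (a sketch; `test.cpp` stands in for any small OpenCV program):

```
# compile against whatever pkg-config now resolves, then run
g++ test.cpp -o test $(pkg-config --cflags --libs opencv)
./test
```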
--------------------------------------------------------------------------------
/make_yolo_dataset/coco_to_yolo.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | # Usage notes
  3 | # Install the COCO tools first:
  4 | # git clone https://github.com/pdollar/coco.git
  5 | # cd coco/PythonAPI
  6 | # make install (install any missing dependencies it reports)
  7 | # Before running, create JPEGImages and labels directories under train2014 and val2014, and move the images that used to sit directly in train2014 and val2014 into JPEGImages
  8 | # The image path lists are written into the dataset's filelist directory
  9 | # The YOLO annotation files are written into the labels directory of the chosen COCO subset
 10 | 
 11 | 
 12 | from pycocotools.coco import COCO
 13 | import shutil
 14 | import os
 15 | 
 16 | 
 17 | # convert the ROI coordinates into what YOLO needs
 18 | # size is the image w and h
 19 | # box holds the COCO-format ROI (x, y, w, h) in pixels
 20 | # returns the ROI center relative to the image size, plus the ROI w and h as fractions of the image size
 21 | def convert(size, box):
 22 |     dw = 1. / size[0]
 23 |     dh = 1. / size[1]
 24 |     x = box[0] + box[2] / 2.0
 25 |     y = box[1] + box[3] / 2.0
 26 |     w = box[2]
 27 |     h = box[3]
 28 |     x = x * dw
 29 |     w = w * dw
 30 |     y = y * dh
 31 |     h = h * dh
 32 |     return (x, y, w, h)
 33 | 
 34 | 
 35 | # get the class names and ids we need
 36 | # path points to a list of name/id pairs (the annotation file may contain many classes; only those listed in this file are loaded)
 37 | # returns a dict mapping class name -> id
 38 | def get_classes_and_index(path):
 39 |     D = {}
 40 |     f = open(path)
 41 |     for line in f:
 42 |         temp = line.rstrip().split(',', 2)
 43 |         print("temp[0]:" + temp[0] + "\n")
 44 |         print("temp[1]:" + temp[1] + "\n")
 45 |         D[temp[1]] = temp[0]
 46 |     return D
 47 | 
 48 | 
 49 | dataDir = '/home/pascal/COCO'  # COCO dataset root
 50 | dataType = 'train2017'  # name of the COCO subset to convert
 51 | annFile = '%s/annotations/instances_%s.json' % (dataDir, dataType)  # annotation file path
 52 | classes = get_classes_and_index('/home/pascal/COCO/coco_list.txt')
 53 | 
 54 | # create the labels directory if missing; clear it if it already exists
 55 | if not os.path.exists('%s/%s/labels/' % (dataDir, dataType)):
 56 |     os.makedirs('%s/%s/labels/' % (dataDir, dataType))
 57 | else:
 58 |     shutil.rmtree('%s/%s/labels/' % (dataDir, dataType))
 59 |     os.makedirs('%s/%s/labels/' % (dataDir, dataType))
 60 | 
 61 | # create the filelist directory if missing
 62 | if not os.path.exists('%s/filelist/' % dataDir):
 63 |     os.makedirs('%s/filelist/' % dataDir)
 64 | 
 65 | coco = COCO(annFile)  # load and parse the annotation file
 66 | list_file = open('%s/filelist/%s.txt' % (dataDir, dataType), 'w')  # where the image list is saved
 67 | 
 68 | imgIds = coco.getImgIds()  # all COCO image IDs in the annotation file
 69 | catIds = coco.getCatIds()  # all COCO category IDs in the annotation file
 70 | 
 71 | for imgId in imgIds:
 72 |     objCount = 0  # flag: does this image contain any annotation we need?
 73 |     print('imgId :%s' % imgId)
 74 |     Img = coco.loadImgs(imgId)[0]  # load the image info
 75 |     print('Img :%s' % Img)
 76 |     filename = Img['file_name']  # image name
 77 |     width = Img['width']  # image size
 78 |     height = Img['height']  # image size
 79 |     print('filename :%s, width :%s ,height :%s' % (filename, width, height))
 80 |     annIds = coco.getAnnIds(imgIds=imgId, catIds=catIds, iscrowd=None)  # all COCO annotation IDs for this image
 81 |     print('annIds :%s' % annIds)
 82 |     for annId in annIds:
 83 |         anns = coco.loadAnns(annId)[0]  # load the annotation
 84 |         catId = anns['category_id']  # COCO category ID of this annotation
 85 |         cat = coco.loadCats(catId)[0]['name']  # category name for that COCO category ID
 86 |         # print 'anns :%s' % anns
 87 |         # print 'catId :%s , cat :%s' % (catId,cat)
 88 | 
 89 |         # if the class is one we want, convert the annotation into YOLO format
 90 |         if cat in classes:
 91 |             objCount = objCount + 1
 92 |             out_file = open('%s/%s/labels/%s.txt' % (dataDir, dataType, filename[:-4]), 'a')
 93 |             cls_id = classes[cat]  # the class id used for YOLO training
 94 |             box = anns['bbox']
 95 |             size = [width, height]
 96 |             bb = convert(size, box)
 97 |             out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
 98 |             out_file.close()
 99 | 
100 |     if objCount > 0:
101 |         list_file.write('%s/%s/JPEGImages/%s\n' % (dataDir, dataType, filename))
102 | 
103 | 
104 | list_file.close()
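`get_classes_and_index()` expects a small mapping file; a (made-up) `coco_list.txt` consistent with the parsing above:

```python
# Made-up coco_list.txt contents: one "yolo_id,coco_name" pair per line, e.g.
#   0,person
# so that get_classes_and_index() yields {'person': '0'}:
D = {}
for line in ['0,person']:
    temp = line.rstrip().split(',', 2)
    D[temp[1]] = temp[0]
print(D)   # {'person': '0'}
```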
--------------------------------------------------------------------------------
/yolo_compute_mAP_on_VOC2007/reval_voc_py3.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Adapt from ->
  4 | # --------------------------------------------------------
  5 | # Fast R-CNN
  6 | # Copyright (c) 2015 Microsoft
  7 | # Licensed under The MIT License [see LICENSE for details]
  8 | # Written by Ross Girshick
  9 | # --------------------------------------------------------
 10 | # <- Written by Yaping Sun
 11 | 
 12 | """Reval = re-eval. Re-evaluate saved detections."""
 13 | 
 14 | import os, sys, argparse
 15 | import numpy as np
 16 | import _pickle as cPickle
 17 | #import cPickle
 18 | 
 19 | from voc_eval_py3 import voc_eval
 20 | 
 21 | def parse_args():
 22 |     """
 23 |     Parse input arguments
 24 |     """
 25 |     parser = argparse.ArgumentParser(description='Re-evaluate results')
 26 |     parser.add_argument('output_dir', nargs=1, help='results directory',
 27 |                         type=str)
 28 |     parser.add_argument('--voc_dir', dest='voc_dir', default='/home/pascal/person_data2/VOCdevkit', type=str)
 29 |     parser.add_argument('--year', dest='year', default='2007', type=str)
 30 |     parser.add_argument('--image_set', dest='image_set', default='test', type=str)
 31 |     parser.add_argument('--classes', dest='class_file', default='data/voc.names', type=str)
 32 | 
 33 |     if len(sys.argv) == 1:
 34 |         parser.print_help()
 35 |         sys.exit(1)
 36 | 
 37 |     args = parser.parse_args()
 38 |     return args
 39 | 
 40 | def get_voc_results_file_template(image_set, out_dir = 'results'):
 41 |     filename = 'comp4_det_' + image_set + '_{:s}.txt'
 42 |     path = os.path.join(out_dir, filename)
 43 |     return path
 44 | 
 45 | def do_python_eval(devkit_path, year, image_set, classes, output_dir = 'results'):
 46 |     annopath = os.path.join(
 47 |         devkit_path,
 48 |         'VOC' + year,
 49 |         'Annotations',
 50 |         '{}.xml')
 51 |     imagesetfile = os.path.join(
 52 |         devkit_path,
 53 |         'VOC' + year,
 54 |         'ImageSets',
 55 |         'Main',
 56 |         image_set + '.txt')
 57 |     cachedir = os.path.join(devkit_path, 'annotations_cache')
 58 |     aps = []
 59 |     # The PASCAL VOC metric changed in 2010
 60 |     use_07_metric = True if int(year) < 2010 else False
 61 |     print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
 62 |     print('devkit_path=',devkit_path,', year = ',year)
 63 | 
 64 |     if not os.path.isdir(output_dir):
 65 |         os.mkdir(output_dir)
 66 |     for i, cls in enumerate(classes):
 67 |         if cls == '__background__':
 68 |             continue
 69 |         filename = get_voc_results_file_template(image_set).format(cls)
 70 |         rec, prec, ap = voc_eval(
 71 |             filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5,
 72 |             use_07_metric=use_07_metric)
 73 |         aps += [ap]
 74 |         print('AP for {} = {:.4f}'.format(cls, ap))
 75 |         with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
 76 |             cPickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
 77 |     print('Mean AP = {:.4f}'.format(np.mean(aps)))
 78 |     print('~~~~~~~~')
 79 |     print('Results:')
 80 |     for ap in aps:
 81 |         print('{:.3f}'.format(ap))
 82 |     print('{:.3f}'.format(np.mean(aps)))
 83 |     print('~~~~~~~~')
 84 |     print('')
 85 |     print('--------------------------------------------------------------')
 86 |     print('Results computed with the **unofficial** Python eval code.')
 87 |     print('Results should be very close to the official MATLAB eval code.')
 88 |     print('-- Thanks, The Management')
 89 |     print('--------------------------------------------------------------')
 90 | 
 91 | 
 92 | 
 93 | if __name__ == '__main__':
 94 |     args = parse_args()
 95 | 
 96 |     output_dir = os.path.abspath(args.output_dir[0])
 97 |     with open(args.class_file, 'r') as f:
 98 |         lines = f.readlines()
 99 | 
100 |     classes = [t.strip('\n') for t in lines]
101 | 
102 |     print('Evaluating detections')
103 |     do_python_eval(args.voc_dir, args.year, args.image_set, classes, output_dir)
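For reference, the per-class detection file that `get_voc_results_file_template()` points voc_eval at — a made-up line of `results/comp4_det_test_person.txt` and how voc_eval splits it (sketch, values invented):

```python
# One made-up line of results/comp4_det_test_person.txt:
#   <image_id> <confidence> <xmin> <ymin> <xmax> <ymax>
line = '000004 0.993 112.0 84.0 290.0 356.0'
x = line.strip().split(' ')
image_id, confidence, bbox = x[0], float(x[1]), [float(z) for z in x[2:]]
print(image_id, confidence, bbox)   # 000004 0.993 [112.0, 84.0, 290.0, 356.0]
```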
--------------------------------------------------------------------------------
/make_yolo_dataset/xml2json.py:
--------------------------------------------------------------------------------
  1 | import xml.etree.ElementTree as ET
  2 | import os
  3 | import json
  4 | 
  5 | coco = dict()
  6 | coco['images'] = []
  7 | coco['type'] = 'instances'
  8 | coco['annotations'] = []
  9 | coco['categories'] = []
 10 | 
 11 | category_set = dict()
 12 | image_set = set()
 13 | 
 14 | category_item_id = 0
 15 | image_id = 20180000000
 16 | annotation_id = 0
 17 | 
 18 | 
 19 | def addCatItem(name):
 20 |     global category_item_id
 21 |     category_item = dict()
 22 |     category_item['supercategory'] = 'none'
 23 |     category_item_id += 1
 24 |     category_item['id'] = category_item_id
 25 |     category_item['name'] = name
 26 |     coco['categories'].append(category_item)
 27 |     category_set[name] = category_item_id
 28 |     return category_item_id
 29 | 
 30 | 
 31 | def addImgItem(file_name, size):
 32 |     global image_id
 33 |     if file_name is None:
 34 |         raise Exception('Could not find filename tag in xml file.')
 35 |     if size['width'] is None:
 36 |         raise Exception('Could not find width tag in xml file.')
 37 |     if size['height'] is None:
 38 |         raise Exception('Could not find height tag in xml file.')
 39 |     image_id += 1
 40 |     image_item = dict()
 41 |     image_item['id'] = image_id
 42 |     image_item['file_name'] = file_name
 43 |     image_item['width'] = size['width']
 44 |     image_item['height'] = size['height']
 45 |     coco['images'].append(image_item)
 46 |     image_set.add(file_name)
 47 |     return image_id
 48 | 
 49 | 
 50 | def addAnnoItem(object_name, image_id, category_id, bbox):
 51 |     global annotation_id
 52 |     annotation_item = dict()
 53 |     annotation_item['segmentation'] = []
 54 |     seg = []
 55 |     # bbox[] is x,y,w,h
 56 |     # left_top
 57 |     seg.append(bbox[0])
 58 |     seg.append(bbox[1])
 59 |     # left_bottom
 60 |     seg.append(bbox[0])
 61 |     seg.append(bbox[1] + bbox[3])
 62 |     # right_bottom
 63 |     seg.append(bbox[0] + bbox[2])
 64 |     seg.append(bbox[1] + bbox[3])
 65 |     # right_top
 66 |     seg.append(bbox[0] + bbox[2])
 67 |     seg.append(bbox[1])
 68 | 
 69 |     annotation_item['segmentation'].append(seg)
 70 | 
 71 |     annotation_item['area'] = bbox[2] * bbox[3]
 72 |     annotation_item['iscrowd'] = 0
 73 |     annotation_item['ignore'] = 0
 74 |     annotation_item['image_id'] = image_id
 75 |     annotation_item['bbox'] = bbox
 76 |     annotation_item['category_id'] = category_id
 77 |     annotation_id += 1
 78 |     annotation_item['id'] = annotation_id
 79 |     coco['annotations'].append(annotation_item)
 80 | 
 81 | 
 82 | def parseXmlFiles(xml_path):
 83 |     for f in os.listdir(xml_path):
 84 |         if not f.endswith('.xml'):
 85 |             continue
 86 | 
 87 |         bndbox = dict()
 88 |         size = dict()
 89 |         current_image_id = None
 90 |         current_category_id = None
 91 |         file_name = None
 92 |         size['width'] = None
 93 |         size['height'] = None
 94 |         size['depth'] = None
 95 | 
 96 |         xml_file = os.path.join(xml_path, f)
 97 |         print(xml_file)
 98 | 
 99 |         tree = ET.parse(xml_file)
100 |         root = tree.getroot()
101 |         if root.tag != 'annotation':
102 |             raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))
103 | 
104 |         # elem is <folder>, <filename>, <size>, <object>
105 |         for elem in root:
106 |             current_parent = elem.tag
107 |             current_sub = None
108 |             object_name = None
109 | 
110 |             if elem.tag == 'folder':
111 |                 continue
112 | 
113 |             if elem.tag == 'filename':
114 |                 file_name = elem.text
115 |                 if file_name in category_set:
116 |                     raise Exception('file_name duplicated')
117 | 
118 |             # add img item only after parsing the <size> tag
119 |             elif current_image_id is None and file_name is not None and size['width'] is not None:
120 |                 if file_name not in image_set:
121 |                     current_image_id = addImgItem(file_name, size)
122 |                     print('add image with {} and {}'.format(file_name, size))
123 |                 else:
124 |                     raise Exception('duplicated image: {}'.format(file_name))
125 |             # subelem is <width>, <height>, <depth>, <name>, <bndbox>
126 |             for subelem in elem:
127 |                 bndbox['xmin'] = None
128 |                 bndbox['xmax'] = None
129 |                 bndbox['ymin'] = None
130 |                 bndbox['ymax'] = None
131 | 
132 |                 current_sub = subelem.tag
133 |                 if current_parent == 'object' and subelem.tag == 'name':
134 |                     object_name = subelem.text
135 |                     if object_name not in category_set:
136 |                         current_category_id = addCatItem(object_name)
137 |                     else:
138 |                         current_category_id = category_set[object_name]
139 | 
140 |                 elif current_parent == 'size':
141 |                     if size[subelem.tag] is not None:
142 |                         raise Exception('xml structure broken at size tag.')
143 |                     size[subelem.tag] = int(subelem.text)
144 | 
145 |                 # option is <xmin>, <ymin>, <xmax>, <ymax>, when subelem is <bndbox>
146 |                 for option in subelem:
147 |                     if current_sub == 'bndbox':
148 |                         if bndbox[option.tag] is not None:
149 |                             raise Exception('xml structure corrupted at bndbox tag.')
150 |                         bndbox[option.tag] = int(option.text)
151 | 
152 |                 # only after parsing the <object> tag
153 |                 if bndbox['xmin'] is not None:
154 |                     if object_name is None:
155 |                         raise Exception('xml structure broken at bndbox tag')
156 |                     if current_image_id is None:
157 |                         raise Exception('xml structure broken at bndbox tag')
158 |                     if current_category_id is None:
159 |                         raise Exception('xml structure broken at bndbox tag')
160 |                     bbox = []
161 |                     # x
162 |                     bbox.append(bndbox['xmin'])
163 |                     # y
164 |                     bbox.append(bndbox['ymin'])
165 |                     # w
166 |                     bbox.append(bndbox['xmax'] - bndbox['xmin'])
167 |                     # h
168 |                     bbox.append(bndbox['ymax'] - bndbox['ymin'])
169 |                     print('add annotation with {},{},{},{}'.format(object_name, current_image_id, current_category_id,
170 |                                                                    bbox))
171 |                     addAnnoItem(object_name, current_image_id, current_category_id, bbox)
172 | 
173 | 
174 | if __name__ == '__main__':
175 |     xml_path = 'annoval'
176 |     json_file = 'instanceval.json'
177 |     parseXmlFiles(xml_path)
178 |     json.dump(coco, open(json_file, 'w'))
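For orientation, the shape of the COCO-style JSON this script writes — a made-up, single-object example built by tracing addImgItem/addCatItem/addAnnoItem above:

```python
# Sketch of the output structure (made-up values):
coco_sample = {
    'images': [{'id': 20180000001, 'file_name': '000005.jpg', 'width': 500, 'height': 375}],
    'type': 'instances',
    'annotations': [{'segmentation': [[263, 211, 263, 339, 324, 339, 324, 211]],
                     'area': 7808, 'iscrowd': 0, 'ignore': 0,
                     'image_id': 20180000001, 'bbox': [263, 211, 61, 128],
                     'category_id': 1, 'id': 1}],
    'categories': [{'supercategory': 'none', 'id': 1, 'name': 'person'}],
}
```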
--------------------------------------------------------------------------------
/yolo_compute_mAP_on_VOC2007/voc_eval_py3.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast/er R-CNN
  3 | # Licensed under The MIT License [see LICENSE for details]
  4 | # Written by Bharath Hariharan
  5 | # --------------------------------------------------------
  6 | 
  7 | import xml.etree.ElementTree as ET
  8 | import os
  9 | #import cPickle
 10 | import _pickle as cPickle
 11 | import numpy as np
 12 | 
 13 | def parse_rec(filename):
 14 |     """ Parse a PASCAL VOC xml file """
 15 |     tree = ET.parse(filename)
 16 |     objects = []
 17 |     for obj in tree.findall('object'):
 18 |         obj_struct = {}
 19 |         obj_struct['name'] = obj.find('name').text
 20 |         #obj_struct['pose'] = obj.find('pose').text
 21 |         #obj_struct['truncated'] = int(obj.find('truncated').text)
 22 |         obj_struct['difficult'] = int(obj.find('difficult').text)
 23 |         bbox = obj.find('bndbox')
 24 |         obj_struct['bbox'] = [int(bbox.find('xmin').text),
 25 |                               int(bbox.find('ymin').text),
 26 |                               int(bbox.find('xmax').text),
 27 |                               int(bbox.find('ymax').text)]
 28 |         objects.append(obj_struct)
 29 | 
 30 |     return objects
 31 | 
 32 | def voc_ap(rec, prec, use_07_metric=False):
 33 |     """ ap = voc_ap(rec, prec, [use_07_metric])
 34 |     Compute VOC AP given precision and recall.
 35 |     If use_07_metric is true, uses the
 36 |     VOC 07 11 point method (default:False).
 37 |     """
 38 |     if use_07_metric:
 39 |         # 11 point metric
 40 |         ap = 0.
 41 |         for t in np.arange(0., 1.1, 0.1):
 42 |             if np.sum(rec >= t) == 0:
 43 |                 p = 0
 44 |             else:
 45 |                 p = np.max(prec[rec >= t])
 46 |             ap = ap + p / 11.
 47 |     else:
 48 |         # correct AP calculation
 49 |         # first append sentinel values at the end
 50 |         mrec = np.concatenate(([0.], rec, [1.]))
 51 |         mpre = np.concatenate(([0.], prec, [0.]))
 52 | 
 53 |         # compute the precision envelope
 54 |         for i in range(mpre.size - 1, 0, -1):
 55 |             mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
 56 | 
 57 |         # to calculate area under PR curve, look for points
 58 |         # where X axis (recall) changes value
 59 |         i = np.where(mrec[1:] != mrec[:-1])[0]
 60 | 
 61 |         # and sum (\Delta recall) * prec
 62 |         ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
 63 |     return ap
 64 | 
 65 | def voc_eval(detpath,
 66 |              annopath,
 67 |              imagesetfile,
 68 |              classname,
 69 |              cachedir,
 70 |              ovthresh=0.5,
 71 |              use_07_metric=False):
 72 |     """rec, prec, ap = voc_eval(detpath,
 73 |                                 annopath,
 74 |                                 imagesetfile,
 75 |                                 classname,
 76 |                                 [ovthresh],
 77 |                                 [use_07_metric])
 78 |     Top level function that does the PASCAL VOC evaluation.
 79 |     detpath: Path to detections
 80 |         detpath.format(classname) should produce the detection results file.
 81 |     annopath: Path to annotations
 82 |         annopath.format(imagename) should be the xml annotations file.
 83 |     imagesetfile: Text file containing the list of images, one image per line.
 84 |     classname: Category name (duh)
 85 |     cachedir: Directory for caching the annotations
 86 |     [ovthresh]: Overlap threshold (default = 0.5)
 87 |     [use_07_metric]: Whether to use VOC07's 11 point AP computation
 88 |         (default False)
 89 |     """
 90 |     # assumes detections are in detpath.format(classname)
 91 |     # assumes annotations are in annopath.format(imagename)
 92 |     # assumes imagesetfile is a text file with each line an image name
 93 |     # cachedir caches the annotations in a pickle file
 94 | 
 95 |     # first load gt
 96 |     if not os.path.isdir(cachedir):
 97 |         os.mkdir(cachedir)
 98 |     cachefile = os.path.join(cachedir, 'annots.pkl')
 99 |     # read list of images
100 |     with open(imagesetfile, 'r') as f:
101 |         lines = f.readlines()
102 |     imagenames = [x.strip() for x in lines]
103 | 
104 |     if not os.path.isfile(cachefile):
105 |         # load annots
106 |         recs = {}
107 |         for i, imagename in enumerate(imagenames):
108 |             recs[imagename] = parse_rec(annopath.format(imagename))
109 |             #if i % 100 == 0:
110 |                 #print('Reading annotation for {:d}/{:d}').format(i + 1, len(imagenames))
111 |         # save
112 |         #print('Saving cached annotations to {:s}').format(cachefile)
113 |         with open(cachefile, 'wb') as f:
114 |             cPickle.dump(recs, f)
115 |     else:
116 |         # load
117 |         print('!!! cachefile = ',cachefile)
118 |         with open(cachefile, 'rb') as f:
119 |             recs = cPickle.load(f)
120 | 
121 |     # extract gt objects for this class
122 |     class_recs = {}
123 |     npos = 0
124 |     for imagename in imagenames:
125 |         R = [obj for obj in recs[imagename] if obj['name'] == classname]
126 |         bbox = np.array([x['bbox'] for x in R])
127 |         difficult = np.array([x['difficult'] for x in R]).astype(bool)
128 |         det = [False] * len(R)
129 |         npos = npos + sum(~difficult)
130 |         class_recs[imagename] = {'bbox': bbox,
131 |                                  'difficult': difficult,
132 |                                  'det': det}
133 | 
134 |     # read dets
135 |     detfile = detpath.format(classname)
136 |     with open(detfile, 'r') as f:
137 |         lines = f.readlines()
138 | 
139 |     splitlines = [x.strip().split(' ') for x in lines]
140 |     image_ids = [x[0] for x in splitlines]
141 |     confidence = np.array([float(x[1]) for x in splitlines])
142 |     BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
143 | 
144 |     # sort by confidence
145 |     sorted_ind = np.argsort(-confidence)
146 |     sorted_scores = np.sort(-confidence)
147 |     BB = BB[sorted_ind, :]
148 |     image_ids = [image_ids[x] for x in sorted_ind]
149 | 
150 |     # go down dets and mark TPs and FPs
151 |     nd = len(image_ids)
152 |     tp = np.zeros(nd)
153 |     fp = np.zeros(nd)
154 |     for d in range(nd):
155 |         R = class_recs[image_ids[d]]
156 |         bb = BB[d, :].astype(float)
157 |         ovmax = -np.inf
158 |         BBGT = R['bbox'].astype(float)
159 | 
160 |         if BBGT.size > 0:
161 |             # compute overlaps
162 |             # intersection
163 |             ixmin = np.maximum(BBGT[:, 0], bb[0])
164 |             iymin = np.maximum(BBGT[:, 1], bb[1])
165 |             ixmax = np.minimum(BBGT[:, 2], bb[2])
166 |             iymax = np.minimum(BBGT[:, 3], bb[3])
167 |             iw = np.maximum(ixmax - ixmin + 1., 0.)
168 |             ih = np.maximum(iymax - iymin + 1., 0.)
169 |             inters = iw * ih
170 | 
171 |             # union
172 |             uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
173 |                    (BBGT[:, 2] - BBGT[:, 0] + 1.) *
174 |                    (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
175 | 
176 |             overlaps = inters / uni
177 |             ovmax = np.max(overlaps)
178 |             jmax = np.argmax(overlaps)
179 | 
180 |         if ovmax > ovthresh:
181 |             if not R['difficult'][jmax]:
182 |                 if not R['det'][jmax]:
183 |                     tp[d] = 1.
184 |                     R['det'][jmax] = 1
185 |                 else:
186 |                     fp[d] = 1.
187 |         else:
188 |             fp[d] = 1.
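A tiny worked example of `voc_ap()` with hand-picked values (a sketch; it assumes `voc_ap` from this file is in scope):

```python
import numpy as np
# Two detections: recall reaches 0.5 at precision 1.0, then 1.0 at precision 0.5.
rec  = np.array([0.5, 1.0])
prec = np.array([1.0, 0.5])
# 11-point VOC07 metric: p=1.0 for t in {0.0..0.5}, p=0.5 for t in {0.6..1.0}
# -> (6*1.0 + 5*0.5)/11 ~= 0.7727
print(voc_ap(rec, prec, use_07_metric=True))
```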
189 | 
190 |     # compute precision recall
191 |     fp = np.cumsum(fp)
192 |     tp = np.cumsum(tp)
193 |     rec = tp / float(npos)
194 |     # avoid divide by zero in case the first detection matches a difficult
195 |     # ground truth
196 |     prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
197 |     ap = voc_ap(rec, prec, use_07_metric)
198 | 
199 |     return rec, prec, ap
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## YOLOv3 Person Detection
 2 | 
 3 | This script collection targets the two mainstream YOLOv3 forks ([AlexeyAB/darknet](https://github.com/AlexeyAB/darknet) & [pjreddie/darknet](https://github.com/pjreddie/darknet)). It contains no YOLOv3 code or config files of its own, but by following the guide you can build a person detection system that performs well.
 4 | 
 5 | Current features:
 6 | 
 7 | 0. A curated set of commonly used YOLOv3 links and resources;
 8 | 1. Code that extracts person-class images from the VOC2007/VOC2012/COCO datasets and converts their annotations. When extracting from VOC, all difficult=1 images are kept by default, which gives better results; see [Issue #1200](https://github.com/AlexeyAB/darknet/issues/1200);
 9 | 2. Code that computes mAP;
10 | 3. Save the training log with the tee command; the script provided here visualizes the training process;
11 | 
12 | 
13 | ## Result comparison
14 | 
15 | YOLO_mine (based on the AB fork, detects persons only)
16 | 
17 | ![kite-7-final](https://github.com/pascal1129/yolo_person_detect/blob/master/images/kite-7-final.jpg)
18 | 
19 | YOLO_pj (pj fork, detects all classes)
20 | 
21 | ![kite-pj](https://github.com/pascal1129/yolo_person_detect/blob/master/images/kite-pj.jpg)
22 | 
23 | 
24 | 
25 | ## File structure
26 | 
27 | ```
28 | yolo_person_detect
29 | |
30 | │  README.md
31 | │
32 | ├─make_yolo_dataset
33 | │  │  helmet_to_yolo.py        # conversion code used in a past competition
34 | │  │  show_voc_xml.py          # visualize VOC annotations
35 | │  │  show_yolo_label.py       # visualize YOLO labels
36 | │  │  xml2json.py              # VOC XML --> COCO JSON
37 | │  │  coco_to_yolo.py          # COCO --> YOLO
38 | │  │
39 | │  ├─YOLO_VOC2007              # VOC2007
40 | │  │      extract_person.py    # extract person images from VOC2007
41 | │  │      voc_label.py         # convert the extracted person annotations to YOLO format
42 | │  │
43 | │  └─YOLO_VOC2007_2012         # VOC2007 + VOC2012
44 | │          extract_person_2007.py
45 | │          extract_person_2012.py
46 | │          voc_label.py
47 | │
48 | ├─yolo_compute_mAP_on_VOC2007  # evaluate performance on VOC2007
49 | │      reval_voc_py3.py
50 | │      voc_eval_py3.py
51 | │
52 | └─yolo_loss_analyse
53 |     │  analyse.py              # training-curve visualization code
54 |     │  result.png              # visualized training curve
55 |     └─loss
56 |             train7-loss.txt    # sample training log
57 | ```
58 | 
59 | 
60 | 
61 | ## Running the official demo
62 | 
63 | - Download the source code and the pretrained weights
64 | 
65 | ```Shell
66 | git clone https://github.com/AlexeyAB/darknet.git
67 | cd darknet/
68 | wget https://pjreddie.com/media/files/yolov3.weights
69 | ```
70 | 
71 | - If you need to build OpenCV, see [Pascal129/yolo_person_detect: Linux下OpenCV编译和指定版本调用](https://github.com/pascal1129/yolo_person_detect/blob/master/OpenCV_make.md)
72 | - Edit the Makefile and build; options such as -j8 enable a multi-core build
73 | 
74 | ```Shell
75 | vim Makefile
76 | make
77 | ```
78 | 
79 | Note: OpenCV 3.4.0 is verified to work, but 3.4.1 is not; see [YOLOv3的Darknet在OpenCV下编译出错填坑](https://zhuanlan.zhihu.com/p/36933700)
80 | 
81 | - Run the image detection demo
82 | ```
83 | ./darknet detect cfg/yolov3.cfg yolov3.weights data/dog.jpg
84 | ```
85 | 
86 | - Run the video detection demo
87 | 
88 | ```
89 | ./darknet detector demo cfg/coco.data cfg/yolov3.cfg yolov3.weights