├── images
    ├── kite-pj.jpg
    ├── kite-7-final.jpg
    └── loss_analyse_result.png
├── make_yolo_dataset
    ├── helmet_to_yolo.py
    ├── show_yolo_label.py
    ├── show_voc_xml.py
    ├── YOLO_VOC2007
    │   ├── voc_label.py
    │   └── extract_person.py
    ├── YOLO_VOC2007_2012
    │   ├── voc_label.py
    │   ├── extract_person_2012.py
    │   └── extract_person_2007.py
    ├── coco_to_yolo.py
    └── xml2json.py
├── yolo_loss_analyse
    └── analyse.py
├── OpenCV_make.md
├── yolo_compute_mAP_on_VOC2007
    ├── reval_voc_py3.py
    └── voc_eval_py3.py
├── README.md
└── LICENSE


/images/kite-pj.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pascal1129/yolo_person_detect/HEAD/images/kite-pj.jpg


--------------------------------------------------------------------------------
/images/kite-7-final.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pascal1129/yolo_person_detect/HEAD/images/kite-7-final.jpg


--------------------------------------------------------------------------------
/images/loss_analyse_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pascal1129/yolo_person_detect/HEAD/images/loss_analyse_result.png


--------------------------------------------------------------------------------
/make_yolo_dataset/helmet_to_yolo.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import pandas as pd
 3 | import numpy as np
 4 | from PIL import Image
 5 | 
 6 | def convert(size, box):
 7 |     dw = 1./(size[0])
 8 |     dh = 1./(size[1])
 9 |     x = (box[0] + box[1])/2.0 - 1
10 |     y = (box[2] + box[3])/2.0 - 1
11 |     w = box[1] - box[0]
12 |     h = box[3] - box[2]
13 |     x = x*dw
14 |     w = w*dw
15 |     y = y*dh
16 |     h = h*dh
17 |     return (x,y,w,h)
18 | 
19 | 
20 | if __name__ == '__main__':
21 |     df = pd.read_csv('./train_labels.csv')
22 |     print('Origin shape: ',df.shape)
23 |     df = df.dropna(axis=0)
24 |     labels = np.array(df)
25 |     num_of_labels = labels.shape[0]
26 |     print('Preprocessed: ',num_of_labels)
27 | 
28 |     image_ids = []
29 | 
30 |     for index in range(num_of_labels):
31 |         image_path = './JPEGImages/'+ labels[index,0]
32 |         img = Image.open(image_path)
33 |         w = img.size[0]
34 |         h = img.size[1]
35 | 
36 |         xmin = int(labels[index,1].split(' ')[0])
37 |         ymin = int(labels[index,1].split(' ')[1])
38 |         xmax = int(labels[index,1].split(' ')[2])
39 |         ymax = int(labels[index,1].split(' ')[3])
40 | 
41 |         b = (float(xmin), float(xmax), float(ymin), float(ymax))
42 |         bb = convert((w,h), b)
43 | 
44 |         cls_id = 0
45 | 
46 |         image_id = labels[index,0][:-4]
47 |         with open('./labels/' + image_id + '.txt', 'a+') as out_file:
48 |             out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
49 | 
50 | 
51 |         if image_id not in image_ids:
52 |             image_ids.append(image_id)
53 | 
54 |     print('labels created done')
55 | 
56 |     with open('./train.txt','w') as f:
57 |         wd = os.getcwd()
58 |         for image_id in image_ids:
59 |             f.write('%s/JPEGImages/%s.jpg\n'%(wd, image_id))
60 |         f.close()
61 | 
62 |     print('train.txt created done')


--------------------------------------------------------------------------------
/make_yolo_dataset/show_yolo_label.py:
--------------------------------------------------------------------------------
 1 | import xml.etree.ElementTree as ET
 2 | import pickle
 3 | import os
 4 | from os import listdir, getcwd
 5 | from os.path import join
 6 | from PIL import Image,ImageFont,ImageDraw,ImageFont
 7 | 
 8 | classes = ['person']
 9 | # classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
10 | 
11 | 
12 | def convert_annotation(year, image_id):
13 |     
14 |     im_path  = ('VOCdevkit/VOC%s/JPEGImages/%s.jpg'%(year, image_id))
15 |     im = Image.open(im_path)
16 | 
17 |     im_w = int(im.size[0])
18 |     im_h = int(im.size[1])
19 | 
20 |     draw = ImageDraw.Draw(im)
21 | 
22 |     
23 |     label_path = ('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id))
24 | 
25 |     with open(label_path) as file:
26 |         line = file.readline()
27 |         while line:
28 |             cls = int(line.split(' ')[0])
29 |             x = float(line.split(' ')[1])
30 |             y = float(line.split(' ')[2])
31 |             w = float(line.split(' ')[3])
32 |             h = float(line.split(' ')[4].split('\n')[0])
33 |             
34 |             # print(cls,x,y,w,h)
35 |         
36 |             real_w = im_w * w
37 |             real_h = im_h * h
38 |             x1 = (( x*im_w + 1.0 ) * 2.0 - real_w )/2.0
39 |             x2 = (( x*im_w + 1.0 ) * 2.0 + real_w )/2.0
40 |             y1 = (( y*im_h + 1.0 ) * 2.0 - real_h )/2.0
41 |             y2 = (( y*im_h + 1.0 ) * 2.0 + real_h )/2.0
42 | 
43 |             draw.rectangle([x1,y1,x2,y2], outline='red', fill=None)
44 |             # 防止越界
45 |             if y1-15>=10:
46 |                 draw.text([x1,y1-15],classes[cls],"black")
47 |             else:
48 |                 draw.text([x1,y1],classes[cls],"black")
49 |             line = file.readline()
50 |     im.show()
51 |     im.save('tmp/'+image_id+'.jpg')
52 | 
53 | 
54 | if __name__ == '__main__':
55 |     convert_annotation('2012','2010_001107')
56 | 
57 |     # with open('./VOCdevkit/VOC2012/ImageSets/Main/train.txt') as file:
58 |     #     line = file.readline()
59 |     #     while line:
60 |     #         convert_annotation('2012',line[:-1])
61 |     #         line = file.readline()
62 |     #         break
63 |     #     file.close
64 | 


--------------------------------------------------------------------------------
/yolo_loss_analyse/analyse.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import numpy as np
 3 | import matplotlib.pyplot as plt
 4 | # %matplotlib inline
 5 | 
 6 | # 提取训练log，去除不可解析的log后，使log文件格式化，生成新的log文件供可视化工具绘图
 7 | def extract_log(log_origin, log_new, key_word):
 8 | 	f_origin = open(log_origin)
 9 | 	f_new = open(log_new, 'w')
10 | 	for line in f_origin:
11 | 	    # 去除多gpu的同步log
12 | 	    if 'Syncing' in line:
13 | 	        continue
14 | 	    # 去除除零错误的log
15 | 	    if 'nan' in line:
16 | 	        continue
17 | 	    if key_word in line:
18 | 	        f_new.write(line.replace(':',','))
19 | 	f_new.close()
20 | 	f_origin.close()
21 | 
22 | 
23 | def get_loss(file_path):
24 | 	result = pd.read_csv(file_path, error_bad_lines=False, names=['batch', 'loss', 'avg', 'rate', 'seconds', 'images'])
25 | 	result['avg']=result['avg'].str.split(' ').str.get(1)
26 | 	print(result.head())
27 | 	print(result.tail())
28 | 
29 | 	result['batch']=pd.to_numeric(result['batch'])
30 | 	# result['loss']=pd.to_numeric(result['loss'])
31 | 	result['avg']=pd.to_numeric(result['avg'])
32 | 	x = result['batch'].values
33 | 	y = result['avg'].values
34 | 	return(x,y)
35 | 
36 | def draw_loss():
37 | 	x1,y1 = get_loss('train6-loss.txt')
38 | 	x2,y2 = get_loss('train6.1-loss.txt')
39 | 	
40 | 	# 绘制曲线
41 | 	plt.plot(x1, y1, color='blue')
42 | 	plt.plot(x2, y2, color='orange')
43 | 	#设置坐标轴范围
44 | 	plt.xlim((0,51000))
45 | 	plt.ylim((0,2))
46 | 	# 设置坐标轴、图片名称
47 | 	plt.xlabel('batch')
48 | 	plt.ylabel('avg_loss')
49 | 	plt.title('loss')
50 | 	# 设置图例
51 | 	plt.legend(["train1","train2"], loc="upper right")
52 | 	plt.show()
53 | 
54 | 
55 | 
56 | 
57 | 
58 | if __name__ == '__main__':
59 | 	# extract_log('../../train/train_log/train7.txt','loss/train7-loss.txt','images')
60 | 
61 | 
62 | 	# draw_loss()
63 | 	x1,y1 = get_loss('loss/train7-loss.txt')
64 | 	# x4,y4 = get_loss('train7.1-loss.txt')
65 | 	
66 | 	# 绘制曲线
67 | 	plt.plot(x1, y1, color='red')
68 | 	# plt.plot(x4, y4, color='blue')
69 | 	#设置坐标轴范围
70 | 	plt.xlim((1,80200))
71 | 	plt.ylim((0,2.5))
72 | 	# 设置坐标轴、图片名称
73 | 	plt.xlabel('batch')
74 | 	plt.ylabel('avg_loss')
75 | 	plt.title('loss')
76 | 	# 设置图例
77 | 	# plt.legend(["train6: COCO+VOC","train6.1: COCO+VOC,upsample"], loc="upper right")
78 | 	plt.legend(["train7: COCO+VOC, upsample, 8w iters, AB"], loc="upper right")
79 | 
80 | 	plt.savefig('result.png')
81 | 	plt.show()


--------------------------------------------------------------------------------
/make_yolo_dataset/show_voc_xml.py:
--------------------------------------------------------------------------------
 1 | import xml.etree.ElementTree as ET
 2 | import pickle
 3 | import os
 4 | from os import listdir, getcwd
 5 | from os.path import join
 6 | from PIL import Image,ImageFont,ImageDraw,ImageFont
 7 | 
 8 | classes = ['person']
 9 | # classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
10 | 
11 | 
12 | def convert_annotation(year, image_id):
13 |     
14 |     im_path  = ('VOCdevkit/VOC%s/JPEGImages/%s.jpg'%(year, image_id))
15 |     im = Image.open(im_path)
16 |     xml_path = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
17 |     tree=ET.parse(xml_path)
18 |     root = tree.getroot()
19 |     size = root.find('size')
20 |     w = int(size.find('width').text)
21 |     h = int(size.find('height').text)
22 | 
23 |     draw = ImageDraw.Draw(im)
24 |     # print(w,h)
25 |     for obj in root.iter('object'):
26 |         difficult = obj.find('difficult').text
27 |         cls = obj.find('name').text
28 |         if cls not in classes:
29 |             continue
30 |         cls_id = classes.index(cls)
31 |         xmlbox = obj.find('bndbox')
32 |         x1,x2,y1,y2 = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
33 |         if int(difficult)==0:
34 |             draw.rectangle([x1,y1,x2,y2], outline='blue', fill=None)
35 |         else:
36 |             draw.rectangle([x1,y1,x2,y2], outline='red', fill=None)
37 | 
38 |         # 防止越界
39 |         if y1-15>=10:
40 |             draw.text([x1,y1-15],classes[cls_id],"black")
41 |         else:
42 |             draw.text([x1,y1],classes[cls_id],"black")
43 | 
44 |     # im.show()
45 |     im.save('voc2007_with_person/'+image_id+'.jpg')
46 | 
47 | 
48 | if __name__ == '__main__':
49 |     with open('./VOCdevkit/VOC2012/ImageSets/Main/train.txt') as file:
50 |         line = file.readline()
51 |         while line:
52 |             convert_annotation('2012',line[:-1])
53 |             line = file.readline()
54 |             # break
55 |         file.close
56 | 
57 | 
58 |     # bad:2010_006507,2010_006104,2010_006097,2010_006158,
59 |     # many persons:2010_004439,2010_004597,
60 |     # small objects
61 |     # difficult


--------------------------------------------------------------------------------
/make_yolo_dataset/YOLO_VOC2007/voc_label.py:
--------------------------------------------------------------------------------
 1 | import xml.etree.ElementTree as ET
 2 | import pickle
 3 | import os
 4 | from os import listdir, getcwd
 5 | from os.path import join
 6 | 
 7 | sets=[('2007', 'train'), ('2007', 'test')]
 8 | 
 9 | classes = ['person']
10 | 
11 | 
12 | def convert(size, box):
13 |     dw = 1./(size[0])
14 |     dh = 1./(size[1])
15 |     x = (box[0] + box[1])/2.0 - 1
16 |     y = (box[2] + box[3])/2.0 - 1
17 |     w = box[1] - box[0]
18 |     h = box[3] - box[2]
19 |     x = x*dw
20 |     w = w*dw
21 |     y = y*dh
22 |     h = h*dh
23 |     return (x,y,w,h)
24 | 
25 | def convert_annotation(year, image_id):
26 |     in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
27 |     out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w')
28 |     tree=ET.parse(in_file)
29 |     root = tree.getroot()
30 |     size = root.find('size')
31 |     w = int(size.find('width').text)
32 |     h = int(size.find('height').text)
33 | 
34 |     for obj in root.iter('object'):
35 |         # difficult = obj.find('difficult').text
36 |         cls = obj.find('name').text
37 |         # if cls not in classes or int(difficult)==1:
38 |         # 保留困难的数据集
39 |         if cls not in classes:
40 |             continue
41 |         cls_id = classes.index(cls)
42 |         xmlbox = obj.find('bndbox')
43 |         b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
44 |         bb = convert((w,h), b)
45 |         out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
46 | 
47 | wd = getcwd()
48 | 
49 | for year, image_set in sets:
50 |     if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
51 |         os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
52 |     image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
53 |     list_file = open('%s_%s.txt'%(year, image_set), 'w')
54 |     for image_id in image_ids:
55 |         list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id))
56 |         convert_annotation(year, image_id)
57 |     list_file.close()
58 | 
59 | # os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt")
60 | # os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt")


--------------------------------------------------------------------------------
/make_yolo_dataset/YOLO_VOC2007_2012/voc_label.py:
--------------------------------------------------------------------------------
 1 | import xml.etree.ElementTree as ET
 2 | import pickle
 3 | import os
 4 | from os import listdir, getcwd
 5 | from os.path import join
 6 | 
 7 | sets=[('2007', 'train'), ('2007', 'test'), ('2012', 'train')]
 8 | classes = ['person']
 9 | 
10 | 
11 | def convert(size, box):
12 |     dw = 1./(size[0])
13 |     dh = 1./(size[1])
14 |     x = (box[0] + box[1])/2.0 - 1
15 |     y = (box[2] + box[3])/2.0 - 1
16 |     w = box[1] - box[0]
17 |     h = box[3] - box[2]
18 |     x = x*dw
19 |     w = w*dw
20 |     y = y*dh
21 |     h = h*dh
22 |     return (x,y,w,h)
23 | 
24 | def convert_annotation(year, image_id):
25 |     in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
26 |     out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w')
27 |     tree=ET.parse(in_file)
28 |     root = tree.getroot()
29 |     size = root.find('size')
30 |     w = int(size.find('width').text)
31 |     h = int(size.find('height').text)
32 | 
33 |     for obj in root.iter('object'):
34 |         # difficult = obj.find('difficult').text
35 |         cls = obj.find('name').text
36 |         # if cls not in classes or int(difficult)==1:
37 |         # 保留困难的数据集
38 |         if cls not in classes:
39 |             continue
40 |         cls_id = classes.index(cls)
41 |         xmlbox = obj.find('bndbox')
42 |         b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
43 |         bb = convert((w,h), b)
44 |         out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
45 | 
46 | wd = getcwd()
47 | 
48 | for year, image_set in sets:
49 |     if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
50 |         os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
51 |     image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
52 |     list_file = open('%s_%s.txt'%(year, image_set), 'w')
53 |     for image_id in image_ids:
54 |         list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id))
55 |         convert_annotation(year, image_id)
56 |     list_file.close()
57 | 
58 | # os.system("cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt > train.txt")
59 | # os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt > train.all.txt")


--------------------------------------------------------------------------------
/make_yolo_dataset/YOLO_VOC2007_2012/extract_person_2012.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | 
 4 | # person类训练集、测试集标签地址
 5 | VOC2012_trainval =  'VOCdevkit/VOC2012/ImageSets/Main/person_trainval.txt'
 6 | # VOC2012_test =      'VOCdevkit/VOC2012/ImageSets/Main/person_test.txt'
 7 | # 标签所在文件夹
 8 | VOC2012_label =     'VOCdevkit/VOC2012/ImageSets/Main/'
 9 | 
10 | # 图片和标注地址
11 | VOC2012_images =        'VOCdevkit/VOC2012/JPEGImages'
12 | VOC2012_annotations =   'VOCdevkit/VOC2012/Annotations'
13 | 
14 | # 待删除的无用文件夹
15 | FOLDER_TO_DELETE = ['VOCdevkit/VOC2012/SegmentationClass', 'VOCdevkit/VOC2012/SegmentationObject', 'VOCdevkit/VOC2012/ImageSets/Layout', 'VOCdevkit/VOC2012/ImageSets/Segmentation','VOCdevkit/VOC2012/ImageSets/Action']
16 | 
17 | # 含person类的图片的序列号
18 | train_person_index = []
19 | # test_person_index = []
20 | 
21 | def rm_unnecessary_files():
22 |     # 删除无用的文件夹 
23 |     for file in FOLDER_TO_DELETE:
24 |         if os.path.exists(file):
25 |             shutil.rmtree(file)
26 |     # 清空无用和过期的label文件
27 |     for file in os.listdir(VOC2012_label):
28 |         if 'person' not in file:
29 |             os.remove(os.path.join(VOC2012_label, file))
30 |     print('[0] remove unnecessary files done')  
31 | 
32 | def get_index(dataset_path):
33 |     person_index = []
34 |     with open(dataset_path,'r') as f:
35 |         line = f.readline()
36 |         # 判断文件是否结束
37 |         while line:
38 |             # 一行十个字符，最后一个字符是'\n'，因此检测倒数第三个字符即可
39 |             if line[-3] != '-':
40 |                 index = line.split(' ')[0]
41 |                 # 如果该序列号不存在于列表，则添加
42 |                 if index not in person_index:
43 |                     person_index.append(index)
44 |             line = f.readline()
45 |         f.close
46 |     # 排个序，便于使用
47 |     person_index.sort()
48 |     print('[1] extract pics: %d'%(len(person_index)))
49 |     return person_index
50 | 
51 | 
52 | def write_txt(person_index, label_path):
53 |     # 所有的训练数据写入train
54 |     with open(os.path.join(VOC2012_label, label_path),'w') as f:
55 |         for index in person_index:
56 |             line = index + '\n'
57 |             f.write(line)
58 |     f.close
59 |     print('    write labels into txt finished')
60 | 
61 | 
62 | if __name__ == '__main__':
63 |     rm_unnecessary_files()                              # 删除无用文件夹
64 |     train_person_index = get_index(VOC2012_trainval)    # 提取训练集中的含人数据
65 |     write_txt(train_person_index, 'train.txt')
66 |     print('[2] All is done!')


--------------------------------------------------------------------------------
/make_yolo_dataset/YOLO_VOC2007/extract_person.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | 
 4 | # person类训练集、测试集标签地址
 5 | VOC2007_trainval =  'VOCdevkit/VOC2007/ImageSets/Main/person_trainval.txt'
 6 | VOC2007_test =      'VOCdevkit/VOC2007/ImageSets/Main/person_test.txt'
 7 | # 标签所在文件夹
 8 | VOC2007_label =     'VOCdevkit/VOC2007/ImageSets/Main/'
 9 | 
10 | # 图片和标注地址
11 | VOC2007_images =        'VOCdevkit/VOC2007/JPEGImages'
12 | VOC2007_annotations =   'VOCdevkit/VOC2007/Annotations'
13 | 
14 | # 待删除的无用文件夹
15 | FOLDER_TO_DELETE = ['VOCdevkit/VOC2007/SegmentationClass', 'VOCdevkit/VOC2007/SegmentationObject', 'VOCdevkit/VOC2007/ImageSets/Layout', 'VOCdevkit/VOC2007/ImageSets/Segmentation']
16 | 
17 | # 含person类的图片的序列号
18 | train_person_index = []
19 | test_person_index = []
20 | 
21 | def rm_unnecessary_files():
22 |     # 删除无用的文件夹 
23 |     for file in FOLDER_TO_DELETE:
24 |         if os.path.exists(file):
25 |             shutil.rmtree(file)
26 |     # 清空无用和过期的label文件
27 |     for file in os.listdir(VOC2007_label):
28 |         if 'person' not in file:
29 |             os.remove(os.path.join(VOC2007_label, file))
30 |     print('[0] remove unnecessary files done')  
31 | 
32 | def get_index(dataset_path):
33 |     person_index = []
34 |     with open(dataset_path,'r') as f:
35 |         line = f.readline()
36 |         # 判断文件是否结束
37 |         while line:
38 |             # 一行十个字符，最后一个字符是'\n'，因此检测倒数第三个字符即可
39 |             if line[-3] != '-':
40 |                 index = line.split(' ')[0]
41 |                 # 如果该序列号不存在于列表，则添加
42 |                 if index not in person_index:
43 |                     person_index.append(index)
44 |             line = f.readline()
45 |         f.close
46 |     # 排个序，便于使用
47 |     person_index.sort()
48 |     print('[1] extract pics: %d'%(len(person_index)))
49 |     return person_index
50 | 
51 | 
52 | def write_txt(person_index, label_path):
53 |     # 所有的训练数据写入train
54 |     with open(os.path.join(VOC2007_label, label_path),'w') as f:
55 |         for index in person_index:
56 |             line = index + '\n'
57 |             f.write(line)
58 |     f.close
59 |     print('    write labels into txt finished')
60 | 
61 | 
62 | if __name__ == '__main__':
63 |     rm_unnecessary_files()                              # 删除无用文件夹
64 |     train_person_index = get_index(VOC2007_trainval)    # 提取训练集中的含人数据
65 |     write_txt(train_person_index, 'train.txt')
66 | 
67 |     test_person_index  = get_index(VOC2007_test)        # 提取测试集合的含人数据
68 |     write_txt(test_person_index,  'test.txt')
69 | 
70 |     print('[2] All is done!')


--------------------------------------------------------------------------------
/make_yolo_dataset/YOLO_VOC2007_2012/extract_person_2007.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | 
 4 | # person类训练集、测试集标签地址
 5 | VOC2007_trainval =  'VOCdevkit/VOC2007/ImageSets/Main/person_trainval.txt'
 6 | VOC2007_test =      'VOCdevkit/VOC2007/ImageSets/Main/person_test.txt'
 7 | # 标签所在文件夹
 8 | VOC2007_label =     'VOCdevkit/VOC2007/ImageSets/Main/'
 9 | 
10 | # 图片和标注地址
11 | VOC2007_images =        'VOCdevkit/VOC2007/JPEGImages'
12 | VOC2007_annotations =   'VOCdevkit/VOC2007/Annotations'
13 | 
14 | # 待删除的无用文件夹
15 | FOLDER_TO_DELETE = ['VOCdevkit/VOC2007/SegmentationClass', 'VOCdevkit/VOC2007/SegmentationObject', 'VOCdevkit/VOC2007/ImageSets/Layout', 'VOCdevkit/VOC2007/ImageSets/Segmentation']
16 | 
17 | # 含person类的图片的序列号
18 | train_person_index = []
19 | test_person_index = []
20 | 
21 | def rm_unnecessary_files():
22 |     # 删除无用的文件夹 
23 |     for file in FOLDER_TO_DELETE:
24 |         if os.path.exists(file):
25 |             shutil.rmtree(file)
26 |     # 清空无用和过期的label文件
27 |     for file in os.listdir(VOC2007_label):
28 |         if 'person' not in file:
29 |             os.remove(os.path.join(VOC2007_label, file))
30 |     print('[0] remove unnecessary files done')  
31 | 
32 | def get_index(dataset_path):
33 |     person_index = []
34 |     with open(dataset_path,'r') as f:
35 |         line = f.readline()
36 |         # 判断文件是否结束
37 |         while line:
38 |             # 一行十个字符，最后一个字符是'\n'，因此检测倒数第三个字符即可
39 |             if line[-3] != '-':
40 |                 index = line.split(' ')[0]
41 |                 # 如果该序列号不存在于列表，则添加
42 |                 if index not in person_index:
43 |                     person_index.append(index)
44 |             line = f.readline()
45 |         f.close
46 |     # 排个序，便于使用
47 |     person_index.sort()
48 |     print('[1] extract pics: %d'%(len(person_index)))
49 |     return person_index
50 | 
51 | 
52 | def write_txt(person_index, label_path):
53 |     # 所有的训练数据写入train
54 |     with open(os.path.join(VOC2007_label, label_path),'w') as f:
55 |         for index in person_index:
56 |             line = index + '\n'
57 |             f.write(line)
58 |     f.close
59 |     print('    write labels into txt finished')
60 | 
61 | 
62 | if __name__ == '__main__':
63 |     rm_unnecessary_files()                              # 删除无用文件夹
64 |     train_person_index = get_index(VOC2007_trainval)    # 提取训练集中的含人数据
65 |     write_txt(train_person_index, 'train.txt')
66 | 
67 |     test_person_index  = get_index(VOC2007_test)        # 提取测试集合的含人数据
68 |     write_txt(test_person_index,  'test.txt')
69 | 
70 |     print('[2] All is done!')


--------------------------------------------------------------------------------
/OpenCV_make.md:
--------------------------------------------------------------------------------
  1 | ## 0.0、Linux下OpenCV编译和指定版本调用
  2 | 
  3 | 在多人共用的服务器上，可能存在OpenCV多版本共存的混乱局面，给其他依赖OpenCV的代码的编译带来不确定性。为此本文档主要是介绍如何在Ubuntu上编译OpenCV，并通过环境变量调用指定版本。
  4 | 
  5 | 
  6 | 
  7 | ## 0.1、CMake与Mak最简单的理解
  8 | 
  9 | 正常的开发步骤：源文件(.c) --> 编译得到目标文件(.o) --> 连接得到可执行文件(.exe)
 10 | 
 11 | 源文件过多时，可使用make工具批处理编译源文件。此外，makefile是一个事先编写好的规则文件，make依据它来批处理编译。cmake工具能够输出各种各样的makefile或者project文件，它的依据是cmakelist。
 12 | 
 13 | ![Cmake](https://img-blog.csdn.net/20160521170837135)
 14 | 
 15 | 更多内容参见 [CMake与Make最简单直接的理解 - CSDN博客](https://blog.csdn.net/zgrjkflmkyc/article/details/51471229)
 16 | 
 17 | 
 18 | 
 19 | ## 1、下载OpenCV源码
 20 | 
 21 | 可以去 [OpenCV官网](https://opencv.org/) 下载源码；
 22 | 
 23 | 这里以3.4.1版本的编译为例进行演示，版本选择需要自己考虑，比如说但**3.4.1版本与yolo不兼容，亲测3.4.0版本与yolo兼容**
 24 | 
 25 | 下载地址：<https://codeload.github.com/opencv/opencv/tar.gz/3.4.1>
 26 | 
 27 | 
 28 | 
 29 | ## 2、解压缩
 30 | 
 31 | ```
 32 | tar -zxvf opencv-3.4.1.tar.gz
 33 | ```
 34 | 
 35 | 
 36 | 
 37 | ## 3、cmake编译
 38 | 
 39 | ```
 40 | cd opencv-3.4.1
 41 | mkdir build
 42 | cd build
 43 | 
 44 | # 注意，下面是一条指令，部分环境下可能显示成两行，看起来像两行
 45 | cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/home/pascal/software/opencv341 ..
 46 | ```
 47 | 
 48 | 得到结果
 49 | 
 50 | ```
 51 | -- Configuring done
 52 | -- Generating done
 53 | -- Build files have been written to: /home/pascal/downloads/opencv-3.4.1/build
 54 | ```
 55 | 
 56 | 
 57 | 
 58 | ## 4、make编译
 59 | 
 60 | ```
 61 | make
 62 | ```
 63 | 
 64 | 此过程耗时较久，服务器CPU有6个核心，因此可通过make -j12加速，一般取CPU核心数的两倍
 65 | 
 66 | 查看CPU有几个核心，可以参考： [Linux 如何查看主机的cpu总个数和总内存_百度经验](https://jingyan.baidu.com/article/63f2362848492a0209ab3d49.html) 和 [make太慢了，加快编译速度的方法 make -j - CSDN博客](https://blog.csdn.net/gonghuihuihui/article/details/79091762)
 67 | 
 68 | 得到下面这个结果，说明编译成功
 69 | 
 70 | [![FF0Umj.md.png](https://s1.ax1x.com/2018/11/24/FF0Umj.md.png)](https://imgchr.com/i/FF0Umj)
 71 | 
 72 | 
 73 | 
 74 | ## 5、安装
 75 | 
 76 | ```
 77 | make install
 78 | ```
 79 | 
 80 | 结果：
 81 | 
 82 | [![FF0spT.md.png](https://s1.ax1x.com/2018/11/24/FF0spT.md.png)](https://imgchr.com/i/FF0spT)
 83 | 
 84 | 
 85 | 
 86 | ## 6、更新环境变量，确定OpenCV的调用版本
 87 | 
 88 | 在.bashrc文件中添加新的opencv路径：
 89 | 
 90 | ```
 91 | export PKG_CONFIG_PATH=/home/pascal/software/opencv341/lib/pkgconfig
 92 | export LD_LIBRARY_PATH=/home/pascal/software/opencv341/lib
 93 | ```
 94 | 
 95 | 更新bashrc：
 96 | 
 97 | ```
 98 | source ~/.bashrc
 99 | ```
100 | 
101 | 参考资料：
102 | 
103 | 主要流程参考： [Linux下面安装OpenCV3.3.0 - CSDN博客](https://blog.csdn.net/u013685902/article/details/78695094)
104 | 
105 | 环境变量设置参考：[ubuntu下opencv2.4.10 和 opencv 3.10 多版本切换问题 - CSDN博客](
106 | https://blog.csdn.net/kekong0713/article/details/53688131)
107 | 
108 | 整体流程可参考，步骤五release和步骤七cd ..不对:[ubuntu12.04 64位安装opencv-2.4.9 - CSDN博客](https://blog.csdn.net/dengshuai_super/article/details/51394118)
109 | 
110 | 
111 | 
112 | ## 7、验证：OpenCV版本查看
113 | 
114 | pkg-config可用与列举出某个库的相关信息，比如此库的路径、相关头文件路径等，常用命令如下：
115 | 
116 | ```
117 | # 头文件
118 | pkg-config --cflags opencv
119 | 
120 | # 库文件
121 | pkg-config --libs opencv
122 | 
123 | # 库版本
124 | pkg-config --modversion opencv
125 | ```
126 | 
127 | 经过重新设置，服务器上的OpenCV相关参数发生如下变化：
128 | 
129 | ```
130 | 版本：3.1.0 --> 3.4.1
131 | 库文件：/usr/local/lib --> /home/pascal/software/opencv341/lib
132 | 头文件：/usr/local/include/opencv /usr/local/include -->
133 | /home/pascal/software/opencv341/include/opencv /home/pascal/software/opencv341/include
134 | ```
135 | 
136 | 参见：[pkg-config 详解 - CSDN博客](https://blog.csdn.net/newchenxf/article/details/51750239)
137 | 
138 | 


--------------------------------------------------------------------------------
/make_yolo_dataset/coco_to_yolo.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | # 使用说明
  3 | # 需要先安装coco tools
  4 | # git clone https://github.com/pdollar/coco.git
  5 | # cd coco/PythonAPI
  6 | # make install(可能会缺少相关依赖，根据提示安装依赖即可)
  7 | # 执行脚本前需在train2014和val2014目录下分别创建JPEGImages和labels目录，并将原来train2014和val2014目录下的图片移到JPEGImages下
  8 | # COCO数据集的filelist目录下会生成图片路径列表
  9 | # COCO数据集的子集的labels目录下会生成yolo需要的标注文件
 10 | 
 11 | 
 12 | from pycocotools.coco import COCO
 13 | import shutil
 14 | import os
 15 | 
 16 | 
 17 | # 将ROI的坐标转换为yolo需要的坐标
 18 | # size是图片的w和h
 19 | # box里保存的是ROI的坐标（x，y的最大值和最小值）
 20 | # 返回值为ROI中心点相对于图片大小的比例坐标，和ROI的w、h相对于图片大小的比例
 21 | def convert(size, box):
 22 |     dw = 1. / size[0]
 23 |     dh = 1. / size[1]
 24 |     x = box[0] + box[2] / 2.0
 25 |     y = box[1] + box[3] / 2.0
 26 |     w = box[2]
 27 |     h = box[3]
 28 |     x = x * dw
 29 |     w = w * dw
 30 |     y = y * dh
 31 |     h = h * dh
 32 |     return (x, y, w, h)
 33 | 
 34 | 
 35 | # 获取所需要的类名和id
 36 | # path为类名和id的对应关系列表的地址（标注文件中可能有很多类，我们只加载该path指向文件中的类）
 37 | # 返回值是一个字典，键名是类名，键值是id
 38 | def get_classes_and_index(path):
 39 |     D = {}
 40 |     f = open(path)
 41 |     for line in f:
 42 |         temp = line.rstrip().split(',', 2)
 43 |         print("temp[0]:" + temp[0] + "\n")
 44 |         print("temp[1]:" + temp[1] + "\n")
 45 |         D[temp[1]] = temp[0]
 46 |     return D
 47 | 
 48 | 
 49 | dataDir = '/home/pascal/COCO'  # COCO数据集所在的路径
 50 | dataType = 'train2017'  # 要转换的COCO数据集的子集名
 51 | annFile = '%s/annotations/instances_%s.json' % (dataDir, dataType)  # COCO数据集的标注文件路径
 52 | classes = get_classes_and_index('/home/pascal/COCO/coco_list.txt')
 53 | 
 54 | # labels 目录若不存在，创建labels目录。若存在，则清空目录
 55 | if not os.path.exists('%s/%s/labels/' % (dataDir, dataType)):
 56 |     os.makedirs('%s/%s/labels/' % (dataDir, dataType))
 57 | else:
 58 |     shutil.rmtree('%s/%s/labels/' % (dataDir, dataType))
 59 |     os.makedirs('%s/%s/labels/' % (dataDir, dataType))
 60 | 
 61 | # filelist 目录若不存在，创建filelist目录。
 62 | if not os.path.exists('%s/filelist/' % dataDir):
 63 |     os.makedirs('%s/filelist/' % dataDir)
 64 | 
 65 | coco = COCO(annFile)  # 加载解析标注文件
 66 | list_file = open('%s/filelist/%s.txt' % (dataDir, dataType), 'w')  # 数据集的图片list保存路径
 67 | 
 68 | imgIds = coco.getImgIds()  # 获取标注文件中所有图片的COCO Img ID
 69 | catIds = coco.getCatIds()  # 获取标注文件总所有的物体类别的COCO Cat ID
 70 | 
 71 | for imgId in imgIds:
 72 |     objCount = 0  # 一个标志位，用来判断该img是否包含我们需要的标注
 73 |     print('imgId :%s' % imgId)
 74 |     Img = coco.loadImgs(imgId)[0]  # 加载图片信息
 75 |     print('Img :%s' % Img)
 76 |     filename = Img['file_name']  # 获取图片名
 77 |     width = Img['width']  # 获取图片尺寸
 78 |     height = Img['height']  # 获取图片尺寸
 79 |     print('filename :%s, width :%s ,height :%s' % (filename, width, height))
 80 |     annIds = coco.getAnnIds(imgIds=imgId, catIds=catIds, iscrowd=None)  # 获取该图片对应的所有COCO物体类别标注ID
 81 |     print('annIds :%s' % annIds)
 82 |     for annId in annIds:
 83 |         anns = coco.loadAnns(annId)[0]  # 加载标注信息
 84 |         catId = anns['category_id']  # 获取该标注对应的物体类别的COCO Cat ID
 85 |         cat = coco.loadCats(catId)[0]['name']  # 获取该COCO Cat ID对应的物体种类名
 86 |         # print 'anns :%s' % anns
 87 |         # print 'catId :%s , cat :%s' % (catId,cat)
 88 | 
 89 |         # 如果该类名在我们需要的物体种类列表中，将标注文件转换为YOLO需要的格式
 90 |         if cat in classes:
 91 |             objCount = objCount + 1
 92 |             out_file = open('%s/%s/labels/%s.txt' % (dataDir, dataType, filename[:-4]), 'a')
 93 |             cls_id = classes[cat]  # 获取该类物体在yolo训练中的id
 94 |             box = anns['bbox']
 95 |             size = [width, height]
 96 |             bb = convert(size, box)
 97 |             out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
 98 |             out_file.close()
 99 | 
100 |     if objCount > 0:
101 |         list_file.write('%s/%s/JPEGImages/%s\n' % (dataDir, dataType, filename))
102 | 
103 | 
104 | list_file.close()


--------------------------------------------------------------------------------
/yolo_compute_mAP_on_VOC2007/reval_voc_py3.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | 
  3 | # Adapt from ->
  4 | # --------------------------------------------------------
  5 | # Fast R-CNN
  6 | # Copyright (c) 2015 Microsoft
  7 | # Licensed under The MIT License [see LICENSE for details]
  8 | # Written by Ross Girshick
  9 | # --------------------------------------------------------
 10 | # <- Written by Yaping Sun
 11 | 
 12 | """Reval = re-eval. Re-evaluate saved detections."""
 13 | 
 14 | import os, sys, argparse
 15 | import numpy as np
 16 | import _pickle as cPickle
 17 | #import cPickle
 18 | 
 19 | from voc_eval_py3 import voc_eval
 20 | 
 21 | def parse_args():
 22 |     """
 23 |     Parse input arguments
 24 |     """
 25 |     parser = argparse.ArgumentParser(description='Re-evaluate results')
 26 |     parser.add_argument('output_dir', nargs=1, help='results directory',
 27 |                         type=str)
 28 |     parser.add_argument('--voc_dir', dest='voc_dir', default='/home/pascal/person_data2/VOCdevkit', type=str)
 29 |     parser.add_argument('--year', dest='year', default='2007', type=str)
 30 |     parser.add_argument('--image_set', dest='image_set', default='test', type=str)
 31 |     parser.add_argument('--classes', dest='class_file', default='data/voc.names', type=str)
 32 | 
 33 |     if len(sys.argv) == 1:
 34 |         parser.print_help()
 35 |         sys.exit(1)
 36 | 
 37 |     args = parser.parse_args()
 38 |     return args
 39 | 
 40 | def get_voc_results_file_template(image_set, out_dir = 'results'):
 41 |     filename = 'comp4_det_' + image_set + '_{:s}.txt'
 42 |     path = os.path.join(out_dir, filename)
 43 |     return path
 44 | 
 45 | def do_python_eval(devkit_path, year, image_set, classes, output_dir = 'results'):
 46 |     annopath = os.path.join(
 47 |         devkit_path,
 48 |         'VOC' + year,
 49 |         'Annotations',
 50 |         '{}.xml')
 51 |     imagesetfile = os.path.join(
 52 |         devkit_path,
 53 |         'VOC' + year,
 54 |         'ImageSets',
 55 |         'Main',
 56 |         image_set + '.txt')
 57 |     cachedir = os.path.join(devkit_path, 'annotations_cache')
 58 |     aps = []
 59 |     # The PASCAL VOC metric changed in 2010
 60 |     use_07_metric = True if int(year) < 2010 else False
 61 |     print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
 62 |     print('devkit_path=',devkit_path,', year = ',year)
 63 | 
 64 |     if not os.path.isdir(output_dir):
 65 |         os.mkdir(output_dir)
 66 |     for i, cls in enumerate(classes):
 67 |         if cls == '__background__':
 68 |             continue
 69 |         filename = get_voc_results_file_template(image_set).format(cls)
 70 |         rec, prec, ap = voc_eval(
 71 |             filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5,
 72 |             use_07_metric=use_07_metric)
 73 |         aps += [ap]
 74 |         print('AP for {} = {:.4f}'.format(cls, ap))
 75 |         with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
 76 |             cPickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
 77 |     print('Mean AP = {:.4f}'.format(np.mean(aps)))
 78 |     print('~~~~~~~~')
 79 |     print('Results:')
 80 |     for ap in aps:
 81 |         print('{:.3f}'.format(ap))
 82 |     print('{:.3f}'.format(np.mean(aps)))
 83 |     print('~~~~~~~~')
 84 |     print('')
 85 |     print('--------------------------------------------------------------')
 86 |     print('Results computed with the **unofficial** Python eval code.')
 87 |     print('Results should be very close to the official MATLAB eval code.')
 88 |     print('-- Thanks, The Management')
 89 |     print('--------------------------------------------------------------')
 90 | 
 91 | 
 92 | 
 93 | if __name__ == '__main__':
 94 |     args = parse_args()
 95 | 
 96 |     output_dir = os.path.abspath(args.output_dir[0])
 97 |     with open(args.class_file, 'r') as f:
 98 |         lines = f.readlines()
 99 | 
100 |     classes = [t.strip('\n') for t in lines]
101 | 
102 |     print('Evaluating detections')
103 |     do_python_eval(args.voc_dir, args.year, args.image_set, classes, output_dir)


--------------------------------------------------------------------------------
/make_yolo_dataset/xml2json.py:
--------------------------------------------------------------------------------
  1 | import xml.etree.ElementTree as ET
  2 | import os
  3 | import json
  4 | 
  5 | coco = dict()
  6 | coco['images'] = []
  7 | coco['type'] = 'instances'
  8 | coco['annotations'] = []
  9 | coco['categories'] = []
 10 | 
 11 | category_set = dict()
 12 | image_set = set()
 13 | 
 14 | category_item_id = 0
 15 | image_id = 20180000000
 16 | annotation_id = 0
 17 | 
 18 | 
 19 | def addCatItem(name):
 20 |     global category_item_id
 21 |     category_item = dict()
 22 |     category_item['supercategory'] = 'none'
 23 |     category_item_id += 1
 24 |     category_item['id'] = category_item_id
 25 |     category_item['name'] = name
 26 |     coco['categories'].append(category_item)
 27 |     category_set[name] = category_item_id
 28 |     return category_item_id
 29 | 
 30 | 
 31 | def addImgItem(file_name, size):
 32 |     global image_id
 33 |     if file_name is None:
 34 |         raise Exception('Could not find filename tag in xml file.')
 35 |     if size['width'] is None:
 36 |         raise Exception('Could not find width tag in xml file.')
 37 |     if size['height'] is None:
 38 |         raise Exception('Could not find height tag in xml file.')
 39 |     image_id += 1
 40 |     image_item = dict()
 41 |     image_item['id'] = image_id
 42 |     image_item['file_name'] = file_name
 43 |     image_item['width'] = size['width']
 44 |     image_item['height'] = size['height']
 45 |     coco['images'].append(image_item)
 46 |     image_set.add(file_name)
 47 |     return image_id
 48 | 
 49 | 
 50 | def addAnnoItem(object_name, image_id, category_id, bbox):
 51 |     global annotation_id
 52 |     annotation_item = dict()
 53 |     annotation_item['segmentation'] = []
 54 |     seg = []
 55 |     # bbox[] is x,y,w,h
 56 |     # left_top
 57 |     seg.append(bbox[0])
 58 |     seg.append(bbox[1])
 59 |     # left_bottom
 60 |     seg.append(bbox[0])
 61 |     seg.append(bbox[1] + bbox[3])
 62 |     # right_bottom
 63 |     seg.append(bbox[0] + bbox[2])
 64 |     seg.append(bbox[1] + bbox[3])
 65 |     # right_top
 66 |     seg.append(bbox[0] + bbox[2])
 67 |     seg.append(bbox[1])
 68 | 
 69 |     annotation_item['segmentation'].append(seg)
 70 | 
 71 |     annotation_item['area'] = bbox[2] * bbox[3]
 72 |     annotation_item['iscrowd'] = 0
 73 |     annotation_item['ignore'] = 0
 74 |     annotation_item['image_id'] = image_id
 75 |     annotation_item['bbox'] = bbox
 76 |     annotation_item['category_id'] = category_id
 77 |     annotation_id += 1
 78 |     annotation_item['id'] = annotation_id
 79 |     coco['annotations'].append(annotation_item)
 80 | 
 81 | 
 82 | def parseXmlFiles(xml_path):
 83 |     for f in os.listdir(xml_path):
 84 |         if not f.endswith('.xml'):
 85 |             continue
 86 | 
 87 |         bndbox = dict()
 88 |         size = dict()
 89 |         current_image_id = None
 90 |         current_category_id = None
 91 |         file_name = None
 92 |         size['width'] = None
 93 |         size['height'] = None
 94 |         size['depth'] = None
 95 | 
 96 |         xml_file = os.path.join(xml_path, f)
 97 |         print(xml_file)
 98 | 
 99 |         tree = ET.parse(xml_file)
100 |         root = tree.getroot()
101 |         if root.tag != 'annotation':
102 |             raise Exception('pascal voc xml root element should be annotation, rather than {}'.format(root.tag))
103 | 
104 |         # elem is <folder>, <filename>, <size>, <object>
105 |         for elem in root:
106 |             current_parent = elem.tag
107 |             current_sub = None
108 |             object_name = None
109 | 
110 |             if elem.tag == 'folder':
111 |                 continue
112 | 
113 |             if elem.tag == 'filename':
114 |                 file_name = elem.text
115 |                 if file_name in category_set:
116 |                     raise Exception('file_name duplicated')
117 | 
118 |             # add img item only after parse <size> tag
119 |             elif current_image_id is None and file_name is not None and size['width'] is not None:
120 |                 if file_name not in image_set:
121 |                     current_image_id = addImgItem(file_name, size)
122 |                     print('add image with {} and {}'.format(file_name, size))
123 |                 else:
124 |                     raise Exception('duplicated image: {}'.format(file_name))
125 |                     # subelem is <width>, <height>, <depth>, <name>, <bndbox>
126 |             for subelem in elem:
127 |                 bndbox['xmin'] = None
128 |                 bndbox['xmax'] = None
129 |                 bndbox['ymin'] = None
130 |                 bndbox['ymax'] = None
131 | 
132 |                 current_sub = subelem.tag
133 |                 if current_parent == 'object' and subelem.tag == 'name':
134 |                     object_name = subelem.text
135 |                     if object_name not in category_set:
136 |                         current_category_id = addCatItem(object_name)
137 |                     else:
138 |                         current_category_id = category_set[object_name]
139 | 
140 |                 elif current_parent == 'size':
141 |                     if size[subelem.tag] is not None:
142 |                         raise Exception('xml structure broken at size tag.')
143 |                     size[subelem.tag] = int(subelem.text)
144 | 
145 |                 # option is <xmin>, <ymin>, <xmax>, <ymax>, when subelem is <bndbox>
146 |                 for option in subelem:
147 |                     if current_sub == 'bndbox':
148 |                         if bndbox[option.tag] is not None:
149 |                             raise Exception('xml structure corrupted at bndbox tag.')
150 |                         bndbox[option.tag] = int(option.text)
151 | 
152 |                 # only after parse the <object> tag
153 |                 if bndbox['xmin'] is not None:
154 |                     if object_name is None:
155 |                         raise Exception('xml structure broken at bndbox tag')
156 |                     if current_image_id is None:
157 |                         raise Exception('xml structure broken at bndbox tag')
158 |                     if current_category_id is None:
159 |                         raise Exception('xml structure broken at bndbox tag')
160 |                     bbox = []
161 |                     # x
162 |                     bbox.append(bndbox['xmin'])
163 |                     # y
164 |                     bbox.append(bndbox['ymin'])
165 |                     # w
166 |                     bbox.append(bndbox['xmax'] - bndbox['xmin'])
167 |                     # h
168 |                     bbox.append(bndbox['ymax'] - bndbox['ymin'])
169 |                     print('add annotation with {},{},{},{}'.format(object_name, current_image_id, current_category_id,
170 |                                                                    bbox))
171 |                     addAnnoItem(object_name, current_image_id, current_category_id, bbox)
172 | 
173 | 
174 | if __name__ == '__main__':
175 |     xml_path = 'annoval'
176 |     json_file = 'instanceval.json'
177 |     parseXmlFiles(xml_path)
178 |     json.dump(coco, open(json_file, 'w'))


--------------------------------------------------------------------------------
/yolo_compute_mAP_on_VOC2007/voc_eval_py3.py:
--------------------------------------------------------------------------------
  1 | # --------------------------------------------------------
  2 | # Fast/er R-CNN
  3 | # Licensed under The MIT License [see LICENSE for details]
  4 | # Written by Bharath Hariharan
  5 | # --------------------------------------------------------
  6 | 
  7 | import xml.etree.ElementTree as ET
  8 | import os
  9 | #import cPickle
 10 | import _pickle as cPickle
 11 | import numpy as np
 12 | 
 13 | def parse_rec(filename):
 14 |     """ Parse a PASCAL VOC xml file """
 15 |     tree = ET.parse(filename)
 16 |     objects = []
 17 |     for obj in tree.findall('object'):
 18 |         obj_struct = {}
 19 |         obj_struct['name'] = obj.find('name').text
 20 |         #obj_struct['pose'] = obj.find('pose').text
 21 |         #obj_struct['truncated'] = int(obj.find('truncated').text)
 22 |         obj_struct['difficult'] = int(obj.find('difficult').text)
 23 |         bbox = obj.find('bndbox')
 24 |         obj_struct['bbox'] = [int(bbox.find('xmin').text),
 25 |                               int(bbox.find('ymin').text),
 26 |                               int(bbox.find('xmax').text),
 27 |                               int(bbox.find('ymax').text)]
 28 |         objects.append(obj_struct)
 29 | 
 30 |     return objects
 31 | 
 32 | def voc_ap(rec, prec, use_07_metric=False):
 33 |     """ ap = voc_ap(rec, prec, [use_07_metric])
 34 |     Compute VOC AP given precision and recall.
 35 |     If use_07_metric is true, uses the
 36 |     VOC 07 11 point method (default:False).
 37 |     """
 38 |     if use_07_metric:
 39 |         # 11 point metric
 40 |         ap = 0.
 41 |         for t in np.arange(0., 1.1, 0.1):
 42 |             if np.sum(rec >= t) == 0:
 43 |                 p = 0
 44 |             else:
 45 |                 p = np.max(prec[rec >= t])
 46 |             ap = ap + p / 11.
 47 |     else:
 48 |         # correct AP calculation
 49 |         # first append sentinel values at the end
 50 |         mrec = np.concatenate(([0.], rec, [1.]))
 51 |         mpre = np.concatenate(([0.], prec, [0.]))
 52 | 
 53 |         # compute the precision envelope
 54 |         for i in range(mpre.size - 1, 0, -1):
 55 |             mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
 56 | 
 57 |         # to calculate area under PR curve, look for points
 58 |         # where X axis (recall) changes value
 59 |         i = np.where(mrec[1:] != mrec[:-1])[0]
 60 | 
 61 |         # and sum (\Delta recall) * prec
 62 |         ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
 63 |     return ap
 64 | 
 65 | def voc_eval(detpath,
 66 |              annopath,
 67 |              imagesetfile,
 68 |              classname,
 69 |              cachedir,
 70 |              ovthresh=0.5,
 71 |              use_07_metric=False):
 72 |     """rec, prec, ap = voc_eval(detpath,
 73 |                                 annopath,
 74 |                                 imagesetfile,
 75 |                                 classname,
 76 |                                 [ovthresh],
 77 |                                 [use_07_metric])
 78 |     Top level function that does the PASCAL VOC evaluation.
 79 |     detpath: Path to detections
 80 |         detpath.format(classname) should produce the detection results file.
 81 |     annopath: Path to annotations
 82 |         annopath.format(imagename) should be the xml annotations file.
 83 |     imagesetfile: Text file containing the list of images, one image per line.
 84 |     classname: Category name (duh)
 85 |     cachedir: Directory for caching the annotations
 86 |     [ovthresh]: Overlap threshold (default = 0.5)
 87 |     [use_07_metric]: Whether to use VOC07's 11 point AP computation
 88 |         (default False)
 89 |     """
 90 |     # assumes detections are in detpath.format(classname)
 91 |     # assumes annotations are in annopath.format(imagename)
 92 |     # assumes imagesetfile is a text file with each line an image name
 93 |     # cachedir caches the annotations in a pickle file
 94 | 
 95 |     # first load gt
 96 |     if not os.path.isdir(cachedir):
 97 |         os.mkdir(cachedir)
 98 |     cachefile = os.path.join(cachedir, 'annots.pkl')
 99 |     # read list of images
100 |     with open(imagesetfile, 'r') as f:
101 |         lines = f.readlines()
102 |     imagenames = [x.strip() for x in lines]
103 | 
104 |     if not os.path.isfile(cachefile):
105 |         # load annots
106 |         recs = {}
107 |         for i, imagename in enumerate(imagenames):
108 |             recs[imagename] = parse_rec(annopath.format(imagename))
109 |             #if i % 100 == 0:
110 |                 #print('Reading annotation for {:d}/{:d}').format(i + 1, len(imagenames))
111 |         # save
112 |         #print('Saving cached annotations to {:s}').format(cachefile)
113 |         with open(cachefile, 'wb') as f:
114 |             cPickle.dump(recs, f)
115 |     else:
116 |         # load
117 |         print('!!! cachefile = ',cachefile)
118 |         with open(cachefile, 'rb') as f:
119 |             recs = cPickle.load(f)
120 | 
121 |     # extract gt objects for this class
122 |     class_recs = {}
123 |     npos = 0
124 |     for imagename in imagenames:
125 |         R = [obj for obj in recs[imagename] if obj['name'] == classname]
126 |         bbox = np.array([x['bbox'] for x in R])
127 |         difficult = np.array([x['difficult'] for x in R]).astype(np.bool)
128 |         det = [False] * len(R)
129 |         npos = npos + sum(~difficult)
130 |         class_recs[imagename] = {'bbox': bbox,
131 |                                  'difficult': difficult,
132 |                                  'det': det}
133 | 
134 |     # read dets
135 |     detfile = detpath.format(classname)
136 |     with open(detfile, 'r') as f:
137 |         lines = f.readlines()
138 | 
139 |     splitlines = [x.strip().split(' ') for x in lines]
140 |     image_ids = [x[0] for x in splitlines]
141 |     confidence = np.array([float(x[1]) for x in splitlines])
142 |     BB = np.array([[float(z) for z in x[2:]] for x in splitlines])
143 | 
144 |     # sort by confidence
145 |     sorted_ind = np.argsort(-confidence)
146 |     sorted_scores = np.sort(-confidence)
147 |     BB = BB[sorted_ind, :]
148 |     image_ids = [image_ids[x] for x in sorted_ind]
149 | 
150 |     # go down dets and mark TPs and FPs
151 |     nd = len(image_ids)
152 |     tp = np.zeros(nd)
153 |     fp = np.zeros(nd)
154 |     for d in range(nd):
155 |         R = class_recs[image_ids[d]]
156 |         bb = BB[d, :].astype(float)
157 |         ovmax = -np.inf
158 |         BBGT = R['bbox'].astype(float)
159 | 
160 |         if BBGT.size > 0:
161 |             # compute overlaps
162 |             # intersection
163 |             ixmin = np.maximum(BBGT[:, 0], bb[0])
164 |             iymin = np.maximum(BBGT[:, 1], bb[1])
165 |             ixmax = np.minimum(BBGT[:, 2], bb[2])
166 |             iymax = np.minimum(BBGT[:, 3], bb[3])
167 |             iw = np.maximum(ixmax - ixmin + 1., 0.)
168 |             ih = np.maximum(iymax - iymin + 1., 0.)
169 |             inters = iw * ih
170 | 
171 |             # union
172 |             uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
173 |                    (BBGT[:, 2] - BBGT[:, 0] + 1.) *
174 |                    (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
175 | 
176 |             overlaps = inters / uni
177 |             ovmax = np.max(overlaps)
178 |             jmax = np.argmax(overlaps)
179 | 
180 |         if ovmax > ovthresh:
181 |             if not R['difficult'][jmax]:
182 |                 if not R['det'][jmax]:
183 |                     tp[d] = 1.
184 |                     R['det'][jmax] = 1
185 |                 else:
186 |                     fp[d] = 1.
187 |         else:
188 |             fp[d] = 1.
189 | 
190 |     # compute precision recall
191 |     fp = np.cumsum(fp)
192 |     tp = np.cumsum(tp)
193 |     rec = tp / float(npos)
194 |     # avoid divide by zero in case the first detection matches a difficult
195 |     # ground truth
196 |     prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
197 |     ap = voc_ap(rec, prec, use_07_metric)
198 | 
199 |     return rec, prec, ap


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ## YOLOv3行人检测
  2 | 
  3 | 本脚本集合主要是针对YOLOv3的两个主流版本（[AlexeyAB/darknet](https://github.com/AlexeyAB/darknet) & [pjreddie/darknet](https://github.com/pjreddie/darknet)），本身不包含YOLOv3的代码和配置文件，但是根据指引可以完成一个效果较好的行人检测系统。
  4 | 
  5 | 目前主要是以下几个功能：
  6 | 
  7 | 0. 将YOLOv3常用的网址和资料归纳整理了一下；
  8 | 1. 提供代码，帮助从VOC2007/VOC2012/COCO等数据集中提取出person类图片，并转换标注。这里提取VOC数据集时默认保留了全部difficult=1的图片，效果会更好，具体请参见[Issue #1200](https://github.com/AlexeyAB/darknet/issues/1200)；
  9 | 2. 提供代码，帮助计算mAP正确率；
 10 | 3. 建议使用tee指令保存训练日志，可使用本文提供的脚本实现训练过程可视化；
 11 | 
 12 | 
 13 | ## 效果对比
 14 | 
 15 | YOLO_mine（基于AB版本，只检测行人）
 16 | 
 17 | ![kite-7-final](https://github.com/pascal1129/yolo_person_detect/blob/master/images/kite-7-final.jpg)
 18 | 
 19 | YOLO_pj（pj版本，所有类别全检测）
 20 | 
 21 | ![kite-pj](https://github.com/pascal1129/yolo_person_detect/blob/master/images/kite-pj.jpg)
 22 | 
 23 | 
 24 | 
 25 | ## 文件结构
 26 | 
 27 | ```
 28 | yolo_person_detect
 29 | |
 30 | │  README.md
 31 | │
 32 | ├─make_yolo_dataset
 33 | │  │  helmet_to_yolo.py                     # 某次比赛用到的转换代码
 34 | │  │  show_voc_xml.py                       # 可视化VOC数据集
 35 | │  │  show_yolo_label.py                    # 可视化YOLO数据集
 36 | │  │  xml2json.py                           # 
 37 | │  │  coco_to_yolo.py                       # COCO --> YOLO
 38 | │  │
 39 | │  ├─YOLO_VOC2007                           # VOC2007
 40 | │  │      extract_person.py                 # 从VOC2007数据集中提取person图片
 41 | │  │      voc_label.py                      # 将提取到的person图片转为YOLO格式
 42 | │  │
 43 | │  └─YOLO_VOC2007_2012                      # VOC2007 + VOC2012
 44 | │          extract_person_2007.py
 45 | │          extract_person_2012.py
 46 | │          voc_label.py
 47 | │
 48 | ├─yolo_compute_mAP_on_VOC2007               # 在VOC2007上测试性能
 49 | │      reval_voc_py3.py
 50 | │      voc_eval_py3.py
 51 | │
 52 | └─yolo_loss_analyse
 53 |     │  analyse.py                           # 训练过程可视化代码
 54 |     │  result.png                           # 可视化训练过程
 55 |     └─loss
 56 |             train7-loss.txt                 # 示例训练日志
 57 | ```
 58 | 
 59 | 
 60 | 
 61 | ## 官方Demo运行
 62 | 
 63 | - 下载源代码、预训练权重
 64 | 
 65 | ```Shell
 66 | git clone https://github.com/AlexeyAB/darknet.git
 67 | cd darknet/
 68 | wget https://pjreddie.com/media/files/yolov3.weights
 69 | ```
 70 | 
 71 | - 如果需要编译OpenCV，可参见 [Pascal129/yolo_person_detect: Linux下OpenCV编译和指定版本调用](https://github.com/pascal1129/yolo_person_detect/blob/master/OpenCV_make.md)
 72 | - 修改Makefile并编译，可以加上 -j8 等参数设定多CPU编译
 73 | 
 74 | ```Shell
 75 | vim Makefile	
 76 | make
 77 | ```
 78 | 
 79 | 注意：OpenCV版本号亲测3.40可用，但是3.41不可用，可参见[YOLOv3的Darknet在OpenCV下编译出错填坑](https://zhuanlan.zhihu.com/p/36933700)
 80 | 
 81 | - 试运行图片检测demo
 82 | ```
 83 | ./darknet detect cfg/yolov3.cfg yolov3.weights data/dog.jpg
 84 | ```
 85 | 
 86 | - 试运行视频检测demo
 87 | 
 88 | ```
 89 | ./darknet detector demo cfg/coco.data cfg/yolov3.cfg yolov3.weights <video file>
 90 | ```
 91 | 
 92 | 
 93 | 
 94 | ## 数据集准备
 95 | 
 96 | 处理时，默认所有的train = train+val，不区分二者，2007单独还有个test
 97 | 
 98 | #### 0、下载VOC2007+2012数据集
 99 | 
100 | 下载数据集
101 | 
102 |     wget <http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar>
103 |     wget <http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar>
104 |     wget <http://pjreddie.com/media/files/VOCtrainval_11-May-2012.tar>
105 | 
106 | 解压缩
107 | 
108 |     tar xvf VOCtrainval_06-Nov-2007.tar
109 |     tar xvf VOCtest_06-Nov-2007.tar
110 |     tar xvf VOCtrainval_11-May-2012.tar
111 | 
112 | #### 1、通过extract_person.py提取含人数据
113 | 
114 | 分别运行2007和2012的提取代码
115 | 
116 | #### 2、通过voc_label.py转化voc数据格式为yolo支持的格式
117 | 会在脚本所在文件夹产生相应的txt文本，存储数据集地址信息：
118 | 
119 |     2007_train.txt 
120 |     2007_test.txt
121 |     2012_train.txt
122 | 
123 | 可根据情况进行配置，比如说不想要测试集，那就整合下训练集、测试集：
124 | 
125 | 	cat 2007_train.txt 2012_train.txt > train.txt
126 | 
127 | 
128 | 
129 | ## 配置文件
130 | 
131 | #### 0、下载源代码，下载预训练权重
132 | 
133 | 	git clone https://github.com/AlexeyAB/darknet.git
134 | 	wget https://pjreddie.com/media/files/darknet53.conv.74
135 | 
136 | #### 1、配置Makefile
137 | 
138 | #### 2、配置cfg/voc.data
139 | ```
140 | classes= 1
141 | train  = /home/pascal/person_data2/train.txt
142 | valid  = /home/pascal/person_data2/2007_test.txt
143 | names = data/voc.names
144 | backup = backup
145 | ```
146 | 
147 | #### 3、配置data/voc.names
148 | 
149 | 	person
150 | #### 4、新建backup文件夹
151 | 
152 | 	mkdir backup
153 | 
154 | #### 5、配置cfg/yolov3-voc.cfg
155 | 
156 | 	batch, sub按需修改
157 | 	一共三个YOLO层，均需要修改：
158 | 	classes=1
159 | 	filters=18		3*(1+1+4)=18
160 | 	
161 | 	# filters=(classes + coords + 1)*<number of mask>
162 | 
163 | #### 6、make编译
164 | 
165 | #### 7、开始训练，并留下日志
166 | 
167 |     ./darknet detector train cfg/voc.data cfg/yolov3-voc.cfg ../darknet53.conv.74 -gpus 0,1 | tee -a helmet1.txt
168 | 
169 | #### 8、恢复训练
170 | 
171 | 	./darknet detector train cfg/voc.data cfg/yolov3-voc.cfg backup/yolov3-voc.backup -gpus 0,1 | tee -a train7.log
172 | 
173 | #### 9、单张图片测试（需要将batch、subvision均修改为1）
174 | 
175 | 	./darknet detector test cfg/voc.data cfg/yolov3-voc.cfg backup/yolov3-voc_final.weights test_data/p0.jpg
176 | 
177 | #### 10、性能检测
178 | 
179 | 计算mAp
180 | 
181 | 	./darknet detector map cfg/voc.data cfg/yolov3-voc.cfg backup/yolov3-voc_80172.weights
182 | 
183 | 计算recall（2097张的结果）
184 | 
185 | 	./darknet detector recall cfg/voc.data cfg/yolov3-voc.cfg backup/yolov3-voc_final.weights 
186 | 
187 | VOC2007test
188 | 
189 | ```Shell
190 | mkdir results
191 | #（PJ版本需要这一步）
192 | # (会在/results生成默认的comp4_det_test_person.txt，这是在VOC2007 test上的结果)
193 | ./darknet detector valid cfg/voc.data cfg/yolov3-voc.cfg backup/yolov3-voc_final.weights -gpu 0,1
194 | 
195 | #在 https://github.com/AlexeyAB/darknet/tree/master/scripts 下载voc_eval_py3.py reval_voc_py3.py ，在./执行
196 | python reval_voc_py3.py output_dir='./'
197 | ```
198 | 
199 | 
200 | 
201 | ## 训练常用指令
202 | 
203 | 训练：加上了tee指令把训练日志保存到txt文本
204 | 
205 | ```
206 | ./darknet detector train cfg/voc.data cfg/yolov3-voc.cfg darknet53.conv.74 -gpus 0,1 |tee -a train7.txt
207 | ```
208 | 
209 | 恢复训练
210 | 
211 | ```
212 | ./darknet detector train cfg/voc.data cfg/yolov3-voc.cfg backup/yolov3-voc.backup -gpus 0,1 tee -a train7.txt
213 | ```
214 | 
215 | 测试
216 | 
217 | ```
218 | ./darknet detector test cfg/voc.data cfg/yolov3-voc.cfg backup/yolov3-voc_50000.weights ../test_data/p2.jpg
219 | ```
220 | 
221 | 
222 | 
223 | ## VOC数据集格式
224 | 
225 | 以上述VOC2007+VOC2012数据集为例，以下均为处理过的数据集：
226 | 
227 |     VOCdevkit/
228 |     ├── VOC2007
229 |     │   ├── Annotations     原始的VOC标注信息
230 |     │   ├── ImageSets		
231 |     │   │   └── Main		VOC的类标签，和人为提取的样本索引
232 |     │   ├── JPEGImages	    图片文件夹
233 |     │   └── labels			VOC-->YOLO格式的标注信息
234 |     └── VOC2012
235 |          ├── Annotations
236 |          ├── ImageSets
237 |          │   └── Main
238 |          ├── JPEGImages
239 |          └── labels
240 | 
241 | #### VOC的xml格式
242 | 
243 | 核心部分如下:
244 | 
245 | 	<annotation>
246 | 		<object>
247 | 	        <name>person</name>
248 | 	        <difficult>0</difficult>
249 | 	        <bndbox>
250 | 	            <xmin>xxx</xmin>
251 | 	            <ymin>xxx</ymin>
252 | 	            <xmax>xxx</xmax>
253 | 	            <ymax>xxx</ymax>
254 | 			</bndbox>
255 | 		</object>
256 | 	</annotation>
257 | 
258 | #### VOC的图片格式
259 | 
260 | 行列分布同pillow.Image，先行后列
261 | 
262 | 
263 | 
264 | ## COCO数据集格式
265 | 
266 | 数据集格式
267 | 
268 | 	COCO
269 | 	├── annotations
270 | 	├── filelist			训练图片位置
271 | 	└── train2017
272 | 	    ├── JPEGImages	图片
273 | 	    └── labels			生成的YOLO格式标签
274 | 
275 | [COCO数据集下载](<http://cocodataset.org/#download>)
276 | 
277 | [COCO-->YOLO格式转换，代码包含了操作介绍](<https://github.com/PaulChongPeng/darknet/blob/master/tools/coco_label.py>)
278 | 
279 | 
280 | 
281 | ## 数据集统计
282 | 
283 | 数据集        | 训练集（person）| 测试集（person）
284 | ----------------| ---|---
285 | VOC2007   | 5011（2095）  |4952（2097）
286 | VOC2012   | 17125（4374）|未统计
287 | COCO2017| 118287（64115）|未统计
288 | 
289 | \* 括号里面为person类图片数量
290 | 
291 | 
292 | 
293 | ## 训练过程
294 | 
295 | 数据集：VOC2007+VOC2012+COCO2017
296 | 
297 | 硬件环境：GTX1080ti*2, Ubuntu16.04, CUDA9
298 | 
299 | 训练迭代数：8w iters
300 | 
301 | 训练技巧：参见[how-to-train-to-detect-your-custom-objects](https://github.com/AlexeyAB/darknet#how-to-train-to-detect-your-custom-objects)
302 | 
303 | 
304 | 
305 | ![训练过程](https://github.com/pascal1129/yolo_person_detect/blob/master/images/loss_analyse_result.png)
306 | 
307 | 上图是脚本[analyse.py](https://github.com/pascal1129/yolo_person_detect/blob/master/yolo_loss_analyse/analyse.py)对训练日志[train7-loss.txt](https://github.com/pascal1129/yolo_person_detect/blob/master/yolo_loss_analyse/loss/train7-loss.txt)的训练过程可视化
308 | 
309 | 配置：batch=64/16，总计8w次迭代，在原来的5w次之后，追加2w次0.001和1w次0.0001，0.00001
310 | 
311 | 耗时：5h/万次迭代
312 | 
313 | 结果：loss=0.98
314 | 
315 | map        | p| r|f1|IOU
316 | ----------------| ---|---|--|--
317 | 0.8552   | 0.85  |0.84|0.84|69.4%
318 | 
319 | 
320 | 
321 | ## 参考资料
322 | 
323 | [yolov3训练的集大成者](<https://blog.csdn.net/lilai619/article/details/79695109>)
324 | 
325 | [配置文件的设定参考](<https://blog.csdn.net/helloworld1213800/article/details/79749359>)
326 | 
327 | [YOLO官网](<https://pjreddie.com/darknet/yolo/>)
328 | 
329 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------