├── LICENSE ├── MOTEvaluate ├── __init__.py ├── evaluate.py ├── evaluate_pipeline.py └── evaluate_utils │ ├── __init__.py │ ├── bbox.py │ ├── convert.py │ ├── io.py │ └── measurements.py ├── MVI_39401_track_fps12.gif ├── MVI_39501_track_fps12.gif ├── MVI_40855_track_fps12.gif ├── README.md ├── auto_weighted_loss.py ├── cfg ├── mobile-yolo-3l.cfg ├── yolov4-pacsp-mish.cfg ├── yolov4-pacsp-s-mish.cfg ├── yolov4-pacsp-s.cfg ├── yolov4-pacsp-x-mish.cfg ├── yolov4-pacsp-x.cfg ├── yolov4-pacsp.cfg ├── yolov4-paspp-mcmot.cfg ├── yolov4-paspp.cfg └── yolov4-tiny.cfg ├── data ├── coco.data ├── coco.names ├── coco1.data ├── coco1.txt ├── coco16.data ├── coco16.txt ├── coco1cls.data ├── coco1cls.txt ├── coco2014.data ├── coco2017.data ├── coco64.data ├── coco64.txt ├── coco_paper.names ├── get_coco2014.sh ├── get_coco2017.sh ├── mcmot.data ├── mcmot.names ├── mcmot_det.data ├── mcmot_det_old.train ├── mcmot_det_train_old.txt ├── test1.txt ├── test2.txt └── train1.txt ├── demo.py ├── detect.py ├── mAPEvaluate ├── DetectImgAndWriteResultToXml.py ├── ReadAndSaveDarknetDetRes.py ├── ReadAnnotations.py ├── TestmApDetect.py ├── cmp_det_label.py ├── cmp_det_label_sf.py ├── darknet.py ├── darknet_ori_diou_cfg.py ├── findImgByObjectType.py ├── findImgByObjectType_zhou.py ├── model_analysis_PLM.py ├── model_analysis_half.py ├── model_analysis_v4_coco.py ├── model_analysis_v4all.py ├── model_analysis_v4half.py └── voc_eval.py ├── models.py ├── requirements.txt ├── test.py ├── test5_track.gif ├── test_half.py ├── tracker ├── basetrack.py ├── matching.py └── multitracker.py ├── tracking_utils ├── evaluation.py ├── io.py ├── kalman_filter.py ├── log.py ├── nms.py ├── parse_config.py ├── timer.py ├── utils.py └── visualization.py ├── train.py ├── utils ├── __init__.py ├── adabound.py ├── datasets.py ├── evolve.sh ├── gcp.sh ├── gen_dataset_mcmot.py ├── google_utils.py ├── layers.py ├── parse_config.py ├── process_mcmot_dataset.py ├── torch_utils.py └── utils.py └── yolov4-tiny-3l_no_group_id_no_upsample.cfg /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Even 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MOTEvaluate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaptainEven/YOLOV4_MCMOT/0cf60be77ae6d088e079ccefedbf71a05f938890/MOTEvaluate/__init__.py -------------------------------------------------------------------------------- /MOTEvaluate/evaluate_pipeline.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | 3 | import os 4 | from MOTEvaluate.evaluate_utils.convert import convert_seqs 5 | from MOTEvaluate.evaluate import evaluate_mcmot_seqs 6 | from demo import DemoRunner 7 | 8 | 9 | # build evaluation pipeline for test set 10 | def evaluate_test_set(test_root): 11 | """ 12 | :param test_root: 13 | :return: 14 | """ 15 | # ---------- set Project root 16 | ROOT = '/mnt/diskb/even/YOLOV4' 17 | 18 | # ---------- init demo runner 19 | demo = DemoRunner() 20 | 21 | # ---------- set object class names 22 | demo.opt.names = ROOT + '/data/mcmot.names' 23 | 24 | # ----------- set weights and cfg file for different models 25 | # demo.opt.cfg = ROOT + '/cfg/' + 'yolov4-tiny-3l_no_group_id_no_upsample.cfg' 26 | # demo.opt.weights = ROOT + '/weights/' + 'v4_tiny3l_no_upsample_track_last.pt' 27 | 28 | # demo.opt.cfg = ROOT + '/cfg/' + 'yolov4_mobilev2_2l.cfg' 29 | # demo.opt.weights = ROOT + '/weights/' + 'track_last.pt' 30 | 31 | demo.opt.cfg = ROOT + '/cfg/' + 'yolov4-tiny-3l_no_group_id_no_upsample.cfg' 32 | demo.opt.weights = ROOT + '/weights/' + 'track_last.pt' 33 | 34 | if not os.path.isfile(demo.opt.cfg): 35 | print('[Err]: invalid cfg file.') 36 | return 37 | if not os.path.isfile(demo.opt.weights): 38 | print('[Err]: invalid weight file.') 39 | return 40 | 41 | print('Cfg: {:s}.'.format(demo.opt.cfg)) 42 | print('Weights: {:s}.\n'.format(demo.opt.weights)) 43 | 44 | # ----------- set test input videos' dir and tracking results dir 45 | demo.opt.videos = '/mnt/diskb/even/dataset/MCMOT_Evaluate' 46 | demo.opt.save_img_dir = demo.opt.videos 47 | 48 | # ---------- set standard out fps and interval: set test fps 49 | demo.opt.outFPS = 12 50 | demo.opt.interval = 1 51 | 52 | # ---------- labels preparation 53 | # Check test root for video and dark label format label file(txt) 54 | # Convert dark-label label file to mot16 format 55 | convert_seqs(seq_root=test_root, 56 | interval=demo.opt.interval, 57 | default_fps=demo.opt.outFPS, 58 | one_plus=True) 59 | # ---------- 60 | 61 | # ---------- Run tracking 62 | # Call mcmot-yolov4(demo.py) to do tracking(generate results.txt) 63 | # set task mode and output results type 64 | demo.opt.task = 'track' 65 | demo.opt.output_type = 'txts' 66 | 67 | # run tracking and output results.txt(MOT16) 68 | demo.run() 69 | # ---------- 70 | 71 | # --------- Run evaluation 72 | out_fps = demo.opt.outFPS // int(demo.opt.interval) 73 | evaluate_mcmot_seqs(test_root, default_fps=out_fps) 74 | # --------- 75 | 76 | 77 | if __name__ == '__main__': 78 | evaluate_test_set(test_root='/mnt/diskb/even/dataset/MCMOT_Evaluate') 79 | print('Done.') 80 | -------------------------------------------------------------------------------- /MOTEvaluate/evaluate_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaptainEven/YOLOV4_MCMOT/0cf60be77ae6d088e079ccefedbf71a05f938890/MOTEvaluate/evaluate_utils/__init__.py 
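The pipeline above chains three stages: DarkLabel-to-MOT16 label conversion (convert_seqs), tracking via demo.py (which writes results.txt), and metric computation (evaluate_mcmot_seqs). A minimal sketch of the comma-separated row format both sides agree on, as written by convert_darklabel_2_mot16 and parsed by read_txt_to_struct further below (the sample values here are made up):

```python
import numpy as np

# One row per object per frame:
# frame, track_id, left, top, width, height, conf, class_id, visibility
sample = '1,5,100,200,50,80,1,0,1'

row = np.array(list(map(float, sample.strip().split(','))))
xyxy = row[2:6].copy()
xyxy[2:] += xyxy[:2]  # the same tlwh -> xyxy shift that read_txt_to_struct applies
print(int(row[0]), int(row[1]), xyxy)  # frame 1, track 5, [100. 200. 150. 280.]
```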
-------------------------------------------------------------------------------- /MOTEvaluate/evaluate_utils/bbox.py: -------------------------------------------------------------------------------- 1 | """ 2 | 2D MOT2016 Evaluation Toolkit 3 | A Python reimplementation of the toolkit in 4 | 2DMOT16 (https://motchallenge.net/data/MOT16/) 5 | 6 | This file computes bounding box overlap 7 | 8 | (C) Yiwen Liu(765305261@qq.com), 2020-10 9 | """ 10 | import numpy as np 11 | 12 | 13 | def bbox_overlap(ex_box, gt_box): 14 | ex_box = ex_box.reshape(-1, 4) 15 | gt_box = gt_box.reshape(-1, 4) 16 | padded_gt = np.tile(gt_box, [ex_box.shape[0], 1]) 17 | insec = intersection(ex_box, padded_gt) 18 | 19 | uni = area_sum(ex_box, padded_gt) - insec 20 | return insec / uni 21 | 22 | 23 | def intersection(a, b): 24 | x = np.maximum(a[:, 0], b[:, 0]) 25 | y = np.maximum(a[:, 1], b[:, 1]) 26 | w = np.minimum(a[:, 2], b[:, 2]) - x 27 | h = np.minimum(a[:, 3], b[:, 3]) - y 28 | return np.maximum(w, 0) * np.maximum(h, 0) 29 | 30 | 31 | def area_sum(a, b): 32 | return (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]) + \ 33 | (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 34 | -------------------------------------------------------------------------------- /MOTEvaluate/evaluate_utils/convert.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | 3 | import os 4 | import cv2 5 | 6 | classes = [ 7 | 'car', # 0 8 | 'bicycle', # 1 9 | 'person', # 2 10 | 'cyclist', # 3 11 | 'tricycle' # 4 12 | ] # 5 classes (background not included) 13 | 14 | cls2id = { 15 | 'car': 0, 16 | 'bicycle': 1, 17 | 'person': 2, 18 | 'cyclist': 3, 19 | 'tricycle': 4 20 | } 21 | 22 | id2cls = { 23 | 0: 'car', 24 | 1: 'bicycle', 25 | 2: 'person', 26 | 3: 'cyclist', 27 | 4: 'tricycle' 28 | } 29 | 30 | # image width and height 31 | W, H = 1920, 1080 32 | 33 | 34 | def convert_darklabel_2_mot16(darklabel_txt_path, 35 | interval=1, 36 | default_fps=12, 37 | one_plus=True, 38 | out_mot16_path=None): 39 | """ 40 | Convert DarkLabel annotations (frame # n [id, x1, y1, x2, y2, label]) 41 | to the MOT16 format 42 | """ 43 | if not os.path.isfile(darklabel_txt_path): 44 | print('[Err]: invalid input file path.') 45 | return 46 | 47 | if out_mot16_path is None: 48 | out_fps = default_fps // int(interval) 49 | print('[Note]: out_mot16_path not defined, using default.') 50 | dir_name, file_name = os.path.split(darklabel_txt_path) 51 | out_mot16_path = dir_name + '/' + \ 52 | file_name.split('.')[0] + \ 53 | '_mot16_fps{:d}.txt'.format(out_fps) 54 | 55 | with open(darklabel_txt_path, 'r', encoding='utf-8') as r_h, \ 56 | open(out_mot16_path, 'w', encoding='utf-8') as w_h: 57 | lines = r_h.readlines() 58 | 59 | # iterate over each frame 60 | fr_idx = 0 61 | for fr_i, line in enumerate(lines): 62 | if fr_i % interval != 0: 63 | continue 64 | 65 | line = line.strip().split(',') 66 | fr_id = int(line[0]) 67 | n_objs = int(line[1]) 68 | 69 | # iterate over each object in the current frame 70 | for cur in range(2, len(line), 6): 71 | class_type = line[cur + 5].strip() 72 | class_id = cls2id[class_type] # class type => class id 73 | 74 | # read the track id 75 | if one_plus: 76 | track_id = int(line[cur]) + 1 # track ids start from 1 77 | else: 78 | track_id = int(line[cur]) 79 | 80 | # read the bbox coordinates 81 | x1, y1 = int(line[cur + 1]), int(line[cur + 2]) 82 | x2, y2 = int(line[cur + 3]), int(line[cur + 4]) 83 | 84 | # clip the bbox to the image resolution 85 | x1 = x1 if x1 >= 0 else 0 86 | x1 = x1 if x1 < W else W - 1 87 | y1 = y1 if y1 >= 0 else 0 88 | y1 = y1 if y1 < H else H - 1 89 | x2 = x2 if x2 >= 0 else 0 90 | x2 = x2 if x2 < W else W - 1 91 | y2 = y2 if y2 >= 0 else 0 92 | y2 =
y2 if y2 < H else H - 1 93 | 94 | left, top = x1, y1 95 | width, height = x2 - x1, y2 - y1 96 | 97 | # write this object's record 98 | if interval == 1: 99 | write_line_str = str(fr_id + 1) + ',' \ 100 | + str(track_id) + ',' \ 101 | + str(left) + ',' \ 102 | + str(top) + ',' \ 103 | + str(width) + ',' \ 104 | + str(height) + ',' \ 105 | + '1,' + str(class_id) + ',' + '1' 106 | else: 107 | write_line_str = str(fr_idx + 1) + ',' \ 108 | + str(track_id) + ',' \ 109 | + str(left) + ',' \ 110 | + str(top) + ',' \ 111 | + str(width) + ',' \ 112 | + str(height) + ',' \ 113 | + '1,' + str(class_id) + ',' + '1' 114 | # print(write_line_str) 115 | w_h.write(write_line_str + '\n') 116 | 117 | fr_idx += 1 118 | print('Total {:d} frames sampled'.format(fr_idx)) 119 | 120 | print('{:s} written.'.format(out_mot16_path)) 121 | 122 | 123 | def convert_seqs(seq_root, interval=1, default_fps=12, one_plus=True): 124 | """Convert the DarkLabel gt file of each .mp4 seq under seq_root. 125 | """ 126 | if not os.path.isdir(seq_root): 127 | print('[Err]: invalid seq root.') 128 | return 129 | 130 | seq_names = [x for x in os.listdir(seq_root) if x.endswith('.mp4')] 131 | for seq_name in seq_names: 132 | darklabel_txt_path = seq_root + '/' + seq_name[:-4] + '_gt.txt' 133 | 134 | # ---------- do parsing for a seq 135 | convert_darklabel_2_mot16(darklabel_txt_path, 136 | interval=interval, 137 | default_fps=default_fps, 138 | one_plus=one_plus, 139 | out_mot16_path=None) 140 | # ---------- 141 | 142 | 143 | if __name__ == '__main__': 144 | # convert_darklabel_2_mot16(darklabel_txt_path='F:/seq_data/images/mcmot_seq_imgs_1/mcmot_seq_imgs_1_gt.txt') 145 | convert_seqs(seq_root='/mnt/diskb/even/dataset/MCMOT_Evaluate', 146 | interval=1, 147 | default_fps=12, 148 | one_plus=True) 149 | # convert_darklabel_2_mot16(darklabel_txt_path='F:/val_seq/val_1_gt.txt', 150 | # interval=1, 151 | # fps=12, 152 | # one_plus=False, 153 | # out_mot16_path=None) 154 | 155 | print('Done.') 156 | -------------------------------------------------------------------------------- /MOTEvaluate/evaluate_utils/io.py: -------------------------------------------------------------------------------- 1 | """ 2 | 2D MOT2016 Evaluation Toolkit 3 | A Python reimplementation of the toolkit in 4 | 2DMOT16 (https://motchallenge.net/data/MOT16/) 5 | 6 | This file deals with file IO / invalid annotation 7 | removal / result output 8 | 9 | (C) Yiwen Liu(765305261@qq.com), 2020-10 10 | """ 11 | import os 12 | import numpy as np 13 | 14 | 15 | def read_seqmaps(fname): 16 | """ 17 | seqmap: lists the sequence names to be evaluated 18 | """ 19 | assert os.path.exists(fname), 'File %s does not exist!' % fname 20 | with open(fname, 'r') as fid: 21 | lines = [line.strip() for line in fid.readlines()] 22 | seq_names = lines[1:] 23 | return seq_names 24 | 25 | 26 | def read_txt_to_struct(f_name): 27 | """ 28 | read txt into a structure; the columns represent: 29 | [frame number] [identity number] [bbox left] [bbox top] 30 | [bbox width] [bbox height] [DET: detection score, 31 | GT: ignored class flag] [class] [visibility ratio] 32 | """ 33 | data = [] 34 | with open(f_name, 'r', encoding='utf-8') as fid: 35 | lines = fid.readlines() 36 | for line in lines: 37 | line = list(map(float, line.strip().split(','))) 38 | data.append(line) 39 | data = np.array(data) 40 | 41 | # change tlwh format to xyxy format 42 | data[:, 4:6] += data[:, 2:4] 43 | return data 44 | 45 | 46 | def extract_valid_gt_data(all_data, remove_ofv=False): 47 | """ 48 | remove non-valid classes.
49 | Following the MOT2016 format, 50 | valid classes include [1: pedestrian]; 51 | distractor classes include [2: person on vehicle, 52 | 7: static person, 8: distractor, 12: reflection]. 53 | """ 54 | distractor_classes = [2, 7, 8, 12] 55 | valid_classes = [1] 56 | original = all_data.shape[0] 57 | 58 | # keep only the pedestrian and distractor classes; all other classes 59 | # are removed (distractors are kept for further use) 60 | selected = np.array([ 61 | i for i in range(all_data.shape[0]) 62 | if all_data[i, 7] in valid_classes + distractor_classes]) 63 | all_data = all_data[selected, :] 64 | 65 | # remove boxes whose centers are out of view 66 | # Because this tool is not only meant for MOT, the resolution is not 67 | # assumed to be provided. In MOT, the maximum width and height should 68 | # be taken into consideration 69 | 70 | # PS: As stated by the author of the MOT benchmark, it would be better if 71 | # the tracker could figure out out-of-view pedestrians like a human does. 72 | # Thus no filtering 73 | if remove_ofv: # remove out of view for ground truth 74 | selected = np.array([i for i in range(all_data.shape[0]) 75 | if (all_data[i, 2] + all_data[i, 4]) / 2 >= 0 and 76 | (all_data[i, 3] + all_data[i, 5]) / 2 >= 0]) 77 | 78 | # right and bottom out-of-range are not considered here. Anyway, ofv 79 | # is not removed in MOT2016 80 | # selected = np.array([i for i in xrange(all_data.shape[0]) 81 | # if (all_data[i, 2] + all_data[i, 4]) / 2 != 0 82 | # ]) 83 | all_data = all_data[selected, :] 84 | 85 | # remove non-human classes from ground truth, 86 | # and return distractor identities 87 | cond = np.array( 88 | [i in valid_classes + distractor_classes for i in all_data[:, 7]]) 89 | selected = np.where(cond)[0] 90 | all_data = all_data[selected, :] # not necessary? 91 | 92 | print('[GT PREPROCESSING]: Removing non-people classes, remaining ' 93 | '{}/{} boxes'.format(all_data.shape[0], original)) 94 | cond = np.array([i in distractor_classes for i in all_data[:, 7]]) 95 | selected = np.where(cond)[0] 96 | 97 | all_distractor_ids = all_data[selected, 1] 98 | unique_distractor_ids = np.unique(all_distractor_ids) 99 | return all_data, unique_distractor_ids 100 | 101 | 102 | def print_format(widths, formatters, values, form_attr): 103 | return ' '.join([(form_attr % (width, form)).format(val) for ( 104 | form, width, val) in zip(formatters, widths, values)]) 105 | 106 | 107 | def print_format_name(widths, values, form_attr): 108 | return ' '.join([(form_attr % (width)).format(val) for (width, val) in zip( 109 | widths, values)]) 110 | 111 | 112 | def print_metrics(header, metrics, banner=25): 113 | """Print a formatted table of MOT metrics. 114 | """ 115 | if len(metrics) == 17: 116 | print_metrics_ext(header, metrics) 117 | return 118 | 119 | print('\n', '*' * banner, header, '*' * banner) 120 | # metric_names_long = ['Recall', 'Precision', 'False Alarm Rate', 121 | # 'GT Tracks', 'Mostly Tracked', 'Partially Tracked', 122 | # 'Mostly Lost', 'False Positives', 'False Negatives', 123 | # 'ID Switches', 'Fragmentations', 124 | # 'MOTA', 'MOTP', 'MOTA Log'] 125 | 126 | metric_names_short = ['Rcll', 'Prcn', 'FAR', 127 | 'GT', 'MT', 'PT', 'ML', 128 | 'FP', 'FN', 'IDs', 'FM', 129 | 'MOTA', 'MOTP', 'MOTAL'] 130 | 131 | # metric_widths_long = [6, 9, 16, 9, 14, 17, 11, 15, 15, 11, 14, 5, 5, 8] 132 | metric_widths_short = [5, 5, 5, 4, 4, 4, 4, 6, 6, 5, 5, 5, 5, 5] 133 | 134 | metric_format_long = ['.1f', '.1f', '.2f', 135 | '.0f', '.0f', '.0f', '.0f', 136 | '.0f', '.0f', '.0f', '.0f', 137 | '.1f', '.1f', '.1f'] 138 | 139 | splits = [(0, 3), (3, 7), (7, 11), (11, 14)] 140 |
print(' | '.join([print_format_name( 141 | metric_widths_short[start:end], 142 | metric_names_short[start:end], '{0: <%d}') 143 | for (start, end) in splits])) 144 | 145 | metric_str = ' | '.join([print_format( 146 | metric_widths_short[start:end], 147 | metric_format_long[start:end], 148 | metrics[start:end], '{:%d%s}') for (start, end) in splits]) 149 | # metric_str = metric_str[1:] 150 | print(metric_str) 151 | 152 | 153 | def print_metrics_ext(header, metrics, banner=30): 154 | print('\n{} {} {}'.format('*' * banner, header, '*' * banner)) 155 | # metric_names_long = ['IDF1', 'IDP', 'IDR', 156 | # 'Recall', 'Precision', 'False Alarm Rate', 157 | # 'GT Tracks', 'Mostly Tracked', 'Partially Tracked', 158 | # 'Mostly Lost', 159 | # 'False Positives', 'False Negatives', 'ID Switches', 160 | # 'Fragmentations', 161 | # 'MOTA', 'MOTP', 'MOTA Log'] 162 | 163 | metric_names_short = ['IDF1', 'IDP', 'IDR', 164 | 'Rcll', 'Prcn', 'FAR', 165 | 'GT', 'MT', 'PT', 'ML', 166 | 'FP', 'FN', 'IDs', 'FM', 167 | 'MOTA', 'MOTP', 'MOTAL'] 168 | 169 | # metric_widths_long = [5, 4, 4, 6, 9, 16, 170 | # 9, 14, 17, 11, 15, 15, 11, 14, 5, 5, 8] 171 | metric_widths_short = [5, 4, 4, 5, 5, 5, 4, 4, 4, 4, 6, 6, 5, 5, 5, 5, 5] 172 | 173 | metric_format_long = ['.1f', '.1f', '.1f', 174 | '.1f', '.1f', '.2f', 175 | '.0f', '.0f', '.0f', '.0f', 176 | '.0f', '.0f', '.0f', '.0f', 177 | '.1f', '.1f', '.1f'] 178 | 179 | splits = [(0, 3), (3, 6), (6, 10), (10, 14), (14, 17)] 180 | 181 | print(' | '.join([print_format_name( 182 | metric_widths_short[start:end], 183 | metric_names_short[start:end], '{0: <%d}') 184 | for (start, end) in splits])) 185 | 186 | metric_str = ' | '.join([print_format( 187 | metric_widths_short[start:end], 188 | metric_format_long[start:end], 189 | metrics[start:end], '{:%d%s}') 190 | for (start, end) in splits]) 191 | 192 | print(metric_str) 193 | print('\n\n') 194 | -------------------------------------------------------------------------------- /MVI_39401_track_fps12.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaptainEven/YOLOV4_MCMOT/0cf60be77ae6d088e079ccefedbf71a05f938890/MVI_39401_track_fps12.gif -------------------------------------------------------------------------------- /MVI_39501_track_fps12.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaptainEven/YOLOV4_MCMOT/0cf60be77ae6d088e079ccefedbf71a05f938890/MVI_39501_track_fps12.gif -------------------------------------------------------------------------------- /MVI_40855_track_fps12.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaptainEven/YOLOV4_MCMOT/0cf60be77ae6d088e079ccefedbf71a05f938890/MVI_40855_track_fps12.gif -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLOV4_MCMOT 2 | Using YOLOV4 as detector for MCMOT. 
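A minimal usage sketch of the tracking entry point, based on how `MOTEvaluate/evaluate_pipeline.py` drives it (the paths below are placeholders; the `DemoRunner` option names are taken from that script):

```python
from demo import DemoRunner

demo = DemoRunner()
demo.opt.cfg = 'cfg/yolov4-tiny-3l_no_group_id_no_upsample.cfg'  # detector cfg
demo.opt.weights = 'weights/track_last.pt'                       # placeholder weights path
demo.opt.names = 'data/mcmot.names'                              # C5 class names
demo.opt.videos = '/path/to/videos'                              # placeholder input video dir
demo.opt.task = 'track'                                          # MCMOT tracking mode
demo.opt.output_type = 'txts'                                    # write MOT16-style txt results
demo.run()
```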
3 | 4 | ## Tracking demo of C5(car, bicycle, person, cyclist, tricycle) using YOLOV4-tiny backbone 5 | ![image](https://github.com/CaptainEven/YOLOV4_MCMOT/blob/YOLOV4_MCMOT_dev/test5_track.gif) 6 | 7 | ## Tracking demo of UA-DETRAC dataset using mobilenetv2-yolo backbone(2 layers of yolo output) 8 | ![image](https://github.com/CaptainEven/YOLOV4_MCMOT/blob/YOLOV4_MCMOT_dev/MVI_39401_track_fps12.gif) 9 | ![image](https://github.com/CaptainEven/YOLOV4_MCMOT/blob/YOLOV4_MCMOT_dev/MVI_39501_track_fps12.gif) 10 | ![image](https://github.com/CaptainEven/YOLOV4_MCMOT/blob/YOLOV4_MCMOT_dev/MVI_40855_track_fps12.gif) 11 | -------------------------------------------------------------------------------- /auto_weighted_loss.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class AutomaticWeightedLoss(nn.Module): 8 | """automatically weighted multi-task loss_funcs 9 | Params: 10 | num: int,the number of loss_funcs 11 | x: multi-task loss_funcs 12 | Examples: 13 | loss1=1 14 | loss2=2 15 | awl = AutomaticWeightedLoss(2) 16 | loss_sum = awl(loss1, loss2) 17 | """ 18 | 19 | def __init__(self, num=2): 20 | super(AutomaticWeightedLoss, self).__init__() 21 | params = torch.ones(num, requires_grad=True) 22 | self.params = torch.nn.Parameter(params) 23 | 24 | def forward(self, *x): 25 | loss_sum = 0.0 26 | for i, loss in enumerate(x): 27 | loss_sum += 0.5 / (self.params[i] ** 2) * loss + torch.log(1 + self.params[i] ** 2) 28 | return loss_sum 29 | 30 | 31 | if __name__ == '__main__': 32 | awl = AutomaticWeightedLoss(2) 33 | print(awl.parameters()) 34 | -------------------------------------------------------------------------------- /cfg/mobile-yolo-3l.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=16 4 | width=768 5 | height=448 6 | channels=3 7 | momentum=0.9 8 | decay=4e-5 9 | angle=0 10 | saturation=1.5 11 | exposure=1.5 12 | hue=.1 13 | 14 | learning_rate=0.00001 15 | burn_in=1000 16 | max_batches=800020 17 | policy=steps 18 | steps=400000,650000 19 | scales=.1,.1 20 | 21 | [convolutional] 22 | filters=32 23 | size=3 24 | stride=2 25 | pad=1 26 | batch_normalize=1 27 | activation=relu 28 | 29 | [convolutional] 30 | filters=32 31 | size=1 32 | stride=1 33 | pad=1 34 | batch_normalize=1 35 | activation=relu 36 | 37 | [convolutional] 38 | filters=32 39 | size=3 40 | groups=32 41 | stride=1 42 | pad=1 43 | batch_normalize=1 44 | activation=relu 45 | 46 | [convolutional] 47 | filters=16 48 | size=1 49 | stride=1 50 | pad=1 51 | batch_normalize=1 52 | activation=linear 53 | 54 | [convolutional] 55 | filters=96 56 | size=1 57 | stride=1 58 | pad=1 59 | batch_normalize=1 60 | activation=relu 61 | 62 | [convolutional] 63 | filters=96 64 | size=3 65 | groups=96 66 | stride=2 67 | pad=1 68 | batch_normalize=1 69 | activation=relu 70 | 71 | [convolutional] 72 | filters=24 73 | size=1 74 | stride=1 75 | pad=1 76 | batch_normalize=1 77 | activation=linear 78 | 79 | [convolutional] 80 | filters=144 81 | size=1 82 | stride=1 83 | pad=1 84 | batch_normalize=1 85 | activation=relu 86 | 87 | [convolutional] 88 | filters=144 89 | size=3 90 | groups=144 91 | stride=1 92 | pad=1 93 | batch_normalize=1 94 | activation=relu 95 | 96 | [convolutional] 97 | filters=24 98 | size=1 99 | stride=1 100 | pad=1 101 | batch_normalize=1 102 | activation=linear 103 | 104 | [shortcut] 105 | from=-4 106 | activation=linear 107 | 108 | [convolutional] 
109 | filters=144 110 | size=1 111 | stride=1 112 | pad=1 113 | batch_normalize=1 114 | activation=relu 115 | 116 | [convolutional] 117 | filters=144 118 | size=3 119 | groups=144 120 | stride=2 121 | pad=1 122 | batch_normalize=1 123 | activation=relu 124 | 125 | [convolutional] 126 | filters=32 127 | size=1 128 | stride=1 129 | pad=1 130 | batch_normalize=1 131 | activation=linear 132 | 133 | [convolutional] 134 | filters=192 135 | size=1 136 | stride=1 137 | pad=1 138 | batch_normalize=1 139 | activation=relu 140 | 141 | [convolutional] 142 | filters=192 143 | size=3 144 | groups=192 145 | stride=1 146 | pad=1 147 | batch_normalize=1 148 | activation=relu 149 | 150 | [convolutional] 151 | filters=32 152 | size=1 153 | stride=1 154 | pad=1 155 | batch_normalize=1 156 | activation=linear 157 | 158 | [shortcut] 159 | from=-4 160 | activation=linear 161 | 162 | [convolutional] 163 | filters=192 164 | size=1 165 | stride=1 166 | pad=1 167 | batch_normalize=1 168 | activation=relu 169 | 170 | [convolutional] 171 | filters=192 172 | size=3 173 | groups=192 174 | stride=1 175 | pad=1 176 | batch_normalize=1 177 | activation=relu 178 | 179 | [convolutional] 180 | filters=32 181 | size=1 182 | stride=1 183 | pad=1 184 | batch_normalize=1 185 | activation=linear 186 | 187 | [shortcut] 188 | from=-4 189 | activation=linear 190 | 191 | [convolutional] 192 | filters=192 193 | size=1 194 | stride=1 195 | pad=1 196 | batch_normalize=1 197 | activation=relu 198 | 199 | [convolutional] 200 | filters=192 201 | size=3 202 | groups=192 203 | stride=1 204 | pad=1 205 | batch_normalize=1 206 | activation=relu 207 | 208 | [convolutional] 209 | filters=64 210 | size=1 211 | stride=1 212 | pad=1 213 | batch_normalize=1 214 | activation=linear 215 | 216 | [convolutional] 217 | filters=384 218 | size=1 219 | stride=1 220 | pad=1 221 | batch_normalize=1 222 | activation=relu 223 | 224 | [convolutional] 225 | filters=384 226 | size=3 227 | groups=384 228 | stride=1 229 | pad=1 230 | batch_normalize=1 231 | activation=relu 232 | 233 | [convolutional] 234 | filters=64 235 | size=1 236 | stride=1 237 | pad=1 238 | batch_normalize=1 239 | activation=linear 240 | 241 | [shortcut] 242 | from=-4 243 | activation=linear 244 | 245 | [convolutional] 246 | filters=384 247 | size=1 248 | stride=1 249 | pad=1 250 | batch_normalize=1 251 | activation=relu 252 | 253 | [convolutional] 254 | filters=384 255 | size=3 256 | groups=384 257 | stride=1 258 | pad=1 259 | batch_normalize=1 260 | activation=relu 261 | 262 | [convolutional] 263 | filters=64 264 | size=1 265 | stride=1 266 | pad=1 267 | batch_normalize=1 268 | activation=linear 269 | 270 | [shortcut] 271 | from=-4 272 | activation=linear 273 | 274 | [convolutional] 275 | filters=384 276 | size=1 277 | stride=1 278 | pad=1 279 | batch_normalize=1 280 | activation=relu 281 | 282 | [convolutional] 283 | filters=384 284 | size=3 285 | groups=384 286 | stride=1 287 | pad=1 288 | batch_normalize=1 289 | activation=relu 290 | 291 | [convolutional] 292 | filters=64 293 | size=1 294 | stride=1 295 | pad=1 296 | batch_normalize=1 297 | activation=linear 298 | 299 | [shortcut] 300 | from=-4 301 | activation=linear 302 | 303 | [convolutional] 304 | filters=384 305 | size=1 306 | stride=1 307 | pad=1 308 | batch_normalize=1 309 | activation=relu 310 | 311 | [convolutional] 312 | filters=384 313 | size=3 314 | groups=384 315 | stride=2 316 | pad=1 317 | batch_normalize=1 318 | activation=relu 319 | 320 | [convolutional] 321 | filters=96 322 | size=1 323 | stride=1 324 | pad=1 325 | 
batch_normalize=1 326 | activation=linear 327 | 328 | [convolutional] 329 | filters=576 330 | size=1 331 | stride=1 332 | pad=1 333 | batch_normalize=1 334 | activation=relu 335 | 336 | [convolutional] 337 | filters=576 338 | size=3 339 | groups=576 340 | stride=1 341 | pad=1 342 | batch_normalize=1 343 | activation=relu 344 | 345 | [convolutional] 346 | filters=96 347 | size=1 348 | stride=1 349 | pad=1 350 | batch_normalize=1 351 | activation=linear 352 | 353 | [shortcut] 354 | from=-4 355 | activation=linear 356 | 357 | [convolutional] 358 | filters=576 359 | size=1 360 | stride=1 361 | pad=1 362 | batch_normalize=1 363 | activation=relu 364 | 365 | [convolutional] 366 | filters=576 367 | size=3 368 | groups=576 369 | stride=1 370 | pad=1 371 | batch_normalize=1 372 | activation=relu 373 | 374 | [convolutional] 375 | filters=96 376 | size=1 377 | stride=1 378 | pad=1 379 | batch_normalize=1 380 | activation=linear 381 | 382 | [shortcut] 383 | from=-4 384 | activation=linear 385 | 386 | [convolutional] 387 | filters=576 388 | size=1 389 | stride=1 390 | pad=1 391 | batch_normalize=1 392 | activation=relu 393 | 394 | [convolutional] 395 | filters=576 396 | size=3 397 | groups=576 398 | stride=2 399 | pad=1 400 | batch_normalize=1 401 | activation=relu 402 | 403 | [convolutional] 404 | filters=160 405 | size=1 406 | stride=1 407 | pad=1 408 | batch_normalize=1 409 | activation=linear 410 | 411 | [convolutional] 412 | filters=960 413 | size=1 414 | stride=1 415 | pad=1 416 | batch_normalize=1 417 | activation=relu 418 | 419 | [convolutional] 420 | filters=960 421 | size=3 422 | groups=960 423 | stride=1 424 | pad=1 425 | batch_normalize=1 426 | activation=relu 427 | 428 | [convolutional] 429 | filters=160 430 | size=1 431 | stride=1 432 | pad=1 433 | batch_normalize=1 434 | activation=linear 435 | 436 | [shortcut] 437 | from=-4 438 | activation=linear 439 | 440 | [convolutional] 441 | filters=960 442 | size=1 443 | stride=1 444 | pad=1 445 | batch_normalize=1 446 | activation=relu 447 | 448 | [convolutional] 449 | filters=960 450 | size=3 451 | groups=960 452 | stride=1 453 | pad=1 454 | batch_normalize=1 455 | activation=relu 456 | 457 | [convolutional] 458 | filters=160 459 | size=1 460 | stride=1 461 | pad=1 462 | batch_normalize=1 463 | activation=linear 464 | 465 | [shortcut] 466 | from=-4 467 | activation=linear 468 | 469 | ### SPP ### 470 | [maxpool] 471 | stride=1 472 | size=3 473 | 474 | [route] 475 | layers=-2 476 | 477 | [maxpool] 478 | stride=1 479 | size=5 480 | 481 | [route] 482 | layers=-4 483 | 484 | [maxpool] 485 | stride=1 486 | size=9 487 | 488 | [route] 489 | layers=-1,-3,-5,-6 490 | 491 | ### End SPP ### 492 | [convolutional] 493 | filters=288 494 | size=1 495 | stride=1 496 | pad=1 497 | batch_normalize=1 498 | activation=relu 499 | 500 | [convolutional] 501 | filters=288 502 | size=3 503 | groups=288 504 | stride=1 505 | pad=1 506 | batch_normalize=1 507 | activation=relu 508 | 509 | [convolutional] 510 | filters=96 511 | size=1 512 | stride=1 513 | pad=1 514 | batch_normalize=1 515 | activation=relu 516 | 517 | [convolutional] 518 | filters=384 519 | size=1 520 | stride=1 521 | pad=1 522 | batch_normalize=1 523 | activation=relu 524 | 525 | [convolutional] 526 | size=1 527 | stride=1 528 | pad=1 529 | filters=30 530 | activation=linear 531 | 532 | [yolo] 533 | mask = 6,7,8 534 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 535 | classes=5 536 | num=9 537 | jitter=.3 538 | ignore_thresh=.7 539 | truth_thresh=1 540 | random=1 
541 | scale_x_y=1.05 542 | iou_thresh=0.213 543 | cls_normalizer=1.0 544 | iou_normalizer=0.07 545 | iou_loss=ciou 546 | nms_kind=greedynms 547 | beta_nms=0.6 548 | 549 | [route] 550 | layers= 65 551 | 552 | [upsample] 553 | stride=2 554 | 555 | [route] 556 | layers=-1,48 557 | 558 | [convolutional] 559 | filters=80 560 | size=1 561 | stride=1 562 | pad=1 563 | batch_normalize=1 564 | activation=relu 565 | 566 | [convolutional] 567 | filters=288 568 | size=1 569 | stride=1 570 | pad=1 571 | batch_normalize=1 572 | activation=relu 573 | 574 | [convolutional] 575 | filters=288 576 | size=3 577 | groups=288 578 | stride=1 579 | pad=1 580 | batch_normalize=1 581 | activation=relu 582 | 583 | [convolutional] 584 | filters=192 585 | size=1 586 | stride=1 587 | pad=1 588 | batch_normalize=1 589 | activation=relu 590 | 591 | [convolutional] 592 | filters=288 593 | size=1 594 | stride=1 595 | pad=1 596 | batch_normalize=1 597 | activation=relu 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=30 604 | activation=linear 605 | 606 | [yolo] 607 | mask = 3,4,5 608 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 609 | classes=5 610 | num=9 611 | jitter=.3 612 | ignore_thresh=.7 613 | truth_thresh=1 614 | random=1 615 | scale_x_y=1.05 616 | iou_thresh=0.213 617 | cls_normalizer=1.0 618 | iou_normalizer=0.07 619 | iou_loss=ciou 620 | nms_kind=greedynms 621 | beta_nms=0.6 622 | 623 | [route] 624 | layers= 74 625 | 626 | [upsample] 627 | stride=2 628 | 629 | [route] 630 | layers=-1,37 631 | 632 | [convolutional] 633 | filters=80 634 | size=1 635 | stride=1 636 | pad=1 637 | batch_normalize=1 638 | activation=relu 639 | 640 | [convolutional] 641 | filters=288 642 | size=1 643 | stride=1 644 | pad=1 645 | batch_normalize=1 646 | activation=relu 647 | 648 | [convolutional] 649 | filters=288 650 | size=3 651 | groups=288 652 | stride=1 653 | pad=1 654 | batch_normalize=1 655 | activation=relu 656 | 657 | [convolutional] 658 | filters=192 659 | size=1 660 | stride=1 661 | pad=1 662 | batch_normalize=1 663 | activation=relu 664 | 665 | [convolutional] 666 | filters=288 667 | size=1 668 | stride=1 669 | pad=1 670 | batch_normalize=1 671 | activation=relu 672 | 673 | [convolutional] 674 | size=1 675 | stride=1 676 | pad=1 677 | filters=30 678 | activation=linear 679 | 680 | [yolo] 681 | mask = 0,1,2 682 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 683 | classes=5 684 | num=9 685 | jitter=.3 686 | ignore_thresh=.7 687 | truth_thresh=1 688 | random=1 689 | scale_x_y=1.05 690 | iou_thresh=0.213 691 | cls_normalizer=1.0 692 | iou_normalizer=0.07 693 | iou_loss=ciou 694 | nms_kind=greedynms 695 | beta_nms=0.6 696 | 697 | [route] 698 | layers=-23 699 | 700 | [convolutional] 701 | size=1 702 | stride=1 703 | pad=1 704 | filters=128 705 | activation=linear 706 | 707 | [route] 708 | layers=-15 709 | 710 | [convolutional] 711 | size=1 712 | stride=1 713 | pad=1 714 | filters=128 715 | activation=linear 716 | 717 | [route] 718 | layers=-7 719 | 720 | [convolutional] 721 | size=1 722 | stride=1 723 | pad=1 724 | filters=128 725 | activation=linear -------------------------------------------------------------------------------- /cfg/yolov4-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 
13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=2 30 | pad=1 31 | activation=leaky 32 | 33 | [convolutional] 34 | batch_normalize=1 35 | filters=64 36 | size=3 37 | stride=2 38 | pad=1 39 | activation=leaky 40 | 41 | [convolutional] 42 | batch_normalize=1 43 | filters=64 44 | size=3 45 | stride=1 46 | pad=1 47 | activation=leaky 48 | 49 | [route_lhalf] 50 | layers=-1 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=32 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=32 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [route] 69 | layers = -1,-2 70 | 71 | [convolutional] 72 | batch_normalize=1 73 | filters=64 74 | size=1 75 | stride=1 76 | pad=1 77 | activation=leaky 78 | 79 | [route] 80 | layers = -6,-1 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=128 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [route_lhalf] 95 | layers=-1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=64 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=64 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [route] 114 | layers = -1,-2 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=128 119 | size=1 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [route] 125 | layers = -6,-1 126 | 127 | [maxpool] 128 | size=2 129 | stride=2 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [route_lhalf] 140 | layers=-1 141 | 142 | [convolutional] 143 | batch_normalize=1 144 | filters=128 145 | size=3 146 | stride=1 147 | pad=1 148 | activation=leaky 149 | 150 | [convolutional] 151 | batch_normalize=1 152 | filters=128 153 | size=3 154 | stride=1 155 | pad=1 156 | activation=leaky 157 | 158 | [route] 159 | layers = -1,-2 160 | 161 | [convolutional] 162 | batch_normalize=1 163 | filters=256 164 | size=1 165 | stride=1 166 | pad=1 167 | activation=leaky 168 | 169 | [route] 170 | layers = -6,-1 171 | 172 | [maxpool] 173 | size=2 174 | stride=2 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=512 179 | size=3 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | ################################## 185 | 186 | [convolutional] 187 | batch_normalize=1 188 | filters=256 189 | size=1 190 | stride=1 191 | pad=1 192 | activation=leaky 193 | 194 | [convolutional] 195 | batch_normalize=1 196 | filters=512 197 | size=3 198 | stride=1 199 | pad=1 200 | activation=leaky 201 | 202 | [convolutional] 203 | size=1 204 | stride=1 205 | pad=1 206 | filters=255 207 | activation=linear 208 | 209 | 210 | 211 | [yolo] 212 | mask = 3,4,5 213 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 214 | classes=80 215 | num=6 216 | jitter=.3 217 | scale_x_y = 1.05 218 | cls_normalizer=1.0 219 | iou_normalizer=0.07 220 | iou_loss=ciou 221 | ignore_thresh = .7 222 | truth_thresh = 1 223 | random=0 224 | nms_kind=greedynms 225 | beta_nms=0.6 226 | 227 | [route] 228 | layers = -4 229 | 230 | [convolutional] 231 | batch_normalize=1 232 | filters=128 233 | size=1 234 | stride=1 
235 | pad=1 236 | activation=leaky 237 | 238 | [upsample] 239 | stride=2 240 | 241 | [route] 242 | layers = -1, 23 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=256 247 | size=3 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | size=1 254 | stride=1 255 | pad=1 256 | filters=255 257 | activation=linear 258 | 259 | [yolo] 260 | mask = 1,2,3 261 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 262 | classes=80 263 | num=6 264 | jitter=.3 265 | scale_x_y = 1.05 266 | cls_normalizer=1.0 267 | iou_normalizer=0.07 268 | iou_loss=ciou 269 | ignore_thresh = .7 270 | truth_thresh = 1 271 | random=0 272 | nms_kind=greedynms 273 | beta_nms=0.6 274 | -------------------------------------------------------------------------------- /data/coco.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=../coco/train2017.txt 3 | valid=../coco/testdev2017.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | couch 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tv 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /data/coco1.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=data/coco1.txt 3 | valid=data/coco1.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco1.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000109622.jpg 2 | -------------------------------------------------------------------------------- /data/coco16.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=data/coco16.txt 3 | valid=data/coco16.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco16.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000109622.jpg 2 | ../coco/images/train2017/000000160694.jpg 3 | ../coco/images/train2017/000000308590.jpg 4 | ../coco/images/train2017/000000327573.jpg 5 | ../coco/images/train2017/000000062929.jpg 6 | ../coco/images/train2017/000000512793.jpg 7 | ../coco/images/train2017/000000371735.jpg 8 | ../coco/images/train2017/000000148118.jpg 9 | 
../coco/images/train2017/000000309856.jpg 10 | ../coco/images/train2017/000000141882.jpg 11 | ../coco/images/train2017/000000318783.jpg 12 | ../coco/images/train2017/000000337760.jpg 13 | ../coco/images/train2017/000000298197.jpg 14 | ../coco/images/train2017/000000042421.jpg 15 | ../coco/images/train2017/000000328898.jpg 16 | ../coco/images/train2017/000000458856.jpg 17 | -------------------------------------------------------------------------------- /data/coco1cls.data: -------------------------------------------------------------------------------- 1 | classes=1 2 | train=data/coco1cls.txt 3 | valid=data/coco1cls.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco1cls.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000000901.jpg 2 | ../coco/images/train2017/000000001464.jpg 3 | ../coco/images/train2017/000000003220.jpg 4 | ../coco/images/train2017/000000003365.jpg 5 | ../coco/images/train2017/000000004772.jpg 6 | ../coco/images/train2017/000000009987.jpg 7 | ../coco/images/train2017/000000010498.jpg 8 | ../coco/images/train2017/000000012455.jpg 9 | ../coco/images/train2017/000000013992.jpg 10 | ../coco/images/train2017/000000014125.jpg 11 | ../coco/images/train2017/000000016314.jpg 12 | ../coco/images/train2017/000000016670.jpg 13 | ../coco/images/train2017/000000018412.jpg 14 | ../coco/images/train2017/000000021212.jpg 15 | ../coco/images/train2017/000000021826.jpg 16 | ../coco/images/train2017/000000030566.jpg 17 | -------------------------------------------------------------------------------- /data/coco2014.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=../coco/trainvalno5k.txt 3 | valid=../coco/5k.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco2017.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco/train2017.txt 3 | valid=./data/coco/val2017.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco64.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=data/coco64.txt 3 | valid=data/coco64.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco64.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000109622.jpg 2 | ../coco/images/train2017/000000160694.jpg 3 | ../coco/images/train2017/000000308590.jpg 4 | ../coco/images/train2017/000000327573.jpg 5 | ../coco/images/train2017/000000062929.jpg 6 | ../coco/images/train2017/000000512793.jpg 7 | ../coco/images/train2017/000000371735.jpg 8 | ../coco/images/train2017/000000148118.jpg 9 | ../coco/images/train2017/000000309856.jpg 10 | ../coco/images/train2017/000000141882.jpg 11 | ../coco/images/train2017/000000318783.jpg 12 | ../coco/images/train2017/000000337760.jpg 13 | ../coco/images/train2017/000000298197.jpg 14 | ../coco/images/train2017/000000042421.jpg 15 | ../coco/images/train2017/000000328898.jpg 16 | ../coco/images/train2017/000000458856.jpg 17 | ../coco/images/train2017/000000073824.jpg 18 | ../coco/images/train2017/000000252846.jpg 19 | ../coco/images/train2017/000000459590.jpg 20 | 
../coco/images/train2017/000000273650.jpg 21 | ../coco/images/train2017/000000331311.jpg 22 | ../coco/images/train2017/000000156326.jpg 23 | ../coco/images/train2017/000000262985.jpg 24 | ../coco/images/train2017/000000253580.jpg 25 | ../coco/images/train2017/000000447976.jpg 26 | ../coco/images/train2017/000000378077.jpg 27 | ../coco/images/train2017/000000259913.jpg 28 | ../coco/images/train2017/000000424553.jpg 29 | ../coco/images/train2017/000000000612.jpg 30 | ../coco/images/train2017/000000267625.jpg 31 | ../coco/images/train2017/000000566012.jpg 32 | ../coco/images/train2017/000000196664.jpg 33 | ../coco/images/train2017/000000363331.jpg 34 | ../coco/images/train2017/000000057992.jpg 35 | ../coco/images/train2017/000000520047.jpg 36 | ../coco/images/train2017/000000453903.jpg 37 | ../coco/images/train2017/000000162083.jpg 38 | ../coco/images/train2017/000000268516.jpg 39 | ../coco/images/train2017/000000277436.jpg 40 | ../coco/images/train2017/000000189744.jpg 41 | ../coco/images/train2017/000000041128.jpg 42 | ../coco/images/train2017/000000527728.jpg 43 | ../coco/images/train2017/000000465269.jpg 44 | ../coco/images/train2017/000000246833.jpg 45 | ../coco/images/train2017/000000076784.jpg 46 | ../coco/images/train2017/000000323715.jpg 47 | ../coco/images/train2017/000000560463.jpg 48 | ../coco/images/train2017/000000006263.jpg 49 | ../coco/images/train2017/000000094701.jpg 50 | ../coco/images/train2017/000000521359.jpg 51 | ../coco/images/train2017/000000302903.jpg 52 | ../coco/images/train2017/000000047559.jpg 53 | ../coco/images/train2017/000000480583.jpg 54 | ../coco/images/train2017/000000050025.jpg 55 | ../coco/images/train2017/000000084512.jpg 56 | ../coco/images/train2017/000000508913.jpg 57 | ../coco/images/train2017/000000093708.jpg 58 | ../coco/images/train2017/000000070493.jpg 59 | ../coco/images/train2017/000000539270.jpg 60 | ../coco/images/train2017/000000474402.jpg 61 | ../coco/images/train2017/000000209842.jpg 62 | ../coco/images/train2017/000000028820.jpg 63 | ../coco/images/train2017/000000154257.jpg 64 | ../coco/images/train2017/000000342499.jpg 65 | -------------------------------------------------------------------------------- /data/coco_paper.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | street sign 13 | stop sign 14 | parking meter 15 | bench 16 | bird 17 | cat 18 | dog 19 | horse 20 | sheep 21 | cow 22 | elephant 23 | bear 24 | zebra 25 | giraffe 26 | hat 27 | backpack 28 | umbrella 29 | shoe 30 | eye glasses 31 | handbag 32 | tie 33 | suitcase 34 | frisbee 35 | skis 36 | snowboard 37 | sports ball 38 | kite 39 | baseball bat 40 | baseball glove 41 | skateboard 42 | surfboard 43 | tennis racket 44 | bottle 45 | plate 46 | wine glass 47 | cup 48 | fork 49 | knife 50 | spoon 51 | bowl 52 | banana 53 | apple 54 | sandwich 55 | orange 56 | broccoli 57 | carrot 58 | hot dog 59 | pizza 60 | donut 61 | cake 62 | chair 63 | couch 64 | potted plant 65 | bed 66 | mirror 67 | dining table 68 | window 69 | desk 70 | toilet 71 | door 72 | tv 73 | laptop 74 | mouse 75 | remote 76 | keyboard 77 | cell phone 78 | microwave 79 | oven 80 | toaster 81 | sink 82 | refrigerator 83 | blender 84 | book 85 | clock 86 | vase 87 | scissors 88 | teddy bear 89 | hair drier 90 | toothbrush 91 | hair brush -------------------------------------------------------------------------------- /data/get_coco2014.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Zip coco folder 3 | # zip -r coco.zip coco 4 | # tar -czvf coco.tar.gz coco 5 | 6 | # Download labels from Google Drive, accepting presented query 7 | filename="coco2014labels.zip" 8 | fileid="1s6-CmF5_SElM28r52P1OUrCcuXZN-SFo" 9 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 10 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 11 | rm ./cookie 12 | 13 | # Unzip labels 14 | unzip -q ${filename} # for coco.zip 15 | # tar -xzf ${filename} # for coco.tar.gz 16 | rm ${filename} 17 | 18 | # Download and unzip images 19 | cd coco/images 20 | f="train2014.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 21 | f="val2014.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 22 | 23 | # cd out 24 | cd ../.. 25 | -------------------------------------------------------------------------------- /data/get_coco2017.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Zip coco folder 3 | # zip -r coco.zip coco 4 | # tar -czvf coco.tar.gz coco 5 | 6 | # Download labels from Google Drive, accepting presented query 7 | filename="coco2017labels.zip" 8 | fileid="1cXZR_ckHki6nddOmcysCuuJFM--T-Q6L" 9 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 10 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 11 | rm ./cookie 12 | 13 | # Unzip labels 14 | unzip -q ${filename} # for coco.zip 15 | # tar -xzf ${filename} # for coco.tar.gz 16 | rm ${filename} 17 | 18 | # Download and unzip images 19 | cd coco/images 20 | f="train2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 21 | f="val2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 22 | 23 | # cd out 24 | cd ../.. 
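Both get_coco201x.sh scripts rely on the same Google Drive two-step: the first curl stores a cookie, and the `confirm=` token scraped from it unlocks the large-file download. A rough Python equivalent of that flow, assuming the third-party `requests` package is available (it is not part of this repo's scripts):

```python
import requests

def gdrive_download(file_id, dest):
    """Mimic the cookie/confirm dance from get_coco2014.sh / get_coco2017.sh."""
    url = 'https://drive.google.com/uc?export=download'
    session = requests.Session()
    resp = session.get(url, params={'id': file_id}, stream=True)
    # Large files return a confirmation page first; the token lives in a cookie.
    token = next((v for k, v in resp.cookies.items()
                  if k.startswith('download_warning')), None)
    if token is not None:
        resp = session.get(url, params={'id': file_id, 'confirm': token}, stream=True)
    with open(dest, 'wb') as f:
        for chunk in resp.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
            f.write(chunk)

# e.g. gdrive_download('1cXZR_ckHki6nddOmcysCuuJFM--T-Q6L', 'coco2017labels.zip')
```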
25 | -------------------------------------------------------------------------------- /data/mcmot.data: -------------------------------------------------------------------------------- 1 | classes=5 2 | train=./data/train_mcmot.txt 3 | valid=./data/val_mcmot.txt 4 | names=data/mcmot.names -------------------------------------------------------------------------------- /data/mcmot.names: -------------------------------------------------------------------------------- 1 | car 2 | bicycle 3 | person 4 | cyclist 5 | tricycle 6 | -------------------------------------------------------------------------------- /data/mcmot_det.data: -------------------------------------------------------------------------------- 1 | classes=5 2 | train=./data/mcmot_det.train 3 | valid=./data/mcmot_det_test.txt 4 | names=data/mcmot.names -------------------------------------------------------------------------------- /data/test2.txt: -------------------------------------------------------------------------------- 1 | /mnt/diskb/maqiao/multiClass/multiClass190827/JPEGImages/2_2018-05-31_10-20-01-207_3-1527733441.jpg -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from sys import platform 3 | 4 | from models import * # set ONNX_EXPORT in models.py 5 | from utils.datasets import * 6 | from utils.utils import * 7 | 8 | 9 | def detect(save_img=False): 10 | img_size = (320, 192) if ONNX_EXPORT else opt.img_size # (320, 192) or (416, 256) or (608, 352) for (height, width) 11 | out, source, weights, half, view_img, save_txt = opt.output, opt.source, opt.weights, opt.half, opt.view_img, opt.save_txt 12 | web_cam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') 13 | 14 | # Initialize 15 | device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device) 16 | if os.path.exists(out): 17 | shutil.rmtree(out) # delete output folder 18 | os.makedirs(out) # make new output folder 19 | 20 | # Initialize model 21 | # model = Darknet(opt.cfg, img_size) 22 | max_ids_dict = { 23 | 0: 330, 24 | 1: 102, 25 | 2: 104, 26 | 3: 312, 27 | 4: 53 28 | } 29 | model = Darknet(opt.cfg, (img_size, img_size), False, max_ids_dict, 128, 'detect').to(device) 30 | 31 | # Load weights 32 | attempt_download(weights) 33 | if weights.endswith('.pt'): # pytorch format 34 | chkpt = torch.load(weights, map_location=device) 35 | model.load_state_dict(chkpt['model']) 36 | if 'epoch' in chkpt.keys(): 37 | print('Checkpoint of epoch {} loaded.'.format(chkpt['epoch'])) 38 | else: # darknet format 39 | load_darknet_weights(model, weights) 40 | 41 | # Second-stage classifier 42 | classify = False 43 | if classify: 44 | model_c = torch_utils.load_classifier(name='resnet101', n=2) # initialize 45 | model_c.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights 46 | model_c.to(device).eval() 47 | 48 | # Eval mode 49 | model.to(device).eval() 50 | 51 | # Fuse Conv2d + BatchNorm2d layers 52 | # model.fuse() 53 | 54 | # Export mode 55 | if ONNX_EXPORT: 56 | model.fuse() 57 | img = torch.zeros((1, 3) + img_size) # (1, 3, 320, 192) 58 | f = opt.weights.replace(opt.weights.split('.')[-1], 'onnx') # *.onnx filename 59 | torch.onnx.export(model, img, f, verbose=False, opset_version=11, 60 | input_names=['images'], output_names=['classes', 'boxes']) 61 | 62 | # Validate exported model 63 | import onnx 64 | model = 
onnx.load(f) # Load the ONNX model 65 | onnx.checker.check_model(model) # Check that the IR is well formed 66 | print(onnx.helper.printable_graph(model.graph)) # Print a human readable representation of the graph 67 | return 68 | 69 | # Half precision 70 | half = half and device.type != 'cpu' # half precision only supported on CUDA 71 | if half: 72 | model.half() 73 | 74 | # Set Data loader 75 | vid_path, vid_writer = None, None 76 | if web_cam: 77 | view_img = True 78 | torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference 79 | dataset = LoadStreams(source, img_size=img_size) 80 | else: 81 | save_img = True 82 | dataset = LoadImages(source, net_w=img_size) 83 | 84 | # Get names and colors 85 | names = load_classes(opt.names) 86 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] 87 | 88 | # Run inference 89 | t0 = time.time() 90 | img = torch.zeros((1, 3, img_size, img_size), device=device) # init img 91 | # _ = model.forward(img.half() if half else img.float()) if device.type != 'cpu' else None # run once 92 | for path, img, im0s, vid_cap in dataset: 93 | img = torch.from_numpy(img).to(device) 94 | img = img.half() if half else img.float() # uint8 to fp16/32 95 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 96 | if img.ndimension() == 3: 97 | img = img.unsqueeze(0) 98 | 99 | # ----- Inference 100 | t1 = torch_utils.time_synchronized() 101 | 102 | # only get aggregated result, not original YOLO output 103 | pred = model.forward(img, augment=opt.augment)[0] 104 | 105 | t2 = torch_utils.time_synchronized() 106 | # ----- 107 | 108 | # to float 109 | if half: 110 | pred = pred.float() 111 | 112 | # Apply NMS 113 | pred = non_max_suppression(pred, 114 | opt.conf_thres, 115 | opt.iou_thres, 116 | merge=False, 117 | classes=opt.classes, 118 | agnostic=opt.agnostic_nms) 119 | 120 | # Apply Classifier 121 | if classify: 122 | pred = apply_classifier(pred, model_c, img, im0s) 123 | 124 | # Process detections 125 | for i, det in enumerate(pred): # detections per image 126 | if web_cam: # batch_size >= 1 127 | p, s, im0 = path[i], '%g: ' % i, im0s[i] 128 | else: 129 | p, s, im0 = path, '', im0s 130 | 131 | save_path = str(Path(out) / Path(p).name) 132 | s += '%gx%g ' % img.shape[2:] # print string 133 | if det is not None and len(det): 134 | # Rescale boxes from img_size to im0 size(from net input size to original size) 135 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 136 | 137 | # Print results 138 | for c in det[:, -1].unique(): 139 | n = (det[:, -1] == c).sum() # detections per class 140 | s += '%g %ss, ' % (n, names[int(c)]) # add to string 141 | 142 | # Write results 143 | for *xyxy, conf, cls in det: # x1, y1, x2, y2, confidence, cls_id 144 | if save_txt: # Write to file 145 | with open(save_path + '.txt', 'a') as file: 146 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf)) 147 | 148 | if save_img or view_img: # Add bbox to image 149 | label = '%s %.2f' % (names[int(cls)], conf) 150 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)]) 151 | 152 | # Print time (inference + NMS) 153 | print('%sDone. 
(%.3fs)' % (s, t2 - t1)) 154 | 155 | # Stream results 156 | if view_img: 157 | cv2.imshow(p, im0) 158 | if cv2.waitKey(1) == ord('q'): # q to quit 159 | raise StopIteration 160 | 161 | # Save results (image with detections) 162 | if save_img: 163 | if dataset.mode == 'images': 164 | cv2.imwrite(save_path, im0) 165 | else: 166 | if vid_path != save_path: # new video 167 | vid_path = save_path 168 | if isinstance(vid_writer, cv2.VideoWriter): 169 | vid_writer.release() # release previous video writer 170 | 171 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 172 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 173 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 174 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) 175 | vid_writer.write(im0) 176 | 177 | if save_txt or save_img: 178 | print('Results saved to %s' % os.getcwd() + os.sep + out) 179 | if platform == 'darwin': # MacOS 180 | os.system('open ' + save_path) 181 | 182 | print('Done. (%.3fs)' % (time.time() - t0)) 183 | 184 | 185 | if __name__ == '__main__': 186 | parser = argparse.ArgumentParser() 187 | parser.add_argument('--cfg', type=str, default='cfg/yolov4_half-mcmot.cfg', help='*.cfg path') 188 | parser.add_argument('--names', type=str, default='data/mcmot.names', help='*.names path') 189 | parser.add_argument('--weights', type=str, default='weights/track_last.weights', help='weights path') 190 | parser.add_argument('--source', type=str, default='data/samples', help='source') # input file/folder, 0 for webcam 191 | parser.add_argument('--output', type=str, default='output', help='output folder') # output folder 192 | parser.add_argument('--img-size', type=int, default=768, help='inference size (pixels)') 193 | parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold') 194 | parser.add_argument('--iou-thres', type=float, default=0.6, help='IOU threshold for NMS') 195 | parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)') 196 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference') 197 | parser.add_argument('--device', default='0', help='device id (i.e. 
0 or 0,1) or cpu') 198 | parser.add_argument('--view-img', action='store_true', help='display results') 199 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') 200 | parser.add_argument('--classes', nargs='+', type=int, help='filter by class') 201 | parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') 202 | parser.add_argument('--augment', action='store_true', help='augmented inference') 203 | opt = parser.parse_args() 204 | print(opt) 205 | 206 | with torch.no_grad(): 207 | detect() 208 | -------------------------------------------------------------------------------- /mAPEvaluate/DetectImgAndWriteResultToXml.py: -------------------------------------------------------------------------------- 1 | import os 2 | import darknet as dn 3 | import cv2 4 | import shutil 5 | from lxml import etree, objectify 6 | import os,glob 7 | import xml.etree.ElementTree as ET 8 | 9 | def mycopyfile(srcfile,dstfile): 10 | if not os.path.isfile(srcfile): 11 | print("%s not exist!"%(srcfile)) 12 | else: 13 | fpath,fname=os.path.split(dstfile) #分离文件名和路径 14 | if not os.path.exists(fpath): 15 | os.makedirs(fpath) #创建路径 16 | shutil.copyfile(srcfile,dstfile) #复制文件 17 | print("copy %s -> %s"%( srcfile,dstfile)) 18 | 19 | def mymovefile(srcfile,dstfile): 20 | if not os.path.isfile(srcfile): 21 | print("%s not exist!"%(srcfile)) 22 | else: 23 | fpath,fname=os.path.split(dstfile) #分离文件名和路径 24 | if not os.path.exists(fpath): 25 | os.makedirs(fpath) #创建路径 26 | shutil.move(srcfile,dstfile) #移动文件 27 | print("move %s -> %s"%( srcfile,dstfile)) 28 | 29 | def listdir(path, ftype): 30 | list_name = [] 31 | for f in os.listdir(path): 32 | if os.path.splitext(f)[-1] != ftype: 33 | continue 34 | file_path = os.path.join(path, f) 35 | if os.path.isdir(file_path): 36 | continue 37 | # listdir(file_path, list_name) 38 | else: 39 | list_name.append(file_path) 40 | return list_name 41 | 42 | def imagePath2labelPath(image_path): 43 | image_dir = os.path.dirname(image_path) 44 | p = image_dir.split('/') 45 | root_dir = "/".join(p[:-1]) 46 | label_dir = os.path.join(root_dir,'Annotations') 47 | image_name = os.path.basename(image_path) 48 | image_name = image_name.replace(".jpg", "") 49 | label_path = os.path.join(label_dir, image_name+'.xml') 50 | return label_path 51 | 52 | def getFileName(file_path): 53 | file_name = os.path.basename(file_path) 54 | file_name = file_name.replace('.jpg', '').replace('.png', '') 55 | # p = file_name.split('.') 56 | # name = '' 57 | # for i in range(len(p)-1): 58 | # name += p[i] 59 | # file_name = p[] 60 | return file_name 61 | 62 | def Convert(size, box): 63 | dw = 1./size[0] 64 | dh = 1./size[1] 65 | x = (box[0] + box[1])/2.0 66 | y = (box[2] + box[3])/2.0 67 | w = abs(box[1] - box[0]) 68 | h = abs(box[3] - box[2]) 69 | x = x*dw 70 | w = w*dw 71 | y = y*dh 72 | h = h*dh 73 | return (x,y,w,h) 74 | 75 | def writeXml(xmlfile, imgW, imgH, img_name, det_result): 76 | E = objectify.ElementMaker(annotate=False) 77 | anno_dataroot = E.dataroot( 78 | E.folder(''), 79 | E.filename(img_name), 80 | E.createdata(''), 81 | E.modifydata(''), 82 | E.width(imgW), 83 | E.height(imgH), 84 | E.DayNight(''), 85 | E.weather(''), 86 | E.Marker('Alg'), 87 | E.location(''), 88 | E.imageinfo(''), 89 | E.source(''), 90 | E.database('') 91 | ) 92 | 93 | E_markNode = objectify.ElementMaker(annotate=False) 94 | anno_markNode = E_markNode.markNode() 95 | 96 | for i,obj in enumerate(det_result[0]): 97 | # print('det_result: ', det_result) 98 | # print('obj: ', obj) 
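# The block below maps a normalized center-format detection (cx, cy, w, h) to
# clamped integer pixel corners before it is written into the XML node. A
# standalone sketch of the same math (to_pixel_rect is an illustrative name,
# not a helper of this repo):
def to_pixel_rect(cx, cy, w, h, img_w, img_h):
    """Normalized (cx, cy, w, h) -> clamped integer (xmin, ymin, xmax, ymax)."""
    xmin = max(int(cx * img_w - w * img_w / 2), 0)
    ymin = max(int(cy * img_h - h * img_h / 2), 0)
    xmax = min(int(cx * img_w + w * img_w / 2), img_w - 1)
    ymax = min(int(cy * img_h + h * img_h / 2), img_h - 1)
    return xmin, ymin, xmax, ymax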
99 | targettype = obj[0] 100 | x = obj[2]*imgW 101 | y = obj[3]*imgH 102 | w = obj[4]*imgW 103 | h = obj[5]*imgH 104 | xmin = (int)(x - w/2) 105 | ymin = (int)(y - h/2) 106 | xmax = (int)(x + w/2) 107 | ymax = (int)(y + h/2) 108 | if xmin < 0: 109 | xmin = 0 110 | if ymin < 0: 111 | ymin = 0 112 | if xmax > imgW - 1: 113 | xmax = imgW - 1 114 | if ymax > imgH - 1: 115 | ymax = imgH - 1 116 | if xmax - xmin <= 65: 117 | print(obj[0],x,y,w,h) 118 | print('obj width less than 10') 119 | continue 120 | if ymax - ymin <= 65: 121 | print(obj[0],x,y,w,h) 122 | print('obj height less than 10') 123 | continue 124 | cartype = '' 125 | # if targettype == 'car_front': 126 | # continue 127 | if targettype == 'fr': 128 | targettype = 'car_front' 129 | 130 | if targettype == 'car' or targettype == 'car_front': 131 | cartype = 'saloon_car' 132 | 133 | E_object = objectify.ElementMaker(annotate=False) 134 | anno_object = E_object.object( 135 | E_object.index(i+1), 136 | E_object.targettype(targettype), 137 | E_object.cartype(cartype), 138 | E_object.cartypechild(), 139 | E_object.pose(), 140 | E_object.truncated(), 141 | E_object.difficult(), 142 | E_object.remark() 143 | ) 144 | 145 | E_bndbox = objectify.ElementMaker(annotate=False) 146 | anno_bndbox = E_bndbox.bndbox( 147 | E_bndbox.xmin(xmin), 148 | E_bndbox.ymin(ymin), 149 | E_bndbox.xmax(xmax), 150 | E_bndbox.ymax(ymax) 151 | ) 152 | anno_object.append(anno_bndbox) 153 | anno_markNode.append(anno_object) 154 | anno_dataroot.append(anno_markNode) 155 | 156 | etree.ElementTree(anno_dataroot).write(xmlfile, encoding='utf-8', xml_declaration=True) 157 | 158 | 159 | def batch_analysis(meta_file,cfg_file,wgt_file,meta_file_fr,cfg_file_fr,wgt_file_fr, 160 | thresh,nms,img_path,xml_path): 161 | image_list = listdir(img_path,'.jpg') 162 | image_num = len(image_list) 163 | meta = dn.load_meta(meta_file) 164 | net = dn.load_net(cfg_file,wgt_file,0) 165 | # meta_fr = dn.load_meta(meta_file_fr) 166 | # net_fr = dn.load_net(cfg_file_fr,wgt_file_fr,0) 167 | move_count = 0 168 | for j,image_path in enumerate(image_list): 169 | print(str(j)+'/'+str(image_num)+" "+image_path) 170 | image_name = getFileName(image_path) 171 | img_save_path = os.path.join(img_path,image_name+'.jpg') 172 | xml_save_path = os.path.join(xml_path,image_name+'.xml') 173 | # if os.path.exists(xml_save_path): 174 | # continue 175 | # print(img_save_path) 176 | det = dn.detect_ext(net, meta, bytes(image_path,'utf-8'),thresh) 177 | # det_fr = dn.detect_ext(net_fr, meta_fr, bytes(image_path,'utf-8'),thresh) 178 | img = cv2.imread(image_path) 179 | if img is None: 180 | print('Can not open image') 181 | continue 182 | h,w,c = img.shape 183 | writeXml(xml_save_path,w,h,image_name,det) 184 | dn.free_net(net) 185 | 186 | def batch_analysis_c6(meta_file,cfg_file,wgt_file,thresh,nms,img_path,xml_path): 187 | image_list = listdir(img_path,'.jpg') 188 | image_num = len(image_list) 189 | meta = dn.load_meta(meta_file) 190 | net = dn.load_net(cfg_file,wgt_file,0) 191 | # meta_fr = dn.load_meta(meta_file_fr) 192 | # net_fr = dn.load_net(cfg_file_fr,wgt_file_fr,0) 193 | move_count = 0 194 | for j,image_path in enumerate(image_list): 195 | print(str(j)+'/'+str(image_num)+" "+image_path) 196 | image_name = getFileName(image_path) 197 | img_save_path = os.path.join(img_path,image_name+'.jpg') 198 | xml_save_path = os.path.join(xml_path,image_name+'.xml') 199 | # if os.path.exists(xml_save_path): 200 | # continue 201 | # print(img_save_path) 202 | det = dn.detect_ext(net, meta, bytes(image_path,'utf-8'),thresh) 
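# Note on the shape of `det` here: detect_ext in this repo's bindings (see the
# diou variant dumped below in darknet_ori_diou_cfg.py) appears to return a
# (detections, elapsed_seconds) tuple, each detection being
# [class_name, prob, cx, cy, w, h] with coordinates normalized by image size --
# which is why writeXml() above indexes det_result[0]. A hedged unpacking sketch:
detections, elapsed = det  # assumes the diou-style (results, time) return value
for cls_name, prob, cx, cy, w, h in detections:
    print('%s %.2f at normalized center (%.3f, %.3f)' % (cls_name, prob, cx, cy))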
203 | # det_fr = dn.detect_ext(net_fr, meta_fr, bytes(image_path,'utf-8'),thresh) 204 | img = cv2.imread(image_path) 205 | if img is None: 206 | print('Can not open image') 207 | continue 208 | h,w,c = img.shape 209 | writeXml(xml_save_path,w,h,image_name,det) 210 | dn.free_net(net) 211 | 212 | if __name__ == "__main__": 213 | dn.set_gpu(5) 214 | # img_path = "/mnt/diskc/maqiao/data/20191104/JPEGImages/JPEGImages" 215 | 216 | # 11.25,需要夏燎安排人标注的 217 | # img_path = '/mnt/diskc/maqiao/data/20191122' 218 | # img_path = '/mnt/diskc/maqiao/data/yc20191101~20191119/train' 219 | img_path = '/mnt/diskd/Data_all/SCSN0002-7-12-15' 220 | # img_path = '/mnt/diskd/Data_all/待标注数据20200616' 221 | # img_path = '/users/duanyou/backup_c5/test_1/JPEGImages' 222 | # img_path = '/mnt/diskb/duanyou/需要标注的数据/shangfang_20200605' 223 | # img_path = '/users/duanyou/backup_c5/test_4/train' 224 | # img_path = '/users/duanyou/backup_c5/test_2/1230标注' 225 | # img_path = '/mnt/diskd/Data_all/多目标类型/需要标注的垂停20191217-大连-蒲城-盐城-长沙/train' 226 | 227 | 228 | xml_path = img_path 229 | # if not os.path.exists(xml_path): 230 | # os.mkdir(xml_path) 231 | # xml_path_fr = os.path.join(img_path,'FR_xml') 232 | # if not os.path.exists(xml_path_fr): 233 | # os.mkdir(xml_path_fr) 234 | 235 | # ## multiClass_c5 236 | # cfg_file = b"/users/maqiao/mq/Data_checked/multiClass/backup_yolov3-spp/multiClass_yolov3-spp_test.cfg" 237 | # wgt_file = b"/users/maqiao/mq/Data_checked/multiClass/backup_yolov3-spp/multiClass_yolov3-spp_145000.weights" 238 | # meta_file = b"/users/maqiao/mq/Data_checked/multiClass/backup_c5/multiClass.data" 239 | 240 | # # ## FR 241 | # cfg_file_fr = b"models/FR/tiny-yolo-voc-decode.cfg" 242 | # wgt_file_fr = b"models/FR/tiny_yolo_voc_FR_final.weights" 243 | # meta_file_fr = b"models/FR/FR.data" 244 | 245 | # ## hzpc 246 | # cfg_file_c6 = b"/users/duanyou/c5/hezhoupucheng/multiClass_test.cfg" 247 | # wgt_file_c6 = b"/users/duanyou/c5/hezhoupucheng/multiClass_1084000_20200526.weights" 248 | # meta_file_c6 = b"/users/duanyou/c5/hezhoupucheng/multiClass.data" 249 | 250 | # ## multiClass_c6, 直接用c6的模型跑全部结果【c6 垂停】 251 | # cfg_file_c6 = b"/users/duanyou/backup_c6/experiments/c6_chuiting/multiClass_c6_test.cfg" 252 | # wgt_file_c6 = b"/users/duanyou/backup_c6/experiments/c6_chuiting/multiClass_c6_891000_20200310_best.weights" 253 | # meta_file_c6 = b"/users/duanyou/backup_c6/experiments/c6_chuiting/multiClass_c6.data" 254 | 255 | # new model 256 | cfg_file_c6 = b"/users/duanyou/c5/v4_all_train/v4all_mish_for_yujiazai/yolov4_test.cfg" 257 | wgt_file_c6 = b"/users/duanyou/c5/v4_all_train/v4all_mish_for_yujiazai/yolov4_19000.weights" 258 | meta_file_c6 = b"/users/duanyou/c5/v4_all_train/multiClass.data" 259 | 260 | # batch_analysis(meta_file,cfg_file,wgt_file,meta_file_fr,cfg_file_fr,wgt_file_fr, 261 | # 0.25,0.45,img_path,xml_path) 262 | batch_analysis_c6(meta_file_c6,cfg_file_c6,wgt_file_c6,0.25,0.45,img_path,xml_path) 263 | -------------------------------------------------------------------------------- /mAPEvaluate/ReadAndSaveDarknetDetRes.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | # import darknet as dn 5 | 6 | def read_det_res(res_path): 7 | fr = open(res_path, 'r') 8 | if fr is None: 9 | return -1 10 | cn = 0 11 | num = 0 12 | detect_objs = [] 13 | for line in fr.readlines(): # 依次读取每行 14 | line = line.strip() # 去掉每行头尾空白 15 | if cn == 0: 16 | tmp, num = [str(i) for i in line.split("=")] 17 | # print("object num: ", int(num)) 18 | else: 19 | obj = [float(i) for i 
in line.split()] 20 | obj[0] = int(obj[0]) 21 | detect_objs.append(obj) 22 | # print(obj) 23 | cn += 1 24 | 25 | return detect_objs 26 | 27 | 28 | def save_det_res(det, det_save_path, cls_names): 29 | """ 30 | :param det: 31 | :param det_save_path: 32 | :param cls_names: 33 | :return: 34 | """ 35 | res = 0 36 | f = open(det_save_path, 'w') 37 | if f is None: 38 | res = -1 39 | return res 40 | 41 | f.write('class prob x y w h total=' + str(len(det)) + '\n') 42 | for d in det: 43 | if d[0] not in cls_names: 44 | res = -2 45 | continue 46 | 47 | obj_cls = cls_names.index(d[0]) 48 | f.write('%d %f %f %f %f %f\n' % (obj_cls, d[1], d[2], d[3], d[4], d[5])) 49 | # print(obj_cls,d[2],d[3],d[4],d[5]) 50 | 51 | f.close() 52 | 53 | return res 54 | 55 | 56 | if __name__ == "__main__": 57 | # detect 58 | print('done') 59 | # net = dn.load_net(b"/users/maqiao/mq/Data_checked/multiClass/backup_yolov3-spp/multiClass_yolov3-spp_test.cfg", 60 | # b"/users/maqiao/mq/Data_checked/multiClass/backup_yolov3-spp/multiClass_yolov3-spp_60000.weights", 0) 61 | # meta = dn.load_meta(b"/users/maqiao/mq/Data_checked/multiClass/backup_c5/multiClass.data") 62 | # r = dn.detect_ext(net, meta, b"/users/maqiao/mq/Data_checked/multiClass/multiClass0320/JPEGImages_ori/000000.jpg") 63 | # dn.free_net(net) 64 | # print(meta.classes) 65 | # for c in range(meta.classes): 66 | # print(meta.names[c]) 67 | # print(r) 68 | 69 | # # save detection result to text 70 | # cls_names = [meta.names[i].decode('utf-8').strip() for i in range(meta.classes)] 71 | # saveDetRes(r, 'result.txt', cls_names) 72 | 73 | # # read detection result 74 | # objs = readDetRes('result.txt') 75 | # print(objs) 76 | -------------------------------------------------------------------------------- /mAPEvaluate/ReadAnnotations.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | 3 | import os 4 | import xml.etree.ElementTree as ET 5 | 6 | 7 | def Convert(size, box): 8 | """ 9 | :param size: 10 | :param box: 11 | :return: 12 | """ 13 | dw = 1.0 / size[0] 14 | dh = 1.0 / size[1] 15 | x = (box[0] + box[1]) / 2.0 16 | y = (box[2] + box[3]) / 2.0 17 | w = abs(box[1] - box[0]) 18 | h = abs(box[3] - box[2]) 19 | x = x * dw 20 | w = w * dw 21 | y = y * dh 22 | h = h * dh 23 | 24 | return (x, y, w, h) 25 | 26 | 27 | # Load annotation data 28 | def load_label(label_file, object_type): 29 | fl = open(label_file) 30 | cn = 0 31 | num = 0 32 | label_objs = [] 33 | label_info = fl.read() 34 | if label_info.find('dataroot') < 0: 35 | print("Can not find dataroot") 36 | fl.close() 37 | return label_objs 38 | 39 | try: 40 | root = ET.fromstring(label_info) 41 | except Exception: 42 | print("Error: cannot parse file") 43 | # n = raw_input() 44 | fl.close() 45 | return label_objs 46 | 47 | if root.find('markNode') is not None: 48 | obj = root.find('markNode').find('object') 49 | if obj is not None: 50 | w = int(root.find('width').text) 51 | h = int(root.find('height').text) 52 | # print("w:%d,h%d" % (w, h)) 53 | for obj in root.iter('object'): 54 | target_type = obj.find('targettype').text 55 | car_type = obj.find('cartype').text 56 | if target_type == 'car_front' or target_type == 'car_rear' or target_type == 'car_fr': 57 | target_type = 'fr' 58 | if target_type not in object_type and car_type not in object_type: 59 | # print("********************************* "+str(targettype) + "is not in class list *************************") 60 | continue 61 | 62 | # classes_c9 63 | # if targettype == "car": 64 | # cartype = obj.find('cartype').text 65 | 
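# For reference, the active c5 remapping in load_label() boils down to a lookup
# table. A sketch under the same class conventions (TYPE_REMAP and remap_type
# are illustrative names, not part of this module):
TYPE_REMAP = {'car_front': 'fr', 'car_rear': 'fr', 'car_fr': 'fr', 'motorcycle': 'bicycle'}

def remap_type(target_type, car_type):
    # a 'car' whose cartype is 'motorcycle' counts as 'bicycle' in the c5 scheme
    if target_type == 'car' and car_type == 'motorcycle':
        return 'bicycle'
    return TYPE_REMAP.get(target_type, target_type)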
# # print(cartype) 66 | # if cartype == 'motorcycle': 67 | # targettype = "bicycle" 68 | # elif cartype == 'truck': 69 | # targettype = "truck" 70 | # elif cartype == 'waggon': 71 | # targettype = 'waggon' 72 | # elif cartype == 'passenger_car': 73 | # targettype = 'passenger_car' 74 | # elif cartype == 'unkonwn' or cartype == "shop_truck": 75 | # targettype = "other" 76 | 77 | # classes_c5 78 | if target_type == 'car': 79 | car_type = obj.find('cartype').text 80 | if car_type == 'motorcycle': 81 | target_type = 'bicycle' 82 | if target_type == "motorcycle": 83 | target_type = "bicycle" 84 | 85 | xml_box = obj.find('bndbox') 86 | b = (float(xml_box.find('xmin').text), 87 | float(xml_box.find('xmax').text), 88 | float(xml_box.find('ymin').text), 89 | float(xml_box.find('ymax').text)) 90 | bb = Convert((w, h), b) 91 | 92 | obj = [target_type, float(bb[0]), float(bb[1]), float(bb[2]), float(bb[3])] 93 | # print(obj) 94 | label_objs.append(obj) 95 | 96 | return label_objs 97 | 98 | 99 | if __name__ == "__main__": 100 | label_file = '/mnt/diskb/maqiao/multiClass/test_c6/Annotations/1_5_1.xml' 101 | object_types = ['car', 'bicycle', 'person', 'cyclist', 'tricycle', 'fr', ] 102 | 103 | objs = load_label(label_file, object_types) 104 | print(objs) 105 | -------------------------------------------------------------------------------- /mAPEvaluate/cmp_det_label.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import xlwt 4 | 5 | 6 | def overlap(x1, w1, x2, w2): 7 | l1 = x1 - w1 / 2. 8 | l2 = x2 - w2 / 2. 9 | left = l1 if l1 > l2 else l2 10 | r1 = x1 + w1 / 2. 11 | r2 = x2 + w2 / 2. 12 | right = r1 if r1 < r2 else r2 13 | return right - left 14 | 15 | 16 | def box_intersection(box1, box2): 17 | w = overlap(box1[0], box1[2], box2[0], box2[2]) 18 | h = overlap(box1[1], box1[3], box2[1], box2[3]) 19 | if w < 0 or h < 0: 20 | return 0 21 | area = w * h 22 | return area 23 | 24 | 25 | def box_union(box1, box2): 26 | i = box_intersection(box1, box2) 27 | u = box1[2] * box1[3] + box2[2] * box2[3] - i 28 | return u 29 | 30 | 31 | def box_iou(box1, box2): 32 | return box_intersection(box1, box2) / box_union(box1, box2) 33 | 34 | 35 | def box_to_rect(box, width, height): 36 | x = box[0] 37 | y = box[1] 38 | w = box[2] 39 | h = box[3] 40 | left = (x - w / 2.) * width 41 | top = (y - h / 2.) * height 42 | right = (x + w / 2.) * width 43 | bottom = (y + h / 2.) 
* height 44 | return [int(left), int(top), int(right), int(bottom)] 45 | 46 | 47 | # 比较每张图片的检测结果和标记数据 48 | def CmpData(cmp_type, detect_objs, label_objs, thresh, iou_thresh, img): 49 | # img = cv2.imread("%s/%s.jpg" % (image_path,file_name)) 50 | 51 | df = [False for n in range(0, len(detect_objs))] 52 | correct = 0 53 | iou = 0 54 | label_num = 0 55 | for lobj in label_objs: 56 | if lobj[0] != cmp_type: 57 | continue 58 | label_num += 1 59 | box1 = [lobj[1], lobj[2], lobj[3], lobj[4]] 60 | rect1 = box_to_rect(box1, img.shape[1], img.shape[0]) 61 | best_iou = 0 62 | rect2 = [] 63 | best_no = -1 64 | for dno, dobj in enumerate(detect_objs): 65 | if lobj[0] != dobj[0]: 66 | continue 67 | box2 = [dobj[2], dobj[3], dobj[4], dobj[5]] 68 | biou = box_iou(box1, box2) 69 | if dobj[1] > thresh and biou > best_iou: 70 | best_no = dno 71 | best_iou = biou 72 | rect2 = box_to_rect(box2, img.shape[1], img.shape[0]) 73 | iou += best_iou 74 | # if best_iou > iou_thresh: 75 | if best_iou > iou_thresh and not df[best_no]: #### 若df[best_no]已经是true了,则证明这个检测结果没有匹配的GT,且置信度大于thresh,则算虚警 76 | correct += 1 77 | df[best_no] = True # df相当于该gt被置为已检测到,下一次若还有另一个检测结果与之重合率满足阈值,则不能认为多检测到一个目标 78 | cv2.rectangle(img, (rect1[0], rect1[1]), (rect1[2], rect1[3]), (0, 255, 0), 3) # 绿色 label 79 | cv2.rectangle(img, (rect2[0], rect2[1]), (rect2[2], rect2[3]), (255, 0, 0), 3) # 蓝色 detection 80 | txt = cmp_type + ':' + str(round(detect_objs[best_no][1], 2)) 81 | cv2.putText(img, txt, (rect2[0], rect2[1]), 0, 1, (0, 0, 255), 2) 82 | else: 83 | cv2.rectangle(img, (rect1[0], rect1[1]), (rect1[2], rect1[3]), (0, 255, 255), 3) # 黄色,未检测到的GT 84 | 85 | detect_num = 0 86 | for i, dobj in enumerate(detect_objs): 87 | if dobj[0] != cmp_type: 88 | continue 89 | if dobj[1] > thresh: 90 | detect_num += 1 91 | box2 = [dobj[2], dobj[3], dobj[4], dobj[5]] 92 | if not df[i]: # 如果df[i]=False,则表明这个检测结果没有匹配的GT,且置信度大于thresh,则算虚警,相当于R['det'][jmax] 93 | if dobj[1] > thresh: 94 | rect2 = box_to_rect(box2, img.shape[1], img.shape[0]) 95 | cv2.rectangle(img, (rect2[0], rect2[1]), (rect2[2], rect2[3]), (0, 0, 255), 3) # 红色 虚警 96 | txt = cmp_type + ':' + str(round(dobj[1], 2)) 97 | cv2.putText(img, txt, (rect2[0], rect2[1]), 0, 1, (0, 0, 255), 2) 98 | 99 | # cv2.imwrite("%s/show_result/%s_r.jpg" % (result_path,file_name),img) 100 | 101 | tp = correct 102 | fp = detect_num - tp 103 | tn = 0 104 | fn = label_num - tp 105 | avg_iou = 0 106 | recall = 0 107 | accuracy = 0 108 | precision = 0 109 | if 0 == label_num: 110 | avg_iou = 0 111 | recall = 1 112 | accuracy = 1 if detect_num == 0 else 0 113 | precision = 1 if detect_num == 0 else 0 114 | else: 115 | avg_iou = iou / label_num 116 | recall = correct / float(label_num) 117 | accuracy = correct / float(tp + fn + fp + tn) 118 | corr = (correct if correct < detect_num else detect_num) # 检测正确数大于检测结果数的情况,即同一个目标多次标记 119 | precision = 0 if detect_num == 0 else corr / float(detect_num) 120 | 121 | cmp_res = {'label_num': label_num, 'detect_num': detect_num, 'correct': correct, \ 122 | 'recall': recall, 'avg_iou': avg_iou, 'accuracy': accuracy, 'precision': precision} 123 | 124 | return cmp_res 125 | 126 | 127 | # 输出分析结果到excel文件中 128 | def ExportAnaRes(objtype, res1, total_result, image_path, result_path): 129 | f = xlwt.Workbook() # 创建工作簿 130 | sheet1 = f.add_sheet(u'sheet1', cell_overwrite_ok=True) 131 | row0 = [u'图片名', u'标注目标', u'检测目标', u'检测正确', u'recall', u'iou', u'accuracy', u'precision'] 132 | for i in range(0, len(row0)): 133 | sheet1.write(0, i, row0[i]) 134 | 135 | for r in range(0, len(res1)): 136 | 
sheet1.write(r + 1, 0, res1[r]['image_name']) 137 | sheet1.write(r + 1, 1, res1[r]['label_num']) 138 | sheet1.write(r + 1, 2, res1[r]['detect_num']) 139 | sheet1.write(r + 1, 3, res1[r]['correct']) 140 | sheet1.write(r + 1, 4, res1[r]['recall']) 141 | sheet1.write(r + 1, 5, res1[r]['avg_iou']) 142 | sheet1.write(r + 1, 6, res1[r]['accuracy']) 143 | sheet1.write(r + 1, 7, res1[r]['precision']) 144 | 145 | row_end = [u'total', total_result[0], total_result[1], total_result[2], total_result[3], \ 146 | total_result[4], total_result[5], total_result[6]] 147 | for i in range(0, len(row_end)): 148 | sheet1.write(len(res1) + 2, i, row_end[i]) 149 | 150 | save_name = "AnalyseResult_%s.xls" % (objtype) 151 | save_path = os.path.join(result_path, save_name) 152 | f.save(save_path) 153 | 154 | 155 | def ExportAnaResAll(results, result_path): 156 | f = xlwt.Workbook() # 创建工作簿 157 | sheet1 = f.add_sheet(u'sheet1', cell_overwrite_ok=True) 158 | row0 = [u'模型', u'目标类型', u'标注目标', u'检测目标', u'检测正确', u'recall', u'iou', u'accuracy', u'precision', u'AP'] 159 | for i in range(0, len(row0)): 160 | sheet1.write(0, i, row0[i]) 161 | for r in range(len(results)): 162 | total_result = results[r] 163 | for i in range(0, len(results[r])): 164 | sheet1.write(r + 1, i, results[r][i]) 165 | 166 | save_path = os.path.join(result_path, 'AnalyseResultAll.xls') 167 | f.save(save_path) 168 | -------------------------------------------------------------------------------- /mAPEvaluate/cmp_det_label_sf.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | 3 | import os 4 | import cv2 5 | import xlwt 6 | 7 | 8 | def overlap(x1, w1, x2, w2): 9 | l1 = x1 - w1 / 2. 10 | l2 = x2 - w2 / 2. 11 | left = l1 if l1 > l2 else l2 12 | r1 = x1 + w1 / 2. 13 | r2 = x2 + w2 / 2. 14 | right = r1 if r1 < r2 else r2 15 | return right - left 16 | 17 | 18 | def box_intersection(box1, box2): 19 | w = overlap(box1[0], box1[2], box2[0], box2[2]) 20 | h = overlap(box1[1], box1[3], box2[1], box2[3]) 21 | 22 | if w < 0 or h < 0: 23 | return 0 24 | 25 | area = w * h 26 | return area 27 | 28 | 29 | def box_union(box1, box2): 30 | i = box_intersection(box1, box2) 31 | u = box1[2] * box1[3] + box2[2] * box2[3] - i 32 | return u 33 | 34 | 35 | def box_iou(box1, box2): 36 | return box_intersection(box1, box2) / box_union(box1, box2) 37 | 38 | 39 | def box_to_rect(box, width, height): 40 | x = box[0] 41 | y = box[1] 42 | w = box[2] 43 | h = box[3] 44 | left = (x - w / 2.) * width 45 | top = (y - h / 2.) * height 46 | right = (x + w / 2.) * width 47 | bottom = (y + h / 2.) 
* height 48 | return [int(left), int(top), int(right), int(bottom)] 49 | 50 | 51 | # 比较每张图片的检测结果和标记数据 52 | def cmp_data(cmp_type, detect_objs, label_objs, thresh, iou_thresh, img): 53 | # img = cv2.imread("%s/%s.jpg" % (image_path,file_name)) 54 | 55 | df = [False for n in range(0, len(detect_objs))] 56 | correct = 0 57 | iou = 0 58 | label_num = 0 59 | for l_obj in label_objs: 60 | if l_obj[0] != cmp_type: 61 | continue 62 | 63 | label_num += 1 64 | box_1 = [l_obj[1], l_obj[2], l_obj[3], l_obj[4]] 65 | rect_1 = box_to_rect(box_1, img.shape[1], img.shape[0]) 66 | best_iou = 0 67 | rect_2 = [] 68 | best_no = -1 69 | for d_no, d_obj in enumerate(detect_objs): 70 | if l_obj[0] != d_obj[0]: 71 | continue 72 | 73 | box_2 = [d_obj[2], d_obj[3], d_obj[4], d_obj[5]] 74 | biou = box_iou(box_1, box_2) 75 | if d_obj[1] > thresh and biou > best_iou: 76 | best_no = d_no 77 | best_iou = biou 78 | rect_2 = box_to_rect(box_2, img.shape[1], img.shape[0]) 79 | iou += best_iou 80 | 81 | # if best_iou > iou_thresh: 82 | if best_iou > iou_thresh and not df[best_no]: # 若df[best_no]已经是true了,则证明这个检测结果没有匹配的GT,且置信度大于thresh,则算虚警 83 | correct += 1 84 | df[best_no] = True # df相当于该gt被置为已检测到,下一次若还有另一个检测结果与之重合率满足阈值,则不能认为多检测到一个目标 85 | # cv2.rectangle(img,(rect1[0],rect1[1]),(rect1[2],rect1[3]),(0,255,0),3)# 绿色 label 86 | if cmp_type == 'car': 87 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255, 0, 0), 3) 88 | txt = cmp_type + ':' + str(round(detect_objs[best_no][1], 2)) 89 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (255, 0, 0), 2) 90 | elif cmp_type == 'bicycle': 91 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255, 255, 0), 3) 92 | txt = cmp_type + ':' + str(round(detect_objs[best_no][1], 2)) 93 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (255, 255, 0), 2) 94 | elif cmp_type == 'person': 95 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (0, 255, 255), 3) 96 | txt = cmp_type + ':' + str(round(detect_objs[best_no][1], 2)) 97 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (0, 255, 255), 2) 98 | elif cmp_type == 'cyclist': 99 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (0, 255, 0), 3) 100 | txt = cmp_type + ':' + str(round(detect_objs[best_no][1], 2)) 101 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (0, 255, 0), 2) 102 | elif cmp_type == 'tricycle': 103 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (0, 0, 255), 3) 104 | txt = cmp_type + ':' + str(round(detect_objs[best_no][1], 2)) 105 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (0, 0, 255), 2) 106 | elif cmp_type == 'fr': 107 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255, 0, 255), 3) 108 | txt = 'fr' + ':' + str(round(detect_objs[best_no][1], 2)) 109 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (255, 0, 255), 2) 110 | # else: 111 | # cv2.rectangle(img,(rect1[0],rect1[1]),(rect1[2],rect1[3]),(0,255,255),3) # 黄色,未检测到的GT 112 | 113 | detect_num = 0 114 | for i, d_obj in enumerate(detect_objs): 115 | if d_obj[0] != cmp_type: 116 | continue 117 | 118 | if d_obj[1] > thresh: 119 | detect_num += 1 120 | 121 | box_2 = [d_obj[2], d_obj[3], d_obj[4], d_obj[5]] 122 | if not df[i]: # 如果df[i]=False,则表明这个检测结果没有匹配的GT, 且置信度大于thresh,则算虚警,相当于R['det'][jmax] 123 | if d_obj[1] > thresh: 124 | rect_2 = box_to_rect(box_2, img.shape[1], img.shape[0]) 125 | 126 | # cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255,0,0), 3) # 红色 虚警 127 | # if cmp_type == 'fr': 128 | # cmp_type1 = 
'shangfan' 129 | # else: 130 | # cmp_type1 = cmp_type 131 | # txt = cmp_type1+':'+str(round(d_obj[1],2)) 132 | # cv2.putText(img,txt,(rect_2[0],rect_2[1]), 0, 1, (255,0,0),2) 133 | 134 | if cmp_type == 'car': 135 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255, 0, 0), 3) 136 | txt = cmp_type + ':' + str(round(d_obj[1], 2)) 137 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (255, 0, 0), 2) 138 | elif cmp_type == 'bicycle': 139 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255, 255, 0), 3) 140 | txt = cmp_type + ':' + str(round(d_obj[1], 2)) 141 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (255, 255, 0), 2) 142 | elif cmp_type == 'person': 143 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (0, 255, 255), 3) 144 | txt = cmp_type + ':' + str(round(d_obj[1], 2)) 145 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (0, 255, 255), 2) 146 | elif cmp_type == 'cyclist': 147 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (0, 255, 0), 3) 148 | txt = cmp_type + ':' + str(round(d_obj[1], 2)) 149 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (0, 255, 0), 2) 150 | elif cmp_type == 'tricycle': 151 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (0, 0, 255), 3) 152 | txt = cmp_type + ':' + str(round(d_obj[1], 2)) 153 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (0, 0, 255), 2) 154 | elif cmp_type == 'fr': 155 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255, 0, 255), 3) 156 | txt = 'fr' + ':' + str(round(d_obj[1], 2)) 157 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (255, 0, 255), 2) 158 | 159 | # cv2.imwrite("%s/show_result/%s_r.jpg" % (result_path, file_name), img) 160 | 161 | tp = correct 162 | fp = detect_num - tp 163 | tn = 0 164 | fn = label_num - tp 165 | avg_iou = 0 166 | recall = 0 167 | accuracy = 0 168 | precision = 0 169 | if 0 == label_num: 170 | avg_iou = 0 171 | recall = 1 172 | accuracy = 1 if detect_num == 0 else 0 173 | precision = 1 if detect_num == 0 else 0 174 | else: 175 | avg_iou = iou / label_num 176 | recall = correct / float(label_num) 177 | accuracy = correct / float(tp + fn + fp + tn) 178 | corr = (correct if correct < detect_num else detect_num) # 检测正确数大于检测结果数的情况,即同一个目标多次标记 179 | precision = 0 if detect_num == 0 else corr / float(detect_num) 180 | 181 | cmp_res = {'label_num': label_num, 182 | 'detect_num': detect_num, 183 | 'correct': correct, 184 | 'recall': recall, 185 | 'avg_iou': avg_iou, 186 | 'accuracy': accuracy, 187 | 'precision': precision} 188 | 189 | return cmp_res 190 | 191 | 192 | # 输出分析结果到excel文件中 193 | def ExportAnaRes(objtype, res1, total_result, image_path, result_path): 194 | f = xlwt.Workbook() # 创建工作簿 195 | sheet1 = f.add_sheet(u'sheet1', cell_overwrite_ok=True) 196 | row0 = [u'图片名', u'标注目标', u'检测目标', u'检测正确', u'recall', u'iou', u'accuracy', u'precision'] 197 | for i in range(0, len(row0)): 198 | sheet1.write(0, i, row0[i]) 199 | 200 | for r in range(0, len(res1)): 201 | sheet1.write(r + 1, 0, res1[r]['image_name']) 202 | sheet1.write(r + 1, 1, res1[r]['label_num']) 203 | sheet1.write(r + 1, 2, res1[r]['detect_num']) 204 | sheet1.write(r + 1, 3, res1[r]['correct']) 205 | sheet1.write(r + 1, 4, res1[r]['recall']) 206 | sheet1.write(r + 1, 5, res1[r]['avg_iou']) 207 | sheet1.write(r + 1, 6, res1[r]['accuracy']) 208 | sheet1.write(r + 1, 7, res1[r]['precision']) 209 | 210 | row_end = [u'total', total_result[0], total_result[1], total_result[2], total_result[3], \ 211 | 
total_result[4], total_result[5], total_result[6]] 212 | for i in range(0, len(row_end)): 213 | sheet1.write(len(res1) + 2, i, row_end[i]) 214 | 215 | save_name = "AnalyseResult_%s.xls" % (objtype) 216 | save_path = os.path.join(result_path, save_name) 217 | f.save(save_path) 218 | 219 | 220 | def ExportAnaResAll(results, result_path): 221 | f = xlwt.Workbook() # 创建工作簿 222 | sheet1 = f.add_sheet(u'sheet1', cell_overwrite_ok=True) 223 | row0 = [u'模型', u'目标类型', u'标注目标', u'检测目标', u'检测正确', u'recall', u'iou', u'accuracy', u'precision', u'AP'] 224 | for i in range(0, len(row0)): 225 | sheet1.write(0, i, row0[i]) 226 | for r in range(len(results)): 227 | total_result = results[r] 228 | for i in range(0, len(results[r])): 229 | sheet1.write(r + 1, i, results[r][i]) 230 | 231 | save_path = os.path.join(result_path, 'AnalyseResultAll.xls') 232 | f.save(save_path) 233 | print('{:s} exported.'.format(save_path)) 234 | -------------------------------------------------------------------------------- /mAPEvaluate/darknet_ori_diou_cfg.py: -------------------------------------------------------------------------------- 1 | from ctypes import * 2 | import math 3 | import random 4 | import cv2 5 | import time 6 | 7 | def sample(probs): 8 | s = sum(probs) 9 | probs = [a/s for a in probs] 10 | r = random.uniform(0, 1) 11 | for i in range(len(probs)): 12 | r = r - probs[i] 13 | if r <= 0: 14 | return i 15 | return len(probs)-1 16 | 17 | def c_array(ctype, values): 18 | arr = (ctype*len(values))() 19 | arr[:] = values 20 | return arr 21 | 22 | class BOX(Structure): 23 | _fields_ = [("x", c_float), 24 | ("y", c_float), 25 | ("w", c_float), 26 | ("h", c_float)] 27 | 28 | class DETECTION(Structure): 29 | _fields_ = [("bbox", BOX), 30 | ("classes", c_int), 31 | ("prob", POINTER(c_float)), 32 | ("mask", POINTER(c_float)), 33 | ("objectness", c_float), 34 | ("sort_class", c_int)] 35 | 36 | 37 | class IMAGE(Structure): 38 | _fields_ = [("w", c_int), 39 | ("h", c_int), 40 | ("c", c_int), 41 | ("data", POINTER(c_float))] 42 | 43 | class METADATA(Structure): 44 | _fields_ = [("classes", c_int), 45 | ("names", POINTER(c_char_p))] 46 | 47 | 48 | 49 | lib = CDLL("/users/duanyou/backup_c6/v3tiny_experiments/1_v3tiny_diou/diou_darknet/libdarknet.so", RTLD_GLOBAL) 50 | 51 | lib.network_width.argtypes = [c_void_p] 52 | lib.network_width.restype = c_int 53 | lib.network_height.argtypes = [c_void_p] 54 | lib.network_height.restype = c_int 55 | 56 | predict = lib.network_predict 57 | predict.argtypes = [c_void_p, POINTER(c_float)] 58 | predict.restype = POINTER(c_float) 59 | 60 | set_gpu = lib.cuda_set_device 61 | set_gpu.argtypes = [c_int] 62 | 63 | make_image = lib.make_image 64 | make_image.argtypes = [c_int, c_int, c_int] 65 | make_image.restype = IMAGE 66 | 67 | get_network_boxes = lib.get_network_boxes 68 | get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)] 69 | get_network_boxes.restype = POINTER(DETECTION) 70 | 71 | make_network_boxes = lib.make_network_boxes 72 | make_network_boxes.argtypes = [c_void_p] 73 | make_network_boxes.restype = POINTER(DETECTION) 74 | 75 | free_detections = lib.free_detections 76 | free_detections.argtypes = [POINTER(DETECTION), c_int] 77 | 78 | free_ptrs = lib.free_ptrs 79 | free_ptrs.argtypes = [POINTER(c_void_p), c_int] 80 | 81 | network_predict = lib.network_predict 82 | network_predict.argtypes = [c_void_p, POINTER(c_float)] 83 | 84 | reset_rnn = lib.reset_rnn 85 | reset_rnn.argtypes = [c_void_p] 86 | 87 | load_net = lib.load_network 
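# Each binding in this file follows the same ctypes pattern: fetch the symbol
# from libdarknet.so, then declare argtypes/restype so Python marshals the
# arguments correctly. All c_char_p parameters must be bytes under Python 3,
# e.g. b"model.cfg" or path.encode('utf-8'). A usage sketch with illustrative
# file names (not files of this repo):
#
#   net = load_net(b"model_test.cfg", b"model.weights", 0)
#   meta = load_meta(b"model.data")
#   dets = detect(net, meta, b"image.jpg", thresh=0.25)
#   free_net(net)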
88 | load_net.argtypes = [c_char_p, c_char_p, c_int] 89 | load_net.restype = c_void_p 90 | 91 | free_net = lib.free_network 92 | free_net.argtypes = [c_void_p] 93 | 94 | do_nms_obj = lib.do_nms_obj 95 | do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] 96 | 97 | do_nms_sort = lib.do_nms_sort 98 | do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] 99 | 100 | free_image = lib.free_image 101 | free_image.argtypes = [IMAGE] 102 | 103 | letterbox_image = lib.letterbox_image 104 | letterbox_image.argtypes = [IMAGE, c_int, c_int] 105 | letterbox_image.restype = IMAGE 106 | 107 | load_meta = lib.get_metadata 108 | lib.get_metadata.argtypes = [c_char_p] 109 | lib.get_metadata.restype = METADATA 110 | 111 | load_image = lib.load_image_color 112 | load_image.argtypes = [c_char_p, c_int, c_int] 113 | load_image.restype = IMAGE 114 | 115 | rgbgr_image = lib.rgbgr_image 116 | rgbgr_image.argtypes = [IMAGE] 117 | 118 | predict_image = lib.network_predict_image 119 | predict_image.argtypes = [c_void_p, IMAGE] 120 | predict_image.restype = POINTER(c_float) 121 | 122 | predict_image_cls = lib.network_predict_image 123 | predict_image_cls.argtypes = [c_void_p, IMAGE] 124 | predict_image_cls.restype = POINTER(c_float) 125 | 126 | def classify(net, meta, im): 127 | out = predict_image_cls(net, im) 128 | res = [] 129 | for i in range(meta.classes): 130 | res.append((meta.names[i].decode('utf-8').strip(), out[i])) 131 | res = sorted(res, key=lambda x: -x[1]) 132 | return res 133 | 134 | def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): 135 | im = load_image(image, 0, 0) 136 | num = c_int(0) 137 | pnum = pointer(num) 138 | predict_image(net, im) 139 | dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum) 140 | num = pnum[0] 141 | if (nms): do_nms_obj(dets, num, meta.classes, nms) 142 | 143 | res = [] 144 | for j in range(num): 145 | for i in range(meta.classes): 146 | if dets[j].prob[i] > 0: 147 | b = dets[j].bbox 148 | res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h))) 149 | res = sorted(res, key=lambda x: -x[1]) 150 | free_image(im) 151 | free_detections(dets, num) 152 | return res 153 | 154 | def detect_ext(net, meta, image, thresh=.2, hier_thresh=.5, nms=.45): 155 | im = load_image(image, 0, 0) 156 | num = c_int(0) 157 | pnum = pointer(num) 158 | starttime = time.time() 159 | predict_image(net, im) 160 | endtime = time.time() 161 | print('xxxxxxxxxxxxxxxxxxxx ', endtime - starttime) 162 | dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum) 163 | num = pnum[0] 164 | # if (nms): do_nms_obj(dets, num, meta.classes, nms) 165 | if (nms): do_nms_sort(dets, num, meta.classes, nms) 166 | 167 | res = [] 168 | for j in range(num): 169 | for i in range(meta.classes): 170 | if dets[j].prob[i] > 0: 171 | b = dets[j].bbox 172 | b.x /= im.w 173 | b.y /= im.h 174 | b.w /= im.w 175 | b.h /= im.h 176 | res.append([meta.names[i].decode('utf-8').strip(), dets[j].prob[i], b.x, b.y, b.w, b.h]) 177 | res = sorted(res, key=lambda x: -x[1]) 178 | free_image(im) 179 | free_detections(dets, num) 180 | return res,endtime - starttime 181 | 182 | if __name__ == "__main__": 183 | #net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0) 184 | #im = load_image("data/wolf.jpg", 0, 0) 185 | #meta = load_meta("cfg/imagenet1k.data") 186 | #r = classify(net, meta, im) 187 | #print r[:10] 188 | # for i in range(10): 189 | net = 
load_net(b"/users/maqiao/mq/Data_checked/multiClass/backup_yolov3-spp/multiClass_yolov3-spp_test.cfg", 190 | b"/users/maqiao/mq/Data_checked/multiClass/backup_yolov3-spp/multiClass_yolov3-spp_60000.weights", 0) 191 | meta = load_meta(b"/users/maqiao/mq/Data_checked/multiClass/backup_c5/multiClass.data") 192 | r = detect_ext(net, meta, b"/users/maqiao/mq/Data_checked/multiClass/multiClass0320/JPEGImages_ori/000000.jpg") 193 | free_net(net) 194 | print(meta.classes) 195 | for c in range(meta.classes): 196 | print(meta.names[c]) 197 | print(r) 198 | 199 | # import cv2 200 | # img = cv2.imread("/mnt/diskc/xiaofan/darknet_2019/data/car_test.jpg") 201 | # for detect in r: 202 | # cv2.rectangle(img, (int(detect[2][0])-32, int(detect[2][1])-32), 203 | # (int(detect[2][0])+int(detect[2][2]), int(detect[2][1])+int(detect[2][3])), 204 | # (0, 0, 255), 3) 205 | # cv2.imwrite('result.jpg', img) 206 | 207 | 208 | 209 | # # classify 210 | # meta = 211 | # net = load_net() 212 | -------------------------------------------------------------------------------- /mAPEvaluate/findImgByObjectType.py: -------------------------------------------------------------------------------- 1 | import os 2 | import darknet as dn 3 | import cv2 4 | import shutil 5 | import numpy as np 6 | 7 | def mycopyfile(srcfile,dstfile): 8 | if not os.path.isfile(srcfile): 9 | print("%s not exist!"%(srcfile)) 10 | else: 11 | fpath,fname=os.path.split(dstfile) #分离文件名和路径 12 | if not os.path.exists(fpath): 13 | os.makedirs(fpath) #创建路径 14 | shutil.copyfile(srcfile,dstfile) #复制文件 15 | print("copy %s -> %s"%( srcfile,dstfile)) 16 | 17 | def mymovefile(srcfile,dstfile): 18 | if not os.path.isfile(srcfile): 19 | print("%s not exist!"%(srcfile)) 20 | else: 21 | fpath,fname=os.path.split(dstfile) #分离文件名和路径 22 | if not os.path.exists(fpath): 23 | os.makedirs(fpath) #创建路径 24 | shutil.move(srcfile,dstfile) #移动文件 25 | print("move %s -> %s"%( srcfile,dstfile)) 26 | 27 | def listdir(path): 28 | list_name = [] 29 | for file in os.listdir(path): 30 | file_path = os.path.join(path, file) 31 | if os.path.isdir(file_path): 32 | list_name += listdir(file_path) 33 | else: 34 | list_name.append(file_path) 35 | return list_name 36 | 37 | def imagePath2labelPath(image_path): 38 | image_dir = os.path.dirname(image_path) 39 | p = image_dir.split('/') 40 | root_dir = "/".join(p[:-1]) 41 | label_dir = os.path.join(root_dir,'Annotations') 42 | image_name = os.path.basename(image_path) 43 | image_name = image_name.replace(".jpg", "") 44 | label_path = os.path.join(label_dir, image_name+'.xml') 45 | return label_path 46 | 47 | def getFileName(file_path): 48 | file_name = os.path.basename(file_path) 49 | p = file_name.split('.') 50 | name = '' 51 | for i in range(len(p)-1): 52 | name += p[i] 53 | # file_name = p[] 54 | return name 55 | 56 | def Convert(size, box): 57 | dw = 1./size[0] 58 | dh = 1./size[1] 59 | x = (box[0] + box[1])/2.0 60 | y = (box[2] + box[3])/2.0 61 | w = abs(box[1] - box[0]) 62 | h = abs(box[3] - box[2]) 63 | x = x*dw 64 | w = w*dw 65 | y = y*dh 66 | h = h*dh 67 | return (x,y,w,h) 68 | 69 | # 计算前后帧之间的多个检测框间的iou 70 | def batch_iou(boxes1, boxes2, width, height): 71 | img1 = np.zeros((height,width), dtype=np.int) 72 | for b in boxes1: 73 | x1 = int(b[0]*width) 74 | x2 = x1+int(b[2]*width) 75 | y1 = int(b[1]*height) 76 | y2 = y1+int(b[3]*height) 77 | img1[y1:y2,x1:x2] = 1 78 | img2 = np.zeros((height,width), dtype=np.int) 79 | for b in boxes2: 80 | x1 = int(b[0]*width) 81 | x2 = x1+int(b[2]*width) 82 | y1 = int(b[1]*height) 83 | y2 = 
y1+int(b[3]*height) 84 | img2[y1:y2,x1:x2] = 1 85 | img = img1 + img2 86 | union = np.where(img>0) 87 | inter = np.where(img>1) 88 | iou = float(len(inter[0]))/len(union[0]) 89 | return iou 90 | 91 | def batch_analysis(meta_file,cfg_file,wgt_file,thresh,nms,src_path,dst_path): 92 | image_list = listdir(src_path) 93 | image_list.sort() 94 | image_num = len(image_list) 95 | meta = dn.load_meta(meta_file) 96 | object_type = [meta.names[i].decode('utf-8').strip() for i in range(meta.classes)] 97 | net = dn.load_net(cfg_file,wgt_file,0) 98 | move_count = 0 99 | boxes_last = [] 100 | for j,image_path in enumerate(image_list): 101 | print(str(j)+'/'+str(image_num)+" moved: "+str(move_count)) 102 | # print(image_path) 103 | try: 104 | img = cv2.imread(image_path) 105 | except: 106 | print('can not read image******************************************') 107 | continue 108 | h,w = img.shape[:2] 109 | image_name = getFileName(image_path) 110 | # print(image_name) 111 | image_name = image_name.replace('(','1_') 112 | image_name = image_name.replace(')','_1') 113 | img_save_path = os.path.join(dst_path,image_name+'.jpg') 114 | # print(img_save_path) 115 | det = dn.detect_ext(net, meta, bytes(image_path,'utf-8'),thresh) 116 | boxes = [] 117 | is_move_file = False 118 | if j%10 == 0: 119 | is_move_file = True 120 | for d in det: 121 | # try: 122 | # img = cv2.imread(image_path) 123 | # except: 124 | # print('can not read image******************************************') 125 | # continue 126 | # h,w = img.shape[:2] 127 | boxes.append(d[2:]) 128 | print('qqqqq,', d) 129 | bw = d[4]*w 130 | bh = d[5]*h 131 | if bw < 20 or bh < 20: 132 | print("bw or bh is less than 20") 133 | continue 134 | obj_type = d[0] 135 | if obj_type == 'tricycle': 136 | print("tricycle ************************************************") 137 | is_move_file = True 138 | break 139 | elif obj_type == 'car': 140 | if bw*bh/(w*h) > 0.25: 141 | print("big car ....................................................") 142 | is_move_file = True 143 | break 144 | if boxes_last != [] and boxes != []: 145 | iou = batch_iou(boxes_last,boxes,w,h) 146 | # print('iou: '+str(iou)) 147 | if iou > 0.6: 148 | print('batch iou: '+str(iou)) 149 | is_move_file = False 150 | # continue 151 | if is_move_file: 152 | move_count += 1 153 | if not os.path.exists(img_save_path): 154 | mymovefile(image_path,img_save_path) 155 | boxes_last = boxes 156 | dn.free_net(net) 157 | 158 | 159 | if __name__ == "__main__": 160 | #dn.set_gpu(0) 161 | src_path = "/mnt/diskc/zhoukai/puer0605/" # 原始的图片目录 162 | dst_path = "/mnt/diskc/zhoukai/puer0605/puer_jingjian" # 过滤后的图片目录 163 | cfg_file = b"/users/duanyou/c5/v4_all_train/yolov4_test.cfg" 164 | wgt_file = b"/users/duanyou/c5/v4_all_train/yolov4_5000.weights" 165 | meta_file = b"/users/duanyou/c5/v4_all_train/multiClass.data" 166 | if not os.path.exists(dst_path): 167 | os.mkdir(dst_path) 168 | batch_analysis(meta_file,cfg_file,wgt_file,0.2,0.45,src_path,dst_path) 169 | -------------------------------------------------------------------------------- /mAPEvaluate/findImgByObjectType_zhou.py: -------------------------------------------------------------------------------- 1 | import os 2 | import darknet as dn 3 | import cv2 4 | import shutil 5 | import numpy as np 6 | 7 | def mycopyfile(srcfile,dstfile): 8 | if not os.path.isfile(srcfile): 9 | print("%s not exist!"%(srcfile)) 10 | else: 11 | fpath,fname=os.path.split(dstfile) #分离文件名和路径 12 | if not os.path.exists(fpath): 13 | os.makedirs(fpath) #创建路径 14 | 
shutil.copyfile(srcfile,dstfile) #复制文件 15 | print("copy %s -> %s"%( srcfile,dstfile)) 16 | 17 | def mymovefile(srcfile,dstfile): 18 | if not os.path.isfile(srcfile): 19 | print("%s not exist!"%(srcfile)) 20 | else: 21 | fpath,fname=os.path.split(dstfile) #分离文件名和路径 22 | if not os.path.exists(fpath): 23 | os.makedirs(fpath) #创建路径 24 | shutil.move(srcfile,dstfile) #移动文件 25 | print("move %s -> %s"%( srcfile,dstfile)) 26 | 27 | def listdir(path): 28 | list_name = [] 29 | for file in os.listdir(path): 30 | file_path = os.path.join(path, file) 31 | if os.path.isdir(file_path): 32 | list_name += listdir(file_path) 33 | else: 34 | list_name.append(file_path) 35 | return list_name 36 | 37 | def imagePath2labelPath(image_path): 38 | image_dir = os.path.dirname(image_path) 39 | p = image_dir.split('/') 40 | root_dir = "/".join(p[:-1]) 41 | label_dir = os.path.join(root_dir,'Annotations') 42 | image_name = os.path.basename(image_path) 43 | image_name = image_name.replace(".jpg", "") 44 | label_path = os.path.join(label_dir, image_name+'.xml') 45 | return label_path 46 | 47 | def getFileName(file_path): 48 | file_name = os.path.basename(file_path) 49 | p = file_name.split('.') 50 | name = '' 51 | for i in range(len(p)-1): 52 | name += p[i] 53 | # file_name = p[] 54 | return name 55 | 56 | def Convert(size, box): 57 | dw = 1./size[0] 58 | dh = 1./size[1] 59 | x = (box[0] + box[1])/2.0 60 | y = (box[2] + box[3])/2.0 61 | w = abs(box[1] - box[0]) 62 | h = abs(box[3] - box[2]) 63 | x = x*dw 64 | w = w*dw 65 | y = y*dh 66 | h = h*dh 67 | return (x,y,w,h) 68 | 69 | # 计算前后帧之间的多个检测框间的iou 70 | def batch_iou(boxes1, boxes2, width, height): 71 | img1 = np.zeros((height,width), dtype=np.int) 72 | for b in boxes1: 73 | x1 = int(b[0]*width) 74 | x2 = x1+int(b[2]*width) 75 | y1 = int(b[1]*height) 76 | y2 = y1+int(b[3]*height) 77 | img1[y1:y2,x1:x2] = 1 78 | img2 = np.zeros((height,width), dtype=np.int) 79 | for b in boxes2: 80 | x1 = int(b[0]*width) 81 | x2 = x1+int(b[2]*width) 82 | y1 = int(b[1]*height) 83 | y2 = y1+int(b[3]*height) 84 | img2[y1:y2,x1:x2] = 1 85 | img = img1 + img2 86 | union = np.where(img>0) 87 | inter = np.where(img>1) 88 | iou = float(len(inter[0]))/len(union[0]) 89 | return iou 90 | 91 | def batch_analysis(meta_file,cfg_file,wgt_file,thresh,nms,src_path,dst_path): 92 | 93 | image_list = listdir(src_path) 94 | image_list.sort() 95 | image_num = len(image_list) 96 | meta = dn.load_meta(meta_file) 97 | object_type = [meta.names[i].decode('utf-8').strip() for i in range(meta.classes)] 98 | net = dn.load_net(cfg_file,wgt_file,0) 99 | move_count = 0 100 | boxes_last = [] 101 | 102 | for j,image_path in enumerate(image_list): 103 | 104 | print(str(j)+'/'+str(image_num)+" moved: "+str(move_count)) 105 | # print(image_path) 106 | 107 | try: 108 | img = cv2.imread(image_path) 109 | except: 110 | print('can not read image******************************************') 111 | continue 112 | h,w = img.shape[:2] 113 | image_name = getFileName(image_path) 114 | print("image_name", image_name) 115 | image_name = image_name.replace('(','1_') 116 | image_name = image_name.replace(')','_1') 117 | img_save_path = os.path.join(dst_path,image_name+'.jpg') 118 | # print(img_save_path) 119 | det = dn.detect_ext(net, meta, bytes(image_path,'utf-8'),thresh) 120 | boxes = [] 121 | is_move_file = False 122 | 123 | if j%20 == 0: #20数值越大 比对iou的间隔越大 124 | is_move_file = True 125 | 126 | for d in det: 127 | # try: 128 | # img = cv2.imread(image_path) 129 | # except: 130 | # print('can not read 
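# batch_iou() above rasterizes each frame's boxes into a binary mask and scores
# frame-to-frame overlap as painted-pixel intersection over union. np.int was
# removed in NumPy 1.24, so here is a sketch of the same routine with supported
# dtypes (mask_iou is an illustrative name; boxes are normalized (x, y, w, h) as above):
def mask_iou(boxes1, boxes2, width, height):
    m1 = np.zeros((height, width), dtype=np.uint8)
    m2 = np.zeros((height, width), dtype=np.uint8)
    for m, boxes in ((m1, boxes1), (m2, boxes2)):
        for x, y, w, h in boxes:
            x1, y1 = int(x * width), int(y * height)
            m[y1:y1 + int(h * height), x1:x1 + int(w * width)] = 1
    inter = np.logical_and(m1, m2).sum()
    union = np.logical_or(m1, m2).sum()
    return float(inter) / union if union else 0.0  # guard the empty-union case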
image******************************************') 131 | # continue 132 | # h,w = img.shape[:2] 133 | print("d",d) 134 | boxes.append(d[2:]) 135 | bw = d[4]*w 136 | bh = d[5]*h 137 | # if bw < 20 or bh < 20: 138 | # print("bw or bh is less than 20") 139 | # continue 140 | # obj_type = d[0] 141 | # if obj_type == 'tricycle': 142 | # print("tricycle ************************************************") 143 | # is_move_file = True 144 | # break 145 | # elif obj_type == 'car': 146 | # if bw*bh/(w*h) > 0.25: 147 | # print("big car ....................................................") 148 | # is_move_file = True 149 | # break 150 | if boxes_last != [] and boxes != []: 151 | iou = batch_iou(boxes_last,boxes,w,h) 152 | # print('iou: '+str(iou)) 153 | if iou > 0.6: 154 | print('batch iou: '+str(iou)) 155 | is_move_file = False 156 | print("iou^^^^^^^^^^^^^^^^^^^^^^^^^") 157 | # continue 158 | if is_move_file: 159 | move_count += 1 160 | if not os.path.exists(img_save_path): 161 | mymovefile(image_path,img_save_path) 162 | boxes_last = boxes 163 | dn.free_net(net) 164 | 165 | 166 | if __name__ == "__main__": 167 | # dn.set_gpu(3) 168 | src_path = "/mnt/diskc/zhoukai/puer0605/" # 原始的图片目录 169 | dst_path = "/mnt/diskc/zhoukai/puer0605/puer_jingjian" # 过滤后的图片目录 170 | cfg_file = b"/users/duanyou/c5/v4_all_train/yolov4_test.cfg" 171 | wgt_file = b"/users/duanyou/c5/v4_all_train/yolov4_5000.weights" 172 | meta_file = b"/users/duanyou/c5/v4_all_train/multiClass.data" 173 | if not os.path.exists(dst_path): 174 | os.mkdir(dst_path) 175 | batch_analysis(meta_file,cfg_file,wgt_file,0.2,0.45,src_path,dst_path) 176 | -------------------------------------------------------------------------------- /mAPEvaluate/model_analysis_v4all.py: -------------------------------------------------------------------------------- 1 | import os 2 | import darknet as dn 3 | 4 | import cv2 5 | import time 6 | import numpy 7 | import copy 8 | 9 | import cmp_det_label as cdl 10 | from readAndSaveDarknetDetRes import readDetRes,saveDetRes 11 | from readAnnotations import LoadLabel 12 | from voc_eval import voc_eval 13 | 14 | #读取文件列表 15 | def LoadFileList(files): 16 | fl = open(files,"r") 17 | file_lists = [] 18 | while True: 19 | lines = fl.readlines() 20 | if len(lines) == 0: 21 | break 22 | #print(path_list) 23 | 24 | for line in lines: 25 | line = line.strip('\n') 26 | # ph = line.split("/") 27 | # file_name = ph[-1] 28 | # file_name = os.path.basename(line) 29 | # file_name = file_name.replace(".jpg", "") 30 | file_lists.append(line) 31 | #print(file_name) 32 | #print(path_lists) 33 | fl.close() 34 | return file_lists 35 | 36 | def listdir(path): 37 | list_name = [] 38 | for file in os.listdir(path): 39 | file_path = os.path.join(path, file) 40 | if os.path.isdir(file_path): 41 | continue 42 | # listdir(file_path, list_name) 43 | else: 44 | list_name.append(file_path) 45 | return list_name 46 | 47 | def imagePath2labelPath(image_path): 48 | image_dir = os.path.dirname(image_path) 49 | p = image_dir.split('/') 50 | root_dir = "/".join(p[:-1]) 51 | label_dir = os.path.join(root_dir,'Annotations') 52 | image_name = os.path.basename(image_path) 53 | image_name = image_name.replace(".jpg", "") 54 | label_path = os.path.join(label_dir, image_name+'.xml') 55 | return label_path 56 | 57 | def getFileName(file_path): 58 | file_name = os.path.basename(file_path) 59 | p = file_name.split('.') 60 | name = '' 61 | for i in range(len(p)-1): 62 | name += p[i] 63 | # file_name = p[] 64 | return name 65 | 66 | def getMetaCfgName(file_path): 67 | # 
寻找file_path的同文件夹里的.data文件 68 | p = os.path.dirname(file_path) 69 | for file in os.listdir(p): 70 | if '.data' in file: 71 | data_path = file 72 | data_path = p + '/' + data_path 73 | if 'test.cfg' in file: 74 | cfg_path = file 75 | cfg_path = p + '/' + cfg_path 76 | 77 | return data_path.encode('utf-8'), cfg_path.encode('utf-8') 78 | 79 | def batch_detection(): 80 | pass 81 | 82 | def batch_analysis(weights_list_file, image_list_file, thresh, iou_thresh,result_dir): 83 | image_list = LoadFileList(image_list_file) 84 | image_num = len(image_list) 85 | weights_list = LoadFileList(weights_list_file) 86 | result = [] 87 | for weights in weights_list: 88 | weights_name = getFileName(weights) 89 | 90 | # print('weights_name: ',weights) 91 | 92 | meta_file,cfg_file = getMetaCfgName(weights) 93 | # meta = dn.load_meta(meta_file) 94 | # net = dn.load_net(cfg_file,bytes(weights,'utf-8'),0) 95 | 96 | # 选择对应的dn 97 | meta = dn.load_meta(meta_file) 98 | net = dn.load_net(cfg_file,bytes(weights,'utf-8'),0) 99 | 100 | object_type = [meta.names[i].decode('utf-8').strip() for i in range(meta.classes)] 101 | 102 | result_path = os.path.join(result_dir,weights_name) 103 | if not os.path.exists(result_path): 104 | os.mkdir(result_path) 105 | 106 | # detect result and save to text 107 | timeall = 0 108 | for j,image_path in enumerate(image_list): 109 | print('detect: '+str(j+1)+'/'+str(len(image_list))) 110 | label_path = imagePath2labelPath(image_path) 111 | image_name = getFileName(image_path) 112 | det_save_path = os.path.join(result_path,image_name+'.txt') 113 | # det = dn.detect_ext(net, meta, bytes(image_path,'utf-8'),thresh) 114 | 115 | # 选择对应的dn 116 | det,time1 = dn.detect_ext(net, meta, bytes(image_path,'utf-8'),thresh) 117 | timeall = timeall + time1; 118 | 119 | # save detection result to text 120 | saveDetRes(det,det_save_path,object_type) 121 | time.sleep(0.001) 122 | print('xxxxxxxxxxx', 'FPS, ',len(image_list)/timeall) 123 | # dn.free_net(net) 124 | 125 | # campare label and detection result 126 | for i,objtype in enumerate(object_type): 127 | 128 | # if objtype != 'fr': 129 | # continue 130 | total_label = 0 131 | total_detect = 0 132 | total_corr = 0 133 | total_iou = 0 134 | cmp_result = [] 135 | det_ = [] 136 | annopath = [] 137 | 138 | detall = [['name','obj_type', 'score',0,0,0,0]] # 此处为xywh(中心),应该变为xmin,ymin,xmax,ymax 139 | 140 | imagesetfile = [] 141 | for j,image_path in enumerate(image_list): 142 | label_path = imagePath2labelPath(image_path) 143 | image_name = getFileName(image_path) 144 | imagesetfile.append(image_name) 145 | img_save_path = os.path.join(result_path,image_name+'.jpg') 146 | det_save_path = os.path.join(result_path,image_name+'.txt') 147 | 148 | # detpath.append(det_save_path) 149 | annopath.append(label_path) 150 | # print(img_save_path) 151 | label = [] 152 | if os.path.exists(label_path): 153 | label = LoadLabel(label_path,object_type) 154 | 155 | # save detection result to text 156 | det = readDetRes(det_save_path) 157 | for d in det: 158 | if d[0] > len(object_type)-1: 159 | d[0] = ' ' 160 | continue 161 | d[0] = object_type[d[0]] 162 | 163 | for d in det: 164 | xmin = float(copy.deepcopy(d[2])) - float(copy.deepcopy(d[4]))/2.0 165 | ymin = float(copy.deepcopy(d[3])) - float(copy.deepcopy(d[5]))/2.0 166 | xmax = float(copy.deepcopy(d[2])) + float(copy.deepcopy(d[4]))/2.0 167 | ymax = float(copy.deepcopy(d[3])) + float(copy.deepcopy(d[5]))/2.0 168 | # 该文件格式:imagename1 type confidence xmin ymin xmax ymax 169 | d_ = [image_name, d[0], d[1], xmin, ymin, xmax, ymax] 
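# The four lines above convert center-format (cx, cy, w, h) back to the corner
# format that voc_eval() consumes (the comment above this block gives the row
# layout: "imagename type confidence xmin ymin xmax ymax"). The copy.deepcopy()
# calls are unnecessary for float scalars; an equivalent compact form:
# xmin, ymin = d[2] - d[4] / 2.0, d[3] - d[5] / 2.0
# xmax, ymax = d[2] + d[4] / 2.0, d[3] + d[5] / 2.0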
170 | det_.append(d_) 171 | 172 | if len(det_) != 0: 173 | detall = numpy.vstack((detall, det_)) 174 | det_=[] 175 | 176 | if i > 0: 177 | image_path = img_save_path 178 | # print(j,image_path) 179 | img = cv2.imread(image_path) 180 | if img is None: 181 | print("load image error&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") 182 | continue 183 | 184 | cmp_res = cdl.cmp_data(objtype, det, label, thresh, iou_thresh, img) 185 | 186 | cmp_res.update({'image_name': image_name}) 187 | total_corr += cmp_res['correct'] 188 | total_iou += cmp_res['avg_iou']*cmp_res['label_num'] 189 | 190 | cmp_result.append(cmp_res) 191 | print("%s: %d/%d label: %d detect: %d correct: %d recall: %f avg_iou: %f accuracy: %f precision: %f\n" % \ 192 | (str(objtype),j+1,image_num,cmp_res['label_num'],cmp_res['detect_num'],\ 193 | cmp_res['correct'],cmp_res['recall'],cmp_res['avg_iou'],\ 194 | cmp_res['accuracy'],cmp_res['precision'])) 195 | total_label += cmp_res['label_num'] 196 | total_detect += cmp_res['detect_num'] 197 | cv2.imwrite(img_save_path,img) 198 | img = [] 199 | time.sleep(0.001) 200 | 201 | # Compute the AP value 202 | # ap=0 203 | detall = numpy.delete(detall, 0, axis = 0) 204 | det_objtype = [obj for obj in detall if obj[1] == objtype] 205 | if len(det_objtype) == 0: 206 | ap = 0 207 | else: 208 | ap = voc_eval(det_objtype, annopath, imagesetfile, objtype, iou_thresh) 209 | detall=[] 210 | 211 | # Dataset-level analysis results 212 | avg_recall = 0 213 | if total_label > 0: 214 | avg_recall = total_corr/float(total_label) 215 | avg_iou = 0 216 | if total_iou > 0: 217 | avg_iou = total_iou/total_label 218 | avg_acc = 0 219 | if total_label+total_detect-total_corr > 0: 220 | avg_acc = float(total_corr)/(total_label+total_detect-total_corr) 221 | avg_precision = 0 222 | if total_detect > 0: 223 | avg_precision = float(total_corr)/total_detect 224 | total_result = [total_label,total_detect,total_corr,avg_recall,avg_iou,avg_acc,avg_precision] 225 | cdl.ExportAnaRes(objtype,cmp_result,total_result,image_path,result_path) 226 | print("total_label: %d total_detect: %d total_corr: %d recall: %f average iou: %f accuracy: %f precision: %f ap: %f\n" % \ 227 | (total_result[0],total_result[1],total_result[2],total_result[3],total_result[4],total_result[5],total_result[6],ap)) 228 | 229 | result.append([weights_name]+[objtype]+total_result+[float(ap)]) 230 | cdl.ExportAnaResAll(result, result_dir) 231 | time.sleep(0.001) 232 | 233 | if __name__ == "__main__": 234 | 235 | dn.set_gpu(4) 236 | weights_list_file = "/users/duanyou/c5/v4_all_train/weights.txt" 237 | 238 | # # all_test 239 | data_path = "/users/duanyou/c5/all_pretrain" 240 | image_list_file = os.path.join(data_path,"test.txt") 241 | result_dir = os.path.join("/users/duanyou/c5/results_v4all/results_all/") 242 | if not os.path.exists(result_dir): 243 | os.mkdir(result_dir) 244 | batch_analysis(weights_list_file,image_list_file,0.20,0.45,result_dir) 245 | 246 | # # changsha_test 247 | data_path = "/users/duanyou/c5/changsha" 248 | image_list_file = os.path.join(data_path,"test.txt") 249 | result_dir = os.path.join("/users/duanyou/c5/results_v4all/results_changsha/") 250 | if not os.path.exists(result_dir): 251 | os.mkdir(result_dir) 252 | batch_analysis(weights_list_file,image_list_file,0.20,0.45,result_dir) 253 | 254 | # # hezhoupucheng_test 255 | data_path = "/users/duanyou/c5/hezhoupucheng" 256 | image_list_file = os.path.join(data_path,"test.txt") 257 | result_dir = os.path.join("/users/duanyou/c5/results_v4all/results_hezhoupucheng/") 258 | if not os.path.exists(result_dir): 259 | 
260 | batch_analysis(weights_list_file,image_list_file,0.20,0.45,result_dir)
261 | 
262 | # # puer_test
263 | data_path = "/users/duanyou/c5/puer"
264 | image_list_file = os.path.join(data_path,"test.txt")
265 | result_dir = os.path.join("/users/duanyou/c5/results_v4all/results_puer/")
266 | if not os.path.exists(result_dir):
267 | os.mkdir(result_dir)
268 | batch_analysis(weights_list_file,image_list_file,0.20,0.45,result_dir)
269 | 
270 | # yancheng_test
271 | # data_path = "/users/duanyou/c5/yancheng"
272 | # image_list_file = os.path.join(data_path,"test.txt")
273 | # result_dir = os.path.join("/users/duanyou/c5/results_v4all/results_yancheng/")
274 | # if not os.path.exists(result_dir):
275 | # os.mkdir(result_dir)
276 | # batch_analysis(weights_list_file,image_list_file,0.20,0.45,result_dir)
277 | 
-------------------------------------------------------------------------------- /mAPEvaluate/voc_eval.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast/er R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Bharath Hariharan
5 | # --------------------------------------------------------
6 | 
7 | import xml.etree.ElementTree as ET
8 | import os
9 | import numpy as np
10 | 
11 | 
12 | def convert(size, box): # box=xmin,ymin,xmax,ymax
13 | dw = 1. / size[0]
14 | dh = 1. / size[1]
15 | xmin = box[0] * dw
16 | ymin = box[1] * dh
17 | xmax = box[2] * dw
18 | ymax = box[3] * dh
19 | return (xmin, ymin, xmax, ymax)
20 | 
21 | 
22 | def parse_rec(filename): # read the annotation xml file
23 | """ Parse a PASCAL VOC xml file """
24 | in_file = open(filename)
25 | xml_info = in_file.read()
26 | try:
27 | root = ET.fromstring(xml_info)
28 | except Exception:
29 | print("Error: cannot parse file"); return []
30 | objects = []
31 | if root.find('markNode') != None:
32 | obj = root.find('markNode').find('object')
33 | if obj != None:
34 | w = int(root.find('width').text)
35 | h = int(root.find('height').text)
36 | for obj in root.iter('object'):
37 | if 'non_interest' in str(obj.find('targettype').text):
38 | continue
39 | obj_struct = {}
40 | if obj.find('targettype').text == 'car_rear' or obj.find('targettype').text == 'car_front':
41 | obj_struct['name'] = 'fr'
42 | else:
43 | obj_struct['name'] = obj.find('targettype').text
44 | obj_struct['pose'] = 0 # obj.find('pose').text
45 | obj_struct['truncated'] = 0 # int(obj.find('truncated').text)
46 | obj_struct['difficult'] = 0 # int(obj.find('difficult').text)
47 | # bbox = obj.find('bndbox')
48 | b = [float(obj.find('bndbox').find('xmin').text),
49 | float(obj.find('bndbox').find('ymin').text),
50 | float(obj.find('bndbox').find('xmax').text),
51 | float(obj.find('bndbox').find('ymax').text)]
52 | bb = convert((w, h), b)
53 | if bb is None:
54 | continue
55 | obj_struct['bbox'] = [bb[0], bb[1], bb[2], bb[3]]
56 | objects.append(obj_struct)
57 | return objects
58 | 
59 | 
60 | def voc_ap(rec, prec):
61 | # use the more precise point-wise integration method
62 | # correct AP calculation
63 | # first append sentinel values at the end
64 | mrec = np.concatenate(([0.], rec, [1.]))
65 | mpre = np.concatenate(([0.], prec, [0.]))
66 | 
67 | # compute the precision envelope
68 | for i in range(mpre.size - 1, 0, -1):
69 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
70 | 
71 | # to calculate area under PR curve, look for points
72 | # where X axis (recall) changes value
73 | i = np.where(mrec[1:] != mrec[:-1])[0]
74 | 
75 | # and sum (\Delta recall) * prec
76 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
77 | return ap
78 | 
79 | 
80 | def voc_eval(detpath,
81 | annopath,
82 | imagesetfile,
83 | classname,
84 | ovthresh=0.5):
85 | """
86 | :param detpath:
87 | :param annopath:
88 | :param imagesetfile:
89 | :param classname:
90 | :param ovthresh:
91 | :return:
92 | """
93 | # Main routine: computes recall and precision for the current class
94 | # detpath: detection-results txt file, e.g. VOCdevkit/results/VOC20xx/Main/_det_test_aeroplane.txt.
95 | # File format: imagename1 type confidence xmin ymin xmax ymax (first result of image 1)
96 | # imagename1 type confidence xmin ymin xmax ymax (second result of image 1)
97 | # imagename1 type confidence xmin ymin xmax ymax (first result of image 2)
98 | # ......
99 | # One result per line, one line per detected bbox; assume 20000 detection results here
100 | 
101 | # detpath: Path to detections
102 | # detpath.format(classname) should produce the detection results file.
103 | # annopath: Path to annotations
104 | # annopath.format(imagename) should be the xml annotations file. # the xml annotation files.
105 | # imagesetfile: Text file containing the list of images, one image per line. # dataset split txt file, e.g. VOCdevkit/VOC20xx/ImageSets/Main/test.txt; with 1000 test images this file has 1000 lines.
106 | # classname: Category name (duh) # the class name; e.g. 2 classes (one target class + background).
107 | # cachedir: Directory for caching the annotations # cache dir for annotations, e.g. VOCdevkit/annotation_cache, so the raw dataset need not be re-read on every run.
108 | # [ovthresh]: Overlap threshold (default = 0.5) # required overlap.
109 | # [use_07_metric]: Whether to use VOC07's 11 point AP computation
110 | # (default False) # whether to use the VOC07 AP method (11-point sampling).
111 | 
112 | # assumes detections are in detpath.format(classname)
113 | # assumes annotations are in annopath.format(imagename)
114 | # assumes imagesetfile is a text file with each line an image name
115 | # cachedir caches the annotations in a pickle file
116 | 
117 | imagenames = [x.strip() for x in imagesetfile]
118 | 
119 | # parse_rec reads each image's annotation file and stores the gt in the recs dict (key: image name, value: gt objects)
120 | recs = {}
121 | for i, imagename in enumerate(imagenames):
122 | # recs[imagename] = parse_rec(annopath.format(imagename))
123 | recs[imagename] = parse_rec(annopath[i])
124 | 
125 | # extract gt objects for this class # recall, precision and AP are all computed per class.
126 | class_recs = {} # annotations of the current class
127 | npos = 0 # number of labeled (non-difficult) targets
128 | for imagename in imagenames:
129 | R = [obj for obj in recs[imagename] if obj['name'] == classname] # filter: keep only recs entries of the given class, stored as R.
130 | bbox = np.array([x['bbox'] for x in R]) # extract the bboxes
131 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) # if the dataset has no difficult flag, all entries are 0.
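# class_recs maps each image name to this class's gt boxes together with
# per-box 'difficult' and 'det' (already-matched) flags, filled in just below;
# npos holds the number of non-difficult gt boxes that recall is measured against.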
132 | 
133 | det = [False] * len(R) # len(R) is the number of gt boxes of this class; det marks whether each gt has been matched, initialized to False.
134 | npos = npos + sum(~difficult) # accumulate non-difficult samples; with no difficult flags, npos equals the gt count.
135 | class_recs[imagename] = {'bbox': bbox,
136 | 'difficult': difficult,
137 | 'det': det}
138 | 
139 | # read dets: read the detection results
140 | splitlines = detpath # each row: imagename1 type confidence xmin ymin xmax ymax
141 | # splitlines = [x.strip().split(' ') for x in detpath] # with 20000 detections, splitlines has length 20000
142 | image_ids = [x[0] for x in splitlines] # image names of the detections; length 20000 even with only 1000 images, since one image can yield several detections
143 | confidence = np.array([float(x[2]) for x in splitlines]) # detection confidences
144 | BB = np.array([[float(z) for z in x[3:]] for x in splitlines]) # bboxes as floats.
145 | 
146 | npos = len(image_ids)
147 | 
148 | # sort the 20000 detections by confidence
149 | sorted_ind = np.argsort(-confidence) # indices of confidence in descending order.
150 | sorted_scores = np.sort(-confidence) # descending order.
151 | BB = BB[sorted_ind, :] # reorder the bboxes from highest to lowest confidence.
152 | image_ids = [image_ids[x] for x in sorted_ind]
153 | 
154 | # go down dets and mark TPs and FPs
155 | nd = len(image_ids) # note: 20000 here, not 1000
156 | tp = np.zeros(nd) # true positives, length 20000
157 | fp = np.zeros(nd) # false positives, length 20000
158 | for d in range(nd): # iterate over all detections; thanks to the sort, from highest to lowest confidence
159 | R = class_recs[image_ids[d]] # all same-class gt of the image this detection belongs to
160 | bb = BB[d, :].astype(float) # bbox of the current detection
161 | ovmax = -np.inf
162 | BBGT = R['bbox'].astype(float) # bbox coordinates of the same-class gt boxes in that image
163 | 
164 | if BBGT.size > 0:
165 | # compute overlaps between the current detection and all gt boxes of its image (one-vs-many via numpy broadcasting)
166 | # intersection
167 | ixmin = np.maximum(BBGT[:, 0], bb[0])
168 | iymin = np.maximum(BBGT[:, 1], bb[1])
169 | ixmax = np.minimum(BBGT[:, 2], bb[2])
170 | iymax = np.minimum(BBGT[:, 3], bb[3])
171 | iw = np.maximum(ixmax - ixmin + 1., 0.)
172 | ih = np.maximum(iymax - iymin + 1., 0.)
173 | inters = iw * ih
174 | 
175 | # union
176 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
177 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
178 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
179 | 
180 | overlaps = inters / uni
181 | ovmax = np.max(overlaps) # maximum overlap
182 | jmax = np.argmax(overlaps) # index of the gt with maximum overlap
183 | # print('overlaps',overlaps,'ovmax',ovmax,'jmax ',jmax)
184 | 
185 | if ovmax > ovthresh: # if the best overlap with a gt box passes the threshold
186 | # if not R['difficult'][jmax]:
187 | if not R['det'][jmax]:
188 | tp[d] = 1. # one more true positive
189 | R['det'][jmax] = True # mark this gt as detected; a later detection matching the same gt must not count as another true positive
190 | else: # otherwise it is a false positive (duplicate detection)
191 | fp[d] = 1.
192 | else: # below the threshold: definitely a false positive
193 | fp[d] = 1.
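# Illustrative walk-through of the loop above (made-up numbers): if two
# detections on one image, scored 0.9 and 0.8, both overlap the same gt box
# above ovthresh, the 0.9 detection claims the gt (det=True) and counts as a
# tp; the lower-scored one then falls into the else-branch and counts as a fp.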
194 | 
195 | # compute precision recall
196 | fp = np.cumsum(fp) # running sum: false positives accumulated up to each rank
197 | tp = np.cumsum(tp) # running sum: true positives accumulated up to each rank
198 | rec = tp / float(npos) # recall, length 20000, goes from 0 to 1
199 | # avoid divide by zero in case the first detection matches a difficult
200 | # ground truth (precision, length 20000, goes from 1 to 0)
201 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
202 | ap = voc_ap(rec, prec)
203 | 
204 | return ap
205 | 
-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
1 | numpy == 1.17
2 | opencv-python >= 4.1
3 | torch >= 1.5
4 | torchvision
5 | matplotlib
6 | pycocotools
7 | tqdm
8 | pillow
9 | tensorboard >= 1.14
10 | 
11 | # Nvidia Apex (optional) for mixed precision training --------------------------
12 | # git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user
13 | 
-------------------------------------------------------------------------------- /test5_track.gif: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaptainEven/YOLOV4_MCMOT/0cf60be77ae6d088e079ccefedbf71a05f938890/test5_track.gif
-------------------------------------------------------------------------------- /tracker/basetrack.py: --------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | 
3 | import numpy as np
4 | from collections import defaultdict
5 | from collections import OrderedDict
6 | 
7 | 
8 | class TrackState(object):
9 | New = 0
10 | Tracked = 1
11 | Lost = 2
12 | Removed = 3
13 | 
14 | 
15 | # TODO: create a BaseTrack base class that supports multiple object classes
16 | class MCBaseTrack(object):
17 | _count_dict = defaultdict(int) # the MCBaseTrack class owns this dict
18 | 
19 | track_id = 0
20 | is_activated = False
21 | state = TrackState.New
22 | 
23 | history = OrderedDict()
24 | features = []
25 | curr_feature = None
26 | score = 0
27 | start_frame = 0
28 | frame_id = 0
29 | time_since_update = 0
30 | 
31 | # multi-camera
32 | location = (np.inf, np.inf)
33 | 
34 | @property
35 | def end_frame(self):
36 | return self.frame_id
37 | 
38 | # @even: reset track id
39 | @staticmethod
40 | def init_count(num_classes):
41 | """
42 | Initiate _count for all object classes
43 | :param num_classes:
44 | """
45 | for cls_id in range(num_classes):
46 | MCBaseTrack._count_dict[cls_id] = 0
47 | 
48 | @staticmethod
49 | def next_id(cls_id):
50 | MCBaseTrack._count_dict[cls_id] += 1
51 | return MCBaseTrack._count_dict[cls_id]
52 | 
53 | @staticmethod
54 | def reset_track_count(cls_id):
55 | MCBaseTrack._count_dict[cls_id] = 0
56 | 
57 | def activate(self, *args):
58 | raise NotImplementedError
59 | 
60 | def predict(self):
61 | raise NotImplementedError
62 | 
63 | def update(self, *args, **kwargs):
64 | raise NotImplementedError
65 | 
66 | def mark_lost(self):
67 | self.state = TrackState.Lost
68 | 
69 | def mark_removed(self):
70 | self.state = TrackState.Removed
71 | 
72 | 
73 | class BaseTrack(object):
74 | _count = 0
75 | 
76 | track_id = 0
77 | is_activated = False
78 | state = TrackState.New
79 | 
80 | history = OrderedDict()
81 | features = []
82 | curr_feature = None
83 | score = 0
84 | start_frame = 0
85 | frame_id = 0
86 | time_since_update = 0
87 | 
88 | # multi-camera
89 | location = (np.inf, np.inf)
90 | 
91 | @property
92 | def end_frame(self):
93 | return self.frame_id
94 | 
95 | @staticmethod
96 | def next_id():
97 | BaseTrack._count += 1
98 | return BaseTrack._count
99 | 
100 | # @even: reset track id
101 | @staticmethod
102 | def reset_track_count():
103 | BaseTrack._count = 0
104 | 
105 | def activate(self, *args):
106 | raise NotImplementedError
107 | 
108 | def predict(self):
109 | raise NotImplementedError
110 | 
111 | def update(self, *args, **kwargs):
112 | raise NotImplementedError
113 | 
114 | def mark_lost(self):
115 | self.state = TrackState.Lost
116 | 
117 | def mark_removed(self):
118 | self.state = TrackState.Removed
119 | 
-------------------------------------------------------------------------------- /tracker/matching.py: --------------------------------------------------------------------------------
1 | import lap
2 | import numpy as np
3 | import scipy
4 | from cython_bbox import bbox_overlaps as bbox_ious
5 | from scipy.spatial.distance import cdist
6 | from tracking_utils import kalman_filter
7 | 
8 | 
9 | def merge_matches(m1, m2, shape):
10 | O, P, Q = shape
11 | m1 = np.asarray(m1)
12 | m2 = np.asarray(m2)
13 | 
14 | M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
15 | M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
16 | 
17 | mask = M1 * M2
18 | match = mask.nonzero()
19 | match = list(zip(match[0], match[1]))
20 | unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
21 | unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
22 | 
23 | return match, unmatched_O, unmatched_Q
24 | 
25 | 
26 | def _indices_to_matches(cost_matrix, indices, thresh):
27 | matched_cost = cost_matrix[tuple(zip(*indices))]
28 | matched_mask = (matched_cost <= thresh)
29 | 
30 | matches = indices[matched_mask]
31 | unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
32 | unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
33 | 
34 | return matches, unmatched_a, unmatched_b
35 | 
36 | 
37 | def linear_assignment(cost_matrix, thresh):
38 | """
39 | :param cost_matrix:
40 | :param thresh:
41 | :return:
42 | """
43 | if cost_matrix.size == 0:
44 | return np.empty((0, 2), dtype=int), \
45 | tuple(range(cost_matrix.shape[0])), \
46 | tuple(range(cost_matrix.shape[1]))
47 | 
48 | matches, unmatched_a, unmatched_b = [], [], []
49 | cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
50 | 
51 | for ix, mx in enumerate(x):
52 | if mx >= 0:
53 | matches.append([ix, mx])
54 | 
55 | unmatched_a = np.where(x < 0)[0]
56 | unmatched_b = np.where(y < 0)[0]
57 | matches = np.asarray(matches)
58 | 
59 | return matches, unmatched_a, unmatched_b
60 | 
61 | 
62 | def ious(atlbrs, btlbrs):
63 | """
64 | Compute cost based on IoU
65 | :type atlbrs: list[tlbr] | np.ndarray
66 | :type btlbrs: list[tlbr] | np.ndarray
67 | 
68 | :rtype ious np.ndarray
69 | """
70 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float)
71 | if ious.size == 0:
72 | return ious
73 | 
74 | ious = bbox_ious(
75 | np.ascontiguousarray(atlbrs, dtype=np.float),
76 | np.ascontiguousarray(btlbrs, dtype=np.float)
77 | )
78 | 
79 | return ious
80 | 
81 | 
82 | def iou_distance(atracks, btracks):
83 | """
84 | Compute cost based on IoU
85 | :type atracks: list[STrack]
86 | :type btracks: list[STrack]
87 | 
88 | :rtype cost_matrix np.ndarray
89 | """
90 | 
91 | if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (
92 | len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
93 | atlbrs = atracks
94 | btlbrs = btracks
95 | else:
96 | atlbrs = [track.tlbr for track in atracks]
97 | btlbrs = [track.tlbr for track
in btracks]
98 | 
99 | _ious = ious(atlbrs, btlbrs)
100 | cost_matrix = 1 - _ious
101 | 
102 | return cost_matrix
103 | 
104 | 
105 | # TODO: using GIOU, DIOU, CIOU... to replace IOU
106 | 
107 | def embedding_distance(tracks, detections, metric='cosine'):
108 | """
109 | :param tracks: list[STrack]
110 | :param detections: list[BaseTrack]
111 | :param metric:
112 | :return: cost_matrix np.ndarray
113 | """
114 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float)
115 | if cost_matrix.size == 0:
116 | return cost_matrix
117 | 
118 | det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float)
119 | # for i, track in enumerate(tracks):
120 | # cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
121 | track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float)
122 | 
123 | # default: cosine distance
124 | # Normalized features
125 | cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))
126 | 
127 | return cost_matrix
128 | 
129 | 
130 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
131 | """
132 | :param kf:
133 | :param cost_matrix:
134 | :param tracks:
135 | :param detections:
136 | :param only_position:
137 | :return:
138 | """
139 | if cost_matrix.size == 0:
140 | return cost_matrix
141 | 
142 | gating_dim = 2 if only_position else 4
143 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
144 | measurements = np.asarray([det.to_xyah() for det in detections])
145 | 
146 | for row, track in enumerate(tracks):
147 | gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position)
148 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
149 | 
150 | return cost_matrix
151 | 
152 | 
153 | def fuse_motion(kf,
154 | cost_matrix,
155 | tracks,
156 | detections,
157 | only_position=False,
158 | lambda_=0.98):
159 | """
160 | :param kf:
161 | :param cost_matrix:
162 | :param tracks:
163 | :param detections:
164 | :param only_position:
165 | :param lambda_:
166 | :return:
167 | """
168 | if cost_matrix.size == 0:
169 | return cost_matrix
170 | 
171 | gating_dim = 2 if only_position else 4
172 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
173 | measurements = np.asarray([det.to_xyah() for det in detections])
174 | 
175 | for row, track in enumerate(tracks):
176 | gating_distance = kf.gating_distance(track.mean,
177 | track.covariance,
178 | measurements,
179 | only_position,
180 | metric='maha')
181 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
182 | cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
183 | 
184 | return cost_matrix
185 | 
-------------------------------------------------------------------------------- /tracking_utils/evaluation.py: --------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import copy
4 | import motmetrics as mm
5 | mm.lap.default_solver = 'lap'
6 | 
7 | from tracking_utils.io import read_results, unzip_objs
8 | 
9 | 
10 | class Evaluator(object):
11 | 
12 | def __init__(self, data_root, seq_name, data_type):
13 | self.data_root = data_root
14 | self.seq_name = seq_name
15 | self.data_type = data_type
16 | 
17 | self.load_annotations()
18 | self.reset_accumulator()
19 | 
20 | def load_annotations(self):
21 | assert self.data_type == 'mot'
22 | 
23 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
24 | self.gt_frame_dict =
read_results(gt_filename, self.data_type, is_gt=True) 25 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 26 | 27 | def reset_accumulator(self): 28 | self.acc = mm.MOTAccumulator(auto_id=True) 29 | 30 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 31 | # results 32 | trk_tlwhs = np.copy(trk_tlwhs) 33 | trk_ids = np.copy(trk_ids) 34 | 35 | # gts 36 | gt_objs = self.gt_frame_dict.get(frame_id, []) 37 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 38 | 39 | # ignore boxes 40 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 41 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | #match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 57 | #match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 58 | #match_ious = iou_distance[match_is, match_js] 59 | 60 | #match_js = np.asarray(match_js, dtype=int) 61 | #match_js = match_js[np.logical_not(np.isnan(match_ious))] 62 | #keep[match_js] = False 63 | #trk_tlwhs = trk_tlwhs[keep] 64 | #trk_ids = trk_ids[keep] 65 | 66 | # get distance matrix 67 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 68 | 69 | # acc 70 | self.acc.update(gt_ids, trk_ids, iou_distance) 71 | 72 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 73 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 74 | else: 75 | events = None 76 | return events 77 | 78 | def eval_file(self, filename): 79 | self.reset_accumulator() 80 | 81 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 82 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 83 | for frame_id in frames: 84 | trk_objs = result_frame_dict.get(frame_id, []) 85 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 86 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 87 | 88 | return self.acc 89 | 90 | @staticmethod 91 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 92 | names = copy.deepcopy(names) 93 | if metrics is None: 94 | metrics = mm.metrics.motchallenge_metrics 95 | metrics = copy.deepcopy(metrics) 96 | 97 | mh = mm.metrics.create() 98 | summary = mh.compute_many( 99 | accs, 100 | metrics=metrics, 101 | names=names, 102 | generate_overall=True 103 | ) 104 | 105 | return summary 106 | 107 | @staticmethod 108 | def save_summary(summary, filename): 109 | import pandas as pd 110 | writer = pd.ExcelWriter(filename) 111 | summary.to_excel(writer) 112 | writer.save() 113 | -------------------------------------------------------------------------------- /tracking_utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | from tracking_utils.log import logger 6 | 7 | 8 | def 
write_results(filename, results_dict: Dict, data_type: str): 9 | if not filename: 10 | return 11 | path = os.path.dirname(filename) 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | if data_type in ('mot', 'mcmot', 'lab'): 16 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 17 | elif data_type == 'kitti': 18 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 19 | else: 20 | raise ValueError(data_type) 21 | 22 | with open(filename, 'w') as f: 23 | for frame_id, frame_data in results_dict.items(): 24 | if data_type == 'kitti': 25 | frame_id -= 1 26 | for tlwh, track_id in frame_data: 27 | if track_id < 0: 28 | continue 29 | 30 | x1, y1, w, h = tlwh 31 | x2, y2 = x1 + w, y1 + h 32 | line = save_format.format(frame=frame_id, id=track_id, 33 | x1=x1, y1=y1, w=w, h=h, 34 | score=1.0) 35 | f.write(line) 36 | logger.info('Save results to {}'.format(filename)) 37 | 38 | 39 | def write_results_dict(results_f_path, results_dict, data_type, num_classes=5): 40 | """ 41 | :param results_f_path: 42 | :param results_dict: 43 | :param data_type: 44 | :param num_classes: 45 | :return: 46 | """ 47 | if data_type == 'mot': 48 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,{cls_id},1\n' 49 | elif data_type == 'kitti': 50 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' 51 | else: 52 | raise ValueError(data_type) 53 | 54 | with open(results_f_path, 'w') as f: 55 | for cls_id in range(num_classes): # process each object class 56 | cls_results = results_dict[cls_id] 57 | for fr_id, tlwhs, track_ids in cls_results: # fr_id starts from 1 58 | if data_type == 'kitti': 59 | fr_id -= 1 60 | 61 | for tlwh, track_id in zip(tlwhs, track_ids): 62 | if track_id < 0: 63 | continue 64 | 65 | x1, y1, w, h = tlwh 66 | # x2, y2 = x1 + w, y1 + h 67 | line = save_format.format(frame=fr_id, 68 | id=track_id, 69 | x1=x1, y1=y1, w=w, h=h, 70 | cls_id=cls_id) 71 | # if fr_id == 1: 72 | # print(line) 73 | 74 | f.write(line) 75 | # f.flush() 76 | 77 | logger.info('Save results to {}.\n'.format(results_f_path)) 78 | 79 | 80 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 81 | if data_type in ('mot', 'lab'): 82 | read_fun = read_mot_results 83 | else: 84 | raise ValueError('Unknown data type: {}'.format(data_type)) 85 | 86 | return read_fun(filename, is_gt, is_ignore) 87 | 88 | 89 | """ 90 | labels={'ped', ... % 1 91 | 'person_on_vhcl', ... % 2 92 | 'car', ... % 3 93 | 'bicycle', ... % 4 94 | 'mbike', ... % 5 95 | 'non_mot_vhcl', ... % 6 96 | 'static_person', ... % 7 97 | 'distractor', ... % 8 98 | 'occluder', ... % 9 99 | 'occluder_on_grnd', ... %10 100 | 'occluder_full', ... % 11 101 | 'reflection', ... % 12 102 | 'crowd' ... 
% 13 103 | }; 104 | """ 105 | 106 | 107 | def read_mot_results(filename, is_gt, is_ignore): 108 | valid_labels = {1} 109 | ignore_labels = {2, 7, 8, 12} 110 | results_dict = dict() 111 | if os.path.isfile(filename): 112 | with open(filename, 'r') as f: 113 | for line in f.readlines(): 114 | linelist = line.split(',') 115 | if len(linelist) < 7: 116 | continue 117 | fid = int(linelist[0]) 118 | if fid < 1: 119 | continue 120 | results_dict.setdefault(fid, list()) 121 | 122 | if is_gt: 123 | if 'MOT16-' in filename or 'MOT17-' in filename: 124 | label = int(float(linelist[7])) 125 | mark = int(float(linelist[6])) 126 | if mark == 0 or label not in valid_labels: 127 | continue 128 | score = 1 129 | elif is_ignore: 130 | if 'MOT16-' in filename or 'MOT17-' in filename: 131 | label = int(float(linelist[7])) 132 | vis_ratio = float(linelist[8]) 133 | if label not in ignore_labels and vis_ratio >= 0: 134 | continue 135 | else: 136 | continue 137 | score = 1 138 | else: 139 | score = float(linelist[6]) 140 | 141 | tlwh = tuple(map(float, linelist[2:6])) 142 | target_id = int(linelist[1]) 143 | 144 | results_dict[fid].append((tlwh, target_id, score)) 145 | 146 | return results_dict 147 | 148 | 149 | def unzip_objs(objs): 150 | if len(objs) > 0: 151 | tlwhs, ids, scores = zip(*objs) 152 | else: 153 | tlwhs, ids, scores = [], [], [] 154 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 155 | 156 | return tlwhs, ids, scores 157 | -------------------------------------------------------------------------------- /tracking_utils/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | """ 6 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 7 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 8 | function and used as Mahalanobis gating threshold. 9 | """ 10 | chi2inv95 = { 11 | 1: 3.8415, 12 | 2: 5.9915, 13 | 3: 7.8147, 14 | 4: 9.4877, # 4: 9.4877 15 | 5: 11.070, 16 | 6: 12.592, 17 | 7: 14.067, 18 | 8: 15.507, 19 | 9: 16.919} 20 | 21 | 22 | class KalmanFilter(object): 23 | """ 24 | A simple Kalman filter for tracking bounding boxes in image space. 25 | 26 | The 8-dimensional state space 27 | 28 | x, y, a, h, vx, vy, va, vh 29 | 30 | contains the bounding box center position (x, y), aspect ratio a, height h, 31 | and their respective velocities. 32 | 33 | Object motion follows a constant velocity model. The bounding box location 34 | (x, y, a, h) is taken as direct observation of the state space (linear 35 | observation model). 36 | 37 | """ 38 | 39 | def __init__(self): 40 | ndim, dt = 4, 1. 41 | 42 | # Create Kalman filter model matrices. 43 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 44 | for i in range(ndim): 45 | self._motion_mat[i, ndim + i] = dt 46 | self._update_mat = np.eye(ndim, 2 * ndim) 47 | 48 | # Motion and observation uncertainty are chosen relative to the current 49 | # state estimate. These weights control the amount of uncertainty in 50 | # the model. This is a bit hacky. 51 | self._std_weight_position = 1. / 20 52 | self._std_weight_velocity = 1. / 160 53 | 54 | def initiate(self, measurement): 55 | """Create track from unassociated measurement. 56 | 57 | Parameters 58 | ---------- 59 | measurement : ndarray 60 | Bounding box coordinates (x, y, a, h) with center position (x, y), 61 | aspect ratio a, and height h. 
62 | 63 | Returns 64 | ------- 65 | (ndarray, ndarray) 66 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 67 | dimensional) of the new track. Unobserved velocities are initialized 68 | to 0 mean. 69 | 70 | """ 71 | mean_pos = measurement 72 | mean_vel = np.zeros_like(mean_pos) 73 | mean = np.r_[mean_pos, mean_vel] 74 | 75 | std = [ 76 | 2 * self._std_weight_position * measurement[3], 77 | 2 * self._std_weight_position * measurement[3], 78 | 1e-2, 79 | 2 * self._std_weight_position * measurement[3], 80 | 10 * self._std_weight_velocity * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 1e-5, 83 | 10 * self._std_weight_velocity * measurement[3]] 84 | covariance = np.diag(np.square(std)) 85 | return mean, covariance 86 | 87 | def predict(self, mean, covariance): 88 | """Run Kalman filter prediction step. 89 | 90 | Parameters 91 | ---------- 92 | mean : ndarray 93 | The 8 dimensional mean vector of the object state at the previous 94 | time step. 95 | covariance : ndarray 96 | The 8x8 dimensional covariance matrix of the object state at the 97 | previous time step. 98 | 99 | Returns 100 | ------- 101 | (ndarray, ndarray) 102 | Returns the mean vector and covariance matrix of the predicted 103 | state. Unobserved velocities are initialized to 0 mean. 104 | 105 | """ 106 | std_pos = [ 107 | self._std_weight_position * mean[3], 108 | self._std_weight_position * mean[3], 109 | 1e-2, 110 | self._std_weight_position * mean[3]] 111 | std_vel = [ 112 | self._std_weight_velocity * mean[3], 113 | self._std_weight_velocity * mean[3], 114 | 1e-5, 115 | self._std_weight_velocity * mean[3]] 116 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 117 | 118 | # mean = np.dot(self._motion_mat, mean) 119 | mean = np.dot(mean, self._motion_mat.T) 120 | covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 121 | 122 | return mean, covariance 123 | 124 | def project(self, mean, covariance): 125 | """Project state distribution to measurement space. 126 | 127 | Parameters 128 | ---------- 129 | mean : ndarray 130 | The state's mean vector (8 dimensional array). 131 | covariance : ndarray 132 | The state's covariance matrix (8x8 dimensional). 133 | 134 | Returns 135 | ------- 136 | (ndarray, ndarray) 137 | Returns the projected mean and covariance matrix of the given state 138 | estimate. 139 | 140 | """ 141 | std = [ 142 | self._std_weight_position * mean[3], 143 | self._std_weight_position * mean[3], 144 | 1e-1, 145 | self._std_weight_position * mean[3]] 146 | innovation_cov = np.diag(np.square(std)) 147 | 148 | mean = np.dot(self._update_mat, mean) 149 | covariance = np.linalg.multi_dot(( 150 | self._update_mat, covariance, self._update_mat.T)) 151 | return mean, covariance + innovation_cov 152 | 153 | def multi_predict(self, mean, covariance): 154 | """Run Kalman filter prediction step (Vectorized version). 155 | Parameters 156 | ---------- 157 | mean : ndarray 158 | The Nx8 dimensional mean matrix of the object states at the previous 159 | time step. 160 | covariance : ndarray 161 | The Nx8x8 dimensional covariance matrics of the object states at the 162 | previous time step. 163 | Returns 164 | ------- 165 | (ndarray, ndarray) 166 | Returns the mean vector and covariance matrix of the predicted 167 | state. Unobserved velocities are initialized to 0 mean. 
168 | """ 169 | std_pos = [ 170 | self._std_weight_position * mean[:, 3], 171 | self._std_weight_position * mean[:, 3], 172 | 1e-2 * np.ones_like(mean[:, 3]), 173 | self._std_weight_position * mean[:, 3]] 174 | std_vel = [ 175 | self._std_weight_velocity * mean[:, 3], 176 | self._std_weight_velocity * mean[:, 3], 177 | 1e-5 * np.ones_like(mean[:, 3]), 178 | self._std_weight_velocity * mean[:, 3]] 179 | sqr = np.square(np.r_[std_pos, std_vel]).T 180 | 181 | motion_cov = [] 182 | for i in range(len(mean)): 183 | motion_cov.append(np.diag(sqr[i])) 184 | motion_cov = np.asarray(motion_cov) 185 | 186 | mean = np.dot(mean, self._motion_mat.T) 187 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) 188 | covariance = np.dot(left, self._motion_mat.T) + motion_cov 189 | 190 | return mean, covariance 191 | 192 | def update(self, mean, covariance, measurement): 193 | """Run Kalman filter correction step. 194 | 195 | Parameters 196 | ---------- 197 | mean : ndarray 198 | The predicted state's mean vector (8 dimensional). 199 | covariance : ndarray 200 | The state's covariance matrix (8x8 dimensional). 201 | measurement : ndarray 202 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 203 | is the center position, a the aspect ratio, and h the height of the 204 | bounding box. 205 | 206 | Returns 207 | ------- 208 | (ndarray, ndarray) 209 | Returns the measurement-corrected state distribution. 210 | 211 | """ 212 | projected_mean, projected_cov = self.project(mean, covariance) 213 | 214 | chol_factor, lower = scipy.linalg.cho_factor( 215 | projected_cov, lower=True, check_finite=False) 216 | kalman_gain = scipy.linalg.cho_solve( 217 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 218 | check_finite=False).T 219 | innovation = measurement - projected_mean 220 | 221 | new_mean = mean + np.dot(innovation, kalman_gain.T) 222 | new_covariance = covariance - np.linalg.multi_dot(( 223 | kalman_gain, projected_cov, kalman_gain.T)) 224 | return new_mean, new_covariance 225 | 226 | def gating_distance(self, 227 | mean, 228 | covariance, 229 | measurements, 230 | only_position=False, 231 | metric='maha'): 232 | """Compute gating distance between state distribution and measurements. 233 | A suitable distance threshold can be obtained from `chi2inv95`. If 234 | `only_position` is False, the chi-square distribution has 4 degrees of 235 | freedom, otherwise 2. 236 | Parameters 237 | ---------- 238 | :param mean : ndarray 239 | Mean vector over the state distribution (8 dimensional). 240 | :param covariance : ndarray 241 | Covariance of the state distribution (8x8 dimensional). 242 | :param measurements : ndarray 243 | An Nx4 dimensional matrix of N measurements, each in 244 | format (x, y, a, h) where (x, y) is the bounding box center 245 | position, a the aspect ratio, and h the height. 246 | :param only_position : Optional[bool] 247 | If True, distance computation is done with respect to the bounding 248 | box center position only. 249 | :param metric 250 | :return: 251 | ------- 252 | ndarray 253 | Returns an array of length N, where the i-th element contains the 254 | squared Mahalanobis distance between (mean, covariance) and 255 | `measurements[i]`. 
256 | """ 257 | mean, covariance = self.project(mean, covariance) 258 | if only_position: 259 | mean, covariance = mean[:2], covariance[:2, :2] 260 | measurements = measurements[:, :2] 261 | 262 | d = measurements - mean 263 | if metric == 'gaussian': 264 | return np.sum(d * d, axis=1) 265 | elif metric == 'maha': 266 | cholesky_factor = np.linalg.cholesky(covariance) 267 | z = scipy.linalg.solve_triangular(cholesky_factor, 268 | d.T, 269 | lower=True, 270 | check_finite=False, 271 | overwrite_b=True) 272 | squared_maha = np.sum(z * z, axis=0) 273 | return squared_maha 274 | else: 275 | raise ValueError('invalid distance metric') 276 | -------------------------------------------------------------------------------- /tracking_utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.DEBUG) 14 | logger.addHandler(handler) 15 | 16 | return logger 17 | 18 | 19 | logger = get_logger('root') 20 | -------------------------------------------------------------------------------- /tracking_utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from tracking_utils import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /tracking_utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | module_defs[-1][key.rstrip()] = value.strip() 18 | 19 | return module_defs 20 | 21 | 22 | def parse_data_cfg(path): 23 | """Parses the data configuration file""" 24 | options = dict() 25 | options['gpus'] = '0' 26 | options['num_workers'] = '10' 27 | with open(path, 'r') as fp: 28 | lines = fp.readlines() 29 | for line in lines: 30 | line = line.strip() 31 | if line == '' or line.startswith('#'): 32 | continue 33 | key, value = line.split('=') 34 | options[key.strip()] = value.strip() 35 | return options 36 | -------------------------------------------------------------------------------- /tracking_utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 
| # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import time
9 | 
10 | 
11 | class Timer(object):
12 | """A simple timer."""
13 | def __init__(self):
14 | self.total_time = 0.
15 | self.calls = 0
16 | self.start_time = 0.
17 | self.diff = 0.
18 | self.average_time = 0.
19 | 
20 | self.duration = 0.
21 | 
22 | def tic(self):
23 | # using time.time instead of time.clock because time.clock
24 | # does not normalize for multithreading
25 | self.start_time = time.time()
26 | 
27 | def toc(self, average=True):
28 | self.diff = time.time() - self.start_time
29 | self.total_time += self.diff
30 | self.calls += 1
31 | self.average_time = self.total_time / self.calls
32 | if average:
33 | self.duration = self.average_time
34 | else:
35 | self.duration = self.diff
36 | return self.duration
37 | 
38 | def clear(self):
39 | self.total_time = 0.
40 | self.calls = 0
41 | self.start_time = 0.
42 | self.diff = 0.
43 | self.average_time = 0.
44 | self.duration = 0.
45 | 
46 | 
-------------------------------------------------------------------------------- /tracking_utils/visualization.py: --------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | 
3 | import numpy as np
4 | import cv2
5 | 
6 | # cls_color_dict = {
7 | # 'car': [180, 105, 255], # hot pink
8 | # 'bicycle': [219, 112, 147], # MediumPurple
9 | # 'person': [98, 130, 238], # Salmon
10 | # 'cyclist': [181, 228, 255],
11 | # 'tricycle': [211, 85, 186]
12 | # }
13 | 
14 | # np.random.seed(0)
15 | 
16 | 
17 | def tlwhs_to_tlbrs(tlwhs):
18 | tlbrs = np.copy(tlwhs)
19 | if len(tlbrs) == 0:
20 | return tlbrs
21 | tlbrs[:, 2] += tlwhs[:, 0]
22 | tlbrs[:, 3] += tlwhs[:, 1]
23 | return tlbrs
24 | 
25 | 
26 | def get_color(idx):
27 | idx *= 3
28 | # idx += np.random.randint(50, 255)
29 | color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
30 | return color
31 | 
32 | 
33 | def resize_image(image, max_size=800):
34 | if max(image.shape[:2]) > max_size:
35 | scale = float(max_size) / max(image.shape[:2])
36 | image = cv2.resize(image, None, fx=scale, fy=scale)
37 | return image
38 | 
39 | 
40 | def plot_detects(img,
41 | dets,
42 | num_classes,
43 | frame_id,
44 | id2cls):
45 | """
46 | plot detection results of this frame (or image)
47 | :param img:
48 | :param dets:
49 | :param num_classes:
50 | :param frame_id:
51 | :param id2cls:
52 | :return:
53 | """
54 | if dets is None:
55 | return img
56 | 
57 | img = np.ascontiguousarray(np.copy(img))
58 | # im_h, im_w = img.shape[:2]
59 | 
60 | text_scale = max(1.0, img.shape[1] / 1200.0) # 1600.
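# e.g. (illustrative) a 1920-pixel-wide frame gives text_scale = 1920/1200 = 1.6;
# frames 1200 px wide or narrower are clamped to a scale of 1.0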
61 | text_thickness = 2
62 | line_thickness = max(1, int(img.shape[1] / 600.0))
63 | 
64 | # plot each object of the object class
65 | for obj_i, obj in enumerate(dets):
66 | # left, top, right, down, score, cls_id
67 | x1, y1, x2, y2, score, cls_id = obj
68 | cls_id = int(cls_id.detach().cpu())
69 | cls_name = id2cls[int(cls_id)]
70 | box_int = tuple(map(int, (x1, y1, x2, y2)))
71 | # cls_color = cls_color_dict[cls_name]
72 | cls_color = get_color(abs(cls_id))
73 | 
74 | # draw bbox for each object
75 | cv2.rectangle(img,
76 | box_int[0:2],
77 | box_int[2:4],
78 | color=cls_color,
79 | thickness=line_thickness)
80 | 
81 | # draw class name and score
82 | cv2.putText(img,
83 | cls_name + ' {:.3f}'.format(float(score)),
84 | (box_int[0], box_int[1]),
85 | cv2.FONT_HERSHEY_PLAIN,
86 | text_scale,
87 | [0, 255, 255], # cls_id: yellow
88 | thickness=text_thickness)
89 | 
90 | return img
91 | 
92 | 
93 | def plot_tracks(image,
94 | tlwhs_dict,
95 | obj_ids_dict,
96 | num_classes,
97 | scores=None,
98 | frame_id=0,
99 | id2cls=None):
100 | """
101 | :param image:
102 | :param tlwhs_dict:
103 | :param obj_ids_dict:
104 | :param num_classes:
105 | :param scores:
106 | :param frame_id:
107 | :param id2cls:
108 | :return:
109 | """
110 | img = np.ascontiguousarray(np.copy(image))
111 | im_h, im_w = img.shape[:2]
112 | 
113 | # top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255
114 | 
115 | text_scale = max(1.0, image.shape[1] / 1200.) # 1600.
116 | # text_thickness = 1 if text_scale > 1.1 else 1
117 | text_thickness = 2 # custom line width for the ID text
118 | line_thickness = max(1, int(image.shape[1] / 500.))
119 | 
120 | radius = max(5, int(im_w / 140.))
121 | 
122 | for cls_id in range(num_classes):
123 | cls_tlwhs = tlwhs_dict[cls_id]
124 | obj_ids = obj_ids_dict[cls_id]
125 | 
126 | for i, tlwh_i in enumerate(cls_tlwhs):
127 | x1, y1, w, h = tlwh_i
128 | int_box = tuple(map(int, (x1, y1, x1 + w, y1 + h))) # x1, y1, x2, y2
129 | obj_id = int(obj_ids[i])
130 | id_text = '{}'.format(int(obj_id))
131 | 
132 | _line_thickness = 1 if obj_id <= 0 else line_thickness
133 | color = get_color(abs(obj_id))
134 | # cls_color = cls_color_dict[id2cls[cls_id]]
135 | 
136 | # draw bbox
137 | cv2.rectangle(img=img,
138 | pt1=int_box[0:2], # (x1, y1)
139 | pt2=int_box[2:4], # (x2, y2)
140 | color=color,
141 | thickness=line_thickness)
142 | 
143 | # draw class name and index
144 | cv2.putText(img,
145 | id2cls[cls_id],
146 | (int(x1), int(y1)),
147 | cv2.FONT_HERSHEY_PLAIN,
148 | text_scale,
149 | (0, 255, 255), # cls_id: yellow
150 | thickness=text_thickness)
151 | 
152 | (txt_w, txt_h), baseline = cv2.getTextSize(id2cls[cls_id],
153 | fontFace=cv2.FONT_HERSHEY_PLAIN,
154 | fontScale=text_scale, thickness=text_thickness)
155 | 
156 | cv2.putText(img,
157 | id_text,
158 | (int(x1), int(y1) - txt_h),
159 | cv2.FONT_HERSHEY_PLAIN,
160 | text_scale,
161 | (0, 255, 255), # cls_id: yellow
162 | thickness=text_thickness)
163 | 
164 | return img
165 | 
166 | 
167 | def plot_tracking(image,
168 | tlwhs,
169 | obj_ids,
170 | scores=None,
171 | frame_id=0,
172 | fps=0.,
173 | ids2=None,
174 | cls_id=0, id2cls=None):
175 | """
176 | :param image:
177 | :param tlwhs:
178 | :param obj_ids:
179 | :param scores:
180 | :param frame_id:
181 | :param fps:
182 | :param ids2:
183 | :param cls_id:
184 | :return:
185 | """
186 | im = np.ascontiguousarray(np.copy(image))
187 | im_h, im_w = im.shape[:2]
188 | 
189 | # top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255
190 | 
191 | text_scale = max(1.0, image.shape[1] / 1200.) # 1600.
192 | # text_thickness = 1 if text_scale > 1.1 else 1
193 | text_thickness = 2 # custom line width for the ID text
194 | line_thickness = max(1, int(image.shape[1] / 500.))
195 | 
196 | radius = max(5, int(im_w / 140.))
197 | cv2.putText(im, 'frame: %d fps: %.2f num: %d'
198 | % (frame_id, fps, len(tlwhs)),
199 | (0, int(15 * text_scale)),
200 | cv2.FONT_HERSHEY_PLAIN,
201 | text_scale,
202 | (0, 0, 255),
203 | thickness=2)
204 | 
205 | for i, tlwh in enumerate(tlwhs):
206 | x1, y1, w, h = tlwh
207 | int_box = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
208 | obj_id = int(obj_ids[i])
209 | id_text = '{}'.format(int(obj_id))
210 | 
211 | if ids2 is not None:
212 | id_text = id_text + ', {}'.format(int(ids2[i]))
213 | 
214 | _line_thickness = 1 if obj_id <= 0 else line_thickness
215 | color = get_color(abs(obj_id))
216 | cv2.rectangle(im, int_box[0:2], int_box[2:4], color=color, thickness=line_thickness) # bbox: per-ID pseudo-random color
217 | 
218 | # draw the track ID
219 | cv2.putText(im,
220 | id_text,
221 | (int_box[0], int_box[1] + 30),
222 | cv2.FONT_HERSHEY_PLAIN,
223 | text_scale,
224 | (0, 255, 255), # id: yellow
225 | thickness=text_thickness)
226 | 
227 | # draw the object class name (id2cls may be None when no mapping is passed in)
228 | if id2cls is not None:
229 | cv2.putText(im, id2cls[cls_id],
230 | (int(x1), int(y1)),
231 | cv2.FONT_HERSHEY_PLAIN,
232 | text_scale,
233 | (0, 255, 255), # cls_id: yellow
234 | thickness=text_thickness)
235 | 
236 | return im
237 | 
238 | 
239 | def plot_trajectory(image, tlwhs, track_ids):
240 | image = image.copy()
241 | for one_tlwhs, track_id in zip(tlwhs, track_ids):
242 | color = get_color(int(track_id))
243 | for tlwh in one_tlwhs:
244 | x1, y1, w, h = tuple(map(int, tlwh))
245 | cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2)
246 | 
247 | return image
248 | 
249 | 
250 | def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None):
251 | """
252 | :param image:
253 | :param tlbrs:
254 | :param scores:
255 | :param color:
256 | :param ids:
257 | :return:
258 | """
259 | im = np.copy(image)
260 | text_scale = max(1, image.shape[1] / 800.)
261 | thickness = 2 if text_scale > 1.3 else 1
262 | for i, det in enumerate(tlbrs):
263 | x1, y1, x2, y2 = np.asarray(det[:4], dtype=np.int)
264 | if len(det) >= 7:
265 | label = 'det' if det[5] > 0 else 'trk'
266 | if ids is not None:
267 | text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i])
268 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255),
269 | thickness=thickness)
270 | else:
271 | text = '{}# {:.2f}'.format(label, det[6])
272 | 
273 | if scores is not None:
274 | text = '{:.2f}'.format(scores[i])
275 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255),
276 | thickness=thickness)
277 | 
278 | cv2.rectangle(im, (x1, y1), (x2, y2), color, 2)
279 | 
280 | return im
281 | 
-------------------------------------------------------------------------------- /utils/__init__.py: --------------------------------------------------------------------------------
1 | 
2 | 
-------------------------------------------------------------------------------- /utils/evolve.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #for i in 0 1 2 3
3 | #do
4 | # t=ultralytics/yolov3:v139 && sudo docker pull $t && sudo nvidia-docker run -d --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t utils/evolve.sh $i
5 | # sleep 30
6 | #done
7 | 
8 | while true; do
9 | # python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 100 --batch 64 --accum 1 --weights yolov3-tiny.conv.15 --multi --bucket ult/wer --evolve --cache --device $1 --cfg yolov3-tiny3-1cls.cfg --single --adam
10 | # python3 train.py --data ../out/data.data --img-size 608 --epochs 10 --batch 8 --accum 8 --weights ultralytics68.pt --multi --bucket ult/athena --evolve --device $1 --cfg yolov3-spp-1cls.cfg
11 | 
12 | python3 train.py --data coco2014.data --img-size 512 608 --epochs 27 --batch 8 --accum 8 --evolve --weights '' --bucket ult/coco/sppa_512 --device $1 --cfg yolov3-sppa.cfg --multi
13 | done
14 | 
15 | 
16 | # coco epoch times --img-size 416 608 --epochs 27 --batch 16 --accum 4
17 | # 36:34 2080ti
18 | # 21:58 V100
19 | # 63:00 T4
-------------------------------------------------------------------------------- /utils/gcp.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # New VM
4 | rm -rf sample_data yolov3
5 | git clone https://github.com/ultralytics/yolov3
6 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test # branch
7 | # sudo apt-get install zip
8 | #git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex
9 | sudo conda install -yc conda-forge scikit-image pycocotools
10 | # python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('193Zp_ye-3qXMonR1nZj3YyxMtQkMy50k','coco2014.zip')"
11 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1WQT6SOktSe8Uw6r10-2JhbEhMY5DJaph','coco2017.zip')"
12 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1C3HewOG9akA3y456SZLBJZfNDPkBwAto','knife.zip')"
13 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('13g3LqdpkNE8sPosVJT6KFXlfoMypzRP4','sm4.zip')"
14 | sudo shutdown
15 | 
16 | # Mount local SSD
17 | lsblk
18 | sudo mkfs.ext4 -F /dev/nvme0n1
19 | sudo mkdir -p /mnt/disks/nvme0n1
20 | sudo mount /dev/nvme0n1 /mnt/disks/nvme0n1
21 | sudo chmod a+w /mnt/disks/nvme0n1
22 | cp -r coco /mnt/disks/nvme0n1
23 | 
24 | # Kill All
25 | t=ultralytics/yolov3:v1
26 | docker kill $(docker ps -a -q --filter ancestor=$t)
27 | 
28 | # Evolve coco
29 | sudo -s
30 | t=ultralytics/yolov3:evolve
31 | # docker kill $(docker ps -a -q --filter ancestor=$t)
32 | for i in 0 1 6 7
33 | do
34 | docker pull $t && docker run --gpus all -d --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t bash utils/evolve.sh $i
35 | sleep 30
36 | done
37 | 
38 | #COCO training
39 | n=131 && t=ultralytics/coco:v131 && sudo docker pull $t && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t python3 train.py --data coco2014.data --img-size 320 640 --epochs 300 --batch 16 --weights '' --device 0 --cfg yolov3-spp.cfg --bucket ult/coco --name $n && sudo shutdown
40 | n=132 && t=ultralytics/coco:v131 && sudo docker pull $t && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t python3 train.py --data coco2014.data --img-size 320 640 --epochs 300 --batch 64 --weights '' --device 0 --cfg yolov3-tiny.cfg --bucket ult/coco --name $n && sudo shutdown
41 | 
-------------------------------------------------------------------------------- /utils/google_utils.py: --------------------------------------------------------------------------------
1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries
2 | # pip install --upgrade google-cloud-storage
3 | 
4 | import os
5 | import time
6 | 
7 | 
8 | # from google.cloud import storage
9 | 
10 | 
11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'):
12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f
13 | # Downloads a file from Google Drive, accepting presented query
14 | # from utils.google_utils import *; gdrive_download()
15 | t = time.time()
16 | 
17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='')
18 | os.remove(name) if os.path.exists(name) else None # remove existing
19 | os.remove('cookie') if os.path.exists('cookie') else None
20 | 
21 | # Attempt file download
22 | os.system("curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id)
23 | if os.path.exists('cookie'): # large file
24 | s = "curl -Lb ./cookie \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % (
25 | id, name)
26 | else: # small file
27 | s = "curl -s -L -o %s 'https://drive.google.com/uc?export=download&id=%s'" % (name, id)
28 | r = os.system(s) # execute, capture return values
29 | os.remove('cookie') if os.path.exists('cookie') else None
30 | 
31 | # Error check
32 | if r != 0:
33 | os.remove(name) if os.path.exists(name) else None # remove partial
34 | print('Download error ') # raise Exception('Download error')
35 | return r
36 | 
37 | # Unzip if archive
38 | if name.endswith('.zip'):
39 | print('unzipping... ', end='')
40 | os.system('unzip -q %s' % name) # unzip
41 | os.remove(name) # remove zip to free space
42 | 
43 | print('Done (%.1fs)' % (time.time() - t))
44 | return r
45 | 
46 | 
47 | def upload_blob(bucket_name, source_file_name, destination_blob_name):
48 | # Uploads a file to a bucket
49 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
50 | 
51 | storage_client = storage.Client()
52 | bucket = storage_client.get_bucket(bucket_name)
53 | blob = bucket.blob(destination_blob_name)
54 | 
55 | blob.upload_from_filename(source_file_name)
56 | 
57 | print('File {} uploaded to {}.'.format(
58 | source_file_name,
59 | destination_blob_name))
60 | 
61 | 
62 | def download_blob(bucket_name, source_blob_name, destination_file_name):
63 | # Downloads a blob from a bucket
64 | storage_client = storage.Client()
65 | bucket = storage_client.get_bucket(bucket_name)
66 | blob = bucket.blob(source_blob_name)
67 | 
68 | blob.download_to_filename(destination_file_name)
69 | 
70 | print('Blob {} downloaded to {}.'.format(
71 | source_blob_name,
72 | destination_file_name))
73 | 
-------------------------------------------------------------------------------- /utils/layers.py: --------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | 
3 | import torch.nn.functional as F
4 | from utils.utils import *
5 | 
6 | try:
7 | from mish_cuda import MishCuda as Mish
8 | except:
9 | class Mish(nn.Module): # https://github.com/digantamisra98/Mish
10 | def forward(self, x):
11 | return x * F.softplus(x).tanh()
12 | 
13 | 
14 | def make_divisible(v, divisor):
15 | # Function ensures all layers have a channel number that is divisible by 'divisor'
16 | # https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
17 | return math.ceil(v / divisor) * divisor
18 | 
19 | 
20 | class Flatten(nn.Module):
21 | # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions
22 | def forward(self, x):
23 | return x.view(x.size(0), -1)
24 | 
25 | 
26 | class Concat(nn.Module):
27 | # Concatenate a list of tensors along dimension
28 | def __init__(self, dimension=1):
29 | super(Concat, self).__init__()
30 | self.d = dimension
31 | 
32 | def forward(self, x):
33 | return torch.cat(x, self.d)
34 | 
35 | 
36 | class RouteGroup(nn.Module):
37 | def __init__(self, layers, groups, group_id):
38 | super(RouteGroup, self).__init__()
39 | self.layers = layers
40 | self.multi = len(layers) > 1
41 | self.groups = groups
42 | self.group_id = group_id
43 | 
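# Note on RouteGroup (added comment): it implements the Darknet 'route' layer
# with a 'groups' option -- each referenced layer's output is split
# channel-wise into `groups` chunks and only chunk `group_id` is kept.
# E.g. (illustrative shapes) routing a (N, 64, H, W) feature map with
# groups=2, group_id=1 passes through its last 32 channels.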
44 | def forward(self, x, outputs): 45 | if self.multi: 46 | outs = [] 47 | for layer in self.layers: 48 | out = torch.chunk(outputs[layer], self.groups, dim=1) 49 | outs.append(out[self.group_id]) 50 | return torch.cat(outs, dim=1) 51 | else: 52 | out = torch.chunk(outputs[self.layers[0]], self.groups, dim=1) 53 | return out[self.group_id] 54 | 55 | 56 | # scaled_channels layer 57 | class ScaleChannels(nn.Module): 58 | def __init__(self, layers): 59 | super(ScaleChannels, self).__init__() 60 | self.layers = layers 61 | 62 | # assert len(self.layers) == 1 63 | 64 | def forward(self, x, outputs): 65 | # Scalar is current input: x 66 | # H×W = 1×1 67 | # assert x.shape[2] == 1 and x.shape[3] == 1 68 | 69 | layer = outputs[self.layers[0]] 70 | 71 | # assert x.shape[1] == layer.shape[1] 72 | 73 | # Do Scaling: applying broadcasting here 74 | x = x * layer 75 | 76 | return x 77 | 78 | 79 | # Dropout layer 80 | class Dropout(nn.Module): 81 | def __init__(self, prob): 82 | super(Dropout, self).__init__() 83 | self.prob = float(prob) 84 | 85 | def forward(self, x): 86 | return F.dropout(x, p=self.prob) 87 | 88 | 89 | # To do global average pooling 90 | class GlobalAvgPool(nn.Module): 91 | def __init__(self): 92 | super(GlobalAvgPool, self).__init__() 93 | 94 | def forward(self, x): 95 | return F.adaptive_avg_pool2d(x, (1, 1)) # set output size (1, 1) 96 | 97 | 98 | class FeatureConcat(nn.Module): 99 | def __init__(self, layers): 100 | super(FeatureConcat, self).__init__() 101 | self.layers = layers # layer indices 102 | self.multiple = len(layers) > 1 # multiple layers flag 103 | 104 | def forward(self, x, outputs): 105 | return torch.cat([outputs[i] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]] 106 | 107 | 108 | class FeatureConcat_l(nn.Module): 109 | def __init__(self, layers): 110 | super(FeatureConcat_l, self).__init__() 111 | self.layers = layers # layer indices 112 | self.multiple = len(layers) > 1 # multiple layers flag 113 | 114 | def forward(self, x, outputs): 115 | return torch.cat([outputs[i][:, :outputs[i].shape[1] // 2, :, :] for i in self.layers], 1) if self.multiple else \ 116 | outputs[self.layers[0]][:, :outputs[self.layers[0]].shape[1] // 2, :, :] 117 | 118 | 119 | class WeightedFeatureFusion(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 120 | def __init__(self, layers, weight=False): 121 | super(WeightedFeatureFusion, self).__init__() 122 | self.layers = layers # layer indices 123 | self.weight = weight # apply weights boolean 124 | self.n = len(layers) + 1 # number of layers 125 | if weight: 126 | self.w = nn.Parameter(torch.zeros(self.n), requires_grad=True) # layer weights 127 | 128 | def forward(self, x, outputs): 129 | # Weights 130 | if self.weight: 131 | w = torch.sigmoid(self.w) * (2 / self.n) # sigmoid weights (0-1) 132 | x = x * w[0] 133 | 134 | # Fusion 135 | nx = x.shape[1] # input channels 136 | for i in range(self.n - 1): 137 | a = outputs[self.layers[i]] * w[i + 1] if self.weight else outputs[self.layers[i]] # feature to add 138 | na = a.shape[1] # feature channels 139 | 140 | # Adjust channels 141 | if nx == na: # same shape 142 | x = x + a 143 | elif nx > na: # slice input 144 | x[:, :na] = x[:, :na] + a # or a = nn.ZeroPad2d((0, 0, 0, 0, 0, dc))(a); x = x + a 145 | else: # slice feature 146 | x = x + a[:, :nx] 147 | 148 | return x 149 | 150 | 151 | class MixConv2d(nn.Module): # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595 152 | def __init__(self, in_ch, out_ch, k=(3, 
151 | class MixConv2d(nn.Module):  # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595
152 |     def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, method='equal_params'):
153 |         super(MixConv2d, self).__init__()
154 | 
155 |         groups = len(k)
156 |         if method == 'equal_ch':  # equal channels per group
157 |             i = torch.linspace(0, groups - 1E-6, out_ch).floor()  # out_ch indices
158 |             ch = [(i == g).sum() for g in range(groups)]
159 |         else:  # 'equal_params': equal parameter count per group
160 |             b = [out_ch] + [0] * groups
161 |             a = np.eye(groups + 1, groups, k=-1)
162 |             a -= np.roll(a, 1, axis=1)
163 |             a *= np.array(k) ** 2
164 |             a[0] = 1
165 |             ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int)  # solve ax = b for per-group channel counts
166 | 
167 |         self.m = nn.ModuleList([nn.Conv2d(in_channels=in_ch,
168 |                                           out_channels=ch[g],
169 |                                           kernel_size=k[g],
170 |                                           stride=stride,
171 |                                           padding=k[g] // 2,  # 'same' pad
172 |                                           dilation=dilation,
173 |                                           bias=bias) for g in range(groups)])
174 | 
175 |     def forward(self, x):
176 |         return torch.cat([m(x) for m in self.m], 1)
177 | 
178 | 
179 | class MixDeConv2d(nn.Module):  # MixDeConv: Mixed Depthwise DeConvolutional Kernels https://arxiv.org/abs/1907.09595
180 |     def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, method='equal_params'):
181 |         super(MixDeConv2d, self).__init__()
182 | 
183 |         groups = len(k)
184 |         if method == 'equal_ch':  # equal channels per group
185 |             i = torch.linspace(0, groups - 1E-6, out_ch).floor()  # out_ch indices
186 |             ch = [(i == g).sum() for g in range(groups)]
187 |         else:  # 'equal_params': equal parameter count per group
188 |             b = [out_ch] + [0] * groups
189 |             a = np.eye(groups + 1, groups, k=-1)
190 |             a -= np.roll(a, 1, axis=1)
191 |             a *= np.array(k) ** 2
192 |             a[0] = 1
193 |             ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int)  # solve ax = b for per-group channel counts
194 | 
195 |         self.m = nn.ModuleList([nn.ConvTranspose2d(in_channels=in_ch,
196 |                                                    out_channels=ch[g],
197 |                                                    kernel_size=k[g],
198 |                                                    stride=stride,
199 |                                                    padding=k[g] // 2,  # 'same' pad
200 |                                                    dilation=dilation,
201 |                                                    bias=bias) for g in range(groups)])
202 | 
203 |     def forward(self, x):
204 |         return torch.cat([m(x) for m in self.m], 1)
205 | 
206 | 
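# --- Added sketch (editor's illustration, not original code): how the 'equal_params'
# branch above splits out_ch across kernel sizes. The small system ax = b is solved so
# that ch[g] * k[g]^2 (the per-group weight count, up to the shared in_ch factor) comes
# out roughly equal. The concrete numbers below are a worked assumption:
def _demo_mixconv_channel_split():
    m = MixConv2d(in_ch=32, out_ch=64, k=(3, 5, 7), method='equal_params')
    # ch is proportional to 1/k^2 -> [41, 15, 8]; weights per input channel:
    # 9 * 41 = 369, 25 * 15 = 375, 49 * 8 = 392 -- approximately equal
    return [conv.out_channels for conv in m.m]  # [41, 15, 8]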
207 | # Activation functions below -------------------------------------------------------------------------------------------
208 | class SwishImplementation(torch.autograd.Function):
209 |     @staticmethod
210 |     def forward(ctx, x):
211 |         ctx.save_for_backward(x)
212 |         return x * torch.sigmoid(x)
213 | 
214 |     @staticmethod
215 |     def backward(ctx, grad_output):
216 |         x = ctx.saved_tensors[0]
217 |         sx = torch.sigmoid(x)  # sigmoid(x)
218 |         return grad_output * (sx * (1 + x * (1 - sx)))
219 | 
220 | 
221 | class MishImplementation(torch.autograd.Function):
222 |     @staticmethod
223 |     def forward(ctx, x):
224 |         ctx.save_for_backward(x)
225 |         return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))
226 | 
227 |     @staticmethod
228 |     def backward(ctx, grad_output):
229 |         x = ctx.saved_tensors[0]
230 |         sx = torch.sigmoid(x)
231 |         fx = F.softplus(x).tanh()
232 |         return grad_output * (fx + x * sx * (1 - fx * fx))
233 | 
234 | 
235 | class MemoryEfficientSwish(nn.Module):
236 |     def forward(self, x):
237 |         return SwishImplementation.apply(x)
238 | 
239 | 
240 | class MemoryEfficientMish(nn.Module):
241 |     def forward(self, x):
242 |         return MishImplementation.apply(x)
243 | 
244 | 
245 | class Swish(nn.Module):
246 |     def forward(self, x):
247 |         return x * torch.sigmoid(x)
248 | 
249 | 
250 | class HardSwish(nn.Module):  # https://arxiv.org/pdf/1905.02244.pdf
251 |     def forward(self, x):
252 |         return x * F.hardtanh(x + 3, 0., 6., True) / 6.
253 | 
-------------------------------------------------------------------------------- /utils/parse_config.py: --------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | 
3 | import os
4 | import numpy as np
5 | 
6 | 
7 | def parse_model_cfg(path):
8 |     """
9 |     :param path: cfg file path; may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3'
10 |     :return: a list of module definition dicts, one per cfg [section]
11 |     """
12 |     # Parse the yolo *.cfg file and return module definitions.
13 |     if not path.endswith('.cfg'):  # add .cfg suffix if omitted
14 |         path += '.cfg'
15 |     if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path):  # add cfg/ prefix if omitted
16 |         path = 'cfg' + os.sep + path
17 | 
18 |     with open(path, 'r') as f:
19 |         lines = f.read().split('\n')
20 | 
21 |     lines = [x for x in lines if x and not x.startswith('#')]
22 |     lines = [x.rstrip().lstrip() for x in lines]  # get rid of fringe whitespaces
23 |     mdefs = []  # module definitions
24 | 
25 |     for line in lines:
26 |         if line.startswith('['):  # this marks the start of a new block
27 |             mdefs.append({})
28 |             mdefs[-1]['type'] = line[1:-1].rstrip()
29 |             if mdefs[-1]['type'] == 'convolutional':
30 |                 mdefs[-1]['batch_normalize'] = 0  # pre-populate with zeros (may be overwritten later)
31 |         else:
32 |             key, val = line.split("=")
33 |             key = key.rstrip()
34 | 
35 |             if key == 'anchors':  # return np-array
36 |                 mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2))  # np anchors
37 |             elif (key in ['from', 'layers', 'mask']) or (key == 'size' and ',' in val):  # return array
38 |                 mdefs[-1][key] = [int(x) for x in val.split(',')]
39 |             else:
40 |                 val = val.strip()
41 |                 if val.isnumeric():  # return int or float
42 |                     mdefs[-1][key] = int(val) if (int(val) - float(val)) == 0 else float(val)
43 |                 else:
44 |                     mdefs[-1][key] = val  # return string
45 | 
46 |     # Check all fields are supported
47 |     supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups',
48 |                  'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random',
49 |                  'stride_x', 'stride_y', 'weights_type', 'weights_normalization', 'scale_x_y', 'beta_nms', 'nms_kind',
50 |                  'iou_loss', 'iou_normalizer', 'cls_normalizer', 'iou_thresh', 'group_id', 'probability']
51 | 
52 |     f = []  # fields
53 |     for x in mdefs[1:]:
54 |         [f.append(k) for k in x if k not in f]
55 |     u = [x for x in f if x not in supported]  # unsupported fields
56 |     assert not any(u), "Unsupported fields %s in %s. See https://github.com/ultralytics/yolov3/issues/631" % (u, path)
57 | 
58 |     return mdefs
59 | 
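# --- Added note (editor's illustration, not original code): parse_model_cfg returns one
# dict per cfg [section], in file order. For the yolov4-tiny cfg at the end of this repo
# the result starts roughly like (values abridged):
#   [{'type': 'net', 'batch': 64, 'subdivisions': 4, 'width': 768, 'height': 448, ...},
#    {'type': 'convolutional', 'batch_normalize': 1, 'filters': 32, 'size': 3,
#     'stride': 2, 'pad': 1, 'activation': 'leaky'},
#    ...]
# Per the rules above, 'anchors' becomes an (N, 2) np.ndarray, 'from'/'layers'/'mask'
# become int lists, and values like '.3' that fail str.isnumeric() stay strings.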
60 | 
61 | def parse_data_cfg(path):
62 |     # Parses the data configuration file
63 |     if not os.path.exists(path) and os.path.exists('data' + os.sep + path):  # add data/ prefix if omitted
64 |         path = 'data' + os.sep + path
65 | 
66 |     with open(path, 'r') as f:
67 |         lines = f.readlines()
68 | 
69 |     options = dict()
70 |     for line in lines:
71 |         line = line.strip()
72 |         if line == '' or line.startswith('#'):
73 |             continue
74 |         key, val = line.split('=')
75 |         options[key.strip()] = val.strip()
76 | 
77 |     return options
78 | 
-------------------------------------------------------------------------------- /utils/process_mcmot_dataset.py: --------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | 
3 | import os
4 | import time
5 | import shutil
6 | import re
7 | import cv2
8 | import pickle
9 | import numpy as np
10 | from collections import defaultdict
11 | from tqdm import tqdm  # fix: tqdm is used by gen_mcmot_data below
12 | classes = [
13 |     'car',  # 0
14 |     'bicycle',  # 1
15 |     'person',  # 2
16 |     'cyclist',  # 3
17 |     'tricycle'  # 4
18 | ]  # 5 classes (background not included)
19 | 
20 | cls2id = {
21 |     'car': 0,
22 |     'bicycle': 1,
23 |     'person': 2,
24 |     'cyclist': 3,
25 |     'tricycle': 4
26 | }
27 | 
28 | id2cls = {
29 |     0: 'car',
30 |     1: 'bicycle',
31 |     2: 'person',
32 |     3: 'cyclist',
33 |     4: 'tricycle'
34 | }
35 | 
36 | # the video training frames have a fixed resolution
37 | W, H = 1920, 1080
38 | 
39 | 
40 | def gen_labels_for_seq(dark_txt_path, seq_label_dir, classes, one_plus=True):
41 |     """Parse one DarkLabel annotation file and write one txt label file per frame.
42 |     """
43 |     global seq_max_id_dict, start_id_dict, fr_cnt
44 | 
45 |     # ----- start label generation for one video sequence
46 |     # reset each class's max_id to 0 whenever a new sequence is processed
47 |     for class_type in classes:
48 |         seq_max_id_dict[class_type] = 0
49 | 
50 |     # record the set of track ids of each class in the current sequence
51 |     id_set_dict = defaultdict(set)
52 | 
53 |     # read the DarkLabel annotation file of this sequence (one line per frame)
54 |     with open(dark_txt_path, 'r', encoding='utf-8') as r_h:
55 |         # each line of the annotation file describes one frame
56 |         for line in r_h.readlines():
57 |             fr_cnt += 1
58 | 
59 |             line = line.split(',')
60 |             fr_id = int(line[0])
61 |             n_objs = int(line[1])
62 |             # print('\nFrame {:d} in seq {}, total {:d} objects'.format(f_id + 1, seq_name, n_objs))
63 | 
64 |             # label info of all detection targets in the current frame
65 |             fr_label_objs = []
66 | 
67 |             # iterate over every object in this frame
68 |             for cur in range(2, len(line), 6):  # cursor
69 |                 class_type = line[cur + 5].strip()
70 |                 class_id = cls2id[class_type]  # class type => class id
71 | 
72 |                 # parse the track id
73 |                 if one_plus:
74 |                     track_id = int(line[cur]) + 1  # track ids are counted from 1
75 |                 else:
76 |                     track_id = int(line[cur])
77 | 
78 |                 # update this sequence's per-class max track id (background stays 0)
79 |                 if track_id > seq_max_id_dict[class_type]:
80 |                     seq_max_id_dict[class_type] = track_id
81 | 
82 |                 # record the track id in the per-class id set of the current seq
83 |                 id_set_dict[class_type].add(track_id)
84 | 
85 |                 # offset by the class's start id to get the dataset-wide track id
86 |                 track_id += start_id_dict[class_type]
87 | 
88 |                 # read bbox corner coordinates
89 |                 x1, y1 = int(line[cur + 1]), int(line[cur + 2])
90 |                 x2, y2 = int(line[cur + 3]), int(line[cur + 4])
91 | 
92 |                 # clip the bbox to the image resolution
93 |                 x1 = x1 if x1 >= 0 else 0
94 |                 x1 = x1 if x1 < W else W - 1
95 |                 y1 = y1 if y1 >= 0 else 0
96 |                 y1 = y1 if y1 < H else H - 1
97 |                 x2 = x2 if x2 >= 0 else 0
98 |                 x2 = x2 if x2 < W else W - 1
99 |                 y2 = y2 if y2 >= 0 else 0
100 |                 y2 = y2 if y2 < H else H - 1
101 | 
102 |                 # compute bbox center and bbox width & height
103 |                 bbox_center_x = 0.5 * float(x1 + x2)
104 |                 bbox_center_y = 0.5 * float(y1 + y2)
105 |                 bbox_width = float(x2 - x1 + 1)
106 |                 bbox_height = float(y2 - y1 + 1)
107 | 
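                # --- Added worked example (editor's illustration, made-up numbers): a box
                # (x1, y1, x2, y2) = (100, 200, 300, 400) on a 1920x1080 frame gives
                #   bbox_center_x = 0.5 * (100 + 300) = 200.0
                #   bbox_center_y = 0.5 * (200 + 400) = 300.0
                #   bbox_width = 300 - 100 + 1 = 201.0, bbox_height = 400 - 200 + 1 = 201.0
                # and the normalization below maps this to
                #   (200/1920, 300/1080, 201/1920, 201/1080) ≈ (0.104167, 0.277778, 0.104688, 0.186111)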
108 |                 # normalize bbox center and size to [0.0, 1.0]
109 |                 bbox_center_x /= W
110 |                 bbox_center_y /= H
111 |                 bbox_width /= W
112 |                 bbox_height /= H
113 | 
114 |                 # print intermediate results to check that parsing is correct...
115 |                 # print(track_id, x1, y1, x2, y2, class_type)
116 | 
117 |                 # one line per object in this frame's label file
118 |                 obj_str = '{:d} {:d} {:.6f} {:.6f} {:.6f} {:.6f}\n'.format(
119 |                     class_id,  # class id: counted from 0
120 |                     track_id,  # track id: counted from 1
121 |                     bbox_center_x,  # center_x
122 |                     bbox_center_y,  # center_y
123 |                     bbox_width,  # bbox_w
124 |                     bbox_height)  # bbox_h
125 |                 # print(obj_str, end='')
126 |                 fr_label_objs.append(obj_str)
127 | 
128 |             # ----- frame parsed, write its label file: one txt label file per frame image
129 |             label_f_path = seq_label_dir + '/{:05d}.txt'.format(fr_id)
130 |             with open(label_f_path, 'w', encoding='utf-8') as w_h:
131 |                 for obj in fr_label_objs:
132 |                     w_h.write(obj)
133 |             # print('{} written\n'.format(label_f_path))
134 | 
135 |     return id_set_dict
136 | 
137 | 
138 | """
139 | Convert the DarkLabel annotation format: frame# n_obj [id, x1, y1, x2, y2, label]
140 | into the MCMOT input format:
141 | 1. one txt label file per image
142 | 2. one detection target per line: cls_id, track_id, center_x, center_y, bbox_w, bbox_h (6 columns per target)
143 | """
144 | 
145 | 
146 | def dark_label2mcmot_label(data_root, one_plus=True, dict_path=None, viz_root=None):
147 |     """
148 |     :param data_root:
149 |     :param one_plus:
150 |     :param dict_path:
151 |     :param viz_root:
152 |     :return:
153 |     """
154 |     if not os.path.isdir(data_root):
155 |         print('[Err]: invalid data root')
156 |         return
157 | 
158 |     img_root = data_root + '/JPEGImages'
159 |     if not os.path.isdir(img_root):
160 |         print('[Err]: invalid image root')
161 |         return  # fix: bail out instead of continuing with a missing image root
162 |     # create the root directory for label files
163 |     label_root = data_root + '/labels_with_ids'
164 |     if not os.path.isdir(label_root):
165 |         os.makedirs(label_root)
166 |     else:
167 |         shutil.rmtree(label_root)
168 |         os.makedirs(label_root)
169 | 
170 |     # ---------- parameter initialization
171 |     # set the [start] track id of each detection class for the video seqs
172 |     global start_id_dict
173 |     start_id_dict = defaultdict(int)  # str => int
174 |     for class_type in classes:  # init
175 |         start_id_dict[class_type] = 0
176 | 
177 |     # record the per-class max track id of each video seq
178 |     global seq_max_id_dict
179 |     seq_max_id_dict = defaultdict(int)
180 | 
181 |     global fr_cnt
182 |     fr_cnt = 0
183 | 
184 |     # ----------- start processing
185 |     seq_list = os.listdir(img_root)
186 |     seqs = sorted(seq_list, key=lambda x: int(x.split('_')[-1]))
187 | 
188 |     # iterate over every video seq
189 |     for seq_name in seqs:
190 |         seq_dir = img_root + '/' + seq_name
191 |         print('\nProcessing seq', seq_dir)
192 | 
193 |         # create the label dir for this video seq
194 |         seq_label_dir = label_root + '/' + seq_name
195 |         if not os.path.isdir(seq_label_dir):
196 |             os.makedirs(seq_label_dir)
197 |         else:
198 |             shutil.rmtree(seq_label_dir)
199 |             os.makedirs(seq_label_dir)
200 | 
201 |         dark_txt_path = seq_dir + '/' + seq_name + '_gt.txt'
202 |         if not os.path.isfile(dark_txt_path):
203 |             print('[Warning]: invalid dark label file.')
204 |             continue
205 | 
206 |         # generate labels for the current seq
207 |         id_set_dict = gen_labels_for_seq(dark_txt_path, seq_label_dir, classes, one_plus)
208 | 
209 |         # report the per-class max track id of this video seq (counted from 1)
210 |         for k, v in seq_max_id_dict.items():
211 |             print('seq {}'.format(seq_name) + ' ' +
212 |                   k + ' max track id {:d}'.format(v))
213 | 
214 |             # report the number of unique track ids of each class in this seq
215 |             cls_id_set = id_set_dict[k]
216 |             print('seq {}'.format(seq_name) + ' ' +
217 |                   k + ' track id number {:d}'.format(len(cls_id_set)))
218 | 
219 |             if len(cls_id_set) != v:
220 |                 print(cls_id_set)
221 | 
222 |         # after a seq is done, update each class's start track id from seq_max_id_dict
223 |         # for k, v in start_id_dict.items():
224 |         #     start_id_dict[k] += seq_max_id_dict[k]
225 | 
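        # --- Added worked example (editor's illustration): how the offsets keep track ids
        # globally unique across sequences. If seq_01 contains 3 distinct 'car' ids, then
        # start_id_dict['car'] grows by 3 below, so a car with local id 1 in seq_02 is
        # written by gen_labels_for_seq above as dataset-wide id 1 + 3 = 4.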
226 |         # after a seq is done, update each class's start track id from id_set_dict
227 |         for k, v in start_id_dict.items():
228 |             start_id_dict[k] += len(id_set_dict[k])
229 | 
230 |     # report the total number of track ids of each detection class over all seqs
231 |     print('\n')
232 |     for k, v in start_id_dict.items():
233 |         print(k + ' total ' + str(v) + ' track ids')
234 |     print('Total {} frames.'.format(fr_cnt))
235 | 
236 |     # serialize max_id_dict to disk
237 |     if dict_path is not None:
238 |         max_id_dict = {cls2id[k]: v for k, v in start_id_dict.items()}
239 |         # np.savez opens the target file itself; no separate file handle is needed
240 |         np.savez(dict_path, max_id_dict=max_id_dict)  # set key 'max_id_dict'
241 | 
242 |         print('{:s} dumped.'.format(dict_path))
243 | 
244 | 
245 | def gen_mcmot_data(img_root, out_f_path):
246 |     """
247 |     Write the full path of every jpg frame under img_root into one train list file.
248 |     :param img_root:
249 |     :param out_f_path:
250 |     """
251 |     if not os.path.isdir(img_root):
252 |         print('[Err]: invalid image root.')
253 |         return
254 | 
255 |     dir_names = [img_root + '/' + x for x in os.listdir(img_root) if os.path.isdir(img_root + '/' + x)]
256 | 
257 |     with open(out_f_path, 'w', encoding='utf-8') as w_h:
258 |         for dir_name in tqdm(dir_names):
259 |             for img_name in os.listdir(dir_name):
260 |                 if not img_name.endswith('.jpg'):
261 |                     continue
262 | 
263 |                 img_path = dir_name + '/' + img_name
264 |                 if not os.path.isfile(img_path):
265 |                     print('[Warning]: invalid image file.')
266 |                     continue
267 | 
268 |                 w_h.write(img_path + '\n')
269 | 
270 | 
271 | if __name__ == '__main__':
272 |     dark_label2mcmot_label(data_root='/mnt/diskb/even/dataset/MCMOT',
273 |                            one_plus=True,
274 |                            dict_path='/mnt/diskb/even/dataset/MCMOT/max_id_dict.npz',
275 |                            viz_root=None)
276 | 
277 |     gen_mcmot_data(img_root='/mnt/diskb/even/dataset/MCMOT/JPEGImages',
278 |                    out_f_path='/mnt/diskb/even/YOLOV4/data/train_mcmot.txt')
279 | 
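# --- Added usage sketch (editor's illustration, not original code): reading the dict
# saved by dark_label2mcmot_label back from disk. np.savez wraps the dict in a 0-d
# object array, hence allow_pickle and the [()] unwrapping; the counts shown are
# hypothetical.
#   data = np.load('/mnt/diskb/even/dataset/MCMOT/max_id_dict.npz', allow_pickle=True)
#   max_id_dict = data['max_id_dict'][()]  # e.g. {0: 1276, 1: 842, ...}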
-------------------------------------------------------------------------------- /utils/torch_utils.py: --------------------------------------------------------------------------------
1 | import math
2 | import os
3 | import time
4 | from copy import deepcopy
5 | 
6 | import torch
7 | import torch.backends.cudnn as cudnn
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 | 
11 | 
12 | def init_seeds(seed=0):
13 |     torch.manual_seed(seed)
14 | 
15 |     # Remove randomness (may be slower on Tesla GPUs)  # https://pytorch.org/docs/stable/notes/randomness.html
16 |     if seed == 0:
17 |         cudnn.deterministic = True
18 |         cudnn.benchmark = False
19 | 
20 | 
21 | def select_device(device='', apex=False, batch_size=None):
22 |     # device = 'cpu' or '0' or '0,1,2,3'
23 |     cpu_request = device.lower() == 'cpu'
24 |     if device and not cpu_request:  # if device requested other than 'cpu'
25 |         os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
26 |         assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device  # check availability
27 | 
28 |     cuda = False if cpu_request else torch.cuda.is_available()
29 |     if cuda:
30 |         c = 1024 ** 2  # bytes to MB
31 |         ng = torch.cuda.device_count()
32 |         if ng > 1 and batch_size:  # check that batch_size is compatible with device_count
33 |             assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng)
34 |         x = [torch.cuda.get_device_properties(i) for i in range(ng)]
35 |         s = 'Using CUDA ' + ('Apex ' if apex else '')  # apex for mixed precision https://github.com/NVIDIA/apex
36 |         for i in range(0, ng):
37 |             if i == 1:
38 |                 s = ' ' * len(s)
39 |             print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" %
40 |                   (s, i, x[i].name, x[i].total_memory / c))
41 |     else:
42 |         print('Using CPU')
43 | 
44 |     print('')  # skip a line
45 |     return torch.device('cuda:0' if cuda else 'cpu')
46 | 
47 | 
48 | def time_synchronized():
49 |     if torch.cuda.is_available(): torch.cuda.synchronize()  # wait for pending kernels so timings are accurate
50 |     return time.time()
51 | 
52 | 
53 | def initialize_weights(model):
54 |     for m in model.modules():
55 |         t = type(m)
56 |         if t is nn.Conv2d:
57 |             pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
58 |         elif t is nn.BatchNorm2d:
59 |             m.eps = 1e-4
60 |             m.momentum = 0.03
61 |         elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
62 |             m.inplace = True
63 | 
64 | 
65 | def find_modules(model, mclass=nn.Conv2d):
66 |     # finds layer indices matching module class 'mclass'
67 |     return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
68 | 
69 | 
70 | def fuse_conv_and_bn(conv, bn):
71 |     # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
72 |     with torch.no_grad():
73 |         # init
74 |         fusedconv = torch.nn.Conv2d(conv.in_channels,
75 |                                     conv.out_channels,
76 |                                     kernel_size=conv.kernel_size,
77 |                                     stride=conv.stride,
78 |                                     padding=conv.padding,
79 |                                     bias=True)
80 | 
81 |         # prepare filters
82 |         w_conv = conv.weight.clone().view(conv.out_channels, -1)
83 |         w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
84 |         fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
85 | 
86 |         # prepare spatial bias
87 |         if conv.bias is not None:
88 |             b_conv = conv.bias
89 |         else:
90 |             b_conv = torch.zeros(conv.weight.size(0))
91 |         b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
92 |         fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
93 | 
94 |         return fusedconv
95 | 
96 | 
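# --- Added sanity check (editor's illustration, not original code): a fused conv should
# match conv followed by BN in eval mode; shapes and BN statistics are arbitrary
# assumptions for the test.
def _demo_fuse_conv_and_bn():
    conv = nn.Conv2d(8, 16, kernel_size=3, padding=1, bias=False).eval()
    bn = nn.BatchNorm2d(16).eval()
    bn.running_mean.uniform_(-1., 1.)  # fake some non-trivial BN statistics
    bn.running_var.uniform_(0.5, 1.5)
    x = torch.randn(2, 8, 32, 32)
    fused = fuse_conv_and_bn(conv, bn)
    return torch.allclose(bn(conv(x)), fused(x), atol=1e-5)  # True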
97 | def model_info(model, verbose=False):
98 |     # Prints a line-by-line description of a PyTorch model
99 |     n_p = sum(x.numel() for x in model.parameters())  # number parameters
100 |     n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
101 |     if verbose:
102 |         print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
103 |         for i, (name, p) in enumerate(model.named_parameters()):
104 |             name = name.replace('module_list.', '')
105 |             print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
106 |                   (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
107 | 
108 |     try:  # FLOPS
109 |         from thop import profile
110 |         macs, _ = profile(model, inputs=(torch.zeros(1, 3, 480, 640),), verbose=False)
111 |         fs = ', %.1f GFLOPS' % (macs / 1E9 * 2)
112 |     except Exception:  # thop missing or profiling failed: skip the FLOPS report
113 |         fs = ''
114 | 
115 |     print('Model Summary: %g layers, %g parameters, %g gradients%s' % (len(list(model.parameters())), n_p, n_g, fs))
116 | 
117 | 
118 | def load_classifier(name='resnet101', n=2):
119 |     # Loads a pretrained model reshaped to n-class output
120 |     import pretrainedmodels  # https://github.com/Cadene/pretrained-models.pytorch#torchvision
121 |     model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet')
122 | 
123 |     # Display model properties
124 |     for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']:
125 |         print(x + ' =', eval(x))
126 | 
127 |     # Reshape output to n classes
128 |     filters = model.last_linear.weight.shape[1]
129 |     model.last_linear.bias = torch.nn.Parameter(torch.zeros(n))
130 |     model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters))
131 |     model.last_linear.out_features = n
132 |     return model
133 | 
134 | 
135 | def scale_img(img, ratio=1.0, same_shape=True):  # img(16,3,256,416), r=ratio
136 |     # scales img(bs,3,y,x) by ratio
137 |     h, w = img.shape[2:]
138 |     s = (int(h * ratio), int(w * ratio))  # new size
139 |     img = F.interpolate(img, size=s, mode='bilinear', align_corners=False)  # resize
140 |     if not same_shape:  # pad/crop img
141 |         gs = 64  # (pixels) grid size
142 |         h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
143 |     return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean
144 | 
145 | 
146 | class ModelEMA:
147 |     """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
148 |     Keep a moving average of everything in the model state_dict (parameters and buffers).
149 |     This is intended to allow functionality like
150 |     https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
151 |     A smoothed version of the weights is necessary for some training schemes to perform well.
152 |     E.g. Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc., which use
153 |     RMSprop with a short 2.4-3 epoch decay period and a slow LR decay rate of .96-.99, require EMA
154 |     smoothing of weights to match results. Pay attention to the decay constant you are using
155 |     relative to your update count per epoch.
156 |     To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but
157 |     disable validation of the EMA weights. Validation will have to be done manually in a separate
158 |     process, or after the training stops converging.
159 |     This class is sensitive to where it is initialized in the sequence of model init,
160 |     GPU assignment and distributed training wrappers.
161 |     I've tested with the sequence in my own train.py for torch.DataParallel, apex.DDP, and single-GPU.
162 |     """
163 | 
164 |     def __init__(self, model, decay=0.9999, device=''):
165 |         # make a copy of the model for accumulating the moving average of weights
166 |         self.ema = deepcopy(model)
167 |         self.ema.eval()
168 |         self.updates = 0  # number of EMA updates
169 |         self.decay = lambda x: decay * (1 - math.exp(-x / 2000))  # decay exponential ramp (to help early epochs)
170 |         self.device = device  # perform ema on different device from model if set
171 |         if device:
172 |             self.ema.to(device=device)
173 |         for p in self.ema.parameters():
174 |             p.requires_grad_(False)
175 | 
176 |     def update(self, model):
177 |         self.updates += 1
178 |         d = self.decay(self.updates)
179 |         with torch.no_grad():
180 |             if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel):
181 |                 msd, esd = model.module.state_dict(), self.ema.module.state_dict()
182 |             else:
183 |                 msd, esd = model.state_dict(), self.ema.state_dict()
184 | 
185 |             for k, v in esd.items():
186 |                 if v.dtype.is_floating_point:
187 |                     v *= d
188 |                     v += (1.
- d) * msd[k].detach() 189 | 190 | def update_attr(self, model): 191 | # Assign attributes (which may change during training) 192 | for k in model.__dict__.keys(): 193 | if not k.startswith('_'): 194 | setattr(self.ema, k, getattr(model, k)) 195 | -------------------------------------------------------------------------------- /yolov4-tiny-3l_no_group_id_no_upsample.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=4 8 | width=768 9 | height=448 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00002 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | #mosaic=1 26 | 27 | [convolutional] 28 | batch_normalize=1 29 | filters=32 30 | size=3 31 | stride=2 32 | pad=1 33 | activation=leaky 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=64 46 | size=3 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [route] 52 | layers=-1 53 | #groups=2 54 | #group_id=1 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=32 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [convolutional] 65 | batch_normalize=1 66 | filters=32 67 | size=3 68 | stride=1 69 | pad=1 70 | activation=leaky 71 | 72 | [route] 73 | layers = -1,-2 74 | 75 | [convolutional] 76 | batch_normalize=1 77 | filters=64 78 | size=1 79 | stride=1 80 | pad=1 81 | activation=leaky 82 | 83 | [route] 84 | layers = -6,-1 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=64 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [route] 95 | layers=-1 96 | #groups=2 97 | #group_id=1 98 | 99 | [convolutional] 100 | batch_normalize=1 101 | filters=32 102 | size=3 103 | stride=1 104 | pad=1 105 | activation=leaky 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=32 110 | size=3 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [route] 116 | layers = -1,-2 117 | 118 | [convolutional] 119 | batch_normalize=1 120 | filters=64 121 | size=1 122 | stride=1 123 | pad=1 124 | activation=leaky 125 | 126 | [route] 127 | layers = -6,-1 128 | 129 | [maxpool] 130 | size=2 131 | stride=2 132 | 133 | [convolutional] 134 | batch_normalize=1 135 | filters=128 136 | size=3 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [route] 142 | layers=-1 143 | #groups=2 144 | #group_id=1 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=64 149 | size=3 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [convolutional] 155 | batch_normalize=1 156 | filters=64 157 | size=3 158 | stride=1 159 | pad=1 160 | activation=leaky 161 | 162 | [route] 163 | layers = -1,-2 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=128 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [route] 174 | layers = -6,-1 175 | 176 | [maxpool] 177 | size=2 178 | stride=2 179 | 180 | [convolutional] 181 | batch_normalize=1 182 | filters=256 183 | size=3 184 | stride=1 185 | pad=1 186 | activation=leaky 187 | 188 | [route] 189 | layers=-1 190 | #groups=2 191 | #group_id=1 192 | 193 | [convolutional] 194 | batch_normalize=1 195 | filters=128 196 | size=3 197 | stride=1 198 | pad=1 199 | activation=leaky 200 | 201 | 
[convolutional] 202 | batch_normalize=1 203 | filters=128 204 | size=3 205 | stride=1 206 | pad=1 207 | activation=leaky 208 | 209 | [route] 210 | layers = -1,-2 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=1 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [route] 221 | layers = -6,-1 222 | 223 | [maxpool] 224 | size=2 225 | stride=2 226 | 227 | [convolutional] 228 | batch_normalize=1 229 | filters=512 230 | size=3 231 | stride=1 232 | pad=1 233 | activation=leaky 234 | 235 | ################################## 236 | 237 | [convolutional] 238 | batch_normalize=1 239 | filters=256 240 | size=1 241 | stride=1 242 | pad=1 243 | activation=leaky 244 | 245 | [convolutional] 246 | batch_normalize=1 247 | filters=512 248 | size=3 249 | stride=1 250 | pad=1 251 | activation=leaky 252 | 253 | [convolutional] 254 | size=1 255 | stride=1 256 | pad=1 257 | filters=30 258 | activation=linear 259 | 260 | [yolo] 261 | mask = 6,7,8 262 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 263 | classes=5 264 | num=9 265 | jitter=.3 266 | ignore_thresh = .7 267 | truth_thresh = 1 268 | scale_x_y = 1.05 269 | iou_thresh=0.213 270 | cls_normalizer=1.0 271 | iou_normalizer=0.07 272 | iou_loss=ciou 273 | nms_kind=diounms 274 | beta_nms=0.6 275 | # iou_thresh_kind=ciou 276 | 277 | [route] 278 | layers = -4 279 | 280 | [convolutional] 281 | batch_normalize=1 282 | filters=128 283 | size=1 284 | stride=1 285 | pad=1 286 | activation=leaky 287 | 288 | [upsample] 289 | stride=2 290 | 291 | [route] 292 | layers = -1, 30 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=3 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | size=1 304 | stride=1 305 | pad=1 306 | filters=30 307 | activation=linear 308 | 309 | [yolo] 310 | mask = 3,4,5 311 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 312 | classes=5 313 | num=9 314 | jitter=.3 315 | ignore_thresh = .7 316 | truth_thresh = 1 317 | scale_x_y = 1.05 318 | iou_thresh=0.213 319 | cls_normalizer=1.0 320 | iou_normalizer=0.07 321 | iou_loss=ciou 322 | nms_kind=diounms 323 | beta_nms=0.6 324 | # iou_thresh_kind=ciou 325 | 326 | [route] 327 | layers = -3 328 | 329 | [convolutional] 330 | batch_normalize=1 331 | filters=64 332 | size=1 333 | stride=1 334 | pad=1 335 | activation=leaky 336 | 337 | [upsample] 338 | stride=2 339 | 340 | [route] 341 | layers = -1, 22 342 | 343 | [convolutional] 344 | batch_normalize=1 345 | filters=128 346 | size=3 347 | stride=1 348 | pad=1 349 | activation=leaky 350 | 351 | [convolutional] 352 | size=1 353 | stride=1 354 | pad=1 355 | filters=30 356 | activation=linear 357 | 358 | [yolo] 359 | mask = 0,1,2 360 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 361 | classes=5 362 | num=9 363 | jitter=.3 364 | ignore_thresh = .7 365 | truth_thresh = 1 366 | scale_x_y = 1.05 367 | iou_thresh=0.213 368 | cls_normalizer=1.0 369 | iou_normalizer=0.07 370 | iou_loss=ciou 371 | nms_kind=diounms 372 | beta_nms=0.6 373 | #iou_thresh_kind=ciou 374 | 375 | [route] 376 | layers=-17 377 | 378 | [convolutional] 379 | size=1 380 | stride=1 381 | pad=1 382 | filters=128 383 | activation=linear 384 | 385 | [route] 386 | layers=-12 387 | 388 | [convolutional] 389 | size=1 390 | stride=1 391 | pad=1 392 | filters=128 393 | activation=linear 394 | 395 | [route] 396 | layers=-7 397 | 398 | [convolutional] 399 | size=1 400 | stride=1 401 | 
pad=1 402 | filters=128 403 | activation=linear 404 | --------------------------------------------------------------------------------