├── LICENSE ├── MOTEvaluate ├── __init__.py ├── evaluate.py ├── evaluate_pipeline.py └── evaluate_utils │ ├── __init__.py │ ├── bbox.py │ ├── convert.py │ ├── io.py │ └── measurements.py ├── MVI_39401_track_fps12.gif ├── MVI_39501_track_fps12.gif ├── MVI_40855_track_fps12.gif ├── README.md ├── auto_weighted_loss.py ├── cfg ├── mobile-yolo-3l.cfg ├── yolov4-pacsp-mish.cfg ├── yolov4-pacsp-s-mish.cfg ├── yolov4-pacsp-s.cfg ├── yolov4-pacsp-x-mish.cfg ├── yolov4-pacsp-x.cfg ├── yolov4-pacsp.cfg ├── yolov4-paspp-mcmot.cfg ├── yolov4-paspp.cfg └── yolov4-tiny.cfg ├── data ├── coco.data ├── coco.names ├── coco1.data ├── coco1.txt ├── coco16.data ├── coco16.txt ├── coco1cls.data ├── coco1cls.txt ├── coco2014.data ├── coco2017.data ├── coco64.data ├── coco64.txt ├── coco_paper.names ├── get_coco2014.sh ├── get_coco2017.sh ├── mcmot.data ├── mcmot.names ├── mcmot_det.data ├── mcmot_det_old.train ├── mcmot_det_train_old.txt ├── test1.txt ├── test2.txt └── train1.txt ├── demo.py ├── detect.py ├── mAPEvaluate ├── DetectImgAndWriteResultToXml.py ├── ReadAndSaveDarknetDetRes.py ├── ReadAnnotations.py ├── TestmApDetect.py ├── cmp_det_label.py ├── cmp_det_label_sf.py ├── darknet.py ├── darknet_ori_diou_cfg.py ├── findImgByObjectType.py ├── findImgByObjectType_zhou.py ├── model_analysis_PLM.py ├── model_analysis_half.py ├── model_analysis_v4_coco.py ├── model_analysis_v4all.py ├── model_analysis_v4half.py └── voc_eval.py ├── models.py ├── requirements.txt ├── test.py ├── test5_track.gif ├── test_half.py ├── tracker ├── basetrack.py ├── matching.py └── multitracker.py ├── tracking_utils ├── evaluation.py ├── io.py ├── kalman_filter.py ├── log.py ├── nms.py ├── parse_config.py ├── timer.py ├── utils.py └── visualization.py ├── train.py ├── utils ├── __init__.py ├── adabound.py ├── datasets.py ├── evolve.sh ├── gcp.sh ├── gen_dataset_mcmot.py ├── google_utils.py ├── layers.py ├── parse_config.py ├── process_mcmot_dataset.py ├── torch_utils.py └── utils.py └── yolov4-tiny-3l_no_group_id_no_upsample.cfg /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Even 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MOTEvaluate/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaptainEven/YOLOV4_MCMOT/0cf60be77ae6d088e079ccefedbf71a05f938890/MOTEvaluate/__init__.py -------------------------------------------------------------------------------- /MOTEvaluate/evaluate_pipeline.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | 3 | import os 4 | from MOTEvaluate.evaluate_utils.convert import convert_seqs 5 | from MOTEvaluate.evaluate import evaluate_mcmot_seqs 6 | from demo import DemoRunner 7 | 8 | 9 | # build evaluation pipeline for test set 10 | def evaluate_test_set(test_root): 11 | """ 12 | :param test_root: 13 | :return: 14 | """ 15 | # ---------- set Project root 16 | ROOT = '/mnt/diskb/even/YOLOV4' 17 | 18 | # ---------- init demo runner 19 | demo = DemoRunner() 20 | 21 | # ---------- set object class names 22 | demo.opt.names = ROOT + '/data/mcmot.names' 23 | 24 | # ----------- set weights and cfg file for different models 25 | # demo.opt.cfg = ROOT + '/cfg/' + 'yolov4-tiny-3l_no_group_id_no_upsample.cfg' 26 | # demo.opt.weights = ROOT + '/weights/' + 'v4_tiny3l_no_upsample_track_last.pt' 27 | 28 | # demo.opt.cfg = ROOT + '/cfg/' + 'yolov4_mobilev2_2l.cfg' 29 | # demo.opt.weights = ROOT + '/weights/' + 'track_last.pt' 30 | 31 | demo.opt.cfg = ROOT + '/cfg/' + 'yolov4-tiny-3l_no_group_id_no_upsample.cfg' 32 | demo.opt.weights = ROOT + '/weights/' + 'track_last.pt' 33 | 34 | if not os.path.isfile(demo.opt.cfg): 35 | print('[Err]: invalid cfg file.') 36 | return 37 | if not os.path.isfile(demo.opt.weights): 38 | print('[Err]: invalid weight file.') 39 | return 40 | 41 | print('Cfg: {:s}.'.format(demo.opt.cfg)) 42 | print('Weights: {:s}.\n'.format(demo.opt.weights)) 43 | 44 | # ----------- set test input videos' dir and tracking results dir 45 | demo.opt.videos = '/mnt/diskb/even/dataset/MCMOT_Evaluate' 46 | demo.opt.save_img_dir = demo.opt.videos 47 | 48 | # ---------- set standard out fps and interval: set test fps 49 | demo.opt.outFPS = 12 50 | demo.opt.interval = 1 51 | 52 | # ---------- labels preparation 53 | # Check test root for video and dark label format label file(txt) 54 | # Convert dark-label label file to mot16 format 55 | convert_seqs(seq_root=test_root, 56 | interval=demo.opt.interval, 57 | default_fps=demo.opt.outFPS, 58 | one_plus=True) 59 | # ---------- 60 | 61 | # ---------- Run tracking 62 | # Call mcmot-yolov4(demo.py) to do tracking(generate results.txt) 63 | # set task mode and output results type 64 | demo.opt.task = 'track' 65 | demo.opt.output_type = 'txts' 66 | 67 | # run tracking and output results.txt(MOT16) 68 | demo.run() 69 | # ---------- 70 | 71 | # --------- Run evaluation 72 | out_fps = demo.opt.outFPS // int(demo.opt.interval) 73 | evaluate_mcmot_seqs(test_root, default_fps=out_fps) 74 | # --------- 75 | 76 | 77 | if __name__ == '__main__': 78 | evaluate_test_set(test_root='/mnt/diskb/even/dataset/MCMOT_Evaluate') 79 | print('Done.') 80 | -------------------------------------------------------------------------------- /MOTEvaluate/evaluate_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaptainEven/YOLOV4_MCMOT/0cf60be77ae6d088e079ccefedbf71a05f938890/MOTEvaluate/evaluate_utils/__init__.py 
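The pipeline above chains three stages: DarkLabel-to-MOT16 label conversion (convert_seqs), tracking via demo.py (which writes results.txt), and metric computation (evaluate_mcmot_seqs). A minimal sketch of the comma-separated row format both sides agree on, as written by convert_darklabel_2_mot16 and parsed by read_txt_to_struct further below (the sample values here are made up):

```python
import numpy as np

# One row per object per frame:
# frame, track_id, left, top, width, height, conf, class_id, visibility
sample = '1,5,100,200,50,80,1,0,1'

row = np.array(list(map(float, sample.strip().split(','))))
xyxy = row[2:6].copy()
xyxy[2:] += xyxy[:2]  # the same tlwh -> xyxy shift that read_txt_to_struct applies
print(int(row[0]), int(row[1]), xyxy)  # frame 1, track 5, [100. 200. 150. 280.]
```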
-------------------------------------------------------------------------------- /MOTEvaluate/evaluate_utils/bbox.py: -------------------------------------------------------------------------------- 1 | """ 2 | 2D MOT2016 Evaluation Toolkit 3 | A Python reimplementation of the toolkit in 4 | 2DMOT16 (https://motchallenge.net/data/MOT16/) 5 | 6 | This file computes bounding box overlap 7 | 8 | (C) Yiwen Liu(765305261@qq.com), 2020-10 9 | """ 10 | import numpy as np 11 | 12 | 13 | def bbox_overlap(ex_box, gt_box): 14 | ex_box = ex_box.reshape(-1, 4) 15 | gt_box = gt_box.reshape(-1, 4) 16 | padded_gt = np.tile(gt_box, [ex_box.shape[0], 1]) 17 | insec = intersection(ex_box, padded_gt) 18 | 19 | uni = area_sum(ex_box, padded_gt) - insec 20 | return insec / uni 21 | 22 | 23 | def intersection(a, b): 24 | x = np.maximum(a[:, 0], b[:, 0]) 25 | y = np.maximum(a[:, 1], b[:, 1]) 26 | w = np.minimum(a[:, 2], b[:, 2]) - x 27 | h = np.minimum(a[:, 3], b[:, 3]) - y 28 | return np.maximum(w, 0) * np.maximum(h, 0) 29 | 30 | 31 | def area_sum(a, b): 32 | return (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]) + \ 33 | (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) 34 | -------------------------------------------------------------------------------- /MOTEvaluate/evaluate_utils/convert.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | 3 | import os 4 | import cv2 5 | 6 | classes = [ 7 | 'car', # 0 8 | 'bicycle', # 1 9 | 'person', # 2 10 | 'cyclist', # 3 11 | 'tricycle' # 4 12 | ] # 5 classes (background not included) 13 | 14 | cls2id = { 15 | 'car': 0, 16 | 'bicycle': 1, 17 | 'person': 2, 18 | 'cyclist': 3, 19 | 'tricycle': 4 20 | } 21 | 22 | id2cls = { 23 | 0: 'car', 24 | 1: 'bicycle', 25 | 2: 'person', 26 | 3: 'cyclist', 27 | 4: 'tricycle' 28 | } 29 | 30 | # image width and height 31 | W, H = 1920, 1080 32 | 33 | 34 | def convert_darklabel_2_mot16(darklabel_txt_path, 35 | interval=1, 36 | default_fps=12, 37 | one_plus=True, 38 | out_mot16_path=None): 39 | """ 40 | Convert DarkLabel annotations (frame # n [id, x1, y1, x2, y2, label]) 41 | to the MOT16 format 42 | """ 43 | if not os.path.isfile(darklabel_txt_path): 44 | print('[Err]: invalid input file path.') 45 | return 46 | 47 | if out_mot16_path is None: 48 | out_fps = default_fps // int(interval) 49 | print('[Note]: out_mot16_path not defined, using default.') 50 | dir_name, file_name = os.path.split(darklabel_txt_path) 51 | out_mot16_path = dir_name + '/' + \ 52 | file_name.split('.')[0] + \ 53 | '_mot16_fps{:d}.txt'.format(out_fps) 54 | 55 | with open(darklabel_txt_path, 'r', encoding='utf-8') as r_h, \ 56 | open(out_mot16_path, 'w', encoding='utf-8') as w_h: 57 | lines = r_h.readlines() 58 | 59 | # iterate over each frame 60 | fr_idx = 0 61 | for fr_i, line in enumerate(lines): 62 | if fr_i % interval != 0: 63 | continue 64 | 65 | line = line.strip().split(',') 66 | fr_id = int(line[0]) 67 | n_objs = int(line[1]) 68 | 69 | # iterate over each object in the current frame 70 | for cur in range(2, len(line), 6): 71 | class_type = line[cur + 5].strip() 72 | class_id = cls2id[class_type] # class type => class id 73 | 74 | # read the track id 75 | if one_plus: 76 | track_id = int(line[cur]) + 1 # track ids start from 1 77 | else: 78 | track_id = int(line[cur]) 79 | 80 | # read the bbox coordinates 81 | x1, y1 = int(line[cur + 1]), int(line[cur + 2]) 82 | x2, y2 = int(line[cur + 3]), int(line[cur + 4]) 83 | 84 | # clip the bbox to the image resolution 85 | x1 = x1 if x1 >= 0 else 0 86 | x1 = x1 if x1 < W else W - 1 87 | y1 = y1 if y1 >= 0 else 0 88 | y1 = y1 if y1 < H else H - 1 89 | x2 = x2 if x2 >= 0 else 0 90 | x2 = x2 if x2 < W else W - 1 91 | y2 = y2 if y2 >= 0 else 0 92 | y2 =
y2 if y2 < H else H - 1 93 | 94 | left, top = x1, y1 95 | width, height = x2 - x1, y2 - y1 96 | 97 | # write this object's record 98 | if interval == 1: 99 | write_line_str = str(fr_id + 1) + ',' \ 100 | + str(track_id) + ',' \ 101 | + str(left) + ',' \ 102 | + str(top) + ',' \ 103 | + str(width) + ',' \ 104 | + str(height) + ',' \ 105 | + '1,' + str(class_id) + ',' + '1' 106 | else: 107 | write_line_str = str(fr_idx + 1) + ',' \ 108 | + str(track_id) + ',' \ 109 | + str(left) + ',' \ 110 | + str(top) + ',' \ 111 | + str(width) + ',' \ 112 | + str(height) + ',' \ 113 | + '1,' + str(class_id) + ',' + '1' 114 | # print(write_line_str) 115 | w_h.write(write_line_str + '\n') 116 | 117 | fr_idx += 1 118 | print('Total {:d} frames sampled'.format(fr_idx)) 119 | 120 | print('{:s} written.'.format(out_mot16_path)) 121 | 122 | 123 | def convert_seqs(seq_root, interval=1, default_fps=12, one_plus=True): 124 | """Convert the DarkLabel gt file of each .mp4 seq under seq_root. 125 | """ 126 | if not os.path.isdir(seq_root): 127 | print('[Err]: invalid seq root.') 128 | return 129 | 130 | seq_names = [x for x in os.listdir(seq_root) if x.endswith('.mp4')] 131 | for seq_name in seq_names: 132 | darklabel_txt_path = seq_root + '/' + seq_name[:-4] + '_gt.txt' 133 | 134 | # ---------- do parsing for a seq 135 | convert_darklabel_2_mot16(darklabel_txt_path, 136 | interval=interval, 137 | default_fps=default_fps, 138 | one_plus=one_plus, 139 | out_mot16_path=None) 140 | # ---------- 141 | 142 | 143 | if __name__ == '__main__': 144 | # convert_darklabel_2_mot16(darklabel_txt_path='F:/seq_data/images/mcmot_seq_imgs_1/mcmot_seq_imgs_1_gt.txt') 145 | convert_seqs(seq_root='/mnt/diskb/even/dataset/MCMOT_Evaluate', 146 | interval=1, 147 | default_fps=12, 148 | one_plus=True) 149 | # convert_darklabel_2_mot16(darklabel_txt_path='F:/val_seq/val_1_gt.txt', 150 | # interval=1, 151 | # fps=12, 152 | # one_plus=False, 153 | # out_mot16_path=None) 154 | 155 | print('Done.') 156 | -------------------------------------------------------------------------------- /MOTEvaluate/evaluate_utils/io.py: -------------------------------------------------------------------------------- 1 | """ 2 | 2D MOT2016 Evaluation Toolkit 3 | A Python reimplementation of the toolkit in 4 | 2DMOT16 (https://motchallenge.net/data/MOT16/) 5 | 6 | This file deals with file IO / invalid annotation 7 | removal / result output 8 | 9 | (C) Yiwen Liu(765305261@qq.com), 2020-10 10 | """ 11 | import os 12 | import numpy as np 13 | 14 | 15 | def read_seqmaps(fname): 16 | """ 17 | seqmap: lists the sequence names to be evaluated 18 | """ 19 | assert os.path.exists(fname), 'File %s does not exist!' % fname 20 | with open(fname, 'r') as fid: 21 | lines = [line.strip() for line in fid.readlines()] 22 | seq_names = lines[1:] 23 | return seq_names 24 | 25 | 26 | def read_txt_to_struct(f_name): 27 | """ 28 | read txt into a structure; the columns represent: 29 | [frame number] [identity number] [bbox left] [bbox top] 30 | [bbox width] [bbox height] [DET: detection score, 31 | GT: ignored class flag] [class] [visibility ratio] 32 | """ 33 | data = [] 34 | with open(f_name, 'r', encoding='utf-8') as fid: 35 | lines = fid.readlines() 36 | for line in lines: 37 | line = list(map(float, line.strip().split(','))) 38 | data.append(line) 39 | data = np.array(data) 40 | 41 | # change tlwh format to xyxy format 42 | data[:, 4:6] += data[:, 2:4] 43 | return data 44 | 45 | 46 | def extract_valid_gt_data(all_data, remove_ofv=False): 47 | """ 48 | remove non-valid classes.
49 | Following the MOT2016 format, 50 | valid classes include [1: pedestrian]; 51 | distractor classes include [2: person on vehicle, 52 | 7: static person, 8: distractor, 12: reflection]. 53 | """ 54 | distractor_classes = [2, 7, 8, 12] 55 | valid_classes = [1] 56 | original = all_data.shape[0] 57 | 58 | # keep only the pedestrian and distractor classes; all other classes 59 | # are removed (distractors are kept for further use) 60 | selected = np.array([ 61 | i for i in range(all_data.shape[0]) 62 | if all_data[i, 7] in valid_classes + distractor_classes]) 63 | all_data = all_data[selected, :] 64 | 65 | # remove boxes whose centers are out of view 66 | # Because this tool is not only meant for MOT, the resolution is not 67 | # assumed to be provided. In MOT, the maximum width and height should 68 | # be taken into consideration 69 | 70 | # PS: As stated by the author of the MOT benchmark, it would be better if 71 | # the tracker could figure out out-of-view pedestrians like a human does. 72 | # Thus no filtering 73 | if remove_ofv: # remove out of view for ground truth 74 | selected = np.array([i for i in range(all_data.shape[0]) 75 | if (all_data[i, 2] + all_data[i, 4]) / 2 >= 0 and 76 | (all_data[i, 3] + all_data[i, 5]) / 2 >= 0]) 77 | 78 | # right and bottom out-of-range are not considered here. Anyway, ofv 79 | # is not removed in MOT2016 80 | # selected = np.array([i for i in xrange(all_data.shape[0]) 81 | # if (all_data[i, 2] + all_data[i, 4]) / 2 != 0 82 | # ]) 83 | all_data = all_data[selected, :] 84 | 85 | # remove non-human classes from ground truth, 86 | # and return distractor identities 87 | cond = np.array( 88 | [i in valid_classes + distractor_classes for i in all_data[:, 7]]) 89 | selected = np.where(cond)[0] 90 | all_data = all_data[selected, :] # not necessary? 91 | 92 | print('[GT PREPROCESSING]: Removing non-people classes, remaining ' 93 | '{}/{} boxes'.format(all_data.shape[0], original)) 94 | cond = np.array([i in distractor_classes for i in all_data[:, 7]]) 95 | selected = np.where(cond)[0] 96 | 97 | all_distractor_ids = all_data[selected, 1] 98 | unique_distractor_ids = np.unique(all_distractor_ids) 99 | return all_data, unique_distractor_ids 100 | 101 | 102 | def print_format(widths, formatters, values, form_attr): 103 | return ' '.join([(form_attr % (width, form)).format(val) for ( 104 | form, width, val) in zip(formatters, widths, values)]) 105 | 106 | 107 | def print_format_name(widths, values, form_attr): 108 | return ' '.join([(form_attr % (width)).format(val) for (width, val) in zip( 109 | widths, values)]) 110 | 111 | 112 | def print_metrics(header, metrics, banner=25): 113 | """Print a formatted table of MOT metrics. 114 | """ 115 | if len(metrics) == 17: 116 | print_metrics_ext(header, metrics) 117 | return 118 | 119 | print('\n', '*' * banner, header, '*' * banner) 120 | # metric_names_long = ['Recall', 'Precision', 'False Alarm Rate', 121 | # 'GT Tracks', 'Mostly Tracked', 'Partially Tracked', 122 | # 'Mostly Lost', 'False Positives', 'False Negatives', 123 | # 'ID Switches', 'Fragmentations', 124 | # 'MOTA', 'MOTP', 'MOTA Log'] 125 | 126 | metric_names_short = ['Rcll', 'Prcn', 'FAR', 127 | 'GT', 'MT', 'PT', 'ML', 128 | 'FP', 'FN', 'IDs', 'FM', 129 | 'MOTA', 'MOTP', 'MOTAL'] 130 | 131 | # metric_widths_long = [6, 9, 16, 9, 14, 17, 11, 15, 15, 11, 14, 5, 5, 8] 132 | metric_widths_short = [5, 5, 5, 4, 4, 4, 4, 6, 6, 5, 5, 5, 5, 5] 133 | 134 | metric_format_long = ['.1f', '.1f', '.2f', 135 | '.0f', '.0f', '.0f', '.0f', 136 | '.0f', '.0f', '.0f', '.0f', 137 | '.1f', '.1f', '.1f'] 138 | 139 | splits = [(0, 3), (3, 7), (7, 11), (11, 14)] 140 |
print(' | '.join([print_format_name( 141 | metric_widths_short[start:end], 142 | metric_names_short[start:end], '{0: <%d}') 143 | for (start, end) in splits])) 144 | 145 | metric_str = ' | '.join([print_format( 146 | metric_widths_short[start:end], 147 | metric_format_long[start:end], 148 | metrics[start:end], '{:%d%s}') for (start, end) in splits]) 149 | # metric_str = metric_str[1:] 150 | print(metric_str) 151 | 152 | 153 | def print_metrics_ext(header, metrics, banner=30): 154 | print('\n{} {} {}'.format('*' * banner, header, '*' * banner)) 155 | # metric_names_long = ['IDF1', 'IDP', 'IDR', 156 | # 'Recall', 'Precision', 'False Alarm Rate', 157 | # 'GT Tracks', 'Mostly Tracked', 'Partially Tracked', 158 | # 'Mostly Lost', 159 | # 'False Positives', 'False Negatives', 'ID Switches', 160 | # 'Fragmentations', 161 | # 'MOTA', 'MOTP', 'MOTA Log'] 162 | 163 | metric_names_short = ['IDF1', 'IDP', 'IDR', 164 | 'Rcll', 'Prcn', 'FAR', 165 | 'GT', 'MT', 'PT', 'ML', 166 | 'FP', 'FN', 'IDs', 'FM', 167 | 'MOTA', 'MOTP', 'MOTAL'] 168 | 169 | # metric_widths_long = [5, 4, 4, 6, 9, 16, 170 | # 9, 14, 17, 11, 15, 15, 11, 14, 5, 5, 8] 171 | metric_widths_short = [5, 4, 4, 5, 5, 5, 4, 4, 4, 4, 6, 6, 5, 5, 5, 5, 5] 172 | 173 | metric_format_long = ['.1f', '.1f', '.1f', 174 | '.1f', '.1f', '.2f', 175 | '.0f', '.0f', '.0f', '.0f', 176 | '.0f', '.0f', '.0f', '.0f', 177 | '.1f', '.1f', '.1f'] 178 | 179 | splits = [(0, 3), (3, 6), (6, 10), (10, 14), (14, 17)] 180 | 181 | print(' | '.join([print_format_name( 182 | metric_widths_short[start:end], 183 | metric_names_short[start:end], '{0: <%d}') 184 | for (start, end) in splits])) 185 | 186 | metric_str = ' | '.join([print_format( 187 | metric_widths_short[start:end], 188 | metric_format_long[start:end], 189 | metrics[start:end], '{:%d%s}') 190 | for (start, end) in splits]) 191 | 192 | print(metric_str) 193 | print('\n\n') 194 | -------------------------------------------------------------------------------- /MVI_39401_track_fps12.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaptainEven/YOLOV4_MCMOT/0cf60be77ae6d088e079ccefedbf71a05f938890/MVI_39401_track_fps12.gif -------------------------------------------------------------------------------- /MVI_39501_track_fps12.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaptainEven/YOLOV4_MCMOT/0cf60be77ae6d088e079ccefedbf71a05f938890/MVI_39501_track_fps12.gif -------------------------------------------------------------------------------- /MVI_40855_track_fps12.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaptainEven/YOLOV4_MCMOT/0cf60be77ae6d088e079ccefedbf71a05f938890/MVI_40855_track_fps12.gif -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # YOLOV4_MCMOT 2 | Using YOLOV4 as detector for MCMOT. 
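A minimal usage sketch of the tracking entry point, based on how `MOTEvaluate/evaluate_pipeline.py` drives it (the paths below are placeholders; the `DemoRunner` option names are taken from that script):

```python
from demo import DemoRunner

demo = DemoRunner()
demo.opt.cfg = 'cfg/yolov4-tiny-3l_no_group_id_no_upsample.cfg'  # detector cfg
demo.opt.weights = 'weights/track_last.pt'                       # placeholder weights path
demo.opt.names = 'data/mcmot.names'                              # C5 class names
demo.opt.videos = '/path/to/videos'                              # placeholder input video dir
demo.opt.task = 'track'                                          # MCMOT tracking mode
demo.opt.output_type = 'txts'                                    # write MOT16-style txt results
demo.run()
```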
3 | 4 | ## Tracking demo of C5(car, bicycle, person, cyclist, tricycle) using YOLOV4-tiny backbone 5 | ![image](https://github.com/CaptainEven/YOLOV4_MCMOT/blob/YOLOV4_MCMOT_dev/test5_track.gif) 6 | 7 | ## Tracking demo of UA-DETRAC dataset using mobilenetv2-yolo backbone(2 layers of yolo output) 8 | ![image](https://github.com/CaptainEven/YOLOV4_MCMOT/blob/YOLOV4_MCMOT_dev/MVI_39401_track_fps12.gif) 9 | ![image](https://github.com/CaptainEven/YOLOV4_MCMOT/blob/YOLOV4_MCMOT_dev/MVI_39501_track_fps12.gif) 10 | ![image](https://github.com/CaptainEven/YOLOV4_MCMOT/blob/YOLOV4_MCMOT_dev/MVI_40855_track_fps12.gif) 11 | -------------------------------------------------------------------------------- /auto_weighted_loss.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class AutomaticWeightedLoss(nn.Module): 8 | """automatically weighted multi-task loss_funcs 9 | Params: 10 | num: int,the number of loss_funcs 11 | x: multi-task loss_funcs 12 | Examples: 13 | loss1=1 14 | loss2=2 15 | awl = AutomaticWeightedLoss(2) 16 | loss_sum = awl(loss1, loss2) 17 | """ 18 | 19 | def __init__(self, num=2): 20 | super(AutomaticWeightedLoss, self).__init__() 21 | params = torch.ones(num, requires_grad=True) 22 | self.params = torch.nn.Parameter(params) 23 | 24 | def forward(self, *x): 25 | loss_sum = 0.0 26 | for i, loss in enumerate(x): 27 | loss_sum += 0.5 / (self.params[i] ** 2) * loss + torch.log(1 + self.params[i] ** 2) 28 | return loss_sum 29 | 30 | 31 | if __name__ == '__main__': 32 | awl = AutomaticWeightedLoss(2) 33 | print(awl.parameters()) 34 | -------------------------------------------------------------------------------- /cfg/mobile-yolo-3l.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | batch=64 3 | subdivisions=16 4 | width=768 5 | height=448 6 | channels=3 7 | momentum=0.9 8 | decay=4e-5 9 | angle=0 10 | saturation=1.5 11 | exposure=1.5 12 | hue=.1 13 | 14 | learning_rate=0.00001 15 | burn_in=1000 16 | max_batches=800020 17 | policy=steps 18 | steps=400000,650000 19 | scales=.1,.1 20 | 21 | [convolutional] 22 | filters=32 23 | size=3 24 | stride=2 25 | pad=1 26 | batch_normalize=1 27 | activation=relu 28 | 29 | [convolutional] 30 | filters=32 31 | size=1 32 | stride=1 33 | pad=1 34 | batch_normalize=1 35 | activation=relu 36 | 37 | [convolutional] 38 | filters=32 39 | size=3 40 | groups=32 41 | stride=1 42 | pad=1 43 | batch_normalize=1 44 | activation=relu 45 | 46 | [convolutional] 47 | filters=16 48 | size=1 49 | stride=1 50 | pad=1 51 | batch_normalize=1 52 | activation=linear 53 | 54 | [convolutional] 55 | filters=96 56 | size=1 57 | stride=1 58 | pad=1 59 | batch_normalize=1 60 | activation=relu 61 | 62 | [convolutional] 63 | filters=96 64 | size=3 65 | groups=96 66 | stride=2 67 | pad=1 68 | batch_normalize=1 69 | activation=relu 70 | 71 | [convolutional] 72 | filters=24 73 | size=1 74 | stride=1 75 | pad=1 76 | batch_normalize=1 77 | activation=linear 78 | 79 | [convolutional] 80 | filters=144 81 | size=1 82 | stride=1 83 | pad=1 84 | batch_normalize=1 85 | activation=relu 86 | 87 | [convolutional] 88 | filters=144 89 | size=3 90 | groups=144 91 | stride=1 92 | pad=1 93 | batch_normalize=1 94 | activation=relu 95 | 96 | [convolutional] 97 | filters=24 98 | size=1 99 | stride=1 100 | pad=1 101 | batch_normalize=1 102 | activation=linear 103 | 104 | [shortcut] 105 | from=-4 106 | activation=linear 107 | 108 | [convolutional] 
109 | filters=144 110 | size=1 111 | stride=1 112 | pad=1 113 | batch_normalize=1 114 | activation=relu 115 | 116 | [convolutional] 117 | filters=144 118 | size=3 119 | groups=144 120 | stride=2 121 | pad=1 122 | batch_normalize=1 123 | activation=relu 124 | 125 | [convolutional] 126 | filters=32 127 | size=1 128 | stride=1 129 | pad=1 130 | batch_normalize=1 131 | activation=linear 132 | 133 | [convolutional] 134 | filters=192 135 | size=1 136 | stride=1 137 | pad=1 138 | batch_normalize=1 139 | activation=relu 140 | 141 | [convolutional] 142 | filters=192 143 | size=3 144 | groups=192 145 | stride=1 146 | pad=1 147 | batch_normalize=1 148 | activation=relu 149 | 150 | [convolutional] 151 | filters=32 152 | size=1 153 | stride=1 154 | pad=1 155 | batch_normalize=1 156 | activation=linear 157 | 158 | [shortcut] 159 | from=-4 160 | activation=linear 161 | 162 | [convolutional] 163 | filters=192 164 | size=1 165 | stride=1 166 | pad=1 167 | batch_normalize=1 168 | activation=relu 169 | 170 | [convolutional] 171 | filters=192 172 | size=3 173 | groups=192 174 | stride=1 175 | pad=1 176 | batch_normalize=1 177 | activation=relu 178 | 179 | [convolutional] 180 | filters=32 181 | size=1 182 | stride=1 183 | pad=1 184 | batch_normalize=1 185 | activation=linear 186 | 187 | [shortcut] 188 | from=-4 189 | activation=linear 190 | 191 | [convolutional] 192 | filters=192 193 | size=1 194 | stride=1 195 | pad=1 196 | batch_normalize=1 197 | activation=relu 198 | 199 | [convolutional] 200 | filters=192 201 | size=3 202 | groups=192 203 | stride=1 204 | pad=1 205 | batch_normalize=1 206 | activation=relu 207 | 208 | [convolutional] 209 | filters=64 210 | size=1 211 | stride=1 212 | pad=1 213 | batch_normalize=1 214 | activation=linear 215 | 216 | [convolutional] 217 | filters=384 218 | size=1 219 | stride=1 220 | pad=1 221 | batch_normalize=1 222 | activation=relu 223 | 224 | [convolutional] 225 | filters=384 226 | size=3 227 | groups=384 228 | stride=1 229 | pad=1 230 | batch_normalize=1 231 | activation=relu 232 | 233 | [convolutional] 234 | filters=64 235 | size=1 236 | stride=1 237 | pad=1 238 | batch_normalize=1 239 | activation=linear 240 | 241 | [shortcut] 242 | from=-4 243 | activation=linear 244 | 245 | [convolutional] 246 | filters=384 247 | size=1 248 | stride=1 249 | pad=1 250 | batch_normalize=1 251 | activation=relu 252 | 253 | [convolutional] 254 | filters=384 255 | size=3 256 | groups=384 257 | stride=1 258 | pad=1 259 | batch_normalize=1 260 | activation=relu 261 | 262 | [convolutional] 263 | filters=64 264 | size=1 265 | stride=1 266 | pad=1 267 | batch_normalize=1 268 | activation=linear 269 | 270 | [shortcut] 271 | from=-4 272 | activation=linear 273 | 274 | [convolutional] 275 | filters=384 276 | size=1 277 | stride=1 278 | pad=1 279 | batch_normalize=1 280 | activation=relu 281 | 282 | [convolutional] 283 | filters=384 284 | size=3 285 | groups=384 286 | stride=1 287 | pad=1 288 | batch_normalize=1 289 | activation=relu 290 | 291 | [convolutional] 292 | filters=64 293 | size=1 294 | stride=1 295 | pad=1 296 | batch_normalize=1 297 | activation=linear 298 | 299 | [shortcut] 300 | from=-4 301 | activation=linear 302 | 303 | [convolutional] 304 | filters=384 305 | size=1 306 | stride=1 307 | pad=1 308 | batch_normalize=1 309 | activation=relu 310 | 311 | [convolutional] 312 | filters=384 313 | size=3 314 | groups=384 315 | stride=2 316 | pad=1 317 | batch_normalize=1 318 | activation=relu 319 | 320 | [convolutional] 321 | filters=96 322 | size=1 323 | stride=1 324 | pad=1 325 | 
batch_normalize=1 326 | activation=linear 327 | 328 | [convolutional] 329 | filters=576 330 | size=1 331 | stride=1 332 | pad=1 333 | batch_normalize=1 334 | activation=relu 335 | 336 | [convolutional] 337 | filters=576 338 | size=3 339 | groups=576 340 | stride=1 341 | pad=1 342 | batch_normalize=1 343 | activation=relu 344 | 345 | [convolutional] 346 | filters=96 347 | size=1 348 | stride=1 349 | pad=1 350 | batch_normalize=1 351 | activation=linear 352 | 353 | [shortcut] 354 | from=-4 355 | activation=linear 356 | 357 | [convolutional] 358 | filters=576 359 | size=1 360 | stride=1 361 | pad=1 362 | batch_normalize=1 363 | activation=relu 364 | 365 | [convolutional] 366 | filters=576 367 | size=3 368 | groups=576 369 | stride=1 370 | pad=1 371 | batch_normalize=1 372 | activation=relu 373 | 374 | [convolutional] 375 | filters=96 376 | size=1 377 | stride=1 378 | pad=1 379 | batch_normalize=1 380 | activation=linear 381 | 382 | [shortcut] 383 | from=-4 384 | activation=linear 385 | 386 | [convolutional] 387 | filters=576 388 | size=1 389 | stride=1 390 | pad=1 391 | batch_normalize=1 392 | activation=relu 393 | 394 | [convolutional] 395 | filters=576 396 | size=3 397 | groups=576 398 | stride=2 399 | pad=1 400 | batch_normalize=1 401 | activation=relu 402 | 403 | [convolutional] 404 | filters=160 405 | size=1 406 | stride=1 407 | pad=1 408 | batch_normalize=1 409 | activation=linear 410 | 411 | [convolutional] 412 | filters=960 413 | size=1 414 | stride=1 415 | pad=1 416 | batch_normalize=1 417 | activation=relu 418 | 419 | [convolutional] 420 | filters=960 421 | size=3 422 | groups=960 423 | stride=1 424 | pad=1 425 | batch_normalize=1 426 | activation=relu 427 | 428 | [convolutional] 429 | filters=160 430 | size=1 431 | stride=1 432 | pad=1 433 | batch_normalize=1 434 | activation=linear 435 | 436 | [shortcut] 437 | from=-4 438 | activation=linear 439 | 440 | [convolutional] 441 | filters=960 442 | size=1 443 | stride=1 444 | pad=1 445 | batch_normalize=1 446 | activation=relu 447 | 448 | [convolutional] 449 | filters=960 450 | size=3 451 | groups=960 452 | stride=1 453 | pad=1 454 | batch_normalize=1 455 | activation=relu 456 | 457 | [convolutional] 458 | filters=160 459 | size=1 460 | stride=1 461 | pad=1 462 | batch_normalize=1 463 | activation=linear 464 | 465 | [shortcut] 466 | from=-4 467 | activation=linear 468 | 469 | ### SPP ### 470 | [maxpool] 471 | stride=1 472 | size=3 473 | 474 | [route] 475 | layers=-2 476 | 477 | [maxpool] 478 | stride=1 479 | size=5 480 | 481 | [route] 482 | layers=-4 483 | 484 | [maxpool] 485 | stride=1 486 | size=9 487 | 488 | [route] 489 | layers=-1,-3,-5,-6 490 | 491 | ### End SPP ### 492 | [convolutional] 493 | filters=288 494 | size=1 495 | stride=1 496 | pad=1 497 | batch_normalize=1 498 | activation=relu 499 | 500 | [convolutional] 501 | filters=288 502 | size=3 503 | groups=288 504 | stride=1 505 | pad=1 506 | batch_normalize=1 507 | activation=relu 508 | 509 | [convolutional] 510 | filters=96 511 | size=1 512 | stride=1 513 | pad=1 514 | batch_normalize=1 515 | activation=relu 516 | 517 | [convolutional] 518 | filters=384 519 | size=1 520 | stride=1 521 | pad=1 522 | batch_normalize=1 523 | activation=relu 524 | 525 | [convolutional] 526 | size=1 527 | stride=1 528 | pad=1 529 | filters=30 530 | activation=linear 531 | 532 | [yolo] 533 | mask = 6,7,8 534 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 535 | classes=5 536 | num=9 537 | jitter=.3 538 | ignore_thresh=.7 539 | truth_thresh=1 540 | random=1 
541 | scale_x_y=1.05 542 | iou_thresh=0.213 543 | cls_normalizer=1.0 544 | iou_normalizer=0.07 545 | iou_loss=ciou 546 | nms_kind=greedynms 547 | beta_nms=0.6 548 | 549 | [route] 550 | layers= 65 551 | 552 | [upsample] 553 | stride=2 554 | 555 | [route] 556 | layers=-1,48 557 | 558 | [convolutional] 559 | filters=80 560 | size=1 561 | stride=1 562 | pad=1 563 | batch_normalize=1 564 | activation=relu 565 | 566 | [convolutional] 567 | filters=288 568 | size=1 569 | stride=1 570 | pad=1 571 | batch_normalize=1 572 | activation=relu 573 | 574 | [convolutional] 575 | filters=288 576 | size=3 577 | groups=288 578 | stride=1 579 | pad=1 580 | batch_normalize=1 581 | activation=relu 582 | 583 | [convolutional] 584 | filters=192 585 | size=1 586 | stride=1 587 | pad=1 588 | batch_normalize=1 589 | activation=relu 590 | 591 | [convolutional] 592 | filters=288 593 | size=1 594 | stride=1 595 | pad=1 596 | batch_normalize=1 597 | activation=relu 598 | 599 | [convolutional] 600 | size=1 601 | stride=1 602 | pad=1 603 | filters=30 604 | activation=linear 605 | 606 | [yolo] 607 | mask = 3,4,5 608 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 609 | classes=5 610 | num=9 611 | jitter=.3 612 | ignore_thresh=.7 613 | truth_thresh=1 614 | random=1 615 | scale_x_y=1.05 616 | iou_thresh=0.213 617 | cls_normalizer=1.0 618 | iou_normalizer=0.07 619 | iou_loss=ciou 620 | nms_kind=greedynms 621 | beta_nms=0.6 622 | 623 | [route] 624 | layers= 74 625 | 626 | [upsample] 627 | stride=2 628 | 629 | [route] 630 | layers=-1,37 631 | 632 | [convolutional] 633 | filters=80 634 | size=1 635 | stride=1 636 | pad=1 637 | batch_normalize=1 638 | activation=relu 639 | 640 | [convolutional] 641 | filters=288 642 | size=1 643 | stride=1 644 | pad=1 645 | batch_normalize=1 646 | activation=relu 647 | 648 | [convolutional] 649 | filters=288 650 | size=3 651 | groups=288 652 | stride=1 653 | pad=1 654 | batch_normalize=1 655 | activation=relu 656 | 657 | [convolutional] 658 | filters=192 659 | size=1 660 | stride=1 661 | pad=1 662 | batch_normalize=1 663 | activation=relu 664 | 665 | [convolutional] 666 | filters=288 667 | size=1 668 | stride=1 669 | pad=1 670 | batch_normalize=1 671 | activation=relu 672 | 673 | [convolutional] 674 | size=1 675 | stride=1 676 | pad=1 677 | filters=30 678 | activation=linear 679 | 680 | [yolo] 681 | mask = 0,1,2 682 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 683 | classes=5 684 | num=9 685 | jitter=.3 686 | ignore_thresh=.7 687 | truth_thresh=1 688 | random=1 689 | scale_x_y=1.05 690 | iou_thresh=0.213 691 | cls_normalizer=1.0 692 | iou_normalizer=0.07 693 | iou_loss=ciou 694 | nms_kind=greedynms 695 | beta_nms=0.6 696 | 697 | [route] 698 | layers=-23 699 | 700 | [convolutional] 701 | size=1 702 | stride=1 703 | pad=1 704 | filters=128 705 | activation=linear 706 | 707 | [route] 708 | layers=-15 709 | 710 | [convolutional] 711 | size=1 712 | stride=1 713 | pad=1 714 | filters=128 715 | activation=linear 716 | 717 | [route] 718 | layers=-7 719 | 720 | [convolutional] 721 | size=1 722 | stride=1 723 | pad=1 724 | filters=128 725 | activation=linear -------------------------------------------------------------------------------- /cfg/yolov4-tiny.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=1 8 | width=416 9 | height=416 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 
13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00261 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | [convolutional] 26 | batch_normalize=1 27 | filters=32 28 | size=3 29 | stride=2 30 | pad=1 31 | activation=leaky 32 | 33 | [convolutional] 34 | batch_normalize=1 35 | filters=64 36 | size=3 37 | stride=2 38 | pad=1 39 | activation=leaky 40 | 41 | [convolutional] 42 | batch_normalize=1 43 | filters=64 44 | size=3 45 | stride=1 46 | pad=1 47 | activation=leaky 48 | 49 | [route_lhalf] 50 | layers=-1 51 | 52 | [convolutional] 53 | batch_normalize=1 54 | filters=32 55 | size=3 56 | stride=1 57 | pad=1 58 | activation=leaky 59 | 60 | [convolutional] 61 | batch_normalize=1 62 | filters=32 63 | size=3 64 | stride=1 65 | pad=1 66 | activation=leaky 67 | 68 | [route] 69 | layers = -1,-2 70 | 71 | [convolutional] 72 | batch_normalize=1 73 | filters=64 74 | size=1 75 | stride=1 76 | pad=1 77 | activation=leaky 78 | 79 | [route] 80 | layers = -6,-1 81 | 82 | [maxpool] 83 | size=2 84 | stride=2 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=128 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [route_lhalf] 95 | layers=-1 96 | 97 | [convolutional] 98 | batch_normalize=1 99 | filters=64 100 | size=3 101 | stride=1 102 | pad=1 103 | activation=leaky 104 | 105 | [convolutional] 106 | batch_normalize=1 107 | filters=64 108 | size=3 109 | stride=1 110 | pad=1 111 | activation=leaky 112 | 113 | [route] 114 | layers = -1,-2 115 | 116 | [convolutional] 117 | batch_normalize=1 118 | filters=128 119 | size=1 120 | stride=1 121 | pad=1 122 | activation=leaky 123 | 124 | [route] 125 | layers = -6,-1 126 | 127 | [maxpool] 128 | size=2 129 | stride=2 130 | 131 | [convolutional] 132 | batch_normalize=1 133 | filters=256 134 | size=3 135 | stride=1 136 | pad=1 137 | activation=leaky 138 | 139 | [route_lhalf] 140 | layers=-1 141 | 142 | [convolutional] 143 | batch_normalize=1 144 | filters=128 145 | size=3 146 | stride=1 147 | pad=1 148 | activation=leaky 149 | 150 | [convolutional] 151 | batch_normalize=1 152 | filters=128 153 | size=3 154 | stride=1 155 | pad=1 156 | activation=leaky 157 | 158 | [route] 159 | layers = -1,-2 160 | 161 | [convolutional] 162 | batch_normalize=1 163 | filters=256 164 | size=1 165 | stride=1 166 | pad=1 167 | activation=leaky 168 | 169 | [route] 170 | layers = -6,-1 171 | 172 | [maxpool] 173 | size=2 174 | stride=2 175 | 176 | [convolutional] 177 | batch_normalize=1 178 | filters=512 179 | size=3 180 | stride=1 181 | pad=1 182 | activation=leaky 183 | 184 | ################################## 185 | 186 | [convolutional] 187 | batch_normalize=1 188 | filters=256 189 | size=1 190 | stride=1 191 | pad=1 192 | activation=leaky 193 | 194 | [convolutional] 195 | batch_normalize=1 196 | filters=512 197 | size=3 198 | stride=1 199 | pad=1 200 | activation=leaky 201 | 202 | [convolutional] 203 | size=1 204 | stride=1 205 | pad=1 206 | filters=255 207 | activation=linear 208 | 209 | 210 | 211 | [yolo] 212 | mask = 3,4,5 213 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 214 | classes=80 215 | num=6 216 | jitter=.3 217 | scale_x_y = 1.05 218 | cls_normalizer=1.0 219 | iou_normalizer=0.07 220 | iou_loss=ciou 221 | ignore_thresh = .7 222 | truth_thresh = 1 223 | random=0 224 | nms_kind=greedynms 225 | beta_nms=0.6 226 | 227 | [route] 228 | layers = -4 229 | 230 | [convolutional] 231 | batch_normalize=1 232 | filters=128 233 | size=1 234 | stride=1 
235 | pad=1 236 | activation=leaky 237 | 238 | [upsample] 239 | stride=2 240 | 241 | [route] 242 | layers = -1, 23 243 | 244 | [convolutional] 245 | batch_normalize=1 246 | filters=256 247 | size=3 248 | stride=1 249 | pad=1 250 | activation=leaky 251 | 252 | [convolutional] 253 | size=1 254 | stride=1 255 | pad=1 256 | filters=255 257 | activation=linear 258 | 259 | [yolo] 260 | mask = 1,2,3 261 | anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319 262 | classes=80 263 | num=6 264 | jitter=.3 265 | scale_x_y = 1.05 266 | cls_normalizer=1.0 267 | iou_normalizer=0.07 268 | iou_loss=ciou 269 | ignore_thresh = .7 270 | truth_thresh = 1 271 | random=0 272 | nms_kind=greedynms 273 | beta_nms=0.6 274 | -------------------------------------------------------------------------------- /data/coco.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=../coco/train2017.txt 3 | valid=../coco/testdev2017.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | couch 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tv 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /data/coco1.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=data/coco1.txt 3 | valid=data/coco1.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco1.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000109622.jpg 2 | -------------------------------------------------------------------------------- /data/coco16.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=data/coco16.txt 3 | valid=data/coco16.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco16.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000109622.jpg 2 | ../coco/images/train2017/000000160694.jpg 3 | ../coco/images/train2017/000000308590.jpg 4 | ../coco/images/train2017/000000327573.jpg 5 | ../coco/images/train2017/000000062929.jpg 6 | ../coco/images/train2017/000000512793.jpg 7 | ../coco/images/train2017/000000371735.jpg 8 | ../coco/images/train2017/000000148118.jpg 9 | 
../coco/images/train2017/000000309856.jpg 10 | ../coco/images/train2017/000000141882.jpg 11 | ../coco/images/train2017/000000318783.jpg 12 | ../coco/images/train2017/000000337760.jpg 13 | ../coco/images/train2017/000000298197.jpg 14 | ../coco/images/train2017/000000042421.jpg 15 | ../coco/images/train2017/000000328898.jpg 16 | ../coco/images/train2017/000000458856.jpg 17 | -------------------------------------------------------------------------------- /data/coco1cls.data: -------------------------------------------------------------------------------- 1 | classes=1 2 | train=data/coco1cls.txt 3 | valid=data/coco1cls.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco1cls.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000000901.jpg 2 | ../coco/images/train2017/000000001464.jpg 3 | ../coco/images/train2017/000000003220.jpg 4 | ../coco/images/train2017/000000003365.jpg 5 | ../coco/images/train2017/000000004772.jpg 6 | ../coco/images/train2017/000000009987.jpg 7 | ../coco/images/train2017/000000010498.jpg 8 | ../coco/images/train2017/000000012455.jpg 9 | ../coco/images/train2017/000000013992.jpg 10 | ../coco/images/train2017/000000014125.jpg 11 | ../coco/images/train2017/000000016314.jpg 12 | ../coco/images/train2017/000000016670.jpg 13 | ../coco/images/train2017/000000018412.jpg 14 | ../coco/images/train2017/000000021212.jpg 15 | ../coco/images/train2017/000000021826.jpg 16 | ../coco/images/train2017/000000030566.jpg 17 | -------------------------------------------------------------------------------- /data/coco2014.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=../coco/trainvalno5k.txt 3 | valid=../coco/5k.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco2017.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=./data/coco/train2017.txt 3 | valid=./data/coco/val2017.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco64.data: -------------------------------------------------------------------------------- 1 | classes=80 2 | train=data/coco64.txt 3 | valid=data/coco64.txt 4 | names=data/coco.names 5 | -------------------------------------------------------------------------------- /data/coco64.txt: -------------------------------------------------------------------------------- 1 | ../coco/images/train2017/000000109622.jpg 2 | ../coco/images/train2017/000000160694.jpg 3 | ../coco/images/train2017/000000308590.jpg 4 | ../coco/images/train2017/000000327573.jpg 5 | ../coco/images/train2017/000000062929.jpg 6 | ../coco/images/train2017/000000512793.jpg 7 | ../coco/images/train2017/000000371735.jpg 8 | ../coco/images/train2017/000000148118.jpg 9 | ../coco/images/train2017/000000309856.jpg 10 | ../coco/images/train2017/000000141882.jpg 11 | ../coco/images/train2017/000000318783.jpg 12 | ../coco/images/train2017/000000337760.jpg 13 | ../coco/images/train2017/000000298197.jpg 14 | ../coco/images/train2017/000000042421.jpg 15 | ../coco/images/train2017/000000328898.jpg 16 | ../coco/images/train2017/000000458856.jpg 17 | ../coco/images/train2017/000000073824.jpg 18 | ../coco/images/train2017/000000252846.jpg 19 | ../coco/images/train2017/000000459590.jpg 20 | 
../coco/images/train2017/000000273650.jpg 21 | ../coco/images/train2017/000000331311.jpg 22 | ../coco/images/train2017/000000156326.jpg 23 | ../coco/images/train2017/000000262985.jpg 24 | ../coco/images/train2017/000000253580.jpg 25 | ../coco/images/train2017/000000447976.jpg 26 | ../coco/images/train2017/000000378077.jpg 27 | ../coco/images/train2017/000000259913.jpg 28 | ../coco/images/train2017/000000424553.jpg 29 | ../coco/images/train2017/000000000612.jpg 30 | ../coco/images/train2017/000000267625.jpg 31 | ../coco/images/train2017/000000566012.jpg 32 | ../coco/images/train2017/000000196664.jpg 33 | ../coco/images/train2017/000000363331.jpg 34 | ../coco/images/train2017/000000057992.jpg 35 | ../coco/images/train2017/000000520047.jpg 36 | ../coco/images/train2017/000000453903.jpg 37 | ../coco/images/train2017/000000162083.jpg 38 | ../coco/images/train2017/000000268516.jpg 39 | ../coco/images/train2017/000000277436.jpg 40 | ../coco/images/train2017/000000189744.jpg 41 | ../coco/images/train2017/000000041128.jpg 42 | ../coco/images/train2017/000000527728.jpg 43 | ../coco/images/train2017/000000465269.jpg 44 | ../coco/images/train2017/000000246833.jpg 45 | ../coco/images/train2017/000000076784.jpg 46 | ../coco/images/train2017/000000323715.jpg 47 | ../coco/images/train2017/000000560463.jpg 48 | ../coco/images/train2017/000000006263.jpg 49 | ../coco/images/train2017/000000094701.jpg 50 | ../coco/images/train2017/000000521359.jpg 51 | ../coco/images/train2017/000000302903.jpg 52 | ../coco/images/train2017/000000047559.jpg 53 | ../coco/images/train2017/000000480583.jpg 54 | ../coco/images/train2017/000000050025.jpg 55 | ../coco/images/train2017/000000084512.jpg 56 | ../coco/images/train2017/000000508913.jpg 57 | ../coco/images/train2017/000000093708.jpg 58 | ../coco/images/train2017/000000070493.jpg 59 | ../coco/images/train2017/000000539270.jpg 60 | ../coco/images/train2017/000000474402.jpg 61 | ../coco/images/train2017/000000209842.jpg 62 | ../coco/images/train2017/000000028820.jpg 63 | ../coco/images/train2017/000000154257.jpg 64 | ../coco/images/train2017/000000342499.jpg 65 | -------------------------------------------------------------------------------- /data/coco_paper.names: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | street sign 13 | stop sign 14 | parking meter 15 | bench 16 | bird 17 | cat 18 | dog 19 | horse 20 | sheep 21 | cow 22 | elephant 23 | bear 24 | zebra 25 | giraffe 26 | hat 27 | backpack 28 | umbrella 29 | shoe 30 | eye glasses 31 | handbag 32 | tie 33 | suitcase 34 | frisbee 35 | skis 36 | snowboard 37 | sports ball 38 | kite 39 | baseball bat 40 | baseball glove 41 | skateboard 42 | surfboard 43 | tennis racket 44 | bottle 45 | plate 46 | wine glass 47 | cup 48 | fork 49 | knife 50 | spoon 51 | bowl 52 | banana 53 | apple 54 | sandwich 55 | orange 56 | broccoli 57 | carrot 58 | hot dog 59 | pizza 60 | donut 61 | cake 62 | chair 63 | couch 64 | potted plant 65 | bed 66 | mirror 67 | dining table 68 | window 69 | desk 70 | toilet 71 | door 72 | tv 73 | laptop 74 | mouse 75 | remote 76 | keyboard 77 | cell phone 78 | microwave 79 | oven 80 | toaster 81 | sink 82 | refrigerator 83 | blender 84 | book 85 | clock 86 | vase 87 | scissors 88 | teddy bear 89 | hair drier 90 | toothbrush 91 | hair brush -------------------------------------------------------------------------------- /data/get_coco2014.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Zip coco folder 3 | # zip -r coco.zip coco 4 | # tar -czvf coco.tar.gz coco 5 | 6 | # Download labels from Google Drive, accepting presented query 7 | filename="coco2014labels.zip" 8 | fileid="1s6-CmF5_SElM28r52P1OUrCcuXZN-SFo" 9 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 10 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 11 | rm ./cookie 12 | 13 | # Unzip labels 14 | unzip -q ${filename} # for coco.zip 15 | # tar -xzf ${filename} # for coco.tar.gz 16 | rm ${filename} 17 | 18 | # Download and unzip images 19 | cd coco/images 20 | f="train2014.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 21 | f="val2014.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 22 | 23 | # cd out 24 | cd ../.. 25 | -------------------------------------------------------------------------------- /data/get_coco2017.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Zip coco folder 3 | # zip -r coco.zip coco 4 | # tar -czvf coco.tar.gz coco 5 | 6 | # Download labels from Google Drive, accepting presented query 7 | filename="coco2017labels.zip" 8 | fileid="1cXZR_ckHki6nddOmcysCuuJFM--T-Q6L" 9 | curl -c ./cookie -s -L "https://drive.google.com/uc?export=download&id=${fileid}" > /dev/null 10 | curl -Lb ./cookie "https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=${fileid}" -o ${filename} 11 | rm ./cookie 12 | 13 | # Unzip labels 14 | unzip -q ${filename} # for coco.zip 15 | # tar -xzf ${filename} # for coco.tar.gz 16 | rm ${filename} 17 | 18 | # Download and unzip images 19 | cd coco/images 20 | f="train2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 21 | f="val2017.zip" && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f && rm $f 22 | 23 | # cd out 24 | cd ../.. 
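Both get_coco201x.sh scripts rely on the same Google Drive two-step: the first curl stores a cookie, and the `confirm=` token scraped from it unlocks the large-file download. A rough Python equivalent of that flow, assuming the third-party `requests` package is available (it is not part of this repo's scripts):

```python
import requests

def gdrive_download(file_id, dest):
    """Mimic the cookie/confirm dance from get_coco2014.sh / get_coco2017.sh."""
    url = 'https://drive.google.com/uc?export=download'
    session = requests.Session()
    resp = session.get(url, params={'id': file_id}, stream=True)
    # Large files return a confirmation page first; the token lives in a cookie.
    token = next((v for k, v in resp.cookies.items()
                  if k.startswith('download_warning')), None)
    if token is not None:
        resp = session.get(url, params={'id': file_id, 'confirm': token}, stream=True)
    with open(dest, 'wb') as f:
        for chunk in resp.iter_content(chunk_size=1 << 20):  # 1 MiB chunks
            f.write(chunk)

# e.g. gdrive_download('1cXZR_ckHki6nddOmcysCuuJFM--T-Q6L', 'coco2017labels.zip')
```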
25 | -------------------------------------------------------------------------------- /data/mcmot.data: -------------------------------------------------------------------------------- 1 | classes=5 2 | train=./data/train_mcmot.txt 3 | valid=./data/val_mcmot.txt 4 | names=data/mcmot.names -------------------------------------------------------------------------------- /data/mcmot.names: -------------------------------------------------------------------------------- 1 | car 2 | bicycle 3 | person 4 | cyclist 5 | tricycle 6 | -------------------------------------------------------------------------------- /data/mcmot_det.data: -------------------------------------------------------------------------------- 1 | classes=5 2 | train=./data/mcmot_det.train 3 | valid=./data/mcmot_det_test.txt 4 | names=data/mcmot.names -------------------------------------------------------------------------------- /data/test2.txt: -------------------------------------------------------------------------------- 1 | /mnt/diskb/maqiao/multiClass/multiClass190827/JPEGImages/2_2018-05-31_10-20-01-207_3-1527733441.jpg -------------------------------------------------------------------------------- /detect.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from sys import platform 3 | 4 | from models import * # set ONNX_EXPORT in models.py 5 | from utils.datasets import * 6 | from utils.utils import * 7 | 8 | 9 | def detect(save_img=False): 10 | img_size = (320, 192) if ONNX_EXPORT else opt.img_size # (320, 192) or (416, 256) or (608, 352) for (height, width) 11 | out, source, weights, half, view_img, save_txt = opt.output, opt.source, opt.weights, opt.half, opt.view_img, opt.save_txt 12 | web_cam = source == '0' or source.startswith('rtsp') or source.startswith('http') or source.endswith('.txt') 13 | 14 | # Initialize 15 | device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device) 16 | if os.path.exists(out): 17 | shutil.rmtree(out) # delete output folder 18 | os.makedirs(out) # make new output folder 19 | 20 | # Initialize model 21 | # model = Darknet(opt.cfg, img_size) 22 | max_ids_dict = { 23 | 0: 330, 24 | 1: 102, 25 | 2: 104, 26 | 3: 312, 27 | 4: 53 28 | } 29 | model = Darknet(opt.cfg, (img_size, img_size), False, max_ids_dict, 128, 'detect').to(device) 30 | 31 | # Load weights 32 | attempt_download(weights) 33 | if weights.endswith('.pt'): # pytorch format 34 | chkpt = torch.load(weights, map_location=device) 35 | model.load_state_dict(chkpt['model']) 36 | if 'epoch' in chkpt.keys(): 37 | print('Checkpoint of epoch {} loaded.'.format(chkpt['epoch'])) 38 | else: # darknet format 39 | load_darknet_weights(model, weights) 40 | 41 | # Second-stage classifier 42 | classify = False 43 | if classify: 44 | model_c = torch_utils.load_classifier(name='resnet101', n=2) # initialize 45 | model_c.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']) # load weights 46 | model_c.to(device).eval() 47 | 48 | # Eval mode 49 | model.to(device).eval() 50 | 51 | # Fuse Conv2d + BatchNorm2d layers 52 | # model.fuse() 53 | 54 | # Export mode 55 | if ONNX_EXPORT: 56 | model.fuse() 57 | img = torch.zeros((1, 3) + img_size) # (1, 3, 320, 192) 58 | f = opt.weights.replace(opt.weights.split('.')[-1], 'onnx') # *.onnx filename 59 | torch.onnx.export(model, img, f, verbose=False, opset_version=11, 60 | input_names=['images'], output_names=['classes', 'boxes']) 61 | 62 | # Validate exported model 63 | import onnx 64 | model = 
onnx.load(f) # Load the ONNX model 65 | onnx.checker.check_model(model) # Check that the IR is well formed 66 | print(onnx.helper.printable_graph(model.graph)) # Print a human readable representation of the graph 67 | return 68 | 69 | # Half precision 70 | half = half and device.type != 'cpu' # half precision only supported on CUDA 71 | if half: 72 | model.half() 73 | 74 | # Set Data loader 75 | vid_path, vid_writer = None, None 76 | if web_cam: 77 | view_img = True 78 | torch.backends.cudnn.benchmark = True # set True to speed up constant image size inference 79 | dataset = LoadStreams(source, img_size=img_size) 80 | else: 81 | save_img = True 82 | dataset = LoadImages(source, net_w=img_size) 83 | 84 | # Get names and colors 85 | names = load_classes(opt.names) 86 | colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))] 87 | 88 | # Run inference 89 | t0 = time.time() 90 | img = torch.zeros((1, 3, img_size, img_size), device=device) # init img 91 | # _ = model.forward(img.half() if half else img.float()) if device.type != 'cpu' else None # run once 92 | for path, img, im0s, vid_cap in dataset: 93 | img = torch.from_numpy(img).to(device) 94 | img = img.half() if half else img.float() # uint8 to fp16/32 95 | img /= 255.0 # 0 - 255 to 0.0 - 1.0 96 | if img.ndimension() == 3: 97 | img = img.unsqueeze(0) 98 | 99 | # ----- Inference 100 | t1 = torch_utils.time_synchronized() 101 | 102 | # only get aggregated result, not original YOLO output 103 | pred = model.forward(img, augment=opt.augment)[0] 104 | 105 | t2 = torch_utils.time_synchronized() 106 | # ----- 107 | 108 | # to float 109 | if half: 110 | pred = pred.float() 111 | 112 | # Apply NMS 113 | pred = non_max_suppression(pred, 114 | opt.conf_thres, 115 | opt.iou_thres, 116 | merge=False, 117 | classes=opt.classes, 118 | agnostic=opt.agnostic_nms) 119 | 120 | # Apply Classifier 121 | if classify: 122 | pred = apply_classifier(pred, model_c, img, im0s) 123 | 124 | # Process detections 125 | for i, det in enumerate(pred): # detections per image 126 | if web_cam: # batch_size >= 1 127 | p, s, im0 = path[i], '%g: ' % i, im0s[i] 128 | else: 129 | p, s, im0 = path, '', im0s 130 | 131 | save_path = str(Path(out) / Path(p).name) 132 | s += '%gx%g ' % img.shape[2:] # print string 133 | if det is not None and len(det): 134 | # Rescale boxes from img_size to im0 size(from net input size to original size) 135 | det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() 136 | 137 | # Print results 138 | for c in det[:, -1].unique(): 139 | n = (det[:, -1] == c).sum() # detections per class 140 | s += '%g %ss, ' % (n, names[int(c)]) # add to string 141 | 142 | # Write results 143 | for *xyxy, conf, cls in det: # x1, y1, x2, y2, confidence, cls_id 144 | if save_txt: # Write to file 145 | with open(save_path + '.txt', 'a') as file: 146 | file.write(('%g ' * 6 + '\n') % (*xyxy, cls, conf)) 147 | 148 | if save_img or view_img: # Add bbox to image 149 | label = '%s %.2f' % (names[int(cls)], conf) 150 | plot_one_box(xyxy, im0, label=label, color=colors[int(cls)]) 151 | 152 | # Print time (inference + NMS) 153 | print('%sDone. 
(%.3fs)' % (s, t2 - t1)) 154 | 155 | # Stream results 156 | if view_img: 157 | cv2.imshow(p, im0) 158 | if cv2.waitKey(1) == ord('q'): # q to quit 159 | raise StopIteration 160 | 161 | # Save results (image with detections) 162 | if save_img: 163 | if dataset.mode == 'images': 164 | cv2.imwrite(save_path, im0) 165 | else: 166 | if vid_path != save_path: # new video 167 | vid_path = save_path 168 | if isinstance(vid_writer, cv2.VideoWriter): 169 | vid_writer.release() # release previous video writer 170 | 171 | fps = vid_cap.get(cv2.CAP_PROP_FPS) 172 | w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) 173 | h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) 174 | vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*opt.fourcc), fps, (w, h)) 175 | vid_writer.write(im0) 176 | 177 | if save_txt or save_img: 178 | print('Results saved to %s' % os.getcwd() + os.sep + out) 179 | if platform == 'darwin': # MacOS 180 | os.system('open ' + save_path) 181 | 182 | print('Done. (%.3fs)' % (time.time() - t0)) 183 | 184 | 185 | if __name__ == '__main__': 186 | parser = argparse.ArgumentParser() 187 | parser.add_argument('--cfg', type=str, default='cfg/yolov4_half-mcmot.cfg', help='*.cfg path') 188 | parser.add_argument('--names', type=str, default='data/mcmot.names', help='*.names path') 189 | parser.add_argument('--weights', type=str, default='weights/track_last.weights', help='weights path') 190 | parser.add_argument('--source', type=str, default='data/samples', help='source') # input file/folder, 0 for webcam 191 | parser.add_argument('--output', type=str, default='output', help='output folder') # output folder 192 | parser.add_argument('--img-size', type=int, default=768, help='inference size (pixels)') 193 | parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold') 194 | parser.add_argument('--iou-thres', type=float, default=0.6, help='IOU threshold for NMS') 195 | parser.add_argument('--fourcc', type=str, default='mp4v', help='output video codec (verify ffmpeg support)') 196 | parser.add_argument('--half', action='store_true', help='half precision FP16 inference') 197 | parser.add_argument('--device', default='0', help='device id (i.e. 
0 or 0,1) or cpu') 198 | parser.add_argument('--view-img', action='store_true', help='display results') 199 | parser.add_argument('--save-txt', action='store_true', help='save results to *.txt') 200 | parser.add_argument('--classes', nargs='+', type=int, help='filter by class') 201 | parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS') 202 | parser.add_argument('--augment', action='store_true', help='augmented inference') 203 | opt = parser.parse_args() 204 | print(opt) 205 | 206 | with torch.no_grad(): 207 | detect() 208 | -------------------------------------------------------------------------------- /mAPEvaluate/DetectImgAndWriteResultToXml.py: -------------------------------------------------------------------------------- 1 | import os 2 | import darknet as dn 3 | import cv2 4 | import shutil 5 | from lxml import etree, objectify 6 | import os,glob 7 | import xml.etree.ElementTree as ET 8 | 9 | def mycopyfile(srcfile,dstfile): 10 | if not os.path.isfile(srcfile): 11 | print("%s not exist!"%(srcfile)) 12 | else: 13 | fpath,fname=os.path.split(dstfile) #分离文件名和路径 14 | if not os.path.exists(fpath): 15 | os.makedirs(fpath) #创建路径 16 | shutil.copyfile(srcfile,dstfile) #复制文件 17 | print("copy %s -> %s"%( srcfile,dstfile)) 18 | 19 | def mymovefile(srcfile,dstfile): 20 | if not os.path.isfile(srcfile): 21 | print("%s not exist!"%(srcfile)) 22 | else: 23 | fpath,fname=os.path.split(dstfile) #分离文件名和路径 24 | if not os.path.exists(fpath): 25 | os.makedirs(fpath) #创建路径 26 | shutil.move(srcfile,dstfile) #移动文件 27 | print("move %s -> %s"%( srcfile,dstfile)) 28 | 29 | def listdir(path, ftype): 30 | list_name = [] 31 | for f in os.listdir(path): 32 | if os.path.splitext(f)[-1] != ftype: 33 | continue 34 | file_path = os.path.join(path, f) 35 | if os.path.isdir(file_path): 36 | continue 37 | # listdir(file_path, list_name) 38 | else: 39 | list_name.append(file_path) 40 | return list_name 41 | 42 | def imagePath2labelPath(image_path): 43 | image_dir = os.path.dirname(image_path) 44 | p = image_dir.split('/') 45 | root_dir = "/".join(p[:-1]) 46 | label_dir = os.path.join(root_dir,'Annotations') 47 | image_name = os.path.basename(image_path) 48 | image_name = image_name.replace(".jpg", "") 49 | label_path = os.path.join(label_dir, image_name+'.xml') 50 | return label_path 51 | 52 | def getFileName(file_path): 53 | file_name = os.path.basename(file_path) 54 | file_name = file_name.replace('.jpg', '').replace('.png', '') 55 | # p = file_name.split('.') 56 | # name = '' 57 | # for i in range(len(p)-1): 58 | # name += p[i] 59 | # file_name = p[] 60 | return file_name 61 | 62 | def Convert(size, box): 63 | dw = 1./size[0] 64 | dh = 1./size[1] 65 | x = (box[0] + box[1])/2.0 66 | y = (box[2] + box[3])/2.0 67 | w = abs(box[1] - box[0]) 68 | h = abs(box[3] - box[2]) 69 | x = x*dw 70 | w = w*dw 71 | y = y*dh 72 | h = h*dh 73 | return (x,y,w,h) 74 | 75 | def writeXml(xmlfile, imgW, imgH, img_name, det_result): 76 | E = objectify.ElementMaker(annotate=False) 77 | anno_dataroot = E.dataroot( 78 | E.folder(''), 79 | E.filename(img_name), 80 | E.createdata(''), 81 | E.modifydata(''), 82 | E.width(imgW), 83 | E.height(imgH), 84 | E.DayNight(''), 85 | E.weather(''), 86 | E.Marker('Alg'), 87 | E.location(''), 88 | E.imageinfo(''), 89 | E.source(''), 90 | E.database('') 91 | ) 92 | 93 | E_markNode = objectify.ElementMaker(annotate=False) 94 | anno_markNode = E_markNode.markNode() 95 | 96 | for i,obj in enumerate(det_result[0]): 97 | # print('det_result: ', det_result) 98 | # print('obj: ', obj) 
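# The block below maps a normalized center-format detection (cx, cy, w, h) to
# clamped integer pixel corners before it is written into the XML node. A
# standalone sketch of the same math (to_pixel_rect is an illustrative name,
# not a helper of this repo):
def to_pixel_rect(cx, cy, w, h, img_w, img_h):
    """Normalized (cx, cy, w, h) -> clamped integer (xmin, ymin, xmax, ymax)."""
    xmin = max(int(cx * img_w - w * img_w / 2), 0)
    ymin = max(int(cy * img_h - h * img_h / 2), 0)
    xmax = min(int(cx * img_w + w * img_w / 2), img_w - 1)
    ymax = min(int(cy * img_h + h * img_h / 2), img_h - 1)
    return xmin, ymin, xmax, ymax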
99 | targettype = obj[0] 100 | x = obj[2]*imgW 101 | y = obj[3]*imgH 102 | w = obj[4]*imgW 103 | h = obj[5]*imgH 104 | xmin = (int)(x - w/2) 105 | ymin = (int)(y - h/2) 106 | xmax = (int)(x + w/2) 107 | ymax = (int)(y + h/2) 108 | if xmin < 0: 109 | xmin = 0 110 | if ymin < 0: 111 | ymin = 0 112 | if xmax > imgW - 1: 113 | xmax = imgW - 1 114 | if ymax > imgH - 1: 115 | ymax = imgH - 1 116 | if xmax - xmin <= 65: 117 | print(obj[0],x,y,w,h) 118 | print('obj width less than 10') 119 | continue 120 | if ymax - ymin <= 65: 121 | print(obj[0],x,y,w,h) 122 | print('obj height less than 10') 123 | continue 124 | cartype = '' 125 | # if targettype == 'car_front': 126 | # continue 127 | if targettype == 'fr': 128 | targettype = 'car_front' 129 | 130 | if targettype == 'car' or targettype == 'car_front': 131 | cartype = 'saloon_car' 132 | 133 | E_object = objectify.ElementMaker(annotate=False) 134 | anno_object = E_object.object( 135 | E_object.index(i+1), 136 | E_object.targettype(targettype), 137 | E_object.cartype(cartype), 138 | E_object.cartypechild(), 139 | E_object.pose(), 140 | E_object.truncated(), 141 | E_object.difficult(), 142 | E_object.remark() 143 | ) 144 | 145 | E_bndbox = objectify.ElementMaker(annotate=False) 146 | anno_bndbox = E_bndbox.bndbox( 147 | E_bndbox.xmin(xmin), 148 | E_bndbox.ymin(ymin), 149 | E_bndbox.xmax(xmax), 150 | E_bndbox.ymax(ymax) 151 | ) 152 | anno_object.append(anno_bndbox) 153 | anno_markNode.append(anno_object) 154 | anno_dataroot.append(anno_markNode) 155 | 156 | etree.ElementTree(anno_dataroot).write(xmlfile, encoding='utf-8', xml_declaration=True) 157 | 158 | 159 | def batch_analysis(meta_file,cfg_file,wgt_file,meta_file_fr,cfg_file_fr,wgt_file_fr, 160 | thresh,nms,img_path,xml_path): 161 | image_list = listdir(img_path,'.jpg') 162 | image_num = len(image_list) 163 | meta = dn.load_meta(meta_file) 164 | net = dn.load_net(cfg_file,wgt_file,0) 165 | # meta_fr = dn.load_meta(meta_file_fr) 166 | # net_fr = dn.load_net(cfg_file_fr,wgt_file_fr,0) 167 | move_count = 0 168 | for j,image_path in enumerate(image_list): 169 | print(str(j)+'/'+str(image_num)+" "+image_path) 170 | image_name = getFileName(image_path) 171 | img_save_path = os.path.join(img_path,image_name+'.jpg') 172 | xml_save_path = os.path.join(xml_path,image_name+'.xml') 173 | # if os.path.exists(xml_save_path): 174 | # continue 175 | # print(img_save_path) 176 | det = dn.detect_ext(net, meta, bytes(image_path,'utf-8'),thresh) 177 | # det_fr = dn.detect_ext(net_fr, meta_fr, bytes(image_path,'utf-8'),thresh) 178 | img = cv2.imread(image_path) 179 | if img is None: 180 | print('Can not open image') 181 | continue 182 | h,w,c = img.shape 183 | writeXml(xml_save_path,w,h,image_name,det) 184 | dn.free_net(net) 185 | 186 | def batch_analysis_c6(meta_file,cfg_file,wgt_file,thresh,nms,img_path,xml_path): 187 | image_list = listdir(img_path,'.jpg') 188 | image_num = len(image_list) 189 | meta = dn.load_meta(meta_file) 190 | net = dn.load_net(cfg_file,wgt_file,0) 191 | # meta_fr = dn.load_meta(meta_file_fr) 192 | # net_fr = dn.load_net(cfg_file_fr,wgt_file_fr,0) 193 | move_count = 0 194 | for j,image_path in enumerate(image_list): 195 | print(str(j)+'/'+str(image_num)+" "+image_path) 196 | image_name = getFileName(image_path) 197 | img_save_path = os.path.join(img_path,image_name+'.jpg') 198 | xml_save_path = os.path.join(xml_path,image_name+'.xml') 199 | # if os.path.exists(xml_save_path): 200 | # continue 201 | # print(img_save_path) 202 | det = dn.detect_ext(net, meta, bytes(image_path,'utf-8'),thresh) 
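# Note on the shape of `det` here: detect_ext in this repo's bindings (see the
# diou variant dumped below in darknet_ori_diou_cfg.py) appears to return a
# (detections, elapsed_seconds) tuple, each detection being
# [class_name, prob, cx, cy, w, h] with coordinates normalized by image size --
# which is why writeXml() above indexes det_result[0]. A hedged unpacking sketch:
detections, elapsed = det  # assumes the diou-style (results, time) return value
for cls_name, prob, cx, cy, w, h in detections:
    print('%s %.2f at normalized center (%.3f, %.3f)' % (cls_name, prob, cx, cy))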
203 | # det_fr = dn.detect_ext(net_fr, meta_fr, bytes(image_path,'utf-8'),thresh) 204 | img = cv2.imread(image_path) 205 | if img is None: 206 | print('Can not open image') 207 | continue 208 | h,w,c = img.shape 209 | writeXml(xml_save_path,w,h,image_name,det) 210 | dn.free_net(net) 211 | 212 | if __name__ == "__main__": 213 | dn.set_gpu(5) 214 | # img_path = "/mnt/diskc/maqiao/data/20191104/JPEGImages/JPEGImages" 215 | 216 | # 11.25,需要夏燎安排人标注的 217 | # img_path = '/mnt/diskc/maqiao/data/20191122' 218 | # img_path = '/mnt/diskc/maqiao/data/yc20191101~20191119/train' 219 | img_path = '/mnt/diskd/Data_all/SCSN0002-7-12-15' 220 | # img_path = '/mnt/diskd/Data_all/待标注数据20200616' 221 | # img_path = '/users/duanyou/backup_c5/test_1/JPEGImages' 222 | # img_path = '/mnt/diskb/duanyou/需要标注的数据/shangfang_20200605' 223 | # img_path = '/users/duanyou/backup_c5/test_4/train' 224 | # img_path = '/users/duanyou/backup_c5/test_2/1230标注' 225 | # img_path = '/mnt/diskd/Data_all/多目标类型/需要标注的垂停20191217-大连-蒲城-盐城-长沙/train' 226 | 227 | 228 | xml_path = img_path 229 | # if not os.path.exists(xml_path): 230 | # os.mkdir(xml_path) 231 | # xml_path_fr = os.path.join(img_path,'FR_xml') 232 | # if not os.path.exists(xml_path_fr): 233 | # os.mkdir(xml_path_fr) 234 | 235 | # ## multiClass_c5 236 | # cfg_file = b"/users/maqiao/mq/Data_checked/multiClass/backup_yolov3-spp/multiClass_yolov3-spp_test.cfg" 237 | # wgt_file = b"/users/maqiao/mq/Data_checked/multiClass/backup_yolov3-spp/multiClass_yolov3-spp_145000.weights" 238 | # meta_file = b"/users/maqiao/mq/Data_checked/multiClass/backup_c5/multiClass.data" 239 | 240 | # # ## FR 241 | # cfg_file_fr = b"models/FR/tiny-yolo-voc-decode.cfg" 242 | # wgt_file_fr = b"models/FR/tiny_yolo_voc_FR_final.weights" 243 | # meta_file_fr = b"models/FR/FR.data" 244 | 245 | # ## hzpc 246 | # cfg_file_c6 = b"/users/duanyou/c5/hezhoupucheng/multiClass_test.cfg" 247 | # wgt_file_c6 = b"/users/duanyou/c5/hezhoupucheng/multiClass_1084000_20200526.weights" 248 | # meta_file_c6 = b"/users/duanyou/c5/hezhoupucheng/multiClass.data" 249 | 250 | # ## multiClass_c6, 直接用c6的模型跑全部结果【c6 垂停】 251 | # cfg_file_c6 = b"/users/duanyou/backup_c6/experiments/c6_chuiting/multiClass_c6_test.cfg" 252 | # wgt_file_c6 = b"/users/duanyou/backup_c6/experiments/c6_chuiting/multiClass_c6_891000_20200310_best.weights" 253 | # meta_file_c6 = b"/users/duanyou/backup_c6/experiments/c6_chuiting/multiClass_c6.data" 254 | 255 | # new model 256 | cfg_file_c6 = b"/users/duanyou/c5/v4_all_train/v4all_mish_for_yujiazai/yolov4_test.cfg" 257 | wgt_file_c6 = b"/users/duanyou/c5/v4_all_train/v4all_mish_for_yujiazai/yolov4_19000.weights" 258 | meta_file_c6 = b"/users/duanyou/c5/v4_all_train/multiClass.data" 259 | 260 | # batch_analysis(meta_file,cfg_file,wgt_file,meta_file_fr,cfg_file_fr,wgt_file_fr, 261 | # 0.25,0.45,img_path,xml_path) 262 | batch_analysis_c6(meta_file_c6,cfg_file_c6,wgt_file_c6,0.25,0.45,img_path,xml_path) 263 | -------------------------------------------------------------------------------- /mAPEvaluate/ReadAndSaveDarknetDetRes.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | # import darknet as dn 5 | 6 | def read_det_res(res_path): 7 | fr = open(res_path, 'r') 8 | if fr is None: 9 | return -1 10 | cn = 0 11 | num = 0 12 | detect_objs = [] 13 | for line in fr.readlines(): # 依次读取每行 14 | line = line.strip() # 去掉每行头尾空白 15 | if cn == 0: 16 | tmp, num = [str(i) for i in line.split("=")] 17 | # print("object num: ", int(num)) 18 | else: 19 | obj = [float(i) for i 
in line.split()] 20 | obj[0] = int(obj[0]) 21 | detect_objs.append(obj) 22 | # print(obj) 23 | cn += 1 24 | 25 | return detect_objs 26 | 27 | 28 | def save_det_res(det, det_save_path, cls_names): 29 | """ 30 | :param det: 31 | :param det_save_path: 32 | :param cls_names: 33 | :return: 34 | """ 35 | res = 0 36 | f = open(det_save_path, 'w') 37 | if f is None: 38 | res = -1 39 | return res 40 | 41 | f.write('class prob x y w h total=' + str(len(det)) + '\n') 42 | for d in det: 43 | if d[0] not in cls_names: 44 | res = -2 45 | continue 46 | 47 | obj_cls = cls_names.index(d[0]) 48 | f.write('%d %f %f %f %f %f\n' % (obj_cls, d[1], d[2], d[3], d[4], d[5])) 49 | # print(obj_cls,d[2],d[3],d[4],d[5]) 50 | 51 | f.close() 52 | 53 | return res 54 | 55 | 56 | if __name__ == "__main__": 57 | # detect 58 | print('done') 59 | # net = dn.load_net(b"/users/maqiao/mq/Data_checked/multiClass/backup_yolov3-spp/multiClass_yolov3-spp_test.cfg", 60 | # b"/users/maqiao/mq/Data_checked/multiClass/backup_yolov3-spp/multiClass_yolov3-spp_60000.weights", 0) 61 | # meta = dn.load_meta(b"/users/maqiao/mq/Data_checked/multiClass/backup_c5/multiClass.data") 62 | # r = dn.detect_ext(net, meta, b"/users/maqiao/mq/Data_checked/multiClass/multiClass0320/JPEGImages_ori/000000.jpg") 63 | # dn.free_net(net) 64 | # print(meta.classes) 65 | # for c in range(meta.classes): 66 | # print(meta.names[c]) 67 | # print(r) 68 | 69 | # # save detection result to text 70 | # cls_names = [meta.names[i].decode('utf-8').strip() for i in range(meta.classes)] 71 | # saveDetRes(r, 'result.txt', cls_names) 72 | 73 | # # read detection result 74 | # objs = readDetRes('result.txt') 75 | # print(objs) 76 | -------------------------------------------------------------------------------- /mAPEvaluate/ReadAnnotations.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | 3 | import os 4 | import xml.etree.ElementTree as ET 5 | 6 | 7 | def Convert(size, box): 8 | """ 9 | :param size: 10 | :param box: 11 | :return: 12 | """ 13 | dw = 1.0 / size[0] 14 | dh = 1.0 / size[1] 15 | x = (box[0] + box[1]) / 2.0 16 | y = (box[2] + box[3]) / 2.0 17 | w = abs(box[1] - box[0]) 18 | h = abs(box[3] - box[2]) 19 | x = x * dw 20 | w = w * dw 21 | y = y * dh 22 | h = h * dh 23 | 24 | return (x, y, w, h) 25 | 26 | 27 | # Load annotation data 28 | def load_label(label_file, object_type): 29 | fl = open(label_file) 30 | cn = 0 31 | num = 0 32 | label_objs = [] 33 | label_info = fl.read() 34 | if label_info.find('dataroot') < 0: 35 | print("Can not find dataroot") 36 | fl.close() 37 | return label_objs 38 | 39 | try: 40 | root = ET.fromstring(label_info) 41 | except Exception: 42 | print("Error: cannot parse file") 43 | # n = raw_input() 44 | fl.close() 45 | return label_objs 46 | 47 | if root.find('markNode') is not None: 48 | obj = root.find('markNode').find('object') 49 | if obj is not None: 50 | w = int(root.find('width').text) 51 | h = int(root.find('height').text) 52 | # print("w:%d,h%d" % (w, h)) 53 | for obj in root.iter('object'): 54 | target_type = obj.find('targettype').text 55 | car_type = obj.find('cartype').text 56 | if target_type == 'car_front' or target_type == 'car_rear' or target_type == 'car_fr': 57 | target_type = 'fr' 58 | if target_type not in object_type and car_type not in object_type: 59 | # print("********************************* "+str(targettype) + "is not in class list *************************") 60 | continue 61 | 62 | # classes_c9 63 | # if targettype == "car": 64 | # cartype = obj.find('cartype').text 65 | 
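# For reference, the active c5 remapping in load_label() boils down to a lookup
# table. A sketch under the same class conventions (TYPE_REMAP and remap_type
# are illustrative names, not part of this module):
TYPE_REMAP = {'car_front': 'fr', 'car_rear': 'fr', 'car_fr': 'fr', 'motorcycle': 'bicycle'}

def remap_type(target_type, car_type):
    # a 'car' whose cartype is 'motorcycle' counts as 'bicycle' in the c5 scheme
    if target_type == 'car' and car_type == 'motorcycle':
        return 'bicycle'
    return TYPE_REMAP.get(target_type, target_type)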
# # print(cartype) 66 | # if cartype == 'motorcycle': 67 | # targettype = "bicycle" 68 | # elif cartype == 'truck': 69 | # targettype = "truck" 70 | # elif cartype == 'waggon': 71 | # targettype = 'waggon' 72 | # elif cartype == 'passenger_car': 73 | # targettype = 'passenger_car' 74 | # elif cartype == 'unkonwn' or cartype == "shop_truck": 75 | # targettype = "other" 76 | 77 | # classes_c5 78 | if target_type == 'car': 79 | car_type = obj.find('cartype').text 80 | if car_type == 'motorcycle': 81 | target_type = 'bicycle' 82 | if target_type == "motorcycle": 83 | target_type = "bicycle" 84 | 85 | xml_box = obj.find('bndbox') 86 | b = (float(xml_box.find('xmin').text), 87 | float(xml_box.find('xmax').text), 88 | float(xml_box.find('ymin').text), 89 | float(xml_box.find('ymax').text)) 90 | bb = Convert((w, h), b) 91 | 92 | obj = [target_type, float(bb[0]), float(bb[1]), float(bb[2]), float(bb[3])] 93 | # print(obj) 94 | label_objs.append(obj) 95 | 96 | return label_objs 97 | 98 | 99 | if __name__ == "__main__": 100 | label_file = '/mnt/diskb/maqiao/multiClass/test_c6/Annotations/1_5_1.xml' 101 | object_types = ['car', 'bicycle', 'person', 'cyclist', 'tricycle', 'fr', ] 102 | 103 | objs = load_label(label_file, object_types) 104 | print(objs) 105 | -------------------------------------------------------------------------------- /mAPEvaluate/cmp_det_label.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import xlwt 4 | 5 | 6 | def overlap(x1, w1, x2, w2): 7 | l1 = x1 - w1 / 2. 8 | l2 = x2 - w2 / 2. 9 | left = l1 if l1 > l2 else l2 10 | r1 = x1 + w1 / 2. 11 | r2 = x2 + w2 / 2. 12 | right = r1 if r1 < r2 else r2 13 | return right - left 14 | 15 | 16 | def box_intersection(box1, box2): 17 | w = overlap(box1[0], box1[2], box2[0], box2[2]) 18 | h = overlap(box1[1], box1[3], box2[1], box2[3]) 19 | if w < 0 or h < 0: 20 | return 0 21 | area = w * h 22 | return area 23 | 24 | 25 | def box_union(box1, box2): 26 | i = box_intersection(box1, box2) 27 | u = box1[2] * box1[3] + box2[2] * box2[3] - i 28 | return u 29 | 30 | 31 | def box_iou(box1, box2): 32 | return box_intersection(box1, box2) / box_union(box1, box2) 33 | 34 | 35 | def box_to_rect(box, width, height): 36 | x = box[0] 37 | y = box[1] 38 | w = box[2] 39 | h = box[3] 40 | left = (x - w / 2.) * width 41 | top = (y - h / 2.) * height 42 | right = (x + w / 2.) * width 43 | bottom = (y + h / 2.) 
* height 44 | return [int(left), int(top), int(right), int(bottom)] 45 | 46 | 47 | # 比较每张图片的检测结果和标记数据 48 | def CmpData(cmp_type, detect_objs, label_objs, thresh, iou_thresh, img): 49 | # img = cv2.imread("%s/%s.jpg" % (image_path,file_name)) 50 | 51 | df = [False for n in range(0, len(detect_objs))] 52 | correct = 0 53 | iou = 0 54 | label_num = 0 55 | for lobj in label_objs: 56 | if lobj[0] != cmp_type: 57 | continue 58 | label_num += 1 59 | box1 = [lobj[1], lobj[2], lobj[3], lobj[4]] 60 | rect1 = box_to_rect(box1, img.shape[1], img.shape[0]) 61 | best_iou = 0 62 | rect2 = [] 63 | best_no = -1 64 | for dno, dobj in enumerate(detect_objs): 65 | if lobj[0] != dobj[0]: 66 | continue 67 | box2 = [dobj[2], dobj[3], dobj[4], dobj[5]] 68 | biou = box_iou(box1, box2) 69 | if dobj[1] > thresh and biou > best_iou: 70 | best_no = dno 71 | best_iou = biou 72 | rect2 = box_to_rect(box2, img.shape[1], img.shape[0]) 73 | iou += best_iou 74 | # if best_iou > iou_thresh: 75 | if best_iou > iou_thresh and not df[best_no]: #### 若df[best_no]已经是true了,则证明这个检测结果没有匹配的GT,且置信度大于thresh,则算虚警 76 | correct += 1 77 | df[best_no] = True # df相当于该gt被置为已检测到,下一次若还有另一个检测结果与之重合率满足阈值,则不能认为多检测到一个目标 78 | cv2.rectangle(img, (rect1[0], rect1[1]), (rect1[2], rect1[3]), (0, 255, 0), 3) # 绿色 label 79 | cv2.rectangle(img, (rect2[0], rect2[1]), (rect2[2], rect2[3]), (255, 0, 0), 3) # 蓝色 detection 80 | txt = cmp_type + ':' + str(round(detect_objs[best_no][1], 2)) 81 | cv2.putText(img, txt, (rect2[0], rect2[1]), 0, 1, (0, 0, 255), 2) 82 | else: 83 | cv2.rectangle(img, (rect1[0], rect1[1]), (rect1[2], rect1[3]), (0, 255, 255), 3) # 黄色,未检测到的GT 84 | 85 | detect_num = 0 86 | for i, dobj in enumerate(detect_objs): 87 | if dobj[0] != cmp_type: 88 | continue 89 | if dobj[1] > thresh: 90 | detect_num += 1 91 | box2 = [dobj[2], dobj[3], dobj[4], dobj[5]] 92 | if not df[i]: # 如果df[i]=False,则表明这个检测结果没有匹配的GT,且置信度大于thresh,则算虚警,相当于R['det'][jmax] 93 | if dobj[1] > thresh: 94 | rect2 = box_to_rect(box2, img.shape[1], img.shape[0]) 95 | cv2.rectangle(img, (rect2[0], rect2[1]), (rect2[2], rect2[3]), (0, 0, 255), 3) # 红色 虚警 96 | txt = cmp_type + ':' + str(round(dobj[1], 2)) 97 | cv2.putText(img, txt, (rect2[0], rect2[1]), 0, 1, (0, 0, 255), 2) 98 | 99 | # cv2.imwrite("%s/show_result/%s_r.jpg" % (result_path,file_name),img) 100 | 101 | tp = correct 102 | fp = detect_num - tp 103 | tn = 0 104 | fn = label_num - tp 105 | avg_iou = 0 106 | recall = 0 107 | accuracy = 0 108 | precision = 0 109 | if 0 == label_num: 110 | avg_iou = 0 111 | recall = 1 112 | accuracy = 1 if detect_num == 0 else 0 113 | precision = 1 if detect_num == 0 else 0 114 | else: 115 | avg_iou = iou / label_num 116 | recall = correct / float(label_num) 117 | accuracy = correct / float(tp + fn + fp + tn) 118 | corr = (correct if correct < detect_num else detect_num) # 检测正确数大于检测结果数的情况,即同一个目标多次标记 119 | precision = 0 if detect_num == 0 else corr / float(detect_num) 120 | 121 | cmp_res = {'label_num': label_num, 'detect_num': detect_num, 'correct': correct, \ 122 | 'recall': recall, 'avg_iou': avg_iou, 'accuracy': accuracy, 'precision': precision} 123 | 124 | return cmp_res 125 | 126 | 127 | # 输出分析结果到excel文件中 128 | def ExportAnaRes(objtype, res1, total_result, image_path, result_path): 129 | f = xlwt.Workbook() # 创建工作簿 130 | sheet1 = f.add_sheet(u'sheet1', cell_overwrite_ok=True) 131 | row0 = [u'图片名', u'标注目标', u'检测目标', u'检测正确', u'recall', u'iou', u'accuracy', u'precision'] 132 | for i in range(0, len(row0)): 133 | sheet1.write(0, i, row0[i]) 134 | 135 | for r in range(0, len(res1)): 136 | 
sheet1.write(r + 1, 0, res1[r]['image_name']) 137 | sheet1.write(r + 1, 1, res1[r]['label_num']) 138 | sheet1.write(r + 1, 2, res1[r]['detect_num']) 139 | sheet1.write(r + 1, 3, res1[r]['correct']) 140 | sheet1.write(r + 1, 4, res1[r]['recall']) 141 | sheet1.write(r + 1, 5, res1[r]['avg_iou']) 142 | sheet1.write(r + 1, 6, res1[r]['accuracy']) 143 | sheet1.write(r + 1, 7, res1[r]['precision']) 144 | 145 | row_end = [u'total', total_result[0], total_result[1], total_result[2], total_result[3], \ 146 | total_result[4], total_result[5], total_result[6]] 147 | for i in range(0, len(row_end)): 148 | sheet1.write(len(res1) + 2, i, row_end[i]) 149 | 150 | save_name = "AnalyseResult_%s.xls" % (objtype) 151 | save_path = os.path.join(result_path, save_name) 152 | f.save(save_path) 153 | 154 | 155 | def ExportAnaResAll(results, result_path): 156 | f = xlwt.Workbook() # 创建工作簿 157 | sheet1 = f.add_sheet(u'sheet1', cell_overwrite_ok=True) 158 | row0 = [u'模型', u'目标类型', u'标注目标', u'检测目标', u'检测正确', u'recall', u'iou', u'accuracy', u'precision', u'AP'] 159 | for i in range(0, len(row0)): 160 | sheet1.write(0, i, row0[i]) 161 | for r in range(len(results)): 162 | total_result = results[r] 163 | for i in range(0, len(results[r])): 164 | sheet1.write(r + 1, i, results[r][i]) 165 | 166 | save_path = os.path.join(result_path, 'AnalyseResultAll.xls') 167 | f.save(save_path) 168 | -------------------------------------------------------------------------------- /mAPEvaluate/cmp_det_label_sf.py: -------------------------------------------------------------------------------- 1 | # encoding=utf-8 2 | 3 | import os 4 | import cv2 5 | import xlwt 6 | 7 | 8 | def overlap(x1, w1, x2, w2): 9 | l1 = x1 - w1 / 2. 10 | l2 = x2 - w2 / 2. 11 | left = l1 if l1 > l2 else l2 12 | r1 = x1 + w1 / 2. 13 | r2 = x2 + w2 / 2. 14 | right = r1 if r1 < r2 else r2 15 | return right - left 16 | 17 | 18 | def box_intersection(box1, box2): 19 | w = overlap(box1[0], box1[2], box2[0], box2[2]) 20 | h = overlap(box1[1], box1[3], box2[1], box2[3]) 21 | 22 | if w < 0 or h < 0: 23 | return 0 24 | 25 | area = w * h 26 | return area 27 | 28 | 29 | def box_union(box1, box2): 30 | i = box_intersection(box1, box2) 31 | u = box1[2] * box1[3] + box2[2] * box2[3] - i 32 | return u 33 | 34 | 35 | def box_iou(box1, box2): 36 | return box_intersection(box1, box2) / box_union(box1, box2) 37 | 38 | 39 | def box_to_rect(box, width, height): 40 | x = box[0] 41 | y = box[1] 42 | w = box[2] 43 | h = box[3] 44 | left = (x - w / 2.) * width 45 | top = (y - h / 2.) * height 46 | right = (x + w / 2.) * width 47 | bottom = (y + h / 2.) 
* height 48 | return [int(left), int(top), int(right), int(bottom)] 49 | 50 | 51 | # 比较每张图片的检测结果和标记数据 52 | def cmp_data(cmp_type, detect_objs, label_objs, thresh, iou_thresh, img): 53 | # img = cv2.imread("%s/%s.jpg" % (image_path,file_name)) 54 | 55 | df = [False for n in range(0, len(detect_objs))] 56 | correct = 0 57 | iou = 0 58 | label_num = 0 59 | for l_obj in label_objs: 60 | if l_obj[0] != cmp_type: 61 | continue 62 | 63 | label_num += 1 64 | box_1 = [l_obj[1], l_obj[2], l_obj[3], l_obj[4]] 65 | rect_1 = box_to_rect(box_1, img.shape[1], img.shape[0]) 66 | best_iou = 0 67 | rect_2 = [] 68 | best_no = -1 69 | for d_no, d_obj in enumerate(detect_objs): 70 | if l_obj[0] != d_obj[0]: 71 | continue 72 | 73 | box_2 = [d_obj[2], d_obj[3], d_obj[4], d_obj[5]] 74 | biou = box_iou(box_1, box_2) 75 | if d_obj[1] > thresh and biou > best_iou: 76 | best_no = d_no 77 | best_iou = biou 78 | rect_2 = box_to_rect(box_2, img.shape[1], img.shape[0]) 79 | iou += best_iou 80 | 81 | # if best_iou > iou_thresh: 82 | if best_iou > iou_thresh and not df[best_no]: # 若df[best_no]已经是true了,则证明这个检测结果没有匹配的GT,且置信度大于thresh,则算虚警 83 | correct += 1 84 | df[best_no] = True # df相当于该gt被置为已检测到,下一次若还有另一个检测结果与之重合率满足阈值,则不能认为多检测到一个目标 85 | # cv2.rectangle(img,(rect1[0],rect1[1]),(rect1[2],rect1[3]),(0,255,0),3)# 绿色 label 86 | if cmp_type == 'car': 87 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255, 0, 0), 3) 88 | txt = cmp_type + ':' + str(round(detect_objs[best_no][1], 2)) 89 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (255, 0, 0), 2) 90 | elif cmp_type == 'bicycle': 91 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255, 255, 0), 3) 92 | txt = cmp_type + ':' + str(round(detect_objs[best_no][1], 2)) 93 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (255, 255, 0), 2) 94 | elif cmp_type == 'person': 95 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (0, 255, 255), 3) 96 | txt = cmp_type + ':' + str(round(detect_objs[best_no][1], 2)) 97 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (0, 255, 255), 2) 98 | elif cmp_type == 'cyclist': 99 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (0, 255, 0), 3) 100 | txt = cmp_type + ':' + str(round(detect_objs[best_no][1], 2)) 101 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (0, 255, 0), 2) 102 | elif cmp_type == 'tricycle': 103 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (0, 0, 255), 3) 104 | txt = cmp_type + ':' + str(round(detect_objs[best_no][1], 2)) 105 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (0, 0, 255), 2) 106 | elif cmp_type == 'fr': 107 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255, 0, 255), 3) 108 | txt = 'fr' + ':' + str(round(detect_objs[best_no][1], 2)) 109 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (255, 0, 255), 2) 110 | # else: 111 | # cv2.rectangle(img,(rect1[0],rect1[1]),(rect1[2],rect1[3]),(0,255,255),3) # 黄色,未检测到的GT 112 | 113 | detect_num = 0 114 | for i, d_obj in enumerate(detect_objs): 115 | if d_obj[0] != cmp_type: 116 | continue 117 | 118 | if d_obj[1] > thresh: 119 | detect_num += 1 120 | 121 | box_2 = [d_obj[2], d_obj[3], d_obj[4], d_obj[5]] 122 | if not df[i]: # 如果df[i]=False,则表明这个检测结果没有匹配的GT, 且置信度大于thresh,则算虚警,相当于R['det'][jmax] 123 | if d_obj[1] > thresh: 124 | rect_2 = box_to_rect(box_2, img.shape[1], img.shape[0]) 125 | 126 | # cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255,0,0), 3) # 红色 虚警 127 | # if cmp_type == 'fr': 128 | # cmp_type1 = 
'shangfan' 129 | # else: 130 | # cmp_type1 = cmp_type 131 | # txt = cmp_type1+':'+str(round(d_obj[1],2)) 132 | # cv2.putText(img,txt,(rect_2[0],rect_2[1]), 0, 1, (255,0,0),2) 133 | 134 | if cmp_type == 'car': 135 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255, 0, 0), 3) 136 | txt = cmp_type + ':' + str(round(d_obj[1], 2)) 137 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (255, 0, 0), 2) 138 | elif cmp_type == 'bicycle': 139 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255, 255, 0), 3) 140 | txt = cmp_type + ':' + str(round(d_obj[1], 2)) 141 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (255, 255, 0), 2) 142 | elif cmp_type == 'person': 143 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (0, 255, 255), 3) 144 | txt = cmp_type + ':' + str(round(d_obj[1], 2)) 145 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (0, 255, 255), 2) 146 | elif cmp_type == 'cyclist': 147 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (0, 255, 0), 3) 148 | txt = cmp_type + ':' + str(round(d_obj[1], 2)) 149 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (0, 255, 0), 2) 150 | elif cmp_type == 'tricycle': 151 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (0, 0, 255), 3) 152 | txt = cmp_type + ':' + str(round(d_obj[1], 2)) 153 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (0, 0, 255), 2) 154 | elif cmp_type == 'fr': 155 | cv2.rectangle(img, (rect_2[0], rect_2[1]), (rect_2[2], rect_2[3]), (255, 0, 255), 3) 156 | txt = 'fr' + ':' + str(round(d_obj[1], 2)) 157 | cv2.putText(img, txt, (rect_2[0], rect_2[1]), 0, 1, (255, 0, 255), 2) 158 | 159 | # cv2.imwrite("%s/show_result/%s_r.jpg" % (result_path, file_name), img) 160 | 161 | tp = correct 162 | fp = detect_num - tp 163 | tn = 0 164 | fn = label_num - tp 165 | avg_iou = 0 166 | recall = 0 167 | accuracy = 0 168 | precision = 0 169 | if 0 == label_num: 170 | avg_iou = 0 171 | recall = 1 172 | accuracy = 1 if detect_num == 0 else 0 173 | precision = 1 if detect_num == 0 else 0 174 | else: 175 | avg_iou = iou / label_num 176 | recall = correct / float(label_num) 177 | accuracy = correct / float(tp + fn + fp + tn) 178 | corr = (correct if correct < detect_num else detect_num) # 检测正确数大于检测结果数的情况,即同一个目标多次标记 179 | precision = 0 if detect_num == 0 else corr / float(detect_num) 180 | 181 | cmp_res = {'label_num': label_num, 182 | 'detect_num': detect_num, 183 | 'correct': correct, 184 | 'recall': recall, 185 | 'avg_iou': avg_iou, 186 | 'accuracy': accuracy, 187 | 'precision': precision} 188 | 189 | return cmp_res 190 | 191 | 192 | # 输出分析结果到excel文件中 193 | def ExportAnaRes(objtype, res1, total_result, image_path, result_path): 194 | f = xlwt.Workbook() # 创建工作簿 195 | sheet1 = f.add_sheet(u'sheet1', cell_overwrite_ok=True) 196 | row0 = [u'图片名', u'标注目标', u'检测目标', u'检测正确', u'recall', u'iou', u'accuracy', u'precision'] 197 | for i in range(0, len(row0)): 198 | sheet1.write(0, i, row0[i]) 199 | 200 | for r in range(0, len(res1)): 201 | sheet1.write(r + 1, 0, res1[r]['image_name']) 202 | sheet1.write(r + 1, 1, res1[r]['label_num']) 203 | sheet1.write(r + 1, 2, res1[r]['detect_num']) 204 | sheet1.write(r + 1, 3, res1[r]['correct']) 205 | sheet1.write(r + 1, 4, res1[r]['recall']) 206 | sheet1.write(r + 1, 5, res1[r]['avg_iou']) 207 | sheet1.write(r + 1, 6, res1[r]['accuracy']) 208 | sheet1.write(r + 1, 7, res1[r]['precision']) 209 | 210 | row_end = [u'total', total_result[0], total_result[1], total_result[2], total_result[3], \ 211 | 
total_result[4], total_result[5], total_result[6]] 212 | for i in range(0, len(row_end)): 213 | sheet1.write(len(res1) + 2, i, row_end[i]) 214 | 215 | save_name = "AnalyseResult_%s.xls" % (objtype) 216 | save_path = os.path.join(result_path, save_name) 217 | f.save(save_path) 218 | 219 | 220 | def ExportAnaResAll(results, result_path): 221 | f = xlwt.Workbook() # 创建工作簿 222 | sheet1 = f.add_sheet(u'sheet1', cell_overwrite_ok=True) 223 | row0 = [u'模型', u'目标类型', u'标注目标', u'检测目标', u'检测正确', u'recall', u'iou', u'accuracy', u'precision', u'AP'] 224 | for i in range(0, len(row0)): 225 | sheet1.write(0, i, row0[i]) 226 | for r in range(len(results)): 227 | total_result = results[r] 228 | for i in range(0, len(results[r])): 229 | sheet1.write(r + 1, i, results[r][i]) 230 | 231 | save_path = os.path.join(result_path, 'AnalyseResultAll.xls') 232 | f.save(save_path) 233 | print('{:s} exported.'.format(save_path)) 234 | -------------------------------------------------------------------------------- /mAPEvaluate/darknet_ori_diou_cfg.py: -------------------------------------------------------------------------------- 1 | from ctypes import * 2 | import math 3 | import random 4 | import cv2 5 | import time 6 | 7 | def sample(probs): 8 | s = sum(probs) 9 | probs = [a/s for a in probs] 10 | r = random.uniform(0, 1) 11 | for i in range(len(probs)): 12 | r = r - probs[i] 13 | if r <= 0: 14 | return i 15 | return len(probs)-1 16 | 17 | def c_array(ctype, values): 18 | arr = (ctype*len(values))() 19 | arr[:] = values 20 | return arr 21 | 22 | class BOX(Structure): 23 | _fields_ = [("x", c_float), 24 | ("y", c_float), 25 | ("w", c_float), 26 | ("h", c_float)] 27 | 28 | class DETECTION(Structure): 29 | _fields_ = [("bbox", BOX), 30 | ("classes", c_int), 31 | ("prob", POINTER(c_float)), 32 | ("mask", POINTER(c_float)), 33 | ("objectness", c_float), 34 | ("sort_class", c_int)] 35 | 36 | 37 | class IMAGE(Structure): 38 | _fields_ = [("w", c_int), 39 | ("h", c_int), 40 | ("c", c_int), 41 | ("data", POINTER(c_float))] 42 | 43 | class METADATA(Structure): 44 | _fields_ = [("classes", c_int), 45 | ("names", POINTER(c_char_p))] 46 | 47 | 48 | 49 | lib = CDLL("/users/duanyou/backup_c6/v3tiny_experiments/1_v3tiny_diou/diou_darknet/libdarknet.so", RTLD_GLOBAL) 50 | 51 | lib.network_width.argtypes = [c_void_p] 52 | lib.network_width.restype = c_int 53 | lib.network_height.argtypes = [c_void_p] 54 | lib.network_height.restype = c_int 55 | 56 | predict = lib.network_predict 57 | predict.argtypes = [c_void_p, POINTER(c_float)] 58 | predict.restype = POINTER(c_float) 59 | 60 | set_gpu = lib.cuda_set_device 61 | set_gpu.argtypes = [c_int] 62 | 63 | make_image = lib.make_image 64 | make_image.argtypes = [c_int, c_int, c_int] 65 | make_image.restype = IMAGE 66 | 67 | get_network_boxes = lib.get_network_boxes 68 | get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)] 69 | get_network_boxes.restype = POINTER(DETECTION) 70 | 71 | make_network_boxes = lib.make_network_boxes 72 | make_network_boxes.argtypes = [c_void_p] 73 | make_network_boxes.restype = POINTER(DETECTION) 74 | 75 | free_detections = lib.free_detections 76 | free_detections.argtypes = [POINTER(DETECTION), c_int] 77 | 78 | free_ptrs = lib.free_ptrs 79 | free_ptrs.argtypes = [POINTER(c_void_p), c_int] 80 | 81 | network_predict = lib.network_predict 82 | network_predict.argtypes = [c_void_p, POINTER(c_float)] 83 | 84 | reset_rnn = lib.reset_rnn 85 | reset_rnn.argtypes = [c_void_p] 86 | 87 | load_net = lib.load_network 
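# Each binding in this file follows the same ctypes pattern: fetch the symbol
# from libdarknet.so, then declare argtypes/restype so Python marshals the
# arguments correctly. All c_char_p parameters must be bytes under Python 3,
# e.g. b"model.cfg" or path.encode('utf-8'). A usage sketch with illustrative
# file names (not files of this repo):
#
#   net = load_net(b"model_test.cfg", b"model.weights", 0)
#   meta = load_meta(b"model.data")
#   dets = detect(net, meta, b"image.jpg", thresh=0.25)
#   free_net(net)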
88 | load_net.argtypes = [c_char_p, c_char_p, c_int] 89 | load_net.restype = c_void_p 90 | 91 | free_net = lib.free_network 92 | free_net.argtypes = [c_void_p] 93 | 94 | do_nms_obj = lib.do_nms_obj 95 | do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] 96 | 97 | do_nms_sort = lib.do_nms_sort 98 | do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] 99 | 100 | free_image = lib.free_image 101 | free_image.argtypes = [IMAGE] 102 | 103 | letterbox_image = lib.letterbox_image 104 | letterbox_image.argtypes = [IMAGE, c_int, c_int] 105 | letterbox_image.restype = IMAGE 106 | 107 | load_meta = lib.get_metadata 108 | lib.get_metadata.argtypes = [c_char_p] 109 | lib.get_metadata.restype = METADATA 110 | 111 | load_image = lib.load_image_color 112 | load_image.argtypes = [c_char_p, c_int, c_int] 113 | load_image.restype = IMAGE 114 | 115 | rgbgr_image = lib.rgbgr_image 116 | rgbgr_image.argtypes = [IMAGE] 117 | 118 | predict_image = lib.network_predict_image 119 | predict_image.argtypes = [c_void_p, IMAGE] 120 | predict_image.restype = POINTER(c_float) 121 | 122 | predict_image_cls = lib.network_predict_image 123 | predict_image_cls.argtypes = [c_void_p, IMAGE] 124 | predict_image_cls.restype = POINTER(c_float) 125 | 126 | def classify(net, meta, im): 127 | out = predict_image_cls(net, im) 128 | res = [] 129 | for i in range(meta.classes): 130 | res.append((meta.names[i].decode('utf-8').strip(), out[i])) 131 | res = sorted(res, key=lambda x: -x[1]) 132 | return res 133 | 134 | def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): 135 | im = load_image(image, 0, 0) 136 | num = c_int(0) 137 | pnum = pointer(num) 138 | predict_image(net, im) 139 | dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum) 140 | num = pnum[0] 141 | if (nms): do_nms_obj(dets, num, meta.classes, nms) 142 | 143 | res = [] 144 | for j in range(num): 145 | for i in range(meta.classes): 146 | if dets[j].prob[i] > 0: 147 | b = dets[j].bbox 148 | res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h))) 149 | res = sorted(res, key=lambda x: -x[1]) 150 | free_image(im) 151 | free_detections(dets, num) 152 | return res 153 | 154 | def detect_ext(net, meta, image, thresh=.2, hier_thresh=.5, nms=.45): 155 | im = load_image(image, 0, 0) 156 | num = c_int(0) 157 | pnum = pointer(num) 158 | starttime = time.time() 159 | predict_image(net, im) 160 | endtime = time.time() 161 | print('xxxxxxxxxxxxxxxxxxxx ', endtime - starttime) 162 | dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum) 163 | num = pnum[0] 164 | # if (nms): do_nms_obj(dets, num, meta.classes, nms) 165 | if (nms): do_nms_sort(dets, num, meta.classes, nms) 166 | 167 | res = [] 168 | for j in range(num): 169 | for i in range(meta.classes): 170 | if dets[j].prob[i] > 0: 171 | b = dets[j].bbox 172 | b.x /= im.w 173 | b.y /= im.h 174 | b.w /= im.w 175 | b.h /= im.h 176 | res.append([meta.names[i].decode('utf-8').strip(), dets[j].prob[i], b.x, b.y, b.w, b.h]) 177 | res = sorted(res, key=lambda x: -x[1]) 178 | free_image(im) 179 | free_detections(dets, num) 180 | return res,endtime - starttime 181 | 182 | if __name__ == "__main__": 183 | #net = load_net("cfg/densenet201.cfg", "/home/pjreddie/trained/densenet201.weights", 0) 184 | #im = load_image("data/wolf.jpg", 0, 0) 185 | #meta = load_meta("cfg/imagenet1k.data") 186 | #r = classify(net, meta, im) 187 | #print r[:10] 188 | # for i in range(10): 189 | net = 
load_net(b"/users/maqiao/mq/Data_checked/multiClass/backup_yolov3-spp/multiClass_yolov3-spp_test.cfg", 190 | b"/users/maqiao/mq/Data_checked/multiClass/backup_yolov3-spp/multiClass_yolov3-spp_60000.weights", 0) 191 | meta = load_meta(b"/users/maqiao/mq/Data_checked/multiClass/backup_c5/multiClass.data") 192 | r = detect_ext(net, meta, b"/users/maqiao/mq/Data_checked/multiClass/multiClass0320/JPEGImages_ori/000000.jpg") 193 | free_net(net) 194 | print(meta.classes) 195 | for c in range(meta.classes): 196 | print(meta.names[c]) 197 | print(r) 198 | 199 | # import cv2 200 | # img = cv2.imread("/mnt/diskc/xiaofan/darknet_2019/data/car_test.jpg") 201 | # for detect in r: 202 | # cv2.rectangle(img, (int(detect[2][0])-32, int(detect[2][1])-32), 203 | # (int(detect[2][0])+int(detect[2][2]), int(detect[2][1])+int(detect[2][3])), 204 | # (0, 0, 255), 3) 205 | # cv2.imwrite('result.jpg', img) 206 | 207 | 208 | 209 | # # classify 210 | # meta = 211 | # net = load_net() 212 | -------------------------------------------------------------------------------- /mAPEvaluate/findImgByObjectType.py: -------------------------------------------------------------------------------- 1 | import os 2 | import darknet as dn 3 | import cv2 4 | import shutil 5 | import numpy as np 6 | 7 | def mycopyfile(srcfile,dstfile): 8 | if not os.path.isfile(srcfile): 9 | print("%s not exist!"%(srcfile)) 10 | else: 11 | fpath,fname=os.path.split(dstfile) #分离文件名和路径 12 | if not os.path.exists(fpath): 13 | os.makedirs(fpath) #创建路径 14 | shutil.copyfile(srcfile,dstfile) #复制文件 15 | print("copy %s -> %s"%( srcfile,dstfile)) 16 | 17 | def mymovefile(srcfile,dstfile): 18 | if not os.path.isfile(srcfile): 19 | print("%s not exist!"%(srcfile)) 20 | else: 21 | fpath,fname=os.path.split(dstfile) #分离文件名和路径 22 | if not os.path.exists(fpath): 23 | os.makedirs(fpath) #创建路径 24 | shutil.move(srcfile,dstfile) #移动文件 25 | print("move %s -> %s"%( srcfile,dstfile)) 26 | 27 | def listdir(path): 28 | list_name = [] 29 | for file in os.listdir(path): 30 | file_path = os.path.join(path, file) 31 | if os.path.isdir(file_path): 32 | list_name += listdir(file_path) 33 | else: 34 | list_name.append(file_path) 35 | return list_name 36 | 37 | def imagePath2labelPath(image_path): 38 | image_dir = os.path.dirname(image_path) 39 | p = image_dir.split('/') 40 | root_dir = "/".join(p[:-1]) 41 | label_dir = os.path.join(root_dir,'Annotations') 42 | image_name = os.path.basename(image_path) 43 | image_name = image_name.replace(".jpg", "") 44 | label_path = os.path.join(label_dir, image_name+'.xml') 45 | return label_path 46 | 47 | def getFileName(file_path): 48 | file_name = os.path.basename(file_path) 49 | p = file_name.split('.') 50 | name = '' 51 | for i in range(len(p)-1): 52 | name += p[i] 53 | # file_name = p[] 54 | return name 55 | 56 | def Convert(size, box): 57 | dw = 1./size[0] 58 | dh = 1./size[1] 59 | x = (box[0] + box[1])/2.0 60 | y = (box[2] + box[3])/2.0 61 | w = abs(box[1] - box[0]) 62 | h = abs(box[3] - box[2]) 63 | x = x*dw 64 | w = w*dw 65 | y = y*dh 66 | h = h*dh 67 | return (x,y,w,h) 68 | 69 | # 计算前后帧之间的多个检测框间的iou 70 | def batch_iou(boxes1, boxes2, width, height): 71 | img1 = np.zeros((height,width), dtype=np.int) 72 | for b in boxes1: 73 | x1 = int(b[0]*width) 74 | x2 = x1+int(b[2]*width) 75 | y1 = int(b[1]*height) 76 | y2 = y1+int(b[3]*height) 77 | img1[y1:y2,x1:x2] = 1 78 | img2 = np.zeros((height,width), dtype=np.int) 79 | for b in boxes2: 80 | x1 = int(b[0]*width) 81 | x2 = x1+int(b[2]*width) 82 | y1 = int(b[1]*height) 83 | y2 = 
y1+int(b[3]*height) 84 | img2[y1:y2,x1:x2] = 1 85 | img = img1 + img2 86 | union = np.where(img>0) 87 | inter = np.where(img>1) 88 | iou = float(len(inter[0]))/len(union[0]) 89 | return iou 90 | 91 | def batch_analysis(meta_file,cfg_file,wgt_file,thresh,nms,src_path,dst_path): 92 | image_list = listdir(src_path) 93 | image_list.sort() 94 | image_num = len(image_list) 95 | meta = dn.load_meta(meta_file) 96 | object_type = [meta.names[i].decode('utf-8').strip() for i in range(meta.classes)] 97 | net = dn.load_net(cfg_file,wgt_file,0) 98 | move_count = 0 99 | boxes_last = [] 100 | for j,image_path in enumerate(image_list): 101 | print(str(j)+'/'+str(image_num)+" moved: "+str(move_count)) 102 | # print(image_path) 103 | try: 104 | img = cv2.imread(image_path) 105 | except: 106 | print('can not read image******************************************') 107 | continue 108 | h,w = img.shape[:2] 109 | image_name = getFileName(image_path) 110 | # print(image_name) 111 | image_name = image_name.replace('(','1_') 112 | image_name = image_name.replace(')','_1') 113 | img_save_path = os.path.join(dst_path,image_name+'.jpg') 114 | # print(img_save_path) 115 | det = dn.detect_ext(net, meta, bytes(image_path,'utf-8'),thresh) 116 | boxes = [] 117 | is_move_file = False 118 | if j%10 == 0: 119 | is_move_file = True 120 | for d in det: 121 | # try: 122 | # img = cv2.imread(image_path) 123 | # except: 124 | # print('can not read image******************************************') 125 | # continue 126 | # h,w = img.shape[:2] 127 | boxes.append(d[2:]) 128 | print('qqqqq,', d) 129 | bw = d[4]*w 130 | bh = d[5]*h 131 | if bw < 20 or bh < 20: 132 | print("bw or bh is less than 20") 133 | continue 134 | obj_type = d[0] 135 | if obj_type == 'tricycle': 136 | print("tricycle ************************************************") 137 | is_move_file = True 138 | break 139 | elif obj_type == 'car': 140 | if bw*bh/(w*h) > 0.25: 141 | print("big car ....................................................") 142 | is_move_file = True 143 | break 144 | if boxes_last != [] and boxes != []: 145 | iou = batch_iou(boxes_last,boxes,w,h) 146 | # print('iou: '+str(iou)) 147 | if iou > 0.6: 148 | print('batch iou: '+str(iou)) 149 | is_move_file = False 150 | # continue 151 | if is_move_file: 152 | move_count += 1 153 | if not os.path.exists(img_save_path): 154 | mymovefile(image_path,img_save_path) 155 | boxes_last = boxes 156 | dn.free_net(net) 157 | 158 | 159 | if __name__ == "__main__": 160 | #dn.set_gpu(0) 161 | src_path = "/mnt/diskc/zhoukai/puer0605/" # 原始的图片目录 162 | dst_path = "/mnt/diskc/zhoukai/puer0605/puer_jingjian" # 过滤后的图片目录 163 | cfg_file = b"/users/duanyou/c5/v4_all_train/yolov4_test.cfg" 164 | wgt_file = b"/users/duanyou/c5/v4_all_train/yolov4_5000.weights" 165 | meta_file = b"/users/duanyou/c5/v4_all_train/multiClass.data" 166 | if not os.path.exists(dst_path): 167 | os.mkdir(dst_path) 168 | batch_analysis(meta_file,cfg_file,wgt_file,0.2,0.45,src_path,dst_path) 169 | -------------------------------------------------------------------------------- /mAPEvaluate/findImgByObjectType_zhou.py: -------------------------------------------------------------------------------- 1 | import os 2 | import darknet as dn 3 | import cv2 4 | import shutil 5 | import numpy as np 6 | 7 | def mycopyfile(srcfile,dstfile): 8 | if not os.path.isfile(srcfile): 9 | print("%s not exist!"%(srcfile)) 10 | else: 11 | fpath,fname=os.path.split(dstfile) #分离文件名和路径 12 | if not os.path.exists(fpath): 13 | os.makedirs(fpath) #创建路径 14 | 
shutil.copyfile(srcfile,dstfile) #复制文件 15 | print("copy %s -> %s"%( srcfile,dstfile)) 16 | 17 | def mymovefile(srcfile,dstfile): 18 | if not os.path.isfile(srcfile): 19 | print("%s not exist!"%(srcfile)) 20 | else: 21 | fpath,fname=os.path.split(dstfile) #分离文件名和路径 22 | if not os.path.exists(fpath): 23 | os.makedirs(fpath) #创建路径 24 | shutil.move(srcfile,dstfile) #移动文件 25 | print("move %s -> %s"%( srcfile,dstfile)) 26 | 27 | def listdir(path): 28 | list_name = [] 29 | for file in os.listdir(path): 30 | file_path = os.path.join(path, file) 31 | if os.path.isdir(file_path): 32 | list_name += listdir(file_path) 33 | else: 34 | list_name.append(file_path) 35 | return list_name 36 | 37 | def imagePath2labelPath(image_path): 38 | image_dir = os.path.dirname(image_path) 39 | p = image_dir.split('/') 40 | root_dir = "/".join(p[:-1]) 41 | label_dir = os.path.join(root_dir,'Annotations') 42 | image_name = os.path.basename(image_path) 43 | image_name = image_name.replace(".jpg", "") 44 | label_path = os.path.join(label_dir, image_name+'.xml') 45 | return label_path 46 | 47 | def getFileName(file_path): 48 | file_name = os.path.basename(file_path) 49 | p = file_name.split('.') 50 | name = '' 51 | for i in range(len(p)-1): 52 | name += p[i] 53 | # file_name = p[] 54 | return name 55 | 56 | def Convert(size, box): 57 | dw = 1./size[0] 58 | dh = 1./size[1] 59 | x = (box[0] + box[1])/2.0 60 | y = (box[2] + box[3])/2.0 61 | w = abs(box[1] - box[0]) 62 | h = abs(box[3] - box[2]) 63 | x = x*dw 64 | w = w*dw 65 | y = y*dh 66 | h = h*dh 67 | return (x,y,w,h) 68 | 69 | # 计算前后帧之间的多个检测框间的iou 70 | def batch_iou(boxes1, boxes2, width, height): 71 | img1 = np.zeros((height,width), dtype=np.int) 72 | for b in boxes1: 73 | x1 = int(b[0]*width) 74 | x2 = x1+int(b[2]*width) 75 | y1 = int(b[1]*height) 76 | y2 = y1+int(b[3]*height) 77 | img1[y1:y2,x1:x2] = 1 78 | img2 = np.zeros((height,width), dtype=np.int) 79 | for b in boxes2: 80 | x1 = int(b[0]*width) 81 | x2 = x1+int(b[2]*width) 82 | y1 = int(b[1]*height) 83 | y2 = y1+int(b[3]*height) 84 | img2[y1:y2,x1:x2] = 1 85 | img = img1 + img2 86 | union = np.where(img>0) 87 | inter = np.where(img>1) 88 | iou = float(len(inter[0]))/len(union[0]) 89 | return iou 90 | 91 | def batch_analysis(meta_file,cfg_file,wgt_file,thresh,nms,src_path,dst_path): 92 | 93 | image_list = listdir(src_path) 94 | image_list.sort() 95 | image_num = len(image_list) 96 | meta = dn.load_meta(meta_file) 97 | object_type = [meta.names[i].decode('utf-8').strip() for i in range(meta.classes)] 98 | net = dn.load_net(cfg_file,wgt_file,0) 99 | move_count = 0 100 | boxes_last = [] 101 | 102 | for j,image_path in enumerate(image_list): 103 | 104 | print(str(j)+'/'+str(image_num)+" moved: "+str(move_count)) 105 | # print(image_path) 106 | 107 | try: 108 | img = cv2.imread(image_path) 109 | except: 110 | print('can not read image******************************************') 111 | continue 112 | h,w = img.shape[:2] 113 | image_name = getFileName(image_path) 114 | print("image_name", image_name) 115 | image_name = image_name.replace('(','1_') 116 | image_name = image_name.replace(')','_1') 117 | img_save_path = os.path.join(dst_path,image_name+'.jpg') 118 | # print(img_save_path) 119 | det = dn.detect_ext(net, meta, bytes(image_path,'utf-8'),thresh) 120 | boxes = [] 121 | is_move_file = False 122 | 123 | if j%20 == 0: #20数值越大 比对iou的间隔越大 124 | is_move_file = True 125 | 126 | for d in det: 127 | # try: 128 | # img = cv2.imread(image_path) 129 | # except: 130 | # print('can not read 
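# batch_iou() above rasterizes each frame's boxes into a binary mask and scores
# frame-to-frame overlap as painted-pixel intersection over union. np.int was
# removed in NumPy 1.24, so here is a sketch of the same routine with supported
# dtypes (mask_iou is an illustrative name; boxes are normalized (x, y, w, h) as above):
def mask_iou(boxes1, boxes2, width, height):
    m1 = np.zeros((height, width), dtype=np.uint8)
    m2 = np.zeros((height, width), dtype=np.uint8)
    for m, boxes in ((m1, boxes1), (m2, boxes2)):
        for x, y, w, h in boxes:
            x1, y1 = int(x * width), int(y * height)
            m[y1:y1 + int(h * height), x1:x1 + int(w * width)] = 1
    inter = np.logical_and(m1, m2).sum()
    union = np.logical_or(m1, m2).sum()
    return float(inter) / union if union else 0.0  # guard the empty-union case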
image******************************************') 131 | # continue 132 | # h,w = img.shape[:2] 133 | print("d",d) 134 | boxes.append(d[2:]) 135 | bw = d[4]*w 136 | bh = d[5]*h 137 | # if bw < 20 or bh < 20: 138 | # print("bw or bh is less than 20") 139 | # continue 140 | # obj_type = d[0] 141 | # if obj_type == 'tricycle': 142 | # print("tricycle ************************************************") 143 | # is_move_file = True 144 | # break 145 | # elif obj_type == 'car': 146 | # if bw*bh/(w*h) > 0.25: 147 | # print("big car ....................................................") 148 | # is_move_file = True 149 | # break 150 | if boxes_last != [] and boxes != []: 151 | iou = batch_iou(boxes_last,boxes,w,h) 152 | # print('iou: '+str(iou)) 153 | if iou > 0.6: 154 | print('batch iou: '+str(iou)) 155 | is_move_file = False 156 | print("iou^^^^^^^^^^^^^^^^^^^^^^^^^") 157 | # continue 158 | if is_move_file: 159 | move_count += 1 160 | if not os.path.exists(img_save_path): 161 | mymovefile(image_path,img_save_path) 162 | boxes_last = boxes 163 | dn.free_net(net) 164 | 165 | 166 | if __name__ == "__main__": 167 | # dn.set_gpu(3) 168 | src_path = "/mnt/diskc/zhoukai/puer0605/" # 原始的图片目录 169 | dst_path = "/mnt/diskc/zhoukai/puer0605/puer_jingjian" # 过滤后的图片目录 170 | cfg_file = b"/users/duanyou/c5/v4_all_train/yolov4_test.cfg" 171 | wgt_file = b"/users/duanyou/c5/v4_all_train/yolov4_5000.weights" 172 | meta_file = b"/users/duanyou/c5/v4_all_train/multiClass.data" 173 | if not os.path.exists(dst_path): 174 | os.mkdir(dst_path) 175 | batch_analysis(meta_file,cfg_file,wgt_file,0.2,0.45,src_path,dst_path) 176 | -------------------------------------------------------------------------------- /mAPEvaluate/model_analysis_v4all.py: -------------------------------------------------------------------------------- 1 | import os 2 | import darknet as dn 3 | 4 | import cv2 5 | import time 6 | import numpy 7 | import copy 8 | 9 | import cmp_det_label as cdl 10 | from readAndSaveDarknetDetRes import readDetRes,saveDetRes 11 | from readAnnotations import LoadLabel 12 | from voc_eval import voc_eval 13 | 14 | #读取文件列表 15 | def LoadFileList(files): 16 | fl = open(files,"r") 17 | file_lists = [] 18 | while True: 19 | lines = fl.readlines() 20 | if len(lines) == 0: 21 | break 22 | #print(path_list) 23 | 24 | for line in lines: 25 | line = line.strip('\n') 26 | # ph = line.split("/") 27 | # file_name = ph[-1] 28 | # file_name = os.path.basename(line) 29 | # file_name = file_name.replace(".jpg", "") 30 | file_lists.append(line) 31 | #print(file_name) 32 | #print(path_lists) 33 | fl.close() 34 | return file_lists 35 | 36 | def listdir(path): 37 | list_name = [] 38 | for file in os.listdir(path): 39 | file_path = os.path.join(path, file) 40 | if os.path.isdir(file_path): 41 | continue 42 | # listdir(file_path, list_name) 43 | else: 44 | list_name.append(file_path) 45 | return list_name 46 | 47 | def imagePath2labelPath(image_path): 48 | image_dir = os.path.dirname(image_path) 49 | p = image_dir.split('/') 50 | root_dir = "/".join(p[:-1]) 51 | label_dir = os.path.join(root_dir,'Annotations') 52 | image_name = os.path.basename(image_path) 53 | image_name = image_name.replace(".jpg", "") 54 | label_path = os.path.join(label_dir, image_name+'.xml') 55 | return label_path 56 | 57 | def getFileName(file_path): 58 | file_name = os.path.basename(file_path) 59 | p = file_name.split('.') 60 | name = '' 61 | for i in range(len(p)-1): 62 | name += p[i] 63 | # file_name = p[] 64 | return name 65 | 66 | def getMetaCfgName(file_path): 67 | # 
寻找file_path的同文件夹里的.data文件 68 | p = os.path.dirname(file_path) 69 | for file in os.listdir(p): 70 | if '.data' in file: 71 | data_path = file 72 | data_path = p + '/' + data_path 73 | if 'test.cfg' in file: 74 | cfg_path = file 75 | cfg_path = p + '/' + cfg_path 76 | 77 | return data_path.encode('utf-8'), cfg_path.encode('utf-8') 78 | 79 | def batch_detection(): 80 | pass 81 | 82 | def batch_analysis(weights_list_file, image_list_file, thresh, iou_thresh,result_dir): 83 | image_list = LoadFileList(image_list_file) 84 | image_num = len(image_list) 85 | weights_list = LoadFileList(weights_list_file) 86 | result = [] 87 | for weights in weights_list: 88 | weights_name = getFileName(weights) 89 | 90 | # print('weights_name: ',weights) 91 | 92 | meta_file,cfg_file = getMetaCfgName(weights) 93 | # meta = dn.load_meta(meta_file) 94 | # net = dn.load_net(cfg_file,bytes(weights,'utf-8'),0) 95 | 96 | # 选择对应的dn 97 | meta = dn.load_meta(meta_file) 98 | net = dn.load_net(cfg_file,bytes(weights,'utf-8'),0) 99 | 100 | object_type = [meta.names[i].decode('utf-8').strip() for i in range(meta.classes)] 101 | 102 | result_path = os.path.join(result_dir,weights_name) 103 | if not os.path.exists(result_path): 104 | os.mkdir(result_path) 105 | 106 | # detect result and save to text 107 | timeall = 0 108 | for j,image_path in enumerate(image_list): 109 | print('detect: '+str(j+1)+'/'+str(len(image_list))) 110 | label_path = imagePath2labelPath(image_path) 111 | image_name = getFileName(image_path) 112 | det_save_path = os.path.join(result_path,image_name+'.txt') 113 | # det = dn.detect_ext(net, meta, bytes(image_path,'utf-8'),thresh) 114 | 115 | # 选择对应的dn 116 | det,time1 = dn.detect_ext(net, meta, bytes(image_path,'utf-8'),thresh) 117 | timeall = timeall + time1; 118 | 119 | # save detection result to text 120 | saveDetRes(det,det_save_path,object_type) 121 | time.sleep(0.001) 122 | print('xxxxxxxxxxx', 'FPS, ',len(image_list)/timeall) 123 | # dn.free_net(net) 124 | 125 | # campare label and detection result 126 | for i,objtype in enumerate(object_type): 127 | 128 | # if objtype != 'fr': 129 | # continue 130 | total_label = 0 131 | total_detect = 0 132 | total_corr = 0 133 | total_iou = 0 134 | cmp_result = [] 135 | det_ = [] 136 | annopath = [] 137 | 138 | detall = [['name','obj_type', 'score',0,0,0,0]] # 此处为xywh(中心),应该变为xmin,ymin,xmax,ymax 139 | 140 | imagesetfile = [] 141 | for j,image_path in enumerate(image_list): 142 | label_path = imagePath2labelPath(image_path) 143 | image_name = getFileName(image_path) 144 | imagesetfile.append(image_name) 145 | img_save_path = os.path.join(result_path,image_name+'.jpg') 146 | det_save_path = os.path.join(result_path,image_name+'.txt') 147 | 148 | # detpath.append(det_save_path) 149 | annopath.append(label_path) 150 | # print(img_save_path) 151 | label = [] 152 | if os.path.exists(label_path): 153 | label = LoadLabel(label_path,object_type) 154 | 155 | # save detection result to text 156 | det = readDetRes(det_save_path) 157 | for d in det: 158 | if d[0] > len(object_type)-1: 159 | d[0] = ' ' 160 | continue 161 | d[0] = object_type[d[0]] 162 | 163 | for d in det: 164 | xmin = float(copy.deepcopy(d[2])) - float(copy.deepcopy(d[4]))/2.0 165 | ymin = float(copy.deepcopy(d[3])) - float(copy.deepcopy(d[5]))/2.0 166 | xmax = float(copy.deepcopy(d[2])) + float(copy.deepcopy(d[4]))/2.0 167 | ymax = float(copy.deepcopy(d[3])) + float(copy.deepcopy(d[5]))/2.0 168 | # 该文件格式:imagename1 type confidence xmin ymin xmax ymax 169 | d_ = [image_name, d[0], d[1], xmin, ymin, xmax, ymax] 
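# The four lines above convert center-format (cx, cy, w, h) back to the corner
# format that voc_eval() consumes (the comment above this block gives the row
# layout: "imagename type confidence xmin ymin xmax ymax"). The copy.deepcopy()
# calls are unnecessary for float scalars; an equivalent compact form:
# xmin, ymin = d[2] - d[4] / 2.0, d[3] - d[5] / 2.0
# xmax, ymax = d[2] + d[4] / 2.0, d[3] + d[5] / 2.0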
170 | det_.append(d_) 171 | 172 | if len(det_) != 0: 173 | detall = numpy.vstack((detall, det_)) 174 | det_=[] 175 | 176 | if i > 0: 177 | image_path = img_save_path 178 | # print(j,image_path) 179 | img = cv2.imread(image_path) 180 | if img is None: 181 | print("load image error&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&") 182 | continue 183 | 184 | cmp_res = cdl.cmp_data(objtype, det, label, thresh, iou_thresh, img) 185 | 186 | cmp_res.update({'image_name': image_name}) 187 | total_corr += cmp_res['correct'] 188 | total_iou += cmp_res['avg_iou']*cmp_res['label_num'] 189 | 190 | cmp_result.append(cmp_res) 191 | print("%s: %d/%d label: %d detect: %d correct: %d recall: %f avg_iou: %f accuracy: %f precision: %f\n" % \ 192 | (str(objtype),j+1,image_num,cmp_res['label_num'],cmp_res['detect_num'],\ 193 | cmp_res['correct'],cmp_res['recall'],cmp_res['avg_iou'],\ 194 | cmp_res['accuracy'],cmp_res['precision'])) 195 | total_label += cmp_res['label_num'] 196 | total_detect += cmp_res['detect_num'] 197 | cv2.imwrite(img_save_path,img) 198 | img = [] 199 | time.sleep(0.001) 200 | 201 | # Compute the AP value 202 | # ap=0 203 | detall = numpy.delete(detall, 0, axis = 0) 204 | det_objtype = [obj for obj in detall if obj[1] == objtype] 205 | if len(det_objtype) == 0: 206 | ap = 0 207 | else: 208 | ap = voc_eval(det_objtype, annopath, imagesetfile, objtype, iou_thresh) 209 | detall=[] 210 | 211 | # Dataset-level analysis results 212 | avg_recall = 0 213 | if total_label > 0: 214 | avg_recall = total_corr/float(total_label) 215 | avg_iou = 0 216 | if total_iou > 0: 217 | avg_iou = total_iou/total_label 218 | avg_acc = 0 219 | if total_label+total_detect-total_corr > 0: 220 | avg_acc = float(total_corr)/(total_label+total_detect-total_corr) 221 | avg_precision = 0 222 | if total_detect > 0: 223 | avg_precision = float(total_corr)/total_detect 224 | total_result = [total_label,total_detect,total_corr,avg_recall,avg_iou,avg_acc,avg_precision] 225 | cdl.ExportAnaRes(objtype,cmp_result,total_result,image_path,result_path) 226 | print("total_label: %d total_detect: %d total_corr: %d recall: %f average iou: %f accuracy: %f precision: %f ap: %f\n" % \ 227 | (total_result[0],total_result[1],total_result[2],total_result[3],total_result[4],total_result[5],total_result[6],ap)) 228 | 229 | result.append([weights_name]+[objtype]+total_result+[float(ap)]) 230 | cdl.ExportAnaResAll(result, result_dir) 231 | time.sleep(0.001) 232 | 233 | if __name__ == "__main__": 234 | 235 | dn.set_gpu(4) 236 | weights_list_file = "/users/duanyou/c5/v4_all_train/weights.txt" 237 | 238 | # # all_test 239 | data_path = "/users/duanyou/c5/all_pretrain" 240 | image_list_file = os.path.join(data_path,"test.txt") 241 | result_dir = os.path.join("/users/duanyou/c5/results_v4all/results_all/") 242 | if not os.path.exists(result_dir): 243 | os.mkdir(result_dir) 244 | batch_analysis(weights_list_file,image_list_file,0.20,0.45,result_dir) 245 | 246 | # # changsha_test 247 | data_path = "/users/duanyou/c5/changsha" 248 | image_list_file = os.path.join(data_path,"test.txt") 249 | result_dir = os.path.join("/users/duanyou/c5/results_v4all/results_changsha/") 250 | if not os.path.exists(result_dir): 251 | os.mkdir(result_dir) 252 | batch_analysis(weights_list_file,image_list_file,0.20,0.45,result_dir) 253 | 254 | # # hezhoupucheng_test 255 | data_path = "/users/duanyou/c5/hezhoupucheng" 256 | image_list_file = os.path.join(data_path,"test.txt") 257 | result_dir = os.path.join("/users/duanyou/c5/results_v4all/results_hezhoupucheng/") 258 | if not os.path.exists(result_dir): 259 | 
260 | batch_analysis(weights_list_file,image_list_file,0.20,0.45,result_dir)
261 | 
262 | # # puer_test
263 | data_path = "/users/duanyou/c5/puer"
264 | image_list_file = os.path.join(data_path,"test.txt")
265 | result_dir = os.path.join("/users/duanyou/c5/results_v4all/results_puer/")
266 | if not os.path.exists(result_dir):
267 | os.mkdir(result_dir)
268 | batch_analysis(weights_list_file,image_list_file,0.20,0.45,result_dir)
269 | 
270 | # yancheng_test
271 | # data_path = "/users/duanyou/c5/yancheng"
272 | # image_list_file = os.path.join(data_path,"test.txt")
273 | # result_dir = os.path.join("/users/duanyou/c5/results_v4all/results_yancheng/")
274 | # if not os.path.exists(result_dir):
275 | # os.mkdir(result_dir)
276 | # batch_analysis(weights_list_file,image_list_file,0.20,0.45,result_dir)
277 | 
-------------------------------------------------------------------------------- /mAPEvaluate/voc_eval.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast/er R-CNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Bharath Hariharan
5 | # --------------------------------------------------------
6 | 
7 | import xml.etree.ElementTree as ET
8 | import os
9 | import numpy as np
10 | 
11 | 
12 | def convert(size, box): # box=xmin,ymin,xmax,ymax
13 | dw = 1. / size[0]
14 | dh = 1. / size[1]
15 | xmin = box[0] * dw
16 | ymin = box[1] * dh
17 | xmax = box[2] * dw
18 | ymax = box[3] * dh
19 | return (xmin, ymin, xmax, ymax)
20 | 
21 | 
22 | def parse_rec(filename): # read the annotation xml file
23 | """ Parse a PASCAL VOC xml file """
24 | in_file = open(filename)
25 | xml_info = in_file.read()
26 | try:
27 | root = ET.fromstring(xml_info)
28 | except Exception:
29 | print("Error: cannot parse file"); return []
30 | objects = []
31 | if root.find('markNode') != None:
32 | obj = root.find('markNode').find('object')
33 | if obj != None:
34 | w = int(root.find('width').text)
35 | h = int(root.find('height').text)
36 | for obj in root.iter('object'):
37 | if 'non_interest' in str(obj.find('targettype').text):
38 | continue
39 | obj_struct = {}
40 | if obj.find('targettype').text == 'car_rear' or obj.find('targettype').text == 'car_front':
41 | obj_struct['name'] = 'fr'
42 | else:
43 | obj_struct['name'] = obj.find('targettype').text
44 | obj_struct['pose'] = 0 # obj.find('pose').text
45 | obj_struct['truncated'] = 0 # int(obj.find('truncated').text)
46 | obj_struct['difficult'] = 0 # int(obj.find('difficult').text)
47 | # bbox = obj.find('bndbox')
48 | b = [float(obj.find('bndbox').find('xmin').text),
49 | float(obj.find('bndbox').find('ymin').text),
50 | float(obj.find('bndbox').find('xmax').text),
51 | float(obj.find('bndbox').find('ymax').text)]
52 | bb = convert((w, h), b)
53 | if bb is None:
54 | continue
55 | obj_struct['bbox'] = [bb[0], bb[1], bb[2], bb[3]]
56 | objects.append(obj_struct)
57 | return objects
58 | 
59 | 
60 | def voc_ap(rec, prec):
61 | # use the more precise point-wise integration method
62 | # correct AP calculation
63 | # first append sentinel values at the end
64 | mrec = np.concatenate(([0.], rec, [1.]))
65 | mpre = np.concatenate(([0.], prec, [0.]))
66 | 
67 | # compute the precision envelope
68 | for i in range(mpre.size - 1, 0, -1):
69 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
70 | 
71 | # to calculate area under PR curve, look for points
72 | # where X axis (recall) changes value
73 | i = np.where(mrec[1:] != mrec[:-1])[0]
74 | 
75 | # and sum (\Delta recall) * prec
76 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
77 | return ap
78 | 
79 | 
80 | def voc_eval(detpath,
81 | annopath,
82 | imagesetfile,
83 | classname,
84 | ovthresh=0.5):
85 | """
86 | :param detpath:
87 | :param annopath:
88 | :param imagesetfile:
89 | :param classname:
90 | :param ovthresh:
91 | :return:
92 | """
93 | # Main routine: computes recall and precision for the current class
94 | # detpath: detection-results txt file, e.g. VOCdevkit/results/VOC20xx/Main/_det_test_aeroplane.txt.
95 | # File format: imagename1 type confidence xmin ymin xmax ymax (first result of image 1)
96 | # imagename1 type confidence xmin ymin xmax ymax (second result of image 1)
97 | # imagename1 type confidence xmin ymin xmax ymax (first result of image 2)
98 | # ......
99 | # One result per line, one line per detected bbox; assume 20000 detection results here
100 | 
101 | # detpath: Path to detections
102 | # detpath.format(classname) should produce the detection results file.
103 | # annopath: Path to annotations
104 | # annopath.format(imagename) should be the xml annotations file. # the xml annotation files.
105 | # imagesetfile: Text file containing the list of images, one image per line. # dataset split txt file, e.g. VOCdevkit/VOC20xx/ImageSets/Main/test.txt; with 1000 test images this file has 1000 lines.
106 | # classname: Category name (duh) # the class name; e.g. 2 classes (one target class + background).
107 | # cachedir: Directory for caching the annotations # cache dir for annotations, e.g. VOCdevkit/annotation_cache, so the raw dataset need not be re-read on every run.
108 | # [ovthresh]: Overlap threshold (default = 0.5) # required overlap.
109 | # [use_07_metric]: Whether to use VOC07's 11 point AP computation
110 | # (default False) # whether to use the VOC07 AP method (11-point sampling).
111 | 
112 | # assumes detections are in detpath.format(classname)
113 | # assumes annotations are in annopath.format(imagename)
114 | # assumes imagesetfile is a text file with each line an image name
115 | # cachedir caches the annotations in a pickle file
116 | 
117 | imagenames = [x.strip() for x in imagesetfile]
118 | 
119 | # parse_rec reads each image's annotation file and stores the gt in the recs dict (key: image name, value: gt objects)
120 | recs = {}
121 | for i, imagename in enumerate(imagenames):
122 | # recs[imagename] = parse_rec(annopath.format(imagename))
123 | recs[imagename] = parse_rec(annopath[i])
124 | 
125 | # extract gt objects for this class # recall, precision and AP are all computed per class.
126 | class_recs = {} # annotations of the current class
127 | npos = 0 # number of labeled (non-difficult) targets
128 | for imagename in imagenames:
129 | R = [obj for obj in recs[imagename] if obj['name'] == classname] # filter: keep only recs entries of the given class, stored as R.
130 | bbox = np.array([x['bbox'] for x in R]) # extract the bboxes
131 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) # if the dataset has no difficult flag, all entries are 0.
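# class_recs maps each image name to this class's gt boxes together with
# per-box 'difficult' and 'det' (already-matched) flags, filled in just below;
# npos holds the number of non-difficult gt boxes that recall is measured against.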
132 | 
133 | det = [False] * len(R) # len(R) is the number of gt boxes of this class; det marks whether each gt has been matched, initialized to False.
134 | npos = npos + sum(~difficult) # accumulate non-difficult samples; with no difficult flags, npos equals the gt count.
135 | class_recs[imagename] = {'bbox': bbox,
136 | 'difficult': difficult,
137 | 'det': det}
138 | 
139 | # read dets: read the detection results
140 | splitlines = detpath # each row: imagename1 type confidence xmin ymin xmax ymax
141 | # splitlines = [x.strip().split(' ') for x in detpath] # with 20000 detections, splitlines has length 20000
142 | image_ids = [x[0] for x in splitlines] # image names of the detections; length 20000 even with only 1000 images, since one image can yield several detections
143 | confidence = np.array([float(x[2]) for x in splitlines]) # detection confidences
144 | BB = np.array([[float(z) for z in x[3:]] for x in splitlines]) # bboxes as floats.
145 | 
146 | npos = len(image_ids)
147 | 
148 | # sort the 20000 detections by confidence
149 | sorted_ind = np.argsort(-confidence) # indices of confidence in descending order.
150 | sorted_scores = np.sort(-confidence) # descending order.
151 | BB = BB[sorted_ind, :] # reorder the bboxes from highest to lowest confidence.
152 | image_ids = [image_ids[x] for x in sorted_ind]
153 | 
154 | # go down dets and mark TPs and FPs
155 | nd = len(image_ids) # note: 20000 here, not 1000
156 | tp = np.zeros(nd) # true positives, length 20000
157 | fp = np.zeros(nd) # false positives, length 20000
158 | for d in range(nd): # iterate over all detections; thanks to the sort, from highest to lowest confidence
159 | R = class_recs[image_ids[d]] # all same-class gt of the image this detection belongs to
160 | bb = BB[d, :].astype(float) # bbox of the current detection
161 | ovmax = -np.inf
162 | BBGT = R['bbox'].astype(float) # bbox coordinates of the same-class gt boxes in that image
163 | 
164 | if BBGT.size > 0:
165 | # compute overlaps between the current detection and all gt boxes of its image (one-vs-many via numpy broadcasting)
166 | # intersection
167 | ixmin = np.maximum(BBGT[:, 0], bb[0])
168 | iymin = np.maximum(BBGT[:, 1], bb[1])
169 | ixmax = np.minimum(BBGT[:, 2], bb[2])
170 | iymax = np.minimum(BBGT[:, 3], bb[3])
171 | iw = np.maximum(ixmax - ixmin + 1., 0.)
172 | ih = np.maximum(iymax - iymin + 1., 0.)
173 | inters = iw * ih
174 | 
175 | # union
176 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
177 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
178 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
179 | 
180 | overlaps = inters / uni
181 | ovmax = np.max(overlaps) # maximum overlap
182 | jmax = np.argmax(overlaps) # index of the gt with maximum overlap
183 | # print('overlaps',overlaps,'ovmax',ovmax,'jmax ',jmax)
184 | 
185 | if ovmax > ovthresh: # if the best overlap with a gt box passes the threshold
186 | # if not R['difficult'][jmax]:
187 | if not R['det'][jmax]:
188 | tp[d] = 1. # one more true positive
189 | R['det'][jmax] = True # mark this gt as detected; a later detection matching the same gt must not count as another true positive
190 | else: # otherwise it is a false positive (duplicate detection)
191 | fp[d] = 1.
192 | else: # below the threshold: definitely a false positive
193 | fp[d] = 1.
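# Illustrative walk-through of the loop above (made-up numbers): if two
# detections on one image, scored 0.9 and 0.8, both overlap the same gt box
# above ovthresh, the 0.9 detection claims the gt (det=True) and counts as a
# tp; the lower-scored one then falls into the else-branch and counts as a fp.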
194 | 
195 | # compute precision recall
196 | fp = np.cumsum(fp) # running sum: false positives accumulated up to each rank
197 | tp = np.cumsum(tp) # running sum: true positives accumulated up to each rank
198 | rec = tp / float(npos) # recall, length 20000, goes from 0 to 1
199 | # avoid divide by zero in case the first detection matches a difficult
200 | # ground truth (precision, length 20000, goes from 1 to 0)
201 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
202 | ap = voc_ap(rec, prec)
203 | 
204 | return ap
205 | 
-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
1 | numpy == 1.17
2 | opencv-python >= 4.1
3 | torch >= 1.5
4 | torchvision
5 | matplotlib
6 | pycocotools
7 | tqdm
8 | pillow
9 | tensorboard >= 1.14
10 | 
11 | # Nvidia Apex (optional) for mixed precision training --------------------------
12 | # git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user
13 | 
-------------------------------------------------------------------------------- /test5_track.gif: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/CaptainEven/YOLOV4_MCMOT/0cf60be77ae6d088e079ccefedbf71a05f938890/test5_track.gif
-------------------------------------------------------------------------------- /tracker/basetrack.py: --------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | 
3 | import numpy as np
4 | from collections import defaultdict
5 | from collections import OrderedDict
6 | 
7 | 
8 | class TrackState(object):
9 | New = 0
10 | Tracked = 1
11 | Lost = 2
12 | Removed = 3
13 | 
14 | 
15 | # TODO: create a BaseTrack base class that supports multiple object classes
16 | class MCBaseTrack(object):
17 | _count_dict = defaultdict(int) # the MCBaseTrack class owns this dict
18 | 
19 | track_id = 0
20 | is_activated = False
21 | state = TrackState.New
22 | 
23 | history = OrderedDict()
24 | features = []
25 | curr_feature = None
26 | score = 0
27 | start_frame = 0
28 | frame_id = 0
29 | time_since_update = 0
30 | 
31 | # multi-camera
32 | location = (np.inf, np.inf)
33 | 
34 | @property
35 | def end_frame(self):
36 | return self.frame_id
37 | 
38 | # @even: reset track id
39 | @staticmethod
40 | def init_count(num_classes):
41 | """
42 | Initiate _count for all object classes
43 | :param num_classes:
44 | """
45 | for cls_id in range(num_classes):
46 | MCBaseTrack._count_dict[cls_id] = 0
47 | 
48 | @staticmethod
49 | def next_id(cls_id):
50 | MCBaseTrack._count_dict[cls_id] += 1
51 | return MCBaseTrack._count_dict[cls_id]
52 | 
53 | @staticmethod
54 | def reset_track_count(cls_id):
55 | MCBaseTrack._count_dict[cls_id] = 0
56 | 
57 | def activate(self, *args):
58 | raise NotImplementedError
59 | 
60 | def predict(self):
61 | raise NotImplementedError
62 | 
63 | def update(self, *args, **kwargs):
64 | raise NotImplementedError
65 | 
66 | def mark_lost(self):
67 | self.state = TrackState.Lost
68 | 
69 | def mark_removed(self):
70 | self.state = TrackState.Removed
71 | 
72 | 
73 | class BaseTrack(object):
74 | _count = 0
75 | 
76 | track_id = 0
77 | is_activated = False
78 | state = TrackState.New
79 | 
80 | history = OrderedDict()
81 | features = []
82 | curr_feature = None
83 | score = 0
84 | start_frame = 0
85 | frame_id = 0
86 | time_since_update = 0
87 | 
88 | # multi-camera
89 | location = (np.inf, np.inf)
90 | 
91 | @property
92 | def end_frame(self):
93 | return self.frame_id
94 | 
95 | @staticmethod
96 | def next_id():
97 | BaseTrack._count += 1
98 | return BaseTrack._count
99 | 
100 | # @even: reset track id
101 | @staticmethod
102 | def reset_track_count():
103 | BaseTrack._count = 0
104 | 
105 | def activate(self, *args):
106 | raise NotImplementedError
107 | 
108 | def predict(self):
109 | raise NotImplementedError
110 | 
111 | def update(self, *args, **kwargs):
112 | raise NotImplementedError
113 | 
114 | def mark_lost(self):
115 | self.state = TrackState.Lost
116 | 
117 | def mark_removed(self):
118 | self.state = TrackState.Removed
119 | 
-------------------------------------------------------------------------------- /tracker/matching.py: --------------------------------------------------------------------------------
1 | import lap
2 | import numpy as np
3 | import scipy
4 | from cython_bbox import bbox_overlaps as bbox_ious
5 | from scipy.spatial.distance import cdist
6 | from tracking_utils import kalman_filter
7 | 
8 | 
9 | def merge_matches(m1, m2, shape):
10 | O, P, Q = shape
11 | m1 = np.asarray(m1)
12 | m2 = np.asarray(m2)
13 | 
14 | M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))
15 | M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))
16 | 
17 | mask = M1 * M2
18 | match = mask.nonzero()
19 | match = list(zip(match[0], match[1]))
20 | unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))
21 | unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))
22 | 
23 | return match, unmatched_O, unmatched_Q
24 | 
25 | 
26 | def _indices_to_matches(cost_matrix, indices, thresh):
27 | matched_cost = cost_matrix[tuple(zip(*indices))]
28 | matched_mask = (matched_cost <= thresh)
29 | 
30 | matches = indices[matched_mask]
31 | unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))
32 | unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))
33 | 
34 | return matches, unmatched_a, unmatched_b
35 | 
36 | 
37 | def linear_assignment(cost_matrix, thresh):
38 | """
39 | :param cost_matrix:
40 | :param thresh:
41 | :return:
42 | """
43 | if cost_matrix.size == 0:
44 | return np.empty((0, 2), dtype=int), \
45 | tuple(range(cost_matrix.shape[0])), \
46 | tuple(range(cost_matrix.shape[1]))
47 | 
48 | matches, unmatched_a, unmatched_b = [], [], []
49 | cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
50 | 
51 | for ix, mx in enumerate(x):
52 | if mx >= 0:
53 | matches.append([ix, mx])
54 | 
55 | unmatched_a = np.where(x < 0)[0]
56 | unmatched_b = np.where(y < 0)[0]
57 | matches = np.asarray(matches)
58 | 
59 | return matches, unmatched_a, unmatched_b
60 | 
61 | 
62 | def ious(atlbrs, btlbrs):
63 | """
64 | Compute cost based on IoU
65 | :type atlbrs: list[tlbr] | np.ndarray
66 | :type btlbrs: list[tlbr] | np.ndarray
67 | 
68 | :rtype ious np.ndarray
69 | """
70 | ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float)
71 | if ious.size == 0:
72 | return ious
73 | 
74 | ious = bbox_ious(
75 | np.ascontiguousarray(atlbrs, dtype=np.float),
76 | np.ascontiguousarray(btlbrs, dtype=np.float)
77 | )
78 | 
79 | return ious
80 | 
81 | 
82 | def iou_distance(atracks, btracks):
83 | """
84 | Compute cost based on IoU
85 | :type atracks: list[STrack]
86 | :type btracks: list[STrack]
87 | 
88 | :rtype cost_matrix np.ndarray
89 | """
90 | 
91 | if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (
92 | len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
93 | atlbrs = atracks
94 | btlbrs = btracks
95 | else:
96 | atlbrs = [track.tlbr for track in atracks]
97 | btlbrs = [track.tlbr for track
in btracks]
98 | 
99 | _ious = ious(atlbrs, btlbrs)
100 | cost_matrix = 1 - _ious
101 | 
102 | return cost_matrix
103 | 
104 | 
105 | # TODO: using GIOU, DIOU, CIOU... to replace IOU
106 | 
107 | def embedding_distance(tracks, detections, metric='cosine'):
108 | """
109 | :param tracks: list[STrack]
110 | :param detections: list[BaseTrack]
111 | :param metric:
112 | :return: cost_matrix np.ndarray
113 | """
114 | cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float)
115 | if cost_matrix.size == 0:
116 | return cost_matrix
117 | 
118 | det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float)
119 | # for i, track in enumerate(tracks):
120 | # cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))
121 | track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float)
122 | 
123 | # default: cosine distance
124 | # Normalized features
125 | cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))
126 | 
127 | return cost_matrix
128 | 
129 | 
130 | def gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):
131 | """
132 | :param kf:
133 | :param cost_matrix:
134 | :param tracks:
135 | :param detections:
136 | :param only_position:
137 | :return:
138 | """
139 | if cost_matrix.size == 0:
140 | return cost_matrix
141 | 
142 | gating_dim = 2 if only_position else 4
143 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
144 | measurements = np.asarray([det.to_xyah() for det in detections])
145 | 
146 | for row, track in enumerate(tracks):
147 | gating_distance = kf.gating_distance(track.mean, track.covariance, measurements, only_position)
148 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
149 | 
150 | return cost_matrix
151 | 
152 | 
153 | def fuse_motion(kf,
154 | cost_matrix,
155 | tracks,
156 | detections,
157 | only_position=False,
158 | lambda_=0.98):
159 | """
160 | :param kf:
161 | :param cost_matrix:
162 | :param tracks:
163 | :param detections:
164 | :param only_position:
165 | :param lambda_:
166 | :return:
167 | """
168 | if cost_matrix.size == 0:
169 | return cost_matrix
170 | 
171 | gating_dim = 2 if only_position else 4
172 | gating_threshold = kalman_filter.chi2inv95[gating_dim]
173 | measurements = np.asarray([det.to_xyah() for det in detections])
174 | 
175 | for row, track in enumerate(tracks):
176 | gating_distance = kf.gating_distance(track.mean,
177 | track.covariance,
178 | measurements,
179 | only_position,
180 | metric='maha')
181 | cost_matrix[row, gating_distance > gating_threshold] = np.inf
182 | cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance
183 | 
184 | return cost_matrix
185 | 
-------------------------------------------------------------------------------- /tracking_utils/evaluation.py: --------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import copy
4 | import motmetrics as mm
5 | mm.lap.default_solver = 'lap'
6 | 
7 | from tracking_utils.io import read_results, unzip_objs
8 | 
9 | 
10 | class Evaluator(object):
11 | 
12 | def __init__(self, data_root, seq_name, data_type):
13 | self.data_root = data_root
14 | self.seq_name = seq_name
15 | self.data_type = data_type
16 | 
17 | self.load_annotations()
18 | self.reset_accumulator()
19 | 
20 | def load_annotations(self):
21 | assert self.data_type == 'mot'
22 | 
23 | gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')
24 | self.gt_frame_dict =
read_results(gt_filename, self.data_type, is_gt=True) 25 | self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True) 26 | 27 | def reset_accumulator(self): 28 | self.acc = mm.MOTAccumulator(auto_id=True) 29 | 30 | def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False): 31 | # results 32 | trk_tlwhs = np.copy(trk_tlwhs) 33 | trk_ids = np.copy(trk_ids) 34 | 35 | # gts 36 | gt_objs = self.gt_frame_dict.get(frame_id, []) 37 | gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2] 38 | 39 | # ignore boxes 40 | ignore_objs = self.gt_ignore_frame_dict.get(frame_id, []) 41 | ignore_tlwhs = unzip_objs(ignore_objs)[0] 42 | 43 | # remove ignored results 44 | keep = np.ones(len(trk_tlwhs), dtype=bool) 45 | iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5) 46 | if len(iou_distance) > 0: 47 | match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 48 | match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 49 | match_ious = iou_distance[match_is, match_js] 50 | 51 | match_js = np.asarray(match_js, dtype=int) 52 | match_js = match_js[np.logical_not(np.isnan(match_ious))] 53 | keep[match_js] = False 54 | trk_tlwhs = trk_tlwhs[keep] 55 | trk_ids = trk_ids[keep] 56 | #match_is, match_js = mm.lap.linear_sum_assignment(iou_distance) 57 | #match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js]) 58 | #match_ious = iou_distance[match_is, match_js] 59 | 60 | #match_js = np.asarray(match_js, dtype=int) 61 | #match_js = match_js[np.logical_not(np.isnan(match_ious))] 62 | #keep[match_js] = False 63 | #trk_tlwhs = trk_tlwhs[keep] 64 | #trk_ids = trk_ids[keep] 65 | 66 | # get distance matrix 67 | iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5) 68 | 69 | # acc 70 | self.acc.update(gt_ids, trk_ids, iou_distance) 71 | 72 | if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'): 73 | events = self.acc.last_mot_events # only supported by https://github.com/longcw/py-motmetrics 74 | else: 75 | events = None 76 | return events 77 | 78 | def eval_file(self, filename): 79 | self.reset_accumulator() 80 | 81 | result_frame_dict = read_results(filename, self.data_type, is_gt=False) 82 | frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys()))) 83 | for frame_id in frames: 84 | trk_objs = result_frame_dict.get(frame_id, []) 85 | trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2] 86 | self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False) 87 | 88 | return self.acc 89 | 90 | @staticmethod 91 | def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')): 92 | names = copy.deepcopy(names) 93 | if metrics is None: 94 | metrics = mm.metrics.motchallenge_metrics 95 | metrics = copy.deepcopy(metrics) 96 | 97 | mh = mm.metrics.create() 98 | summary = mh.compute_many( 99 | accs, 100 | metrics=metrics, 101 | names=names, 102 | generate_overall=True 103 | ) 104 | 105 | return summary 106 | 107 | @staticmethod 108 | def save_summary(summary, filename): 109 | import pandas as pd 110 | writer = pd.ExcelWriter(filename) 111 | summary.to_excel(writer) 112 | writer.save() 113 | -------------------------------------------------------------------------------- /tracking_utils/io.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Dict 3 | import numpy as np 4 | 5 | from tracking_utils.log import logger 6 | 7 | 8 | def 
write_results(filename, results_dict: Dict, data_type: str): 9 | if not filename: 10 | return 11 | path = os.path.dirname(filename) 12 | if not os.path.exists(path): 13 | os.makedirs(path) 14 | 15 | if data_type in ('mot', 'mcmot', 'lab'): 16 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\n' 17 | elif data_type == 'kitti': 18 | save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\n' 19 | else: 20 | raise ValueError(data_type) 21 | 22 | with open(filename, 'w') as f: 23 | for frame_id, frame_data in results_dict.items(): 24 | if data_type == 'kitti': 25 | frame_id -= 1 26 | for tlwh, track_id in frame_data: 27 | if track_id < 0: 28 | continue 29 | 30 | x1, y1, w, h = tlwh 31 | x2, y2 = x1 + w, y1 + h 32 | line = save_format.format(frame=frame_id, id=track_id, 33 | x1=x1, y1=y1, w=w, h=h, 34 | score=1.0) 35 | f.write(line) 36 | logger.info('Save results to {}'.format(filename)) 37 | 38 | 39 | def write_results_dict(results_f_path, results_dict, data_type, num_classes=5): 40 | """ 41 | :param results_f_path: 42 | :param results_dict: 43 | :param data_type: 44 | :param num_classes: 45 | :return: 46 | """ 47 | if data_type == 'mot': 48 | save_format = '{frame},{id},{x1},{y1},{w},{h},1,{cls_id},1\n' 49 | elif data_type == 'kitti': 50 | save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\n' 51 | else: 52 | raise ValueError(data_type) 53 | 54 | with open(results_f_path, 'w') as f: 55 | for cls_id in range(num_classes): # process each object class 56 | cls_results = results_dict[cls_id] 57 | for fr_id, tlwhs, track_ids in cls_results: # fr_id starts from 1 58 | if data_type == 'kitti': 59 | fr_id -= 1 60 | 61 | for tlwh, track_id in zip(tlwhs, track_ids): 62 | if track_id < 0: 63 | continue 64 | 65 | x1, y1, w, h = tlwh 66 | # x2, y2 = x1 + w, y1 + h 67 | line = save_format.format(frame=fr_id, 68 | id=track_id, 69 | x1=x1, y1=y1, w=w, h=h, 70 | cls_id=cls_id) 71 | # if fr_id == 1: 72 | # print(line) 73 | 74 | f.write(line) 75 | # f.flush() 76 | 77 | logger.info('Save results to {}.\n'.format(results_f_path)) 78 | 79 | 80 | def read_results(filename, data_type: str, is_gt=False, is_ignore=False): 81 | if data_type in ('mot', 'lab'): 82 | read_fun = read_mot_results 83 | else: 84 | raise ValueError('Unknown data type: {}'.format(data_type)) 85 | 86 | return read_fun(filename, is_gt, is_ignore) 87 | 88 | 89 | """ 90 | labels={'ped', ... % 1 91 | 'person_on_vhcl', ... % 2 92 | 'car', ... % 3 93 | 'bicycle', ... % 4 94 | 'mbike', ... % 5 95 | 'non_mot_vhcl', ... % 6 96 | 'static_person', ... % 7 97 | 'distractor', ... % 8 98 | 'occluder', ... % 9 99 | 'occluder_on_grnd', ... %10 100 | 'occluder_full', ... % 11 101 | 'reflection', ... % 12 102 | 'crowd' ... 
% 13 103 | }; 104 | """ 105 | 106 | 107 | def read_mot_results(filename, is_gt, is_ignore): 108 | valid_labels = {1} 109 | ignore_labels = {2, 7, 8, 12} 110 | results_dict = dict() 111 | if os.path.isfile(filename): 112 | with open(filename, 'r') as f: 113 | for line in f.readlines(): 114 | linelist = line.split(',') 115 | if len(linelist) < 7: 116 | continue 117 | fid = int(linelist[0]) 118 | if fid < 1: 119 | continue 120 | results_dict.setdefault(fid, list()) 121 | 122 | if is_gt: 123 | if 'MOT16-' in filename or 'MOT17-' in filename: 124 | label = int(float(linelist[7])) 125 | mark = int(float(linelist[6])) 126 | if mark == 0 or label not in valid_labels: 127 | continue 128 | score = 1 129 | elif is_ignore: 130 | if 'MOT16-' in filename or 'MOT17-' in filename: 131 | label = int(float(linelist[7])) 132 | vis_ratio = float(linelist[8]) 133 | if label not in ignore_labels and vis_ratio >= 0: 134 | continue 135 | else: 136 | continue 137 | score = 1 138 | else: 139 | score = float(linelist[6]) 140 | 141 | tlwh = tuple(map(float, linelist[2:6])) 142 | target_id = int(linelist[1]) 143 | 144 | results_dict[fid].append((tlwh, target_id, score)) 145 | 146 | return results_dict 147 | 148 | 149 | def unzip_objs(objs): 150 | if len(objs) > 0: 151 | tlwhs, ids, scores = zip(*objs) 152 | else: 153 | tlwhs, ids, scores = [], [], [] 154 | tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4) 155 | 156 | return tlwhs, ids, scores 157 | -------------------------------------------------------------------------------- /tracking_utils/kalman_filter.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 2 | import numpy as np 3 | import scipy.linalg 4 | 5 | """ 6 | Table for the 0.95 quantile of the chi-square distribution with N degrees of 7 | freedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv 8 | function and used as Mahalanobis gating threshold. 9 | """ 10 | chi2inv95 = { 11 | 1: 3.8415, 12 | 2: 5.9915, 13 | 3: 7.8147, 14 | 4: 9.4877, # 4: 9.4877 15 | 5: 11.070, 16 | 6: 12.592, 17 | 7: 14.067, 18 | 8: 15.507, 19 | 9: 16.919} 20 | 21 | 22 | class KalmanFilter(object): 23 | """ 24 | A simple Kalman filter for tracking bounding boxes in image space. 25 | 26 | The 8-dimensional state space 27 | 28 | x, y, a, h, vx, vy, va, vh 29 | 30 | contains the bounding box center position (x, y), aspect ratio a, height h, 31 | and their respective velocities. 32 | 33 | Object motion follows a constant velocity model. The bounding box location 34 | (x, y, a, h) is taken as direct observation of the state space (linear 35 | observation model). 36 | 37 | """ 38 | 39 | def __init__(self): 40 | ndim, dt = 4, 1. 41 | 42 | # Create Kalman filter model matrices. 43 | self._motion_mat = np.eye(2 * ndim, 2 * ndim) 44 | for i in range(ndim): 45 | self._motion_mat[i, ndim + i] = dt 46 | self._update_mat = np.eye(ndim, 2 * ndim) 47 | 48 | # Motion and observation uncertainty are chosen relative to the current 49 | # state estimate. These weights control the amount of uncertainty in 50 | # the model. This is a bit hacky. 51 | self._std_weight_position = 1. / 20 52 | self._std_weight_velocity = 1. / 160 53 | 54 | def initiate(self, measurement): 55 | """Create track from unassociated measurement. 56 | 57 | Parameters 58 | ---------- 59 | measurement : ndarray 60 | Bounding box coordinates (x, y, a, h) with center position (x, y), 61 | aspect ratio a, and height h. 
62 | 63 | Returns 64 | ------- 65 | (ndarray, ndarray) 66 | Returns the mean vector (8 dimensional) and covariance matrix (8x8 67 | dimensional) of the new track. Unobserved velocities are initialized 68 | to 0 mean. 69 | 70 | """ 71 | mean_pos = measurement 72 | mean_vel = np.zeros_like(mean_pos) 73 | mean = np.r_[mean_pos, mean_vel] 74 | 75 | std = [ 76 | 2 * self._std_weight_position * measurement[3], 77 | 2 * self._std_weight_position * measurement[3], 78 | 1e-2, 79 | 2 * self._std_weight_position * measurement[3], 80 | 10 * self._std_weight_velocity * measurement[3], 81 | 10 * self._std_weight_velocity * measurement[3], 82 | 1e-5, 83 | 10 * self._std_weight_velocity * measurement[3]] 84 | covariance = np.diag(np.square(std)) 85 | return mean, covariance 86 | 87 | def predict(self, mean, covariance): 88 | """Run Kalman filter prediction step. 89 | 90 | Parameters 91 | ---------- 92 | mean : ndarray 93 | The 8 dimensional mean vector of the object state at the previous 94 | time step. 95 | covariance : ndarray 96 | The 8x8 dimensional covariance matrix of the object state at the 97 | previous time step. 98 | 99 | Returns 100 | ------- 101 | (ndarray, ndarray) 102 | Returns the mean vector and covariance matrix of the predicted 103 | state. Unobserved velocities are initialized to 0 mean. 104 | 105 | """ 106 | std_pos = [ 107 | self._std_weight_position * mean[3], 108 | self._std_weight_position * mean[3], 109 | 1e-2, 110 | self._std_weight_position * mean[3]] 111 | std_vel = [ 112 | self._std_weight_velocity * mean[3], 113 | self._std_weight_velocity * mean[3], 114 | 1e-5, 115 | self._std_weight_velocity * mean[3]] 116 | motion_cov = np.diag(np.square(np.r_[std_pos, std_vel])) 117 | 118 | # mean = np.dot(self._motion_mat, mean) 119 | mean = np.dot(mean, self._motion_mat.T) 120 | covariance = np.linalg.multi_dot((self._motion_mat, covariance, self._motion_mat.T)) + motion_cov 121 | 122 | return mean, covariance 123 | 124 | def project(self, mean, covariance): 125 | """Project state distribution to measurement space. 126 | 127 | Parameters 128 | ---------- 129 | mean : ndarray 130 | The state's mean vector (8 dimensional array). 131 | covariance : ndarray 132 | The state's covariance matrix (8x8 dimensional). 133 | 134 | Returns 135 | ------- 136 | (ndarray, ndarray) 137 | Returns the projected mean and covariance matrix of the given state 138 | estimate. 139 | 140 | """ 141 | std = [ 142 | self._std_weight_position * mean[3], 143 | self._std_weight_position * mean[3], 144 | 1e-1, 145 | self._std_weight_position * mean[3]] 146 | innovation_cov = np.diag(np.square(std)) 147 | 148 | mean = np.dot(self._update_mat, mean) 149 | covariance = np.linalg.multi_dot(( 150 | self._update_mat, covariance, self._update_mat.T)) 151 | return mean, covariance + innovation_cov 152 | 153 | def multi_predict(self, mean, covariance): 154 | """Run Kalman filter prediction step (Vectorized version). 155 | Parameters 156 | ---------- 157 | mean : ndarray 158 | The Nx8 dimensional mean matrix of the object states at the previous 159 | time step. 160 | covariance : ndarray 161 | The Nx8x8 dimensional covariance matrics of the object states at the 162 | previous time step. 163 | Returns 164 | ------- 165 | (ndarray, ndarray) 166 | Returns the mean vector and covariance matrix of the predicted 167 | state. Unobserved velocities are initialized to 0 mean. 
168 | """ 169 | std_pos = [ 170 | self._std_weight_position * mean[:, 3], 171 | self._std_weight_position * mean[:, 3], 172 | 1e-2 * np.ones_like(mean[:, 3]), 173 | self._std_weight_position * mean[:, 3]] 174 | std_vel = [ 175 | self._std_weight_velocity * mean[:, 3], 176 | self._std_weight_velocity * mean[:, 3], 177 | 1e-5 * np.ones_like(mean[:, 3]), 178 | self._std_weight_velocity * mean[:, 3]] 179 | sqr = np.square(np.r_[std_pos, std_vel]).T 180 | 181 | motion_cov = [] 182 | for i in range(len(mean)): 183 | motion_cov.append(np.diag(sqr[i])) 184 | motion_cov = np.asarray(motion_cov) 185 | 186 | mean = np.dot(mean, self._motion_mat.T) 187 | left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2)) 188 | covariance = np.dot(left, self._motion_mat.T) + motion_cov 189 | 190 | return mean, covariance 191 | 192 | def update(self, mean, covariance, measurement): 193 | """Run Kalman filter correction step. 194 | 195 | Parameters 196 | ---------- 197 | mean : ndarray 198 | The predicted state's mean vector (8 dimensional). 199 | covariance : ndarray 200 | The state's covariance matrix (8x8 dimensional). 201 | measurement : ndarray 202 | The 4 dimensional measurement vector (x, y, a, h), where (x, y) 203 | is the center position, a the aspect ratio, and h the height of the 204 | bounding box. 205 | 206 | Returns 207 | ------- 208 | (ndarray, ndarray) 209 | Returns the measurement-corrected state distribution. 210 | 211 | """ 212 | projected_mean, projected_cov = self.project(mean, covariance) 213 | 214 | chol_factor, lower = scipy.linalg.cho_factor( 215 | projected_cov, lower=True, check_finite=False) 216 | kalman_gain = scipy.linalg.cho_solve( 217 | (chol_factor, lower), np.dot(covariance, self._update_mat.T).T, 218 | check_finite=False).T 219 | innovation = measurement - projected_mean 220 | 221 | new_mean = mean + np.dot(innovation, kalman_gain.T) 222 | new_covariance = covariance - np.linalg.multi_dot(( 223 | kalman_gain, projected_cov, kalman_gain.T)) 224 | return new_mean, new_covariance 225 | 226 | def gating_distance(self, 227 | mean, 228 | covariance, 229 | measurements, 230 | only_position=False, 231 | metric='maha'): 232 | """Compute gating distance between state distribution and measurements. 233 | A suitable distance threshold can be obtained from `chi2inv95`. If 234 | `only_position` is False, the chi-square distribution has 4 degrees of 235 | freedom, otherwise 2. 236 | Parameters 237 | ---------- 238 | :param mean : ndarray 239 | Mean vector over the state distribution (8 dimensional). 240 | :param covariance : ndarray 241 | Covariance of the state distribution (8x8 dimensional). 242 | :param measurements : ndarray 243 | An Nx4 dimensional matrix of N measurements, each in 244 | format (x, y, a, h) where (x, y) is the bounding box center 245 | position, a the aspect ratio, and h the height. 246 | :param only_position : Optional[bool] 247 | If True, distance computation is done with respect to the bounding 248 | box center position only. 249 | :param metric 250 | :return: 251 | ------- 252 | ndarray 253 | Returns an array of length N, where the i-th element contains the 254 | squared Mahalanobis distance between (mean, covariance) and 255 | `measurements[i]`. 
256 | """ 257 | mean, covariance = self.project(mean, covariance) 258 | if only_position: 259 | mean, covariance = mean[:2], covariance[:2, :2] 260 | measurements = measurements[:, :2] 261 | 262 | d = measurements - mean 263 | if metric == 'gaussian': 264 | return np.sum(d * d, axis=1) 265 | elif metric == 'maha': 266 | cholesky_factor = np.linalg.cholesky(covariance) 267 | z = scipy.linalg.solve_triangular(cholesky_factor, 268 | d.T, 269 | lower=True, 270 | check_finite=False, 271 | overwrite_b=True) 272 | squared_maha = np.sum(z * z, axis=0) 273 | return squared_maha 274 | else: 275 | raise ValueError('invalid distance metric') 276 | -------------------------------------------------------------------------------- /tracking_utils/log.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | def get_logger(name='root'): 5 | formatter = logging.Formatter( 6 | # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s') 7 | fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S') 8 | 9 | handler = logging.StreamHandler() 10 | handler.setFormatter(formatter) 11 | 12 | logger = logging.getLogger(name) 13 | logger.setLevel(logging.DEBUG) 14 | logger.addHandler(handler) 15 | 16 | return logger 17 | 18 | 19 | logger = get_logger('root') 20 | -------------------------------------------------------------------------------- /tracking_utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from tracking_utils import _C 4 | 5 | nms = _C.nms 6 | # nms.__doc__ = """ 7 | # This function performs Non-maximum suppresion""" 8 | -------------------------------------------------------------------------------- /tracking_utils/parse_config.py: -------------------------------------------------------------------------------- 1 | def parse_model_cfg(path): 2 | """Parses the yolo-v3 layer configuration file and returns module definitions""" 3 | file = open(path, 'r') 4 | lines = file.read().split('\n') 5 | lines = [x for x in lines if x and not x.startswith('#')] 6 | lines = [x.rstrip().lstrip() for x in lines] # get rid of fringe whitespaces 7 | module_defs = [] 8 | for line in lines: 9 | if line.startswith('['): # This marks the start of a new block 10 | module_defs.append({}) 11 | module_defs[-1]['type'] = line[1:-1].rstrip() 12 | if module_defs[-1]['type'] == 'convolutional': 13 | module_defs[-1]['batch_normalize'] = 0 14 | else: 15 | key, value = line.split("=") 16 | value = value.strip() 17 | module_defs[-1][key.rstrip()] = value.strip() 18 | 19 | return module_defs 20 | 21 | 22 | def parse_data_cfg(path): 23 | """Parses the data configuration file""" 24 | options = dict() 25 | options['gpus'] = '0' 26 | options['num_workers'] = '10' 27 | with open(path, 'r') as fp: 28 | lines = fp.readlines() 29 | for line in lines: 30 | line = line.strip() 31 | if line == '' or line.startswith('#'): 32 | continue 33 | key, value = line.split('=') 34 | options[key.strip()] = value.strip() 35 | return options 36 | -------------------------------------------------------------------------------- /tracking_utils/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 
| # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | 
8 | import time
9 | 
10 | 
11 | class Timer(object):
12 | """A simple timer."""
13 | def __init__(self):
14 | self.total_time = 0.
15 | self.calls = 0
16 | self.start_time = 0.
17 | self.diff = 0.
18 | self.average_time = 0.
19 | 
20 | self.duration = 0.
21 | 
22 | def tic(self):
23 | # using time.time instead of time.clock because time.clock
24 | # does not normalize for multithreading
25 | self.start_time = time.time()
26 | 
27 | def toc(self, average=True):
28 | self.diff = time.time() - self.start_time
29 | self.total_time += self.diff
30 | self.calls += 1
31 | self.average_time = self.total_time / self.calls
32 | if average:
33 | self.duration = self.average_time
34 | else:
35 | self.duration = self.diff
36 | return self.duration
37 | 
38 | def clear(self):
39 | self.total_time = 0.
40 | self.calls = 0
41 | self.start_time = 0.
42 | self.diff = 0.
43 | self.average_time = 0.
44 | self.duration = 0.
45 | 
46 | 
-------------------------------------------------------------------------------- /tracking_utils/visualization.py: --------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | 
3 | import numpy as np
4 | import cv2
5 | 
6 | # cls_color_dict = {
7 | # 'car': [180, 105, 255], # hot pink
8 | # 'bicycle': [219, 112, 147], # MediumPurple
9 | # 'person': [98, 130, 238], # Salmon
10 | # 'cyclist': [181, 228, 255],
11 | # 'tricycle': [211, 85, 186]
12 | # }
13 | 
14 | # np.random.seed(0)
15 | 
16 | 
17 | def tlwhs_to_tlbrs(tlwhs):
18 | tlbrs = np.copy(tlwhs)
19 | if len(tlbrs) == 0:
20 | return tlbrs
21 | tlbrs[:, 2] += tlwhs[:, 0]
22 | tlbrs[:, 3] += tlwhs[:, 1]
23 | return tlbrs
24 | 
25 | 
26 | def get_color(idx):
27 | idx *= 3
28 | # idx += np.random.randint(50, 255)
29 | color = ((37 * idx) % 255, (17 * idx) % 255, (29 * idx) % 255)
30 | return color
31 | 
32 | 
33 | def resize_image(image, max_size=800):
34 | if max(image.shape[:2]) > max_size:
35 | scale = float(max_size) / max(image.shape[:2])
36 | image = cv2.resize(image, None, fx=scale, fy=scale)
37 | return image
38 | 
39 | 
40 | def plot_detects(img,
41 | dets,
42 | num_classes,
43 | frame_id,
44 | id2cls):
45 | """
46 | plot detection results of this frame (or image)
47 | :param img:
48 | :param dets:
49 | :param num_classes:
50 | :param frame_id:
51 | :param id2cls:
52 | :return:
53 | """
54 | if dets is None:
55 | return img
56 | 
57 | img = np.ascontiguousarray(np.copy(img))
58 | # im_h, im_w = img.shape[:2]
59 | 
60 | text_scale = max(1.0, img.shape[1] / 1200.0) # 1600.
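# e.g. (illustrative) a 1920-pixel-wide frame gives text_scale = 1920/1200 = 1.6;
# frames 1200 px wide or narrower are clamped to a scale of 1.0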
61 | text_thickness = 2
62 | line_thickness = max(1, int(img.shape[1] / 600.0))
63 | 
64 | # plot each object of the object class
65 | for obj_i, obj in enumerate(dets):
66 | # left, top, right, down, score, cls_id
67 | x1, y1, x2, y2, score, cls_id = obj
68 | cls_id = int(cls_id.detach().cpu())
69 | cls_name = id2cls[int(cls_id)]
70 | box_int = tuple(map(int, (x1, y1, x2, y2)))
71 | # cls_color = cls_color_dict[cls_name]
72 | cls_color = get_color(abs(cls_id))
73 | 
74 | # draw bbox for each object
75 | cv2.rectangle(img,
76 | box_int[0:2],
77 | box_int[2:4],
78 | color=cls_color,
79 | thickness=line_thickness)
80 | 
81 | # draw class name and score
82 | cv2.putText(img,
83 | cls_name + ' {:.3f}'.format(float(score)),
84 | (box_int[0], box_int[1]),
85 | cv2.FONT_HERSHEY_PLAIN,
86 | text_scale,
87 | [0, 255, 255], # cls_id: yellow
88 | thickness=text_thickness)
89 | 
90 | return img
91 | 
92 | 
93 | def plot_tracks(image,
94 | tlwhs_dict,
95 | obj_ids_dict,
96 | num_classes,
97 | scores=None,
98 | frame_id=0,
99 | id2cls=None):
100 | """
101 | :param image:
102 | :param tlwhs_dict:
103 | :param obj_ids_dict:
104 | :param num_classes:
105 | :param scores:
106 | :param frame_id:
107 | :param id2cls:
108 | :return:
109 | """
110 | img = np.ascontiguousarray(np.copy(image))
111 | im_h, im_w = img.shape[:2]
112 | 
113 | # top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255
114 | 
115 | text_scale = max(1.0, image.shape[1] / 1200.) # 1600.
116 | # text_thickness = 1 if text_scale > 1.1 else 1
117 | text_thickness = 2 # custom line width for the ID text
118 | line_thickness = max(1, int(image.shape[1] / 500.))
119 | 
120 | radius = max(5, int(im_w / 140.))
121 | 
122 | for cls_id in range(num_classes):
123 | cls_tlwhs = tlwhs_dict[cls_id]
124 | obj_ids = obj_ids_dict[cls_id]
125 | 
126 | for i, tlwh_i in enumerate(cls_tlwhs):
127 | x1, y1, w, h = tlwh_i
128 | int_box = tuple(map(int, (x1, y1, x1 + w, y1 + h))) # x1, y1, x2, y2
129 | obj_id = int(obj_ids[i])
130 | id_text = '{}'.format(int(obj_id))
131 | 
132 | _line_thickness = 1 if obj_id <= 0 else line_thickness
133 | color = get_color(abs(obj_id))
134 | # cls_color = cls_color_dict[id2cls[cls_id]]
135 | 
136 | # draw bbox
137 | cv2.rectangle(img=img,
138 | pt1=int_box[0:2], # (x1, y1)
139 | pt2=int_box[2:4], # (x2, y2)
140 | color=color,
141 | thickness=line_thickness)
142 | 
143 | # draw class name and index
144 | cv2.putText(img,
145 | id2cls[cls_id],
146 | (int(x1), int(y1)),
147 | cv2.FONT_HERSHEY_PLAIN,
148 | text_scale,
149 | (0, 255, 255), # cls_id: yellow
150 | thickness=text_thickness)
151 | 
152 | (txt_w, txt_h), baseline = cv2.getTextSize(id2cls[cls_id],
153 | fontFace=cv2.FONT_HERSHEY_PLAIN,
154 | fontScale=text_scale, thickness=text_thickness)
155 | 
156 | cv2.putText(img,
157 | id_text,
158 | (int(x1), int(y1) - txt_h),
159 | cv2.FONT_HERSHEY_PLAIN,
160 | text_scale,
161 | (0, 255, 255), # cls_id: yellow
162 | thickness=text_thickness)
163 | 
164 | return img
165 | 
166 | 
167 | def plot_tracking(image,
168 | tlwhs,
169 | obj_ids,
170 | scores=None,
171 | frame_id=0,
172 | fps=0.,
173 | ids2=None,
174 | cls_id=0, id2cls=None):
175 | """
176 | :param image:
177 | :param tlwhs:
178 | :param obj_ids:
179 | :param scores:
180 | :param frame_id:
181 | :param fps:
182 | :param ids2:
183 | :param cls_id:
184 | :return:
185 | """
186 | im = np.ascontiguousarray(np.copy(image))
187 | im_h, im_w = im.shape[:2]
188 | 
189 | # top_view = np.zeros([im_w, im_w, 3], dtype=np.uint8) + 255
190 | 
191 | text_scale = max(1.0, image.shape[1] / 1200.) # 1600.
192 | # text_thickness = 1 if text_scale > 1.1 else 1
193 | text_thickness = 2 # custom line width for the ID text
194 | line_thickness = max(1, int(image.shape[1] / 500.))
195 | 
196 | radius = max(5, int(im_w / 140.))
197 | cv2.putText(im, 'frame: %d fps: %.2f num: %d'
198 | % (frame_id, fps, len(tlwhs)),
199 | (0, int(15 * text_scale)),
200 | cv2.FONT_HERSHEY_PLAIN,
201 | text_scale,
202 | (0, 0, 255),
203 | thickness=2)
204 | 
205 | for i, tlwh in enumerate(tlwhs):
206 | x1, y1, w, h = tlwh
207 | int_box = tuple(map(int, (x1, y1, x1 + w, y1 + h)))
208 | obj_id = int(obj_ids[i])
209 | id_text = '{}'.format(int(obj_id))
210 | 
211 | if ids2 is not None:
212 | id_text = id_text + ', {}'.format(int(ids2[i]))
213 | 
214 | _line_thickness = 1 if obj_id <= 0 else line_thickness
215 | color = get_color(abs(obj_id))
216 | cv2.rectangle(im, int_box[0:2], int_box[2:4], color=color, thickness=line_thickness) # bbox: per-ID pseudo-random color
217 | 
218 | # draw the track ID
219 | cv2.putText(im,
220 | id_text,
221 | (int_box[0], int_box[1] + 30),
222 | cv2.FONT_HERSHEY_PLAIN,
223 | text_scale,
224 | (0, 255, 255), # id: yellow
225 | thickness=text_thickness)
226 | 
227 | # draw the object class name (id2cls may be None when no mapping is passed in)
228 | if id2cls is not None:
229 | cv2.putText(im, id2cls[cls_id],
230 | (int(x1), int(y1)),
231 | cv2.FONT_HERSHEY_PLAIN,
232 | text_scale,
233 | (0, 255, 255), # cls_id: yellow
234 | thickness=text_thickness)
235 | 
236 | return im
237 | 
238 | 
239 | def plot_trajectory(image, tlwhs, track_ids):
240 | image = image.copy()
241 | for one_tlwhs, track_id in zip(tlwhs, track_ids):
242 | color = get_color(int(track_id))
243 | for tlwh in one_tlwhs:
244 | x1, y1, w, h = tuple(map(int, tlwh))
245 | cv2.circle(image, (int(x1 + 0.5 * w), int(y1 + h)), 2, color, thickness=2)
246 | 
247 | return image
248 | 
249 | 
250 | def plot_detections(image, tlbrs, scores=None, color=(255, 0, 0), ids=None):
251 | """
252 | :param image:
253 | :param tlbrs:
254 | :param scores:
255 | :param color:
256 | :param ids:
257 | :return:
258 | """
259 | im = np.copy(image)
260 | text_scale = max(1, image.shape[1] / 800.)
261 | thickness = 2 if text_scale > 1.3 else 1
262 | for i, det in enumerate(tlbrs):
263 | x1, y1, x2, y2 = np.asarray(det[:4], dtype=np.int)
264 | if len(det) >= 7:
265 | label = 'det' if det[5] > 0 else 'trk'
266 | if ids is not None:
267 | text = '{}# {:.2f}: {:d}'.format(label, det[6], ids[i])
268 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255),
269 | thickness=thickness)
270 | else:
271 | text = '{}# {:.2f}'.format(label, det[6])
272 | 
273 | if scores is not None:
274 | text = '{:.2f}'.format(scores[i])
275 | cv2.putText(im, text, (x1, y1 + 30), cv2.FONT_HERSHEY_PLAIN, text_scale, (0, 255, 255),
276 | thickness=thickness)
277 | 
278 | cv2.rectangle(im, (x1, y1), (x2, y2), color, 2)
279 | 
280 | return im
281 | 
-------------------------------------------------------------------------------- /utils/__init__.py: --------------------------------------------------------------------------------
1 | 
2 | 
-------------------------------------------------------------------------------- /utils/evolve.sh: --------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #for i in 0 1 2 3
3 | #do
4 | # t=ultralytics/yolov3:v139 && sudo docker pull $t && sudo nvidia-docker run -d --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t utils/evolve.sh $i
5 | # sleep 30
6 | #done
7 | 
8 | while true; do
9 | # python3 train.py --data ../data/sm4/out.data --img-size 320 --epochs 100 --batch 64 --accum 1 --weights yolov3-tiny.conv.15 --multi --bucket ult/wer --evolve --cache --device $1 --cfg yolov3-tiny3-1cls.cfg --single --adam
10 | # python3 train.py --data ../out/data.data --img-size 608 --epochs 10 --batch 8 --accum 8 --weights ultralytics68.pt --multi --bucket ult/athena --evolve --device $1 --cfg yolov3-spp-1cls.cfg
11 | 
12 | python3 train.py --data coco2014.data --img-size 512 608 --epochs 27 --batch 8 --accum 8 --evolve --weights '' --bucket ult/coco/sppa_512 --device $1 --cfg yolov3-sppa.cfg --multi
13 | done
14 | 
15 | 
16 | # coco epoch times --img-size 416 608 --epochs 27 --batch 16 --accum 4
17 | # 36:34 2080ti
18 | # 21:58 V100
19 | # 63:00 T4
-------------------------------------------------------------------------------- /utils/gcp.sh: --------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | # New VM
4 | rm -rf sample_data yolov3
5 | git clone https://github.com/ultralytics/yolov3
6 | # git clone -b test --depth 1 https://github.com/ultralytics/yolov3 test # branch
7 | # sudo apt-get install zip
8 | #git clone https://github.com/NVIDIA/apex && cd apex && pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . --user && cd .. && rm -rf apex
9 | sudo conda install -yc conda-forge scikit-image pycocotools
10 | # python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('193Zp_ye-3qXMonR1nZj3YyxMtQkMy50k','coco2014.zip')"
11 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1WQT6SOktSe8Uw6r10-2JhbEhMY5DJaph','coco2017.zip')"
12 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('1C3HewOG9akA3y456SZLBJZfNDPkBwAto','knife.zip')"
13 | python3 -c "from yolov3.utils.google_utils import gdrive_download; gdrive_download('13g3LqdpkNE8sPosVJT6KFXlfoMypzRP4','sm4.zip')"
14 | sudo shutdown
15 | 
16 | # Mount local SSD
17 | lsblk
18 | sudo mkfs.ext4 -F /dev/nvme0n1
19 | sudo mkdir -p /mnt/disks/nvme0n1
20 | sudo mount /dev/nvme0n1 /mnt/disks/nvme0n1
21 | sudo chmod a+w /mnt/disks/nvme0n1
22 | cp -r coco /mnt/disks/nvme0n1
23 | 
24 | # Kill All
25 | t=ultralytics/yolov3:v1
26 | docker kill $(docker ps -a -q --filter ancestor=$t)
27 | 
28 | # Evolve coco
29 | sudo -s
30 | t=ultralytics/yolov3:evolve
31 | # docker kill $(docker ps -a -q --filter ancestor=$t)
32 | for i in 0 1 6 7
33 | do
34 | docker pull $t && docker run --gpus all -d --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t bash utils/evolve.sh $i
35 | sleep 30
36 | done
37 | 
38 | #COCO training
39 | n=131 && t=ultralytics/coco:v131 && sudo docker pull $t && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t python3 train.py --data coco2014.data --img-size 320 640 --epochs 300 --batch 16 --weights '' --device 0 --cfg yolov3-spp.cfg --bucket ult/coco --name $n && sudo shutdown
40 | n=132 && t=ultralytics/coco:v131 && sudo docker pull $t && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco $t python3 train.py --data coco2014.data --img-size 320 640 --epochs 300 --batch 64 --weights '' --device 0 --cfg yolov3-tiny.cfg --bucket ult/coco --name $n && sudo shutdown
41 | 
-------------------------------------------------------------------------------- /utils/google_utils.py: --------------------------------------------------------------------------------
1 | # This file contains google utils: https://cloud.google.com/storage/docs/reference/libraries
2 | # pip install --upgrade google-cloud-storage
3 | 
4 | import os
5 | import time
6 | 
7 | 
8 | # from google.cloud import storage
9 | 
10 | 
11 | def gdrive_download(id='1HaXkef9z6y5l4vUnCYgdmEAj61c6bfWO', name='coco.zip'):
12 | # https://gist.github.com/tanaikech/f0f2d122e05bf5f971611258c22c110f
13 | # Downloads a file from Google Drive, accepting presented query
14 | # from utils.google_utils import *; gdrive_download()
15 | t = time.time()
16 | 
17 | print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='')
18 | os.remove(name) if os.path.exists(name) else None # remove existing
19 | os.remove('cookie') if os.path.exists('cookie') else None
20 | 
21 | # Attempt file download
22 | os.system("curl -c ./cookie -s -L \"https://drive.google.com/uc?export=download&id=%s\" > /dev/null" % id)
23 | if os.path.exists('cookie'): # large file
24 | s = "curl -Lb ./cookie \"https://drive.google.com/uc?export=download&confirm=`awk '/download/ {print $NF}' ./cookie`&id=%s\" -o %s" % (
25 | id, name)
26 | else: # small file
27 | s = "curl -s -L -o %s 'https://drive.google.com/uc?export=download&id=%s'" % (name, id)
28 | r = os.system(s) # execute, capture return values
29 | os.remove('cookie') if os.path.exists('cookie') else None
30 | 
31 | # Error check
32 | if r != 0:
33 | os.remove(name) if os.path.exists(name) else None # remove partial
34 | print('Download error ') # raise Exception('Download error')
35 | return r
36 | 
37 | # Unzip if archive
38 | if name.endswith('.zip'):
39 | print('unzipping... ', end='')
40 | os.system('unzip -q %s' % name) # unzip
41 | os.remove(name) # remove zip to free space
42 | 
43 | print('Done (%.1fs)' % (time.time() - t))
44 | return r
45 | 
46 | 
47 | def upload_blob(bucket_name, source_file_name, destination_blob_name):
48 | # Uploads a file to a bucket
49 | # https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-python
50 | 
51 | storage_client = storage.Client()
52 | bucket = storage_client.get_bucket(bucket_name)
53 | blob = bucket.blob(destination_blob_name)
54 | 
55 | blob.upload_from_filename(source_file_name)
56 | 
57 | print('File {} uploaded to {}.'.format(
58 | source_file_name,
59 | destination_blob_name))
60 | 
61 | 
62 | def download_blob(bucket_name, source_blob_name, destination_file_name):
63 | # Downloads a blob from a bucket
64 | storage_client = storage.Client()
65 | bucket = storage_client.get_bucket(bucket_name)
66 | blob = bucket.blob(source_blob_name)
67 | 
68 | blob.download_to_filename(destination_file_name)
69 | 
70 | print('Blob {} downloaded to {}.'.format(
71 | source_blob_name,
72 | destination_file_name))
73 | 
-------------------------------------------------------------------------------- /utils/layers.py: --------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | 
3 | import torch.nn.functional as F
4 | from utils.utils import *
5 | 
6 | try:
7 | from mish_cuda import MishCuda as Mish
8 | except:
9 | class Mish(nn.Module): # https://github.com/digantamisra98/Mish
10 | def forward(self, x):
11 | return x * F.softplus(x).tanh()
12 | 
13 | 
14 | def make_divisible(v, divisor):
15 | # Function ensures all layers have a channel number that is divisible by 'divisor'
16 | # https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
17 | return math.ceil(v / divisor) * divisor
18 | 
19 | 
20 | class Flatten(nn.Module):
21 | # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions
22 | def forward(self, x):
23 | return x.view(x.size(0), -1)
24 | 
25 | 
26 | class Concat(nn.Module):
27 | # Concatenate a list of tensors along dimension
28 | def __init__(self, dimension=1):
29 | super(Concat, self).__init__()
30 | self.d = dimension
31 | 
32 | def forward(self, x):
33 | return torch.cat(x, self.d)
34 | 
35 | 
36 | class RouteGroup(nn.Module):
37 | def __init__(self, layers, groups, group_id):
38 | super(RouteGroup, self).__init__()
39 | self.layers = layers
40 | self.multi = len(layers) > 1
41 | self.groups = groups
42 | self.group_id = group_id
43 | 
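# Note on RouteGroup (added comment): it implements the Darknet 'route' layer
# with a 'groups' option -- each referenced layer's output is split
# channel-wise into `groups` chunks and only chunk `group_id` is kept.
# E.g. (illustrative shapes) routing a (N, 64, H, W) feature map with
# groups=2, group_id=1 passes through its last 32 channels.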
44 | def forward(self, x, outputs): 45 | if self.multi: 46 | outs = [] 47 | for layer in self.layers: 48 | out = torch.chunk(outputs[layer], self.groups, dim=1) 49 | outs.append(out[self.group_id]) 50 | return torch.cat(outs, dim=1) 51 | else: 52 | out = torch.chunk(outputs[self.layers[0]], self.groups, dim=1) 53 | return out[self.group_id] 54 | 55 | 56 | # scaled_channels layer 57 | class ScaleChannels(nn.Module): 58 | def __init__(self, layers): 59 | super(ScaleChannels, self).__init__() 60 | self.layers = layers 61 | 62 | # assert len(self.layers) == 1 63 | 64 | def forward(self, x, outputs): 65 | # Scalar is current input: x 66 | # H×W = 1×1 67 | # assert x.shape[2] == 1 and x.shape[3] == 1 68 | 69 | layer = outputs[self.layers[0]] 70 | 71 | # assert x.shape[1] == layer.shape[1] 72 | 73 | # Do Scaling: applying broadcasting here 74 | x = x * layer 75 | 76 | return x 77 | 78 | 79 | # Dropout layer 80 | class Dropout(nn.Module): 81 | def __init__(self, prob): 82 | super(Dropout, self).__init__() 83 | self.prob = float(prob) 84 | 85 | def forward(self, x): 86 | return F.dropout(x, p=self.prob) 87 | 88 | 89 | # To do global average pooling 90 | class GlobalAvgPool(nn.Module): 91 | def __init__(self): 92 | super(GlobalAvgPool, self).__init__() 93 | 94 | def forward(self, x): 95 | return F.adaptive_avg_pool2d(x, (1, 1)) # set output size (1, 1) 96 | 97 | 98 | class FeatureConcat(nn.Module): 99 | def __init__(self, layers): 100 | super(FeatureConcat, self).__init__() 101 | self.layers = layers # layer indices 102 | self.multiple = len(layers) > 1 # multiple layers flag 103 | 104 | def forward(self, x, outputs): 105 | return torch.cat([outputs[i] for i in self.layers], 1) if self.multiple else outputs[self.layers[0]] 106 | 107 | 108 | class FeatureConcat_l(nn.Module): 109 | def __init__(self, layers): 110 | super(FeatureConcat_l, self).__init__() 111 | self.layers = layers # layer indices 112 | self.multiple = len(layers) > 1 # multiple layers flag 113 | 114 | def forward(self, x, outputs): 115 | return torch.cat([outputs[i][:, :outputs[i].shape[1] // 2, :, :] for i in self.layers], 1) if self.multiple else \ 116 | outputs[self.layers[0]][:, :outputs[self.layers[0]].shape[1] // 2, :, :] 117 | 118 | 119 | class WeightedFeatureFusion(nn.Module): # weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 120 | def __init__(self, layers, weight=False): 121 | super(WeightedFeatureFusion, self).__init__() 122 | self.layers = layers # layer indices 123 | self.weight = weight # apply weights boolean 124 | self.n = len(layers) + 1 # number of layers 125 | if weight: 126 | self.w = nn.Parameter(torch.zeros(self.n), requires_grad=True) # layer weights 127 | 128 | def forward(self, x, outputs): 129 | # Weights 130 | if self.weight: 131 | w = torch.sigmoid(self.w) * (2 / self.n) # sigmoid weights (0-1) 132 | x = x * w[0] 133 | 134 | # Fusion 135 | nx = x.shape[1] # input channels 136 | for i in range(self.n - 1): 137 | a = outputs[self.layers[i]] * w[i + 1] if self.weight else outputs[self.layers[i]] # feature to add 138 | na = a.shape[1] # feature channels 139 | 140 | # Adjust channels 141 | if nx == na: # same shape 142 | x = x + a 143 | elif nx > na: # slice input 144 | x[:, :na] = x[:, :na] + a # or a = nn.ZeroPad2d((0, 0, 0, 0, 0, dc))(a); x = x + a 145 | else: # slice feature 146 | x = x + a[:, :nx] 147 | 148 | return x 149 | 150 | 151 | class MixConv2d(nn.Module): # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595 152 | def __init__(self, in_ch, out_ch, k=(3, 
151 | class MixConv2d(nn.Module):  # MixConv: Mixed Depthwise Convolutional Kernels https://arxiv.org/abs/1907.09595
152 |     def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, method='equal_params'):
153 |         super(MixConv2d, self).__init__()
154 | 
155 |         groups = len(k)
156 |         if method == 'equal_ch':  # equal channels per group
157 |             i = torch.linspace(0, groups - 1E-6, out_ch).floor()  # out_ch indices
158 |             ch = [(i == g).sum() for g in range(groups)]
159 |         else:  # 'equal_params': equal parameter count per group
160 |             b = [out_ch] + [0] * groups
161 |             a = np.eye(groups + 1, groups, k=-1)
162 |             a -= np.roll(a, 1, axis=1)
163 |             a *= np.array(k) ** 2
164 |             a[0] = 1
165 |             ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int)  # solve ax = b for per-group channel counts
166 | 
167 |         self.m = nn.ModuleList([nn.Conv2d(in_channels=in_ch,
168 |                                           out_channels=ch[g],
169 |                                           kernel_size=k[g],
170 |                                           stride=stride,
171 |                                           padding=k[g] // 2,  # 'same' pad
172 |                                           dilation=dilation,
173 |                                           bias=bias) for g in range(groups)])
174 | 
175 |     def forward(self, x):
176 |         return torch.cat([m(x) for m in self.m], 1)
177 | 
178 | 
179 | class MixDeConv2d(nn.Module):  # MixDeConv: Mixed Depthwise DeConvolutional Kernels https://arxiv.org/abs/1907.09595
180 |     def __init__(self, in_ch, out_ch, k=(3, 5, 7), stride=1, dilation=1, bias=True, method='equal_params'):
181 |         super(MixDeConv2d, self).__init__()
182 | 
183 |         groups = len(k)
184 |         if method == 'equal_ch':  # equal channels per group
185 |             i = torch.linspace(0, groups - 1E-6, out_ch).floor()  # out_ch indices
186 |             ch = [(i == g).sum() for g in range(groups)]
187 |         else:  # 'equal_params': equal parameter count per group
188 |             b = [out_ch] + [0] * groups
189 |             a = np.eye(groups + 1, groups, k=-1)
190 |             a -= np.roll(a, 1, axis=1)
191 |             a *= np.array(k) ** 2
192 |             a[0] = 1
193 |             ch = np.linalg.lstsq(a, b, rcond=None)[0].round().astype(int)  # solve ax = b for per-group channel counts
194 | 
195 |         self.m = nn.ModuleList([nn.ConvTranspose2d(in_channels=in_ch,
196 |                                                    out_channels=ch[g],
197 |                                                    kernel_size=k[g],
198 |                                                    stride=stride,
199 |                                                    padding=k[g] // 2,  # 'same' pad
200 |                                                    dilation=dilation,
201 |                                                    bias=bias) for g in range(groups)])
202 | 
203 |     def forward(self, x):
204 |         return torch.cat([m(x) for m in self.m], 1)
205 | 
206 | 
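# --- Added sketch (editor's illustration, not original code): how the 'equal_params'
# branch above splits out_ch across kernel sizes. The small system ax = b is solved so
# that ch[g] * k[g]^2 (the per-group weight count, up to the shared in_ch factor) comes
# out roughly equal. The concrete numbers below are a worked assumption:
def _demo_mixconv_channel_split():
    m = MixConv2d(in_ch=32, out_ch=64, k=(3, 5, 7), method='equal_params')
    # ch is proportional to 1/k^2 -> [41, 15, 8]; weights per input channel:
    # 9 * 41 = 369, 25 * 15 = 375, 49 * 8 = 392 -- approximately equal
    return [conv.out_channels for conv in m.m]  # [41, 15, 8]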
207 | # Activation functions below -------------------------------------------------------------------------------------------
208 | class SwishImplementation(torch.autograd.Function):
209 |     @staticmethod
210 |     def forward(ctx, x):
211 |         ctx.save_for_backward(x)
212 |         return x * torch.sigmoid(x)
213 | 
214 |     @staticmethod
215 |     def backward(ctx, grad_output):
216 |         x = ctx.saved_tensors[0]
217 |         sx = torch.sigmoid(x)  # sigmoid(x)
218 |         return grad_output * (sx * (1 + x * (1 - sx)))
219 | 
220 | 
221 | class MishImplementation(torch.autograd.Function):
222 |     @staticmethod
223 |     def forward(ctx, x):
224 |         ctx.save_for_backward(x)
225 |         return x.mul(torch.tanh(F.softplus(x)))  # x * tanh(ln(1 + exp(x)))
226 | 
227 |     @staticmethod
228 |     def backward(ctx, grad_output):
229 |         x = ctx.saved_tensors[0]
230 |         sx = torch.sigmoid(x)
231 |         fx = F.softplus(x).tanh()
232 |         return grad_output * (fx + x * sx * (1 - fx * fx))
233 | 
234 | 
235 | class MemoryEfficientSwish(nn.Module):
236 |     def forward(self, x):
237 |         return SwishImplementation.apply(x)
238 | 
239 | 
240 | class MemoryEfficientMish(nn.Module):
241 |     def forward(self, x):
242 |         return MishImplementation.apply(x)
243 | 
244 | 
245 | class Swish(nn.Module):
246 |     def forward(self, x):
247 |         return x * torch.sigmoid(x)
248 | 
249 | 
250 | class HardSwish(nn.Module):  # https://arxiv.org/pdf/1905.02244.pdf
251 |     def forward(self, x):
252 |         return x * F.hardtanh(x + 3, 0., 6., True) / 6.
253 | 
-------------------------------------------------------------------------------- /utils/parse_config.py: --------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | 
3 | import os
4 | import numpy as np
5 | 
6 | 
7 | def parse_model_cfg(path):
8 |     """
9 |     :param path: cfg file path; may be 'cfg/yolov3.cfg', 'yolov3.cfg', or 'yolov3'
10 |     :return: a list of module definition dicts, one per cfg [section]
11 |     """
12 |     # Parse the yolo *.cfg file and return module definitions.
13 |     if not path.endswith('.cfg'):  # add .cfg suffix if omitted
14 |         path += '.cfg'
15 |     if not os.path.exists(path) and os.path.exists('cfg' + os.sep + path):  # add cfg/ prefix if omitted
16 |         path = 'cfg' + os.sep + path
17 | 
18 |     with open(path, 'r') as f:
19 |         lines = f.read().split('\n')
20 | 
21 |     lines = [x for x in lines if x and not x.startswith('#')]
22 |     lines = [x.rstrip().lstrip() for x in lines]  # get rid of fringe whitespaces
23 |     mdefs = []  # module definitions
24 | 
25 |     for line in lines:
26 |         if line.startswith('['):  # this marks the start of a new block
27 |             mdefs.append({})
28 |             mdefs[-1]['type'] = line[1:-1].rstrip()
29 |             if mdefs[-1]['type'] == 'convolutional':
30 |                 mdefs[-1]['batch_normalize'] = 0  # pre-populate with zeros (may be overwritten later)
31 |         else:
32 |             key, val = line.split("=")
33 |             key = key.rstrip()
34 | 
35 |             if key == 'anchors':  # return np-array
36 |                 mdefs[-1][key] = np.array([float(x) for x in val.split(',')]).reshape((-1, 2))  # np anchors
37 |             elif (key in ['from', 'layers', 'mask']) or (key == 'size' and ',' in val):  # return array
38 |                 mdefs[-1][key] = [int(x) for x in val.split(',')]
39 |             else:
40 |                 val = val.strip()
41 |                 if val.isnumeric():  # return int or float
42 |                     mdefs[-1][key] = int(val) if (int(val) - float(val)) == 0 else float(val)
43 |                 else:
44 |                     mdefs[-1][key] = val  # return string
45 | 
46 |     # Check all fields are supported
47 |     supported = ['type', 'batch_normalize', 'filters', 'size', 'stride', 'pad', 'activation', 'layers', 'groups',
48 |                  'from', 'mask', 'anchors', 'classes', 'num', 'jitter', 'ignore_thresh', 'truth_thresh', 'random',
49 |                  'stride_x', 'stride_y', 'weights_type', 'weights_normalization', 'scale_x_y', 'beta_nms', 'nms_kind',
50 |                  'iou_loss', 'iou_normalizer', 'cls_normalizer', 'iou_thresh', 'group_id', 'probability']
51 | 
52 |     f = []  # fields
53 |     for x in mdefs[1:]:
54 |         [f.append(k) for k in x if k not in f]
55 |     u = [x for x in f if x not in supported]  # unsupported fields
56 |     assert not any(u), "Unsupported fields %s in %s. See https://github.com/ultralytics/yolov3/issues/631" % (u, path)
57 | 
58 |     return mdefs
59 | 
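# --- Added note (editor's illustration, not original code): parse_model_cfg returns one
# dict per cfg [section], in file order. For the yolov4-tiny cfg at the end of this repo
# the result starts roughly like (values abridged):
#   [{'type': 'net', 'batch': 64, 'subdivisions': 4, 'width': 768, 'height': 448, ...},
#    {'type': 'convolutional', 'batch_normalize': 1, 'filters': 32, 'size': 3,
#     'stride': 2, 'pad': 1, 'activation': 'leaky'},
#    ...]
# Per the rules above, 'anchors' becomes an (N, 2) np.ndarray, 'from'/'layers'/'mask'
# become int lists, and values like '.3' that fail str.isnumeric() stay strings.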
60 | 
61 | def parse_data_cfg(path):
62 |     # Parses the data configuration file
63 |     if not os.path.exists(path) and os.path.exists('data' + os.sep + path):  # add data/ prefix if omitted
64 |         path = 'data' + os.sep + path
65 | 
66 |     with open(path, 'r') as f:
67 |         lines = f.readlines()
68 | 
69 |     options = dict()
70 |     for line in lines:
71 |         line = line.strip()
72 |         if line == '' or line.startswith('#'):
73 |             continue
74 |         key, val = line.split('=')
75 |         options[key.strip()] = val.strip()
76 | 
77 |     return options
78 | 
-------------------------------------------------------------------------------- /utils/process_mcmot_dataset.py: --------------------------------------------------------------------------------
1 | # encoding=utf-8
2 | 
3 | import os
4 | import time
5 | import shutil
6 | import re
7 | import cv2
8 | import pickle
9 | import numpy as np
10 | from collections import defaultdict
11 | from tqdm import tqdm  # fix: tqdm is used by gen_mcmot_data below
12 | classes = [
13 |     'car',  # 0
14 |     'bicycle',  # 1
15 |     'person',  # 2
16 |     'cyclist',  # 3
17 |     'tricycle'  # 4
18 | ]  # 5 classes (background not included)
19 | 
20 | cls2id = {
21 |     'car': 0,
22 |     'bicycle': 1,
23 |     'person': 2,
24 |     'cyclist': 3,
25 |     'tricycle': 4
26 | }
27 | 
28 | id2cls = {
29 |     0: 'car',
30 |     1: 'bicycle',
31 |     2: 'person',
32 |     3: 'cyclist',
33 |     4: 'tricycle'
34 | }
35 | 
36 | # the video training frames have a fixed resolution
37 | W, H = 1920, 1080
38 | 
39 | 
40 | def gen_labels_for_seq(dark_txt_path, seq_label_dir, classes, one_plus=True):
41 |     """Parse one DarkLabel annotation file and write one txt label file per frame.
42 |     """
43 |     global seq_max_id_dict, start_id_dict, fr_cnt
44 | 
45 |     # ----- start label generation for one video sequence
46 |     # reset each class's max_id to 0 whenever a new sequence is processed
47 |     for class_type in classes:
48 |         seq_max_id_dict[class_type] = 0
49 | 
50 |     # record the set of track ids of each class in the current sequence
51 |     id_set_dict = defaultdict(set)
52 | 
53 |     # read the DarkLabel annotation file of this sequence (one line per frame)
54 |     with open(dark_txt_path, 'r', encoding='utf-8') as r_h:
55 |         # each line of the annotation file describes one frame
56 |         for line in r_h.readlines():
57 |             fr_cnt += 1
58 | 
59 |             line = line.split(',')
60 |             fr_id = int(line[0])
61 |             n_objs = int(line[1])
62 |             # print('\nFrame {:d} in seq {}, total {:d} objects'.format(f_id + 1, seq_name, n_objs))
63 | 
64 |             # label info of all detection targets in the current frame
65 |             fr_label_objs = []
66 | 
67 |             # iterate over every object in this frame
68 |             for cur in range(2, len(line), 6):  # cursor
69 |                 class_type = line[cur + 5].strip()
70 |                 class_id = cls2id[class_type]  # class type => class id
71 | 
72 |                 # parse the track id
73 |                 if one_plus:
74 |                     track_id = int(line[cur]) + 1  # track ids are counted from 1
75 |                 else:
76 |                     track_id = int(line[cur])
77 | 
78 |                 # update this sequence's per-class max track id (background stays 0)
79 |                 if track_id > seq_max_id_dict[class_type]:
80 |                     seq_max_id_dict[class_type] = track_id
81 | 
82 |                 # record the track id in the per-class id set of the current seq
83 |                 id_set_dict[class_type].add(track_id)
84 | 
85 |                 # offset by the class's start id to get the dataset-wide track id
86 |                 track_id += start_id_dict[class_type]
87 | 
88 |                 # read bbox corner coordinates
89 |                 x1, y1 = int(line[cur + 1]), int(line[cur + 2])
90 |                 x2, y2 = int(line[cur + 3]), int(line[cur + 4])
91 | 
92 |                 # clip the bbox to the image resolution
93 |                 x1 = x1 if x1 >= 0 else 0
94 |                 x1 = x1 if x1 < W else W - 1
95 |                 y1 = y1 if y1 >= 0 else 0
96 |                 y1 = y1 if y1 < H else H - 1
97 |                 x2 = x2 if x2 >= 0 else 0
98 |                 x2 = x2 if x2 < W else W - 1
99 |                 y2 = y2 if y2 >= 0 else 0
100 |                 y2 = y2 if y2 < H else H - 1
101 | 
102 |                 # compute bbox center and bbox width & height
103 |                 bbox_center_x = 0.5 * float(x1 + x2)
104 |                 bbox_center_y = 0.5 * float(y1 + y2)
105 |                 bbox_width = float(x2 - x1 + 1)
106 |                 bbox_height = float(y2 - y1 + 1)
107 | 
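                # --- Added worked example (editor's illustration, made-up numbers): a box
                # (x1, y1, x2, y2) = (100, 200, 300, 400) on a 1920x1080 frame gives
                #   bbox_center_x = 0.5 * (100 + 300) = 200.0
                #   bbox_center_y = 0.5 * (200 + 400) = 300.0
                #   bbox_width = 300 - 100 + 1 = 201.0, bbox_height = 400 - 200 + 1 = 201.0
                # and the normalization below maps this to
                #   (200/1920, 300/1080, 201/1920, 201/1080) ≈ (0.104167, 0.277778, 0.104688, 0.186111)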
108 |                 # normalize bbox center and size to [0.0, 1.0]
109 |                 bbox_center_x /= W
110 |                 bbox_center_y /= H
111 |                 bbox_width /= W
112 |                 bbox_height /= H
113 | 
114 |                 # print intermediate results to check that parsing is correct...
115 |                 # print(track_id, x1, y1, x2, y2, class_type)
116 | 
117 |                 # one line per object in this frame's label file
118 |                 obj_str = '{:d} {:d} {:.6f} {:.6f} {:.6f} {:.6f}\n'.format(
119 |                     class_id,  # class id: counted from 0
120 |                     track_id,  # track id: counted from 1
121 |                     bbox_center_x,  # center_x
122 |                     bbox_center_y,  # center_y
123 |                     bbox_width,  # bbox_w
124 |                     bbox_height)  # bbox_h
125 |                 # print(obj_str, end='')
126 |                 fr_label_objs.append(obj_str)
127 | 
128 |             # ----- frame parsed, write its label file: one txt label file per frame image
129 |             label_f_path = seq_label_dir + '/{:05d}.txt'.format(fr_id)
130 |             with open(label_f_path, 'w', encoding='utf-8') as w_h:
131 |                 for obj in fr_label_objs:
132 |                     w_h.write(obj)
133 |             # print('{} written\n'.format(label_f_path))
134 | 
135 |     return id_set_dict
136 | 
137 | 
138 | """
139 | Convert the DarkLabel annotation format: frame# n_obj [id, x1, y1, x2, y2, label]
140 | into the MCMOT input format:
141 | 1. one txt label file per image
142 | 2. one detection target per line: cls_id, track_id, center_x, center_y, bbox_w, bbox_h (6 columns per target)
143 | """
144 | 
145 | 
146 | def dark_label2mcmot_label(data_root, one_plus=True, dict_path=None, viz_root=None):
147 |     """
148 |     :param data_root:
149 |     :param one_plus:
150 |     :param dict_path:
151 |     :param viz_root:
152 |     :return:
153 |     """
154 |     if not os.path.isdir(data_root):
155 |         print('[Err]: invalid data root')
156 |         return
157 | 
158 |     img_root = data_root + '/JPEGImages'
159 |     if not os.path.isdir(img_root):
160 |         print('[Err]: invalid image root')
161 |         return  # fix: bail out instead of continuing with a missing image root
162 |     # create the root directory for label files
163 |     label_root = data_root + '/labels_with_ids'
164 |     if not os.path.isdir(label_root):
165 |         os.makedirs(label_root)
166 |     else:
167 |         shutil.rmtree(label_root)
168 |         os.makedirs(label_root)
169 | 
170 |     # ---------- parameter initialization
171 |     # set the [start] track id of each detection class for the video seqs
172 |     global start_id_dict
173 |     start_id_dict = defaultdict(int)  # str => int
174 |     for class_type in classes:  # init
175 |         start_id_dict[class_type] = 0
176 | 
177 |     # record the per-class max track id of each video seq
178 |     global seq_max_id_dict
179 |     seq_max_id_dict = defaultdict(int)
180 | 
181 |     global fr_cnt
182 |     fr_cnt = 0
183 | 
184 |     # ----------- start processing
185 |     seq_list = os.listdir(img_root)
186 |     seqs = sorted(seq_list, key=lambda x: int(x.split('_')[-1]))
187 | 
188 |     # iterate over every video seq
189 |     for seq_name in seqs:
190 |         seq_dir = img_root + '/' + seq_name
191 |         print('\nProcessing seq', seq_dir)
192 | 
193 |         # create the label dir for this video seq
194 |         seq_label_dir = label_root + '/' + seq_name
195 |         if not os.path.isdir(seq_label_dir):
196 |             os.makedirs(seq_label_dir)
197 |         else:
198 |             shutil.rmtree(seq_label_dir)
199 |             os.makedirs(seq_label_dir)
200 | 
201 |         dark_txt_path = seq_dir + '/' + seq_name + '_gt.txt'
202 |         if not os.path.isfile(dark_txt_path):
203 |             print('[Warning]: invalid dark label file.')
204 |             continue
205 | 
206 |         # generate labels for the current seq
207 |         id_set_dict = gen_labels_for_seq(dark_txt_path, seq_label_dir, classes, one_plus)
208 | 
209 |         # report the per-class max track id of this video seq (counted from 1)
210 |         for k, v in seq_max_id_dict.items():
211 |             print('seq {}'.format(seq_name) + ' ' +
212 |                   k + ' max track id {:d}'.format(v))
213 | 
214 |             # report the number of unique track ids of each class in this seq
215 |             cls_id_set = id_set_dict[k]
216 |             print('seq {}'.format(seq_name) + ' ' +
217 |                   k + ' track id number {:d}'.format(len(cls_id_set)))
218 | 
219 |             if len(cls_id_set) != v:
220 |                 print(cls_id_set)
221 | 
222 |         # after a seq is done, update each class's start track id from seq_max_id_dict
223 |         # for k, v in start_id_dict.items():
224 |         #     start_id_dict[k] += seq_max_id_dict[k]
225 | 
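        # --- Added worked example (editor's illustration): how the offsets keep track ids
        # globally unique across sequences. If seq_01 contains 3 distinct 'car' ids, then
        # start_id_dict['car'] grows by 3 below, so a car with local id 1 in seq_02 is
        # written by gen_labels_for_seq above as dataset-wide id 1 + 3 = 4.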
226 |         # after a seq is done, update each class's start track id from id_set_dict
227 |         for k, v in start_id_dict.items():
228 |             start_id_dict[k] += len(id_set_dict[k])
229 | 
230 |     # report the total number of track ids of each detection class over all seqs
231 |     print('\n')
232 |     for k, v in start_id_dict.items():
233 |         print(k + ' total ' + str(v) + ' track ids')
234 |     print('Total {} frames.'.format(fr_cnt))
235 | 
236 |     # serialize max_id_dict to disk
237 |     if dict_path is not None:
238 |         max_id_dict = {cls2id[k]: v for k, v in start_id_dict.items()}
239 |         # np.savez opens the target file itself; no separate file handle is needed
240 |         np.savez(dict_path, max_id_dict=max_id_dict)  # set key 'max_id_dict'
241 | 
242 |         print('{:s} dumped.'.format(dict_path))
243 | 
244 | 
245 | def gen_mcmot_data(img_root, out_f_path):
246 |     """
247 |     Write the full path of every jpg frame under img_root into one train list file.
248 |     :param img_root:
249 |     :param out_f_path:
250 |     """
251 |     if not os.path.isdir(img_root):
252 |         print('[Err]: invalid image root.')
253 |         return
254 | 
255 |     dir_names = [img_root + '/' + x for x in os.listdir(img_root) if os.path.isdir(img_root + '/' + x)]
256 | 
257 |     with open(out_f_path, 'w', encoding='utf-8') as w_h:
258 |         for dir_name in tqdm(dir_names):
259 |             for img_name in os.listdir(dir_name):
260 |                 if not img_name.endswith('.jpg'):
261 |                     continue
262 | 
263 |                 img_path = dir_name + '/' + img_name
264 |                 if not os.path.isfile(img_path):
265 |                     print('[Warning]: invalid image file.')
266 |                     continue
267 | 
268 |                 w_h.write(img_path + '\n')
269 | 
270 | 
271 | if __name__ == '__main__':
272 |     dark_label2mcmot_label(data_root='/mnt/diskb/even/dataset/MCMOT',
273 |                            one_plus=True,
274 |                            dict_path='/mnt/diskb/even/dataset/MCMOT/max_id_dict.npz',
275 |                            viz_root=None)
276 | 
277 |     gen_mcmot_data(img_root='/mnt/diskb/even/dataset/MCMOT/JPEGImages',
278 |                    out_f_path='/mnt/diskb/even/YOLOV4/data/train_mcmot.txt')
279 | 
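# --- Added usage sketch (editor's illustration, not original code): reading the dict
# saved by dark_label2mcmot_label back from disk. np.savez wraps the dict in a 0-d
# object array, hence allow_pickle and the [()] unwrapping; the counts shown are
# hypothetical.
#   data = np.load('/mnt/diskb/even/dataset/MCMOT/max_id_dict.npz', allow_pickle=True)
#   max_id_dict = data['max_id_dict'][()]  # e.g. {0: 1276, 1: 842, ...}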
-------------------------------------------------------------------------------- /utils/torch_utils.py: --------------------------------------------------------------------------------
1 | import math
2 | import os
3 | import time
4 | from copy import deepcopy
5 | 
6 | import torch
7 | import torch.backends.cudnn as cudnn
8 | import torch.nn as nn
9 | import torch.nn.functional as F
10 | 
11 | 
12 | def init_seeds(seed=0):
13 |     torch.manual_seed(seed)
14 | 
15 |     # Remove randomness (may be slower on Tesla GPUs)  # https://pytorch.org/docs/stable/notes/randomness.html
16 |     if seed == 0:
17 |         cudnn.deterministic = True
18 |         cudnn.benchmark = False
19 | 
20 | 
21 | def select_device(device='', apex=False, batch_size=None):
22 |     # device = 'cpu' or '0' or '0,1,2,3'
23 |     cpu_request = device.lower() == 'cpu'
24 |     if device and not cpu_request:  # if device requested other than 'cpu'
25 |         os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
26 |         assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device  # check availability
27 | 
28 |     cuda = False if cpu_request else torch.cuda.is_available()
29 |     if cuda:
30 |         c = 1024 ** 2  # bytes to MB
31 |         ng = torch.cuda.device_count()
32 |         if ng > 1 and batch_size:  # check that batch_size is compatible with device_count
33 |             assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng)
34 |         x = [torch.cuda.get_device_properties(i) for i in range(ng)]
35 |         s = 'Using CUDA ' + ('Apex ' if apex else '')  # apex for mixed precision https://github.com/NVIDIA/apex
36 |         for i in range(0, ng):
37 |             if i == 1:
38 |                 s = ' ' * len(s)
39 |             print("%sdevice%g _CudaDeviceProperties(name='%s', total_memory=%dMB)" %
40 |                   (s, i, x[i].name, x[i].total_memory / c))
41 |     else:
42 |         print('Using CPU')
43 | 
44 |     print('')  # skip a line
45 |     return torch.device('cuda:0' if cuda else 'cpu')
46 | 
47 | 
48 | def time_synchronized():
49 |     if torch.cuda.is_available(): torch.cuda.synchronize()  # wait for pending kernels so timings are accurate
50 |     return time.time()
51 | 
52 | 
53 | def initialize_weights(model):
54 |     for m in model.modules():
55 |         t = type(m)
56 |         if t is nn.Conv2d:
57 |             pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
58 |         elif t is nn.BatchNorm2d:
59 |             m.eps = 1e-4
60 |             m.momentum = 0.03
61 |         elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
62 |             m.inplace = True
63 | 
64 | 
65 | def find_modules(model, mclass=nn.Conv2d):
66 |     # finds layer indices matching module class 'mclass'
67 |     return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
68 | 
69 | 
70 | def fuse_conv_and_bn(conv, bn):
71 |     # https://tehnokv.com/posts/fusing-batchnorm-and-conv/
72 |     with torch.no_grad():
73 |         # init
74 |         fusedconv = torch.nn.Conv2d(conv.in_channels,
75 |                                     conv.out_channels,
76 |                                     kernel_size=conv.kernel_size,
77 |                                     stride=conv.stride,
78 |                                     padding=conv.padding,
79 |                                     bias=True)
80 | 
81 |         # prepare filters
82 |         w_conv = conv.weight.clone().view(conv.out_channels, -1)
83 |         w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
84 |         fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))
85 | 
86 |         # prepare spatial bias
87 |         if conv.bias is not None:
88 |             b_conv = conv.bias
89 |         else:
90 |             b_conv = torch.zeros(conv.weight.size(0))
91 |         b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
92 |         fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)
93 | 
94 |         return fusedconv
95 | 
96 | 
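# --- Added sanity check (editor's illustration, not original code): a fused conv should
# match conv followed by BN in eval mode; shapes and BN statistics are arbitrary
# assumptions for the test.
def _demo_fuse_conv_and_bn():
    conv = nn.Conv2d(8, 16, kernel_size=3, padding=1, bias=False).eval()
    bn = nn.BatchNorm2d(16).eval()
    bn.running_mean.uniform_(-1., 1.)  # fake some non-trivial BN statistics
    bn.running_var.uniform_(0.5, 1.5)
    x = torch.randn(2, 8, 32, 32)
    fused = fuse_conv_and_bn(conv, bn)
    return torch.allclose(bn(conv(x)), fused(x), atol=1e-5)  # True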
97 | def model_info(model, verbose=False):
98 |     # Prints a line-by-line description of a PyTorch model
99 |     n_p = sum(x.numel() for x in model.parameters())  # number parameters
100 |     n_g = sum(x.numel() for x in model.parameters() if x.requires_grad)  # number gradients
101 |     if verbose:
102 |         print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))
103 |         for i, (name, p) in enumerate(model.named_parameters()):
104 |             name = name.replace('module_list.', '')
105 |             print('%5g %40s %9s %12g %20s %10.3g %10.3g' %
106 |                   (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))
107 | 
108 |     try:  # FLOPS
109 |         from thop import profile
110 |         macs, _ = profile(model, inputs=(torch.zeros(1, 3, 480, 640),), verbose=False)
111 |         fs = ', %.1f GFLOPS' % (macs / 1E9 * 2)
112 |     except Exception:  # thop missing or profiling failed: skip the FLOPS report
113 |         fs = ''
114 | 
115 |     print('Model Summary: %g layers, %g parameters, %g gradients%s' % (len(list(model.parameters())), n_p, n_g, fs))
116 | 
117 | 
118 | def load_classifier(name='resnet101', n=2):
119 |     # Loads a pretrained model reshaped to n-class output
120 |     import pretrainedmodels  # https://github.com/Cadene/pretrained-models.pytorch#torchvision
121 |     model = pretrainedmodels.__dict__[name](num_classes=1000, pretrained='imagenet')
122 | 
123 |     # Display model properties
124 |     for x in ['model.input_size', 'model.input_space', 'model.input_range', 'model.mean', 'model.std']:
125 |         print(x + ' =', eval(x))
126 | 
127 |     # Reshape output to n classes
128 |     filters = model.last_linear.weight.shape[1]
129 |     model.last_linear.bias = torch.nn.Parameter(torch.zeros(n))
130 |     model.last_linear.weight = torch.nn.Parameter(torch.zeros(n, filters))
131 |     model.last_linear.out_features = n
132 |     return model
133 | 
134 | 
135 | def scale_img(img, ratio=1.0, same_shape=True):  # img(16,3,256,416), r=ratio
136 |     # scales img(bs,3,y,x) by ratio
137 |     h, w = img.shape[2:]
138 |     s = (int(h * ratio), int(w * ratio))  # new size
139 |     img = F.interpolate(img, size=s, mode='bilinear', align_corners=False)  # resize
140 |     if not same_shape:  # pad/crop img
141 |         gs = 64  # (pixels) grid size
142 |         h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
143 |     return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean
144 | 
145 | 
146 | class ModelEMA:
147 |     """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
148 |     Keep a moving average of everything in the model state_dict (parameters and buffers).
149 |     This is intended to allow functionality like
150 |     https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
151 |     A smoothed version of the weights is necessary for some training schemes to perform well.
152 |     E.g. Google's hyper-params for training MNASNet, MobileNet-V3, EfficientNet, etc., which use
153 |     RMSprop with a short 2.4-3 epoch decay period and a slow LR decay rate of .96-.99, require EMA
154 |     smoothing of weights to match results. Pay attention to the decay constant you are using
155 |     relative to your update count per epoch.
156 |     To keep EMA from using GPU resources, set device='cpu'. This will save a bit of memory but
157 |     disable validation of the EMA weights. Validation will have to be done manually in a separate
158 |     process, or after the training stops converging.
159 |     This class is sensitive to where it is initialized in the sequence of model init,
160 |     GPU assignment and distributed training wrappers.
161 |     I've tested with the sequence in my own train.py for torch.DataParallel, apex.DDP, and single-GPU.
162 |     """
163 | 
164 |     def __init__(self, model, decay=0.9999, device=''):
165 |         # make a copy of the model for accumulating the moving average of weights
166 |         self.ema = deepcopy(model)
167 |         self.ema.eval()
168 |         self.updates = 0  # number of EMA updates
169 |         self.decay = lambda x: decay * (1 - math.exp(-x / 2000))  # decay exponential ramp (to help early epochs)
170 |         self.device = device  # perform ema on different device from model if set
171 |         if device:
172 |             self.ema.to(device=device)
173 |         for p in self.ema.parameters():
174 |             p.requires_grad_(False)
175 | 
176 |     def update(self, model):
177 |         self.updates += 1
178 |         d = self.decay(self.updates)
179 |         with torch.no_grad():
180 |             if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel):
181 |                 msd, esd = model.module.state_dict(), self.ema.module.state_dict()
182 |             else:
183 |                 msd, esd = model.state_dict(), self.ema.state_dict()
184 | 
185 |             for k, v in esd.items():
186 |                 if v.dtype.is_floating_point:
187 |                     v *= d
188 |                     v += (1.
- d) * msd[k].detach() 189 | 190 | def update_attr(self, model): 191 | # Assign attributes (which may change during training) 192 | for k in model.__dict__.keys(): 193 | if not k.startswith('_'): 194 | setattr(self.ema, k, getattr(model, k)) 195 | -------------------------------------------------------------------------------- /yolov4-tiny-3l_no_group_id_no_upsample.cfg: -------------------------------------------------------------------------------- 1 | [net] 2 | # Testing 3 | #batch=1 4 | #subdivisions=1 5 | # Training 6 | batch=64 7 | subdivisions=4 8 | width=768 9 | height=448 10 | channels=3 11 | momentum=0.9 12 | decay=0.0005 13 | angle=0 14 | saturation = 1.5 15 | exposure = 1.5 16 | hue=.1 17 | 18 | learning_rate=0.00002 19 | burn_in=1000 20 | max_batches = 500200 21 | policy=steps 22 | steps=400000,450000 23 | scales=.1,.1 24 | 25 | #mosaic=1 26 | 27 | [convolutional] 28 | batch_normalize=1 29 | filters=32 30 | size=3 31 | stride=2 32 | pad=1 33 | activation=leaky 34 | 35 | [convolutional] 36 | batch_normalize=1 37 | filters=64 38 | size=3 39 | stride=2 40 | pad=1 41 | activation=leaky 42 | 43 | [convolutional] 44 | batch_normalize=1 45 | filters=64 46 | size=3 47 | stride=1 48 | pad=1 49 | activation=leaky 50 | 51 | [route] 52 | layers=-1 53 | #groups=2 54 | #group_id=1 55 | 56 | [convolutional] 57 | batch_normalize=1 58 | filters=32 59 | size=3 60 | stride=1 61 | pad=1 62 | activation=leaky 63 | 64 | [convolutional] 65 | batch_normalize=1 66 | filters=32 67 | size=3 68 | stride=1 69 | pad=1 70 | activation=leaky 71 | 72 | [route] 73 | layers = -1,-2 74 | 75 | [convolutional] 76 | batch_normalize=1 77 | filters=64 78 | size=1 79 | stride=1 80 | pad=1 81 | activation=leaky 82 | 83 | [route] 84 | layers = -6,-1 85 | 86 | [convolutional] 87 | batch_normalize=1 88 | filters=64 89 | size=3 90 | stride=1 91 | pad=1 92 | activation=leaky 93 | 94 | [route] 95 | layers=-1 96 | #groups=2 97 | #group_id=1 98 | 99 | [convolutional] 100 | batch_normalize=1 101 | filters=32 102 | size=3 103 | stride=1 104 | pad=1 105 | activation=leaky 106 | 107 | [convolutional] 108 | batch_normalize=1 109 | filters=32 110 | size=3 111 | stride=1 112 | pad=1 113 | activation=leaky 114 | 115 | [route] 116 | layers = -1,-2 117 | 118 | [convolutional] 119 | batch_normalize=1 120 | filters=64 121 | size=1 122 | stride=1 123 | pad=1 124 | activation=leaky 125 | 126 | [route] 127 | layers = -6,-1 128 | 129 | [maxpool] 130 | size=2 131 | stride=2 132 | 133 | [convolutional] 134 | batch_normalize=1 135 | filters=128 136 | size=3 137 | stride=1 138 | pad=1 139 | activation=leaky 140 | 141 | [route] 142 | layers=-1 143 | #groups=2 144 | #group_id=1 145 | 146 | [convolutional] 147 | batch_normalize=1 148 | filters=64 149 | size=3 150 | stride=1 151 | pad=1 152 | activation=leaky 153 | 154 | [convolutional] 155 | batch_normalize=1 156 | filters=64 157 | size=3 158 | stride=1 159 | pad=1 160 | activation=leaky 161 | 162 | [route] 163 | layers = -1,-2 164 | 165 | [convolutional] 166 | batch_normalize=1 167 | filters=128 168 | size=1 169 | stride=1 170 | pad=1 171 | activation=leaky 172 | 173 | [route] 174 | layers = -6,-1 175 | 176 | [maxpool] 177 | size=2 178 | stride=2 179 | 180 | [convolutional] 181 | batch_normalize=1 182 | filters=256 183 | size=3 184 | stride=1 185 | pad=1 186 | activation=leaky 187 | 188 | [route] 189 | layers=-1 190 | #groups=2 191 | #group_id=1 192 | 193 | [convolutional] 194 | batch_normalize=1 195 | filters=128 196 | size=3 197 | stride=1 198 | pad=1 199 | activation=leaky 200 | 201 | 
[convolutional] 202 | batch_normalize=1 203 | filters=128 204 | size=3 205 | stride=1 206 | pad=1 207 | activation=leaky 208 | 209 | [route] 210 | layers = -1,-2 211 | 212 | [convolutional] 213 | batch_normalize=1 214 | filters=256 215 | size=1 216 | stride=1 217 | pad=1 218 | activation=leaky 219 | 220 | [route] 221 | layers = -6,-1 222 | 223 | [maxpool] 224 | size=2 225 | stride=2 226 | 227 | [convolutional] 228 | batch_normalize=1 229 | filters=512 230 | size=3 231 | stride=1 232 | pad=1 233 | activation=leaky 234 | 235 | ################################## 236 | 237 | [convolutional] 238 | batch_normalize=1 239 | filters=256 240 | size=1 241 | stride=1 242 | pad=1 243 | activation=leaky 244 | 245 | [convolutional] 246 | batch_normalize=1 247 | filters=512 248 | size=3 249 | stride=1 250 | pad=1 251 | activation=leaky 252 | 253 | [convolutional] 254 | size=1 255 | stride=1 256 | pad=1 257 | filters=30 258 | activation=linear 259 | 260 | [yolo] 261 | mask = 6,7,8 262 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 263 | classes=5 264 | num=9 265 | jitter=.3 266 | ignore_thresh = .7 267 | truth_thresh = 1 268 | scale_x_y = 1.05 269 | iou_thresh=0.213 270 | cls_normalizer=1.0 271 | iou_normalizer=0.07 272 | iou_loss=ciou 273 | nms_kind=diounms 274 | beta_nms=0.6 275 | # iou_thresh_kind=ciou 276 | 277 | [route] 278 | layers = -4 279 | 280 | [convolutional] 281 | batch_normalize=1 282 | filters=128 283 | size=1 284 | stride=1 285 | pad=1 286 | activation=leaky 287 | 288 | [upsample] 289 | stride=2 290 | 291 | [route] 292 | layers = -1, 30 293 | 294 | [convolutional] 295 | batch_normalize=1 296 | filters=256 297 | size=3 298 | stride=1 299 | pad=1 300 | activation=leaky 301 | 302 | [convolutional] 303 | size=1 304 | stride=1 305 | pad=1 306 | filters=30 307 | activation=linear 308 | 309 | [yolo] 310 | mask = 3,4,5 311 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 312 | classes=5 313 | num=9 314 | jitter=.3 315 | ignore_thresh = .7 316 | truth_thresh = 1 317 | scale_x_y = 1.05 318 | iou_thresh=0.213 319 | cls_normalizer=1.0 320 | iou_normalizer=0.07 321 | iou_loss=ciou 322 | nms_kind=diounms 323 | beta_nms=0.6 324 | # iou_thresh_kind=ciou 325 | 326 | [route] 327 | layers = -3 328 | 329 | [convolutional] 330 | batch_normalize=1 331 | filters=64 332 | size=1 333 | stride=1 334 | pad=1 335 | activation=leaky 336 | 337 | [upsample] 338 | stride=2 339 | 340 | [route] 341 | layers = -1, 22 342 | 343 | [convolutional] 344 | batch_normalize=1 345 | filters=128 346 | size=3 347 | stride=1 348 | pad=1 349 | activation=leaky 350 | 351 | [convolutional] 352 | size=1 353 | stride=1 354 | pad=1 355 | filters=30 356 | activation=linear 357 | 358 | [yolo] 359 | mask = 0,1,2 360 | anchors = 12, 16, 19, 36, 40, 28, 36, 75, 76, 55, 72, 146, 142, 110, 192, 243, 459, 401 361 | classes=5 362 | num=9 363 | jitter=.3 364 | ignore_thresh = .7 365 | truth_thresh = 1 366 | scale_x_y = 1.05 367 | iou_thresh=0.213 368 | cls_normalizer=1.0 369 | iou_normalizer=0.07 370 | iou_loss=ciou 371 | nms_kind=diounms 372 | beta_nms=0.6 373 | #iou_thresh_kind=ciou 374 | 375 | [route] 376 | layers=-17 377 | 378 | [convolutional] 379 | size=1 380 | stride=1 381 | pad=1 382 | filters=128 383 | activation=linear 384 | 385 | [route] 386 | layers=-12 387 | 388 | [convolutional] 389 | size=1 390 | stride=1 391 | pad=1 392 | filters=128 393 | activation=linear 394 | 395 | [route] 396 | layers=-7 397 | 398 | [convolutional] 399 | size=1 400 | stride=1 401 | 
pad=1 402 | filters=128 403 | activation=linear 404 | --------------------------------------------------------------------------------