├── .gitignore ├── LICENSE ├── README.md ├── assets ├── result_1.jpg ├── running_1.jpg └── running_2.jpg ├── evaluate ├── APMRToolkits │ ├── __init__.py │ ├── database.py │ └── image.py ├── JIToolkits │ ├── JI_tools.py │ └── matching.py ├── compute_APMR.py ├── compute_JI.py └── run_eval.sh ├── hubconf.py ├── lib ├── backbone │ ├── fpn.py │ └── resnet50.py ├── det_opr │ ├── anchors_generator.py │ ├── bbox_opr.py │ ├── cascade_roi_target.py │ ├── find_top_rpn_proposals.py │ ├── fpn_anchor_target.py │ ├── fpn_roi_target.py │ ├── loss_opr.py │ └── utils.py ├── layers │ ├── batch_norm.py │ ├── gpu_nms │ │ ├── compile.md │ │ └── nms.cu │ ├── lib_nms.so │ ├── nms.py │ ├── roi_pool.py │ └── setup.sh └── module │ └── rpn.py ├── megvii ├── cascade.rcnn │ └── megvii │ │ ├── res50.rcnn.double.heads.one.stage.baseline │ │ ├── config.py │ │ ├── demo.py │ │ ├── network.py │ │ ├── test_net.py │ │ ├── train_net.py │ │ └── visulize_json.py │ │ ├── res50.rcnn.double.heads.two.stages │ │ ├── config.py │ │ ├── demo.py │ │ ├── network.py │ │ ├── test_net.py │ │ ├── train_net.py │ │ └── visulize_json.py │ │ ├── res50.rcnn.one.head.one.stage.baseline │ │ ├── config.py │ │ ├── demo.py │ │ ├── network.py │ │ ├── test_net.py │ │ ├── testing.py │ │ ├── train_net.py │ │ └── visulize_json.py │ │ ├── res50.rcnn.one.head.two.stages.refinement │ │ ├── config.py │ │ ├── demo.py │ │ ├── network.py │ │ ├── test_net.py │ │ ├── train_net.py │ │ └── visulize_json.py │ │ └── res50.rcnn.one.head.two.stages │ │ ├── config.py │ │ ├── demo.py │ │ ├── network.py │ │ ├── test_net.py │ │ ├── train_net.py │ │ └── visulize_json.py ├── rcnn.emd │ └── megvii │ │ ├── res50.rcnn.double.heads.set.nms.refine.head │ │ ├── config.py │ │ ├── demo.py │ │ ├── network.py │ │ ├── test_net.py │ │ └── train_net.py │ │ ├── res50.rcnn.double.heads.set.nms.refinement │ │ ├── config.py │ │ ├── demo.py │ │ ├── network.py │ │ ├── test_net.py │ │ ├── train_net.py │ │ └── visulize_json.py │ │ └── res50.rcnn.double.heads.set.nms │ │ ├── config.py │ │ ├── demo.py │ │ ├── network.py │ │ ├── test_net.py │ │ └── train_net.py ├── rcnn │ └── megvii │ │ └── res50.rcnn.one.head.nms │ │ ├── config.py │ │ ├── demo.py │ │ ├── network.py │ │ ├── record.txt │ │ ├── test_net.py │ │ └── train_net.py └── retinanet │ ├── res50.retinanet.fpn.double.heads.iou.inference │ ├── config.py │ ├── crowdhuman.py │ ├── demo.py │ ├── network.py │ ├── rpn_anchor_target_opr.py │ ├── test_net.py │ └── train_net.py │ ├── res50.retinanet.fpn.one.head.iou.inference.baseline.v2 │ ├── common.py │ ├── config.py │ ├── crowdhuman.py │ ├── demo.py │ ├── emd_cpu_nms.py │ ├── network.py │ ├── record.txt │ ├── test_net.py │ └── train_net.py │ ├── res50.retinanet.fpn.one.head.iou.inference.baseline │ ├── anchors_generator.py │ ├── common.py │ ├── config.py │ ├── crowdhuman.py │ ├── demo.py │ ├── network.py │ ├── record.txt │ ├── test_net.py │ └── train_net.py │ └── res50.retinanet.fpn.one.head.iou.inference │ ├── common.py │ ├── config.py │ ├── crowdhuman.py │ ├── demo.py │ ├── network.py │ ├── test_net.py │ └── train_net.py ├── model ├── cascade_emd │ ├── config.py │ ├── dataset.py │ ├── network.py │ ├── test.py │ └── train.py ├── cascade_fpn │ ├── config.py │ ├── dataset.py │ ├── inference.py │ ├── network.py │ ├── test.py │ └── train.py ├── emd_refine │ ├── config.py │ ├── dataset.py │ ├── inference.py │ ├── network.py │ ├── test.py │ └── train.py ├── emd_simple │ ├── config.py │ ├── dataset.py │ ├── inference.py │ ├── network.py │ ├── test.py │ └── train.py └── fpn_baseline │ ├── config.py │ ├── 
dataset.py │ ├── inference.py │ ├── network.py │ ├── test.py │ └── train.py ├── tools ├── common.py └── visulize_json.py └── utils ├── box.py ├── common.py ├── detToolkits ├── .gitignore ├── detools │ ├── __init__.py │ ├── box.py │ ├── database.py │ ├── evaluator.py │ └── image.py ├── eval.py └── eval_csv.py ├── detbox.py ├── draw.py ├── infrastructure.py ├── matching.py ├── misc_utils.py ├── nms ├── __init__.py ├── nms │ ├── __init__.py │ ├── cpu_nms.c │ ├── cpu_nms.pyx │ ├── gpu_nms.cpp │ ├── gpu_nms.hpp │ ├── gpu_nms.pyx │ └── gpu_nms_kernel.cu ├── nms_opr.py └── setup.py ├── nms_wrapper.py ├── py_cpu_nms.py ├── set_nms_utils.py └── visual_utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.so 2 | .DS_Store 3 | *.sh 4 | *.md 5 | *.pth 6 | -------------------------------------------------------------------------------- /assets/result_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megvii-model/CrowdDetection/9786f58869a55af3e0b51fc78f8638a825dae4a2/assets/result_1.jpg -------------------------------------------------------------------------------- /assets/running_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megvii-model/CrowdDetection/9786f58869a55af3e0b51fc78f8638a825dae4a2/assets/running_1.jpg -------------------------------------------------------------------------------- /assets/running_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megvii-model/CrowdDetection/9786f58869a55af3e0b51fc78f8638a825dae4a2/assets/running_2.jpg -------------------------------------------------------------------------------- /evaluate/APMRToolkits/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf8 -*- 2 | __author__ = 'jyn' 3 | __email__ = 'jyn@megvii.com' 4 | 5 | from .image import * 6 | from .database import * 7 | -------------------------------------------------------------------------------- /evaluate/APMRToolkits/database.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | from .image import * 5 | 6 | PERSON_CLASSES = ['background', 'person'] 7 | # DBBase 8 | class Database(object): 9 | def __init__(self, gtpath=None, dtpath=None, body_key=None, head_key=None, mode=0): 10 | """ 11 | mode=0: only body; mode=1: only head 12 | """ 13 | self.images = dict() 14 | self.eval_mode = mode 15 | self.loadData(gtpath, body_key, head_key, True) 16 | self.loadData(dtpath, body_key, head_key, False) 17 | 18 | self._ignNum = sum([self.images[i]._ignNum for i in self.images]) 19 | self._gtNum = sum([self.images[i]._gtNum for i in self.images]) 20 | self._imageNum = len(self.images) 21 | self.scorelist = None 22 | 23 | def loadData(self, fpath, body_key=None, head_key=None, if_gt=True): 24 | assert os.path.isfile(fpath), fpath + " does not exist!" 
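# NOTE: .odgt annotation files are JSON-lines: each line is one standalone
# JSON record (one image per line), hence the per-line json.loads below.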
25 | with open(fpath, "r") as f:
26 | lines = f.readlines()
27 | records = [json.loads(line.strip('\n')) for line in lines]
28 | if if_gt:
29 | for record in records:
30 | self.images[record["ID"]] = Image(self.eval_mode)
31 | self.images[record["ID"]].load(record, body_key, head_key, PERSON_CLASSES, True)
32 | else:
33 | for record in records:
34 | self.images[record["ID"]].load(record, body_key, head_key, PERSON_CLASSES, False)
35 | self.images[record["ID"]].clip_all_boader()
36 | 
37 | def compare(self, thres=0.5, matching=None):
38 | """
39 | match the detection results with the ground truth across the whole database
40 | """
41 | assert matching is None or matching == "VOC", matching
42 | scorelist = list()
43 | for ID in self.images:
44 | if matching == "VOC":
45 | result = self.images[ID].compare_voc(thres)
46 | else:
47 | result = self.images[ID].compare_caltech(thres)
48 | scorelist.extend(result)
49 | # sort the matched detections by score in descending order
50 | scorelist.sort(key=lambda x: x[0][-1], reverse=True)
51 | self.scorelist = scorelist
52 | 
53 | def eval_MR(self, ref="CALTECH_-2"):
54 | """
55 | evaluate by Caltech-style log-average miss rate
56 | ref: str - "CALTECH_-2"/"CALTECH_-4"
57 | """
58 | # find the first index whose value reaches the target
59 | def _find_gt(lst, target):
60 | for idx, item in enumerate(lst):
61 | if item >= target:
62 | return idx
63 | return len(lst)-1
64 | 
65 | assert ref == "CALTECH_-2" or ref == "CALTECH_-4", ref
66 | if ref == "CALTECH_-2":
67 | # CALTECH_MRREF_2: anchor points (from 10^-2 to 1) as in P.Dollar's paper
68 | ref = [0.0100, 0.0178, 0.03160, 0.0562, 0.1000, 0.1778, 0.3162, 0.5623, 1.000]
69 | else:
70 | # CALTECH_MRREF_4: anchor points (from 10^-4 to 1) as in S.Zhang's paper
71 | ref = [0.0001, 0.0003, 0.00100, 0.0032, 0.0100, 0.0316, 0.1000, 0.3162, 1.000]
72 | 
73 | if self.scorelist is None:
74 | self.compare()
75 | 
76 | tp, fp = 0.0, 0.0
77 | fppiX, fppiY = list(), list()
78 | for i, item in enumerate(self.scorelist):
79 | if item[1] == 1:
80 | tp += 1.0
81 | elif item[1] == 0:
82 | fp += 1.0
83 | 
84 | fn = (self._gtNum - self._ignNum) - tp
85 | recall = tp / (tp + fn)
86 | precision = tp / (tp + fp)
87 | missrate = 1.0 - recall
88 | fppi = fp / self._imageNum
89 | fppiX.append(fppi)
90 | fppiY.append(missrate)
91 | 
92 | score = list()
93 | for pos in ref:
94 | argmin = _find_gt(fppiX, pos)
95 | if argmin >= 0:
96 | score.append(fppiY[argmin])
97 | score = np.array(score)
98 | MR = np.exp(np.log(score).mean())
99 | return MR, (fppiX, fppiY)
100 | 
101 | def eval_AP(self):
102 | """
103 | evaluate by average precision
104 | """
105 | # integrate precision over recall (trapezoidal rule) to get AP
106 | def _calculate_map(recall, precision):
107 | assert len(recall) == len(precision)
108 | area = 0
109 | for i in range(1, len(recall)):
110 | delta_h = (precision[i-1] + precision[i]) / 2
111 | delta_w = recall[i] - recall[i-1]
112 | area += delta_w * delta_h
113 | return area
114 | 
115 | tp, fp = 0.0, 0.0
116 | rpX, rpY = list(), list()
117 | total_det = len(self.scorelist)
118 | total_gt = self._gtNum - self._ignNum
119 | total_images = self._imageNum
120 | 
121 | fpn = []
122 | recalln = []
123 | thr = []
124 | fppi = []
125 | for i, item in enumerate(self.scorelist):
126 | if item[1] == 1:
127 | tp += 1.0
128 | elif item[1] == 0:
129 | fp += 1.0
130 | fn = total_gt - tp
131 | recall = tp / (tp + fn)
132 | precision = tp / (tp + fp)
133 | rpX.append(recall)
134 | rpY.append(precision)
135 | fpn.append(fp)
136 | recalln.append(tp)
137 | thr.append(item[0][-1])
138 | fppi.append(fp/total_images)
139 | 
140 | AP = _calculate_map(rpX, rpY)
141 | return AP, (rpX, rpY, thr, fpn, recalln, fppi)
142 | 
143 | 
--------------------------------------------------------------------------------
/evaluate/compute_APMR.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from APMRToolkits import *
3 | 
4 | gt_path = '/data/CrowdHuman/annotation_val.odgt'
5 | dbName = 'human'
6 | def compute_APMR(dt_path, target_key=None, mode=0):
7 | database = Database(gt_path, dt_path, target_key, None, mode)
8 | database.compare()
9 | mAP, _ = database.eval_AP()
10 | mMR, _ = database.eval_MR()
11 | line = 'mAP:{:.4f}, mMR:{:.4f}.'.format(mAP, mMR)
12 | print(line)
13 | 
14 | if __name__ == "__main__":
15 | parser = argparse.ArgumentParser(description='Analyze a json result file with iou match')
16 | parser.add_argument('--detfile', required=True, help='path of json result file to load')
17 | parser.add_argument('--target_key', default=None, required=True)
18 | args = parser.parse_args()
19 | compute_APMR(args.detfile, args.target_key, 0)
20 | 
--------------------------------------------------------------------------------
/evaluate/compute_JI.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import math
4 | import argparse
5 | from multiprocessing import Queue, Process
6 | 
7 | from tqdm import tqdm
8 | import numpy as np
9 | 
10 | from JIToolkits.JI_tools import compute_matching, get_ignores
11 | 
12 | def add_path(path):
13 | if path not in sys.path:
14 | sys.path.insert(0, path)
15 | root_dir = '../'
16 | add_path(os.path.join(root_dir))
17 | import utils.misc_utils as misc_utils
18 | 
19 | gtfile = '/data/annotation_val.odgt'
20 | nr_procs = 10
21 | 
22 | def evaluation_all(path, target_key):
23 | records = misc_utils.load_json_lines(path)
24 | for i in range(10):
25 | score_thr = 1e-1 * i
26 | total = len(records)
27 | stride = math.ceil(total / nr_procs)
28 | result_queue = Queue(10000)
29 | results, procs = [], []
30 | for pid in range(nr_procs):
31 | start = pid*stride
32 | end = np.min([start+stride,total])
33 | sample_data = records[start:end]
34 | p = Process(target=compute_JI_with_ignore, args=(result_queue, sample_data, score_thr, target_key))
35 | p.start()
36 | procs.append(p)
37 | tqdm.monitor_interval = 0
38 | pbar = tqdm(total=total, leave = False, ascii = True)
39 | for _ in range(total):
40 | t = result_queue.get()
41 | results.append(t)
42 | pbar.update(1)
43 | for p in procs:
44 | p.join()
45 | pbar.close()
46 | line, mean_ratio = gather(results)
47 | line = 'score_thr:{:.1f}, {}'.format(score_thr, line)
48 | print(line)
49 | 
50 | def compute_JI_with_ignore(result_queue, records, score_thr, target_key, bm_thresh=0.5):
51 | for record in records:
52 | gt_boxes = misc_utils.load_bboxes(record, 'gtboxes', target_key, 'tag')
53 | gt_boxes[:,2:4] += gt_boxes[:,:2]
54 | gt_boxes = misc_utils.clip_boundary(gt_boxes, record['height'], record['width'])
55 | dt_boxes = misc_utils.load_bboxes(record, 'dtboxes', target_key, 'score')
56 | dt_boxes[:,2:4] += dt_boxes[:,:2]
57 | dt_boxes = misc_utils.clip_boundary(dt_boxes, record['height'], record['width'])
58 | keep = dt_boxes[:, -1] > score_thr
59 | dt_boxes = dt_boxes[keep][:, :-1]
60 | 
61 | gt_tag = np.array(gt_boxes[:,-1]!=-1)
62 | matches = compute_matching(dt_boxes, gt_boxes[gt_tag, :4], bm_thresh)
63 | # get the unmatched_indices
64 | matched_indices = np.array([j for (j,_) in matches])
65 | unmatched_indices = list(set(np.arange(dt_boxes.shape[0])) - set(matched_indices))
66 | num_ignore_dt = get_ignores(dt_boxes[unmatched_indices], gt_boxes[~gt_tag, :4], bm_thresh)
67 | matched_indices = np.array([j for (_,j) in matches])
68 | unmatched_indices = list(set(np.arange(gt_boxes[gt_tag].shape[0])) - set(matched_indices))
69 | num_ignore_gt = get_ignores(gt_boxes[gt_tag][unmatched_indices], gt_boxes[~gt_tag, :4], bm_thresh)
70 | # compute results
71 | eps = 1e-6
72 | k = len(matches)
73 | m = gt_tag.sum() - num_ignore_gt
74 | n = dt_boxes.shape[0] - num_ignore_dt
75 | ratio = k / (m + n - k + eps)
76 | recall = k / (m + eps)
77 | cover = k / (n + eps)
78 | noise = 1 - cover
79 | result_dict = dict(ratio = ratio, recall = recall, cover = cover,
80 | noise = noise, k = k, m = m, n = n)
81 | result_queue.put_nowait(result_dict)
82 | 
83 | def gather(results):
84 | assert len(results)
85 | img_num = 0
86 | for result in results:
87 | if result['n'] != 0 or result['m'] != 0:
88 | img_num += 1
89 | mean_ratio = np.sum([rb['ratio'] for rb in results]) / img_num
90 | mean_cover = np.sum([rb['cover'] for rb in results]) / img_num
91 | mean_recall = np.sum([rb['recall'] for rb in results]) / img_num
92 | mean_noise = 1 - mean_cover
93 | valids = np.sum([rb['k'] for rb in results])
94 | total = np.sum([rb['n'] for rb in results])
95 | gtn = np.sum([rb['m'] for rb in results])
96 | 
97 | #line = 'mean_ratio:{:.4f}, mean_cover:{:.4f}, mean_recall:{:.4f}, mean_noise:{:.4f}, valids:{}, total:{}, gtn:{}'.format(
98 | # mean_ratio, mean_cover, mean_recall, mean_noise, valids, total, gtn)
99 | line = 'mean_ratio:{:.4f}, valids:{}, total:{}, gtn:{}'.format(
100 | mean_ratio, valids, total, gtn)
101 | return line, mean_ratio
102 | 
103 | def common_process(func, cls_list, nr_procs):
104 | total = len(cls_list)
105 | stride = math.ceil(total / nr_procs)
106 | result_queue = Queue(10000)
107 | results, procs = [], []
108 | for pid in range(nr_procs):
109 | start = pid*stride
110 | end = np.min([start+stride,total])
111 | sample_data = cls_list[start:end]
112 | p = Process(target=func, args=(result_queue, sample_data))
113 | p.start()
114 | procs.append(p)
115 | for _ in range(total):
116 | t = result_queue.get()
117 | if t is None:
118 | continue
119 | results.append(t)
120 | for p in procs:
121 | p.join()
122 | return results
123 | 
124 | if __name__ == "__main__":
125 | parser = argparse.ArgumentParser(description='Analyze a json result file with iou match')
126 | parser.add_argument('--detfile', required=True, help='path of json result file to load')
127 | parser.add_argument('--target_key', required=True)
128 | args = parser.parse_args()
129 | evaluation_all(args.detfile, args.target_key)
130 | 
--------------------------------------------------------------------------------
/evaluate/run_eval.sh:
--------------------------------------------------------------------------------
1 | python3 ./compute_APMR.py --detfile ../model/crowd_emd_simple/outputs/eval_dump/dump-30.json --target_key 'box'
2 | python3 ./compute_JI.py --detfile ../model/crowd_emd_simple/outputs/eval_dump/dump-30.json --target_key 'box'
3 | python3 ./compute_APMR.py --detfile ../model/crowd_emd_refine/outputs/eval_dump/dump-30.json --target_key 'box'
4 | python3 ./compute_JI.py --detfile ../model/crowd_emd_refine/outputs/eval_dump/dump-30.json --target_key 'box'
5 | python3 ./compute_APMR.py --detfile ../model/crowd_fpn_baseline/outputs/eval_dump/dump-30.json --target_key 'box'
6 | python3 ./compute_JI.py --detfile ../model/crowd_fpn_baseline/outputs/eval_dump/dump-30.json --target_key 'box'
7 | 
--------------------------------------------------------------------------------
/hubconf.py:
--------------------------------------------------------------------------------
1 | import sys
2 | 
3 | sys.path.insert(0, "lib/")
4 | sys.path.insert(0, "model/emd_simple")
5 | from network import Network as CrowdDetEMDSimple
6 | from inference import get_data
7 | sys.path.pop(0)
8 | sys.path.insert(0, "model/emd_refine")
9 | from network import Network as CrowdDetEMDRefine
10 | sys.path.pop(0)
11 | sys.path.pop(0)
12 | 
--------------------------------------------------------------------------------
/lib/backbone/fpn.py:
--------------------------------------------------------------------------------
1 | import math
2 | 
3 | import megengine.functional as F
4 | import megengine.module as M
5 | 
6 | class FPN(M.Module):
7 | """
8 | This module implements Feature Pyramid Network.
9 | It creates pyramid features built on top of some input feature maps.
10 | """
11 | def __init__(self, bottom_up):
12 | super(FPN, self).__init__()
13 | in_channels = [256, 512, 1024, 2048]
14 | fpn_dim = 256
15 | use_bias = True
16 | 
17 | lateral_convs = list()
18 | output_convs = list()
19 | for idx, in_channel in enumerate(in_channels):
20 | lateral_conv = M.Conv2d(
21 | in_channel, fpn_dim, kernel_size=1, bias=use_bias)
22 | output_conv = M.Conv2d(
23 | fpn_dim, fpn_dim, kernel_size=3, stride=1, padding=1, bias=use_bias)
24 | M.init.msra_normal_(lateral_conv.weight, mode="fan_in")
25 | M.init.msra_normal_(output_conv.weight, mode="fan_in")
26 | if use_bias:
27 | M.init.fill_(lateral_conv.bias, 0)
28 | M.init.fill_(output_conv.bias, 0)
29 | lateral_convs.append(lateral_conv)
30 | output_convs.append(output_conv)
31 | 
32 | self.lateral_convs = lateral_convs[::-1]
33 | self.output_convs = output_convs[::-1]
34 | self.bottom_up = bottom_up
35 | 
36 | def forward(self, x):
37 | bottom_up_features = self.bottom_up(x)
38 | bottom_up_features = bottom_up_features[::-1]
39 | results = []
40 | prev_features = self.lateral_convs[0](bottom_up_features[0])
41 | results.append(self.output_convs[0](prev_features))
42 | for features, lateral_conv, output_conv in zip(
43 | bottom_up_features[1:], self.lateral_convs[1:], self.output_convs[1:]
44 | ):
45 | top_down_features = F.interpolate(
46 | prev_features, scale_factor=2, mode="BILINEAR")
47 | lateral_features = lateral_conv(features)
48 | prev_features = lateral_features + top_down_features
49 | results.append(output_conv(prev_features))
50 | # p6
51 | last_p6 = F.max_pool2d(results[0], kernel_size=1, stride=2, padding=0)
52 | results.insert(0, last_p6)
53 | return results
54 | 
--------------------------------------------------------------------------------
/lib/backbone/resnet50.py:
--------------------------------------------------------------------------------
1 | import math
2 | 
3 | import megengine.functional as F
4 | import megengine.module as M
5 | 
6 | from layers.batch_norm import FrozenBatchNorm2d
7 | 
8 | has_bias = False
9 | 
10 | class Bottleneck(M.Module):
11 | def __init__(
12 | self, in_channels, bottleneck_channels, out_channels,
13 | stride, dilation=1):
14 | super(Bottleneck, self).__init__()
15 | 
16 | # identity shortcut unless the channel count or spatial size changes
17 | self.downsample = (
18 | M.Identity()
19 | if in_channels == out_channels and stride == 1
20 | else M.Sequential(
21 | M.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=has_bias),
22 | FrozenBatchNorm2d(out_channels),
23 | )
24 | )
25 | 
26 | self.conv1 = M.Conv2d(in_channels, bottleneck_channels, kernel_size=1, stride=1, bias=has_bias)
27 | self.bn1 = FrozenBatchNorm2d(bottleneck_channels)
28 | self.conv2 = M.Conv2d(bottleneck_channels, bottleneck_channels, kernel_size=3, stride=stride,
29 | padding=dilation, bias=has_bias, dilation=dilation)
30 | self.bn2 = FrozenBatchNorm2d(bottleneck_channels)
31 | self.conv3 = M.Conv2d(bottleneck_channels, out_channels, kernel_size=1, stride=1, bias=has_bias)
32 | self.bn3 = FrozenBatchNorm2d(out_channels)
33 | 
34 | def forward(self, x):
35 | identity = x
36 | 
37 | x = self.conv1(x)
38 | x = self.bn1(x)
39 | x = F.relu(x)
40 | 
41 | x = self.conv2(x)
42 | x = self.bn2(x)
43 | x = F.relu(x)
44 | 
45 | x = self.conv3(x)
46 | x = self.bn3(x)
47 | 
48 | identity = self.downsample(identity)
49 | 
50 | x += identity
51 | x = F.relu(x)
52 | 
53 | return x
54 | 
55 | class ResNet50(M.Module):
56 | def __init__(self):
57 | super(ResNet50, self).__init__()
58 | self.conv1 = M.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=has_bias)
59 | self.bn1 = FrozenBatchNorm2d(64)
60 | self.maxpool = M.MaxPool2d(kernel_size=3, stride=2, padding=1)
61 | 
62 | block_counts = [3, 4, 6, 3]
63 | bottleneck_channels_list = [64, 128, 256, 512]
64 | out_channels_list = [256, 512, 1024, 2048]
65 | stride_list = [1, 2, 2, 2]
66 | in_channels = 64
67 | self.layer1 = self._make_layer(block_counts[0], 64,
68 | bottleneck_channels_list[0], out_channels_list[0], stride_list[0])
69 | self.layer2 = self._make_layer(block_counts[1], out_channels_list[0],
70 | bottleneck_channels_list[1], out_channels_list[1], stride_list[1])
71 | self.layer3 = self._make_layer(block_counts[2], out_channels_list[1],
72 | bottleneck_channels_list[2], out_channels_list[2], stride_list[2])
73 | self.layer4 = self._make_layer(block_counts[3], out_channels_list[2],
74 | bottleneck_channels_list[3], out_channels_list[3], stride_list[3])
75 | 
76 | for l in self.modules():
77 | if isinstance(l, M.Conv2d):
78 | M.init.msra_normal_(l.weight, mode="fan_in")
79 | if has_bias:
80 | M.init.fill_(l.bias, 0)
81 | 
82 | def _make_layer(self, num_blocks, in_channels, bottleneck_channels, out_channels, stride):
83 | layers = []
84 | for _ in range(num_blocks):
85 | layers.append(Bottleneck(in_channels, bottleneck_channels, out_channels, stride))
86 | stride = 1
87 | in_channels = out_channels
88 | return M.Sequential(*layers)
89 | 
90 | def forward(self, x):
91 | outputs = []
92 | # stem
93 | x = self.conv1(x)
94 | x = self.bn1(x)
95 | x = F.relu(x)
96 | x = self.maxpool(x)
97 | # blocks
98 | x = self.layer1(x)
99 | outputs.append(x)
100 | x = self.layer2(x)
101 | outputs.append(x)
102 | x = self.layer3(x)
103 | outputs.append(x)
104 | x = self.layer4(x)
105 | outputs.append(x)
106 | return outputs
107 | 
108 | 
--------------------------------------------------------------------------------
/lib/det_opr/anchors_generator.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from megengine.core import tensor
3 | import megengine.functional as F
4 | 
5 | class AnchorGenerator():
6 | """default anchor generator for FPN.
7 | This class generates anchors for each feature-map level.
8 | """ 9 | def __init__(self, base_size=16, ratios=[0.5, 1, 2], 10 | base_scale=2): 11 | self.base_size = base_size 12 | self.base_scale = np.array([base_scale]) 13 | self.anchor_ratios = ratios 14 | 15 | def _whctrs(self, anchor): 16 | """convert anchor box into (w, h, ctr_x, ctr_y) 17 | """ 18 | w = anchor[:, 2] - anchor[:, 0] + 1 19 | h = anchor[:, 3] - anchor[:, 1] + 1 20 | x_ctr = anchor[:, 0] + 0.5 * (w - 1) 21 | y_ctr = anchor[:, 1] + 0.5 * (h - 1) 22 | return w, h, x_ctr, y_ctr 23 | 24 | def get_plane_anchors(self, anchor_scales: np.ndarray): 25 | """get anchors per location on feature map. 26 | The anchor number is anchor_scales x anchor_ratios 27 | """ 28 | base_anchor = tensor([0, 0, self.base_size - 1, self.base_size - 1]) 29 | base_anchor = F.add_axis(base_anchor, 0) 30 | w, h, x_ctr, y_ctr = self._whctrs(base_anchor) 31 | # ratio enumerate 32 | size = w * h 33 | size_ratios = size / self.anchor_ratios 34 | ws = size_ratios.sqrt().round() 35 | hs = (ws * self.anchor_ratios).round() 36 | # scale enumerate 37 | anchor_scales = anchor_scales[None, ...] 38 | ws = F.add_axis(ws, 1) 39 | hs = F.add_axis(hs, 1) 40 | ws = (ws * anchor_scales).reshape(-1, 1) 41 | hs = (hs * anchor_scales).reshape(-1, 1) 42 | # make anchors 43 | anchors = F.concat( 44 | [ 45 | x_ctr - 0.5 * (ws - 1), 46 | y_ctr - 0.5 * (hs - 1), 47 | x_ctr + 0.5 * (ws - 1), 48 | y_ctr + 0.5 * (hs - 1), 49 | ], 50 | axis=1, 51 | ) 52 | return anchors.astype(np.float32) 53 | 54 | def get_center_offsets(self, featmap, stride): 55 | f_shp = featmap.shape 56 | fm_height, fm_width = f_shp[-2], f_shp[-1] 57 | 58 | shift_x = F.linspace(0, fm_width - 1, fm_width) * stride 59 | shift_y = F.linspace(0, fm_height - 1, fm_height) * stride 60 | 61 | # make the mesh grid of shift_x and shift_y 62 | mesh_shape = (fm_height, fm_width) 63 | broad_shift_x = shift_x.reshape(-1, shift_x.shape[0]).broadcast(*mesh_shape) 64 | broad_shift_y = shift_y.reshape(shift_y.shape[0], -1).broadcast(*mesh_shape) 65 | 66 | flatten_shift_x = F.add_axis(broad_shift_x.reshape(-1), 1) 67 | flatten_shift_y = F.add_axis(broad_shift_y.reshape(-1), 1) 68 | 69 | shifts = F.concat( 70 | [flatten_shift_x, flatten_shift_y, flatten_shift_x, flatten_shift_y,], 71 | axis=1) 72 | return shifts 73 | 74 | def get_anchors_by_feature(self, featmap, stride): 75 | # shifts shape: [A, 4] 76 | shifts = self.get_center_offsets(featmap, stride) 77 | # plane_anchors shape: [B, 4], e.g. 
B=3 78 | plane_anchors = self.get_plane_anchors(self.base_scale * stride) 79 | # all_anchors = shifts.repeat(1,3) + cell_anchors.flatten() 80 | all_anchors = F.add_axis(plane_anchors, 0) + F.add_axis(shifts, 1) 81 | all_anchors = all_anchors.reshape(-1, 4) 82 | return all_anchors 83 | 84 | def __call__(self, featmap, stride): 85 | return self.get_anchors_by_feature(featmap, stride) 86 | 87 | -------------------------------------------------------------------------------- /lib/det_opr/cascade_roi_target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import megengine as mge 3 | import megengine.random as rand 4 | import megengine.functional as F 5 | 6 | import numpy as np 7 | from config import config 8 | from det_opr.utils import mask_to_inds 9 | from det_opr.bbox_opr import box_overlap_opr, bbox_transform_opr, box_overlap_ignore_opr 10 | 11 | 12 | def cascade_roi_target(rpn_rois, im_info, gt_boxes, pos_threshold=0.5, top_k=1): 13 | return_rois = [] 14 | return_labels = [] 15 | return_bbox_targets = [] 16 | # get per image proposals and gt_boxes 17 | for bid in range(config.batch_per_gpu): 18 | gt_boxes_perimg = gt_boxes[bid, :im_info[bid, 5], :] 19 | batch_inds = mge.ones((gt_boxes_perimg.shapeof()[0], 1)) * bid 20 | #if config.proposal_append_gt: 21 | gt_rois = F.concat([batch_inds, gt_boxes_perimg[:, :4]], axis=1) 22 | batch_roi_mask = rpn_rois[:, 0] == bid 23 | batch_roi_inds = mask_to_inds(batch_roi_mask) 24 | all_rois = F.concat([rpn_rois.ai[batch_roi_inds], gt_rois], axis=0) 25 | overlaps_normal, overlaps_ignore = box_overlap_ignore_opr( 26 | all_rois[:, 1:5], gt_boxes_perimg) 27 | overlaps_normal, overlaps_normal_indices = F.argsort(overlaps_normal, descending=True) 28 | overlaps_ignore, overlaps_ignore_indices = F.argsort(overlaps_ignore, descending=True) 29 | # gt max and indices, ignore max and indices 30 | max_overlaps_normal = overlaps_normal[:, :top_k].reshape(-1) 31 | gt_assignment_normal = overlaps_normal_indices[:, :top_k].reshape(-1) 32 | max_overlaps_ignore = overlaps_ignore[:, :top_k].reshape(-1) 33 | gt_assignment_ignore = overlaps_ignore_indices[:, :top_k].reshape(-1) 34 | # cons masks 35 | ignore_assign_mask = (max_overlaps_normal < config.fg_threshold) * ( 36 | max_overlaps_ignore > max_overlaps_normal) 37 | max_overlaps = max_overlaps_normal * (1 - ignore_assign_mask) + \ 38 | max_overlaps_ignore * ignore_assign_mask 39 | gt_assignment = gt_assignment_normal * (1- ignore_assign_mask) + \ 40 | gt_assignment_ignore * ignore_assign_mask 41 | gt_assignment = gt_assignment.astype(np.int32) 42 | labels = gt_boxes_perimg.ai[gt_assignment, 4] 43 | fg_mask = (max_overlaps >= config.fg_threshold) * (1 - F.equal(labels, config.ignore_label)) 44 | bg_mask = (max_overlaps < config.bg_threshold_high) * ( 45 | max_overlaps >= config.bg_threshold_low) 46 | fg_mask = fg_mask.reshape(-1, top_k) 47 | bg_mask = bg_mask.reshape(-1, top_k) 48 | #pos_max = config.num_rois * config.fg_ratio 49 | #fg_inds_mask = _bernoulli_sample_masks(fg_mask[:, 0], pos_max, 1) 50 | #neg_max = config.num_rois - fg_inds_mask.sum() 51 | #bg_inds_mask = _bernoulli_sample_masks(bg_mask[:, 0], neg_max, 1) 52 | labels = labels * fg_mask.reshape(-1) 53 | #keep_mask = fg_inds_mask + bg_inds_mask 54 | #keep_inds = mask_to_inds(keep_mask) 55 | #keep_inds = keep_inds[:F.minimum(config.num_rois, keep_inds.shapeof()[0])] 56 | # labels 57 | labels = labels.reshape(-1, top_k) 58 | gt_assignment = gt_assignment.reshape(-1, top_k).reshape(-1) 59 | 
target_boxes = gt_boxes_perimg.ai[gt_assignment, :4]
60 | #rois = all_rois.ai[keep_inds]
61 | target_shape = (all_rois.shapeof()[0], top_k, all_rois.shapeof()[-1])
62 | target_rois = F.add_axis(all_rois, 1).broadcast(target_shape).reshape(-1, all_rois.shapeof()[-1])
63 | bbox_targets = bbox_transform_opr(target_rois[:, 1:5], target_boxes)
64 | if config.rcnn_bbox_normalize_targets:
65 | std_opr = mge.tensor(config.bbox_normalize_stds[None, :])
66 | mean_opr = mge.tensor(config.bbox_normalize_means[None, :])
67 | minus_opr = mean_opr / std_opr
68 | bbox_targets = bbox_targets / std_opr - minus_opr
69 | bbox_targets = bbox_targets.reshape(-1, top_k * 4)
70 | return_rois.append(all_rois)
71 | return_labels.append(labels)
72 | return_bbox_targets.append(bbox_targets)
73 | if config.batch_per_gpu == 1:
74 | return F.zero_grad(all_rois), F.zero_grad(labels), F.zero_grad(bbox_targets)
75 | else:
76 | return_rois = F.concat(return_rois, axis=0)
77 | return_labels = F.concat(return_labels, axis=0)
78 | return_bbox_targets = F.concat(return_bbox_targets, axis=0)
79 | return F.zero_grad(return_rois), F.zero_grad(return_labels), F.zero_grad(return_bbox_targets)
--------------------------------------------------------------------------------
/lib/det_opr/find_top_rpn_proposals.py:
--------------------------------------------------------------------------------
1 | import megengine as mge
2 | import megengine.functional as F
3 | from megengine.core import tensor
4 | 
5 | from layers.nms import gpu_nms
6 | from config import config
7 | from det_opr.bbox_opr import bbox_transform_inv_opr, clip_boxes_opr, \
8 | filter_boxes_opr
9 | 
10 | def find_top_rpn_proposals(is_train, rpn_bbox_offsets_list, rpn_cls_prob_list,
11 | all_anchors_list, im_info):
12 | prev_nms_top_n = config.train_prev_nms_top_n \
13 | if is_train else config.test_prev_nms_top_n
14 | post_nms_top_n = config.train_post_nms_top_n \
15 | if is_train else config.test_post_nms_top_n
16 | batch_per_gpu = config.batch_per_gpu if is_train else 1
17 | nms_threshold = config.rpn_nms_threshold
18 | box_min_size = config.rpn_min_box_size
19 | bbox_normalize_targets = config.rpn_bbox_normalize_targets
20 | bbox_normalize_means = config.bbox_normalize_means
21 | bbox_normalize_stds = config.bbox_normalize_stds
22 | 
23 | list_size = len(rpn_bbox_offsets_list)
24 | 
25 | return_rois = []
26 | return_probs = []
27 | for bid in range(batch_per_gpu):
28 | batch_proposals_list = []
29 | batch_probs_list = []
30 | for l in range(list_size):
31 | # get proposals and probs
32 | offsets = rpn_bbox_offsets_list[l][bid] \
33 | .dimshuffle(1, 2, 0).reshape(-1, 4)
34 | if bbox_normalize_targets:
35 | std_opr = tensor(config.bbox_normalize_stds[None, :])
36 | mean_opr = tensor(config.bbox_normalize_means[None, :])
37 | offsets = offsets * std_opr
38 | offsets = offsets + mean_opr
39 | all_anchors = all_anchors_list[l]
40 | proposals = bbox_transform_inv_opr(all_anchors, offsets)
41 | if config.anchor_within_border:
42 | proposals = clip_boxes_opr(proposals, im_info[bid, :])
43 | probs = rpn_cls_prob_list[l][bid] \
44 | .dimshuffle(1,2,0).reshape(-1, 2)
45 | probs = F.softmax(probs)[:, 1]
46 | # gather the proposals and probs
47 | batch_proposals_list.append(proposals)
48 | batch_probs_list.append(probs)
49 | batch_proposals = F.concat(batch_proposals_list, axis=0)
50 | batch_probs = F.concat(batch_probs_list, axis=0)
51 | # filter out boxes that are too small
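# NOTE: filter_boxes_opr yields a 0/1 keep mask; multiplying the scores by it
# below zeroes out undersized boxes, so the descending sort pushes them past
# the prev_nms_top_n cutoff before NMS runs.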
52 | batch_keep_mask = filter_boxes_opr( 53 | batch_proposals, box_min_size * im_info[bid, 2]) 54 | batch_probs = batch_probs * batch_keep_mask 55 | # prev_nms_top_n 56 | num_proposals = F.minimum(prev_nms_top_n, batch_probs.shapeof()[0]) 57 | batch_probs, idx = F.argsort(batch_probs, descending=True) 58 | batch_probs = batch_probs[:num_proposals].reshape(-1,1) 59 | topk_idx = idx[:num_proposals].reshape(-1) 60 | batch_proposals = batch_proposals.ai[topk_idx] 61 | batch_rois = F.concat([batch_proposals, batch_probs], axis=1) 62 | # For each image, run a total-level NMS, and choose topk results. 63 | keep_inds = gpu_nms(batch_rois, nms_threshold, post_nms_top_n) 64 | batch_rois = batch_rois.ai[keep_inds] 65 | batch_probs = batch_rois[:, -1] 66 | # cons the rois 67 | batch_inds = mge.ones((batch_rois.shapeof()[0], 1)) * bid 68 | batch_rois = F.concat([batch_inds, batch_rois[:, :-1]], axis=1) 69 | return_rois.append(batch_rois) 70 | return_probs.append(batch_probs) 71 | 72 | if batch_per_gpu == 1: 73 | return batch_rois, batch_probs 74 | else: 75 | concated_rois = F.concat(return_rois, axis=0) 76 | concated_probs = F.concat(return_probs, axis=0) 77 | return concated_rois, concated_probs 78 | -------------------------------------------------------------------------------- /lib/det_opr/fpn_roi_target.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import megengine as mge 3 | import megengine.random as rand 4 | import megengine.functional as F 5 | 6 | import numpy as np 7 | from config import config 8 | from det_opr.utils import mask_to_inds 9 | from det_opr.bbox_opr import box_overlap_opr, bbox_transform_opr, box_overlap_ignore_opr 10 | 11 | 12 | def fpn_roi_target(rpn_rois, im_info, gt_boxes, top_k=1): 13 | return_rois = [] 14 | return_labels = [] 15 | return_bbox_targets = [] 16 | # get per image proposals and gt_boxes 17 | for bid in range(config.batch_per_gpu): 18 | gt_boxes_perimg = gt_boxes[bid, :im_info[bid, 5], :] 19 | batch_inds = mge.ones((gt_boxes_perimg.shapeof()[0], 1)) * bid 20 | #if config.proposal_append_gt: 21 | gt_rois = F.concat([batch_inds, gt_boxes_perimg[:, :4]], axis=1) 22 | batch_roi_mask = rpn_rois[:, 0] == bid 23 | batch_roi_inds = mask_to_inds(batch_roi_mask) 24 | all_rois = F.concat([rpn_rois.ai[batch_roi_inds], gt_rois], axis=0) 25 | overlaps_normal, overlaps_ignore = box_overlap_ignore_opr( 26 | all_rois[:, 1:5], gt_boxes_perimg) 27 | overlaps_normal, overlaps_normal_indices = F.argsort(overlaps_normal, descending=True) 28 | overlaps_ignore, overlaps_ignore_indices = F.argsort(overlaps_ignore, descending=True) 29 | # gt max and indices, ignore max and indices 30 | max_overlaps_normal = overlaps_normal[:, :top_k].reshape(-1) 31 | gt_assignment_normal = overlaps_normal_indices[:, :top_k].reshape(-1) 32 | max_overlaps_ignore = overlaps_ignore[:, :top_k].reshape(-1) 33 | gt_assignment_ignore = overlaps_ignore_indices[:, :top_k].reshape(-1) 34 | # cons masks 35 | ignore_assign_mask = (max_overlaps_normal < config.fg_threshold) * ( 36 | max_overlaps_ignore > max_overlaps_normal) 37 | max_overlaps = max_overlaps_normal * (1 - ignore_assign_mask) + \ 38 | max_overlaps_ignore * ignore_assign_mask 39 | gt_assignment = gt_assignment_normal * (1- ignore_assign_mask) + \ 40 | gt_assignment_ignore * ignore_assign_mask 41 | gt_assignment = gt_assignment.astype(np.int32) 42 | labels = gt_boxes_perimg.ai[gt_assignment, 4] 43 | fg_mask = (max_overlaps >= config.fg_threshold) * (1 - F.equal(labels, 
config.ignore_label)) 44 | bg_mask = (max_overlaps < config.bg_threshold_high) * ( 45 | max_overlaps >= config.bg_threshold_low) 46 | fg_mask = fg_mask.reshape(-1, top_k) 47 | bg_mask = bg_mask.reshape(-1, top_k) 48 | pos_max = config.num_rois * config.fg_ratio 49 | fg_inds_mask = _bernoulli_sample_masks(fg_mask[:, 0], pos_max, 1) 50 | neg_max = config.num_rois - fg_inds_mask.sum() 51 | bg_inds_mask = _bernoulli_sample_masks(bg_mask[:, 0], neg_max, 1) 52 | labels = labels * fg_mask.reshape(-1) 53 | keep_mask = fg_inds_mask + bg_inds_mask 54 | keep_inds = mask_to_inds(keep_mask) 55 | #keep_inds = keep_inds[:F.minimum(config.num_rois, keep_inds.shapeof()[0])] 56 | # labels 57 | labels = labels.reshape(-1, top_k).ai[keep_inds] 58 | gt_assignment = gt_assignment.reshape(-1, top_k).ai[keep_inds].reshape(-1) 59 | target_boxes = gt_boxes_perimg.ai[gt_assignment, :4] 60 | rois = all_rois.ai[keep_inds] 61 | target_shape = (rois.shapeof()[0], top_k, rois.shapeof()[-1]) 62 | target_rois = F.add_axis(rois, 1).broadcast(target_shape).reshape(-1, rois.shapeof()[-1]) 63 | bbox_targets = bbox_transform_opr(target_rois[:, 1:5], target_boxes) 64 | if config.rcnn_bbox_normalize_targets: 65 | std_opr = mge.tensor(config.bbox_normalize_stds[None, :]) 66 | mean_opr = mge.tensor(config.bbox_normalize_means[None, :]) 67 | minus_opr = mean_opr / std_opr 68 | bbox_targets = bbox_targets / std_opr - minus_opr 69 | bbox_targets = bbox_targets.reshape(-1, top_k * 4) 70 | return_rois.append(rois) 71 | return_labels.append(labels) 72 | return_bbox_targets.append(bbox_targets) 73 | if config.batch_per_gpu == 1: 74 | return F.zero_grad(rois), F.zero_grad(labels), F.zero_grad(bbox_targets) 75 | else: 76 | return_rois = F.concat(return_rois, axis=0) 77 | return_labels = F.concat(return_labels, axis=0) 78 | return_bbox_targets = F.concat(return_bbox_targets, axis=0) 79 | return F.zero_grad(return_rois), F.zero_grad(return_labels), F.zero_grad(return_bbox_targets) 80 | 81 | def _bernoulli_sample_masks(masks, num_samples, sample_value): 82 | """ Using the bernoulli sampling method""" 83 | sample_mask = F.equal(masks, sample_value) 84 | num_mask = sample_mask.sum() 85 | num_final_samples = F.minimum(num_mask, num_samples) 86 | # here, we use the bernoulli probability to sample the anchors 87 | sample_prob = num_final_samples / num_mask 88 | uniform_rng = rand.uniform(sample_mask.shapeof()[0]) 89 | after_sampled_mask = (uniform_rng <= sample_prob) * sample_mask 90 | return after_sampled_mask 91 | 92 | -------------------------------------------------------------------------------- /lib/det_opr/loss_opr.py: -------------------------------------------------------------------------------- 1 | import megengine as mge 2 | import megengine.functional as F 3 | from megengine.core import Tensor 4 | 5 | def softmax_loss(pred, label, ignore_label=-1): 6 | max_pred = F.zero_grad(pred.max(axis=1, keepdims=True)) 7 | pred -= max_pred 8 | log_prob = pred - F.log(F.exp(pred).sum(axis=1, keepdims=True)) 9 | mask = 1 - F.equal(label, ignore_label) 10 | vlabel = label * mask 11 | loss = -(F.indexing_one_hot(log_prob, vlabel, 1) * mask) 12 | return loss 13 | 14 | def smooth_l1_loss(pred, target, beta: float): 15 | abs_x = F.abs(pred - target) 16 | in_mask = abs_x < beta 17 | out_mask = 1 - in_mask 18 | in_loss = 0.5 * abs_x ** 2 / beta 19 | out_loss = abs_x - 0.5 * beta 20 | loss = in_loss * in_mask + out_loss * out_mask 21 | return loss.sum(axis=1) 22 | 23 | -------------------------------------------------------------------------------- 
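A quick numeric check of the smooth_l1_loss above (a minimal NumPy sketch of the same piecewise definition, independent of MegEngine; smooth_l1_ref is a made-up reference helper, and beta = 1 matches the rpn_smooth_l1_beta setting in the configs further down):

import numpy as np

def smooth_l1_ref(pred, target, beta):
    # quadratic zone 0.5 * x**2 / beta for |x| < beta, linear |x| - 0.5 * beta outside
    x = np.abs(pred - target)
    return np.where(x < beta, 0.5 * x ** 2 / beta, x - 0.5 * beta).sum(axis=1)

# residuals (0.5, 2.0) with beta = 1: 0.5 * 0.5**2 + (2.0 - 0.5) = 0.125 + 1.5 = 1.625
print(smooth_l1_ref(np.array([[0.5, 2.0]]), np.zeros((1, 2)), beta=1.0))  # [1.625]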
/lib/det_opr/utils.py:
--------------------------------------------------------------------------------
1 | import megengine as mge
2 | import megengine.functional as F
3 | import numpy as np
4 | 
5 | from megengine.core import Tensor
6 | 
7 | def get_padded_tensor(
8 | array: Tensor, multiple_number: int = 32, pad_value: float = 0
9 | ) -> Tensor:
10 | """ pad the tensor so its height and width are multiples of multiple_number
11 | 
12 | Args:
13 | array (Tensor):
14 | the tensor with the shape of [batch, channel, height, width]
15 | multiple_number (int):
16 | make the height and width divisible by multiple_number
17 | pad_value (float): the value used for padding
18 | 
19 | Returns:
20 | padded_array (Tensor)
21 | """
22 | batch, chl, t_height, t_width = array.shape
23 | padded_height = (
24 | (t_height + multiple_number - 1) // multiple_number * multiple_number
25 | )
26 | padded_width = (t_width + multiple_number - 1) // multiple_number * multiple_number
27 | 
28 | padded_array = (
29 | mge.ones(
30 | F.concat([batch, chl, padded_height, padded_width], axis=0),
31 | dtype=np.float32,
32 | )
33 | * pad_value
34 | )
35 | 
36 | ndim = array.ndim
37 | if ndim == 4:
38 | padded_array = padded_array.set_subtensor(array)[:, :, :t_height, :t_width]
39 | elif ndim == 3:
40 | padded_array = padded_array.set_subtensor(array)[:, :t_height, :t_width]
41 | else:
42 | raise Exception("Not supported tensor dim: %d" % ndim)
43 | return padded_array
44 | 
45 | from megengine.core.tensor import wrap_io_tensor
46 | import megengine._internal as mgb
47 | @wrap_io_tensor
48 | def cond_take(data, mask, **kwargs):
49 | return mgb.opr.cond_take(data, mask, **kwargs)
50 | 
51 | def mask_to_inds(mask):
52 | _, inds = cond_take(mask, mask, mode=mgb.opr_param_defs.CondTake.Mode.EQ, val=1)
53 | return F.zero_grad(inds)
54 | 
--------------------------------------------------------------------------------
/lib/layers/batch_norm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import megengine.module as M
3 | from megengine.core import Buffer
4 | 
5 | class FrozenBatchNorm2d(M.Module):
6 | """
7 | BatchNorm2d whose weight, bias, running_mean and running_var
8 | are frozen (immutable).
9 | """ 10 | def __init__(self, num_features, eps=1e-5): 11 | super().__init__() 12 | self.eps = eps 13 | self.weight = Buffer(np.ones(num_features, dtype=np.float32)) 14 | self.bias = Buffer(np.zeros(num_features, dtype=np.float32)) 15 | self.running_mean = Buffer(np.zeros((1, num_features, 1, 1), dtype=np.float32)) 16 | self.running_var = Buffer(np.ones((1, num_features, 1, 1), dtype=np.float32)) 17 | def forward(self, x): 18 | scale = self.weight.reshape(1, -1, 1, 1) * (1.0 / (self.running_var + self.eps).sqrt()) 19 | bias = self.bias.reshape(1, -1, 1, 1) - self.running_mean * scale 20 | return x * scale + bias 21 | 22 | -------------------------------------------------------------------------------- /lib/layers/gpu_nms/compile.md: -------------------------------------------------------------------------------- 1 | nvcc -I /usr/local/lib/python3.6/dist-packages/megengine/_internal/include -shared -o lib_nms.so -Xcompiler "-fno-strict-aliasing -fPIC" nms.cu 2 | -------------------------------------------------------------------------------- /lib/layers/lib_nms.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/megvii-model/CrowdDetection/9786f58869a55af3e0b51fc78f8638a825dae4a2/lib/layers/lib_nms.so -------------------------------------------------------------------------------- /lib/layers/nms.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env mdl 2 | # This file will seal the nms opr within a better way than lib_nms 3 | import ctypes 4 | import struct, os 5 | from megengine._internal.craniotome import CraniotomeBase 6 | from megengine.core.tensor import wrap_io_tensor 7 | import numpy as np 8 | import traceback 9 | import warnings 10 | 11 | try: 12 | _so_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'lib_nms.so') 13 | _so_lib = ctypes.CDLL(_so_path) 14 | 15 | _TYPE_POINTER = ctypes.c_void_p 16 | _TYPE_POINTER = ctypes.c_void_p 17 | _TYPE_INT = ctypes.c_int32 18 | _TYPE_FLOAT = ctypes.c_float 19 | 20 | _so_lib.NMSForwardGpu.argtypes = [_TYPE_POINTER, _TYPE_POINTER, _TYPE_POINTER, _TYPE_POINTER, _TYPE_FLOAT, _TYPE_INT, _TYPE_POINTER] 21 | _so_lib.NMSForwardGpu.restype = _TYPE_INT 22 | 23 | _so_lib.CreateHostDevice.restype = _TYPE_POINTER 24 | except: 25 | # traceback.print_exc() 26 | warnings.warn("lib_nms.so not loaded. 
please compile with lib/layers/setup.sh") 27 | 28 | class NMSCran(CraniotomeBase): 29 | __nr_inputs__ = 1 30 | __nr_outputs__= 3 31 | 32 | def setup(self, iou_threshold, max_output): 33 | self._iou_threshold = iou_threshold 34 | self._max_output = max_output 35 | # Load the necessary host device 36 | self._host_device = _so_lib.CreateHostDevice() 37 | 38 | def execute(self, inputs, outputs): 39 | box_tensor_ptr = inputs[0].pubapi_dev_tensor_ptr 40 | output_tensor_ptr = outputs[0].pubapi_dev_tensor_ptr 41 | output_num_tensor_ptr = outputs[1].pubapi_dev_tensor_ptr 42 | mask_tensor_ptr = outputs[2].pubapi_dev_tensor_ptr 43 | 44 | ret = _so_lib.NMSForwardGpu(box_tensor_ptr, mask_tensor_ptr, output_tensor_ptr, output_num_tensor_ptr, self._iou_threshold, self._max_output, self._host_device) 45 | 46 | def grad(self, wrt_idx, inputs, outputs, out_grad): 47 | return 0 48 | 49 | def init_output_dtype(self, input_dtypes): 50 | return [np.int32, np.int32, np.int32] 51 | 52 | def get_serialize_params(self): 53 | return ('nms', struct.pack('fi', self._iou_threshold, self._max_output)) 54 | 55 | def infer_shape(self, inp_shapes): 56 | nr_box = inp_shapes[0][0] 57 | threadsPerBlock = 64 58 | output_size = nr_box 59 | # here we compute the number of int32 used in mask_outputs. In original version, we compute the bytes only. 60 | mask_size = int(nr_box * (nr_box // threadsPerBlock + int((nr_box % threadsPerBlock) > 0)) * 8 / 4) 61 | return [[output_size], [1], [mask_size]] 62 | 63 | @wrap_io_tensor 64 | def gpu_nms(box, iou_threshold, max_output): 65 | keep, num, mask = NMSCran.make(box, iou_threshold=iou_threshold, max_output=max_output) 66 | return keep[:num] 67 | -------------------------------------------------------------------------------- /lib/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import numpy as np 4 | import megengine as mge 5 | import megengine.functional as F 6 | 7 | from det_opr.utils import mask_to_inds 8 | 9 | def roi_pool(rpn_fms, rois, stride, pool_shape, roi_type='roi_align', 10 | labels=None, bbox_targets=None): 11 | assert len(stride) == len(rpn_fms) 12 | canonical_level = 4 13 | canonical_box_size = 224 14 | min_level = math.log2(stride[0]) 15 | max_level = math.log2(stride[-1]) 16 | 17 | num_fms = len(rpn_fms) 18 | box_sizes = F.sqrt((rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])) 19 | level_assignments = F.floor( 20 | canonical_level + F.log(box_sizes / canonical_box_size) / np.log(2) 21 | ) 22 | level_assignments = F.minimum(level_assignments, max_level) 23 | level_assignments = F.maximum(level_assignments, min_level) 24 | level_assignments = level_assignments - min_level 25 | available_masks = F.concat( 26 | [mge.ones(level_assignments.shapeof()[0]), mge.zeros(num_fms)], axis=0) 27 | level_assignments = F.concat([level_assignments, mge.tensor(np.arange(num_fms, dtype=np.int32))], axis=0) 28 | rois = F.concat([rois, mge.zeros((num_fms, rois.shapeof()[-1]))], axis=0) 29 | if labels is not None: 30 | labels = F.concat([labels, mge.ones((num_fms, labels.shapeof()[-1]))], axis=0) 31 | bbox_targets = F.concat([bbox_targets, mge.zeros((num_fms, bbox_targets.shapeof()[-1]))], axis=0) 32 | pool_list, inds_list = [], [] 33 | for i in range(len(rpn_fms)): 34 | mask = level_assignments == i 35 | inds = mask_to_inds(mask) 36 | rois_fm = rois.ai[inds] 37 | if roi_type == 'roi_pool': 38 | pool_fm = F.roi_pooling( 39 | rpn_fms[i], rois_fm, pool_shape, mode='max', scale=1.0/stride[i]) 40 | elif roi_type == 
'roi_align': 41 | pool_fm = F.roi_align( 42 | rpn_fms[i], rois_fm, pool_shape, mode='average', 43 | spatial_scale=1.0/stride[i], sample_points=2, aligned=True) 44 | pool_list.append(pool_fm) 45 | inds_list.append(inds) 46 | 47 | fm_order = F.concat(inds_list, axis=0) 48 | pool_feature = F.concat(pool_list, axis=0) 49 | 50 | ordered_available_masks = available_masks.ai[fm_order] 51 | available_inds = mask_to_inds(ordered_available_masks) 52 | pool_feature = pool_feature.ai[available_inds] 53 | rois = rois.ai[fm_order, :].ai[available_inds, :] 54 | if labels is not None: 55 | labels = labels.ai[fm_order].ai[available_inds] 56 | bbox_targets = bbox_targets.ai[fm_order, :].ai[available_inds, :] 57 | return pool_feature, rois, F.zero_grad(labels), F.zero_grad(bbox_targets) 58 | else: 59 | return pool_feature, rois, None, None 60 | 61 | -------------------------------------------------------------------------------- /lib/layers/setup.sh: -------------------------------------------------------------------------------- 1 | nvcc -I ~/.local/lib/python3.6/site-packages/megengine/_internal/include -shared -o lib_nms.so -Xcompiler "-fno-strict-aliasing -fPIC" ./gpu_nms/nms.cu 2 | -------------------------------------------------------------------------------- /lib/module/rpn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import megengine.functional as F 3 | import megengine.module as M 4 | 5 | from config import config 6 | from det_opr.utils import mask_to_inds 7 | from det_opr.anchors_generator import AnchorGenerator 8 | from det_opr.find_top_rpn_proposals import find_top_rpn_proposals 9 | from det_opr.fpn_anchor_target import fpn_anchor_target, fpn_rpn_reshape 10 | from det_opr.loss_opr import softmax_loss, smooth_l1_loss 11 | 12 | class RPN(M.Module): 13 | def __init__(self, rpn_channel=256): 14 | super().__init__() 15 | self.anchors_generator = AnchorGenerator( 16 | config.anchor_base_size, 17 | config.anchor_aspect_ratios, 18 | config.anchor_base_scale) 19 | self.rpn_conv = M.Conv2d(256, rpn_channel, kernel_size=3, stride=1, padding=1) 20 | self.rpn_cls_score = M.Conv2d(rpn_channel, config.num_cell_anchors * 2, kernel_size=1, stride=1) 21 | self.rpn_bbox_offsets = M.Conv2d(rpn_channel, config.num_cell_anchors * 4, kernel_size=1, stride=1) 22 | 23 | for l in [self.rpn_conv, self.rpn_cls_score, self.rpn_bbox_offsets]: 24 | M.init.normal_(l.weight, std=0.01) 25 | M.init.fill_(l.bias, 0) 26 | 27 | def forward(self, features, im_info, boxes=None): 28 | # prediction 29 | pred_cls_score_list = [] 30 | pred_bbox_offsets_list = [] 31 | for x in features: 32 | t = F.relu(self.rpn_conv(x)) 33 | pred_cls_score_list.append(self.rpn_cls_score(t)) 34 | pred_bbox_offsets_list.append(self.rpn_bbox_offsets(t)) 35 | # get anchors 36 | all_anchors_list = [] 37 | fm_stride = 2 ** (len(features) + 1) 38 | for fm in features: 39 | layer_anchors = self.anchors_generator(fm, fm_stride) 40 | fm_stride = fm_stride // 2 41 | all_anchors_list.append(layer_anchors) 42 | # sample from the predictions 43 | rpn_rois, rpn_probs = find_top_rpn_proposals( 44 | self.training, pred_bbox_offsets_list, pred_cls_score_list, 45 | all_anchors_list, im_info) 46 | 47 | if self.training: 48 | rpn_labels, rpn_bbox_targets = fpn_anchor_target( 49 | boxes, im_info, all_anchors_list) 50 | #rpn_labels = rpn_labels.astype(np.int32) 51 | pred_cls_score, pred_bbox_offsets = fpn_rpn_reshape( 52 | pred_cls_score_list, pred_bbox_offsets_list) 53 | 54 | # rpn loss 55 | valid_masks = 
rpn_labels >= 0 56 | valid_inds = mask_to_inds(valid_masks) 57 | objectness_loss = softmax_loss( 58 | pred_cls_score.ai[valid_inds], 59 | rpn_labels.ai[valid_inds]) 60 | #objectness_loss = objectness_loss * valid_masks 61 | 62 | pos_masks = rpn_labels > 0 63 | localization_loss = smooth_l1_loss( 64 | pred_bbox_offsets, 65 | rpn_bbox_targets, 66 | config.rpn_smooth_l1_beta) 67 | localization_loss = localization_loss * pos_masks 68 | normalizer = 1.0 / (valid_masks.sum()) 69 | loss_rpn_cls = objectness_loss.sum() * normalizer 70 | loss_rpn_loc = localization_loss.sum() * normalizer 71 | loss_dict = {} 72 | loss_dict['loss_rpn_cls'] = loss_rpn_cls 73 | loss_dict['loss_rpn_loc'] = loss_rpn_loc 74 | return rpn_rois, loss_dict 75 | else: 76 | return rpn_rois 77 | 78 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.double.heads.one.stage.baseline/config.py: -------------------------------------------------------------------------------- 1 | import os, getpass 2 | import sys 3 | import os.path as osp 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../../..' 11 | add_path(os.path.join(root_dir)) 12 | add_path(os.path.join(root_dir, 'lib')) 13 | add_path(osp.join(root_dir, 'util')) 14 | class Crowdhuman: 15 | class_names = ['background', 'person'] 16 | num_classes = len(class_names) 17 | root_folder = '/home/zhenganlin/june/CrowdHuman' 18 | image_folder = osp.join(root_folder, 'images') 19 | train_source = osp.join(root_folder, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt') 20 | eval_source = osp.join(root_folder, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt') 21 | 22 | class Config: 23 | 24 | usr = getpass.getuser() 25 | this_model_dir = osp.split(os.path.realpath(__file__))[0].split('/')[-1] 26 | output_dir = osp.join(root_dir, 'output', usr, 'fpn', 'human', this_model_dir) 27 | model_dir = osp.join(output_dir, 'model_dump') 28 | eval_dir = osp.join(output_dir, 'eval_dump') 29 | log_dir = output_dir 30 | logger = osp.join(output_dir, 'logger.log') 31 | init_weights = '/home/zhenganlin/june/CrowdHuman/resnet50_fbaug.pth' 32 | # ----------data config---------- # 33 | image_mean = np.array([103.530, 116.280, 123.675]) 34 | image_std = np.array([57.375, 57.120, 58.395]) 35 | train_image_short_size = 800 36 | train_image_max_size = 1400 37 | eval_resize = True 38 | eval_image_short_size = 800 39 | eval_image_max_size = 1400 40 | seed_dataprovider = 3 41 | datadb = Crowdhuman 42 | train_source = datadb.train_source 43 | eval_source = datadb.eval_source 44 | eval_json, train_json = eval_source, train_source 45 | image_folder = datadb.image_folder 46 | imgDir = image_folder 47 | class_names = datadb.class_names 48 | num_classes = datadb.num_classes 49 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 50 | gt_boxes_name = 'fbox' 51 | 52 | # ----------train config---------- # 53 | backbone_freeze_at = 2 54 | rpn_channel = 256 55 | 56 | train_batch_per_gpu = 2 57 | momentum = 0.9 58 | weight_decay = 1e-4 59 | base_lr = 1e-3 * 1.25 60 | 61 | warm_iter = 800 62 | max_epoch = 35 63 | lr_decay = [24, 27] 64 | nr_images_epoch = 15000 65 | log_dump_interval = 1 66 | iter_per_epoch = nr_images_epoch // train_batch_per_gpu 67 | 68 | # ----------test config---------- # 69 | test_nms = 0.5 70 | test_nms_method = 'set_nms' 71 | visulize_threshold = 0.3 72 | pred_cls_threshold = 0.05 73 | 
nr_info_dim=6 74 | # ----------model config---------- # 75 | batch_filter_box_size = 0 76 | nr_box_dim = 5 77 | ignore_label = -1 78 | max_boxes_of_image = 500 79 | num_predictions = 2 80 | # ----------rois generator config---------- # 81 | anchor_base_size = 32 82 | anchor_base_scale = [1] 83 | anchor_aspect_ratios = [1, 2, 3] 84 | num_cell_anchors = len(anchor_aspect_ratios) 85 | anchor_within_border = False 86 | anchor_ignore_label = -1 87 | 88 | rpn_min_box_size = 2 89 | rpn_nms_threshold = 0.7 90 | train_prev_nms_top_n = 12000 91 | train_post_nms_top_n = 2000 92 | test_prev_nms_top_n = 6000 93 | test_post_nms_top_n = 1500 94 | 95 | # ----------binding&training config---------- # 96 | rpn_smooth_l1_beta = 1 97 | rcnn_smooth_l1_beta = 1 98 | 99 | num_sample_anchors = 256 100 | positive_anchor_ratio = 0.5 101 | rpn_positive_overlap = 0.7 102 | rpn_negative_overlap = 0.3 103 | rpn_bbox_normalize_targets = False 104 | 105 | num_rois = 512 106 | fg_ratio = 0.5 107 | fg_threshold = 0.5 108 | bg_threshold_high = 0.5 109 | bg_threshold_low = 0.0 110 | rcnn_bbox_normalize_targets = True 111 | bbox_normalize_means = np.array([0, 0, 0, 0]) 112 | bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]) 113 | 114 | config = Config() 115 | 116 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.double.heads.one.stage.baseline/demo.py: -------------------------------------------------------------------------------- 1 | from config import config 2 | import os.path as osp 3 | from common import * 4 | def computeJaccard(fpath, save_path ='results.md'): 5 | 6 | assert os.path.exists(fpath) 7 | records = load_func(fpath) 8 | 9 | GT = load_func(config.eval_json) 10 | fid = open(save_path, 'a') 11 | for i in range(10): 12 | score_thr = 1e-1 * i 13 | results = common_process(worker, records, 20, GT, score_thr, 0.5) 14 | line = strline(results) 15 | line = 'score_thr:{:.3f}, '.format(score_thr) + line 16 | print(line) 17 | fid.write(line + '\n') 18 | fid.flush() 19 | fid.close() 20 | 21 | def computeIoUs(fpath): 22 | 23 | print('Processing {}'.format(osp.basename(fpath))) 24 | name = os.path.basename(fpath) 25 | 26 | mAP, mMR = compute_mAP(fpath) 27 | 28 | fid = open('results.md', 'a') 29 | fid.write('{}\ndtboxes:\n'.format(name)) 30 | print('{}\ndtboxes:\n'.format(name)) 31 | line = 'mAP:{:.4f}, mMR:{:.4f}, '.format(mAP, mMR) 32 | print(line) 33 | fid.write(line + '\n') 34 | fid.close() 35 | computeJaccard(fpath, save_path='results.md') 36 | 37 | def eval_all(): 38 | for epoch in range(25, 50): 39 | fpath = osp.join(config.eval_dir, 'epoch-{}.human'.format(epoch)) 40 | if not os.path.exists(fpath): 41 | continue 42 | computeIoUs(fpath) 43 | 44 | if __name__ == '__main__': 45 | eval_all() 46 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.double.heads.one.stage.baseline/visulize_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import argparse 5 | 6 | sys.path.insert(0, '../lib') 7 | from utils import misc_utils, visual_utils 8 | 9 | img_root = '/data/CrowdHuman/images/' 10 | def eval_all(args): 11 | # json file 12 | assert os.path.exists(args.json_file), "Wrong json path!" 
13 | misc_utils.ensure_dir('outputs') 14 | records = misc_utils.load_json_lines(args.json_file)[:args.number] 15 | for record in records: 16 | dtboxes = misc_utils.load_bboxes( 17 | record, key_name='dtboxes', key_box='box', key_score='score', key_tag='tag') 18 | gtboxes = misc_utils.load_bboxes(record, 'gtboxes', 'box') 19 | dtboxes = misc_utils.xywh_to_xyxy(dtboxes) 20 | gtboxes = misc_utils.xywh_to_xyxy(gtboxes) 21 | keep = dtboxes[:, -2] > args.visual_thresh 22 | dtboxes = dtboxes[keep] 23 | len_dt = len(dtboxes) 24 | len_gt = len(gtboxes) 25 | line = "{}: dt:{}, gt:{}.".format(record['ID'], len_dt, len_gt) 26 | print(line) 27 | img_path = img_root + record['ID'] + '.png' 28 | img = misc_utils.load_img(img_path) 29 | visual_utils.draw_boxes(img, dtboxes, line_thick=1, line_color='blue') 30 | visual_utils.draw_boxes(img, gtboxes, line_thick=1, line_color='white') 31 | fpath = 'outputs/{}.png'.format(record['ID']) 32 | cv2.imwrite(fpath, img) 33 | 34 | 35 | def run_eval(): 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--json_file', '-f', default=None, required=True, type=str) 38 | parser.add_argument('--number', '-n', default=3, type=int) 39 | parser.add_argument('--visual_thresh', '-v', default=0.3, type=float) 40 | args = parser.parse_args() 41 | eval_all(args) 42 | 43 | if __name__ == '__main__': 44 | run_eval() 45 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.double.heads.two.stages/config.py: -------------------------------------------------------------------------------- 1 | import os, getpass 2 | import sys 3 | import os.path as osp 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../../..'
11 | add_path(os.path.join(root_dir)) 12 | add_path(os.path.join(root_dir, 'lib')) 13 | add_path(osp.join(root_dir, 'util')) 14 | class Crowdhuman: 15 | class_names = ['background', 'person'] 16 | num_classes = len(class_names) 17 | root_folder = '/home/zhenganlin/june/CrowdHuman' 18 | image_folder = osp.join(root_folder, 'images') 19 | train_source = osp.join(root_folder, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt') 20 | eval_source = osp.join(root_folder, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt') 21 | 22 | class Config: 23 | 24 | usr = getpass.getuser() 25 | this_model_dir = osp.split(os.path.realpath(__file__))[0].split('/')[-1] 26 | workspace = osp.split(osp.realpath(__file__))[0] 27 | output_dir = osp.join(root_dir, 'output', usr, 'cascade.rcnn.new', 'human', this_model_dir) 28 | model_dir = osp.join(output_dir, 'model_dump') 29 | eval_dir = osp.join(output_dir, 'eval_dump') 30 | log_dir = output_dir 31 | logger = osp.join(output_dir, 'logger.log') 32 | init_weights = '/home/zhenganlin/june/CrowdHuman/resnet50_fbaug.pth' 33 | # ----------data config---------- # 34 | image_mean = np.array([103.530, 116.280, 123.675]) 35 | image_std = np.array([57.375, 57.120, 58.395]) 36 | train_image_short_size = 800 37 | train_image_max_size = 1400 38 | eval_resize = True 39 | eval_image_short_size = 800 40 | eval_image_max_size = 1400 41 | seed_dataprovider = 3 42 | datadb = Crowdhuman 43 | train_source = datadb.train_source 44 | eval_source = datadb.eval_source 45 | eval_json, train_json = eval_source, train_source 46 | image_folder = datadb.image_folder 47 | imgDir = image_folder 48 | class_names = datadb.class_names 49 | num_classes = datadb.num_classes 50 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 51 | gt_boxes_name = 'fbox' 52 | 53 | # ----------train config---------- # 54 | backbone_freeze_at = 2 55 | rpn_channel = 256 56 | 57 | 58 | train_batch_per_gpu = 2 59 | momentum = 0.9 60 | weight_decay = 1e-4 61 | base_lr = 1e-3 * 1.25 62 | 63 | warm_iter = 800 64 | max_epoch = 35 65 | lr_decay = [24, 27] 66 | nr_images_epoch = 15000 67 | log_dump_interval = 1 68 | iter_per_epoch = nr_images_epoch // train_batch_per_gpu 69 | 70 | # ----------test config---------- # 71 | test_nms = 0.5 72 | test_nms_method = 'set_nms' 73 | visulize_threshold = 0.3 74 | pred_cls_threshold = 0.05 75 | 76 | # ----------model config---------- # 77 | batch_filter_box_size = 0 78 | nr_box_dim = 5 79 | nr_info_dim = 6 80 | ignore_label = -1 81 | max_boxes_of_image = 500 82 | num_predictions = 2 83 | # ----------rois generator config---------- # 84 | anchor_base_size = 35 85 | anchor_base_scale = [1] 86 | anchor_aspect_ratios = [1, 2, 3] 87 | num_cell_anchors = len(anchor_aspect_ratios) 88 | anchor_within_border = False 89 | anchor_ignore_label = -1 90 | 91 | rpn_min_box_size = 2 92 | rpn_nms_threshold = 0.7 93 | train_prev_nms_top_n = 12000 94 | train_post_nms_top_n = 2000 95 | test_prev_nms_top_n = 6000 96 | test_post_nms_top_n = 1500 97 | 98 | # ----------binding&training config---------- # 99 | rpn_smooth_l1_beta = 1 100 | rcnn_smooth_l1_beta = 1 101 | 102 | num_sample_anchors = 256 103 | positive_anchor_ratio = 0.5 104 | rpn_positive_overlap = 0.7 105 | rpn_negative_overlap = 0.3 106 | rpn_bbox_normalize_targets = False 107 | 108 | num_rois = 512 109 | fg_ratio = 0.5 110 | fg_threshold = 0.5 111 | bg_threshold_high = 0.5 112 | bg_threshold_low = 0.0 113 | rcnn_bbox_normalize_targets = True 114 | bbox_normalize_means = np.array([0, 0, 0, 0]) 
115 | bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]) 116 | 117 | config = Config() 118 | 119 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.double.heads.two.stages/demo.py: -------------------------------------------------------------------------------- 1 | from config import config 2 | import os.path as osp 3 | from common import * 4 | def computeJaccard(fpath, save_path ='results.md'): 5 | 6 | assert os.path.exists(fpath) 7 | records = load_func(fpath) 8 | 9 | GT = load_func(config.eval_json) 10 | fid = open(save_path, 'a') 11 | for i in range(10): 12 | score_thr = 1e-1 * i 13 | results = common_process(worker, records, 20, GT, score_thr, 0.5) 14 | line = strline(results) 15 | line = 'score_thr:{:.3f}, '.format(score_thr) + line 16 | print(line) 17 | fid.write(line + '\n') 18 | fid.flush() 19 | fid.close() 20 | 21 | def computeIoUs(fpath): 22 | 23 | print('Processing {}'.format(osp.basename(fpath))) 24 | name = os.path.basename(fpath) 25 | 26 | mAP, mMR = compute_mAP(fpath) 27 | 28 | fid = open('results.md', 'a') 29 | fid.write('{}\ndtboxes:\n'.format(name)) 30 | print('{}\ndtboxes:\n'.format(name)) 31 | line = 'mAP:{:.4f}, mMR:{:.4f}, '.format(mAP, mMR) 32 | print(line) 33 | fid.write(line + '\n') 34 | fid.close() 35 | computeJaccard(fpath, save_path='results.md') 36 | 37 | def eval_all(): 38 | for epoch in range(25, 50): 39 | fpath = osp.join(config.eval_dir, 'epoch-{}.human'.format(epoch)) 40 | if not os.path.exists(fpath): 41 | continue 42 | computeIoUs(fpath) 43 | 44 | if __name__ == '__main__': 45 | eval_all() 46 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.double.heads.two.stages/visulize_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import argparse 5 | 6 | sys.path.insert(0, '../lib') 7 | from utils import misc_utils, visual_utils 8 | 9 | img_root = '/data/CrowdHuman/images/' 10 | def eval_all(args): 11 | # json file 12 | assert os.path.exists(args.json_file), "Wrong json path!" 
13 | misc_utils.ensure_dir('outputs') 14 | records = misc_utils.load_json_lines(args.json_file)[:args.number] 15 | for record in records: 16 | dtboxes = misc_utils.load_bboxes( 17 | record, key_name='dtboxes', key_box='box', key_score='score', key_tag='tag') 18 | gtboxes = misc_utils.load_bboxes(record, 'gtboxes', 'box') 19 | dtboxes = misc_utils.xywh_to_xyxy(dtboxes) 20 | gtboxes = misc_utils.xywh_to_xyxy(gtboxes) 21 | keep = dtboxes[:, -2] > args.visual_thresh 22 | dtboxes = dtboxes[keep] 23 | len_dt = len(dtboxes) 24 | len_gt = len(gtboxes) 25 | line = "{}: dt:{}, gt:{}.".format(record['ID'], len_dt, len_gt) 26 | print(line) 27 | img_path = img_root + record['ID'] + '.png' 28 | img = misc_utils.load_img(img_path) 29 | visual_utils.draw_boxes(img, dtboxes, line_thick=1, line_color='blue') 30 | visual_utils.draw_boxes(img, gtboxes, line_thick=1, line_color='white') 31 | fpath = 'outputs/{}.png'.format(record['ID']) 32 | cv2.imwrite(fpath, img) 33 | 34 | 35 | def run_eval(): 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--json_file', '-f', default=None, required=True, type=str) 38 | parser.add_argument('--number', '-n', default=3, type=int) 39 | parser.add_argument('--visual_thresh', '-v', default=0.3, type=float) 40 | args = parser.parse_args() 41 | eval_all(args) 42 | 43 | if __name__ == '__main__': 44 | run_eval() 45 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.one.head.one.stage.baseline/config.py: -------------------------------------------------------------------------------- 1 | import os, getpass 2 | import sys 3 | import os.path as osp 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../../..'
11 | add_path(os.path.join(root_dir)) 12 | add_path(os.path.join(root_dir, 'lib')) 13 | add_path(osp.join(root_dir, 'util')) 14 | class Crowdhuman: 15 | class_names = ['background', 'person'] 16 | num_classes = len(class_names) 17 | root_folder = '/home/zhenganlin/june/CrowdHuman' 18 | image_folder = osp.join(root_folder, 'images') 19 | train_source = osp.join(root_folder, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt') 20 | eval_source = osp.join(root_folder, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt') 21 | 22 | class Config: 23 | 24 | usr = getpass.getuser() 25 | this_model_dir = osp.split(os.path.realpath(__file__))[0].split('/')[-1] 26 | workspace = osp.split(osp.realpath(__file__))[0] 27 | output_dir = osp.join(root_dir, 'output', usr, 'cascade.fpn.new', 'human', this_model_dir) 28 | model_dir = osp.join(output_dir, 'model_dump') 29 | eval_dir = osp.join(output_dir, 'eval_dump') 30 | log_dir = output_dir 31 | logger = osp.join(output_dir, 'logger.log') 32 | init_weights = '/home/zhenganlin/june/CrowdHuman/resnet50_fbaug.pth' 33 | # ----------data config---------- # 34 | image_mean = np.array([103.530, 116.280, 123.675]) 35 | image_std = np.array([57.375, 57.120, 58.395]) 36 | train_image_short_size = 800 37 | train_image_max_size = 1400 38 | eval_resize = True 39 | eval_image_short_size = 800 40 | eval_image_max_size = 1400 41 | seed_dataprovider = 3 42 | datadb = Crowdhuman 43 | train_source = datadb.train_source 44 | eval_source = datadb.eval_source 45 | eval_json, train_json = eval_source, train_source 46 | image_folder = datadb.image_folder 47 | imgDir = image_folder 48 | class_names = datadb.class_names 49 | num_classes = datadb.num_classes 50 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 51 | gt_boxes_name = 'fbox' 52 | 53 | # ----------train config---------- # 54 | backbone_freeze_at = 2 55 | rpn_channel = 256 56 | 57 | 58 | train_batch_per_gpu = 2 59 | momentum = 0.9 60 | weight_decay = 1e-4 61 | base_lr = 1e-3 * 1.25 62 | 63 | warm_iter = 800 64 | max_epoch = 35 65 | lr_decay = [24, 27] 66 | nr_images_epoch = 15000 67 | log_dump_interval = 1 68 | iter_per_epoch = nr_images_epoch // train_batch_per_gpu 69 | 70 | # ----------test config---------- # 71 | test_nms = 0.5 72 | test_nms_method = 'normal_nms' 73 | visulize_threshold = 0.3 74 | pred_cls_threshold = 0.05 75 | 76 | # ----------model config---------- # 77 | batch_filter_box_size = 0 78 | nr_box_dim = 5 79 | nr_info_dim = 6 80 | ignore_label = -1 81 | max_boxes_of_image = 500 82 | num_predictions = 2 83 | # ----------rois generator config---------- # 84 | anchor_base_size = 32 85 | anchor_base_scale = [1] 86 | anchor_aspect_ratios = [1, 2, 3] 87 | num_cell_anchors = len(anchor_aspect_ratios) 88 | anchor_within_border = False 89 | anchor_ignore_label = -1 90 | 91 | rpn_min_box_size = 2 92 | rpn_nms_threshold = 0.7 93 | train_prev_nms_top_n = 12000 94 | train_post_nms_top_n = 2000 95 | test_prev_nms_top_n = 6000 96 | test_post_nms_top_n = 1000 97 | 98 | # ----------binding&training config---------- # 99 | rpn_smooth_l1_beta = 1 100 | rcnn_smooth_l1_beta = 1 101 | 102 | num_sample_anchors = 256 103 | positive_anchor_ratio = 0.5 104 | rpn_positive_overlap = 0.7 105 | rpn_negative_overlap = 0.3 106 | rpn_bbox_normalize_targets = False 107 | 108 | num_rois = 512 109 | fg_ratio = 0.5 110 | fg_threshold = 0.5 111 | bg_threshold_high = 0.5 112 | bg_threshold_low = 0.0 113 | rcnn_bbox_normalize_targets = True 114 | bbox_normalize_means = np.array([0, 0, 0, 
0]) 115 | bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]) 116 | 117 | config = Config() 118 | 119 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.one.head.one.stage.baseline/demo.py: -------------------------------------------------------------------------------- 1 | from config import config 2 | import os.path as osp 3 | from common import * 4 | def computeJaccard(fpath, save_path ='results.md'): 5 | 6 | assert os.path.exists(fpath) 7 | records = load_func(fpath) 8 | 9 | GT = load_func(config.eval_json) 10 | fid = open(save_path, 'a') 11 | for i in range(10): 12 | score_thr = 1e-1 * i 13 | results = common_process(worker, records, 20, GT, score_thr, 0.5) 14 | line = strline(results) 15 | line = 'score_thr:{:.3f}, '.format(score_thr) + line 16 | print(line) 17 | fid.write(line + '\n') 18 | fid.flush() 19 | fid.close() 20 | 21 | def computeIoUs(fpath): 22 | 23 | print('Processing {}'.format(osp.basename(fpath))) 24 | name = os.path.basename(fpath) 25 | 26 | mAP, mMR = compute_mAP(fpath) 27 | 28 | fid = open('results.md', 'a') 29 | fid.write('{}\ndtboxes:\n'.format(name)) 30 | print('{}\ndtboxes:\n'.format(name)) 31 | line = 'mAP:{:.4f}, mMR:{:.4f}, '.format(mAP, mMR) 32 | print(line) 33 | fid.write(line + '\n') 34 | fid.close() 35 | computeJaccard(fpath, save_path='results.md') 36 | 37 | def eval_all(): 38 | for epoch in range(25, 50): 39 | fpath = osp.join(config.eval_dir, 'epoch-{}.human'.format(epoch)) 40 | if not os.path.exists(fpath): 41 | continue 42 | computeIoUs(fpath) 43 | 44 | if __name__ == '__main__': 45 | eval_all() 46 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.one.head.one.stage.baseline/visulize_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import argparse 5 | 6 | sys.path.insert(0, '../lib') 7 | from utils import misc_utils, visual_utils 8 | 9 | img_root = '/data/CrowdHuman/images/' 10 | def eval_all(args): 11 | # json file 12 | assert os.path.exists(args.json_file), "Wrong json path!" 
13 | misc_utils.ensure_dir('outputs') 14 | records = misc_utils.load_json_lines(args.json_file)[:args.number] 15 | for record in records: 16 | dtboxes = misc_utils.load_bboxes( 17 | record, key_name='dtboxes', key_box='box', key_score='score', key_tag='tag') 18 | gtboxes = misc_utils.load_bboxes(record, 'gtboxes', 'box') 19 | dtboxes = misc_utils.xywh_to_xyxy(dtboxes) 20 | gtboxes = misc_utils.xywh_to_xyxy(gtboxes) 21 | keep = dtboxes[:, -2] > args.visual_thresh 22 | dtboxes = dtboxes[keep] 23 | len_dt = len(dtboxes) 24 | len_gt = len(gtboxes) 25 | line = "{}: dt:{}, gt:{}.".format(record['ID'], len_dt, len_gt) 26 | print(line) 27 | img_path = img_root + record['ID'] + '.png' 28 | img = misc_utils.load_img(img_path) 29 | visual_utils.draw_boxes(img, dtboxes, line_thick=1, line_color='blue') 30 | visual_utils.draw_boxes(img, gtboxes, line_thick=1, line_color='white') 31 | fpath = 'outputs/{}.png'.format(record['ID']) 32 | cv2.imwrite(fpath, img) 33 | 34 | 35 | def run_eval(): 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--json_file', '-f', default=None, required=True, type=str) 38 | parser.add_argument('--number', '-n', default=3, type=int) 39 | parser.add_argument('--visual_thresh', '-v', default=0.3, type=float) 40 | args = parser.parse_args() 41 | eval_all(args) 42 | 43 | if __name__ == '__main__': 44 | run_eval() 45 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.one.head.two.stages.refinement/config.py: -------------------------------------------------------------------------------- 1 | import os, getpass 2 | import sys 3 | import os.path as osp 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../../..'
11 | add_path(os.path.join(root_dir)) 12 | add_path(os.path.join(root_dir, 'lib')) 13 | add_path(osp.join(root_dir, 'util')) 14 | class Crowdhuman: 15 | class_names = ['background', 'person'] 16 | num_classes = len(class_names) 17 | root_folder = '/home/zhenganlin/june/CrowdHuman' 18 | image_folder = osp.join(root_folder, 'images') 19 | train_source = osp.join(root_folder, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt') 20 | eval_source = osp.join(root_folder, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt') 21 | 22 | class Config: 23 | 24 | usr = getpass.getuser() 25 | this_model_dir = osp.split(os.path.realpath(__file__))[0].split('/')[-1] 26 | workspace = osp.split(osp.realpath(__file__))[0] 27 | output_dir = osp.join(root_dir, 'output', usr, 'cascade.fpn', 'human', this_model_dir) 28 | model_dir = osp.join(output_dir, 'model_dump') 29 | eval_dir = osp.join(output_dir, 'eval_dump') 30 | log_dir = output_dir 31 | logger = osp.join(output_dir, 'logger.log') 32 | init_weights = '/home/zhenganlin/june/CrowdHuman/resnet50_fbaug.pth' 33 | # ----------data config---------- # 34 | image_mean = np.array([103.530, 116.280, 123.675]) 35 | image_std = np.array([57.375, 57.120, 58.395]) 36 | train_image_short_size = 800 37 | train_image_max_size = 1400 38 | eval_resize = True 39 | eval_image_short_size = 800 40 | eval_image_max_size = 1400 41 | seed_dataprovider = 3 42 | datadb = Crowdhuman 43 | train_source = datadb.train_source 44 | eval_source = datadb.eval_source 45 | eval_json, train_json = eval_source, train_source 46 | image_folder = datadb.image_folder 47 | imgDir = image_folder 48 | class_names = datadb.class_names 49 | num_classes = datadb.num_classes 50 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 51 | gt_boxes_name = 'fbox' 52 | 53 | # ----------train config---------- # 54 | backbone_freeze_at = 2 55 | rpn_channel = 256 56 | 57 | 58 | train_batch_per_gpu = 2 59 | momentum = 0.9 60 | weight_decay = 1e-4 61 | base_lr = 1e-3 * 1.25 62 | 63 | warm_iter = 800 64 | max_epoch = 35 65 | lr_decay = [24, 27] 66 | nr_images_epoch = 15000 67 | log_dump_interval = 1 68 | iter_per_epoch = nr_images_epoch // train_batch_per_gpu 69 | 70 | # ----------test config---------- # 71 | test_nms = 0.5 72 | test_nms_method = 'set_nms' 73 | visulize_threshold = 0.3 74 | pred_cls_threshold = 0.05 75 | 76 | # ----------model config---------- # 77 | batch_filter_box_size = 0 78 | nr_box_dim = 5 79 | nr_info_dim = 6 80 | ignore_label = -1 81 | max_boxes_of_image = 500 82 | num_predictions = 2 83 | # ----------rois generator config---------- # 84 | anchor_base_size = 32 85 | anchor_base_scale = [1] 86 | anchor_aspect_ratios = [1, 2, 3] 87 | num_cell_anchors = len(anchor_aspect_ratios) 88 | anchor_within_border = False 89 | anchor_ignore_label = -1 90 | 91 | rpn_min_box_size = 2 92 | rpn_nms_threshold = 0.7 93 | train_prev_nms_top_n = 12000 94 | train_post_nms_top_n = 2000 95 | test_prev_nms_top_n = 6000 96 | test_post_nms_top_n = 1500 97 | 98 | # ----------binding&training config---------- # 99 | rpn_smooth_l1_beta = 1 100 | rcnn_smooth_l1_beta = 1 101 | 102 | num_sample_anchors = 256 103 | positive_anchor_ratio = 0.5 104 | rpn_positive_overlap = 0.7 105 | rpn_negative_overlap = 0.3 106 | rpn_bbox_normalize_targets = False 107 | 108 | num_rois = 512 109 | fg_ratio = 0.5 110 | fg_threshold = 0.5 111 | bg_threshold_high = 0.5 112 | bg_threshold_low = 0.0 113 | rcnn_bbox_normalize_targets = True 114 | bbox_normalize_means = np.array([0, 0, 0, 0]) 115 | 
bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]) 116 | 117 | config = Config() 118 | 119 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.one.head.two.stages.refinement/demo.py: -------------------------------------------------------------------------------- 1 | from config import config 2 | import os.path as osp 3 | from common import * 4 | def computeJaccard(fpath, save_path ='results.md'): 5 | 6 | assert os.path.exists(fpath) 7 | records = load_func(fpath) 8 | 9 | GT = load_func(config.eval_json) 10 | fid = open(save_path, 'a') 11 | for i in range(10): 12 | score_thr = 1e-1 * i 13 | results = common_process(worker, records, 20, GT, score_thr, 0.5) 14 | line = strline(results) 15 | line = 'score_thr:{:.3f}, '.format(score_thr) + line 16 | print(line) 17 | fid.write(line + '\n') 18 | fid.flush() 19 | fid.close() 20 | 21 | def computeIoUs(fpath): 22 | 23 | print('Processing {}'.format(osp.basename(fpath))) 24 | name = os.path.basename(fpath) 25 | 26 | mAP, mMR = compute_mAP(fpath) 27 | 28 | fid = open('results.md', 'a') 29 | fid.write('{}\ndtboxes:\n'.format(name)) 30 | print('{}\ndtboxes:\n'.format(name)) 31 | line = 'mAP:{:.4f}, mMR:{:.4f}, '.format(mAP, mMR) 32 | print(line) 33 | fid.write(line + '\n') 34 | fid.close() 35 | computeJaccard(fpath, save_path='results.md') 36 | 37 | def eval_all(): 38 | for epoch in range(25, 50): 39 | fpath = osp.join(config.eval_dir, 'epoch-{}.human'.format(epoch)) 40 | if not os.path.exists(fpath): 41 | continue 42 | computeIoUs(fpath) 43 | 44 | if __name__ == '__main__': 45 | eval_all() 46 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.one.head.two.stages.refinement/visulize_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import argparse 5 | 6 | sys.path.insert(0, '../lib') 7 | from utils import misc_utils, visual_utils 8 | 9 | img_root = '/data/CrowdHuman/images/' 10 | def eval_all(args): 11 | # json file 12 | assert os.path.exists(args.json_file), "Wrong json path!" 
13 | misc_utils.ensure_dir('outputs') 14 | records = misc_utils.load_json_lines(args.json_file)[:args.number] 15 | for record in records: 16 | dtboxes = misc_utils.load_bboxes( 17 | record, key_name='dtboxes', key_box='box', key_score='score', key_tag='tag') 18 | gtboxes = misc_utils.load_bboxes(record, 'gtboxes', 'box') 19 | dtboxes = misc_utils.xywh_to_xyxy(dtboxes) 20 | gtboxes = misc_utils.xywh_to_xyxy(gtboxes) 21 | keep = dtboxes[:, -2] > args.visual_thresh 22 | dtboxes = dtboxes[keep] 23 | len_dt = len(dtboxes) 24 | len_gt = len(gtboxes) 25 | line = "{}: dt:{}, gt:{}.".format(record['ID'], len_dt, len_gt) 26 | print(line) 27 | img_path = img_root + record['ID'] + '.png' 28 | img = misc_utils.load_img(img_path) 29 | visual_utils.draw_boxes(img, dtboxes, line_thick=1, line_color='blue') 30 | visual_utils.draw_boxes(img, gtboxes, line_thick=1, line_color='white') 31 | fpath = 'outputs/{}.png'.format(record['ID']) 32 | cv2.imwrite(fpath, img) 33 | 34 | 35 | def run_eval(): 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--json_file', '-f', default=None, required=True, type=str) 38 | parser.add_argument('--number', '-n', default=3, type=int) 39 | parser.add_argument('--visual_thresh', '-v', default=0.3, type=float) 40 | args = parser.parse_args() 41 | eval_all(args) 42 | 43 | if __name__ == '__main__': 44 | run_eval() 45 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.one.head.two.stages/config.py: -------------------------------------------------------------------------------- 1 | import os, getpass 2 | import sys 3 | import os.path as osp 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../../..'
11 | add_path(os.path.join(root_dir)) 12 | add_path(os.path.join(root_dir, 'lib')) 13 | add_path(osp.join(root_dir, 'util')) 14 | class Crowdhuman: 15 | class_names = ['background', 'person'] 16 | num_classes = len(class_names) 17 | root_folder = '/home/zhenganlin/june/CrowdHuman' 18 | image_folder = osp.join(root_folder, 'images') 19 | train_source = osp.join(root_folder, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt') 20 | eval_source = osp.join(root_folder, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt') 21 | 22 | class Config: 23 | 24 | usr = getpass.getuser() 25 | this_model_dir = osp.split(os.path.realpath(__file__))[0].split('/')[-1] 26 | workspace = osp.split(osp.realpath(__file__))[0] 27 | output_dir = osp.join(root_dir, 'output', usr, 'cascade.fpn', 'human', this_model_dir) 28 | output_dir = 'output' 29 | model_dir = osp.join(output_dir, 'model_dump') 30 | eval_dir = osp.join(output_dir, 'eval_dump') 31 | log_dir = output_dir 32 | logger = osp.join(output_dir, 'logger.log') 33 | init_weights = '/home/zhenganlin/june/CrowdHuman/resnet50_fbaug.pth' 34 | # ----------data config---------- # 35 | image_mean = np.array([103.530, 116.280, 123.675]) 36 | image_std = np.array([57.375, 57.120, 58.395]) 37 | train_image_short_size = 800 38 | train_image_max_size = 1400 39 | eval_resize = True 40 | eval_image_short_size = 800 41 | eval_image_max_size = 1400 42 | seed_dataprovider = 3 43 | datadb = Crowdhuman 44 | train_source = datadb.train_source 45 | eval_source = datadb.eval_source 46 | eval_json, train_json = eval_source, train_source 47 | image_folder = datadb.image_folder 48 | imgDir = image_folder 49 | class_names = datadb.class_names 50 | num_classes = datadb.num_classes 51 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 52 | gt_boxes_name = 'fbox' 53 | 54 | # ----------train config---------- # 55 | backbone_freeze_at = 2 56 | rpn_channel = 256 57 | 58 | 59 | train_batch_per_gpu = 2 60 | momentum = 0.9 61 | weight_decay = 1e-4 62 | base_lr = 1e-3 * 1.25 63 | 64 | warm_iter = 800 65 | max_epoch = 35 66 | lr_decay = [24, 27] 67 | nr_images_epoch = 15000 68 | log_dump_interval = 1 69 | iter_per_epoch = nr_images_epoch // train_batch_per_gpu 70 | 71 | # ----------test config---------- # 72 | test_nms = 0.5 73 | test_nms_method = 'normal_nms' 74 | visulize_threshold = 0.3 75 | pred_cls_threshold = 0.05 76 | 77 | # ----------model config---------- # 78 | batch_filter_box_size = 0 79 | nr_box_dim = 5 80 | nr_info_dim = 6 81 | ignore_label = -1 82 | max_boxes_of_image = 500 83 | num_predictions = 2 84 | # ----------rois generator config---------- # 85 | anchor_base_size = 32 86 | anchor_base_scale = [1] 87 | anchor_aspect_ratios = [1, 2, 3] 88 | num_cell_anchors = len(anchor_aspect_ratios) 89 | anchor_within_border = False 90 | anchor_ignore_label = -1 91 | 92 | rpn_min_box_size = 2 93 | rpn_nms_threshold = 0.7 94 | train_prev_nms_top_n = 12000 95 | train_post_nms_top_n = 2000 96 | test_prev_nms_top_n = 6000 97 | test_post_nms_top_n = 1500 98 | 99 | # ----------binding&training config---------- # 100 | rpn_smooth_l1_beta = 1 101 | rcnn_smooth_l1_beta = 1 102 | 103 | num_sample_anchors = 256 104 | positive_anchor_ratio = 0.5 105 | rpn_positive_overlap = 0.7 106 | rpn_negative_overlap = 0.3 107 | rpn_bbox_normalize_targets = False 108 | 109 | num_rois = 512 110 | fg_ratio = 0.5 111 | fg_threshold = 0.5 112 | bg_threshold_high = 0.5 113 | bg_threshold_low = 0.0 114 | rcnn_bbox_normalize_targets = True 115 | bbox_normalize_means 
= np.array([0, 0, 0, 0]) 116 | bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]) 117 | 118 | config = Config() 119 | 120 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.one.head.two.stages/demo.py: -------------------------------------------------------------------------------- 1 | from config import config 2 | import os.path as osp 3 | from common import * 4 | def computeJaccard(fpath, save_path ='results.md'): 5 | 6 | assert os.path.exists(fpath) 7 | records = load_func(fpath) 8 | 9 | GT = load_func(config.eval_json) 10 | fid = open(save_path, 'a') 11 | for i in range(10): 12 | score_thr = 1e-1 * i 13 | results = common_process(worker, records, 20, GT, score_thr, 0.5) 14 | line = strline(results) 15 | line = 'score_thr:{:.3f}, '.format(score_thr) + line 16 | print(line) 17 | fid.write(line + '\n') 18 | fid.flush() 19 | fid.close() 20 | 21 | def computeIoUs(fpath): 22 | 23 | print('Processing {}'.format(osp.basename(fpath))) 24 | name = os.path.basename(fpath) 25 | 26 | mAP, mMR = compute_mAP(fpath) 27 | 28 | fid = open('results.md', 'a') 29 | fid.write('{}\ndtboxes:\n'.format(name)) 30 | print('{}\ndtboxes:\n'.format(name)) 31 | line = 'mAP:{:.4f}, mMR:{:.4f}, '.format(mAP, mMR) 32 | print(line) 33 | fid.write(line + '\n') 34 | fid.close() 35 | computeJaccard(fpath, save_path='results.md') 36 | 37 | def eval_all(): 38 | for epoch in range(25, 50): 39 | fpath = osp.join(config.eval_dir, 'epoch-{}.human'.format(epoch)) 40 | if not os.path.exists(fpath): 41 | continue 42 | computeIoUs(fpath) 43 | 44 | if __name__ == '__main__': 45 | eval_all() 46 | -------------------------------------------------------------------------------- /megvii/cascade.rcnn/megvii/res50.rcnn.one.head.two.stages/visulize_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import argparse 5 | 6 | sys.path.insert(0, '../lib') 7 | from utils import misc_utils, visual_utils 8 | 9 | img_root = '/data/CrowdHuman/images/' 10 | def eval_all(args): 11 | # json file 12 | assert os.path.exists(args.json_file), "Wrong json path!" 
13 | misc_utils.ensure_dir('outputs') 14 | records = misc_utils.load_json_lines(args.json_file)[:args.number] 15 | for record in records: 16 | dtboxes = misc_utils.load_bboxes( 17 | record, key_name='dtboxes', key_box='box', key_score='score', key_tag='tag') 18 | gtboxes = misc_utils.load_bboxes(record, 'gtboxes', 'box') 19 | dtboxes = misc_utils.xywh_to_xyxy(dtboxes) 20 | gtboxes = misc_utils.xywh_to_xyxy(gtboxes) 21 | keep = dtboxes[:, -2] > args.visual_thresh 22 | dtboxes = dtboxes[keep] 23 | len_dt = len(dtboxes) 24 | len_gt = len(gtboxes) 25 | line = "{}: dt:{}, gt:{}.".format(record['ID'], len_dt, len_gt) 26 | print(line) 27 | img_path = img_root + record['ID'] + '.png' 28 | img = misc_utils.load_img(img_path) 29 | visual_utils.draw_boxes(img, dtboxes, line_thick=1, line_color='blue') 30 | visual_utils.draw_boxes(img, gtboxes, line_thick=1, line_color='white') 31 | fpath = 'outputs/{}.png'.format(record['ID']) 32 | cv2.imwrite(fpath, img) 33 | 34 | 35 | def run_eval(): 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--json_file', '-f', default=None, required=True, type=str) 38 | parser.add_argument('--number', '-n', default=3, type=int) 39 | parser.add_argument('--visual_thresh', '-v', default=0.3, type=float) 40 | args = parser.parse_args() 41 | eval_all(args) 42 | 43 | if __name__ == '__main__': 44 | run_eval() 45 | -------------------------------------------------------------------------------- /megvii/rcnn.emd/megvii/res50.rcnn.double.heads.set.nms.refine.head/config.py: -------------------------------------------------------------------------------- 1 | import os, getpass 2 | import sys 3 | import os.path as osp 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../../..'
11 | add_path(os.path.join(root_dir, 'lib')) 12 | add_path(osp.join(root_dir, 'utils')) 13 | 14 | class Crowdhuman: 15 | class_names = ['background', 'person'] 16 | num_classes = len(class_names) 17 | root_folder = '/home/zhenganlin/june/CrowdHuman' 18 | image_folder = osp.join(root_folder, 'images') 19 | train_source = osp.join(root_folder, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt') 20 | eval_source = osp.join(root_folder, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt') 21 | 22 | class Config: 23 | 24 | usr = getpass.getuser() 25 | this_model_dir = osp.split(os.path.realpath(__file__))[0].split('/')[-1] 26 | output_dir = osp.join(root_dir, 'output', usr, 'fpn', 'human', this_model_dir) 27 | model_dir = osp.join(output_dir, 'model_dump') 28 | eval_dir = osp.join(output_dir, 'eval_dump') 29 | log_dir = output_dir 30 | logger = osp.join(output_dir, 'logger.log') 31 | 32 | init_weights = '/home/zhenganlin/june/CrowdHuman/resnet50_fbaug.pth' 33 | # ----------data config---------- # 34 | image_mean = np.array([103.530, 116.280, 123.675]) 35 | image_std = np.array([57.375, 57.120, 58.395]) 36 | train_image_short_size = 800 37 | train_image_max_size = 1400 38 | eval_resize = True 39 | eval_image_short_size = 800 40 | eval_image_max_size = 1400 41 | seed_dataprovider = 3 42 | datadb = Crowdhuman 43 | train_source = datadb.train_source 44 | eval_source = datadb.eval_source 45 | eval_json, train_json = eval_source, train_source 46 | image_folder = datadb.image_folder 47 | imgDir = image_folder 48 | class_names = datadb.class_names 49 | num_classes = datadb.num_classes 50 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 51 | gt_boxes_name = 'fbox' 52 | 53 | # ----------train config---------- # 54 | backbone_freeze_at = 2 55 | rpn_channel = 256 56 | 57 | 58 | train_batch_per_gpu = 2 59 | momentum = 0.9 60 | weight_decay = 1e-4 61 | base_lr = 1e-3 * 1.25 62 | 63 | warm_iter = 800 64 | max_epoch = 35 65 | lr_decay = [25, 30] 66 | nr_images_epoch = 15000 67 | log_dump_interval = 1 68 | iter_per_epoch = nr_images_epoch // train_batch_per_gpu 69 | 70 | # ----------test config---------- # 71 | test_nms = 0.5 72 | test_nms_method = 'set_nms' 73 | visulize_threshold = 0.3 74 | pred_cls_threshold = 0.05 75 | 76 | # ----------model config---------- # 77 | batch_filter_box_size = 0 78 | nr_box_dim = 5 79 | nr_info_dim=6 80 | ignore_label = -1 81 | max_boxes_of_image = 500 82 | num_predictions = 2 83 | # ----------rois generator config---------- # 84 | anchor_base_size = 32 85 | anchor_base_scale = [1] 86 | anchor_aspect_ratios = [1, 2, 3] 87 | num_cell_anchors = len(anchor_aspect_ratios) 88 | anchor_within_border = False 89 | anchor_ignore_label = -1 90 | 91 | rpn_min_box_size = 2 92 | rpn_nms_threshold = 0.7 93 | train_prev_nms_top_n = 12000 94 | train_post_nms_top_n = 2000 95 | test_prev_nms_top_n = 6000 96 | test_post_nms_top_n = 1500 97 | 98 | # ----------binding&training config---------- # 99 | rpn_smooth_l1_beta = 1 100 | rcnn_smooth_l1_beta = 1 101 | 102 | num_sample_anchors = 256 103 | positive_anchor_ratio = 0.5 104 | rpn_positive_overlap = 0.7 105 | rpn_negative_overlap = 0.3 106 | rpn_bbox_normalize_targets = False 107 | 108 | num_rois = 512 109 | fg_ratio = 0.5 110 | fg_threshold = 0.5 111 | bg_threshold_high = 0.5 112 | bg_threshold_low = 0.0 113 | rcnn_bbox_normalize_targets = True 114 | bbox_normalize_means = np.array([0, 0, 0, 0]) 115 | bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]) 116 | 117 | config = Config() 118 | 
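The configs in this rcnn.emd group pair num_predictions = 2 with test_nms_method = 'set_nms': each proposal emits two boxes, and NMS must not let one of them suppress its sibling. Below is a minimal NumPy sketch of that rule; the function name and the [x1, y1, x2, y2, score, proposal_id] row layout are illustrative assumptions, not the exact API of the repo's set_nms_utils.py.

import numpy as np

def set_nms_sketch(dets, thresh=0.5):
    # dets: (N, 6) array of [x1, y1, x2, y2, score, proposal_id]; the two
    # predictions emitted by one proposal share a proposal_id (assumed layout).
    x1, y1, x2, y2, scores, ids = dets.T
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i, rest = order[0], order[1:]
        keep.append(i)
        # IoU of the current top-scoring box against everything still queued
        w = np.maximum(0.0, np.minimum(x2[i], x2[rest]) - np.maximum(x1[i], x1[rest]))
        h = np.maximum(0.0, np.minimum(y2[i], y2[rest]) - np.maximum(y1[i], y1[rest]))
        iou = w * h / (areas[i] + areas[rest] - w * h)
        # the only change versus plain NMS: boxes from the same proposal survive
        order = rest[(iou <= thresh) | (ids[rest] == ids[i])]
    return keep

With thresh mirroring test_nms = 0.5 above, plain NMS is recovered by dropping the same-proposal exemption, which is why the one.head baselines can switch to test_nms_method = 'normal_nms' without changing anything else.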
119 | -------------------------------------------------------------------------------- /megvii/rcnn.emd/megvii/res50.rcnn.double.heads.set.nms.refine.head/demo.py: -------------------------------------------------------------------------------- 1 | from config import config 2 | import os.path as osp 3 | from common import * 4 | def computeJaccard(fpath, save_path ='results.md'): 5 | 6 | assert os.path.exists(fpath) 7 | records = load_func(fpath) 8 | 9 | GT = load_func(config.eval_json) 10 | fid = open(save_path, 'a') 11 | for i in range(10): 12 | score_thr = 1e-1 * i 13 | results = common_process(worker, records, 20, GT, score_thr, 0.5) 14 | line = strline(results) 15 | line = 'score_thr:{:.3f}, '.format(score_thr) + line 16 | print(line) 17 | fid.write(line + '\n') 18 | fid.flush() 19 | fid.close() 20 | 21 | def computeIoUs(fpath): 22 | 23 | print('Processing {}'.format(osp.basename(fpath))) 24 | name = os.path.basename(fpath) 25 | 26 | mAP, mMR = compute_mAP(fpath) 27 | 28 | fid = open('results.md', 'a') 29 | fid.write('{}\ndtboxes:\n'.format(name)) 30 | print('{}\ndtboxes:\n'.format(name)) 31 | line = 'mAP:{:.4f}, mMR:{:.4f}, '.format(mAP, mMR) 32 | print(line) 33 | fid.write(line + '\n') 34 | fid.close() 35 | computeJaccard(fpath, save_path='results.md') 36 | 37 | def eval_all(): 38 | for epoch in range(25, 50): 39 | fpath = osp.join(config.eval_dir, 'epoch-{}.human'.format(epoch)) 40 | if not os.path.exists(fpath): 41 | continue 42 | computeIoUs(fpath) 43 | 44 | if __name__ == '__main__': 45 | eval_all() 46 | -------------------------------------------------------------------------------- /megvii/rcnn.emd/megvii/res50.rcnn.double.heads.set.nms.refinement/config.py: -------------------------------------------------------------------------------- 1 | import os, getpass 2 | import sys 3 | import os.path as osp 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../../..' 
11 | add_path(os.path.join(root_dir)) 12 | add_path(os.path.join(root_dir, 'lib')) 13 | add_path(osp.join(root_dir, 'util')) 14 | class Crowdhuman: 15 | class_names = ['background', 'person'] 16 | num_classes = len(class_names) 17 | root_folder = '/home/zhenganlin/june/CrowdHuman' 18 | image_folder = osp.join(root_folder, 'images') 19 | train_source = osp.join(root_folder, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt') 20 | eval_source = osp.join(root_folder, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt') 21 | 22 | class Config: 23 | 24 | usr = getpass.getuser() 25 | this_model_dir = osp.split(os.path.realpath(__file__))[0].split('/')[-1] 26 | output_dir = osp.join(root_dir, 'output', usr, 'fpn.new', 'human', this_model_dir) 27 | model_dir = osp.join(output_dir, 'model_dump') 28 | eval_dir = osp.join(output_dir, 'eval_dump') 29 | log_dir = output_dir 30 | logger = osp.join(output_dir, 'logger.log') 31 | 32 | init_weights = '/home/zhenganlin/june/CrowdHuman/resnet50_fbaug.pth' 33 | # ----------data config---------- # 34 | image_mean = np.array([103.530, 116.280, 123.675]) 35 | image_std = np.array([57.375, 57.120, 58.395]) 36 | train_image_short_size = 800 37 | train_image_max_size = 1400 38 | eval_resize = True 39 | eval_image_short_size = 800 40 | eval_image_max_size = 1400 41 | seed_dataprovider = 3 42 | datadb = Crowdhuman 43 | train_source = datadb.train_source 44 | eval_source = datadb.eval_source 45 | eval_json, train_json = eval_source, train_source 46 | image_folder = datadb.image_folder 47 | imgDir = image_folder 48 | class_names = datadb.class_names 49 | num_classes = datadb.num_classes 50 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 51 | gt_boxes_name = 'fbox' 52 | 53 | # ----------train config---------- # 54 | backbone_freeze_at = 2 55 | rpn_channel = 256 56 | 57 | 58 | train_batch_per_gpu = 2 59 | momentum = 0.9 60 | weight_decay = 1e-4 61 | base_lr = 1e-3 * 1.25 62 | 63 | warm_iter = 800 64 | max_epoch = 35 65 | lr_decay = [24, 27] 66 | nr_images_epoch = 15000 67 | log_dump_interval = 1 68 | iter_per_epoch = nr_images_epoch // train_batch_per_gpu 69 | 70 | # ----------test config---------- # 71 | test_nms = 0.5 72 | test_nms_method = 'set_nms' 73 | visulize_threshold = 0.3 74 | pred_cls_threshold = 0.05 75 | 76 | # ----------model config---------- # 77 | batch_filter_box_size = 0 78 | nr_box_dim = 5 79 | nr_info_dim=6 80 | ignore_label = -1 81 | max_boxes_of_image = 500 82 | num_predictions = 2 83 | # ----------rois generator config---------- # 84 | anchor_base_size = 32 85 | anchor_base_scale = [1] 86 | anchor_aspect_ratios = [1, 2, 3] 87 | num_cell_anchors = len(anchor_aspect_ratios) 88 | anchor_within_border = False 89 | anchor_ignore_label = -1 90 | 91 | rpn_min_box_size = 2 92 | rpn_nms_threshold = 0.7 93 | train_prev_nms_top_n = 12000 94 | train_post_nms_top_n = 2000 95 | test_prev_nms_top_n = 6000 96 | test_post_nms_top_n = 1500 97 | 98 | # ----------binding&training config---------- # 99 | rpn_smooth_l1_beta = 1 100 | rcnn_smooth_l1_beta = 1 101 | 102 | num_sample_anchors = 256 103 | positive_anchor_ratio = 0.5 104 | rpn_positive_overlap = 0.7 105 | rpn_negative_overlap = 0.3 106 | rpn_bbox_normalize_targets = False 107 | 108 | num_rois = 512 109 | fg_ratio = 0.5 110 | fg_threshold = 0.5 111 | bg_threshold_high = 0.5 112 | bg_threshold_low = 0.0 113 | rcnn_bbox_normalize_targets = True 114 | bbox_normalize_means = np.array([0, 0, 0, 0]) 115 | bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]) 
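# With rcnn_bbox_normalize_targets = True the (dx, dy, dw, dh) regression
# targets are standardized with these means/stds before the smooth-l1 loss,
# and predictions are de-normalized again at inference. This mirrors the
# common Detectron-style convention; the box ops that consume these values
# presumably live in lib/det_opr/bbox_opr.py.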
116 | 117 | config = Config() 118 | 119 | -------------------------------------------------------------------------------- /megvii/rcnn.emd/megvii/res50.rcnn.double.heads.set.nms.refinement/demo.py: -------------------------------------------------------------------------------- 1 | from config import config 2 | import os.path as osp 3 | from common import * 4 | def computeJaccard(fpath, save_path ='results.md'): 5 | 6 | assert os.path.exists(fpath) 7 | records = load_func(fpath) 8 | 9 | GT = load_func(config.eval_json) 10 | fid = open(save_path, 'a') 11 | for i in range(10): 12 | score_thr = 1e-1 * i 13 | results = common_process(worker, records, 20, GT, score_thr, 0.5) 14 | line = strline(results) 15 | line = 'score_thr:{:.3f}, '.format(score_thr) + line 16 | print(line) 17 | fid.write(line + '\n') 18 | fid.flush() 19 | fid.close() 20 | 21 | def computeIoUs(fpath): 22 | 23 | print('Processing {}'.format(osp.basename(fpath))) 24 | name = os.path.basename(fpath) 25 | 26 | mAP, mMR = compute_mAP(fpath) 27 | 28 | fid = open('results.md', 'a') 29 | fid.write('{}\ndtboxes:\n'.format(name)) 30 | print('{}\ndtboxes:\n'.format(name)) 31 | line = 'mAP:{:.4f}, mMR:{:.4f}, '.format(mAP, mMR) 32 | print(line) 33 | fid.write(line + '\n') 34 | fid.close() 35 | computeJaccard(fpath, save_path='results.md') 36 | 37 | def eval_all(): 38 | for epoch in range(25, 50): 39 | fpath = osp.join(config.eval_dir, 'epoch-{}.human'.format(epoch)) 40 | if not os.path.exists(fpath): 41 | continue 42 | computeIoUs(fpath) 43 | 44 | if __name__ == '__main__': 45 | eval_all() 46 | -------------------------------------------------------------------------------- /megvii/rcnn.emd/megvii/res50.rcnn.double.heads.set.nms.refinement/visulize_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import argparse 5 | 6 | sys.path.insert(0, '../lib') 7 | from utils import misc_utils, visual_utils 8 | 9 | img_root = '/data/CrowdHuman/images/' 10 | def eval_all(args): 11 | # json file 12 | assert os.path.exists(args.json_file), "Wrong json path!" 
13 | misc_utils.ensure_dir('outputs') 14 | records = misc_utils.load_json_lines(args.json_file)[:args.number] 15 | for record in records: 16 | dtboxes = misc_utils.load_bboxes( 17 | record, key_name='dtboxes', key_box='box', key_score='score', key_tag='tag') 18 | gtboxes = misc_utils.load_bboxes(record, 'gtboxes', 'box') 19 | dtboxes = misc_utils.xywh_to_xyxy(dtboxes) 20 | gtboxes = misc_utils.xywh_to_xyxy(gtboxes) 21 | keep = dtboxes[:, -2] > args.visual_thresh 22 | dtboxes = dtboxes[keep] 23 | len_dt = len(dtboxes) 24 | len_gt = len(gtboxes) 25 | line = "{}: dt:{}, gt:{}.".format(record['ID'], len_dt, len_gt) 26 | print(line) 27 | img_path = img_root + record['ID'] + '.png' 28 | img = misc_utils.load_img(img_path) 29 | visual_utils.draw_boxes(img, dtboxes, line_thick=1, line_color='blue') 30 | visual_utils.draw_boxes(img, gtboxes, line_thick=1, line_color='white') 31 | fpath = 'outputs/{}.png'.format(record['ID']) 32 | cv2.imwrite(fpath, img) 33 | 34 | 35 | def run_eval(): 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--json_file', '-f', default=None, required=True, type=str) 38 | parser.add_argument('--number', '-n', default=3, type=int) 39 | parser.add_argument('--visual_thresh', '-v', default=0.3, type=float) 40 | args = parser.parse_args() 41 | eval_all(args) 42 | 43 | if __name__ == '__main__': 44 | run_eval() 45 | -------------------------------------------------------------------------------- /megvii/rcnn.emd/megvii/res50.rcnn.double.heads.set.nms/config.py: -------------------------------------------------------------------------------- 1 | import os, getpass 2 | import sys 3 | import os.path as osp 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../../..'
11 | add_path(os.path.join(root_dir, 'lib')) 12 | add_path(osp.join(root_dir, 'utils')) 13 | 14 | class Crowdhuman: 15 | class_names = ['background', 'person'] 16 | num_classes = len(class_names) 17 | root_folder = '/home/zhenganlin/june/CrowdHuman' 18 | image_folder = osp.join(root_folder, 'images') 19 | train_source = osp.join(root_folder, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt') 20 | eval_source = osp.join(root_folder, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt') 21 | 22 | class Config: 23 | 24 | usr = getpass.getuser() 25 | this_model_dir = osp.split(os.path.realpath(__file__))[0].split('/')[-1] 26 | output_dir = osp.join(root_dir, 'output', usr, 'fpn', 'human', this_model_dir) 27 | model_dir = osp.join(output_dir, 'model_dump') 28 | eval_dir = osp.join(output_dir, 'eval_dump') 29 | log_dir = output_dir 30 | logger = osp.join(output_dir, 'logger.log') 31 | 32 | init_weights = '/home/zhenganlin/june/CrowdHuman/resnet50_fbaug.pth' 33 | # ----------data config---------- # 34 | image_mean = np.array([103.530, 116.280, 123.675]) 35 | image_std = np.array([57.375, 57.120, 58.395]) 36 | train_image_short_size = 800 37 | train_image_max_size = 1400 38 | eval_resize = True 39 | eval_image_short_size = 800 40 | eval_image_max_size = 1400 41 | seed_dataprovider = 3 42 | datadb = Crowdhuman 43 | train_source = datadb.train_source 44 | eval_source = datadb.eval_source 45 | eval_json, train_json = eval_source, train_source 46 | image_folder = datadb.image_folder 47 | imgDir = image_folder 48 | class_names = datadb.class_names 49 | num_classes = datadb.num_classes 50 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 51 | gt_boxes_name = 'fbox' 52 | 53 | # ----------train config---------- # 54 | backbone_freeze_at = 2 55 | rpn_channel = 256 56 | 57 | 58 | train_batch_per_gpu = 2 59 | momentum = 0.9 60 | weight_decay = 1e-4 61 | base_lr = 1e-3 * 1.25 62 | 63 | warm_iter = 800 64 | max_epoch = 35 65 | lr_decay = [25, 30] 66 | nr_images_epoch = 15000 67 | log_dump_interval = 1 68 | iter_per_epoch = nr_images_epoch // train_batch_per_gpu 69 | 70 | # ----------test config---------- # 71 | test_nms = 0.5 72 | test_nms_method = 'set_nms' 73 | visulize_threshold = 0.3 74 | pred_cls_threshold = 0.05 75 | 76 | # ----------model config---------- # 77 | batch_filter_box_size = 0 78 | nr_box_dim = 5 79 | nr_info_dim=6 80 | ignore_label = -1 81 | max_boxes_of_image = 500 82 | num_predictions = 2 83 | # ----------rois generator config---------- # 84 | anchor_base_size = 32 85 | anchor_base_scale = [1] 86 | anchor_aspect_ratios = [1, 2, 3] 87 | num_cell_anchors = len(anchor_aspect_ratios) 88 | anchor_within_border = False 89 | anchor_ignore_label = -1 90 | 91 | rpn_min_box_size = 2 92 | rpn_nms_threshold = 0.7 93 | train_prev_nms_top_n = 12000 94 | train_post_nms_top_n = 2000 95 | test_prev_nms_top_n = 6000 96 | test_post_nms_top_n = 1500 97 | 98 | # ----------binding&training config---------- # 99 | rpn_smooth_l1_beta = 1 100 | rcnn_smooth_l1_beta = 1 101 | 102 | num_sample_anchors = 256 103 | positive_anchor_ratio = 0.5 104 | rpn_positive_overlap = 0.7 105 | rpn_negative_overlap = 0.3 106 | rpn_bbox_normalize_targets = False 107 | 108 | num_rois = 512 109 | fg_ratio = 0.5 110 | fg_threshold = 0.5 111 | bg_threshold_high = 0.5 112 | bg_threshold_low = 0.0 113 | rcnn_bbox_normalize_targets = True 114 | bbox_normalize_means = np.array([0, 0, 0, 0]) 115 | bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]) 116 | 117 | config = Config() 118 | 
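For orientation, the schedule knobs above work out to iter_per_epoch = 15000 // 2 = 7500 iterations per GPU, an 800-iteration warmup, and step decays at epochs 25 and 30. A small sketch of the implied learning-rate curve follows, assuming the usual linear warmup and 10x step decay; the decay factor and warmup shape are assumptions, and the authoritative logic lives in this folder's train_net.py, which is not shown here.

def lr_at(global_iter, base_lr=1e-3 * 1.25, warm_iter=800,
          iter_per_epoch=15000 // 2, lr_decay=(25, 30), gamma=0.1):
    # Linear ramp from 0 to base_lr over the first warm_iter iterations,
    # then a gamma-fold drop each time an epoch listed in lr_decay is reached.
    if global_iter < warm_iter:
        return base_lr * global_iter / warm_iter
    epoch = global_iter // iter_per_epoch
    return base_lr * gamma ** sum(epoch >= e for e in lr_decay)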
119 | -------------------------------------------------------------------------------- /megvii/rcnn.emd/megvii/res50.rcnn.double.heads.set.nms/demo.py: -------------------------------------------------------------------------------- 1 | from config import config 2 | import os.path as osp 3 | from common import * 4 | def computeJaccard(fpath, save_path ='results.md'): 5 | 6 | assert os.path.exists(fpath) 7 | records = load_func(fpath) 8 | 9 | GT = load_func(config.eval_json) 10 | fid = open(save_path, 'a') 11 | for i in range(10): 12 | score_thr = 1e-1 * i 13 | results = common_process(worker, records, 20, GT, score_thr, 0.5) 14 | line = strline(results) 15 | line = 'score_thr:{:.3f}, '.format(score_thr) + line 16 | print(line) 17 | fid.write(line + '\n') 18 | fid.flush() 19 | fid.close() 20 | 21 | def computeIoUs(fpath): 22 | 23 | print('Processing {}'.format(osp.basename(fpath))) 24 | name = os.path.basename(fpath) 25 | 26 | mAP, mMR = compute_mAP(fpath) 27 | 28 | fid = open('results.md', 'a') 29 | fid.write('{}\ndtboxes:\n'.format(name)) 30 | print('{}\ndtboxes:\n'.format(name)) 31 | line = 'mAP:{:.4f}, mMR:{:.4f}, '.format(mAP, mMR) 32 | print(line) 33 | fid.write(line + '\n') 34 | fid.close() 35 | computeJaccard(fpath, save_path='results.md') 36 | 37 | def eval_all(): 38 | for epoch in range(25, 50): 39 | fpath = osp.join(config.eval_dir, 'epoch-{}.human'.format(epoch)) 40 | if not os.path.exists(fpath): 41 | continue 42 | computeIoUs(fpath) 43 | 44 | if __name__ == '__main__': 45 | eval_all() 46 | -------------------------------------------------------------------------------- /megvii/rcnn/megvii/res50.rcnn.one.head.nms/config.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import getpass 3 | import os.path as osp 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../../..' 
11 | add_path(os.path.join(root_dir, 'lib')) 12 | add_path(osp.join(root_dir, 'utils')) 13 | 14 | class Crowdhuman: 15 | class_names = ['background', 'person'] 16 | num_classes = len(class_names) 17 | root_folder = '/home/zhenganlin/june/CrowdHuman' 18 | image_folder = osp.join(root_folder, 'images') 19 | train_source = osp.join(root_folder, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt') 20 | eval_source = osp.join(root_folder, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt') 21 | 22 | class Config: 23 | 24 | usr = getpass.getuser() 25 | this_model_dir = osp.split(os.path.realpath(__file__))[0].split('/')[-1] 26 | output_dir = osp.join(root_dir, 'output', usr, 'fpn', 'human', this_model_dir) 27 | model_dir = osp.join(output_dir, 'model_dump') 28 | eval_dir = osp.join(output_dir, 'eval_dump') 29 | log_dir = output_dir 30 | logger = osp.join(output_dir, 'logger.log') 31 | 32 | init_weights = '/home/zhenganlin/june/CrowdHuman/resnet50_fbaug.pth' 33 | # ----------data config---------- # 34 | image_mean = np.array([103.530, 116.280, 123.675]) 35 | image_std = np.array([57.375, 57.120, 58.395]) 36 | train_image_short_size = 800 37 | train_image_max_size = 1400 38 | eval_resize = True 39 | eval_image_short_size = 800 40 | eval_image_max_size = 1400 41 | seed_dataprovider = 3 42 | datadb = Crowdhuman 43 | train_source = datadb.train_source 44 | eval_source = datadb.eval_source 45 | eval_json, train_json = eval_source, train_source 46 | image_folder = datadb.image_folder 47 | imgDir = image_folder 48 | class_names = datadb.class_names 49 | num_classes = datadb.num_classes 50 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 51 | gt_boxes_name = 'fbox' 52 | 53 | # ----------train config---------- # 54 | backbone_freeze_at = 2 55 | rpn_channel = 256 56 | 57 | train_batch_per_gpu = 2 58 | momentum = 0.9 59 | weight_decay = 1e-4 60 | base_lr = 1e-3 * 1.25 61 | 62 | warm_iter = 800 63 | max_epoch = 35 64 | lr_decay = [25, 30] 65 | nr_images_epoch = 15000 66 | log_dump_interval = 1 67 | iter_per_epoch = nr_images_epoch // train_batch_per_gpu 68 | 69 | # ----------test config---------- # 70 | test_nms = 0.5 71 | test_nms_method = 'normal_nms' 72 | visulize_threshold = 0.3 73 | pred_cls_threshold = 0.05 74 | 75 | # ----------model config---------- # 76 | batch_filter_box_size = 0 77 | nr_box_dim = 5 78 | nr_info_dim=6 79 | ignore_label = -1 80 | max_boxes_of_image = 500 81 | num_predictions = 2 82 | # ----------rois generator config---------- # 83 | anchor_base_size = 32 84 | anchor_base_scale = [1] 85 | anchor_aspect_ratios = [1, 2, 3] 86 | num_cell_anchors = len(anchor_aspect_ratios) 87 | anchor_within_border = False 88 | anchor_ignore_label = -1 89 | 90 | rpn_min_box_size = 2 91 | rpn_nms_threshold = 0.7 92 | train_prev_nms_top_n = 12000 93 | train_post_nms_top_n = 2000 94 | test_prev_nms_top_n = 6000 95 | test_post_nms_top_n = 1500 96 | 97 | # ----------binding&training config---------- # 98 | rpn_smooth_l1_beta = 1 99 | rcnn_smooth_l1_beta = 1 100 | 101 | num_sample_anchors = 256 102 | positive_anchor_ratio = 0.5 103 | rpn_positive_overlap = 0.7 104 | rpn_negative_overlap = 0.3 105 | rpn_bbox_normalize_targets = False 106 | 107 | num_rois = 512 108 | fg_ratio = 0.5 109 | fg_threshold = 0.5 110 | bg_threshold_high = 0.5 111 | bg_threshold_low = 0.0 112 | rcnn_bbox_normalize_targets = True 113 | bbox_normalize_means = np.array([0, 0, 0, 0]) 114 | bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]) 115 | 116 | config = Config() 117 | 118 
| -------------------------------------------------------------------------------- /megvii/rcnn/megvii/res50.rcnn.one.head.nms/demo.py: -------------------------------------------------------------------------------- 1 | from config import config 2 | import os.path as osp 3 | from common import * 4 | def computeJaccard(fpath, save_path ='results.md'): 5 | 6 | assert os.path.exists(fpath) 7 | records = load_func(fpath) 8 | 9 | GT = load_func(config.eval_json) 10 | fid = open(save_path, 'a') 11 | for i in range(10): 12 | score_thr = 1e-1 * i 13 | results = common_process(worker, records, 20, GT, score_thr, 0.5) 14 | line = strline(results) 15 | line = 'score_thr:{:.3f}, '.format(score_thr) + line 16 | print(line) 17 | fid.write(line + '\n') 18 | fid.flush() 19 | fid.close() 20 | 21 | def computeIoUs(fpath): 22 | 23 | print('Processing {}'.format(osp.basename(fpath))) 24 | name = os.path.basename(fpath) 25 | 26 | mAP, mMR = compute_mAP(fpath) 27 | 28 | fid = open('results.md', 'a') 29 | fid.write('{}\ndtboxes:\n'.format(name)) 30 | print('{}\ndtboxes:\n'.format(name)) 31 | line = 'mAP:{:.4f}, mMR:{:.4f}, '.format(mAP, mMR) 32 | print(line) 33 | fid.write(line + '\n') 34 | fid.close() 35 | computeJaccard(fpath, save_path='results.md') 36 | 37 | def eval_all(): 38 | for epoch in range(25, 50): 39 | fpath = osp.join(config.eval_dir, 'epoch-{}.human'.format(epoch)) 40 | if not os.path.exists(fpath): 41 | continue 42 | computeIoUs(fpath) 43 | 44 | if __name__ == '__main__': 45 | eval_all() 46 | -------------------------------------------------------------------------------- /megvii/rcnn/megvii/res50.rcnn.one.head.nms/record.txt: -------------------------------------------------------------------------------- 1 | epoch-32.human 2 | dtboxes: 3 | mAP:0.8716, mMR:0.4268, 4 | score_thr:0.000, mean_ratio:0.6268, mean_cover:0.6580, mean_recall:0.9356, mean_noise:0.3420, valids:90173, total:152774, gtn:98846 5 | score_thr:0.100, mean_ratio:0.6824, mean_cover:0.7251, mean_recall:0.9246, mean_noise:0.2749, valids:89084, total:134143, gtn:98800 6 | score_thr:0.200, mean_ratio:0.7338, mean_cover:0.7935, mean_recall:0.9088, mean_noise:0.2065, valids:87414, total:117111, gtn:98741 7 | score_thr:0.300, mean_ratio:0.7614, mean_cover:0.8355, mean_recall:0.8962, mean_noise:0.1645, valids:86052, total:107876, gtn:98669 8 | score_thr:0.400, mean_ratio:0.7796, mean_cover:0.8669, mean_recall:0.8853, mean_noise:0.1331, valids:84784, total:101488, gtn:98599 9 | score_thr:0.500, mean_ratio:0.7904, mean_cover:0.8904, mean_recall:0.8745, mean_noise:0.1096, valids:83431, total:96323, gtn:98520 10 | score_thr:0.600, mean_ratio:0.7964, mean_cover:0.9110, mean_recall:0.8626, mean_noise:0.0890, valids:81957, total:91840, gtn:98430 11 | score_thr:0.700, mean_ratio:0.7983, mean_cover:0.9297, mean_recall:0.8485, mean_noise:0.0703, valids:80119, total:87418, gtn:98307 12 | score_thr:0.800, mean_ratio:0.7937, mean_cover:0.9480, mean_recall:0.8287, mean_noise:0.0520, valids:77508, total:82529, gtn:98123 13 | score_thr:0.900, mean_ratio:0.7728, mean_cover:0.9682, mean_recall:0.7917, mean_noise:0.0318, valids:72573, total:75232, gtn:97740 14 | epoch-33.human 15 | dtboxes: 16 | mAP:0.8704, mMR:0.4277, 17 | score_thr:0.000, mean_ratio:0.6294, mean_cover:0.6618, mean_recall:0.9342, mean_noise:0.3382, valids:90026, total:151250, gtn:98839 18 | score_thr:0.100, mean_ratio:0.6852, mean_cover:0.7292, mean_recall:0.9229, mean_noise:0.2708, valids:88916, total:132878, gtn:98794 19 | score_thr:0.200, mean_ratio:0.7356, mean_cover:0.7973, 
mean_recall:0.9065, mean_noise:0.2027, valids:87220, total:116198, gtn:98724 20 | score_thr:0.300, mean_ratio:0.7627, mean_cover:0.8381, mean_recall:0.8947, mean_noise:0.1619, valids:85877, total:107208, gtn:98649 21 | score_thr:0.400, mean_ratio:0.7800, mean_cover:0.8690, mean_recall:0.8836, mean_noise:0.1310, valids:84598, total:100879, gtn:98585 22 | score_thr:0.500, mean_ratio:0.7904, mean_cover:0.8919, mean_recall:0.8732, mean_noise:0.1081, valids:83279, total:95869, gtn:98500 23 | score_thr:0.600, mean_ratio:0.7964, mean_cover:0.9124, mean_recall:0.8612, mean_noise:0.0876, valids:81803, total:91454, gtn:98416 24 | score_thr:0.700, mean_ratio:0.7984, mean_cover:0.9316, mean_recall:0.8470, mean_noise:0.0684, valids:79985, total:87069, gtn:98305 25 | score_thr:0.800, mean_ratio:0.7934, mean_cover:0.9490, mean_recall:0.8275, mean_noise:0.0510, valids:77329, total:82232, gtn:98121 26 | score_thr:0.900, mean_ratio:0.7729, mean_cover:0.9687, mean_recall:0.7915, mean_noise:0.0313, valids:72499, total:75115, gtn:97733 27 | epoch-34.human 28 | dtboxes: 29 | mAP:0.8699, mMR:0.4274, 30 | score_thr:0.000, mean_ratio:0.6355, mean_cover:0.6685, mean_recall:0.9338, mean_noise:0.3315, valids:89880, total:148557, gtn:98831 31 | score_thr:0.100, mean_ratio:0.6899, mean_cover:0.7348, mean_recall:0.9219, mean_noise:0.2652, valids:88712, total:130908, gtn:98781 32 | score_thr:0.200, mean_ratio:0.7401, mean_cover:0.8021, mean_recall:0.9064, mean_noise:0.1979, valids:87120, total:115025, gtn:98713 33 | score_thr:0.300, mean_ratio:0.7654, mean_cover:0.8419, mean_recall:0.8942, mean_noise:0.1581, valids:85748, total:106324, gtn:98643 34 | score_thr:0.400, mean_ratio:0.7822, mean_cover:0.8718, mean_recall:0.8834, mean_noise:0.1282, valids:84517, total:100254, gtn:98570 35 | score_thr:0.500, mean_ratio:0.7926, mean_cover:0.8949, mean_recall:0.8730, mean_noise:0.1051, valids:83187, total:95372, gtn:98484 36 | score_thr:0.600, mean_ratio:0.7978, mean_cover:0.9141, mean_recall:0.8612, mean_noise:0.0859, valids:81715, total:91128, gtn:98401 37 | score_thr:0.700, mean_ratio:0.7986, mean_cover:0.9320, mean_recall:0.8467, mean_noise:0.0680, valids:79894, total:86905, gtn:98289 38 | score_thr:0.800, mean_ratio:0.7936, mean_cover:0.9496, mean_recall:0.8274, mean_noise:0.0504, valids:77318, total:82155, gtn:98103 39 | score_thr:0.900, mean_ratio:0.7725, mean_cover:0.9688, mean_recall:0.7910, mean_noise:0.0312, valids:72406, total:75006, gtn:97713 40 | epoch-35.human 41 | dtboxes: 42 | mAP:0.8704, mMR:0.4290, 43 | score_thr:0.000, mean_ratio:0.6307, mean_cover:0.6629, mean_recall:0.9343, mean_noise:0.3371, valids:89988, total:150501, gtn:98825 44 | score_thr:0.100, mean_ratio:0.6855, mean_cover:0.7295, mean_recall:0.9226, mean_noise:0.2705, valids:88829, total:132329, gtn:98781 45 | score_thr:0.200, mean_ratio:0.7370, mean_cover:0.7983, mean_recall:0.9069, mean_noise:0.2017, valids:87206, total:115926, gtn:98715 46 | score_thr:0.300, mean_ratio:0.7631, mean_cover:0.8386, mean_recall:0.8947, mean_noise:0.1614, valids:85863, total:107059, gtn:98647 47 | score_thr:0.400, mean_ratio:0.7807, mean_cover:0.8690, mean_recall:0.8842, mean_noise:0.1310, valids:84637, total:100839, gtn:98582 48 | score_thr:0.500, mean_ratio:0.7913, mean_cover:0.8927, mean_recall:0.8735, mean_noise:0.1073, valids:83334, total:95851, gtn:98498 49 | score_thr:0.600, mean_ratio:0.7970, mean_cover:0.9121, mean_recall:0.8620, mean_noise:0.0879, valids:81889, total:91559, gtn:98416 50 | score_thr:0.700, mean_ratio:0.7982, mean_cover:0.9305, 
mean_recall:0.8475, mean_noise:0.0695, valids:80023, total:87175, gtn:98310 51 | score_thr:0.800, mean_ratio:0.7937, mean_cover:0.9483, mean_recall:0.8284, mean_noise:0.0517, valids:77453, total:82377, gtn:98120 52 | score_thr:0.900, mean_ratio:0.7727, mean_cover:0.9680, mean_recall:0.7916, mean_noise:0.0320, valids:72561, total:75210, gtn:97751 53 | -------------------------------------------------------------------------------- /megvii/retinanet/res50.retinanet.fpn.double.heads.iou.inference/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys, getpass 3 | import os.path as osp 4 | import numpy as np 5 | import pdb 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../../..' 11 | add_path(osp.join(root_dir)) 12 | add_path(osp.join(root_dir, 'lib')) 13 | add_path(osp.join(root_dir, 'utils')) 14 | 15 | class Crowd_human: 16 | 17 | class_names = ['background', 'person'] 18 | num_classes = len(class_names) 19 | root_folder = '/home/zhenganlin/june/CrowdHuman/' 20 | image_folder = osp.join(root_folder, 'images') 21 | train_source = osp.join(root_folder, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt') 22 | eval_source = osp.join(root_folder, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt') 23 | 24 | class Config: 25 | 26 | this_model_dir = osp.split(os.path.realpath(__file__))[0] 27 | user = getpass.getuser() 28 | cur_dir = osp.basename(this_model_dir) 29 | 30 | output_dir = osp.join(root_dir, 'output', user, 'retinanet', cur_dir) 31 | model_dir = os.path.join(output_dir, 'model_dump') 32 | eval_dir = os.path.join(output_dir, 'eval_dump') 33 | log_dir = output_dir 34 | init_weights = '/home/zhenganlin/june/CrowdHuman/resnet50_fbaug.pth' 35 | # ----------data config---------- # 36 | image_mean = np.array([103.530, 116.280, 123.675]) 37 | image_std = np.array([57.375, 57.120, 58.395]) 38 | train_image_short_size = 800 39 | train_image_max_size = 1400 40 | eval_resize = True 41 | eval_image_short_size = 800 42 | eval_image_max_size = 1400 43 | seed_dataprovider = 3 44 | datadb = Crowd_human() 45 | train_source = datadb.train_source 46 | eval_source = datadb.eval_source 47 | train_json, eval_json = train_source, eval_source 48 | image_folder = datadb.image_folder 49 | imgDir = image_folder 50 | class_names = datadb.class_names 51 | num_classes = datadb.num_classes 52 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 53 | gt_boxes_name = 'fbox' 54 | 55 | # ----------train config---------- # 56 | backbone_freeze_at = 2 57 | train_batch_per_gpu = 2 58 | momentum = 0.9 59 | weight_decay = 1e-4 60 | base_lr = 3.125e-4 61 | learning_rate = base_lr 62 | focal_loss_alpha = 0.25 63 | focal_loss_gamma = 2 64 | anchor_ignore_label = -1 65 | 66 | warm_iter = 1874 67 | max_epoch = 55 68 | lr_decay = [0, 33, 43] 69 | nr_images_epoch = 15000 70 | log_dump_interval = 1 71 | iter_per_epoch = nr_images_epoch // train_batch_per_gpu 72 | 73 | # ----------test config---------- # 74 | test_nms = 0.5 75 | test_nms_method = 'set_nms' 76 | visulize_threshold = 0.3 77 | pred_cls_threshold = 0.05 78 | 79 | # ----------dataset config---------- # 80 | nr_box_dim = 5 81 | max_boxes_of_image = 500 82 | 83 | # --------anchor generator config-------- # 84 | anchor_base_size = 32 # the minimum anchor size, used on the biggest feature map.
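# [annotation, not part of the original file] A sketch of how the anchor
# fields above and below typically expand into per-cell anchors; the real
# generator lives in rpn_anchor_target_opr.py / lib/det_opr/anchors_generator.py
# (not shown here), so the exact size rule is an assumption:
#     for each FPN level with stride s (anchor_base_size = 32 pairing with the stride-8 level):
#         for scale in anchor_base_scale:
#             for ratio in anchor_aspect_ratios:  # ratio ~ h / w
#                 h = anchor_base_size * (s / 8) * scale * ratio ** 0.5
#                 w = anchor_base_size * (s / 8) * scale / ratio ** 0.5
# With one scale and three ratios, num_cell_anchors evaluates to 3, and the
# tall ratios [1, 2, 3] suit standing pedestrians in CrowdHuman.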
85 | anchor_base_scale = [1] 86 | anchor_aspect_ratios = [1, 2, 3] 87 | num_cell_anchors = len(anchor_aspect_ratios) * len(anchor_base_scale) 88 | 89 | # ----------binding&training config---------- # 90 | smooth_l1_beta = 0.1 91 | negative_thresh = 0.4 92 | positive_thresh = 0.5 93 | allow_low_quality = True 94 | 95 | config = Config() 96 | -------------------------------------------------------------------------------- /megvii/retinanet/res50.retinanet.fpn.double.heads.iou.inference/demo.py: -------------------------------------------------------------------------------- 1 | from config import config 2 | import os.path as osp 3 | from common import * 4 | def computeJaccard(fpath, save_path ='results.md'): 5 | 6 | assert os.path.exists(fpath) 7 | records = load_func(fpath) 8 | 9 | GT = load_func(config.eval_json) 10 | fid = open(save_path, 'a') 11 | for i in range(10): 12 | score_thr = 1e-1 * i 13 | results = common_process(worker, records, 20, GT, score_thr, 0.5) 14 | line = strline(results) 15 | line = 'score_thr:{:.3f}, '.format(score_thr) + line 16 | print(line) 17 | fid.write(line + '\n') 18 | fid.flush() 19 | fid.close() 20 | 21 | def computeIoUs(fpath): 22 | 23 | print('Processing {}'.format(osp.basename(fpath))) 24 | name = os.path.basename(fpath) 25 | 26 | mAP, mMR = compute_mAP(fpath) 27 | 28 | fid = open('results.md', 'a') 29 | fid.write('{}\ndtboxes:\n'.format(name)) 30 | print('{}\ndtboxes:\n'.format(name)) 31 | line = 'mAP:{:.4f}, mMR:{:.4f}, '.format(mAP, mMR) 32 | print(line) 33 | fid.write(line + '\n') 34 | fid.close() 35 | computeJaccard(fpath, save_path='results.md') 36 | 37 | def eval_all(): 38 | for epoch in range(25, 50): 39 | fpath = osp.join(config.eval_dir, 'epoch-{}.human'.format(epoch)) 40 | if not os.path.exists(fpath): 41 | continue 42 | computeIoUs(fpath) 43 | 44 | if __name__ == '__main__': 45 | eval_all() 46 | -------------------------------------------------------------------------------- /megvii/retinanet/res50.retinanet.fpn.one.head.iou.inference.baseline.v2/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys, getpass 3 | import os.path as osp 4 | import numpy as np 5 | import pdb 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../../..' 
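# [annotation, not part of the original file] root_dir above is a relative
# path, so the add_path() calls below resolve only when scripts are launched
# from this model directory; anchoring it with
# os.path.dirname(os.path.abspath(__file__)), as the model/emd_* configs later
# in this dump do, would make the sys.path setup independent of the current
# working directory.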
11 | add_path(osp.join(root_dir, 'lib')) 12 | add_path(osp.join(root_dir, 'utils')) 13 | 14 | class Crowd_human: 15 | 16 | class_names = ['background', 'person'] 17 | num_classes = len(class_names) 18 | root_folder = '/home/zhenganlin/june/CrowdHuman/' 19 | image_folder = osp.join(root_folder, 'images') 20 | train_source = osp.join(root_folder, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt') 21 | eval_source = osp.join(root_folder, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt') 22 | 23 | class Config: 24 | 25 | this_model_dir = osp.split(os.path.realpath(__file__))[0] 26 | user = getpass.getuser() 27 | cur_dir = osp.basename(this_model_dir) 28 | 29 | output_dir = osp.join(root_dir, 'output', user, 'retinanet', cur_dir) 30 | model_dir = os.path.join(output_dir, 'model_dump') 31 | eval_dir = os.path.join(output_dir, 'eval_dump') 32 | log_dir = output_dir 33 | init_weights = '/home/zhenganlin/june/CrowdHuman/resnet50_fbaug.pth' 34 | # ----------data config---------- # 35 | image_mean = np.array([103.530, 116.280, 123.675]) 36 | image_std = np.array([57.375, 57.120, 58.395]) 37 | train_image_short_size = 800 38 | train_image_max_size = 1400 39 | eval_resize = True 40 | eval_image_short_size = 800 41 | eval_image_max_size = 1400 42 | seed_dataprovider = 3 43 | datadb = Crowd_human() 44 | train_source = datadb.train_source 45 | eval_source = datadb.eval_source 46 | train_json, eval_json = train_source, eval_source 47 | image_folder = datadb.image_folder 48 | imgDir = image_folder 49 | class_names = datadb.class_names 50 | num_classes = datadb.num_classes 51 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 52 | gt_boxes_name = 'fbox' 53 | 54 | # ----------train config---------- # 55 | backbone_freeze_at = 2 56 | train_batch_per_gpu = 2 57 | momentum = 0.9 58 | weight_decay = 1e-4 59 | base_lr = 3.125e-4 60 | learning_rate = base_lr 61 | focal_loss_alpha = 0.25 62 | focal_loss_gamma = 2 63 | anchor_ignore_label = -1 64 | 65 | warm_iter = 1874 66 | max_epoch = 50 67 | lr_decay = [0, 30, 40] 68 | nr_images_epoch = 15000 69 | log_dump_interval = 1 70 | iter_per_epoch = nr_images_epoch // train_batch_per_gpu 71 | 72 | # ----------test config---------- # 73 | test_nms = 0.5 74 | test_nms_method = 'normal_nms' 75 | visulize_threshold = 0.3 76 | pred_cls_threshold = 0.05 77 | 78 | # ----------dataset config---------- # 79 | nr_box_dim = 5 80 | max_boxes_of_image = 500 81 | 82 | # --------anchor generator config-------- # 83 | anchor_base_size = 32 # the minimum anchor size, used on the biggest feature map.
84 | anchor_base_scale = [1] 85 | anchor_aspect_ratios = [1, 2, 3] 86 | num_cell_anchors = len(anchor_aspect_ratios) * len(anchor_base_scale) 87 | 88 | # ----------binding&training config---------- # 89 | smooth_l1_beta = 0.1 90 | negative_thresh = 0.4 91 | positive_thresh = 0.5 92 | allow_low_quality = True 93 | 94 | config = Config() 95 | -------------------------------------------------------------------------------- /megvii/retinanet/res50.retinanet.fpn.one.head.iou.inference.baseline.v2/emd_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import pdb 10 | 11 | def emd_cpu_nms(dets, base_thr, upp_thr = 1.): # upp_thr is accepted but never used 12 | """Set-aware NMS in pure Python: each row of dets is [x1, y1, x2, y2, score, id], and boxes sharing an id never suppress each other.""" 13 | 14 | x1 = dets[:, 0] 15 | y1 = dets[:, 1] 16 | x2 = dets[:, 2] 17 | y2 = dets[:, 3] 18 | scores = dets[:, 4] 19 | numbers = dets[:, 5] 20 | 21 | sup = np.zeros(dets.shape[0]) # sup[j] == 1 marks box j as suppressed 22 | 23 | areas = (x2 - x1) * (y2 - y1) 24 | order = np.argsort(-scores) # visit boxes in descending score order 25 | eps = 1e-8 26 | 27 | while order.size > 0: 28 | 29 | i = order[0] 30 | num = numbers[i] # identity of the current keeper 31 | sup[i] = 0 32 | xx1 = np.maximum(x1[i], x1[order[1:]]) 33 | yy1 = np.maximum(y1[i], y1[order[1:]]) 34 | xx2 = np.minimum(x2[i], x2[order[1:]]) 35 | yy2 = np.minimum(y2[i], y2[order[1:]]) 36 | 37 | w = np.maximum(0.0, xx2 - xx1) 38 | h = np.maximum(0.0, yy2 - yy1) 39 | inter = w * h 40 | ovr = inter / (areas[i] + areas[order[1:]] - inter + eps) 41 | indices = np.where(np.logical_and(ovr > base_thr, sup[order[1:]] < 1))[0] # overlapping, not-yet-suppressed boxes 42 | loc = np.where(numbers[order[indices + 1]] == num)[0] # those among them sharing the keeper's id 43 | sup[order[indices + 1]] = 1 # suppress every overlap ... 44 | if loc.size: 45 | sup[order[indices[loc[0]] + 1]] = 0 # ... but revive the keeper's set partner 46 | inds = np.where(sup[order[1:]] < 1)[0] 47 | order = order[inds + 1] 48 | 49 | keep = np.where(sup < 1)[0] 50 | return keep 51 | -------------------------------------------------------------------------------- /megvii/retinanet/res50.retinanet.fpn.one.head.iou.inference.baseline/config.py: -------------------------------------------------------------------------------- 1 | import os, sys 2 | import getpass 3 | import os.path as osp 4 | import numpy as np 5 | import pdb 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../../..'
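# [annotation, not part of the original file] Usage sketch for emd_cpu_nms
# defined in the file above: each row of dets is [x1, y1, x2, y2, score, id],
# where the two boxes decoded from the same proposal share an id and are
# therefore spared from suppressing each other (the values are made up):
#     dets = np.array([[0., 0., 10., 20., 0.9, 0.],
#                      [1., 0., 11., 20., 0.8, 0.],   # same id as row 0: kept despite high IoU
#                      [2., 0., 12., 20., 0.7, 1.]])  # id 1: suppressed by row 0
#     keep = emd_cpu_nms(dets, base_thr=0.5)          # -> array([0, 1])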
11 | add_path(osp.join(root_dir, 'lib')) 12 | add_path(osp.join(root_dir, 'utils')) 13 | 14 | class Crowd_human: 15 | 16 | class_names = ['background', 'person'] 17 | num_classes = len(class_names) 18 | root_folder = '/home/zhenganlin/june/CrowdHuman/' 19 | image_folder = osp.join(root_folder, 'images') 20 | train_source = osp.join(root_folder, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt') 21 | eval_source = osp.join(root_folder, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt') 22 | 23 | class Config: 24 | 25 | this_model_dir = osp.split(os.path.realpath(__file__))[0] 26 | user = getpass.getuser() 27 | cur_dir = osp.basename(this_model_dir) 28 | 29 | output_dir = osp.join(root_dir, 'output', user, 'retinanet', cur_dir) 30 | model_dir = os.path.join(output_dir, 'model_dump') 31 | eval_dir = os.path.join(output_dir, 'eval_dump') 32 | log_dir = output_dir 33 | init_weights = '/home/zhenganlin/june/CrowdHuman/resnet50_fbaug.pth' 34 | # ----------data config---------- # 35 | image_mean = np.array([103.530, 116.280, 123.675]) 36 | image_std = np.array([57.375, 57.120, 58.395]) 37 | train_image_short_size = 800 38 | train_image_max_size = 1400 39 | eval_resize = True 40 | eval_image_short_size = 800 41 | eval_image_max_size = 1400 42 | seed_dataprovider = 3 43 | datadb = Crowd_human() 44 | train_source = datadb.train_source 45 | eval_source = datadb.eval_source 46 | train_json, eval_json = train_source, eval_source 47 | image_folder = datadb.image_folder 48 | imgDir = image_folder 49 | class_names = datadb.class_names 50 | num_classes = datadb.num_classes 51 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 52 | gt_boxes_name = 'fbox' 53 | 54 | # ----------train config---------- # 55 | backbone_freeze_at = 2 56 | train_batch_per_gpu = 2 57 | momentum = 0.9 58 | weight_decay = 1e-4 59 | base_lr = 3.125e-4 60 | learning_rate = base_lr 61 | focal_loss_alpha = 0.25 62 | focal_loss_gamma = 2 63 | anchor_ignore_label = -1 64 | 65 | warm_iter = 1874 66 | max_epoch = 50 67 | lr_decay = [0, 30, 40] 68 | nr_images_epoch = 15000 69 | log_dump_interval = 1 70 | iter_per_epoch = nr_images_epoch // train_batch_per_gpu 71 | 72 | # ----------test config---------- # 73 | test_nms = 0.5 74 | test_nms_method = 'normal_nms' 75 | visulize_threshold = 0.3 76 | pred_cls_threshold = 0.05 77 | 78 | # ----------dataset config---------- # 79 | nr_box_dim = 5 80 | max_boxes_of_image = 500 81 | 82 | # --------anchor generator config-------- # 83 | anchor_base_size = 32 # the minimum anchor size, used on the biggest feature map.
84 | anchor_base_scale = [1] 85 | anchor_aspect_ratios = [1, 2, 3] 86 | num_cell_anchors = len(anchor_aspect_ratios) * len(anchor_base_scale) 87 | 88 | # ----------binding&training config---------- # 89 | smooth_l1_beta = 0.1 90 | negative_thresh = 0.4 91 | positive_thresh = 0.5 92 | allow_low_quality = True 93 | 94 | config = Config() 95 | -------------------------------------------------------------------------------- /megvii/retinanet/res50.retinanet.fpn.one.head.iou.inference.baseline/record.txt: -------------------------------------------------------------------------------- 1 | epoch-50.human 2 | dtboxes: 3 | mAP:0.8784, mMR:0.4762, 4 | score_thr:0.000, mean_ratio:0.3205, mean_cover:0.3245, mean_recall:0.9688, mean_noise:0.6755, valids:94730, total:335452, gtn:99091 5 | score_thr:0.100, mean_ratio:0.5592, mean_cover:0.5834, mean_recall:0.9362, mean_noise:0.4166, valids:91246, total:177489, gtn:98957 6 | score_thr:0.200, mean_ratio:0.7441, mean_cover:0.8325, mean_recall:0.8756, mean_noise:0.1675, valids:84034, total:105167, gtn:98564 7 | score_thr:0.300, mean_ratio:0.7702, mean_cover:0.9285, mean_recall:0.8180, mean_noise:0.0715, valids:76237, total:82777, gtn:98016 8 | score_thr:0.400, mean_ratio:0.7289, mean_cover:0.9701, mean_recall:0.7450, mean_noise:0.0299, valids:66418, total:68468, gtn:97291 9 | score_thr:0.500, mean_ratio:0.6398, mean_cover:0.9850, mean_recall:0.6442, mean_noise:0.0150, valids:53544, total:54113, gtn:96404 10 | score_thr:0.600, mean_ratio:0.4997, mean_cover:0.9767, mean_recall:0.5006, mean_noise:0.0233, valids:37818, total:37954, gtn:95637 11 | score_thr:0.700, mean_ratio:0.3205, mean_cover:0.9093, mean_recall:0.3206, mean_noise:0.0907, valids:21738, total:21765, gtn:95090 12 | score_thr:0.800, mean_ratio:0.1202, mean_cover:0.6171, mean_recall:0.1203, mean_noise:0.3829, valids:7127, total:7135, gtn:94851 13 | score_thr:0.900, mean_ratio:0.0037, mean_cover:0.0341, mean_recall:0.0037, mean_noise:0.9659, valids:165, total:165, gtn:94801 14 | --------------------------------------------------------------------------------
/megvii/retinanet/res50.retinanet.fpn.one.head.iou.inference/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys, getpass 3 | import os.path as osp 4 | import numpy as np 5 | import pdb 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../../..' 11 | add_path(osp.join(root_dir, 'lib')) 12 | add_path(osp.join(root_dir, 'utils')) 13 | 14 | class Crowd_human: 15 | 16 | class_names = ['background', 'person'] 17 | num_classes = len(class_names) 18 | root_folder = '/home/zhenganlin/june/CrowdHuman/' 19 | image_folder = osp.join(root_folder, 'images') 20 | train_source = osp.join(root_folder, 'crowd_human_train15000_final_unsure_fixempty_fixvis_vboxmerge.odgt') 21 | eval_source = osp.join(root_folder, 'crowd_human_test4370_final_unsure_fixempty_fixvis_vboxmerge.odgt') 22 | 23 | class Config: 24 | 25 | this_model_dir = osp.split(os.path.realpath(__file__))[0] 26 | user = getpass.getuser() 27 | cur_dir = osp.basename(this_model_dir) 28 | 29 | output_dir = osp.join(root_dir, 'output', user, 'retinanet', cur_dir) 30 | model_dir = os.path.join(output_dir, 'model_dump') 31 | eval_dir = os.path.join(output_dir, 'eval_dump') 32 | log_dir = output_dir 33 | init_weights = '/home/zhenganlin/june/CrowdHuman/resnet50_fbaug.pth' 34 | # ----------data config---------- # 35 | image_mean = np.array([103.530, 116.280, 123.675]) 36 | image_std = np.array([57.375, 57.120, 58.395]) 37 | train_image_short_size = 800 38 | train_image_max_size = 1400 39 | eval_resize = True 40 | eval_image_short_size = 800 41 | eval_image_max_size = 1400 42 | seed_dataprovider = 3 43 | datadb = Crowd_human() 44 | train_source = datadb.train_source 45 | eval_source = datadb.eval_source 46 | train_json, eval_json = train_source, eval_source 47 | image_folder = datadb.image_folder 48 | imgDir = image_folder 49 | class_names = datadb.class_names 50 | num_classes = datadb.num_classes 51 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 52 | gt_boxes_name = 'fbox' 53 | 54 | # ----------train config---------- # 55 | backbone_freeze_at = 2 56 | train_batch_per_gpu = 2 57 | momentum = 0.9 58 | weight_decay = 1e-4 59 | base_lr = 3.125e-4 60 | learning_rate = base_lr 61 | focal_loss_alpha = 0.25 62 | focal_loss_gamma = 2 63 | anchor_ignore_label = -1 64 | 65 | warm_iter = 1874 66 | max_epoch = 50 67 | lr_decay = [0, 30, 40] 68 | nr_images_epoch = 15000 69 | log_dump_interval = 1 70 | iter_per_epoch = nr_images_epoch // train_batch_per_gpu 71 | 72 | # ----------test config---------- # 73 | test_nms = 0.5 74 | test_nms_method = 'normal_nms' 75 | visulize_threshold = 0.3 76 | pred_cls_threshold = 0.05 77 | 78 | # ----------dataset config---------- # 79 | nr_box_dim = 5 80 | max_boxes_of_image = 500 81 | 82 | # --------anchor generator config-------- # 83 | anchor_base_size = 32 # the minimum anchor size, used on the biggest feature map.
84 | anchor_base_scale = [1] 85 | anchor_aspect_ratios = [1, 2, 3] 86 | num_cell_anchors = len(anchor_aspect_ratios) * len(anchor_base_scale) 87 | 88 | # ----------binding&training config---------- # 89 | smooth_l1_beta = 0.1 90 | negative_thresh = 0.4 91 | positive_thresh = 0.5 92 | allow_low_quality = True 93 | 94 | config = Config() 95 | -------------------------------------------------------------------------------- /model/cascade_emd/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../' 11 | add_path(os.path.join(root_dir)) 12 | add_path(os.path.join(root_dir, 'lib')) 13 | add_path(os.path.join(root_dir, 'utils')) 14 | 15 | class Crowd_human: 16 | class_names = ['background', 'person'] 17 | num_classes = len(class_names) 18 | root_folder = '/data/CrowdHuman' 19 | image_folder = '/data/CrowdHuman/images' 20 | train_source = os.path.join('/data/CrowdHuman/annotation_train.odgt') 21 | eval_source = os.path.join('/data/CrowdHuman/annotation_val.odgt') 22 | 23 | class Config: 24 | output_dir = 'outputs' 25 | if not os.path.isdir(output_dir): 26 | os.makedirs(output_dir) 27 | model_dir = os.path.join(output_dir, 'model_dump') 28 | eval_dir = os.path.join(output_dir, 'eval_dump') 29 | pretrain_weight = '/home/model/resnet50_fbaug_76254_4e14b7d1.pkl' 30 | 31 | # ----------data config---------- # 32 | image_mean = np.array([103.530, 116.280, 123.675]) 33 | image_std = np.array([57.375, 57.120, 58.395]) 34 | train_image_short_size = 800 35 | train_image_max_size = 1400 36 | eval_resize = True 37 | eval_image_short_size = 800 38 | eval_image_max_size = 1400 39 | seed_dataprovider = 3 40 | train_source = Crowd_human.train_source 41 | eval_source = Crowd_human.eval_source 42 | image_folder = Crowd_human.image_folder 43 | class_names = Crowd_human.class_names 44 | num_classes = Crowd_human.num_classes 45 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 46 | gt_boxes_name = 'fbox' 47 | 48 | backbone_freeze_at = 2 49 | rpn_channel = 256 50 | # ----------train config---------- # 51 | batch_per_gpu = 2 52 | basic_lr = 1e-3 * 1.25 53 | momentum = 0.9 54 | weight_decay = 1e-4 55 | 56 | warm_iters = 800 57 | max_epoch = 30 58 | lr_decay_rate = 0.1 59 | lr_decay_sates = [20, 26] 60 | nr_images_epoch = 15000 61 | 62 | #base_lr = 1e-3 * train_batch_per_gpu * 1.25 63 | log_dump_interval = 1 64 | 65 | # ----------test config---------- # 66 | test_cls_threshold = 0.05 67 | test_nms_version = 'original' 68 | test_max_boxes_per_image = 300 #200 69 | test_save_type = 'human' 70 | test_nms = 0.5 71 | test_vis_threshold = 0.3 72 | 73 | # ----------model config---------- # 74 | batch_filter_box_size = 0 75 | nr_box_dim = 5 76 | ignore_label = -1 77 | max_boxes_of_image = 500 78 | 79 | # ----------rois generator config---------- # 80 | anchor_base_size = 16 81 | anchor_base_scale = 0.5 82 | #anchor_scales = np.array([2, 4, 8, 16, 32]) 83 | anchor_aspect_ratios = [1, 2, 3] 84 | #num_anchor_scales = len(anchor_scales) 85 | num_cell_anchors = len(anchor_aspect_ratios) 86 | anchor_within_border = False 87 | 88 | rpn_min_box_size = 2 89 | rpn_nms_threshold = 0.7 90 | train_prev_nms_top_n = 12000 91 | train_post_nms_top_n = 2000 92 | test_prev_nms_top_n = 6000 93 | test_post_nms_top_n = 1000 94 | 95 | # ----------binding&training config---------- # 96 | rpn_smooth_l1_beta = 1 97 | 
rcnn_smooth_l1_beta = 1 98 | 99 | num_sample_anchors = 256 100 | positive_anchor_ratio = 0.5 101 | rpn_positive_overlap = 0.7 102 | rpn_negative_overlap = 0.3 103 | rpn_bbox_normalize_targets = False 104 | 105 | num_rois = 512 106 | fg_ratio = 0.5 107 | fg_threshold = 0.5 108 | bg_threshold_high = 0.5 109 | bg_threshold_low = 0.0 110 | rcnn_bbox_normalize_targets = True 111 | bbox_normalize_means = np.array([0, 0, 0, 0]).astype(np.float32) 112 | bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32) 113 | 114 | config = Config() 115 | 116 | -------------------------------------------------------------------------------- /model/cascade_emd/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import argparse 4 | from multiprocessing import Process, Queue 5 | 6 | from tqdm import tqdm 7 | import numpy as np 8 | import megengine as mge 9 | from megengine import jit 10 | 11 | from config import config 12 | import network 13 | import dataset 14 | import misc_utils 15 | 16 | if_set_nms = True 17 | 18 | def eval_all(args): 19 | # model_path 20 | saveDir = config.model_dir 21 | evalDir = config.eval_dir 22 | misc_utils.ensure_dir(evalDir) 23 | model_file = os.path.join(saveDir, 24 | 'epoch_{}.pkl'.format(args.resume_weights)) 25 | assert os.path.exists(model_file) 26 | # load data 27 | records = misc_utils.load_json_lines(config.eval_source) 28 | # multiprocessing 29 | num_records = len(records) 30 | num_devs = args.devices 31 | num_image = math.ceil(num_records / num_devs) 32 | result_queue = Queue(1000) 33 | procs = [] 34 | all_results = [] 35 | for i in range(num_devs): 36 | start = i * num_image 37 | end = min(start + num_image, num_records) 38 | split_records = records[start:end] 39 | proc = Process(target=inference, args=( 40 | model_file, i, split_records, result_queue)) 41 | proc.start() 42 | procs.append(proc) 43 | pbar = tqdm(total=num_records, ncols=50) 44 | for i in range(num_records): 45 | t = result_queue.get() 46 | all_results.append(t) 47 | pbar.update(1) 48 | for p in procs: 49 | p.join() 50 | fpath = os.path.join(evalDir, 'dump-{}.json'.format(args.resume_weights)) 51 | misc_utils.save_json_lines(all_results, fpath) 52 | 53 | def inference(model_file, device, records, result_queue): 54 | @jit.trace(symbolic=False) 55 | def val_func(): 56 | pred_boxes = net(net.inputs) 57 | return pred_boxes 58 | net = network.Network() 59 | net.eval() 60 | check_point = mge.load(model_file) 61 | net.load_state_dict(check_point['state_dict']) 62 | for record in records: 63 | np.set_printoptions(precision=2, suppress=True) 64 | net.eval() 65 | image, gt_boxes, im_info, ID = get_data(record, device) 66 | net.inputs["image"].set_value(image.astype(np.float32)) 67 | net.inputs["im_info"].set_value(im_info) 68 | pred_boxes = val_func().numpy() 69 | # nms 70 | if if_set_nms: 71 | from set_nms_utils import set_cpu_nms 72 | n = pred_boxes.shape[0] // 2 73 | idents = np.tile(np.arange(n)[:,None], (1, 2)).reshape(-1, 1) 74 | pred_boxes = np.hstack((pred_boxes, idents)) 75 | keep = pred_boxes[:, -2] > 0.05 76 | pred_boxes = pred_boxes[keep] 77 | keep = set_cpu_nms(pred_boxes, 0.5) 78 | pred_boxes = pred_boxes[keep][:, :-1] 79 | else: 80 | from set_nms_utils import cpu_nms 81 | keep = pred_boxes[:, -1] > 0.05 82 | pred_boxes = pred_boxes[keep] 83 | keep = cpu_nms(pred_boxes, 0.5) 84 | pred_boxes = pred_boxes[keep] 85 | result_dict = dict(ID=ID, height=int(im_info[0, -2]), width=int(im_info[0, -1]), 86 | 
dtboxes=boxes_dump(pred_boxes, False), 87 | gtboxes=boxes_dump(gt_boxes, True)) 88 | result_queue.put_nowait(result_dict) 89 | 90 | def boxes_dump(boxes, is_gt): 91 | result = [] 92 | boxes = boxes.tolist() 93 | for box in boxes: 94 | if is_gt: 95 | box_dict = {} 96 | box_dict['box'] = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 97 | box_dict['tag'] = box[-1] 98 | else: 99 | box_dict = {} 100 | box_dict['box'] = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 101 | box_dict['tag'] = 1 102 | box_dict['score'] = box[-1] 103 | result.append(box_dict) 104 | return result 105 | 106 | def get_data(record, device): 107 | data = dataset.val_dataset(record) 108 | image, gt_boxes, ID = \ 109 | data['data'], data['boxes'], data['ID'] 110 | if config.eval_resize == False: 111 | resized_img, scale = image, 1 112 | else: 113 | resized_img, scale = dataset.resize_img_by_short_and_max_size( 114 | image, config.eval_image_short_size, config.eval_image_max_size) 115 | 116 | original_height, original_width = image.shape[0:2] 117 | height, width = resized_img.shape[0:2] 118 | transposed_img = np.ascontiguousarray( 119 | resized_img.transpose(2, 0, 1)[None, :, :, :], 120 | dtype=np.float32) 121 | im_info = np.array([height, width, scale, original_height, original_width], 122 | dtype=np.float32)[None, :] 123 | return transposed_img, gt_boxes, im_info, ID 124 | 125 | def run_test(): 126 | parser = argparse.ArgumentParser() 127 | parser.add_argument('--resume_weights', '-r', default=None, type=str) 128 | parser.add_argument('--devices', '-d', default=1, type=int) 129 | args = parser.parse_args() 130 | eval_all(args) 131 | 132 | if __name__ == '__main__': 133 | run_test() 134 | 135 | -------------------------------------------------------------------------------- /model/cascade_fpn/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../' 11 | add_path(os.path.join(root_dir)) 12 | add_path(os.path.join(root_dir, 'lib')) 13 | add_path(os.path.join(root_dir, 'utils')) 14 | 15 | class Crowd_human: 16 | class_names = ['background', 'person'] 17 | num_classes = len(class_names) 18 | root_folder = '/data/CrowdHuman' 19 | image_folder = '/data/CrowdHuman/images' 20 | train_source = os.path.join('/data/CrowdHuman/annotation_train.odgt') 21 | eval_source = os.path.join('/data/CrowdHuman/annotation_val.odgt') 22 | 23 | class Config: 24 | output_dir = 'outputs' 25 | if not os.path.isdir(output_dir): 26 | os.makedirs(output_dir) 27 | model_dir = os.path.join(output_dir, 'model_dump') 28 | eval_dir = os.path.join(output_dir, 'eval_dump') 29 | pretrain_weight = '/home/model/resnet50_fbaug_76254_4e14b7d1.pkl' 30 | 31 | # ----------data config---------- # 32 | image_mean = np.array([103.530, 116.280, 123.675]) 33 | image_std = np.array([57.375, 57.120, 58.395]) 34 | train_image_short_size = 800 35 | train_image_max_size = 1400 36 | eval_resize = True 37 | eval_image_short_size = 800 38 | eval_image_max_size = 1400 39 | seed_dataprovider = 3 40 | train_source = Crowd_human.train_source 41 | eval_source = Crowd_human.eval_source 42 | image_folder = Crowd_human.image_folder 43 | class_names = Crowd_human.class_names 44 | num_classes = Crowd_human.num_classes 45 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 46 | gt_boxes_name = 'fbox' 47 | 48 | backbone_freeze_at = 2 49 | rpn_channel = 256 50 | # 
----------train config---------- # 51 | batch_per_gpu = 2 52 | basic_lr = 1e-3 * 1.25 53 | momentum = 0.9 54 | weight_decay = 1e-4 55 | 56 | warm_iters = 800 57 | max_epoch = 30 58 | lr_decay_rate = 0.1 59 | lr_decay_sates = [20, 26] 60 | nr_images_epoch = 15000 61 | 62 | #base_lr = 1e-3 * train_batch_per_gpu * 1.25 63 | log_dump_interval = 1 64 | 65 | # ----------test config---------- # 66 | test_cls_threshold = 0.05 67 | test_nms_version = 'original' 68 | test_max_boxes_per_image = 300 #200 69 | test_save_type = 'human' 70 | test_nms = 0.5 71 | test_vis_threshold = 0.3 72 | 73 | # ----------model config---------- # 74 | batch_filter_box_size = 0 75 | nr_box_dim = 5 76 | ignore_label = -1 77 | max_boxes_of_image = 500 78 | 79 | # ----------rois generator config---------- # 80 | anchor_base_size = 16 81 | anchor_base_scale = 0.5 82 | #anchor_scales = np.array([2, 4, 8, 16, 32]) 83 | anchor_aspect_ratios = [1, 2, 3] 84 | #num_anchor_scales = len(anchor_scales) 85 | num_cell_anchors = len(anchor_aspect_ratios) 86 | anchor_within_border = False 87 | 88 | rpn_min_box_size = 2 89 | rpn_nms_threshold = 0.7 90 | train_prev_nms_top_n = 12000 91 | train_post_nms_top_n = 2000 92 | test_prev_nms_top_n = 6000 93 | test_post_nms_top_n = 1000 94 | 95 | # ----------binding&training config---------- # 96 | rpn_smooth_l1_beta = 1 97 | rcnn_smooth_l1_beta = 1 98 | 99 | num_sample_anchors = 256 100 | positive_anchor_ratio = 0.5 101 | rpn_positive_overlap = 0.7 102 | rpn_negative_overlap = 0.3 103 | rpn_bbox_normalize_targets = False 104 | 105 | num_rois = 512 106 | fg_ratio = 0.5 107 | fg_threshold = 0.5 108 | bg_threshold_high = 0.5 109 | bg_threshold_low = 0.0 110 | rcnn_bbox_normalize_targets = True 111 | bbox_normalize_means = np.array([0, 0, 0, 0]).astype(np.float32) 112 | bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32) 113 | 114 | config = Config() 115 | 116 | -------------------------------------------------------------------------------- /model/cascade_fpn/inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import argparse 4 | 5 | import cv2 6 | import numpy as np 7 | import megengine as mge 8 | from megengine import jit 9 | 10 | from config import config 11 | import dataset 12 | import network 13 | import misc_utils 14 | import visual_utils 15 | 16 | def inference(args): 17 | @jit.trace(symbolic=False) 18 | def val_func(): 19 | pred_boxes = net(net.inputs) 20 | return pred_boxes 21 | # model path 22 | saveDir = config.model_dir 23 | evalDir = config.eval_dir 24 | misc_utils.ensure_dir(evalDir) 25 | model_file = os.path.join(saveDir, 26 | 'epoch_{}.pkl'.format(args.resume_weights)) 27 | assert os.path.exists(model_file) 28 | # load model 29 | net = network.Network() 30 | net.eval() 31 | check_point = mge.load(model_file) 32 | net.load_state_dict(check_point['state_dict']) 33 | ori_image, image, im_info = get_data(args.img_path) 34 | net.inputs["image"].set_value(image.astype(np.float32)) 35 | net.inputs["im_info"].set_value(im_info) 36 | pred_boxes = val_func().numpy() 37 | num_tag = config.num_classes - 1 38 | target_shape = (pred_boxes.shape[0]//num_tag, 1) 39 | pred_tags = (np.arange(num_tag) + 1).reshape(-1,1) 40 | pred_tags = np.tile(pred_tags, target_shape).reshape(-1,1) 41 | # nms 42 | from set_nms_utils import cpu_nms 43 | keep = pred_boxes[:, -1] > args.thresh 44 | pred_boxes = pred_boxes[keep] 45 | pred_tags = pred_tags[keep] 46 | keep = cpu_nms(pred_boxes, 0.5) 47 | pred_boxes = 
pred_boxes[keep] 48 | pred_tags = pred_tags[keep] 49 | 50 | pred_tags = pred_tags.astype(np.int32).flatten() 51 | pred_tags_name = np.array(config.class_names)[pred_tags] 52 | visual_utils.draw_boxes(ori_image, pred_boxes[:, :-1], pred_boxes[:, -1], pred_tags_name) 53 | name = args.img_path.split('/')[-1].split('.')[-2] 54 | fpath = '/data/jupyter/{}.png'.format(name) 55 | cv2.imwrite(fpath, ori_image) 56 | 57 | def get_data(path): 58 | image = cv2.imread(path, cv2.IMREAD_COLOR) 59 | if config.eval_resize == False: 60 | resized_img, scale = image, 1 61 | else: 62 | resized_img, scale = dataset.resize_img_by_short_and_max_size( 63 | image, config.eval_image_short_size, config.eval_image_max_size) 64 | 65 | original_height, original_width = image.shape[0:2] 66 | height, width = resized_img.shape[0:2] 67 | transposed_img = np.ascontiguousarray( 68 | resized_img.transpose(2, 0, 1)[None, :, :, :], 69 | dtype=np.float32) 70 | im_info = np.array([height, width, scale, original_height, original_width], 71 | dtype=np.float32)[None, :] 72 | return image, transposed_img, im_info 73 | 74 | def run_inference(): 75 | parser = argparse.ArgumentParser() 76 | parser.add_argument('--resume_weights', '-r', default=None, type=str) 77 | parser.add_argument('--img_path', '-i', default=None, type=str) 78 | parser.add_argument('--thresh', '-t', default=0.05, type=float) 79 | args = parser.parse_args() 80 | inference(args) 81 | 82 | if __name__ == '__main__': 83 | run_inference() 84 | -------------------------------------------------------------------------------- /model/cascade_fpn/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import argparse 4 | from multiprocessing import Process, Queue 5 | 6 | from tqdm import tqdm 7 | import numpy as np 8 | import megengine as mge 9 | from megengine import jit 10 | 11 | from config import config 12 | import network 13 | import dataset 14 | import misc_utils 15 | 16 | def eval_all(args): 17 | # model_path 18 | saveDir = config.model_dir 19 | evalDir = config.eval_dir 20 | misc_utils.ensure_dir(evalDir) 21 | model_file = os.path.join(saveDir, 22 | 'epoch_{}.pkl'.format(args.resume_weights)) 23 | assert os.path.exists(model_file) 24 | # load data 25 | records = misc_utils.load_json_lines(config.eval_source) 26 | # multiprocessing 27 | num_records = len(records) 28 | num_devs = args.devices 29 | num_image = math.ceil(num_records / num_devs) 30 | result_queue = Queue(1000) 31 | procs = [] 32 | all_results = [] 33 | for i in range(num_devs): 34 | start = i * num_image 35 | end = min(start + num_image, num_records) 36 | split_records = records[start:end] 37 | proc = Process(target=inference, args=( 38 | model_file, i, split_records, result_queue)) 39 | proc.start() 40 | procs.append(proc) 41 | pbar = tqdm(total=num_records, ncols=50) 42 | for i in range(num_records): 43 | t = result_queue.get() 44 | all_results.append(t) 45 | pbar.update(1) 46 | for p in procs: 47 | p.join() 48 | fpath = os.path.join(evalDir, 'dump-{}.json'.format(args.resume_weights)) 49 | misc_utils.save_json_lines(all_results, fpath) 50 | 51 | def inference(model_file, device, records, result_queue): 52 | @jit.trace(symbolic=False) 53 | def val_func(): 54 | pred_boxes = net(net.inputs) 55 | return pred_boxes 56 | net = network.Network() 57 | net.eval() 58 | check_point = mge.load(model_file) 59 | net.load_state_dict(check_point['state_dict']) 60 | for record in records: 61 | np.set_printoptions(precision=2, suppress=True) 62 | 
net.eval() 63 | image, gt_boxes, im_info, ID = get_data(record, device) 64 | net.inputs["image"].set_value(image.astype(np.float32)) 65 | net.inputs["im_info"].set_value(im_info) 66 | pred_boxes = val_func().numpy() 67 | num_tag = config.num_classes - 1 68 | target_shape = (pred_boxes.shape[0]//num_tag, 1) 69 | pred_tags = (np.arange(num_tag) + 1).reshape(-1,1) 70 | pred_tags = np.tile(pred_tags, target_shape).reshape(-1,1) 71 | # nms 72 | from set_nms_utils import cpu_nms 73 | keep = pred_boxes[:, -1] > 0.05 74 | pred_boxes = pred_boxes[keep] 75 | pred_tags = pred_tags[keep] 76 | keep = cpu_nms(pred_boxes, 0.5) 77 | pred_boxes = pred_boxes[keep] 78 | pred_tags = pred_tags[keep].flatten() 79 | result_dict = dict(ID=ID, height=int(im_info[0, -2]), width=int(im_info[0, -1]), 80 | dtboxes=boxes_dump(pred_boxes, pred_tags, False), 81 | gtboxes=boxes_dump(gt_boxes, None, True)) 82 | result_queue.put_nowait(result_dict) 83 | 84 | def boxes_dump(boxes, pred_tags, is_gt): 85 | result = [] 86 | boxes = boxes.tolist() 87 | for idx in range(len(boxes)): 88 | box = boxes[idx] 89 | if is_gt: 90 | box_dict = {} 91 | box_dict['box'] = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 92 | box_dict['tag'] = box[-1] 93 | else: 94 | box_dict = {} 95 | box_dict['box'] = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 96 | box_dict['tag'] = int(pred_tags[idx]) 97 | box_dict['score'] = box[-1] 98 | result.append(box_dict) 99 | return result 100 | 101 | def get_data(record, device): 102 | data = dataset.val_dataset(record) 103 | image, gt_boxes, ID = \ 104 | data['data'], data['boxes'], data['ID'] 105 | if config.eval_resize == False: 106 | resized_img, scale = image, 1 107 | else: 108 | resized_img, scale = dataset.resize_img_by_short_and_max_size( 109 | image, config.eval_image_short_size, config.eval_image_max_size) 110 | 111 | original_height, original_width = image.shape[0:2] 112 | height, width = resized_img.shape[0:2] 113 | transposed_img = np.ascontiguousarray( 114 | resized_img.transpose(2, 0, 1)[None, :, :, :], 115 | dtype=np.float32) 116 | im_info = np.array([height, width, scale, original_height, original_width], 117 | dtype=np.float32)[None, :] 118 | return transposed_img, gt_boxes, im_info, ID 119 | 120 | def run_test(): 121 | parser = argparse.ArgumentParser() 122 | parser.add_argument('--resume_weights', '-r', default=None, type=str) 123 | parser.add_argument('--devices', '-d', default=1, type=int) 124 | args = parser.parse_args() 125 | eval_all(args) 126 | 127 | if __name__ == '__main__': 128 | run_test() 129 | 130 | -------------------------------------------------------------------------------- /model/emd_refine/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = os.path.join( 11 | os.path.dirname(os.path.abspath(__file__)), 12 | "../../" 13 | ) 14 | add_path(os.path.join(root_dir)) 15 | add_path(os.path.join(root_dir, 'lib')) 16 | add_path(os.path.join(root_dir, 'utils')) 17 | 18 | class Crowd_human: 19 | class_names = ['background', 'person'] 20 | num_classes = len(class_names) 21 | root_folder = '/data/CrowdHuman' 22 | image_folder = '/data/CrowdHuman/images' 23 | train_source = os.path.join('/data/CrowdHuman/annotation_train.odgt') 24 | eval_source = os.path.join('/data/CrowdHuman/annotation_val.odgt') 25 | 26 | class Config: 27 | output_dir = 'outputs' 28 | if not os.path.isdir(output_dir): 29 
| os.makedirs(output_dir) 30 | model_dir = os.path.join(output_dir, 'model_dump') 31 | eval_dir = os.path.join(output_dir, 'eval_dump') 32 | pretrain_weight = '/home/model/resnet50_fbaug_76254_4e14b7d1.pkl' 33 | 34 | # ----------data config---------- # 35 | image_mean = np.array([103.530, 116.280, 123.675]) 36 | image_std = np.array([57.375, 57.120, 58.395]) 37 | train_image_short_size = 800 38 | train_image_max_size = 1400 39 | eval_resize = True 40 | eval_image_short_size = 800 41 | eval_image_max_size = 1400 42 | seed_dataprovider = 3 43 | train_source = Crowd_human.train_source 44 | eval_source = Crowd_human.eval_source 45 | image_folder = Crowd_human.image_folder 46 | class_names = Crowd_human.class_names 47 | num_classes = Crowd_human.num_classes 48 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 49 | gt_boxes_name = 'fbox' 50 | 51 | backbone_freeze_at = 2 52 | rpn_channel = 256 53 | # ----------train config---------- # 54 | batch_per_gpu = 2 55 | basic_lr = 1e-3 * 1.25 56 | momentum = 0.9 57 | weight_decay = 1e-4 58 | 59 | warm_iters = 800 60 | max_epoch = 30 61 | lr_decay_rate = 0.1 62 | lr_decay_sates = [20, 26] 63 | nr_images_epoch = 15000 64 | 65 | #base_lr = 1e-3 * train_batch_per_gpu * 1.25 66 | log_dump_interval = 1 67 | 68 | # ----------test config---------- # 69 | test_cls_threshold = 0.05 70 | test_nms_version = 'original' 71 | test_max_boxes_per_image = 300 #200 72 | test_save_type = 'human' 73 | test_nms = 0.5 74 | test_vis_threshold = 0.3 75 | 76 | # ----------model config---------- # 77 | batch_filter_box_size = 0 78 | nr_box_dim = 5 79 | ignore_label = -1 80 | max_boxes_of_image = 500 81 | 82 | # ----------rois generator config---------- # 83 | anchor_base_size = 16 84 | anchor_base_scale = 0.5 85 | #anchor_scales = np.array([2, 4, 8, 16, 32]) 86 | anchor_aspect_ratios = [1, 2, 3] 87 | #num_anchor_scales = len(anchor_scales) 88 | num_cell_anchors = len(anchor_aspect_ratios) 89 | anchor_within_border = False 90 | 91 | rpn_min_box_size = 2 92 | rpn_nms_threshold = 0.7 93 | train_prev_nms_top_n = 12000 94 | train_post_nms_top_n = 2000 95 | test_prev_nms_top_n = 6000 96 | test_post_nms_top_n = 1000 97 | 98 | # ----------binding&training config---------- # 99 | rpn_smooth_l1_beta = 1 100 | rcnn_smooth_l1_beta = 1 101 | 102 | num_sample_anchors = 256 103 | positive_anchor_ratio = 0.5 104 | rpn_positive_overlap = 0.7 105 | rpn_negative_overlap = 0.3 106 | rpn_bbox_normalize_targets = False 107 | 108 | num_rois = 512 109 | fg_ratio = 0.5 110 | fg_threshold = 0.5 111 | bg_threshold_high = 0.5 112 | bg_threshold_low = 0.0 113 | rcnn_bbox_normalize_targets = True 114 | bbox_normalize_means = np.array([0, 0, 0, 0]).astype(np.float32) 115 | bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32) 116 | 117 | config = Config() 118 | 119 | -------------------------------------------------------------------------------- /model/emd_refine/inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import argparse 4 | 5 | import cv2 6 | import numpy as np 7 | import megengine as mge 8 | from megengine import jit 9 | 10 | from config import config 11 | import dataset 12 | import network 13 | import misc_utils 14 | import visual_utils 15 | 16 | if_set_nms = True 17 | top_k = 2 18 | 19 | def inference(args): 20 | @jit.trace(symbolic=False) 21 | def val_func(): 22 | pred_boxes = net(net.inputs) 23 | return pred_boxes 24 | # model path 25 | model_file = args.resume_weights 26 | 
assert os.path.exists(model_file) 27 | # load model 28 | net = network.Network() 29 | net.eval() 30 | check_point = mge.load(model_file) 31 | net.load_state_dict(check_point['state_dict']) 32 | ori_image, image, im_info = get_data(args.img_path) 33 | net.inputs["image"].set_value(image.astype(np.float32)) 34 | net.inputs["im_info"].set_value(im_info) 35 | pred_boxes = val_func().numpy() 36 | num_tag = config.num_classes - 1 37 | target_shape = (pred_boxes.shape[0]//num_tag//top_k, top_k) 38 | pred_tags = (np.arange(num_tag) + 1).reshape(-1,1) 39 | pred_tags = np.tile(pred_tags, target_shape).reshape(-1,1) 40 | # nms 41 | if if_set_nms: 42 | from set_nms_utils import set_cpu_nms 43 | n = pred_boxes.shape[0] // top_k 44 | idents = np.tile(np.arange(n)[:,None], (1, top_k)).reshape(-1, 1) 45 | pred_boxes = np.hstack((pred_boxes, idents)) 46 | keep = pred_boxes[:, -2] > args.thresh 47 | pred_boxes = pred_boxes[keep] 48 | pred_tags = pred_tags[keep] 49 | keep = set_cpu_nms(pred_boxes, 0.5) 50 | pred_boxes = pred_boxes[keep][:, :-1] 51 | pred_tags = pred_tags[keep] 52 | else: 53 | from set_nms_utils import cpu_nms 54 | keep = pred_boxes[:, -1] > args.thresh 55 | pred_boxes = pred_boxes[keep] 56 | pred_tags = pred_tags[keep] 57 | keep = cpu_nms(pred_boxes, 0.5) 58 | pred_boxes = pred_boxes[keep] 59 | pred_tags = pred_tags[keep] 60 | pred_tags = pred_tags.astype(np.int32).flatten() 61 | pred_tags_name = np.array(config.class_names)[pred_tags] 62 | visual_utils.draw_boxes(ori_image, pred_boxes[:, :-1], pred_boxes[:, -1], pred_tags_name) 63 | name = args.img_path.split('/')[-1].split('.')[-2] 64 | fpath = 'result.jpg' 65 | cv2.imwrite(fpath, ori_image) 66 | 67 | def get_data(path): 68 | image = cv2.imread(path, cv2.IMREAD_COLOR) 69 | if config.eval_resize == False: 70 | resized_img, scale = image, 1 71 | else: 72 | resized_img, scale = dataset.resize_img_by_short_and_max_size( 73 | image, config.eval_image_short_size, config.eval_image_max_size) 74 | 75 | original_height, original_width = image.shape[0:2] 76 | height, width = resized_img.shape[0:2] 77 | transposed_img = np.ascontiguousarray( 78 | resized_img.transpose(2, 0, 1)[None, :, :, :], 79 | dtype=np.float32) 80 | im_info = np.array([height, width, scale, original_height, original_width], 81 | dtype=np.float32)[None, :] 82 | return image, transposed_img, im_info 83 | 84 | def run_inference(): 85 | parser = argparse.ArgumentParser() 86 | parser.add_argument('--resume_weights', '-r', default=None, type=str) 87 | parser.add_argument('--img_path', '-i', default=None, type=str) 88 | parser.add_argument('--thresh', '-t', default=0.3, type=float) 89 | args = parser.parse_args() 90 | inference(args) 91 | 92 | if __name__ == '__main__': 93 | run_inference() 94 | -------------------------------------------------------------------------------- /model/emd_refine/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import argparse 4 | from multiprocessing import Process, Queue 5 | 6 | from tqdm import tqdm 7 | import numpy as np 8 | import megengine as mge 9 | from megengine import jit 10 | 11 | from config import config 12 | import network 13 | import dataset 14 | import misc_utils 15 | 16 | if_set_nms = True 17 | 18 | def eval_all(args): 19 | # model_path 20 | saveDir = config.model_dir 21 | evalDir = config.eval_dir 22 | misc_utils.ensure_dir(evalDir) 23 | model_file = os.path.join(saveDir, 24 | 'epoch_{}.pkl'.format(args.resume_weights)) 25 | assert os.path.exists(model_file) 26 | # 
load data 27 | records = misc_utils.load_json_lines(config.eval_source) 28 | # multiprocessing 29 | num_records = len(records) 30 | num_devs = args.devices 31 | num_image = math.ceil(num_records / num_devs) 32 | result_queue = Queue(1000) 33 | procs = [] 34 | all_results = [] 35 | for i in range(num_devs): 36 | start = i * num_image 37 | end = min(start + num_image, num_records) 38 | split_records = records[start:end] 39 | proc = Process(target=inference, args=( 40 | model_file, i, split_records, result_queue)) 41 | proc.start() 42 | procs.append(proc) 43 | pbar = tqdm(total=num_records, ncols=50) 44 | for i in range(num_records): 45 | t = result_queue.get() 46 | all_results.append(t) 47 | pbar.update(1) 48 | for p in procs: 49 | p.join() 50 | fpath = os.path.join(evalDir, 'dump-{}.json'.format(args.resume_weights)) 51 | misc_utils.save_json_lines(all_results, fpath) 52 | 53 | def inference(model_file, device, records, result_queue): 54 | @jit.trace(symbolic=False) 55 | def val_func(): 56 | pred_boxes = net(net.inputs) 57 | return pred_boxes 58 | net = network.Network() 59 | net.eval() 60 | check_point = mge.load(model_file) 61 | net.load_state_dict(check_point['state_dict']) 62 | for record in records: 63 | np.set_printoptions(precision=2, suppress=True) 64 | net.eval() 65 | image, gt_boxes, im_info, ID = get_data(record, device) 66 | net.inputs["image"].set_value(image.astype(np.float32)) 67 | net.inputs["im_info"].set_value(im_info) 68 | pred_boxes = val_func().numpy() 69 | # nms 70 | if if_set_nms: 71 | from set_nms_utils import set_cpu_nms 72 | n = pred_boxes.shape[0] // 2 73 | idents = np.tile(np.arange(n)[:,None], (1, 2)).reshape(-1, 1) 74 | pred_boxes = np.hstack((pred_boxes, idents)) 75 | keep = pred_boxes[:, -2] > 0.05 76 | pred_boxes = pred_boxes[keep] 77 | keep = set_cpu_nms(pred_boxes, 0.5) 78 | pred_boxes = pred_boxes[keep][:, :-1] 79 | else: 80 | from set_nms_utils import cpu_nms 81 | keep = pred_boxes[:, -1] > 0.05 82 | pred_boxes = pred_boxes[keep] 83 | keep = cpu_nms(pred_boxes, 0.5) 84 | pred_boxes = pred_boxes[keep] 85 | result_dict = dict(ID=ID, height=int(im_info[0, -2]), width=int(im_info[0, -1]), 86 | dtboxes=boxes_dump(pred_boxes, False), 87 | gtboxes=boxes_dump(gt_boxes, True)) 88 | result_queue.put_nowait(result_dict) 89 | 90 | def boxes_dump(boxes, is_gt): 91 | result = [] 92 | boxes = boxes.tolist() 93 | for box in boxes: 94 | if is_gt: 95 | box_dict = {} 96 | box_dict['box'] = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 97 | box_dict['tag'] = box[-1] 98 | else: 99 | box_dict = {} 100 | box_dict['box'] = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 101 | box_dict['tag'] = 1 102 | box_dict['score'] = box[-1] 103 | result.append(box_dict) 104 | return result 105 | 106 | def get_data(record, device): 107 | data = dataset.val_dataset(record) 108 | image, gt_boxes, ID = \ 109 | data['data'], data['boxes'], data['ID'] 110 | if config.eval_resize == False: 111 | resized_img, scale = image, 1 112 | else: 113 | resized_img, scale = dataset.resize_img_by_short_and_max_size( 114 | image, config.eval_image_short_size, config.eval_image_max_size) 115 | 116 | original_height, original_width = image.shape[0:2] 117 | height, width = resized_img.shape[0:2] 118 | transposed_img = np.ascontiguousarray( 119 | resized_img.transpose(2, 0, 1)[None, :, :, :], 120 | dtype=np.float32) 121 | im_info = np.array([height, width, scale, original_height, original_width], 122 | dtype=np.float32)[None, :] 123 | return transposed_img, gt_boxes, im_info, ID 124 | 125 | def run_test(): 126 | 
parser = argparse.ArgumentParser() 127 | parser.add_argument('--resume_weights', '-r', default=None, type=str) 128 | parser.add_argument('--devices', '-d', default=1, type=int) 129 | args = parser.parse_args() 130 | eval_all(args) 131 | 132 | if __name__ == '__main__': 133 | run_test() 134 | 135 | -------------------------------------------------------------------------------- /model/emd_simple/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = os.path.join( 11 | os.path.dirname(os.path.abspath(__file__)), 12 | "../../" 13 | ) 14 | add_path(os.path.join(root_dir)) 15 | add_path(os.path.join(root_dir, 'lib')) 16 | add_path(os.path.join(root_dir, 'utils')) 17 | 18 | class Crowd_human: 19 | class_names = ['background', 'person'] 20 | num_classes = len(class_names) 21 | root_folder = '/data/CrowdHuman' 22 | image_folder = '/data/CrowdHuman/images' 23 | train_source = os.path.join('/data/CrowdHuman/annotation_train.odgt') 24 | eval_source = os.path.join('/data/CrowdHuman/annotation_val.odgt') 25 | 26 | class Config: 27 | output_dir = 'outputs' 28 | if not os.path.isdir(output_dir): 29 | os.makedirs(output_dir) 30 | model_dir = os.path.join(output_dir, 'model_dump') 31 | eval_dir = os.path.join(output_dir, 'eval_dump') 32 | pretrain_weight = '/home/model/resnet50_fbaug_76254_4e14b7d1.pkl' 33 | 34 | # ----------data config---------- # 35 | image_mean = np.array([103.530, 116.280, 123.675]) 36 | image_std = np.array([57.375, 57.120, 58.395]) 37 | train_image_short_size = 800 38 | train_image_max_size = 1400 39 | eval_resize = True 40 | eval_image_short_size = 800 41 | eval_image_max_size = 1400 42 | seed_dataprovider = 3 43 | train_source = Crowd_human.train_source 44 | eval_source = Crowd_human.eval_source 45 | image_folder = Crowd_human.image_folder 46 | class_names = Crowd_human.class_names 47 | num_classes = Crowd_human.num_classes 48 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 49 | gt_boxes_name = 'fbox' 50 | 51 | backbone_freeze_at = 2 52 | rpn_channel = 256 53 | # ----------train config---------- # 54 | batch_per_gpu = 2 55 | basic_lr = 1e-3 * 1.25 56 | momentum = 0.9 57 | weight_decay = 1e-4 58 | 59 | warm_iters = 800 60 | max_epoch = 30 61 | lr_decay_rate = 0.1 62 | lr_decay_sates = [20, 26] 63 | nr_images_epoch = 15000 64 | 65 | #base_lr = 1e-3 * train_batch_per_gpu * 1.25 66 | log_dump_interval = 1 67 | 68 | # ----------test config---------- # 69 | test_cls_threshold = 0.05 70 | test_nms_version = 'original' 71 | test_max_boxes_per_image = 300 #200 72 | test_save_type = 'human' 73 | test_nms = 0.5 74 | test_vis_threshold = 0.3 75 | 76 | # ----------model config---------- # 77 | batch_filter_box_size = 0 78 | nr_box_dim = 5 79 | ignore_label = -1 80 | max_boxes_of_image = 500 81 | 82 | # ----------rois generator config---------- # 83 | anchor_base_size = 16 84 | anchor_base_scale = 0.5 85 | #anchor_scales = np.array([2, 4, 8, 16, 32]) 86 | anchor_aspect_ratios = [1, 2, 3] 87 | #num_anchor_scales = len(anchor_scales) 88 | num_cell_anchors = len(anchor_aspect_ratios) 89 | anchor_within_border = False 90 | 91 | rpn_min_box_size = 2 92 | rpn_nms_threshold = 0.7 93 | train_prev_nms_top_n = 12000 94 | train_post_nms_top_n = 2000 95 | test_prev_nms_top_n = 6000 96 | test_post_nms_top_n = 1000 97 | 98 | # ----------binding&training config---------- # 99 | 
rpn_smooth_l1_beta = 1 100 | rcnn_smooth_l1_beta = 1 101 | 102 | num_sample_anchors = 256 103 | positive_anchor_ratio = 0.5 104 | rpn_positive_overlap = 0.7 105 | rpn_negative_overlap = 0.3 106 | rpn_bbox_normalize_targets = False 107 | 108 | num_rois = 512 109 | fg_ratio = 0.5 110 | fg_threshold = 0.5 111 | bg_threshold_high = 0.5 112 | bg_threshold_low = 0.0 113 | rcnn_bbox_normalize_targets = True 114 | bbox_normalize_means = np.array([0, 0, 0, 0]).astype(np.float32) 115 | bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32) 116 | 117 | config = Config() 118 | 119 | -------------------------------------------------------------------------------- /model/emd_simple/inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import argparse 4 | 5 | import cv2 6 | import numpy as np 7 | import megengine as mge 8 | from megengine import jit 9 | 10 | from config import config 11 | import dataset 12 | import network 13 | import misc_utils 14 | import visual_utils 15 | 16 | if_set_nms = True 17 | top_k = 2 18 | 19 | def inference(args): 20 | @jit.trace(symbolic=False) 21 | def val_func(): 22 | pred_boxes = net(net.inputs) 23 | return pred_boxes 24 | # model path 25 | model_file = args.resume_weights 26 | assert os.path.exists(model_file) 27 | # load model 28 | net = network.Network() 29 | net.eval() 30 | check_point = mge.load(model_file) 31 | net.load_state_dict(check_point['state_dict']) 32 | ori_image, image, im_info = get_data(args.img_path) 33 | net.inputs["image"].set_value(image.astype(np.float32)) 34 | net.inputs["im_info"].set_value(im_info) 35 | pred_boxes = val_func().numpy() 36 | num_tag = config.num_classes - 1 37 | target_shape = (pred_boxes.shape[0]//num_tag//top_k, top_k) 38 | pred_tags = (np.arange(num_tag) + 1).reshape(-1,1) 39 | pred_tags = np.tile(pred_tags, target_shape).reshape(-1,1) 40 | # nms 41 | if if_set_nms: 42 | from set_nms_utils import set_cpu_nms 43 | n = pred_boxes.shape[0] // top_k 44 | idents = np.tile(np.arange(n)[:,None], (1, top_k)).reshape(-1, 1) 45 | pred_boxes = np.hstack((pred_boxes, idents)) 46 | keep = pred_boxes[:, -2] > args.thresh 47 | pred_boxes = pred_boxes[keep] 48 | pred_tags = pred_tags[keep] 49 | keep = set_cpu_nms(pred_boxes, 0.5) 50 | pred_boxes = pred_boxes[keep][:, :-1] 51 | pred_tags = pred_tags[keep] 52 | else: 53 | from set_nms_utils import cpu_nms 54 | keep = pred_boxes[:, -1] > args.thresh 55 | pred_boxes = pred_boxes[keep] 56 | pred_tags = pred_tags[keep] 57 | keep = cpu_nms(pred_boxes, 0.5) 58 | pred_boxes = pred_boxes[keep] 59 | pred_tags = pred_tags[keep] 60 | pred_tags = pred_tags.astype(np.int32).flatten() 61 | pred_tags_name = np.array(config.class_names)[pred_tags] 62 | visual_utils.draw_boxes(ori_image, pred_boxes[:, :-1], pred_boxes[:, -1], pred_tags_name) 63 | name = args.img_path.split('/')[-1].split('.')[-2] 64 | fpath = 'result.jpg' 65 | cv2.imwrite(fpath, ori_image) 66 | 67 | def get_data(path): 68 | image = cv2.imread(path, cv2.IMREAD_COLOR) 69 | if config.eval_resize == False: 70 | resized_img, scale = image, 1 71 | else: 72 | resized_img, scale = dataset.resize_img_by_short_and_max_size( 73 | image, config.eval_image_short_size, config.eval_image_max_size) 74 | 75 | original_height, original_width = image.shape[0:2] 76 | height, width = resized_img.shape[0:2] 77 | transposed_img = np.ascontiguousarray( 78 | resized_img.transpose(2, 0, 1)[None, :, :, :], 79 | dtype=np.float32) 80 | im_info = np.array([height, width, scale, 
original_height, original_width], 81 | dtype=np.float32)[None, :] 82 | return image, transposed_img, im_info 83 | 84 | def boxes_dump(boxes, pred_tags): 85 | result = [] 86 | boxes = boxes.tolist() 87 | for idx in range(len(boxes)): 88 | box = boxes[idx] 89 | box_dict = {} 90 | box_dict['box'] = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 91 | box_dict['tag'] = int(pred_tags[idx]) 92 | box_dict['score'] = box[-1] 93 | result.append(box_dict) 94 | return result 95 | 96 | def run_inference(): 97 | parser = argparse.ArgumentParser() 98 | parser.add_argument('--resume_weights', '-r', default=None, type=str) 99 | parser.add_argument('--img_path', '-i', default=None, type=str) 100 | parser.add_argument('--thresh', '-t', default=0.3, type=float) 101 | args = parser.parse_args() 102 | inference(args) 103 | 104 | if __name__ == '__main__': 105 | run_inference() 106 | -------------------------------------------------------------------------------- /model/fpn_baseline/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import numpy as np 5 | 6 | def add_path(path): 7 | if path not in sys.path: 8 | sys.path.insert(0, path) 9 | 10 | root_dir = '../../' 11 | add_path(os.path.join(root_dir)) 12 | add_path(os.path.join(root_dir, 'lib')) 13 | add_path(os.path.join(root_dir, 'utils')) 14 | 15 | class Crowd_human: 16 | class_names = ['background', 'person'] 17 | num_classes = len(class_names) 18 | root_folder = '/data/CrowdHuman' 19 | image_folder = '/data/CrowdHuman/images' 20 | train_source = os.path.join('/data/CrowdHuman/annotation_train.odgt') 21 | eval_source = os.path.join('/data/CrowdHuman/annotation_val.odgt') 22 | 23 | class Config: 24 | output_dir = 'outputs' 25 | if not os.path.isdir(output_dir): 26 | os.makedirs(output_dir) 27 | model_dir = os.path.join(output_dir, 'model_dump') 28 | eval_dir = os.path.join(output_dir, 'eval_dump') 29 | pretrain_weight = '/home/model/resnet50_fbaug_76254_4e14b7d1.pkl' 30 | 31 | # ----------data config---------- # 32 | image_mean = np.array([103.530, 116.280, 123.675]) 33 | image_std = np.array([57.375, 57.120, 58.395]) 34 | train_image_short_size = 800 35 | train_image_max_size = 1400 36 | eval_resize = True 37 | eval_image_short_size = 800 38 | eval_image_max_size = 1400 39 | seed_dataprovider = 3 40 | train_source = Crowd_human.train_source 41 | eval_source = Crowd_human.eval_source 42 | image_folder = Crowd_human.image_folder 43 | class_names = Crowd_human.class_names 44 | num_classes = Crowd_human.num_classes 45 | class_names2id = dict(list(zip(class_names, list(range(num_classes))))) 46 | gt_boxes_name = 'fbox' 47 | 48 | backbone_freeze_at = 2 49 | rpn_channel = 256 50 | # ----------train config---------- # 51 | batch_per_gpu = 2 52 | basic_lr = 1e-3 * 1.25 53 | momentum = 0.9 54 | weight_decay = 1e-4 55 | 56 | warm_iters = 800 57 | max_epoch = 30 58 | lr_decay_rate = 0.1 59 | lr_decay_sates = [20, 26] 60 | nr_images_epoch = 15000 61 | 62 | #base_lr = 1e-3 * train_batch_per_gpu * 1.25 63 | log_dump_interval = 1 64 | 65 | # ----------test config---------- # 66 | test_cls_threshold = 0.05 67 | test_nms_version = 'original' 68 | test_max_boxes_per_image = 300 #200 69 | test_save_type = 'human' 70 | test_nms = 0.5 71 | test_vis_threshold = 0.3 72 | 73 | # ----------model config---------- # 74 | batch_filter_box_size = 0 75 | nr_box_dim = 5 76 | ignore_label = -1 77 | max_boxes_of_image = 500 78 | 79 | # ----------rois generator config---------- # 80 | anchor_base_size = 16 81 | 
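# (hedged note: assuming the conventional generator in
#  lib/det_opr/anchors_generator.py, each feature-map cell gets
#  num_cell_anchors = 3 anchors, one per aspect ratio in [1, 2, 3],
#  sized from anchor_base_size * anchor_base_scale at that level's stride.)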
anchor_base_scale = 0.5 82 | #anchor_scales = np.array([2, 4, 8, 16, 32]) 83 | anchor_aspect_ratios = [1, 2, 3] 84 | #num_anchor_scales = len(anchor_scales) 85 | num_cell_anchors = len(anchor_aspect_ratios) 86 | anchor_within_border = False 87 | 88 | rpn_min_box_size = 2 89 | rpn_nms_threshold = 0.7 90 | train_prev_nms_top_n = 12000 91 | train_post_nms_top_n = 2000 92 | test_prev_nms_top_n = 6000 93 | test_post_nms_top_n = 1000 94 | 95 | # ----------binding&training config---------- # 96 | rpn_smooth_l1_beta = 1 97 | rcnn_smooth_l1_beta = 1 98 | 99 | num_sample_anchors = 256 100 | positive_anchor_ratio = 0.5 101 | rpn_positive_overlap = 0.7 102 | rpn_negative_overlap = 0.3 103 | rpn_bbox_normalize_targets = False 104 | 105 | num_rois = 512 106 | fg_ratio = 0.5 107 | fg_threshold = 0.5 108 | bg_threshold_high = 0.5 109 | bg_threshold_low = 0.0 110 | rcnn_bbox_normalize_targets = True 111 | bbox_normalize_means = np.array([0, 0, 0, 0]).astype(np.float32) 112 | bbox_normalize_stds = np.array([0.1, 0.1, 0.2, 0.2]).astype(np.float32) 113 | 114 | config = Config() 115 | 116 | -------------------------------------------------------------------------------- /model/fpn_baseline/inference.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import argparse 4 | 5 | import cv2 6 | import numpy as np 7 | import megengine as mge 8 | from megengine import jit 9 | 10 | from config import config 11 | import dataset 12 | import network 13 | import misc_utils 14 | import visual_utils 15 | 16 | def inference(args): 17 | @jit.trace(symbolic=False) 18 | def val_func(): 19 | pred_boxes = net(net.inputs) 20 | return pred_boxes 21 | # model path 22 | saveDir = config.model_dir 23 | evalDir = config.eval_dir 24 | misc_utils.ensure_dir(evalDir) 25 | model_file = os.path.join(saveDir, 26 | 'epoch_{}.pkl'.format(args.resume_weights)) 27 | assert os.path.exists(model_file) 28 | # load model 29 | net = network.Network() 30 | net.eval() 31 | check_point = mge.load(model_file) 32 | net.load_state_dict(check_point['state_dict']) 33 | ori_image, image, im_info = get_data(args.img_path) 34 | net.inputs["image"].set_value(image.astype(np.float32)) 35 | net.inputs["im_info"].set_value(im_info) 36 | pred_boxes = val_func().numpy() 37 | num_tag = config.num_classes - 1 38 | target_shape = (pred_boxes.shape[0]//num_tag, 1) 39 | pred_tags = (np.arange(num_tag) + 1).reshape(-1,1) 40 | pred_tags = np.tile(pred_tags, target_shape).reshape(-1,1) 41 | # nms 42 | from set_nms_utils import cpu_nms 43 | keep = pred_boxes[:, -1] > args.thresh 44 | pred_boxes = pred_boxes[keep] 45 | pred_tags = pred_tags[keep] 46 | keep = cpu_nms(pred_boxes, 0.5) 47 | pred_boxes = pred_boxes[keep] 48 | pred_tags = pred_tags[keep] 49 | 50 | pred_tags = pred_tags.astype(np.int32).flatten() 51 | pred_tags_name = np.array(config.class_names)[pred_tags] 52 | visual_utils.draw_boxes(ori_image, pred_boxes[:, :-1], pred_boxes[:, -1], pred_tags_name) 53 | name = args.img_path.split('/')[-1].split('.')[-2] 54 | fpath = '/data/jupyter/{}.png'.format(name) 55 | cv2.imwrite(fpath, ori_image) 56 | 57 | def get_data(path): 58 | image = cv2.imread(path, cv2.IMREAD_COLOR) 59 | if config.eval_resize == False: 60 | resized_img, scale = image, 1 61 | else: 62 | resized_img, scale = dataset.resize_img_by_short_and_max_size( 63 | image, config.eval_image_short_size, config.eval_image_max_size) 64 | 65 | original_height, original_width = image.shape[0:2] 66 | height, width = resized_img.shape[0:2] 67 | 
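# worked example (illustrative numbers): a 600x800 BGR input with
# eval_image_short_size = 800 and eval_image_max_size = 1400 gets
# scale ~= 1.333, so the im_info row assembled below is roughly
#   [[800., 1066.7, 1.333, 600., 800.]]
# i.e. [resized_h, resized_w, scale, original_h, original_w].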
transposed_img = np.ascontiguousarray( 68 | resized_img.transpose(2, 0, 1)[None, :, :, :], 69 | dtype=np.float32) 70 | im_info = np.array([height, width, scale, original_height, original_width], 71 | dtype=np.float32)[None, :] 72 | return image, transposed_img, im_info 73 | 74 | def run_inference(): 75 | parser = argparse.ArgumentParser() 76 | parser.add_argument('--resume_weights', '-r', default=None, type=str) 77 | parser.add_argument('--img_path', '-i', default=None, type=str) 78 | parser.add_argument('--thresh', '-t', default=0.05, type=float) 79 | args = parser.parse_args() 80 | inference(args) 81 | 82 | if __name__ == '__main__': 83 | run_inference() 84 | -------------------------------------------------------------------------------- /model/fpn_baseline/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import argparse 4 | from multiprocessing import Process, Queue 5 | 6 | from tqdm import tqdm 7 | import numpy as np 8 | import megengine as mge 9 | from megengine import jit 10 | 11 | from config import config 12 | import network 13 | import dataset 14 | import misc_utils 15 | 16 | def eval_all(args): 17 | # model_path 18 | saveDir = config.model_dir 19 | evalDir = config.eval_dir 20 | misc_utils.ensure_dir(evalDir) 21 | model_file = os.path.join(saveDir, 22 | 'epoch_{}.pkl'.format(args.resume_weights)) 23 | assert os.path.exists(model_file) 24 | # load data 25 | records = misc_utils.load_json_lines(config.eval_source) 26 | # multiprocessing 27 | num_records = len(records) 28 | num_devs = args.devices 29 | num_image = math.ceil(num_records / num_devs) 30 | result_queue = Queue(1000) 31 | procs = [] 32 | all_results = [] 33 | for i in range(num_devs): 34 | start = i * num_image 35 | end = min(start + num_image, num_records) 36 | split_records = records[start:end] 37 | proc = Process(target=inference, args=( 38 | model_file, i, split_records, result_queue)) 39 | proc.start() 40 | procs.append(proc) 41 | pbar = tqdm(total=num_records, ncols=50) 42 | for i in range(num_records): 43 | t = result_queue.get() 44 | all_results.append(t) 45 | pbar.update(1) 46 | for p in procs: 47 | p.join() 48 | fpath = os.path.join(evalDir, 'dump-{}.json'.format(args.resume_weights)) 49 | misc_utils.save_json_lines(all_results, fpath) 50 | 51 | def inference(model_file, device, records, result_queue): 52 | @jit.trace(symbolic=False) 53 | def val_func(): 54 | pred_boxes = net(net.inputs) 55 | return pred_boxes 56 | net = network.Network() 57 | net.eval() 58 | check_point = mge.load(model_file) 59 | net.load_state_dict(check_point['state_dict']) 60 | for record in records: 61 | np.set_printoptions(precision=2, suppress=True) 62 | net.eval() 63 | image, gt_boxes, im_info, ID = get_data(record, device) 64 | net.inputs["image"].set_value(image.astype(np.float32)) 65 | net.inputs["im_info"].set_value(im_info) 66 | pred_boxes = val_func().numpy() 67 | num_tag = config.num_classes - 1 68 | target_shape = (pred_boxes.shape[0]//num_tag, 1) 69 | pred_tags = (np.arange(num_tag) + 1).reshape(-1,1) 70 | pred_tags = np.tile(pred_tags, target_shape).reshape(-1,1) 71 | # nms 72 | from set_nms_utils import cpu_nms 73 | keep = pred_boxes[:, -1] > 0.05 74 | pred_boxes = pred_boxes[keep] 75 | pred_tags = pred_tags[keep] 76 | keep = cpu_nms(pred_boxes, 0.5) 77 | pred_boxes = pred_boxes[keep] 78 | pred_tags = pred_tags[keep].flatten() 79 | result_dict = dict(ID=ID, height=int(im_info[0, -2]), width=int(im_info[0, -1]), 80 | dtboxes=boxes_dump(pred_boxes, 
pred_tags, False), 81 | gtboxes=boxes_dump(gt_boxes, None, True)) 82 | result_queue.put_nowait(result_dict) 83 | 84 | def boxes_dump(boxes, pred_tags, is_gt): 85 | result = [] 86 | boxes = boxes.tolist() 87 | for idx in range(len(boxes)): 88 | box = boxes[idx] 89 | if is_gt: 90 | box_dict = {} 91 | box_dict['box'] = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 92 | box_dict['tag'] = box[-1] 93 | else: 94 | box_dict = {} 95 | box_dict['box'] = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 96 | box_dict['tag'] = int(pred_tags[idx]) 97 | box_dict['score'] = box[-1] 98 | result.append(box_dict) 99 | return result 100 | 101 | def get_data(record, device): 102 | data = dataset.val_dataset(record) 103 | image, gt_boxes, ID = \ 104 | data['data'], data['boxes'], data['ID'] 105 | if config.eval_resize == False: 106 | resized_img, scale = image, 1 107 | else: 108 | resized_img, scale = dataset.resize_img_by_short_and_max_size( 109 | image, config.eval_image_short_size, config.eval_image_max_size) 110 | 111 | original_height, original_width = image.shape[0:2] 112 | height, width = resized_img.shape[0:2] 113 | transposed_img = np.ascontiguousarray( 114 | resized_img.transpose(2, 0, 1)[None, :, :, :], 115 | dtype=np.float32) 116 | im_info = np.array([height, width, scale, original_height, original_width], 117 | dtype=np.float32)[None, :] 118 | return transposed_img, gt_boxes, im_info, ID 119 | 120 | def run_test(): 121 | parser = argparse.ArgumentParser() 122 | parser.add_argument('--resume_weights', '-r', default=None, type=str) 123 | parser.add_argument('--devices', '-d', default=1, type=int) 124 | args = parser.parse_args() 125 | eval_all(args) 126 | 127 | if __name__ == '__main__': 128 | run_test() 129 | 130 | -------------------------------------------------------------------------------- /tools/visulize_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import cv2 4 | import argparse 5 | 6 | sys.path.insert(0, '../lib') 7 | from utils import misc_utils, visual_utils 8 | 9 | img_root = '/data/CrowdHuman/images/' 10 | def eval_all(args): 11 | # json file 12 | assert os.path.exists(args.json_file), "Wrong json path!" 
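# each line of the json file is one dumped record; a hypothetical example
# (ID and values made up, layout as written by the test.py scripts above):
#   {"ID": "273271,c9db000d5146c15", "height": 800, "width": 1200,
#    "dtboxes": [{"box": [x, y, w, h], "tag": 1, "score": 0.97}, ...],
#    "gtboxes": [{"box": [x, y, w, h], "tag": 1}, ...]}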
13 | misc_utils.ensure_dir('outputs') 14 | records = misc_utils.load_json_lines(args.json_file)[:args.number] 15 | for record in records: 16 | dtboxes = misc_utils.load_bboxes( 17 | record, key_name='dtboxes', key_box='box', key_score='score', key_tag='tag') 18 | gtboxes = misc_utils.load_bboxes(record, 'gtboxes', 'box') 19 | dtboxes = misc_utils.xywh_to_xyxy(dtboxes) 20 | gtboxes = misc_utils.xywh_to_xyxy(gtboxes) 21 | keep = dtboxes[:, -2] > args.visual_thresh 22 | dtboxes = dtboxes[keep] 23 | len_dt = len(dtboxes) 24 | len_gt = len(gtboxes) 25 | line = "{}: dt:{}, gt:{}.".format(record['ID'], len_dt, len_gt) 26 | print(line) 27 | img_path = img_root + record['ID'] + '.png' 28 | img = misc_utils.load_img(img_path) 29 | visual_utils.draw_boxes(img, dtboxes, line_thick=1, line_color='blue') 30 | visual_utils.draw_boxes(img, gtboxes, line_thick=1, line_color='white') 31 | fpath = 'outputs/{}.png'.format(record['ID']) 32 | cv2.imwrite(fpath, img) 33 | 34 | 35 | def run_eval(): 36 | parser = argparse.ArgumentParser() 37 | parser.add_argument('--json_file', '-f', default=None, required=True, type=str) 38 | parser.add_argument('--number', '-n', default=3, type=int) 39 | parser.add_argument('--visual_thresh', '-v', default=0.3, type=float) 40 | args = parser.parse_args() 41 | eval_all(args) 42 | 43 | if __name__ == '__main__': 44 | run_eval() 45 | -------------------------------------------------------------------------------- /utils/detToolkits/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.swp 3 | __pycache__ 4 | tmp 5 | -------------------------------------------------------------------------------- /utils/detToolkits/detools/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf8 -*- 2 | 3 | __author__ = 'jyn' 4 | __email__ = 'jyn@megvii.com' 5 | 6 | from .box import * 7 | from .image import * 8 | from .database import * 9 | from .evaluator import * 10 | -------------------------------------------------------------------------------- /utils/detToolkits/eval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python2 2 | import argparse 3 | from detools import * 4 | import pdb 5 | def nmsRule(a, b, nmsThres=0.65): 6 | return a.iomin(b) > nmsThres 7 | 8 | def filterRule(a, scoreThres=0.0): 9 | return a.score > scoreThres 10 | 11 | def main(): 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('--dt', dest='dt', nargs='+', required=True, \ 14 | help='the fpaths to *.oddet (detection results)') 15 | parser.add_argument('--showThr', dest='showThr', nargs='+', default=[None], type=float, \ 16 | help='the thresholds for drawing detBoxes') 17 | parser.add_argument('--filterThr', dest='filterThr', nargs='+', default=[None], type=float, \ 18 | help='the thresholds for filtering detBoxes') 19 | parser.add_argument('--gt', dest='gt', required=True, \ 20 | help='the fpath to *.odgt (groundtruth)') 21 | parser.add_argument('--show', dest='show', default='all', \ 22 | help='image show options [all, fp, miss] or watchlist path') 23 | parser.add_argument('--axis', dest='axis', choices=[0,1], default=1, type=int, \ 24 | help='concat axis for multiple dts') 25 | parser.add_argument('--maxsize', dest='maxsize', default=640, type=int, \ 26 | help='maxsize for the show images') 27 | parser.add_argument('--which', dest='which', default=0, type=int, \ 28 | help='show detBoxes in which dt ranking') 29 | parser.add_argument('--nms',
dest='nms', action='store_true', default=False, \ 30 | help='do nms or not') 31 | parser.add_argument('--shuffle', dest='shuffle', action='store_true', default=False, \ 32 | help='do shuffle or not for show results') 33 | args = parser.parse_args() 34 | gtpath, dtpaths = args.gt, args.dt 35 | doNMS, filterThres, showThres = args.nms, args.filterThr, args.showThr 36 | which, showopt, doShuffle = args.which, args.show, args.shuffle 37 | axis, maxsize = args.axis, args.maxsize 38 | 39 | if len(showThres) == 1: 40 | showThres = [showThres[0]] * len(dtpaths) 41 | else: 42 | assert len(showThres) == len(dtpaths), 'Num of showThrs must be equal to the num of dts.' 43 | 44 | if len(filterThres) == 1: 45 | filterThres = [filterThres[0]] * len(dtpaths) 46 | else: 47 | assert len(filterThres) == len(dtpaths), 'Num of filterThrs must be equal to the num of dts.' 48 | 49 | DBs = list() 50 | for (dtpath, showThr, filterThr) in zip(dtpaths, showThres, filterThres): 51 | dbName = dtpath.split('/')[-1] 52 | print('Loading {}...'.format(dbName)) 53 | db = EvalDB(dbName, gtpath, dtpath, showThr) 54 | if filterThr is not None: 55 | print('Doing filtering for {}...'.format(dbName)) 56 | db.filterBoxes('dt', filterRule, filterThr) 57 | if doNMS: 58 | print('Doing nms for {}...'.format(dbName)) 59 | db.doNMS(nmsRule) 60 | DBs.append(db) 61 | 62 | print('Evaluating...') 63 | evaluator = Evaluator(DBs) 64 | evaluator.eval_MR() 65 | evaluator.eval_AP() 66 | showlist = evaluator.getShowlist(DBs[which].dbName, option=showopt) 67 | 68 | # displayer = Displayer(DBs) 69 | # displayer.show(showlist, doShuffle, axis, maxsize) 70 | 71 | if __name__ == '__main__': 72 | main() 73 | -------------------------------------------------------------------------------- /utils/detToolkits/eval_csv.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | from detools import * 5 | 6 | def nmsRule(a, b, nmsThres=0.3): 7 | return a.iomin(b) > nmsThres 8 | 9 | def filterRule(a, scoreThres=0.0): 10 | return a.score > scoreThres 11 | 12 | def main(): 13 | parser = argparse.ArgumentParser() 14 | parser.add_argument('--dt', dest='dt', nargs='+', required=True, \ 15 | help='the fpaths to *.oddet (detection results)') 16 | parser.add_argument('--showThr', dest='showThr', nargs='+', default=[None], type=float, \ 17 | help='the thresholds for drawing detBoxes') 18 | parser.add_argument('--filterThr', dest='filterThr', nargs='+', default=[None], type=float, \ 19 | help='the thresholds for filtering detBoxes') 20 | parser.add_argument('--gt', dest='gt', required=True, \ 21 | help='the fpath to *.odgt (groundtruth)') 22 | parser.add_argument('--imgroot', dest='imgroot', default='', \ 23 | help='the path of the root dir of images') 24 | parser.add_argument('--show', dest='show', default='all', \ 25 | help='image show options [all, fp, miss] or watchlist path') 26 | parser.add_argument('--axis', dest='axis', choices=[0,1], default=1, type=int, \ 27 | help='concate axis for multiple dts') 28 | parser.add_argument('--maxsize', dest='maxsize', default=640, type=int, \ 29 | help='maxsize for the show images') 30 | parser.add_argument('--which', dest='which', default=0, type=int, \ 31 | help='show detBoxes in which dt ranking') 32 | parser.add_argument('--nms', dest='nms', action='store_true', default=False, \ 33 | help='do nms or not') 34 | parser.add_argument('--csv', dest='csv', default=None, \ 35 | help='output csv file') 36 | parser.add_argument('--shuffle', dest='shuffle', 
action='store_true', default=False, \ 37 | help='do shuffle or not for show results') 38 | args = parser.parse_args() 39 | gtpath, dtpaths, imgroot = args.gt, args.dt, args.imgroot 40 | doNMS, filterThres, showThres = args.nms, args.filterThr, args.showThr 41 | which, showopt, doShuffle = args.which, args.show, args.shuffle 42 | axis, maxsize = args.axis, args.maxsize 43 | 44 | if len(showThres) == 1: 45 | showThres = [showThres[0]] * len(dtpaths) 46 | else: 47 | assert len(showThres) == len(dtpaths), 'Num of showThrs must be equal to the num of dts.' 48 | 49 | if len(filterThres) == 1: 50 | filterThres = [filterThres[0]] * len(dtpaths) 51 | else: 52 | assert len(filterThres) == len(dtpaths), 'Num of filterThrs must be equal to the num of dts.' 53 | 54 | DBs = list() 55 | 56 | for (dtpath, showThr, filterThr) in zip(dtpaths, showThres, filterThres): 57 | dbName = dtpath.split('/')[-1] 58 | print('Loading {}...'.format(dbName)) 59 | db = EvalDB(dbName, imgroot, gtpath, dtpath, showThr) 60 | if filterThr is not None: 61 | print('Doing filtering for {}...'.format(dbName)) 62 | db.filterBoxes('dt', filterRule, filterThr) 63 | if doNMS: 64 | print('Doing nms for {}...'.format(dbName)) 65 | db.doNMS(nmsRule) 66 | print(db) 67 | DBs.append(db) 68 | 69 | print('Evaluating...') 70 | evaluator = Evaluator(DBs) 71 | # evaluator.eval_AP_detail(iouThres=0.5, csvname=args.csv) 72 | evaluator.eval_AP_detail(iouThres=0.5) 73 | evaluator.eval_MR(iouThres=0.5) 74 | # print("AP is {}, MR is {}".format(AP,MR)) 75 | if __name__ == '__main__': 76 | main() 77 | -------------------------------------------------------------------------------- /utils/draw.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import numpy as np 3 | import cv2 4 | from typing import Tuple 5 | from box import Box 6 | import pdb 7 | class Color: 8 | Red = np.array([0, 0, 255]) 9 | Green = np.array([0, 255, 0]) 10 | Blue = np.array([255, 0, 0]) 11 | White = np.array([255, 255, 255]) 12 | Black = np.array([0, 0, 0]) 13 | Gray = np.array([128, 128, 128]) 14 | Yellow = np.array([255, 215, 0]) 15 | Brown = np.array([128, 42, 42]) 16 | 17 | def draw_box(box:Box, target:np.ndarray, color, line_width:int=1): 18 | assert line_width > 0 19 | width_outer = line_width // 2 20 | assert len(target.shape) == 2 or len(target.shape) == 3 21 | target = target.reshape([target.shape[0], target.shape[1], -1]) 22 | 23 | width = target.shape[1] 24 | height = target.shape[0] 25 | 26 | box_outer = Box(box).expend_by(width_outer) 27 | 28 | if box_outer.valid: 29 | # top line 30 | h_start = max(0, box_outer[1]) 31 | h_end = min(height, box_outer[1] + line_width, box_outer[3]) 32 | w_start = max(0, box_outer[0]) 33 | w_end = min(width, box_outer[2]) 34 | if h_start < h_end and w_start < w_end: 35 | target[h_start:h_end, w_start:w_end, :] = color 36 | 37 | # bottom line 38 | h_start = max(box_outer[1], box_outer[3] - line_width, 0) 39 | h_end = min(height, box_outer[3]) 40 | if h_start < h_end: 41 | target[h_start:h_end, w_start:w_end, :] = color 42 | 43 | # left line 44 | h_start = max(0, box_outer[1]) 45 | h_end = min(height, box_outer[3]) 46 | w_start = max(0, box_outer[0]) 47 | w_end = min(width, box_outer[0] + line_width, box_outer[2]) 48 | if h_start < h_end and w_start < w_end: 49 | target[h_start:h_end, w_start:w_end, :] = color 50 | 51 | # right line 52 | w_start = max(box_outer[0], box_outer[2] - line_width, 0) 53 | w_end = min(width, box_outer[2]) 54 | if w_start < w_end: 55 | target[h_start:h_end, 
w_start:w_end, :] = color 56 | 57 | 58 | 59 | def fill_box(box:Box, target:np.ndarray, color): 60 | assert len(target.shape) == 2 or len(target.shape) == 3 61 | target = target.reshape([target.shape[0], target.shape[1], -1]) 62 | 63 | width = target.shape[1] 64 | height = target.shape[0] 65 | 66 | box = Box(box).intersect(Box([0, 0, width, height])) 67 | if box.valid: 68 | target[box[1]:box[3], box[0]:box[2], :] = color 69 | 70 | def draw_xt(xt:np.ndarray,image:np.ndarray,color:Color,line_width:int): 71 | assert image is not None 72 | xt = np.int32(np.round(xt[:,:4])) 73 | nr_xt = xt.shape[0] 74 | for i in range(nr_xt): 75 | box = Box(xt[i,:]) 76 | draw_box(box,image,color,line_width) 77 | def fill_poly(points:np.ndarray, target:np.ndarray, color): 78 | points = np.int32(np.round(np.array(points))) 79 | assert len(points.shape) == 2 80 | assert points.shape[1] == 2 81 | 82 | assert len(target.shape) == 2 or len(target.shape) == 3 83 | target = target.reshape([target.shape[0], target.shape[1], -1]) 84 | 85 | height = target.shape[0] 86 | width = target.shape[1] 87 | 88 | n = points.shape[0] 89 | p1 = np.hstack([points, np.ones([n, 1], dtype=np.int32)]) 90 | p2 = np.vstack([points[1:, :], points[:1, :]]) 91 | p2 = np.hstack([p2, np.ones([n, 1], dtype=np.int32)]) 92 | edges = np.cross(p1, p2) 93 | 94 | tight_box = Box.create_tight_box(points).intersect(Box([0, 0, width, height])) 95 | iy, ix = np.mgrid[tight_box[1]:tight_box[3], tight_box[0]:tight_box[2]] 96 | iy = iy.reshape([-1, 1]) 97 | ix = ix.reshape([-1, 1]) 98 | inner_points = np.hstack([ix, iy, np.ones([iy.size, 1], dtype=np.int32)]) 99 | r = np.dot(inner_points, edges.T) 100 | r = (r <= 0).all(axis=1) 101 | if r.size > 0: 102 | inner_points = inner_points[r, :] 103 | target[inner_points[:, 1], inner_points[:, 0], :] = color 104 | 105 | 106 | # unit test 107 | def _unit_test(): 108 | import cv2 as cv 109 | 110 | b = Box([-10, 400, 300, 600]) 111 | img = np.zeros([1000, 1000, 3]) 112 | 113 | draw_box(b, img, Color.Green, 2) 114 | fill_box(b, img, Color.Red) 115 | 116 | #fill_poly(b.vertices, img, Color.Blue) 117 | #ss = np.int64(b.get_rotated_vertices(45)) 118 | #print(ss) 119 | #fill_poly(ss, img, Color.Blue) 120 | 121 | cv.imshow("test", img) 122 | cv.waitKey(0) 123 | 124 | if __name__ == '__main__': 125 | _unit_test() 126 | 127 | -------------------------------------------------------------------------------- /utils/misc_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | def load_img(image_path): 6 | import cv2 7 | img = cv2.imread(image_path, cv2.IMREAD_COLOR) 8 | return img 9 | 10 | def load_json_lines(fpath): 11 | assert os.path.exists(fpath) 12 | with open(fpath,'r') as fid: 13 | lines = fid.readlines() 14 | records = [json.loads(line.strip('\n')) for line in lines] 15 | return records 16 | 17 | def save_json_lines(content,fpath): 18 | with open(fpath,'w') as fid: 19 | for db in content: 20 | line = json.dumps(db)+'\n' 21 | fid.write(line) 22 | 23 | def ensure_dir(dirpath): 24 | if not os.path.exists(dirpath): 25 | os.makedirs(dirpath) 26 | 27 | def xyxy_to_xywh(boxes): 28 | assert boxes.shape[1]>=4 29 | boxes[:, 2:4] -= boxes[:,:2] 30 | return boxes 31 | 32 | def xywh_to_xyxy(boxes): 33 | assert boxes.shape[1]>=4 34 | boxes[:, 2:4] += boxes[:,:2] 35 | return boxes 36 | 37 | def load_bboxes(dict_input, key_name, key_box, key_score=None, key_tag=None): 38 | assert key_name in dict_input 39 | if len(dict_input[key_name]) < 1: 40 | 
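# note: the empty fallback below is fixed at 5 columns so downstream
# slicing such as boxes[:, -2] keeps working; with both key_score and
# key_tag set, the non-empty branch actually stacks 6 columns
# ([x, y, w, h, score, tag]).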
return np.empty([0, 5]) 41 | else: 42 | assert key_box in dict_input[key_name][0] 43 | if key_score: 44 | assert key_score in dict_input[key_name][0] 45 | if key_tag: 46 | assert key_tag in dict_input[key_name][0] 47 | if key_score: 48 | if key_tag: 49 | bboxes = np.vstack([np.hstack((rb[key_box], rb[key_score], rb[key_tag])) for rb in dict_input[key_name]]) 50 | else: 51 | bboxes = np.vstack([np.hstack((rb[key_box], rb[key_score])) for rb in dict_input[key_name]]) 52 | else: 53 | if key_tag: 54 | bboxes = np.vstack([np.hstack((rb[key_box], rb[key_tag])) for rb in dict_input[key_name]]) 55 | else: 56 | bboxes = np.vstack([rb[key_box] for rb in dict_input[key_name]]) 57 | return bboxes 58 | 59 | def load_masks(dict_input, key_name, key_box): 60 | assert key_name in dict_input 61 | if len(dict_input[key_name]) < 1: 62 | return np.empty([0, 28, 28]) 63 | else: 64 | assert key_box in dict_input[key_name][0] 65 | masks = np.array([rb[key_box] for rb in dict_input[key_name]]) 66 | return masks 67 | 68 | def load_gt(dict_input, key_name, key_box, class_names): 69 | assert key_name in dict_input 70 | if len(dict_input[key_name]) < 1: 71 | return np.empty([0, 5]) 72 | else: 73 | assert key_box in dict_input[key_name][0] 74 | bbox = [] 75 | for rb in dict_input[key_name]: 76 | if rb['tag'] in class_names: 77 | tag = class_names.index(rb['tag']) 78 | else: 79 | tag = -1 80 | if 'extra' in rb: 81 | if 'ignore' in rb['extra']: 82 | if rb['extra']['ignore'] != 0: 83 | tag = -1 84 | bbox.append(np.hstack((rb[key_box], tag))) 85 | bboxes = np.vstack(bbox).astype(np.float64) 86 | return bboxes 87 | 88 | def boxes_dump(boxes, is_gt): 89 | result = [] 90 | boxes = boxes.tolist() 91 | for box in boxes: 92 | if is_gt: 93 | box_dict = {} 94 | box_dict['box'] = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 95 | box_dict['tag'] = box[-1] 96 | result.append(box_dict) 97 | else: 98 | box_dict = {} 99 | box_dict['box'] = [box[0], box[1], box[2]-box[0], box[3]-box[1]] 100 | box_dict['tag'] = 1 101 | box_dict['score'] = box[-1] 102 | result.append(box_dict) 103 | return result 104 | 105 | def clip_boundary(boxes,height,width): 106 | assert boxes.shape[-1]>=4 107 | boxes[:,0] = np.minimum(np.maximum(boxes[:,0],0), width - 1) 108 | boxes[:,1] = np.minimum(np.maximum(boxes[:,1],0), height - 1) 109 | boxes[:,2] = np.maximum(np.minimum(boxes[:,2],width), 0) 110 | boxes[:,3] = np.maximum(np.minimum(boxes[:,3],height), 0) 111 | return boxes 112 | 113 | -------------------------------------------------------------------------------- /utils/nms/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | # Created Time: 2017-03-17 14:59:15 4 | # Modified Time: 2017-03-17 14:59:18 5 | 6 | -------------------------------------------------------------------------------- /utils/nms/nms/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf8 -*- 2 | # File : __init__.py 3 | # Author : Jiayuan Mao 4 | # Email : mjy@megvii.com 5 | # Date : 8/12/16 13:00 6 | # 7 | # This file is part of NeuArtist2 8 | 9 | -------------------------------------------------------------------------------- /utils/nms/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | 
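A minimal, hedged sketch of driving this CUDA kernel from Python via the Cython wrapper that follows (import path as used in nms_wrapper.py; the extension is assumed to be built first with utils/nms/setup.py; box values are made up):

import numpy as np
from nms.gpu_nms import gpu_nms

# float32 rows of [x1, y1, x2, y2, score], as expected by gpu_nms.pyx
dets = np.array([[ 10.,  10., 110., 210., 0.9],
                 [ 12.,  12., 108., 208., 0.8],
                 [300.,  50., 380., 250., 0.7]], dtype=np.float32)
keep = gpu_nms(dets, 0.5, device_id=0)   # indices of boxes surviving NMS at IoU 0.5
print(dets[keep])                        # the two non-overlapping detections remain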
-------------------------------------------------------------------------------- /utils/nms/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return list(order[keep]) 32 | -------------------------------------------------------------------------------- /utils/nms/nms_opr.py: -------------------------------------------------------------------------------- 1 | # This file implements the nms operator used in network 2 | import megbrain as mgb 3 | from megbrain.craniotome import CraniotomeBase 4 | from megbrain.craniotome import make_opr 5 | from megskull.opr.all import MGBOprForwarderBase, SingleCNOperatorNodeBase, NonTrainableMLPOperatorNodeBase 6 | import numpy as np 7 | from IPython import embed 8 | 9 | from meg_kernels.lib_nms.gpu_nms import gpu_nms 10 | 11 | class NMSKeepCran(CraniotomeBase): 12 | __nr_inputs__ = 1 13 | __nr_outputs__= 1 14 | __is_dynamic_output_shape__ = True 15 | 16 | def setup(self, iou_threshold): 17 | self._iou_threshold = iou_threshold 18 | 19 | def execute(self, inputs, outputs): 20 | """ inputs: list of (x0, y0, x1, y1, score)""" 21 | in_ = inputs[0].get_value() 22 | keep = gpu_nms(in_, thresh=self._iou_threshold) 23 | outputs[0].set_value(keep) 24 | 25 | def grad(self, wrt_idx, inputs, outputs, out_grad): 26 | return 0 27 | 28 | def init_output_dtype(self, input_dtypes): 29 | return [np.int32] 30 | 31 | class NMSKeep(NonTrainableMLPOperatorNodeBase): 32 | 33 | def __init__(self, name, box, iou_threshold): 34 | super().__init__(name, box) 35 | self._iou_threshold = iou_threshold 36 | 37 | def _init_output_mgbvar(self, env): 38 | var_box = env.get_mgbvar(self._var_input) 39 | keep = NMSKeepCran.make(var_box, iou_threshold = self._iou_threshold) 40 | env.set_mgbvar(self._var_output, keep) 41 | -------------------------------------------------------------------------------- /utils/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | from nms.gpu_nms import gpu_nms 2 | from nms.cpu_nms import cpu_nms, cpu_soft_nms 3 | import numpy as np 4 | 5 | def soft_nms(dets, sigma=0.5, Nt=0.3, threshold=0.001, method=1): 6 | 7 | keep = cpu_soft_nms(np.ascontiguousarray(dets, dtype=np.float32), 8 | np.float32(sigma), np.float32(Nt), 9 | np.float32(threshold), 10 | np.uint8(method)) 11 | return keep 12 | 13 | 14 | # Original NMS implementation 15 | def nms(dets, thresh, 
force_cpu=True): 16 | """Dispatch to either CPU or GPU NMS implementations.""" 17 | if dets.shape[0] == 0: 18 | return [] 19 | if not force_cpu: 20 | return gpu_nms(dets, thresh, device_id=0) 21 | else: 22 | return cpu_nms(dets, thresh) 23 | -------------------------------------------------------------------------------- /utils/py_cpu_nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def py_cpu_nms(dets, thresh): 11 | """Pure Python NMS baseline.""" 12 | x1 = dets[:, 0] 13 | y1 = dets[:, 1] 14 | x2 = dets[:, 2] 15 | y2 = dets[:, 3] 16 | scores = dets[:, 4] 17 | 18 | areas = (x2 - x1) * (y2 - y1) 19 | order = scores.argsort()[::-1] 20 | 21 | keep = [] 22 | eps = 1e-8 23 | while order.size > 0: 24 | i = order[0] 25 | keep.append(i) 26 | xx1 = np.maximum(x1[i], x1[order[1:]]) 27 | yy1 = np.maximum(y1[i], y1[order[1:]]) 28 | xx2 = np.minimum(x2[i], x2[order[1:]]) 29 | yy2 = np.minimum(y2[i], y2[order[1:]]) 30 | 31 | w = np.maximum(0.0, xx2 - xx1) 32 | h = np.maximum(0.0, yy2 - yy1) 33 | inter = w * h 34 | ovr = inter / (areas[i] + areas[order[1:]] - inter + eps) 35 | 36 | inds = np.where(ovr <= thresh)[0] 37 | order = order[inds + 1] 38 | 39 | return keep 40 | def _test(): 41 | box1 = np.array([33,45,145,230,0.7])[None,:] 42 | box2 = np.array([44,54,123,348,0.8])[None,:] 43 | box3 = np.array([88,12,340,342,0.65])[None,:] 44 | boxes = np.concatenate([box1,box2,box3],axis = 0) 45 | nms_thresh = 0.5 46 | keep = py_cpu_nms(boxes,nms_thresh) 47 | alive_boxes = boxes[keep] 48 | if __name__=='__main__': 49 | _test() -------------------------------------------------------------------------------- /utils/set_nms_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pdb 3 | 4 | def set_cpu_nms(dets, thresh): 5 | """Pure Python NMS baseline.""" 6 | def _overlap(det_boxes, basement, others): 7 | eps = 1e-8 8 | x1_basement, y1_basement, x2_basement, y2_basement \ 9 | = det_boxes[basement, 0], det_boxes[basement, 1], \ 10 | det_boxes[basement, 2], det_boxes[basement, 3] 11 | x1_others, y1_others, x2_others, y2_others \ 12 | = det_boxes[others, 0], det_boxes[others, 1], \ 13 | det_boxes[others, 2], det_boxes[others, 3] 14 | areas_basement = (x2_basement - x1_basement) * (y2_basement - y1_basement) 15 | areas_others = (x2_others - x1_others) * (y2_others - y1_others) 16 | xx1 = np.maximum(x1_basement, x1_others) 17 | yy1 = np.maximum(y1_basement, y1_others) 18 | xx2 = np.minimum(x2_basement, x2_others) 19 | yy2 = np.minimum(y2_basement, y2_others) 20 | w = np.maximum(0.0, xx2 - xx1) 21 | h = np.maximum(0.0, yy2 - yy1) 22 | inter = w * h 23 | ovr = inter / (areas_basement + areas_others - inter + eps) 24 | return ovr 25 | scores = dets[:, 4] 26 | order = np.argsort(-scores) 27 | dets = dets[order] 28 | 29 | numbers = dets[:, 5] 30 | keep = np.ones(len(dets)) == 1 31 | ruler = np.arange(len(dets)) 32 | while ruler.size>0: 33 | basement = ruler[0] 34 | ruler=ruler[1:] 35 | num = numbers[basement] 36 | # calculate the body overlap 37 | overlap = _overlap(dets[:, :4], basement, ruler) 38 | indices = np.where(overlap > thresh)[0] 39 | loc = np.where(numbers[ruler][indices] == num)[0] 40 | # the mask won't 
change in this step 41 | mask = keep[ruler[indices][loc]]#.copy() 42 | keep[ruler[indices]] = False 43 | keep[ruler[indices][loc][mask]] = True 44 | ruler[~keep[ruler]] = -1 45 | ruler = ruler[ruler>0] 46 | keep = keep[np.argsort(order)] 47 | return keep 48 | 49 | def set_cpu_nms_body_with_head(body_dets, head_dets, thresh): 50 | """Pure Python NMS baseline.""" 51 | def _overlap(det_boxes, basement, others): 52 | eps = 1e-8 53 | x1_basement, y1_basement, x2_basement, y2_basement \ 54 | = det_boxes[basement, 0], det_boxes[basement, 1], \ 55 | det_boxes[basement, 2], det_boxes[basement, 3] 56 | x1_others, y1_others, x2_others, y2_others \ 57 | = det_boxes[others, 0], det_boxes[others, 1], \ 58 | det_boxes[others, 2], det_boxes[others, 3] 59 | areas_basement = (x2_basement - x1_basement) * (y2_basement - y1_basement) 60 | areas_others = (x2_others - x1_others) * (y2_others - y1_others) 61 | xx1 = np.maximum(x1_basement, x1_others) 62 | yy1 = np.maximum(y1_basement, y1_others) 63 | xx2 = np.minimum(x2_basement, x2_others) 64 | yy2 = np.minimum(y2_basement, y2_others) 65 | w = np.maximum(0.0, xx2 - xx1) 66 | h = np.maximum(0.0, yy2 - yy1) 67 | inter = w * h 68 | ovr = inter / (areas_basement + areas_others - inter + eps) 69 | return ovr 70 | scores = body_dets[:, 4] 71 | order = np.argsort(-scores) 72 | body_dets = body_dets[order] 73 | head_dets = head_dets[order] 74 | 75 | numbers = body_dets[:, 5] 76 | keep = np.ones(len(body_dets)) == 1 77 | ruler = np.arange(len(body_dets)) 78 | while ruler.size>0: 79 | basement = ruler[0] 80 | ruler=ruler[1:] 81 | num = numbers[basement] 82 | # calculate the body overlap 83 | overlap_body = _overlap(body_dets[:, :4], basement, ruler) 84 | overlap_head = _overlap(head_dets[:, :4], basement, ruler) 85 | overlap = np.maximum(overlap_body, overlap_head) 86 | indices = np.where(overlap > thresh)[0] 87 | loc = np.where(numbers[ruler][indices] == num)[0] 88 | # the mask won't change in this step 89 | mask = keep[ruler[indices][loc]]#.copy() 90 | keep[ruler[indices]] = False 91 | keep[ruler[indices][loc][mask]] = True 92 | ruler[~keep[ruler]] = -1 93 | ruler = ruler[ruler>0] 94 | keep = keep[np.argsort(order)] 95 | return keep 96 | 97 | def cpu_nms(dets, thresh): 98 | eps = 1e-8 99 | x1 = np.ascontiguousarray(dets[:, 0]) 100 | y1 = np.ascontiguousarray(dets[:, 1]) 101 | x2 = np.ascontiguousarray(dets[:, 2]) 102 | y2 = np.ascontiguousarray(dets[:, 3]) 103 | 104 | areas = (x2 - x1) * (y2 - y1) 105 | order = dets[:, 4].argsort()[::-1] 106 | keep = list() 107 | 108 | while order.size > 0: 109 | pick_ind = order[0] 110 | keep.append(pick_ind) 111 | 112 | xx1 = np.maximum(x1[pick_ind], x1[order[1:]]) 113 | yy1 = np.maximum(y1[pick_ind], y1[order[1:]]) 114 | xx2 = np.minimum(x2[pick_ind], x2[order[1:]]) 115 | yy2 = np.minimum(y2[pick_ind], y2[order[1:]]) 116 | 117 | inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1) 118 | iou = inter / (areas[pick_ind] + areas[order[1:]] - inter + eps) 119 | order = order[np.where(iou <= thresh)[0] + 1] 120 | 121 | return keep 122 | 123 | def _test(): 124 | 125 | box1 = np.array([33,45,145,230,0.7])[None,:] 126 | box2 = np.array([44,54,123,348,0.8])[None,:] 127 | box3 = np.array([88,12,340,342,0.65])[None,:] 128 | boxes = np.concatenate([box1,box2,box3],axis = 0) 129 | nms_thresh = 0.5 130 | keep = cpu_nms(boxes,nms_thresh) 131 | alive_boxes = boxes[keep] 132 | if __name__=='__main__': 133 | _test() 134 | -------------------------------------------------------------------------------- /utils/visual_utils.py:
-------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | import cv2 5 | 6 | # color table in OpenCV BGR order 7 | color = {'green':(0,255,0), 8 | 'blue':(255,165,0), 9 | 'dark red':(0,0,139), 10 | 'red':(0, 0, 255), 11 | 'dark slate blue':(139,61,72), 12 | 'aqua':(255,255,0), 13 | 'brown':(42,42,165), 14 | 'deep pink':(147,20,255), 15 | 'fuchsia':(255,0,255), 16 | 'yellow':(0,238,238), 17 | 'orange':(0,165,255), 18 | 'saddle brown':(19,69,139), 19 | 'black':(0,0,0), 20 | 'white':(255,255,255)} 21 | 22 | def draw_boxes(img, boxes, scores=None, tags=None, line_thick=1, line_color='white'): 23 | width = img.shape[1] 24 | height = img.shape[0] 25 | for i in range(len(boxes)): 26 | one_box = boxes[i] 27 | one_box = np.array([max(one_box[0], 0), max(one_box[1], 0), 28 | min(one_box[2], width - 1), min(one_box[3], height - 1)]) 29 | x1,y1,x2,y2 = np.array(one_box[:4]).astype(int) 30 | cv2.rectangle(img, (x1,y1), (x2,y2), color[line_color], line_thick) 31 | if scores is not None: 32 | text = "{} {:.3f}".format(tags[i], scores[i]) 33 | cv2.putText(img, text, (x1, y1 - 7), cv2.FONT_ITALIC, 0.5, color[line_color], line_thick) 34 | 35 | --------------------------------------------------------------------------------
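A closing, hedged sketch of the set-NMS post-processing these utilities implement (box values invented): set_cpu_nms expects rows of [x1, y1, x2, y2, score, ident], where the top-k boxes emitted from the same proposal share an ident and therefore never suppress each other, which is what lets the EMD heads keep two heavily overlapping people predicted from one proposal.

import numpy as np
from set_nms_utils import set_cpu_nms

# two proposals, top_k = 2 boxes each; column 5 is the ident
pred = np.array([[ 10., 10., 110., 210., 0.90, 0.],   # proposal 0, box A
                 [ 14., 12., 118., 215., 0.85, 0.],   # proposal 0, box B (kept: same ident as A)
                 [ 16., 11., 112., 212., 0.80, 1.],   # proposal 1 (suppressed by box A)
                 [200., 40., 280., 240., 0.75, 1.]],  # proposal 1, far away (kept)
                dtype=np.float32)
keep = set_cpu_nms(pred, 0.5)        # boolean mask in the original row order
print(pred[keep][:, :-1])            # drop the ident column, as the test scripts do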