├── README.md ├── evaluation.py ├── lib ├── Trainer │ ├── base_trainer.py │ └── ctdet.py ├── dataset │ ├── coco.py │ ├── coco_eval.py │ ├── coco_rsdata.py │ ├── misc.py │ └── pascal.py ├── external │ ├── .gitignore │ ├── Makefile │ ├── __init__.py │ ├── nms.c │ ├── nms.cpython-36m-x86_64-linux-gnu.so │ ├── nms.cpython-37m-x86_64-linux-gnu.so │ ├── nms.pyx │ └── setup.py ├── loss │ ├── 1.txt │ └── losses.py ├── models │ ├── DCNv2 │ │ ├── .gitignore │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── dcn_v2.py │ │ ├── make.sh │ │ ├── setup.py │ │ ├── src │ │ │ ├── cpu │ │ │ │ ├── dcn_v2_cpu.cpp │ │ │ │ └── vision.h │ │ │ ├── cuda │ │ │ │ ├── dcn_v2_cuda.cu │ │ │ │ ├── dcn_v2_im2col_cuda.cu │ │ │ │ ├── dcn_v2_im2col_cuda.h │ │ │ │ ├── dcn_v2_psroi_pooling_cuda.cu │ │ │ │ └── vision.h │ │ │ ├── dcn_v2.h │ │ │ └── vision.cpp │ │ └── test.py │ ├── DSFNet.py │ ├── DSFNet_with_Dynamic.py │ ├── DSFNet_with_Static.py │ └── stNet.py └── utils │ ├── augmentations.py │ ├── data_parallel.py │ ├── debugger.py │ ├── decode.py │ ├── image.py │ ├── logger.py │ ├── opts.py │ ├── post_process.py │ ├── scatter_gather.py │ ├── sort.py │ ├── utils.py │ └── utils_eval.py ├── readme │ ├── net.bmp │ └── visualResults.bmp ├── requirements.txt ├── test.py ├── testSaveMat.py ├── testTrackingSort.py └── train.py /README.md: -------------------------------------------------------------------------------- 1 | # DSFNet: Dynamic and Static Fusion Network for Moving Object Detection in Satellite Videos 2 | 3 | ![outline](./readme/net.bmp) 4 | ## Algorithm Introduction 5 | 6 | DSFNet: Dynamic and Static Fusion Network for Moving Object Detection in Satellite Videos, Chao Xiao, Qian Yin, and Xinyi Ying. 7 | 8 | We propose a two-stream network named DSFNet that combines static context information and dynamic motion cues to detect small moving objects in satellite videos. Experiments on videos collected by the Jilin-1 satellite demonstrate the effectiveness and robustness of the proposed DSFNet. For more details, please refer to the paper. 9 | 10 | In this code, we also apply [SORT](https://github.com/abewley/sort) to obtain the tracking results of DSFNet. 11 | 12 | ## Citation 13 | If you find the code useful, please consider citing our paper using the following BibTeX entry. 14 | ``` 15 | @article{xiao2021dsfnet, 16 | title={DSFNet: Dynamic and Static Fusion Network for Moving Object Detection in Satellite Videos}, 17 | author={Xiao, Chao and Yin, Qian and Ying, Xinyi and Li, Ruojing and Wu, Shuanglin and Li, Miao and Liu, Li and An, Wei and Chen, Zhijie}, 18 | journal={IEEE Geoscience and Remote Sensing Letters}, 19 | volume={19}, 20 | pages={1--5}, 21 | year={2021}, 22 | publisher={IEEE} 23 | } 24 | ``` 25 | 26 | ## Prerequisites 27 | * Tested on Ubuntu 20.04, with Python 3.7, PyTorch 1.7, Torchvision 0.8.1, CUDA 10.2, and 2x NVIDIA 2080Ti. 28 | * You can follow [CenterNet](https://github.com/xingyizhou/CenterNet) to build the conda environment, but remember to replace the [DCNv2](https://github.com/CharlesShang/DCNv2/tree/pytorch_0.4) used by CenterNet with the DCNv2 used here (we use the latest version of [DCNv2](https://github.com/CharlesShang/DCNv2), which supports PyTorch 1.7). 29 | * You can also follow [CenterNet](https://github.com/xingyizhou/CenterNet) to build the conda environment with Python 3.7, PyTorch 1.7, and Torchvision 0.8.1, and run this code. 
30 | * The dataset used here is available at [BaiduYun](https://pan.baidu.com/s/1QuLXsZEUkZMoQ9JJW6Qz4w?pwd=4afk) (sharing code: 4afk). You can download the dataset and put it in the data folder. 31 | ## Usage 32 | 33 | #### On Ubuntu: 34 | #### 1. Train. 35 | ```bash 36 | python train.py --model_name DSFNet --gpus 0,1 --lr 1.25e-4 --lr_step 30,45 --num_epochs 55 --batch_size 4 --val_intervals 5 --test_large_size True --datasetname rsdata --data_dir ./data/RsCarData/ 37 | ``` 38 | 39 | #### 2. Test. 40 | ```bash 41 | python test.py --model_name DSFNet --gpus 0 --load_model ./checkpoints/DSFNet.pth --test_large_size True --datasetname rsdata --data_dir ./data/RsCarData/ 42 | ``` 43 | 44 | #### (Optional 1) Test and visualize the detection results. 45 | ```bash 46 | python test.py --model_name DSFNet --gpus 0 --load_model ./checkpoints/DSFNet.pth --test_large_size True --show_results True --datasetname rsdata --data_dir ./data/RsCarData/ 47 | ``` 48 | 49 | #### (Optional 2) Test and visualize the tracking results of SORT. 50 | ```bash 51 | python testTrackingSort.py --model_name DSFNet --gpus 0 --load_model ./checkpoints/DSFNet.pth --test_large_size True --save_track_results True --datasetname rsdata --data_dir ./data/RsCarData/ 52 | ``` 53 | 54 | ## Results and Trained Models 55 | 56 | #### Qualitative Results 57 | 58 | ![outline](./readme/visualResults.bmp) 59 | 60 | #### Quantitative Results 61 | 62 | Quantitative results of different models evaluated by AP@50. The model weights are available at [BaiduYun](https://pan.baidu.com/s/1-LAEW1v8c3VDsc-e0vstcw?pwd=bidt) (sharing code: bidt). You can download the model weights and put them in the checkpoints folder. 63 | 64 | | Models | AP@50 | 65 | |--------------|-----------| 66 | | DSFNet with Static | 54.3 | 67 | | DSFNet with Dynamic | 60.5 | 68 | | DSFNet | 70.5 | 69 | 70 | *This code is highly borrowed from [CenterNet](https://github.com/xingyizhou/CenterNet). Thanks to Xingyi Zhou. 71 | 72 | *The overall repository style is highly borrowed from [DNANet](https://github.com/YeRen123455/Infrared-Small-Target-Detection). Thanks to Boyang Li. 73 | 74 | *The dataset is part of [VISO](https://github.com/The-Learning-And-Vision-Atelier-LAVA/VISO). Thanks to Qian Yin. 75 | ## References 76 | 1. X. Zhou, D. Wang, and P. Krahenbuhl, "Objects as points," arXiv preprint arXiv:1904.07850, 2019. 77 | 2. K. Simonyan and A. Zisserman, "Two-stream convolutional networks for action recognition in videos," Advances in Neural Information Processing Systems (NeurIPS), 2014. 78 | 3. A. Bewley, Z. Ge, L. Ott, F. Ramos, and B. Upcroft, "Simple online and realtime tracking," in Proceedings of the IEEE International Conference on Image Processing (ICIP), 2016. 79 | 4. Q. Yin et al., "Detecting and Tracking Small and Dense Moving Objects in Satellite Videos: A Benchmark," IEEE Transactions on Geoscience and Remote Sensing, 2021. 80 | ## Update 81 | The eval code has been updated and can be found in './lib/utils/utils_eval.py'. The evaluation results can be generated by running testSaveMat.py first and then evaluation.py, as sketched below. 
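A minimal sketch of this two-step evaluation flow is given below. The testSaveMat.py flags are assumptions that mirror the test command above (verify them against lib/utils/opts.py); evaluation.py takes no flags and instead reads its paths and thresholds (ANN_PATH0, results_dir_tol, conf_thresh) from the variables defined at the top of the script.
```bash
# Step 1: run inference and save per-frame detections as .mat files
# (flag values assumed to mirror test.py above; check lib/utils/opts.py).
python testSaveMat.py --model_name DSFNet --gpus 0 --load_model ./checkpoints/DSFNet.pth --test_large_size True --datasetname rsdata --data_dir ./data/RsCarData/
# Step 2: score the saved detections against the XML ground truth
# (edit ANN_PATH0 and results_dir_tol inside evaluation.py first).
python evaluation.py
```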
82 | -------------------------------------------------------------------------------- /evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.io as sio 3 | import os 4 | from lib.utils.utils_eval import eval_metric 5 | 6 | if __name__ == '__main__': 7 | #eval func 8 | eval_mode_metric = 'iou' 9 | dis_th = [5] 10 | iou_th = [0.05, 0.1, 0.2] 11 | conf_thresh = 0.3 12 | dataName = [3,5,2,8,10,6,9] 13 | #data path ori 14 | ANN_PATH0 = './dataset/RsCarData/images/test1024/' 15 | #specify results path 16 | results_dir_tol = [ 17 | './weights/rsdata/DSFNet/results/DSFNet_checkpoints_DSFNet_mat/', 18 | ] 19 | 20 | methods_results = {} 21 | for results_dir0 in results_dir_tol: 22 | iou_results = [] 23 | print(results_dir0) 24 | #record the results 25 | txt_name = 'results_%s_%.2f.txt'%(eval_mode_metric, conf_thresh) 26 | fid = open(results_dir0 + txt_name, 'w+') 27 | fid.write(results_dir0 + '(recall,precision,F1)\n') 28 | fid.write(eval_mode_metric + '\n') 29 | if eval_mode_metric=='dis': 30 | thres = dis_th 31 | elif eval_mode_metric=='iou': 32 | thres = iou_th 33 | else: 34 | raise Exception('Not a valid eval mode!!') 35 | ##eval 36 | thresh_results = {} 37 | for thre in thres: 38 | if eval_mode_metric == 'dis': 39 | dis_th_cur = thre 40 | iou_th_cur = 0.05 41 | elif eval_mode_metric == 'iou': 42 | dis_th_cur = 5 43 | iou_th_cur = thre 44 | else: 45 | raise Exception('Not a valid eval mode!!') 46 | det_metric = eval_metric(dis_th=dis_th_cur, iou_th=iou_th_cur, eval_mode=eval_mode_metric) 47 | fid.write('conf_thresh=%.2f,thresh=%.2f\n'%(conf_thresh, thre)) 48 | print('conf_thresh=%.2f,thresh=%.2f'%(conf_thresh, thre)) 49 | results_temp = {} 50 | for datafolder in dataName: 51 | det_metric.reset() 52 | ANN_PATH = ANN_PATH0+'%03d'%datafolder+'/xml_det/' 53 | results_dir = results_dir0 + '%03d/' % (datafolder) 54 | #start eval 55 | anno_dir = os.listdir(ANN_PATH) 56 | num_images = len(anno_dir) 57 | for index in range(num_images): 58 | file_name = anno_dir[index] 59 | #load gt 60 | if(not file_name.endswith('.xml')): 61 | continue 62 | annName = ANN_PATH+file_name 63 | if not os.path.exists(annName): 64 | continue 65 | gt_t = det_metric.getGtFromXml(annName) 66 | #load det 67 | matname = results_dir + file_name.replace('.xml','.mat') 68 | if os.path.exists(matname): 69 | det_ori = sio.loadmat(matname)['A'] 70 | det = np.array(det_ori) 71 | score = det[:,-1] 72 | inds = np.argsort(-score) 73 | det, score = det[inds], score[inds] #sort the boxes together with their scores before thresholding 74 | det = det[score>conf_thresh] 75 | else: 76 | det = np.empty([0,4]) 77 | #eval 78 | det_metric.update(gt_t, det) 79 | #get results 80 | result = det_metric.get_result() 81 | fid.write('&%.2f\t&%.2f\t&%.2f\n' % (result['recall'], result['prec'], result['f1'])) 82 | print('%s, evalmode=%s, thre=%0.2f, conf_th=%0.2f, re=%0.3f, prec=%0.3f, f1=%0.3f' % ( 83 | '%03d' % datafolder, eval_mode_metric, thre, conf_thresh, result['recall'], result['prec'], result['f1'])) 84 | results_temp[datafolder] = result 85 | #avg results 86 | metrics = [[v['recall'], v['prec'], v['f1']] for k, v in results_temp.items()] 87 | metrics = np.array(metrics) 88 | avg_results = np.mean(metrics, 0) 89 | print('avg result: ', avg_results) 90 | fid.write( 91 | '&%.2f\t&%.2f\t&%.2f\n' % (avg_results[0], avg_results[1], avg_results[2])) 92 | results_temp['avg'] = { 93 | 'recall': avg_results[0], 94 | 'prec': avg_results[1], 95 | 'f1': avg_results[2], 96 | } 97 | thresh_results[thre] = results_temp 98 | methods_results[results_dir0] = 
thresh_results 99 | # print(methods_results) -------------------------------------------------------------------------------- /lib/Trainer/base_trainer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import time 6 | import torch 7 | from progress.bar import Bar 8 | from lib.utils.data_parallel import DataParallel 9 | from lib.utils.utils import AverageMeter 10 | from lib.utils.decode import ctdet_decode 11 | from lib.utils.post_process import ctdet_post_process 12 | import numpy as np 13 | from lib.external.nms import soft_nms 14 | from lib.dataset.coco_eval import CocoEvaluator 15 | 16 | def post_process(output, meta, num_classes=1, scale=1): 17 | # decode 18 | hm = output['hm'].sigmoid_() 19 | wh = output['wh'] 20 | reg = output['reg'] 21 | 22 | torch.cuda.synchronize() 23 | dets = ctdet_decode(hm, wh, reg=reg) 24 | dets = dets.detach().cpu().numpy() 25 | dets = dets.reshape(1, -1, dets.shape[2]) 26 | dets = ctdet_post_process( 27 | dets.copy(), [meta['c']], [meta['s']], 28 | meta['out_height'], meta['out_width'], num_classes) 29 | for j in range(1, num_classes + 1): 30 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5) 31 | dets[0][j][:, :4] /= scale 32 | return dets[0] 33 | 34 | def merge_outputs(detections, num_classes ,max_per_image): 35 | results = {} 36 | for j in range(1, num_classes + 1): 37 | results[j] = np.concatenate( 38 | [detection[j] for detection in detections], axis=0).astype(np.float32) 39 | 40 | soft_nms(results[j], Nt=0.5, method=2) 41 | 42 | scores = np.hstack( 43 | [results[j][:, 4] for j in range(1, num_classes + 1)]) 44 | if len(scores) > max_per_image: 45 | kth = len(scores) - max_per_image 46 | thresh = np.partition(scores, kth)[kth] 47 | for j in range(1, num_classes + 1): 48 | keep_inds = (results[j][:, 4] >= thresh) 49 | results[j] = results[j][keep_inds] 50 | return results 51 | 52 | 53 | class ModelWithLoss(torch.nn.Module): 54 | def __init__(self, model, loss): 55 | super(ModelWithLoss, self).__init__() 56 | self.model = model 57 | self.loss = loss 58 | 59 | def forward(self, batch): 60 | # print(batch['input'].shape) 61 | outputs = self.model(batch['input']) 62 | loss, loss_stats = self.loss(outputs, batch) 63 | return outputs[-1], loss, loss_stats 64 | 65 | 66 | class BaseTrainer(object): 67 | def __init__( 68 | self, opt, model, optimizer=None): 69 | self.opt = opt 70 | self.optimizer = optimizer 71 | self.loss_stats, self.loss = self._get_losses(opt) 72 | self.model_with_loss = ModelWithLoss(model, self.loss) 73 | 74 | def set_device(self, gpus, device): 75 | if len(gpus) > 1: 76 | self.model_with_loss = DataParallel( 77 | self.model_with_loss, device_ids=gpus).to(device) 78 | else: 79 | self.model_with_loss = self.model_with_loss.to(device) 80 | 81 | for state in self.optimizer.state.values(): 82 | for k, v in state.items(): 83 | if isinstance(v, torch.Tensor): 84 | state[k] = v.to(device=device, non_blocking=True) 85 | 86 | def run_epoch(self, phase, epoch, data_loader): 87 | model_with_loss = self.model_with_loss 88 | if phase == 'train': 89 | model_with_loss.train() 90 | else: 91 | if len(self.opt.gpus) > 1: 92 | model_with_loss = self.model_with_loss.module 93 | model_with_loss.eval() 94 | torch.cuda.empty_cache() 95 | 96 | opt = self.opt 97 | results = {} 98 | data_time, batch_time = AverageMeter(), AverageMeter() 99 | avg_loss_stats = {l: AverageMeter() for l in 
self.loss_stats} 100 | num_iters = len(data_loader)//20 101 | # bar = Bar('{}/{}'.format(opt.task, opt.exp_id), max=num_iters) 102 | end = time.time() 103 | for iter_id, (im_id, batch) in enumerate(data_loader): 104 | if iter_id >= num_iters: 105 | break 106 | data_time.update(time.time() - end) 107 | 108 | for k in batch: 109 | if k != 'meta' and k != 'file_name': 110 | batch[k] = batch[k].to(device=opt.device, non_blocking=True) 111 | output, loss, loss_stats = model_with_loss(batch) 112 | loss = loss.mean() 113 | if phase == 'train': 114 | self.optimizer.zero_grad() 115 | loss.backward() 116 | self.optimizer.step() 117 | batch_time.update(time.time() - end) 118 | 119 | print('phase=%s, epoch=%5d, iters=%d/%d,time=%0.4f, loss=%0.4f, hm_loss=%0.4f, wh_loss=%0.4f, off_loss=%0.4f' \ 120 | % (phase, epoch,iter_id+1,num_iters, time.time() - end, 121 | loss.mean().cpu().detach().numpy(), 122 | loss_stats['hm_loss'].mean().cpu().detach().numpy(), 123 | loss_stats['wh_loss'].mean().cpu().detach().numpy(), 124 | loss_stats['off_loss'].mean().cpu().detach().numpy())) 125 | 126 | end = time.time() 127 | 128 | for l in avg_loss_stats: 129 | avg_loss_stats[l].update( 130 | loss_stats[l].mean().item(), batch['input'].size(0)) 131 | del output, loss, loss_stats 132 | 133 | ret = {k: v.avg for k, v in avg_loss_stats.items()} 134 | ret['time'] = 1 / 60. 135 | 136 | return ret, results 137 | 138 | def run_eval_epoch(self, phase, epoch, data_loader, base_s, dataset): 139 | model_with_loss = self.model_with_loss 140 | 141 | if len(self.opt.gpus) > 1: 142 | model_with_loss = self.model_with_loss.module 143 | model_with_loss.eval() 144 | torch.cuda.empty_cache() 145 | 146 | opt = self.opt 147 | results = {} 148 | data_time, batch_time = AverageMeter(), AverageMeter() 149 | avg_loss_stats = {l: AverageMeter() for l in self.loss_stats} 150 | num_iters = len(data_loader) 151 | end = time.time() 152 | 153 | for iter_id, (im_id, batch) in enumerate(data_loader): 154 | if iter_id >= num_iters: 155 | break 156 | data_time.update(time.time() - end) 157 | 158 | for k in batch: 159 | if k != 'meta' and k != 'file_name': 160 | batch[k] = batch[k].to(device=opt.device, non_blocking=True) 161 | output, loss, loss_stats = model_with_loss(batch) 162 | 163 | inp_height, inp_width = batch['input'].shape[3],batch['input'].shape[4] 164 | c = np.array([inp_width / 2., inp_height / 2.], dtype=np.float32) 165 | s = max(inp_height, inp_width) * 1.0 166 | 167 | meta = {'c': c, 's': s, 168 | 'out_height': inp_height, 169 | 'out_width': inp_width} 170 | 171 | dets = post_process(output, meta) 172 | ret = merge_outputs([dets], num_classes=1, max_per_image=opt.K) 173 | results[im_id.numpy().astype(np.int32)[0]] = ret 174 | 175 | loss = loss.mean() 176 | batch_time.update(time.time() - end) 177 | 178 | print('phase=%s, epoch=%5d, iters=%d/%d,time=%0.4f, loss=%0.4f, hm_loss=%0.4f, wh_loss=%0.4f, off_loss=%0.4f' \ 179 | % (phase, epoch,iter_id+1,num_iters, time.time() - end, 180 | loss.mean().cpu().detach().numpy(), 181 | loss_stats['hm_loss'].mean().cpu().detach().numpy(), 182 | loss_stats['wh_loss'].mean().cpu().detach().numpy(), 183 | loss_stats['off_loss'].mean().cpu().detach().numpy())) 184 | end = time.time() 185 | 186 | for l in avg_loss_stats: 187 | avg_loss_stats[l].update( 188 | loss_stats[l].mean().item(), batch['input'].size(0)) 189 | del output, loss, loss_stats 190 | 191 | ret = {k: v.avg for k, v in avg_loss_stats.items()} 192 | # coco_evaluator.accumulate() 193 | # coco_evaluator.summarize() 194 | stats1, _ = 
dataset.run_eval(results, opt.save_results_dir, 'latest') 195 | ret['time'] = 1 / 60. 196 | ret['ap50'] = stats1[1] 197 | 198 | return ret, results, stats1 199 | 200 | def debug(self, batch, output, iter_id): 201 | raise NotImplementedError 202 | 203 | def save_result(self, output, batch, results): 204 | raise NotImplementedError 205 | 206 | def _get_losses(self, opt): 207 | raise NotImplementedError 208 | 209 | def val(self, epoch, data_loader, base_s, dataset): 210 | # return self.run_epoch('val', epoch, data_loader) 211 | 212 | return self.run_eval_epoch('val', epoch, data_loader, base_s, dataset) 213 | 214 | def train(self, epoch, data_loader): 215 | return self.run_epoch('train', epoch, data_loader) -------------------------------------------------------------------------------- /lib/Trainer/ctdet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | import numpy as np 7 | 8 | from lib.loss.losses import FocalLoss 9 | from lib.loss.losses import RegL1Loss, RegLoss, NormRegL1Loss, RegWeightedL1Loss 10 | from lib.utils.decode import ctdet_decode 11 | from lib.utils.utils import _sigmoid 12 | from lib.utils.debugger import Debugger 13 | from lib.utils.post_process import ctdet_post_process 14 | from lib.Trainer.base_trainer import BaseTrainer 15 | import cv2 16 | 17 | 18 | class CtdetLoss(torch.nn.Module): 19 | def __init__(self, opt): 20 | super(CtdetLoss, self).__init__() 21 | self.crit = FocalLoss() # torch.nn.MSELoss() 22 | self.crit_reg = RegL1Loss() # RegLoss() 23 | self.crit_wh = torch.nn.L1Loss(reduction='sum') # NormRegL1Loss() # RegWeightedL1Loss() 24 | self.opt = opt 25 | self.wh_weight = 0.1 26 | self.hm_weight = 1 27 | self.off_weight = 1 28 | self.num_stacks = 1 29 | 30 | def forward(self, outputs, batch): 31 | hm_loss, wh_loss, off_loss = 0, 0, 0 32 | 33 | output = outputs[0] 34 | 35 | output['hm'] = _sigmoid(output['hm']) 36 | 37 | hm_loss += self.crit(output['hm'], batch['hm']) / self.num_stacks 38 | 39 | wh_loss += self.crit_reg( 40 | output['wh'], batch['reg_mask'], 41 | batch['ind'], batch['wh']) / self.num_stacks 42 | 43 | off_loss += self.crit_reg(output['reg'], batch['reg_mask'], 44 | batch['ind'], batch['reg']) 45 | 46 | loss = self.hm_weight * hm_loss + self.wh_weight * wh_loss + \ 47 | self.off_weight * off_loss 48 | 49 | loss_stats = {'loss': loss, 'hm_loss': hm_loss, 50 | 'wh_loss': wh_loss, 'off_loss': off_loss} 51 | 52 | return loss, loss_stats 53 | 54 | 55 | class CtdetTrainer(BaseTrainer): 56 | def __init__(self, opt, model, optimizer=None): 57 | super(CtdetTrainer, self).__init__(opt, model, optimizer=optimizer) 58 | 59 | def _get_losses(self, opt): 60 | loss_states = ['loss', 'hm_loss', 'wh_loss', 'off_loss'] 61 | # loss_states = ['loss', 'hm_loss'] 62 | loss = CtdetLoss(opt) 63 | return loss_states, loss 64 | 65 | def debug(self, batch, output, iter_id): 66 | opt = self.opt 67 | reg = output['reg'] if opt.reg_offset else None 68 | dets = ctdet_decode( 69 | output['hm'], output['wh'], reg=reg, 70 | cat_spec_wh=opt.cat_spec_wh, K=opt.K) 71 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 72 | dets[:, :, :4] *= opt.down_ratio 73 | dets_gt = batch['meta']['gt_det'].numpy().reshape(1, -1, dets.shape[2]) 74 | dets_gt[:, :, :4] *= opt.down_ratio 75 | for i in range(1): 76 | debugger = Debugger( 77 | dataset=opt.dataset, ipynb=(opt.debug == 3), theme=opt.debugger_theme) 
78 | img = batch['input'][i].detach().cpu().numpy().transpose(1, 2, 0) 79 | img = np.clip((( 80 | img * opt.std + opt.mean) * 255.), 0, 255).astype(np.uint8) 81 | pred = debugger.gen_colormap(output['hm'][i].detach().cpu().numpy()) 82 | gt = debugger.gen_colormap(batch['hm'][i].detach().cpu().numpy()) 83 | debugger.add_blend_img(img, pred, 'pred_hm') 84 | debugger.add_blend_img(img, gt, 'gt_hm') 85 | debugger.add_img(img, img_id='out_pred') 86 | for k in range(len(dets[i])): 87 | if dets[i, k, 4] > opt.center_thresh: 88 | debugger.add_coco_bbox(dets[i, k, :4], dets[i, k, -1], 89 | dets[i, k, 4], img_id='out_pred') 90 | 91 | debugger.add_img(img, img_id='out_gt') 92 | for k in range(len(dets_gt[i])): 93 | if dets_gt[i, k, 4] > opt.center_thresh: 94 | debugger.add_coco_bbox(dets_gt[i, k, :4], dets_gt[i, k, -1], 95 | dets_gt[i, k, 4], img_id='out_gt') 96 | 97 | if opt.debug == 4: 98 | debugger.save_all_imgs(opt.debug_dir, prefix='{}'.format(iter_id)) 99 | else: 100 | debugger.show_all_imgs(pause=True) 101 | 102 | def save_result(self, output, batch, results): 103 | reg = output['reg'] if self.opt.reg_offset else None 104 | dets = ctdet_decode( 105 | output['hm'], output['wh'], reg=reg, 106 | cat_spec_wh=self.opt.cat_spec_wh, K=self.opt.K) 107 | dets = dets.detach().cpu().numpy().reshape(1, -1, dets.shape[2]) 108 | dets_out = ctdet_post_process( 109 | dets.copy(), batch['meta']['c'].cpu().numpy(), 110 | batch['meta']['s'].cpu().numpy(), 111 | output['hm'].shape[2], output['hm'].shape[3], output['hm'].shape[1]) 112 | results[batch['meta']['img_id'].cpu().numpy()[0]] = dets_out[0] -------------------------------------------------------------------------------- /lib/dataset/coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | import numpy as np 13 | import torch 14 | import json 15 | import cv2 16 | import os 17 | from lib.utils.image import flip, color_aug 18 | from lib.utils.image import get_affine_transform, affine_transform 19 | from lib.utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian 20 | from lib.utils.image import draw_dense_reg 21 | import math 22 | from lib.utils.opts import opts 23 | 24 | from lib.utils.augmentations import Augmentation 25 | 26 | import torch.utils.data as data 27 | 28 | class COCO(data.Dataset): 29 | opt = opts().parse() 30 | num_classes = 1 31 | default_resolution = [512,512] 32 | dense_wh = False 33 | reg_offset = True 34 | mean = np.array([0.49965, 0.49965, 0.49965], 35 | dtype=np.float32).reshape(1, 1, 3) 36 | std = np.array([0.08255, 0.08255, 0.08255], 37 | dtype=np.float32).reshape(1, 1, 3) 38 | 39 | 40 | 41 | def __init__(self, opt, split): 42 | super(COCO, self).__init__() 43 | 44 | self.img_dir0 = self.opt.data_dir 45 | 46 | self.img_dir = self.opt.data_dir 47 | 48 | if opt.test_large_size: 49 | 50 | if split == 'train': 51 | self.resolution = [512, 512] 52 | self.annot_path = os.path.join( 53 | self.img_dir0, 'annotations', 54 | 'instances_{}2017.json').format(split) 55 | else: 56 | self.resolution = [1024, 1024] 57 | self.annot_path = os.path.join( 58 | self.img_dir0, 'annotations', 59 | 'instances_{}2017_1024.json').format(split) 60 | else: 61 | self.resolution = [512, 512] 62 | 
self.annot_path = os.path.join( 63 | self.img_dir0, 'annotations', 64 | 'instances_{}2017.json').format(split) 65 | 66 | self.down_ratio = opt.down_ratio 67 | self.max_objs = opt.K 68 | self.seqLen = opt.seqLen 69 | 70 | self.class_name = [ 71 | '__background__', 's'] 72 | self._valid_ids = [ 73 | 1, 2] 74 | self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)} # build the corresponding category-id dict 75 | 76 | self.split = split 77 | self.opt = opt 78 | 79 | print('==> initializing coco 2017 {} data.'.format(split)) 80 | self.coco = coco.COCO(self.annot_path) 81 | self.images = self.coco.getImgIds() 82 | self.num_samples = len(self.images) 83 | 84 | print('Loaded {} {} samples'.format(split, self.num_samples)) 85 | 86 | if(split=='train'): 87 | self.aug = Augmentation() 88 | else: 89 | self.aug = None 90 | 91 | def _to_float(self, x): 92 | return float("{:.2f}".format(x)) 93 | 94 | # parse every annotation record and convert it into COCO-style detections; used when exporting results 95 | def convert_eval_format(self, all_bboxes): 96 | # import pdb; pdb.set_trace() 97 | detections = [] 98 | for image_id in all_bboxes: 99 | for cls_ind in all_bboxes[image_id]: 100 | category_id = self._valid_ids[cls_ind - 1] 101 | for bbox in all_bboxes[image_id][cls_ind]: 102 | bbox[2] -= bbox[0] 103 | bbox[3] -= bbox[1] 104 | score = bbox[4] 105 | bbox_out = list(map(self._to_float, bbox[0:4])) 106 | 107 | detection = { 108 | "image_id": int(image_id), 109 | "category_id": int(category_id), 110 | "bbox": bbox_out, 111 | "score": float("{:.2f}".format(score)) 112 | } 113 | if len(bbox) > 5: 114 | extreme_points = list(map(self._to_float, bbox[5:13])) 115 | detection["extreme_points"] = extreme_points 116 | detections.append(detection) 117 | return detections 118 | 119 | def __len__(self): 120 | return self.num_samples 121 | 122 | def save_results(self, results, save_dir, time_str): 123 | json.dump(self.convert_eval_format(results), 124 | open('{}/results_{}.json'.format(save_dir,time_str), 'w')) 125 | 126 | print('{}/results_{}.json'.format(save_dir,time_str)) 127 | 128 | def run_eval(self, results, save_dir, time_str): 129 | self.save_results(results, save_dir, time_str) 130 | coco_dets = self.coco.loadRes('{}/results_{}.json'.format(save_dir, time_str)) 131 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 132 | coco_eval.evaluate() 133 | coco_eval.accumulate() 134 | coco_eval.summarize() 135 | stats = coco_eval.stats 136 | precisions = coco_eval.eval['precision'] 137 | 138 | return stats, precisions 139 | 140 | def run_eval_just(self, save_dir, time_str, iouth): 141 | coco_dets = self.coco.loadRes('{}/{}'.format(save_dir, time_str)) 142 | coco_eval = COCOeval(self.coco, coco_dets, "bbox", iouth = iouth) 143 | coco_eval.evaluate() 144 | coco_eval.accumulate() 145 | coco_eval.summarize() 146 | stats_5 = coco_eval.stats 147 | precisions = coco_eval.eval['precision'] 148 | 149 | return stats_5, precisions 150 | 151 | def _coco_box_to_bbox(self, box): 152 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 153 | dtype=np.float32) 154 | return bbox 155 | 156 | def _get_border(self, border, size): 157 | i = 1 158 | while size - border // i <= border // i: 159 | i *= 2 160 | return border // i 161 | 162 | def __getitem__(self, index): 163 | img_id = self.images[index] 164 | file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name'] 165 | ann_ids = self.coco.getAnnIds(imgIds=[img_id]) 166 | anns = self.coco.loadAnns(ids=ann_ids) 167 | num_objs = min(len(anns), self.max_objs) 168 | 169 | seq_num = self.seqLen 170 | imIdex = int(file_name.split('.')[0].split('/')[-1]) 171 | imf = file_name.split(file_name.split('/')[-1])[0] 172 | imtype = '.'+file_name.split('.')[-1] 173 | img = np.zeros([self.resolution[0], self.resolution[1], 3, seq_num]) 174 | 175 | for ii in range(seq_num): 176 | imIndexNew = '%06d' % max(imIdex - ii, 1) 177 | imName = imf+imIndexNew+imtype 178 | im = cv2.imread(self.img_dir + imName) 179 | if(ii==0): 180 | imgOri = im 181 | #normalize 182 | inp_i = (im.astype(np.float32) / 255.) 183 | inp_i = (inp_i - self.mean) / self.std 184 | img[:,:,:,ii] = inp_i 185 | 186 | bbox_tol = [] 187 | cls_id_tol = [] 188 | 189 | for k in range(num_objs): 190 | ann = anns[k] 191 | bbox_tol.append(self._coco_box_to_bbox(ann['bbox'])) 192 | cls_id_tol.append(self.cat_ids[ann['category_id']]) 193 | 194 | if self.aug is not None and num_objs>0: 195 | bbox_tol = np.array(bbox_tol) 196 | cls_id_tol = np.array(cls_id_tol) 197 | img, bbox_tol, cls_id_tol = self.aug(img, bbox_tol, cls_id_tol) 198 | bbox_tol = bbox_tol.tolist() 199 | cls_id_tol = cls_id_tol.tolist() 200 | num_objs = len(bbox_tol) 201 | 202 | #transpose after augmentation so the augmented frames are actually used as the network input (matches coco_rsdata.py) 203 | inp = img.transpose(2, 3, 0, 1).astype(np.float32) 204 | 205 | height, width = img.shape[0], img.shape[1] 206 | c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32) 207 | 208 | s = max(img.shape[0], img.shape[1]) * 1.0 209 | 210 | output_h = height // self.down_ratio 211 | output_w = width // self.down_ratio 212 | num_classes = self.num_classes 213 | trans_output = get_affine_transform(c, s, 0, [output_w, output_h]) 214 | 215 | hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32) 216 | wh = np.zeros((self.max_objs, 2), dtype=np.float32) 217 | dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32) 218 | reg = np.zeros((self.max_objs, 2), dtype=np.float32) 219 | ind = np.zeros((self.max_objs), dtype=np.int64) 220 | reg_mask = np.zeros((self.max_objs), dtype=np.uint8) 221 | cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32) 222 | cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8) 223 | 224 | draw_gaussian = draw_umich_gaussian 225 | 226 | gt_det = [] 227 | for k in range(num_objs): 228 | bbox = bbox_tol[k] 229 | cls_id = cls_id_tol[k] 230 | bbox[:2] = affine_transform(bbox[:2], trans_output) 231 | bbox[2:] = affine_transform(bbox[2:], trans_output) 232 | 233 | h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] 234 | h = np.clip(h, 0, output_h - 1) 235 | w = np.clip(w, 0, output_w - 1) 236 | if h > 0 and w > 0: 237 | radius = gaussian_radius((math.ceil(h), math.ceil(w))) 238 | radius = max(0, int(radius)) 239 | radius = radius 240 | ct = np.array( 241 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) 242 | ct[0] = np.clip(ct[0], 0, output_w - 1) 243 | ct[1] = np.clip(ct[1], 0, output_h - 1) 244 | ct_int = ct.astype(np.int32) 245 | draw_gaussian(hm[cls_id], ct_int, radius) 246 | wh[k] = 1. * w, 1. 
* h 247 | ind[k] = ct_int[1] * output_w + ct_int[0] 248 | reg[k] = ct - ct_int 249 | reg_mask[k] = 1 250 | cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k] 251 | cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1 252 | if self.dense_wh: 253 | draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius) 254 | gt_det.append([ct[0] - w / 2, ct[1] - h / 2, 255 | ct[0] + w / 2, ct[1] + h / 2, 1, cls_id]) 256 | for kkk in range(num_objs, self.max_objs): 257 | bbox_tol.append([]) 258 | 259 | 260 | ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'imgOri': imgOri} 261 | 262 | if self.dense_wh: 263 | hm_a = hm.max(axis=0, keepdims=True) 264 | dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0) 265 | ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask}) 266 | del ret['wh'] 267 | 268 | if self.reg_offset: 269 | ret.update({'reg': reg}) 270 | 271 | ret['file_name'] = file_name 272 | 273 | return img_id, ret -------------------------------------------------------------------------------- /lib/dataset/coco_eval.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | COCO evaluator that works in distributed mode. 4 | 5 | Mostly copy-paste from https://github.com/pytorch/vision/blob/edfd5a7/references/detection/coco_eval.py 6 | The difference is that there is less copy-pasting from pycocotools 7 | in the end of the file, as python3 can suppress prints with contextlib 8 | """ 9 | import os 10 | import contextlib 11 | import copy 12 | import numpy as np 13 | import torch 14 | 15 | from pycocotools.cocoeval import COCOeval 16 | from pycocotools.coco import COCO 17 | import pycocotools.mask as mask_util 18 | 19 | from lib.dataset.misc import all_gather 20 | 21 | class CocoEvaluator(object): 22 | def __init__(self, coco_gt, iou_types): 23 | assert isinstance(iou_types, (list, tuple)) 24 | coco_gt = copy.deepcopy(coco_gt) 25 | self.coco_gt = coco_gt 26 | 27 | self.iou_types = iou_types 28 | self.coco_eval = {} 29 | for iou_type in iou_types: 30 | self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) 31 | 32 | self.img_ids = [] 33 | self.eval_imgs = {k: [] for k in iou_types} 34 | 35 | def update(self, predictions): 36 | img_ids = list(np.unique(list(predictions.keys()))) 37 | self.img_ids.extend(img_ids) 38 | 39 | for iou_type in self.iou_types: 40 | results = self.prepare(predictions, iou_type) 41 | 42 | # suppress pycocotools prints 43 | with open(os.devnull, 'w') as devnull: 44 | with contextlib.redirect_stdout(devnull): 45 | coco_dt = COCO.loadRes(self.coco_gt, results) if results else COCO() 46 | coco_eval = self.coco_eval[iou_type] 47 | 48 | coco_eval.cocoDt = coco_dt 49 | coco_eval.params.imgIds = list(img_ids) 50 | img_ids, eval_imgs = evaluate(coco_eval) 51 | 52 | self.eval_imgs[iou_type].append(eval_imgs) 53 | 54 | def synchronize_between_processes(self): 55 | for iou_type in self.iou_types: 56 | self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) 57 | create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) 58 | 59 | def accumulate(self): 60 | for coco_eval in self.coco_eval.values(): 61 | coco_eval.accumulate() 62 | 63 | def summarize(self): 64 | for iou_type, coco_eval in self.coco_eval.items(): 65 | print("IoU metric: {}".format(iou_type)) 66 | coco_eval.summarize() 67 | 68 | def prepare(self, predictions, iou_type): 69 | if iou_type == "bbox": 70 | return 
self.prepare_for_coco_detection(predictions) 71 | elif iou_type == "segm": 72 | return self.prepare_for_coco_segmentation(predictions) 73 | elif iou_type == "keypoints": 74 | return self.prepare_for_coco_keypoint(predictions) 75 | else: 76 | raise ValueError("Unknown iou type {}".format(iou_type)) 77 | 78 | def prepare_for_coco_detection(self, predictions): 79 | coco_results = [] 80 | for original_id, prediction in predictions.items(): 81 | if len(prediction) == 0: 82 | continue 83 | 84 | boxes = prediction["boxes"] 85 | boxes = convert_to_xywh(boxes).tolist() 86 | scores = prediction["scores"].tolist() 87 | labels = prediction["labels"].tolist() 88 | 89 | coco_results.extend( 90 | [ 91 | { 92 | "image_id": original_id, 93 | "category_id": labels[k], 94 | "bbox": box, 95 | "score": scores[k], 96 | } 97 | for k, box in enumerate(boxes) 98 | ] 99 | ) 100 | return coco_results 101 | 102 | def prepare_for_coco_segmentation(self, predictions): 103 | coco_results = [] 104 | for original_id, prediction in predictions.items(): 105 | if len(prediction) == 0: 106 | continue 107 | 108 | scores = prediction["scores"] 109 | labels = prediction["labels"] 110 | masks = prediction["masks"] 111 | 112 | masks = masks > 0.5 113 | 114 | scores = prediction["scores"].tolist() 115 | labels = prediction["labels"].tolist() 116 | 117 | rles = [ 118 | mask_util.encode(np.array(mask[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] 119 | for mask in masks 120 | ] 121 | for rle in rles: 122 | rle["counts"] = rle["counts"].decode("utf-8") 123 | 124 | coco_results.extend( 125 | [ 126 | { 127 | "image_id": original_id, 128 | "category_id": labels[k], 129 | "segmentation": rle, 130 | "score": scores[k], 131 | } 132 | for k, rle in enumerate(rles) 133 | ] 134 | ) 135 | return coco_results 136 | 137 | def prepare_for_coco_keypoint(self, predictions): 138 | coco_results = [] 139 | for original_id, prediction in predictions.items(): 140 | if len(prediction) == 0: 141 | continue 142 | 143 | boxes = prediction["boxes"] 144 | boxes = convert_to_xywh(boxes).tolist() 145 | scores = prediction["scores"].tolist() 146 | labels = prediction["labels"].tolist() 147 | keypoints = prediction["keypoints"] 148 | keypoints = keypoints.flatten(start_dim=1).tolist() 149 | 150 | coco_results.extend( 151 | [ 152 | { 153 | "image_id": original_id, 154 | "category_id": labels[k], 155 | 'keypoints': keypoint, 156 | "score": scores[k], 157 | } 158 | for k, keypoint in enumerate(keypoints) 159 | ] 160 | ) 161 | return coco_results 162 | 163 | 164 | def convert_to_xywh(boxes): 165 | xmin, ymin, xmax, ymax = boxes[:,0],boxes[:,1],boxes[:,2],boxes[:,3] 166 | return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) 167 | 168 | 169 | def merge(img_ids, eval_imgs): 170 | all_img_ids = all_gather(img_ids) 171 | all_eval_imgs = all_gather(eval_imgs) 172 | 173 | merged_img_ids = [] 174 | for p in all_img_ids: 175 | merged_img_ids.extend(p) 176 | 177 | merged_eval_imgs = [] 178 | for p in all_eval_imgs: 179 | merged_eval_imgs.append(p) 180 | 181 | merged_img_ids = np.array(merged_img_ids) 182 | merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) 183 | 184 | # keep only unique (and in sorted order) images 185 | merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) 186 | merged_eval_imgs = merged_eval_imgs[..., idx] 187 | 188 | return merged_img_ids, merged_eval_imgs 189 | 190 | 191 | def create_common_coco_eval(coco_eval, img_ids, eval_imgs): 192 | img_ids, eval_imgs = merge(img_ids, eval_imgs) 193 | img_ids = list(img_ids) 194 | 
eval_imgs = list(eval_imgs.flatten()) 195 | 196 | coco_eval.evalImgs = eval_imgs 197 | coco_eval.params.imgIds = img_ids 198 | coco_eval._paramsEval = copy.deepcopy(coco_eval.params) 199 | 200 | 201 | ################################################################# 202 | # From pycocotools, just removed the prints and fixed 203 | # a Python3 bug about unicode not defined 204 | ################################################################# 205 | 206 | 207 | def evaluate(self): 208 | ''' 209 | Run per image evaluation on given images and store results (a list of dict) in self.evalImgs 210 | :return: None 211 | ''' 212 | # tic = time.time() 213 | # print('Running per image evaluation...') 214 | p = self.params 215 | # add backward compatibility if useSegm is specified in params 216 | if p.useSegm is not None: 217 | p.iouType = 'segm' if p.useSegm == 1 else 'bbox' 218 | print('useSegm (deprecated) is not None. Running {} evaluation'.format(p.iouType)) 219 | # print('Evaluate annotation type *{}*'.format(p.iouType)) 220 | p.imgIds = list(np.unique(p.imgIds)) 221 | if p.useCats: 222 | p.catIds = list(np.unique(p.catIds)) 223 | p.maxDets = sorted(p.maxDets) 224 | self.params = p 225 | 226 | self._prepare() 227 | # loop through images, area range, max detection number 228 | catIds = p.catIds if p.useCats else [-1] 229 | 230 | if p.iouType == 'segm' or p.iouType == 'bbox': 231 | computeIoU = self.computeIoU 232 | elif p.iouType == 'keypoints': 233 | computeIoU = self.computeOks 234 | self.ious = { 235 | (imgId, catId): computeIoU(imgId, catId) 236 | for imgId in p.imgIds 237 | for catId in catIds} 238 | 239 | evaluateImg = self.evaluateImg 240 | maxDet = p.maxDets[-1] 241 | evalImgs = [ 242 | evaluateImg(imgId, catId, areaRng, maxDet) 243 | for catId in catIds 244 | for areaRng in p.areaRng 245 | for imgId in p.imgIds 246 | ] 247 | # this is NOT in the pycocotools code, but could be done outside 248 | evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) 249 | self._paramsEval = copy.deepcopy(self.params) 250 | # toc = time.time() 251 | # print('DONE (t={:0.2f}s).'.format(toc-tic)) 252 | return p.imgIds, evalImgs 253 | 254 | ################################################################# 255 | # end of straight copy from pycocotools, just removing the prints 256 | ################################################################# 257 | -------------------------------------------------------------------------------- /lib/dataset/coco_rsdata.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | from pycocotools.cocoeval import COCOeval 7 | import numpy as np 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | import numpy as np 13 | import torch 14 | import json 15 | import cv2 16 | import os 17 | from lib.utils.image import flip, color_aug 18 | from lib.utils.image import get_affine_transform, affine_transform 19 | from lib.utils.image import gaussian_radius, draw_umich_gaussian, draw_msra_gaussian 20 | from lib.utils.image import draw_dense_reg 21 | import math 22 | from lib.utils.opts import opts 23 | 24 | from lib.utils.augmentations import Augmentation 25 | 26 | import torch.utils.data as data 27 | 28 | class COCO(data.Dataset): 29 | opt = opts().parse() 30 | num_classes = 1 31 | default_resolution = [512,512] 32 | dense_wh = False 33 | 
reg_offset = True 34 | mean = np.array([0.49965, 0.49965, 0.49965], 35 | dtype=np.float32).reshape(1, 1, 3) 36 | std = np.array([0.08255, 0.08255, 0.08255], 37 | dtype=np.float32).reshape(1, 1, 3) 38 | 39 | def __init__(self, opt, split): 40 | super(COCO, self).__init__() 41 | 42 | self.img_dir0 = self.opt.data_dir 43 | 44 | self.img_dir = self.opt.data_dir 45 | 46 | if opt.test_large_size: 47 | 48 | if split == 'train': 49 | self.resolution = [512, 512] 50 | self.annot_path = os.path.join( 51 | self.img_dir0, 'annotations', 52 | 'instances_{}2017.json').format(split) 53 | else: 54 | self.resolution = [1024, 1024] 55 | self.annot_path = os.path.join( 56 | self.img_dir0, 'annotations', 57 | 'instances_{}2017_1024.json').format(split) 58 | else: 59 | self.resolution = [512, 512] 60 | self.annot_path = os.path.join( 61 | self.img_dir0, 'annotations', 62 | 'instances_{}2017.json').format(split) 63 | 64 | self.down_ratio = opt.down_ratio 65 | self.max_objs = opt.K 66 | self.seqLen = opt.seqLen 67 | 68 | self.class_name = [ 69 | '__background__', 'car'] 70 | self._valid_ids = [ 71 | 1, 2] 72 | self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)} # build the corresponding category-id dict 73 | 74 | self.split = split 75 | self.opt = opt 76 | 77 | print('==> initializing coco 2017 {} data.'.format(split)) 78 | self.coco = coco.COCO(self.annot_path) 79 | self.images = self.coco.getImgIds() 80 | self.num_samples = len(self.images) 81 | 82 | print('Loaded {} {} samples'.format(split, self.num_samples)) 83 | 84 | if(split=='train'): 85 | self.aug = Augmentation() 86 | else: 87 | self.aug = None 88 | 89 | def _to_float(self, x): 90 | return float("{:.2f}".format(x)) 91 | 92 | # parse every annotation record and convert it into COCO-style detections; used when exporting results 93 | def convert_eval_format(self, all_bboxes): 94 | # import pdb; pdb.set_trace() 95 | detections = [] 96 | for image_id in all_bboxes: 97 | for cls_ind in all_bboxes[image_id]: 98 | category_id = self._valid_ids[cls_ind - 1] 99 | for bbox in all_bboxes[image_id][cls_ind]: 100 | bbox[2] -= bbox[0] 101 | bbox[3] -= bbox[1] 102 | score = bbox[4] 103 | bbox_out = list(map(self._to_float, bbox[0:4])) 104 | 105 | detection = { 106 | "image_id": int(image_id), 107 | "category_id": int(category_id), 108 | "bbox": bbox_out, 109 | "score": float("{:.2f}".format(score)) 110 | } 111 | if len(bbox) > 5: 112 | extreme_points = list(map(self._to_float, bbox[5:13])) 113 | detection["extreme_points"] = extreme_points 114 | detections.append(detection) 115 | return detections 116 | 117 | def __len__(self): 118 | return self.num_samples 119 | 120 | def save_results(self, results, save_dir, time_str): 121 | json.dump(self.convert_eval_format(results), 122 | open('{}/results_{}.json'.format(save_dir,time_str), 'w')) 123 | 124 | print('{}/results_{}.json'.format(save_dir,time_str)) 125 | 126 | def run_eval(self, results, save_dir, time_str): 127 | self.save_results(results, save_dir, time_str) 128 | coco_dets = self.coco.loadRes('{}/results_{}.json'.format(save_dir, time_str)) 129 | coco_eval = COCOeval(self.coco, coco_dets, "bbox") 130 | coco_eval.evaluate() 131 | coco_eval.accumulate() 132 | coco_eval.summarize() 133 | stats = coco_eval.stats 134 | precisions = coco_eval.eval['precision'] 135 | 136 | return stats, precisions 137 | 138 | def run_eval_just(self, save_dir, time_str, iouth): 139 | coco_dets = self.coco.loadRes('{}/{}'.format(save_dir, time_str)) 140 | coco_eval = COCOeval(self.coco, coco_dets, "bbox", iouth = iouth) 141 | coco_eval.evaluate() 142 | coco_eval.accumulate() 143 | coco_eval.summarize() 144 | stats_5 = 
coco_eval.stats 145 | precisions = coco_eval.eval['precision'] 146 | 147 | return stats_5, precisions 148 | 149 | def _coco_box_to_bbox(self, box): 150 | bbox = np.array([box[0], box[1], box[0] + box[2], box[1] + box[3]], 151 | dtype=np.float32) 152 | return bbox 153 | 154 | def _get_border(self, border, size): 155 | i = 1 156 | while size - border // i <= border // i: 157 | i *= 2 158 | return border // i 159 | 160 | def __getitem__(self, index): 161 | img_id = self.images[index] 162 | file_name = self.coco.loadImgs(ids=[img_id])[0]['file_name'] 163 | ann_ids = self.coco.getAnnIds(imgIds=[img_id]) 164 | anns = self.coco.loadAnns(ids=ann_ids) 165 | num_objs = min(len(anns), self.max_objs) 166 | 167 | seq_num = self.seqLen 168 | imIdex = int(file_name.split('.')[0].split('/')[-1]) 169 | imf = file_name.split(file_name.split('/')[-1])[0] 170 | imtype = '.'+file_name.split('.')[-1] 171 | img = np.zeros([self.resolution[0], self.resolution[1], 3, seq_num]) 172 | 173 | for ii in range(seq_num): 174 | imIndexNew = '%06d' % max(imIdex - ii, 1) 175 | imName = imf+imIndexNew+imtype 176 | im = cv2.imread(self.img_dir + imName) 177 | if(ii==0): 178 | imgOri = im 179 | #normalize 180 | inp_i = (im.astype(np.float32) / 255.) 181 | inp_i = (inp_i - self.mean) / self.std 182 | img[:,:,:,ii] = inp_i 183 | 184 | bbox_tol = [] 185 | cls_id_tol = [] 186 | 187 | for k in range(num_objs): 188 | ann = anns[k] 189 | bbox_tol.append(self._coco_box_to_bbox(ann['bbox'])) 190 | cls_id_tol.append(self.cat_ids[ann['category_id']]) 191 | 192 | if self.aug is not None and num_objs>0: 193 | bbox_tol = np.array(bbox_tol) 194 | cls_id_tol = np.array(cls_id_tol) 195 | img, bbox_tol, cls_id_tol = self.aug(img, bbox_tol, cls_id_tol) 196 | bbox_tol = bbox_tol.tolist() 197 | cls_id_tol = cls_id_tol.tolist() 198 | num_objs = len(bbox_tol) 199 | 200 | #transpose 201 | inp = img.transpose(2, 3, 0, 1).astype(np.float32) 202 | 203 | height, width = img.shape[0], img.shape[1] 204 | c = np.array([img.shape[1] / 2., img.shape[0] / 2.], dtype=np.float32) 205 | 206 | s = max(img.shape[0], img.shape[1]) * 1.0 207 | 208 | output_h = height // self.down_ratio 209 | output_w = width // self.down_ratio 210 | num_classes = self.num_classes 211 | trans_output = get_affine_transform(c, s, 0, [output_w, output_h]) 212 | 213 | hm = np.zeros((num_classes, output_h, output_w), dtype=np.float32) 214 | wh = np.zeros((self.max_objs, 2), dtype=np.float32) 215 | dense_wh = np.zeros((2, output_h, output_w), dtype=np.float32) 216 | reg = np.zeros((self.max_objs, 2), dtype=np.float32) 217 | ind = np.zeros((self.max_objs), dtype=np.int64) 218 | reg_mask = np.zeros((self.max_objs), dtype=np.uint8) 219 | cat_spec_wh = np.zeros((self.max_objs, num_classes * 2), dtype=np.float32) 220 | cat_spec_mask = np.zeros((self.max_objs, num_classes * 2), dtype=np.uint8) 221 | 222 | draw_gaussian = draw_umich_gaussian 223 | 224 | gt_det = [] 225 | for k in range(num_objs): 226 | bbox = bbox_tol[k] 227 | cls_id = cls_id_tol[k] 228 | bbox[:2] = affine_transform(bbox[:2], trans_output) 229 | bbox[2:] = affine_transform(bbox[2:], trans_output) 230 | 231 | h, w = bbox[3] - bbox[1], bbox[2] - bbox[0] 232 | h = np.clip(h, 0, output_h - 1) 233 | w = np.clip(w, 0, output_w - 1) 234 | if h > 0 and w > 0: 235 | radius = gaussian_radius((math.ceil(h), math.ceil(w))) 236 | radius = max(0, int(radius)) 237 | radius = radius 238 | ct = np.array( 239 | [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) 240 | ct[0] = np.clip(ct[0], 0, output_w - 1) 241 | ct[1] = 
np.clip(ct[1], 0, output_h - 1) 242 | ct_int = ct.astype(np.int32) 243 | draw_gaussian(hm[cls_id], ct_int, radius) 244 | wh[k] = 1. * w, 1. * h 245 | ind[k] = ct_int[1] * output_w + ct_int[0] 246 | reg[k] = ct - ct_int 247 | reg_mask[k] = 1 248 | cat_spec_wh[k, cls_id * 2: cls_id * 2 + 2] = wh[k] 249 | cat_spec_mask[k, cls_id * 2: cls_id * 2 + 2] = 1 250 | if self.dense_wh: 251 | draw_dense_reg(dense_wh, hm.max(axis=0), ct_int, wh[k], radius) 252 | gt_det.append([ct[0] - w / 2, ct[1] - h / 2, 253 | ct[0] + w / 2, ct[1] + h / 2, 1, cls_id]) 254 | for kkk in range(num_objs, self.max_objs): 255 | bbox_tol.append([]) 256 | 257 | 258 | ret = {'input': inp, 'hm': hm, 'reg_mask': reg_mask, 'ind': ind, 'wh': wh, 'imgOri': imgOri} 259 | 260 | if self.dense_wh: 261 | hm_a = hm.max(axis=0, keepdims=True) 262 | dense_wh_mask = np.concatenate([hm_a, hm_a], axis=0) 263 | ret.update({'dense_wh': dense_wh, 'dense_wh_mask': dense_wh_mask}) 264 | del ret['wh'] 265 | 266 | if self.reg_offset: 267 | ret.update({'reg': reg}) 268 | 269 | ret['file_name'] = file_name 270 | 271 | return img_id, ret -------------------------------------------------------------------------------- /lib/dataset/pascal.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import pycocotools.coco as coco 6 | import numpy as np 7 | import torch 8 | import json 9 | import os 10 | 11 | import torch.utils.data as data 12 | 13 | class PascalVOC(data.Dataset): 14 | num_classes = 20 15 | default_resolution = [384, 384] 16 | mean = np.array([0.485, 0.456, 0.406], 17 | dtype=np.float32).reshape(1, 1, 3) 18 | std = np.array([0.229, 0.224, 0.225], 19 | dtype=np.float32).reshape(1, 1, 3) 20 | 21 | def __init__(self, opt, split): 22 | super(PascalVOC, self).__init__() 23 | self.data_dir = os.path.join(opt.data_dir, 'voc') 24 | self.img_dir = os.path.join(self.data_dir, 'images') 25 | _ann_name = {'train': 'trainval0712', 'val': 'test2007'} 26 | self.annot_path = os.path.join( 27 | self.data_dir, 'annotations', 28 | 'pascal_{}.json').format(_ann_name[split]) 29 | self.max_objs = 50 30 | self.class_name = ['__background__', "aeroplane", "bicycle", "bird", "boat", 31 | "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", 32 | "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", 33 | "train", "tvmonitor"] 34 | self._valid_ids = np.arange(1, 21, dtype=np.int32) 35 | self.cat_ids = {v: i for i, v in enumerate(self._valid_ids)} 36 | self._data_rng = np.random.RandomState(123) 37 | self._eig_val = np.array([0.2141788, 0.01817699, 0.00341571], 38 | dtype=np.float32) 39 | self._eig_vec = np.array([ 40 | [-0.58752847, -0.69563484, 0.41340352], 41 | [-0.5832747, 0.00994535, -0.81221408], 42 | [-0.56089297, 0.71832671, 0.41158938] 43 | ], dtype=np.float32) 44 | self.split = split 45 | self.opt = opt 46 | 47 | print('==> initializing pascal {} data.'.format(_ann_name[split])) 48 | self.coco = coco.COCO(self.annot_path) 49 | self.images = sorted(self.coco.getImgIds()) 50 | self.num_samples = len(self.images) 51 | 52 | print('Loaded {} {} samples'.format(split, self.num_samples)) 53 | 54 | def _to_float(self, x): 55 | return float("{:.2f}".format(x)) 56 | 57 | def convert_eval_format(self, all_bboxes): 58 | detections = [[[] for __ in range(self.num_samples)] \ 59 | for _ in range(self.num_classes + 1)] 60 | for i in range(self.num_samples): 61 | img_id = self.images[i] 62 | for j 
in range(1, self.num_classes + 1): 63 | if isinstance(all_bboxes[img_id][j], np.ndarray): 64 | detections[j][i] = all_bboxes[img_id][j].tolist() 65 | else: 66 | detections[j][i] = all_bboxes[img_id][j] 67 | return detections 68 | 69 | def __len__(self): 70 | return self.num_samples 71 | 72 | def save_results(self, results, save_dir): 73 | json.dump(self.convert_eval_format(results), 74 | open('{}/results.json'.format(save_dir), 'w')) 75 | 76 | def run_eval(self, results, save_dir): 77 | # result_json = os.path.join(save_dir, "results.json") 78 | # detections = self.convert_eval_format(results) 79 | # json.dump(detections, open(result_json, "w")) 80 | self.save_results(results, save_dir) 81 | os.system('python tools/reval.py ' + \ 82 | '{}/results.json'.format(save_dir)) 83 | -------------------------------------------------------------------------------- /lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /lib/external/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lib/external/nms.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChaoXiao12/Moving-object-detection-DSFNet/2938297db6d92478ba34f00d85324e6d1cd3a1c5/lib/external/nms.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/external/nms.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChaoXiao12/Moving-object-detection-DSFNet/2938297db6d92478ba34f00d85324e6d1cd3a1c5/lib/external/nms.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/external/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | # ---------------------------------------------------------- 9 | # Soft-NMS: Improving Object Detection With One Line of Code 10 | # Copyright (c) University of Maryland, College Park 11 | # Licensed under The MIT License [see LICENSE for details] 12 | # Written by Navaneeth Bodla and Bharat Singh 13 | # ---------------------------------------------------------- 14 | 15 | import numpy as np 16 | cimport numpy as np 17 | 18 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 19 | return a if a >= b else b 20 | 21 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 22 | return a if a <= b else b 23 | 24 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 25 | cdef 
np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 26 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 27 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 28 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 29 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 30 | 31 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 32 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 33 | 34 | cdef int ndets = dets.shape[0] 35 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 36 | np.zeros((ndets), dtype=np.int) 37 | 38 | # nominal indices 39 | cdef int _i, _j 40 | # sorted indices 41 | cdef int i, j 42 | # temp variables for box i's (the box currently under consideration) 43 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 44 | # variables for computing overlap with box j (lower scoring box) 45 | cdef np.float32_t xx1, yy1, xx2, yy2 46 | cdef np.float32_t w, h 47 | cdef np.float32_t inter, ovr 48 | 49 | keep = [] 50 | for _i in range(ndets): 51 | i = order[_i] 52 | if suppressed[i] == 1: 53 | continue 54 | keep.append(i) 55 | ix1 = x1[i] 56 | iy1 = y1[i] 57 | ix2 = x2[i] 58 | iy2 = y2[i] 59 | iarea = areas[i] 60 | for _j in range(_i + 1, ndets): 61 | j = order[_j] 62 | if suppressed[j] == 1: 63 | continue 64 | xx1 = max(ix1, x1[j]) 65 | yy1 = max(iy1, y1[j]) 66 | xx2 = min(ix2, x2[j]) 67 | yy2 = min(iy2, y2[j]) 68 | w = max(0.0, xx2 - xx1 + 1) 69 | h = max(0.0, yy2 - yy1 + 1) 70 | inter = w * h 71 | ovr = inter / (iarea + areas[j] - inter) 72 | if ovr >= thresh: 73 | suppressed[j] = 1 74 | 75 | return keep 76 | 77 | def soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0): 78 | cdef unsigned int N = boxes.shape[0] 79 | cdef float iw, ih, box_area 80 | cdef float ua 81 | cdef int pos = 0 82 | cdef float maxscore = 0 83 | cdef int maxpos = 0 84 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 85 | 86 | for i in range(N): 87 | maxscore = boxes[i, 4] 88 | maxpos = i 89 | 90 | tx1 = boxes[i,0] 91 | ty1 = boxes[i,1] 92 | tx2 = boxes[i,2] 93 | ty2 = boxes[i,3] 94 | ts = boxes[i,4] 95 | 96 | pos = i + 1 97 | # get max box 98 | while pos < N: 99 | if maxscore < boxes[pos, 4]: 100 | maxscore = boxes[pos, 4] 101 | maxpos = pos 102 | pos = pos + 1 103 | 104 | # add max box as a detection 105 | boxes[i,0] = boxes[maxpos,0] 106 | boxes[i,1] = boxes[maxpos,1] 107 | boxes[i,2] = boxes[maxpos,2] 108 | boxes[i,3] = boxes[maxpos,3] 109 | boxes[i,4] = boxes[maxpos,4] 110 | 111 | # swap ith box with position of max box 112 | boxes[maxpos,0] = tx1 113 | boxes[maxpos,1] = ty1 114 | boxes[maxpos,2] = tx2 115 | boxes[maxpos,3] = ty2 116 | boxes[maxpos,4] = ts 117 | 118 | tx1 = boxes[i,0] 119 | ty1 = boxes[i,1] 120 | tx2 = boxes[i,2] 121 | ty2 = boxes[i,3] 122 | ts = boxes[i,4] 123 | 124 | pos = i + 1 125 | # NMS iterations, note that N changes if detection boxes fall below threshold 126 | while pos < N: 127 | x1 = boxes[pos, 0] 128 | y1 = boxes[pos, 1] 129 | x2 = boxes[pos, 2] 130 | y2 = boxes[pos, 3] 131 | s = boxes[pos, 4] 132 | 133 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 134 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 135 | if iw > 0: 136 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 137 | if ih > 0: 138 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 139 | ov = iw * ih / ua #iou between max box and detection box 140 | 141 | if method == 1: # linear 142 | if ov > Nt: 143 | weight = 1 - ov 144 | else: 145 | weight = 1 146 | elif method == 2: # gaussian 147 | weight = 
np.exp(-(ov * ov)/sigma) 148 | else: # original NMS 149 | if ov > Nt: 150 | weight = 0 151 | else: 152 | weight = 1 153 | 154 | boxes[pos, 4] = weight*boxes[pos, 4] 155 | 156 | # if box score falls below threshold, discard the box by swapping with last box 157 | # update N 158 | if boxes[pos, 4] < threshold: 159 | boxes[pos,0] = boxes[N-1, 0] 160 | boxes[pos,1] = boxes[N-1, 1] 161 | boxes[pos,2] = boxes[N-1, 2] 162 | boxes[pos,3] = boxes[N-1, 3] 163 | boxes[pos,4] = boxes[N-1, 4] 164 | N = N - 1 165 | pos = pos - 1 166 | 167 | pos = pos + 1 168 | 169 | keep = [i for i in range(N)] 170 | return keep 171 | 172 | def soft_nms_39(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0): 173 | cdef unsigned int N = boxes.shape[0] 174 | cdef float iw, ih, box_area 175 | cdef float ua 176 | cdef int pos = 0 177 | cdef float maxscore = 0 178 | cdef int maxpos = 0 179 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 180 | cdef float tmp 181 | 182 | for i in range(N): 183 | maxscore = boxes[i, 4] 184 | maxpos = i 185 | 186 | tx1 = boxes[i,0] 187 | ty1 = boxes[i,1] 188 | tx2 = boxes[i,2] 189 | ty2 = boxes[i,3] 190 | ts = boxes[i,4] 191 | 192 | pos = i + 1 193 | # get max box 194 | while pos < N: 195 | if maxscore < boxes[pos, 4]: 196 | maxscore = boxes[pos, 4] 197 | maxpos = pos 198 | pos = pos + 1 199 | 200 | # add max box as a detection 201 | boxes[i,0] = boxes[maxpos,0] 202 | boxes[i,1] = boxes[maxpos,1] 203 | boxes[i,2] = boxes[maxpos,2] 204 | boxes[i,3] = boxes[maxpos,3] 205 | boxes[i,4] = boxes[maxpos,4] 206 | 207 | # swap ith box with position of max box 208 | boxes[maxpos,0] = tx1 209 | boxes[maxpos,1] = ty1 210 | boxes[maxpos,2] = tx2 211 | boxes[maxpos,3] = ty2 212 | boxes[maxpos,4] = ts 213 | 214 | for j in range(5, 39): 215 | tmp = boxes[i, j] 216 | boxes[i, j] = boxes[maxpos, j] 217 | boxes[maxpos, j] = tmp 218 | 219 | tx1 = boxes[i,0] 220 | ty1 = boxes[i,1] 221 | tx2 = boxes[i,2] 222 | ty2 = boxes[i,3] 223 | ts = boxes[i,4] 224 | 225 | pos = i + 1 226 | # NMS iterations, note that N changes if detection boxes fall below threshold 227 | while pos < N: 228 | x1 = boxes[pos, 0] 229 | y1 = boxes[pos, 1] 230 | x2 = boxes[pos, 2] 231 | y2 = boxes[pos, 3] 232 | s = boxes[pos, 4] 233 | 234 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 235 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 236 | if iw > 0: 237 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 238 | if ih > 0: 239 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 240 | ov = iw * ih / ua #iou between max box and detection box 241 | 242 | if method == 1: # linear 243 | if ov > Nt: 244 | weight = 1 - ov 245 | else: 246 | weight = 1 247 | elif method == 2: # gaussian 248 | weight = np.exp(-(ov * ov)/sigma) 249 | else: # original NMS 250 | if ov > Nt: 251 | weight = 0 252 | else: 253 | weight = 1 254 | 255 | boxes[pos, 4] = weight*boxes[pos, 4] 256 | 257 | # if box score falls below threshold, discard the box by swapping with last box 258 | # update N 259 | if boxes[pos, 4] < threshold: 260 | boxes[pos,0] = boxes[N-1, 0] 261 | boxes[pos,1] = boxes[N-1, 1] 262 | boxes[pos,2] = boxes[N-1, 2] 263 | boxes[pos,3] = boxes[N-1, 3] 264 | boxes[pos,4] = boxes[N-1, 4] 265 | for j in range(5, 39): 266 | tmp = boxes[pos, j] 267 | boxes[pos, j] = boxes[N - 1, j] 268 | boxes[N - 1, j] = tmp 269 | N = N - 1 270 | pos = pos - 1 271 | 272 | pos = pos + 1 273 | 274 | keep = [i for i in range(N)] 275 | return keep 276 | 277 | def soft_nms_merge(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float 
Nt=0.3, float threshold=0.001, unsigned int method=0, float weight_exp=6): 278 | cdef unsigned int N = boxes.shape[0] 279 | cdef float iw, ih, box_area 280 | cdef float ua 281 | cdef int pos = 0 282 | cdef float maxscore = 0 283 | cdef int maxpos = 0 284 | cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov 285 | cdef float mx1,mx2,my1,my2,mts,mbs,mw 286 | 287 | for i in range(N): 288 | maxscore = boxes[i, 4] 289 | maxpos = i 290 | 291 | tx1 = boxes[i,0] 292 | ty1 = boxes[i,1] 293 | tx2 = boxes[i,2] 294 | ty2 = boxes[i,3] 295 | ts = boxes[i,4] 296 | 297 | pos = i + 1 298 | # get max box 299 | while pos < N: 300 | if maxscore < boxes[pos, 4]: 301 | maxscore = boxes[pos, 4] 302 | maxpos = pos 303 | pos = pos + 1 304 | 305 | # add max box as a detection 306 | boxes[i,0] = boxes[maxpos,0] 307 | boxes[i,1] = boxes[maxpos,1] 308 | boxes[i,2] = boxes[maxpos,2] 309 | boxes[i,3] = boxes[maxpos,3] 310 | boxes[i,4] = boxes[maxpos,4] 311 | 312 | mx1 = boxes[i, 0] * boxes[i, 5] 313 | my1 = boxes[i, 1] * boxes[i, 5] 314 | mx2 = boxes[i, 2] * boxes[i, 6] 315 | my2 = boxes[i, 3] * boxes[i, 6] 316 | mts = boxes[i, 5] 317 | mbs = boxes[i, 6] 318 | 319 | # swap ith box with position of max box 320 | boxes[maxpos,0] = tx1 321 | boxes[maxpos,1] = ty1 322 | boxes[maxpos,2] = tx2 323 | boxes[maxpos,3] = ty2 324 | boxes[maxpos,4] = ts 325 | 326 | tx1 = boxes[i,0] 327 | ty1 = boxes[i,1] 328 | tx2 = boxes[i,2] 329 | ty2 = boxes[i,3] 330 | ts = boxes[i,4] 331 | 332 | pos = i + 1 333 | # NMS iterations, note that N changes if detection boxes fall below threshold 334 | while pos < N: 335 | x1 = boxes[pos, 0] 336 | y1 = boxes[pos, 1] 337 | x2 = boxes[pos, 2] 338 | y2 = boxes[pos, 3] 339 | s = boxes[pos, 4] 340 | 341 | area = (x2 - x1 + 1) * (y2 - y1 + 1) 342 | iw = (min(tx2, x2) - max(tx1, x1) + 1) 343 | if iw > 0: 344 | ih = (min(ty2, y2) - max(ty1, y1) + 1) 345 | if ih > 0: 346 | ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih) 347 | ov = iw * ih / ua #iou between max box and detection box 348 | 349 | if method == 1: # linear 350 | if ov > Nt: 351 | weight = 1 - ov 352 | else: 353 | weight = 1 354 | elif method == 2: # gaussian 355 | weight = np.exp(-(ov * ov)/sigma) 356 | else: # original NMS 357 | if ov > Nt: 358 | weight = 0 359 | else: 360 | weight = 1 361 | 362 | mw = (1 - weight) ** weight_exp 363 | mx1 = mx1 + boxes[pos, 0] * boxes[pos, 5] * mw 364 | my1 = my1 + boxes[pos, 1] * boxes[pos, 5] * mw 365 | mx2 = mx2 + boxes[pos, 2] * boxes[pos, 6] * mw 366 | my2 = my2 + boxes[pos, 3] * boxes[pos, 6] * mw 367 | mts = mts + boxes[pos, 5] * mw 368 | mbs = mbs + boxes[pos, 6] * mw 369 | 370 | boxes[pos, 4] = weight*boxes[pos, 4] 371 | 372 | # if box score falls below threshold, discard the box by swapping with last box 373 | # update N 374 | if boxes[pos, 4] < threshold: 375 | boxes[pos,0] = boxes[N-1, 0] 376 | boxes[pos,1] = boxes[N-1, 1] 377 | boxes[pos,2] = boxes[N-1, 2] 378 | boxes[pos,3] = boxes[N-1, 3] 379 | boxes[pos,4] = boxes[N-1, 4] 380 | N = N - 1 381 | pos = pos - 1 382 | 383 | pos = pos + 1 384 | 385 | boxes[i, 0] = mx1 / mts 386 | boxes[i, 1] = my1 / mts 387 | boxes[i, 2] = mx2 / mbs 388 | boxes[i, 3] = my2 / mbs 389 | 390 | keep = [i for i in range(N)] 391 | return keep 392 | -------------------------------------------------------------------------------- /lib/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import 
cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /lib/loss/1.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lib/loss/losses.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Portions of this code are from 3 | # CornerNet (https://github.com/princeton-vl/CornerNet) 4 | # Copyright (c) 2018, University of Michigan 5 | # Licensed under the BSD 3-Clause License 6 | # ------------------------------------------------------------------------------ 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import torch 12 | import torch.nn as nn 13 | from lib.utils.utils import _transpose_and_gather_feat 14 | import torch.nn.functional as F 15 | 16 | 17 | def _slow_neg_loss(pred, gt): 18 | '''focal loss from CornerNet''' 19 | pos_inds = gt.eq(1) 20 | neg_inds = gt.lt(1) 21 | 22 | neg_weights = torch.pow(1 - gt[neg_inds], 4) 23 | 24 | loss = 0 25 | pos_pred = pred[pos_inds] 26 | neg_pred = pred[neg_inds] 27 | 28 | pos_loss = torch.log(pos_pred) * torch.pow(1 - pos_pred, 2) 29 | neg_loss = torch.log(1 - neg_pred) * torch.pow(neg_pred, 2) * neg_weights 30 | 31 | num_pos = pos_inds.float().sum() 32 | pos_loss = pos_loss.sum() 33 | neg_loss = neg_loss.sum() 34 | 35 | if pos_pred.nelement() == 0: 36 | loss = loss - neg_loss 37 | else: 38 | loss = loss - (pos_loss + neg_loss) / num_pos 39 | return loss 40 | 41 | 42 | def _neg_loss(pred, gt): 43 | ''' Modified focal loss. Exactly the same as CornerNet. 
44 | Runs faster and costs a little bit more memory 45 | Arguments: 46 | pred (batch x c x h x w) 47 | gt_regr (batch x c x h x w) 48 | ''' 49 | pos_inds = gt.eq(1).float() 50 | neg_inds = gt.lt(1).float() 51 | 52 | neg_weights = torch.pow(1 - gt, 4) 53 | 54 | loss = 0 55 | 56 | pos_loss = torch.log(pred) * torch.pow(1 - pred, 2) * pos_inds 57 | neg_loss = torch.log(1 - pred) * torch.pow(pred, 2) * neg_weights * neg_inds 58 | 59 | num_pos = pos_inds.float().sum() 60 | pos_loss = pos_loss.sum() 61 | neg_loss = neg_loss.sum() 62 | 63 | if num_pos == 0: 64 | loss = loss - neg_loss 65 | else: 66 | loss = loss - (pos_loss + neg_loss) / num_pos 67 | return loss 68 | 69 | 70 | def _not_faster_neg_loss(pred, gt): 71 | pos_inds = gt.eq(1).float() 72 | neg_inds = gt.lt(1).float() 73 | num_pos = pos_inds.float().sum() 74 | neg_weights = torch.pow(1 - gt, 4) 75 | 76 | loss = 0 77 | trans_pred = pred * neg_inds + (1 - pred) * pos_inds 78 | weight = neg_weights * neg_inds + pos_inds 79 | all_loss = torch.log(1 - trans_pred) * torch.pow(trans_pred, 2) * weight 80 | all_loss = all_loss.sum() 81 | 82 | if num_pos > 0: 83 | all_loss /= num_pos 84 | loss -= all_loss 85 | return loss 86 | 87 | 88 | def _slow_reg_loss(regr, gt_regr, mask): 89 | num = mask.float().sum() 90 | mask = mask.unsqueeze(2).expand_as(gt_regr) 91 | 92 | regr = regr[mask] 93 | gt_regr = gt_regr[mask] 94 | 95 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, reduction='sum') 96 | regr_loss = regr_loss / (num + 1e-4) 97 | return regr_loss 98 | 99 | 100 | def _reg_loss(regr, gt_regr, mask): 101 | ''' L1 regression loss 102 | Arguments: 103 | regr (batch x max_objects x dim) 104 | gt_regr (batch x max_objects x dim) 105 | mask (batch x max_objects) 106 | ''' 107 | num = mask.float().sum() 108 | mask = mask.unsqueeze(2).expand_as(gt_regr).float() 109 | 110 | regr = regr * mask 111 | gt_regr = gt_regr * mask 112 | 113 | regr_loss = nn.functional.smooth_l1_loss(regr, gt_regr, reduction='sum') 114 | regr_loss = regr_loss / (num + 1e-4) 115 | return regr_loss 116 | 117 | 118 | class FocalLoss(nn.Module): 119 | '''nn.Module wrapper for focal loss''' 120 | 121 | def __init__(self): 122 | super(FocalLoss, self).__init__() 123 | self.neg_loss = _neg_loss 124 | 125 | def forward(self, out, target): 126 | return self.neg_loss(out, target) 127 | 128 | 129 | class RegLoss(nn.Module): 130 | '''Regression loss for an output tensor 131 | Arguments: 132 | output (batch x dim x h x w) 133 | mask (batch x max_objects) 134 | ind (batch x max_objects) 135 | target (batch x max_objects x dim) 136 | ''' 137 | 138 | def __init__(self): 139 | super(RegLoss, self).__init__() 140 | 141 | def forward(self, output, mask, ind, target): 142 | pred = _transpose_and_gather_feat(output, ind) 143 | loss = _reg_loss(pred, target, mask) 144 | return loss 145 | 146 | 147 | class RegL1Loss(nn.Module): 148 | def __init__(self): 149 | super(RegL1Loss, self).__init__() 150 | 151 | def forward(self, output, mask, ind, target): 152 | pred = _transpose_and_gather_feat(output, ind) 153 | mask = mask.unsqueeze(2).expand_as(pred).float() 154 | # loss = F.l1_loss(pred * mask, target * mask, reduction='mean') 155 | loss = F.l1_loss(pred * mask, target * mask, reduction='sum') 156 | loss = loss / (mask.sum() + 1e-4) 157 | return loss 158 | 159 | 160 | class NormRegL1Loss(nn.Module): 161 | def __init__(self): 162 | super(NormRegL1Loss, self).__init__() 163 | 164 | def forward(self, output, mask, ind, target): 165 | pred = _transpose_and_gather_feat(output, ind) 
166 | mask = mask.unsqueeze(2).expand_as(pred).float() 167 | # loss = F.l1_loss(pred * mask, target * mask, reduction='mean') 168 | pred = pred / (target + 1e-4) 169 | target = target * 0 + 1 170 | loss = F.l1_loss(pred * mask, target * mask, reduction='sum') 171 | loss = loss / (mask.sum() + 1e-4) 172 | return loss 173 | 174 | 175 | class RegWeightedL1Loss(nn.Module): 176 | def __init__(self): 177 | super(RegWeightedL1Loss, self).__init__() 178 | 179 | def forward(self, output, mask, ind, target): 180 | pred = _transpose_and_gather_feat(output, ind) 181 | mask = mask.float() 182 | # loss = F.l1_loss(pred * mask, target * mask, reduction='mean') 183 | loss = F.l1_loss(pred * mask, target * mask, reduction='sum') 184 | loss = loss / (mask.sum() + 1e-4) 185 | return loss 186 | 187 | 188 | class L1Loss(nn.Module): 189 | def __init__(self): 190 | super(L1Loss, self).__init__() 191 | 192 | def forward(self, output, mask, ind, target): 193 | pred = _transpose_and_gather_feat(output, ind) 194 | mask = mask.unsqueeze(2).expand_as(pred).float() 195 | loss = F.l1_loss(pred * mask, target * mask, reduction='mean') 196 | return loss 197 | 198 | 199 | class BinRotLoss(nn.Module): 200 | def __init__(self): 201 | super(BinRotLoss, self).__init__() 202 | 203 | def forward(self, output, mask, ind, rotbin, rotres): 204 | pred = _transpose_and_gather_feat(output, ind) 205 | loss = compute_rot_loss(pred, rotbin, rotres, mask) 206 | return loss 207 | 208 | 209 | def compute_res_loss(output, target): 210 | return F.smooth_l1_loss(output, target, reduction='mean') 211 | 212 | 213 | # TODO: weight 214 | def compute_bin_loss(output, target, mask): 215 | mask = mask.expand_as(output) 216 | output = output * mask.float() 217 | return F.cross_entropy(output, target, reduction='mean') 218 | 219 | 220 | def compute_rot_loss(output, target_bin, target_res, mask): 221 | # output: (B, 128, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 222 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 223 | # target_bin: (B, 128, 2) [bin1_cls, bin2_cls] 224 | # target_res: (B, 128, 2) [bin1_res, bin2_res] 225 | # mask: (B, 128, 1) 226 | # import pdb; pdb.set_trace() 227 | output = output.view(-1, 8) 228 | target_bin = target_bin.view(-1, 2) 229 | target_res = target_res.view(-1, 2) 230 | mask = mask.view(-1, 1) 231 | loss_bin1 = compute_bin_loss(output[:, 0:2], target_bin[:, 0], mask) 232 | loss_bin2 = compute_bin_loss(output[:, 4:6], target_bin[:, 1], mask) 233 | loss_res = torch.zeros_like(loss_bin1) 234 | if target_bin[:, 0].nonzero().shape[0] > 0: 235 | idx1 = target_bin[:, 0].nonzero()[:, 0] 236 | valid_output1 = torch.index_select(output, 0, idx1.long()) 237 | valid_target_res1 = torch.index_select(target_res, 0, idx1.long()) 238 | loss_sin1 = compute_res_loss( 239 | valid_output1[:, 2], torch.sin(valid_target_res1[:, 0])) 240 | loss_cos1 = compute_res_loss( 241 | valid_output1[:, 3], torch.cos(valid_target_res1[:, 0])) 242 | loss_res += loss_sin1 + loss_cos1 243 | if target_bin[:, 1].nonzero().shape[0] > 0: 244 | idx2 = target_bin[:, 1].nonzero()[:, 0] 245 | valid_output2 = torch.index_select(output, 0, idx2.long()) 246 | valid_target_res2 = torch.index_select(target_res, 0, idx2.long()) 247 | loss_sin2 = compute_res_loss( 248 | valid_output2[:, 6], torch.sin(valid_target_res2[:, 1])) 249 | loss_cos2 = compute_res_loss( 250 | valid_output2[:, 7], torch.cos(valid_target_res2[:, 1])) 251 | loss_res += loss_sin2 + loss_cos2 252 | return loss_bin1 + loss_bin2 + 
loss_res 253 | -------------------------------------------------------------------------------- /lib/models/DCNv2/.gitignore: -------------------------------------------------------------------------------- 1 | .vscode 2 | .idea 3 | *.so 4 | *.o 5 | *pyc 6 | _ext 7 | build 8 | DCNv2.egg-info 9 | dist -------------------------------------------------------------------------------- /lib/models/DCNv2/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, Charles Shang 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-------------------------------------------------------------------------------- /lib/models/DCNv2/README.md: -------------------------------------------------------------------------------- 1 | ## Deformable Convolutional Networks V2 with PyTorch 1.0 2 | 3 | ### Build 4 | ```bash 5 | ./make.sh # build 6 | python test.py # run examples and gradient check 7 | ``` 8 | 9 | ### An Example 10 | - deformable conv 11 | ```python 12 | from dcn_v2 import DCN 13 | input = torch.randn(2, 64, 128, 128).cuda() 14 | # wrap all things (offset and mask) in DCN 15 | dcn = DCN(64, 64, kernel_size=(3,3), stride=1, padding=1, deformable_groups=2).cuda() 16 | output = dcn(input) 17 | print(output.shape) 18 | ``` 19 | - deformable roi pooling 20 | ```python 21 | from dcn_v2 import DCNPooling 22 | input = torch.randn(2, 32, 64, 64).cuda() 23 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 24 | x = torch.randint(256, (20, 1)).cuda().float() 25 | y = torch.randint(256, (20, 1)).cuda().float() 26 | w = torch.randint(64, (20, 1)).cuda().float() 27 | h = torch.randint(64, (20, 1)).cuda().float() 28 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 29 | 30 | # modulated deformable pooling (V2) 31 | # wrap all things (offset and mask) in DCNPooling 32 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 33 | pooled_size=7, 34 | output_dim=32, 35 | no_trans=False, 36 | group_size=1, 37 | trans_std=0.1).cuda() 38 | 39 | dout = dpooling(input, rois) 40 | ``` 41 | ### Note 42 | The master branch now targets PyTorch 1.0 (the new ATen API); you can switch back to PyTorch 0.4 with: 43 | ```bash 44 | git checkout pytorch_0.4 45 | ``` 46 | 47 | ### Known Issues: 48 | 49 | - [x] Gradient check w.r.t. offset (solved) 50 | - [ ] Backward is not reentrant (minor) 51 | 52 | This is an adaptation of the official [Deformable-ConvNets](https://github.com/msracver/Deformable-ConvNets/tree/master/DCNv2_op). 53 | 54 | I have run the gradient check many times with DOUBLE precision. Every tensor **except offset** passes. 55 | However, when I set the offset to 0.5, it passes. I'm still wondering what causes this problem. Is it due to some 56 | non-differentiable points? 57 | 58 | Update: all gradient checks pass with double precision. 59 | 60 | Another issue is that it raises `RuntimeError: Backward is not reentrant`. However, the error is very small (`<1e-7` for 61 | float, `<1e-15` for double), 62 | so it may not be a serious problem. 63 | 64 | Please post an issue or PR if you have any comments. 
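As a further illustration beyond the README's own snippets (this sketch is not part of the original file), here is how `DCN` might be dropped into an ordinary PyTorch block once the extension has been built with `./make.sh`; the `DeformBlock` name and the channel sizes are hypothetical:

```python
import torch
import torch.nn as nn
from dcn_v2 import DCN  # assumes the compiled _ext extension is importable

class DeformBlock(nn.Module):
    # DCN predicts its own offsets and masks internally, so it works as a
    # drop-in replacement for nn.Conv2d inside a conv-BN-ReLU block.
    def __init__(self, in_ch=64, out_ch=64):
        super(DeformBlock, self).__init__()
        self.dcn = DCN(in_ch, out_ch, kernel_size=(3, 3), stride=1,
                       padding=1, deformable_groups=2)
        self.bn = nn.BatchNorm2d(out_ch)
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        return self.relu(self.bn(self.dcn(x)))

block = DeformBlock().cuda()
x = torch.randn(2, 64, 128, 128).cuda()
print(block(x).shape)  # expected: torch.Size([2, 64, 128, 128])
```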
65 | -------------------------------------------------------------------------------- /lib/models/DCNv2/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /lib/models/DCNv2/dcn_v2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import math 7 | import torch 8 | from torch import nn 9 | from torch.autograd import Function 10 | from torch.nn.modules.utils import _pair 11 | from torch.autograd.function import once_differentiable 12 | 13 | import _ext as _backend 14 | 15 | 16 | class _DCNv2(Function): 17 | @staticmethod 18 | def forward(ctx, input, offset, mask, weight, bias, 19 | stride, padding, dilation, deformable_groups): 20 | ctx.stride = _pair(stride) 21 | ctx.padding = _pair(padding) 22 | ctx.dilation = _pair(dilation) 23 | ctx.kernel_size = _pair(weight.shape[2:4]) 24 | ctx.deformable_groups = deformable_groups 25 | output = _backend.dcn_v2_forward(input, weight, bias, 26 | offset, mask, 27 | ctx.kernel_size[0], ctx.kernel_size[1], 28 | ctx.stride[0], ctx.stride[1], 29 | ctx.padding[0], ctx.padding[1], 30 | ctx.dilation[0], ctx.dilation[1], 31 | ctx.deformable_groups) 32 | ctx.save_for_backward(input, offset, mask, weight, bias) 33 | return output 34 | 35 | @staticmethod 36 | @once_differentiable 37 | def backward(ctx, grad_output): 38 | input, offset, mask, weight, bias = ctx.saved_tensors 39 | grad_input, grad_offset, grad_mask, grad_weight, grad_bias = \ 40 | _backend.dcn_v2_backward(input, weight, 41 | bias, 42 | offset, mask, 43 | grad_output, 44 | ctx.kernel_size[0], ctx.kernel_size[1], 45 | ctx.stride[0], ctx.stride[1], 46 | ctx.padding[0], ctx.padding[1], 47 | ctx.dilation[0], ctx.dilation[1], 48 | ctx.deformable_groups) 49 | 50 | return grad_input, grad_offset, grad_mask, grad_weight, grad_bias,\ 51 | None, None, None, None, 52 | 53 | 54 | dcn_v2_conv = _DCNv2.apply 55 | 56 | 57 | class DCNv2(nn.Module): 58 | 59 | def __init__(self, in_channels, out_channels, 60 | kernel_size, stride, padding, dilation=1, deformable_groups=1): 61 | super(DCNv2, self).__init__() 62 | self.in_channels = in_channels 63 | self.out_channels = out_channels 64 | self.kernel_size = _pair(kernel_size) 65 | self.stride = _pair(stride) 66 | self.padding = _pair(padding) 67 | self.dilation = _pair(dilation) 68 | self.deformable_groups = deformable_groups 69 | 70 | self.weight = nn.Parameter(torch.Tensor( 71 | out_channels, in_channels, *self.kernel_size)) 72 | self.bias = nn.Parameter(torch.Tensor(out_channels)) 73 | self.reset_parameters() 74 | 75 | def reset_parameters(self): 76 | n = self.in_channels 77 | for k in self.kernel_size: 78 | n *= k 79 | stdv = 1. 
/ math.sqrt(n) 80 | self.weight.data.uniform_(-stdv, stdv) 81 | self.bias.data.zero_() 82 | 83 | def forward(self, input, offset, mask): 84 | assert 2 * self.deformable_groups * self.kernel_size[0] * self.kernel_size[1] == \ 85 | offset.shape[1] 86 | assert self.deformable_groups * self.kernel_size[0] * self.kernel_size[1] == \ 87 | mask.shape[1] 88 | return dcn_v2_conv(input, offset, mask, 89 | self.weight, 90 | self.bias, 91 | self.stride, 92 | self.padding, 93 | self.dilation, 94 | self.deformable_groups) 95 | 96 | 97 | class DCN(DCNv2): 98 | 99 | def __init__(self, in_channels, out_channels, 100 | kernel_size, stride, padding, 101 | dilation=1, deformable_groups=1): 102 | super(DCN, self).__init__(in_channels, out_channels, 103 | kernel_size, stride, padding, dilation, deformable_groups) 104 | 105 | channels_ = self.deformable_groups * 3 * self.kernel_size[0] * self.kernel_size[1] 106 | self.conv_offset_mask = nn.Conv2d(self.in_channels, 107 | channels_, 108 | kernel_size=self.kernel_size, 109 | stride=self.stride, 110 | padding=self.padding, 111 | bias=True) 112 | self.init_offset() 113 | 114 | def init_offset(self): 115 | self.conv_offset_mask.weight.data.zero_() 116 | self.conv_offset_mask.bias.data.zero_() 117 | 118 | def forward(self, input): 119 | out = self.conv_offset_mask(input) 120 | o1, o2, mask = torch.chunk(out, 3, dim=1) 121 | offset = torch.cat((o1, o2), dim=1) 122 | mask = torch.sigmoid(mask) 123 | return dcn_v2_conv(input, offset, mask, 124 | self.weight, self.bias, 125 | self.stride, 126 | self.padding, 127 | self.dilation, 128 | self.deformable_groups) 129 | 130 | 131 | 132 | class _DCNv2Pooling(Function): 133 | @staticmethod 134 | def forward(ctx, input, rois, offset, 135 | spatial_scale, 136 | pooled_size, 137 | output_dim, 138 | no_trans, 139 | group_size=1, 140 | part_size=None, 141 | sample_per_part=4, 142 | trans_std=.0): 143 | ctx.spatial_scale = spatial_scale 144 | ctx.no_trans = int(no_trans) 145 | ctx.output_dim = output_dim 146 | ctx.group_size = group_size 147 | ctx.pooled_size = pooled_size 148 | ctx.part_size = pooled_size if part_size is None else part_size 149 | ctx.sample_per_part = sample_per_part 150 | ctx.trans_std = trans_std 151 | 152 | output, output_count = \ 153 | _backend.dcn_v2_psroi_pooling_forward(input, rois, offset, 154 | ctx.no_trans, ctx.spatial_scale, 155 | ctx.output_dim, ctx.group_size, 156 | ctx.pooled_size, ctx.part_size, 157 | ctx.sample_per_part, ctx.trans_std) 158 | ctx.save_for_backward(input, rois, offset, output_count) 159 | return output 160 | 161 | @staticmethod 162 | @once_differentiable 163 | def backward(ctx, grad_output): 164 | input, rois, offset, output_count = ctx.saved_tensors 165 | grad_input, grad_offset = \ 166 | _backend.dcn_v2_psroi_pooling_backward(grad_output, 167 | input, 168 | rois, 169 | offset, 170 | output_count, 171 | ctx.no_trans, 172 | ctx.spatial_scale, 173 | ctx.output_dim, 174 | ctx.group_size, 175 | ctx.pooled_size, 176 | ctx.part_size, 177 | ctx.sample_per_part, 178 | ctx.trans_std) 179 | 180 | return grad_input, None, grad_offset, \ 181 | None, None, None, None, None, None, None, None 182 | 183 | 184 | dcn_v2_pooling = _DCNv2Pooling.apply 185 | 186 | 187 | class DCNv2Pooling(nn.Module): 188 | 189 | def __init__(self, 190 | spatial_scale, 191 | pooled_size, 192 | output_dim, 193 | no_trans, 194 | group_size=1, 195 | part_size=None, 196 | sample_per_part=4, 197 | trans_std=.0): 198 | super(DCNv2Pooling, self).__init__() 199 | self.spatial_scale = spatial_scale 200 | self.pooled_size = 
pooled_size 201 | self.output_dim = output_dim 202 | self.no_trans = no_trans 203 | self.group_size = group_size 204 | self.part_size = pooled_size if part_size is None else part_size 205 | self.sample_per_part = sample_per_part 206 | self.trans_std = trans_std 207 | 208 | def forward(self, input, rois, offset): 209 | assert input.shape[1] == self.output_dim 210 | if self.no_trans: 211 | offset = input.new() 212 | return dcn_v2_pooling(input, rois, offset, 213 | self.spatial_scale, 214 | self.pooled_size, 215 | self.output_dim, 216 | self.no_trans, 217 | self.group_size, 218 | self.part_size, 219 | self.sample_per_part, 220 | self.trans_std) 221 | 222 | 223 | class DCNPooling(DCNv2Pooling): 224 | 225 | def __init__(self, 226 | spatial_scale, 227 | pooled_size, 228 | output_dim, 229 | no_trans, 230 | group_size=1, 231 | part_size=None, 232 | sample_per_part=4, 233 | trans_std=.0, 234 | deform_fc_dim=1024): 235 | super(DCNPooling, self).__init__(spatial_scale, 236 | pooled_size, 237 | output_dim, 238 | no_trans, 239 | group_size, 240 | part_size, 241 | sample_per_part, 242 | trans_std) 243 | 244 | self.deform_fc_dim = deform_fc_dim 245 | 246 | if not no_trans: 247 | self.offset_mask_fc = nn.Sequential( 248 | nn.Linear(self.pooled_size * self.pooled_size * 249 | self.output_dim, self.deform_fc_dim), 250 | nn.ReLU(inplace=True), 251 | nn.Linear(self.deform_fc_dim, self.deform_fc_dim), 252 | nn.ReLU(inplace=True), 253 | nn.Linear(self.deform_fc_dim, self.pooled_size * 254 | self.pooled_size * 3) 255 | ) 256 | self.offset_mask_fc[4].weight.data.zero_() 257 | self.offset_mask_fc[4].bias.data.zero_() 258 | 259 | def forward(self, input, rois): 260 | offset = input.new() 261 | 262 | if not self.no_trans: 263 | 264 | # do roi_align first 265 | n = rois.shape[0] 266 | roi = dcn_v2_pooling(input, rois, offset, 267 | self.spatial_scale, 268 | self.pooled_size, 269 | self.output_dim, 270 | True, # no trans 271 | self.group_size, 272 | self.part_size, 273 | self.sample_per_part, 274 | self.trans_std) 275 | 276 | # build mask and offset 277 | offset_mask = self.offset_mask_fc(roi.view(n, -1)) 278 | offset_mask = offset_mask.view( 279 | n, 3, self.pooled_size, self.pooled_size) 280 | o1, o2, mask = torch.chunk(offset_mask, 3, dim=1) 281 | offset = torch.cat((o1, o2), dim=1) 282 | mask = torch.sigmoid(mask) 283 | 284 | # do pooling with offset and mask 285 | return dcn_v2_pooling(input, rois, offset, 286 | self.spatial_scale, 287 | self.pooled_size, 288 | self.output_dim, 289 | self.no_trans, 290 | self.group_size, 291 | self.part_size, 292 | self.sample_per_part, 293 | self.trans_std) * mask 294 | # only roi_align 295 | return dcn_v2_pooling(input, rois, offset, 296 | self.spatial_scale, 297 | self.pooled_size, 298 | self.output_dim, 299 | self.no_trans, 300 | self.group_size, 301 | self.part_size, 302 | self.sample_per_part, 303 | self.trans_std) 304 | -------------------------------------------------------------------------------- /lib/models/DCNv2/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python setup.py build develop 3 | -------------------------------------------------------------------------------- /lib/models/DCNv2/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | import glob 5 | 6 | import torch 7 | 8 | from torch.utils.cpp_extension import CUDA_HOME 9 | from torch.utils.cpp_extension import CppExtension 10 | from 
torch.utils.cpp_extension import CUDAExtension 11 | 12 | from setuptools import find_packages 13 | from setuptools import setup 14 | 15 | requirements = ["torch", "torchvision"] 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "src") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | extra_compile_args = {"cxx": []} 28 | define_macros = [] 29 | 30 | if torch.cuda.is_available() and CUDA_HOME is not None: 31 | extension = CUDAExtension 32 | sources += source_cuda 33 | define_macros += [("WITH_CUDA", None)] 34 | extra_compile_args["nvcc"] = [ 35 | "-DCUDA_HAS_FP16=1", 36 | "-D__CUDA_NO_HALF_OPERATORS__", 37 | "-D__CUDA_NO_HALF_CONVERSIONS__", 38 | "-D__CUDA_NO_HALF2_OPERATORS__", 39 | ] 40 | else: 41 | raise NotImplementedError('CUDA is not available') 42 | 43 | sources = [os.path.join(extensions_dir, s) for s in sources] 44 | include_dirs = [extensions_dir] 45 | ext_modules = [ 46 | extension( 47 | "_ext", 48 | sources, 49 | include_dirs=include_dirs, 50 | define_macros=define_macros, 51 | extra_compile_args=extra_compile_args, 52 | ) 53 | ] 54 | return ext_modules 55 | 56 | setup( 57 | name="DCNv2", 58 | version="0.1", 59 | author="charlesshang", 60 | url="https://github.com/charlesshang/DCNv2", 61 | description="deformable convolutional networks", 62 | packages=find_packages(exclude=("configs", "tests",)), 63 | # install_requires=requirements, 64 | ext_modules=get_extensions(), 65 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 66 | ) -------------------------------------------------------------------------------- /lib/models/DCNv2/src/cpu/dcn_v2_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include <vector> 2 | 3 | #include <ATen/ATen.h> 4 | #include <ATen/cuda/CUDAContext.h> 5 | 6 | 7 | at::Tensor 8 | dcn_v2_cpu_forward(const at::Tensor &input, 9 | const at::Tensor &weight, 10 | const at::Tensor &bias, 11 | const at::Tensor &offset, 12 | const at::Tensor &mask, 13 | const int kernel_h, 14 | const int kernel_w, 15 | const int stride_h, 16 | const int stride_w, 17 | const int pad_h, 18 | const int pad_w, 19 | const int dilation_h, 20 | const int dilation_w, 21 | const int deformable_group) 22 | { 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | std::vector<at::Tensor> 27 | dcn_v2_cpu_backward(const at::Tensor &input, 28 | const at::Tensor &weight, 29 | const at::Tensor &bias, 30 | const at::Tensor &offset, 31 | const at::Tensor &mask, 32 | const at::Tensor &grad_output, 33 | int kernel_h, int kernel_w, 34 | int stride_h, int stride_w, 35 | int pad_h, int pad_w, 36 | int dilation_h, int dilation_w, 37 | int deformable_group) 38 | { 39 | AT_ERROR("Not implemented on the CPU"); 40 | } 41 | 42 | std::tuple<at::Tensor, at::Tensor> 43 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input, 44 | const at::Tensor &bbox, 45 | const at::Tensor &trans, 46 | const int no_trans, 47 | const float spatial_scale, 48 | const int output_dim, 49 | const int group_size, 50 | const int pooled_size, 51 | const int part_size, 52 | const int sample_per_part, 53 | const float trans_std) 54 | { 55 | AT_ERROR("Not implemented on the CPU"); 56 | } 57 | 58 | std::tuple<at::Tensor, at::Tensor> 59 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad, 60 | const at::Tensor &input, 61 | const at::Tensor &bbox, 62 | const 
at::Tensor &trans, 63 | const at::Tensor &top_count, 64 | const int no_trans, 65 | const float spatial_scale, 66 | const int output_dim, 67 | const int group_size, 68 | const int pooled_size, 69 | const int part_size, 70 | const int sample_per_part, 71 | const float trans_std) 72 | { 73 | AT_ERROR("Not implemented on the CPU"); 74 | } -------------------------------------------------------------------------------- /lib/models/DCNv2/src/cpu/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | at::Tensor 5 | dcn_v2_cpu_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector<at::Tensor> 21 | dcn_v2_cpu_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple<at::Tensor, at::Tensor> 35 | dcn_v2_psroi_pooling_cpu_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple<at::Tensor, at::Tensor> 48 | dcn_v2_psroi_pooling_cpu_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /lib/models/DCNv2/src/cuda/dcn_v2_im2col_cuda.h: -------------------------------------------------------------------------------- 1 | 2 | /*! 3 | ******************* BEGIN Caffe Copyright Notice and Disclaimer **************** 4 | * 5 | * COPYRIGHT 6 | * 7 | * All contributions by the University of California: 8 | * Copyright (c) 2014-2017 The Regents of the University of California (Regents) 9 | * All rights reserved. 10 | * 11 | * All other contributions: 12 | * Copyright (c) 2014-2017, the respective contributors 13 | * All rights reserved. 14 | * 15 | * Caffe uses a shared copyright model: each contributor holds copyright over 16 | * their contributions to Caffe. The project versioning records all such 17 | * contribution and copyright details. If a contributor wants to further mark 18 | * their specific copyright on a particular contribution, they should indicate 19 | * their copyright solely in the commit message of the change when it is 20 | * committed. 21 | * 22 | * LICENSE 23 | * 24 | * Redistribution and use in source and binary forms, with or without 25 | * modification, are permitted provided that the following conditions are met: 26 | * 27 | * 1. 
Redistributions of source code must retain the above copyright notice, this 28 | * list of conditions and the following disclaimer. 29 | * 2. Redistributions in binary form must reproduce the above copyright notice, 30 | * this list of conditions and the following disclaimer in the documentation 31 | * and/or other materials provided with the distribution. 32 | * 33 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 34 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 35 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 36 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 37 | * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 38 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 39 | * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 40 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 41 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43 | * 44 | * CONTRIBUTION AGREEMENT 45 | * 46 | * By contributing to the BVLC/caffe repository through pull-request, comment, 47 | * or otherwise, the contributor releases their content to the 48 | * license and copyright terms herein. 49 | * 50 | ***************** END Caffe Copyright Notice and Disclaimer ******************** 51 | * 52 | * Copyright (c) 2018 Microsoft 53 | * Licensed under The MIT License [see LICENSE for details] 54 | * \file modulated_deformable_im2col.h 55 | * \brief Function definitions of converting an image to 56 | * column matrix based on kernel, padding, dilation, and offset. 57 | * These functions are mainly used in deformable convolution operators. 
58 | * \ref: https://arxiv.org/abs/1811.11168 59 | * \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu 60 | */ 61 | 62 | /***************** Adapted by Charles Shang *********************/ 63 | 64 | #ifndef DCN_V2_IM2COL_CUDA 65 | #define DCN_V2_IM2COL_CUDA 66 | 67 | #ifdef __cplusplus 68 | extern "C" 69 | { 70 | #endif 71 | 72 | void modulated_deformable_im2col_cuda(cudaStream_t stream, 73 | const float *data_im, const float *data_offset, const float *data_mask, 74 | const int batch_size, const int channels, const int height_im, const int width_im, 75 | const int height_col, const int width_col, const int kernel_h, const int kernel_w, 76 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 77 | const int dilation_h, const int dilation_w, 78 | const int deformable_group, float *data_col); 79 | 80 | void modulated_deformable_col2im_cuda(cudaStream_t stream, 81 | const float *data_col, const float *data_offset, const float *data_mask, 82 | const int batch_size, const int channels, const int height_im, const int width_im, 83 | const int height_col, const int width_col, const int kernel_h, const int kernel_w, 84 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 85 | const int dilation_h, const int dilation_w, 86 | const int deformable_group, float *grad_im); 87 | 88 | void modulated_deformable_col2im_coord_cuda(cudaStream_t stream, 89 | const float *data_col, const float *data_im, const float *data_offset, const float *data_mask, 90 | const int batch_size, const int channels, const int height_im, const int width_im, 91 | const int height_col, const int width_col, const int kernel_h, const int kernel_w, 92 | const int pad_h, const int pad_w, const int stride_h, const int stride_w, 93 | const int dilation_h, const int dilation_w, 94 | const int deformable_group, 95 | float *grad_offset, float *grad_mask); 96 | 97 | #ifdef __cplusplus 98 | } 99 | #endif 100 | 101 | #endif -------------------------------------------------------------------------------- /lib/models/DCNv2/src/cuda/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | at::Tensor 5 | dcn_v2_cuda_forward(const at::Tensor &input, 6 | const at::Tensor &weight, 7 | const at::Tensor &bias, 8 | const at::Tensor &offset, 9 | const at::Tensor &mask, 10 | const int kernel_h, 11 | const int kernel_w, 12 | const int stride_h, 13 | const int stride_w, 14 | const int pad_h, 15 | const int pad_w, 16 | const int dilation_h, 17 | const int dilation_w, 18 | const int deformable_group); 19 | 20 | std::vector<at::Tensor> 21 | dcn_v2_cuda_backward(const at::Tensor &input, 22 | const at::Tensor &weight, 23 | const at::Tensor &bias, 24 | const at::Tensor &offset, 25 | const at::Tensor &mask, 26 | const at::Tensor &grad_output, 27 | int kernel_h, int kernel_w, 28 | int stride_h, int stride_w, 29 | int pad_h, int pad_w, 30 | int dilation_h, int dilation_w, 31 | int deformable_group); 32 | 33 | 34 | std::tuple<at::Tensor, at::Tensor> 35 | dcn_v2_psroi_pooling_cuda_forward(const at::Tensor &input, 36 | const at::Tensor &bbox, 37 | const at::Tensor &trans, 38 | const int no_trans, 39 | const float spatial_scale, 40 | const int output_dim, 41 | const int group_size, 42 | const int pooled_size, 43 | const int part_size, 44 | const int sample_per_part, 45 | const float trans_std); 46 | 47 | std::tuple<at::Tensor, at::Tensor> 48 | dcn_v2_psroi_pooling_cuda_backward(const at::Tensor &out_grad, 49 | const at::Tensor &input, 50 | const at::Tensor &bbox, 51 | const at::Tensor &trans, 52 | const 
at::Tensor &top_count, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std); -------------------------------------------------------------------------------- /lib/models/DCNv2/src/dcn_v2.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | at::Tensor 10 | dcn_v2_forward(const at::Tensor &input, 11 | const at::Tensor &weight, 12 | const at::Tensor &bias, 13 | const at::Tensor &offset, 14 | const at::Tensor &mask, 15 | const int kernel_h, 16 | const int kernel_w, 17 | const int stride_h, 18 | const int stride_w, 19 | const int pad_h, 20 | const int pad_w, 21 | const int dilation_h, 22 | const int dilation_w, 23 | const int deformable_group) 24 | { 25 | if (input.type().is_cuda()) 26 | { 27 | #ifdef WITH_CUDA 28 | return dcn_v2_cuda_forward(input, weight, bias, offset, mask, 29 | kernel_h, kernel_w, 30 | stride_h, stride_w, 31 | pad_h, pad_w, 32 | dilation_h, dilation_w, 33 | deformable_group); 34 | #else 35 | AT_ERROR("Not compiled with GPU support"); 36 | #endif 37 | } 38 | AT_ERROR("Not implemented on the CPU"); 39 | } 40 | 41 | std::vector<at::Tensor> 42 | dcn_v2_backward(const at::Tensor &input, 43 | const at::Tensor &weight, 44 | const at::Tensor &bias, 45 | const at::Tensor &offset, 46 | const at::Tensor &mask, 47 | const at::Tensor &grad_output, 48 | int kernel_h, int kernel_w, 49 | int stride_h, int stride_w, 50 | int pad_h, int pad_w, 51 | int dilation_h, int dilation_w, 52 | int deformable_group) 53 | { 54 | if (input.type().is_cuda()) 55 | { 56 | #ifdef WITH_CUDA 57 | return dcn_v2_cuda_backward(input, 58 | weight, 59 | bias, 60 | offset, 61 | mask, 62 | grad_output, 63 | kernel_h, kernel_w, 64 | stride_h, stride_w, 65 | pad_h, pad_w, 66 | dilation_h, dilation_w, 67 | deformable_group); 68 | #else 69 | AT_ERROR("Not compiled with GPU support"); 70 | #endif 71 | } 72 | AT_ERROR("Not implemented on the CPU"); 73 | } 74 | 75 | std::tuple<at::Tensor, at::Tensor> 76 | dcn_v2_psroi_pooling_forward(const at::Tensor &input, 77 | const at::Tensor &bbox, 78 | const at::Tensor &trans, 79 | const int no_trans, 80 | const float spatial_scale, 81 | const int output_dim, 82 | const int group_size, 83 | const int pooled_size, 84 | const int part_size, 85 | const int sample_per_part, 86 | const float trans_std) 87 | { 88 | if (input.type().is_cuda()) 89 | { 90 | #ifdef WITH_CUDA 91 | return dcn_v2_psroi_pooling_cuda_forward(input, 92 | bbox, 93 | trans, 94 | no_trans, 95 | spatial_scale, 96 | output_dim, 97 | group_size, 98 | pooled_size, 99 | part_size, 100 | sample_per_part, 101 | trans_std); 102 | #else 103 | AT_ERROR("Not compiled with GPU support"); 104 | #endif 105 | } 106 | AT_ERROR("Not implemented on the CPU"); 107 | } 108 | 109 | std::tuple<at::Tensor, at::Tensor> 110 | dcn_v2_psroi_pooling_backward(const at::Tensor &out_grad, 111 | const at::Tensor &input, 112 | const at::Tensor &bbox, 113 | const at::Tensor &trans, 114 | const at::Tensor &top_count, 115 | const int no_trans, 116 | const float spatial_scale, 117 | const int output_dim, 118 | const int group_size, 119 | const int pooled_size, 120 | const int part_size, 121 | const int sample_per_part, 122 | const float trans_std) 123 | { 124 | if (input.type().is_cuda()) 125 | { 126 | #ifdef WITH_CUDA 127 | return dcn_v2_psroi_pooling_cuda_backward(out_grad, 128 | input, 129 | 
bbox, 130 | trans, 131 | top_count, 132 | no_trans, 133 | spatial_scale, 134 | output_dim, 135 | group_size, 136 | pooled_size, 137 | part_size, 138 | sample_per_part, 139 | trans_std); 140 | #else 141 | AT_ERROR("Not compiled with GPU support"); 142 | #endif 143 | } 144 | AT_ERROR("Not implemented on the CPU"); 145 | } -------------------------------------------------------------------------------- /lib/models/DCNv2/src/vision.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include "dcn_v2.h" 3 | 4 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 5 | m.def("dcn_v2_forward", &dcn_v2_forward, "dcn_v2_forward"); 6 | m.def("dcn_v2_backward", &dcn_v2_backward, "dcn_v2_backward"); 7 | m.def("dcn_v2_psroi_pooling_forward", &dcn_v2_psroi_pooling_forward, "dcn_v2_psroi_pooling_forward"); 8 | m.def("dcn_v2_psroi_pooling_backward", &dcn_v2_psroi_pooling_backward, "dcn_v2_psroi_pooling_backward"); 9 | } 10 | -------------------------------------------------------------------------------- /lib/models/DCNv2/test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import absolute_import 3 | from __future__ import print_function 4 | from __future__ import division 5 | 6 | import time 7 | import torch 8 | import torch.nn as nn 9 | from torch.autograd import gradcheck 10 | 11 | from dcn_v2 import dcn_v2_conv, DCNv2, DCN 12 | from dcn_v2 import dcn_v2_pooling, DCNv2Pooling, DCNPooling 13 | 14 | deformable_groups = 1 15 | N, inC, inH, inW = 2, 2, 4, 4 16 | outC = 2 17 | kH, kW = 3, 3 18 | 19 | 20 | def conv_identify(weight, bias): 21 | weight.data.zero_() 22 | bias.data.zero_() 23 | o, i, h, w = weight.shape 24 | y = h//2 25 | x = w//2 26 | for p in range(i): 27 | for q in range(o): 28 | if p == q: 29 | weight.data[q, p, y, x] = 1.0 30 | 31 | 32 | def check_zero_offset(): 33 | conv_offset = nn.Conv2d(inC, deformable_groups * 2 * kH * kW, 34 | kernel_size=(kH, kW), 35 | stride=(1, 1), 36 | padding=(1, 1), 37 | bias=True).cuda() 38 | 39 | conv_mask = nn.Conv2d(inC, deformable_groups * 1 * kH * kW, 40 | kernel_size=(kH, kW), 41 | stride=(1, 1), 42 | padding=(1, 1), 43 | bias=True).cuda() 44 | 45 | dcn_v2 = DCNv2(inC, outC, (kH, kW), 46 | stride=1, padding=1, dilation=1, 47 | deformable_groups=deformable_groups).cuda() 48 | 49 | conv_offset.weight.data.zero_() 50 | conv_offset.bias.data.zero_() 51 | conv_mask.weight.data.zero_() 52 | conv_mask.bias.data.zero_() 53 | conv_identify(dcn_v2.weight, dcn_v2.bias) 54 | 55 | input = torch.randn(N, inC, inH, inW).cuda() 56 | offset = conv_offset(input) 57 | mask = conv_mask(input) 58 | mask = torch.sigmoid(mask) 59 | output = dcn_v2(input, offset, mask) 60 | output *= 2 61 | d = (input - output).abs().max() 62 | if d < 1e-10: 63 | print('Zero offset passed') 64 | else: 65 | print('Zero offset failed') 66 | print(input) 67 | print(output) 68 | 69 | def check_gradient_dconv(): 70 | 71 | input = torch.rand(N, inC, inH, inW).cuda() * 0.01 72 | input.requires_grad = True 73 | 74 | offset = torch.randn(N, deformable_groups * 2 * kW * kH, inH, inW).cuda() * 2 75 | # offset.data.zero_() 76 | # offset.data -= 0.5 77 | offset.requires_grad = True 78 | 79 | mask = torch.rand(N, deformable_groups * 1 * kW * kH, inH, inW).cuda() 80 | # mask.data.zero_() 81 | mask.requires_grad = True 82 | mask = torch.sigmoid(mask) 83 | 84 | weight = torch.randn(outC, inC, kH, kW).cuda() 85 | weight.requires_grad = True 86 | 87 | bias = torch.rand(outC).cuda() 88 | 
bias.requires_grad = True 89 | 90 | stride = 1 91 | padding = 1 92 | dilation = 1 93 | 94 | print('check_gradient_dconv: ', 95 | gradcheck(dcn_v2_conv, (input, offset, mask, weight, bias, 96 | stride, padding, dilation, deformable_groups), 97 | eps=1e-3, atol=1e-4, rtol=1e-2)) 98 | 99 | 100 | def check_pooling_zero_offset(): 101 | 102 | input = torch.randn(2, 16, 64, 64).cuda().zero_() 103 | input[0, :, 16:26, 16:26] = 1. 104 | input[1, :, 10:20, 20:30] = 2. 105 | rois = torch.tensor([ 106 | [0, 65, 65, 103, 103], 107 | [1, 81, 41, 119, 79], 108 | ]).cuda().float() 109 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 110 | pooled_size=7, 111 | output_dim=16, 112 | no_trans=True, 113 | group_size=1, 114 | trans_std=0.0).cuda() 115 | 116 | out = pooling(input, rois, input.new()) 117 | s = ', '.join(['%f' % out[i, :, :, :].mean().item() 118 | for i in range(rois.shape[0])]) 119 | print(s) 120 | 121 | dpooling = DCNv2Pooling(spatial_scale=1.0 / 4, 122 | pooled_size=7, 123 | output_dim=16, 124 | no_trans=False, 125 | group_size=1, 126 | trans_std=0.0).cuda() 127 | offset = torch.randn(20, 2, 7, 7).cuda().zero_() 128 | dout = dpooling(input, rois, offset) 129 | s = ', '.join(['%f' % dout[i, :, :, :].mean().item() 130 | for i in range(rois.shape[0])]) 131 | print(s) 132 | 133 | 134 | def check_gradient_dpooling(): 135 | input = torch.randn(2, 3, 5, 5).cuda() * 0.01 136 | N = 4 137 | batch_inds = torch.randint(2, (N, 1)).cuda().float() 138 | x = torch.rand((N, 1)).cuda().float() * 15 139 | y = torch.rand((N, 1)).cuda().float() * 15 140 | w = torch.rand((N, 1)).cuda().float() * 10 141 | h = torch.rand((N, 1)).cuda().float() * 10 142 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 143 | offset = torch.randn(N, 2, 3, 3).cuda() 144 | input.requires_grad = True 145 | offset.requires_grad = True 146 | 147 | spatial_scale = 1.0 / 4 148 | pooled_size = 3 149 | output_dim = 3 150 | no_trans = 0 151 | group_size = 1 152 | trans_std = 0.0 153 | sample_per_part = 4 154 | part_size = pooled_size 155 | 156 | print('check_gradient_dpooling:', 157 | gradcheck(dcn_v2_pooling, (input, rois, offset, 158 | spatial_scale, 159 | pooled_size, 160 | output_dim, 161 | no_trans, 162 | group_size, 163 | part_size, 164 | sample_per_part, 165 | trans_std), 166 | eps=1e-4)) 167 | 168 | 169 | def example_dconv(): 170 | input = torch.randn(2, 64, 128, 128).cuda() 171 | # wrap all things (offset and mask) in DCN 172 | dcn = DCN(64, 64, kernel_size=(3, 3), stride=1, 173 | padding=1, deformable_groups=2).cuda() 174 | # print(dcn.weight.shape, input.shape) 175 | output = dcn(input) 176 | target = output.new(*output.size()) 177 | target.data.uniform_(-0.01, 0.01) 178 | error = (target - output).mean() 179 | error.backward() 180 | print(output.shape) 181 | 182 | 183 | def example_dpooling(): 184 | input = torch.randn(2, 32, 64, 64).cuda() 185 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 186 | x = torch.randint(256, (20, 1)).cuda().float() 187 | y = torch.randint(256, (20, 1)).cuda().float() 188 | w = torch.randint(64, (20, 1)).cuda().float() 189 | h = torch.randint(64, (20, 1)).cuda().float() 190 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 191 | offset = torch.randn(20, 2, 7, 7).cuda() 192 | input.requires_grad = True 193 | offset.requires_grad = True 194 | 195 | # normal roi_align 196 | pooling = DCNv2Pooling(spatial_scale=1.0 / 4, 197 | pooled_size=7, 198 | output_dim=32, 199 | no_trans=True, 200 | group_size=1, 201 | trans_std=0.1).cuda() 202 | 203 | # deformable pooling 204 | dpooling = 
DCNv2Pooling(spatial_scale=1.0 / 4, 205 | pooled_size=7, 206 | output_dim=32, 207 | no_trans=False, 208 | group_size=1, 209 | trans_std=0.1).cuda() 210 | 211 | out = pooling(input, rois, offset) 212 | dout = dpooling(input, rois, offset) 213 | print(out.shape) 214 | print(dout.shape) 215 | 216 | target_out = out.new(*out.size()) 217 | target_out.data.uniform_(-0.01, 0.01) 218 | target_dout = dout.new(*dout.size()) 219 | target_dout.data.uniform_(-0.01, 0.01) 220 | e = (target_out - out).mean() 221 | e.backward() 222 | e = (target_dout - dout).mean() 223 | e.backward() 224 | 225 | 226 | def example_mdpooling(): 227 | input = torch.randn(2, 32, 64, 64).cuda() 228 | input.requires_grad = True 229 | batch_inds = torch.randint(2, (20, 1)).cuda().float() 230 | x = torch.randint(256, (20, 1)).cuda().float() 231 | y = torch.randint(256, (20, 1)).cuda().float() 232 | w = torch.randint(64, (20, 1)).cuda().float() 233 | h = torch.randint(64, (20, 1)).cuda().float() 234 | rois = torch.cat((batch_inds, x, y, x + w, y + h), dim=1) 235 | 236 | # modulated deformable pooling (V2) 237 | dpooling = DCNPooling(spatial_scale=1.0 / 4, 238 | pooled_size=7, 239 | output_dim=32, 240 | no_trans=False, 241 | group_size=1, 242 | trans_std=0.1, 243 | deform_fc_dim=1024).cuda() 244 | 245 | dout = dpooling(input, rois) 246 | target = dout.new(*dout.size()) 247 | target.data.uniform_(-0.1, 0.1) 248 | error = (target - dout).mean() 249 | error.backward() 250 | print(dout.shape) 251 | 252 | 253 | if __name__ == '__main__': 254 | 255 | example_dconv() 256 | example_dpooling() 257 | example_mdpooling() 258 | 259 | check_pooling_zero_offset() 260 | # zero offset check 261 | if inC == outC: 262 | check_zero_offset() 263 | 264 | check_gradient_dpooling() 265 | check_gradient_dconv() 266 | # """ 267 | # ****** Note: the backward-is-not-reentrant error may not be a serious problem, 268 | # ****** since the max error is less than 1e-7, 269 | # ****** still looking for what triggers this problem 270 | # """ 271 | -------------------------------------------------------------------------------- /lib/models/stNet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | from thop import profile 7 | from lib.models.DSFNet_with_Static import DSFNet_with_Static 8 | from lib.models.DSFNet import DSFNet 9 | from lib.models.DSFNet_with_Dynamic import DSFNet_with_Dynamic 10 | 11 | def model_lib(model_chose): 12 | model_factory = { 13 | 'DSFNet_with_Static': DSFNet_with_Static, 14 | 'DSFNet': DSFNet, 15 | 'DSFNet_with_Dynamic': DSFNet_with_Dynamic, 16 | } 17 | return model_factory[model_chose] 18 | 19 | def get_det_net(heads, model_name): 20 | model_name = model_lib(model_name) 21 | model = model_name(heads) 22 | return model 23 | 24 | 25 | def load_model(model, model_path, optimizer=None, resume=False, 26 | lr=None, lr_step=None): 27 | start_epoch = 0 28 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 29 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 30 | state_dict_ = checkpoint['state_dict'] 31 | state_dict = {} 32 | 33 | # convert DataParallel keys to plain model keys 34 | for k in state_dict_: 35 | if k.startswith('module') and not k.startswith('module_list'): 36 | state_dict[k[7:]] = state_dict_[k] 37 | else: 38 | state_dict[k] = state_dict_[k] 39 | model_state_dict = model.state_dict() 40 | 41 | # check loaded parameters and created model 
42 | msg = 'If you see this, your model did not fully load the ' + \
43 | 'pre-trained weights. Please make sure ' + \
44 | 'you have correctly specified --arch xxx ' + \
45 | 'or set the correct --num_classes for your own dataset.'
46 | for k in state_dict:
47 | if k in model_state_dict:
48 | if state_dict[k].shape != model_state_dict[k].shape:
49 | print('Skip loading parameter {}, required shape {}, ' \
50 | 'loaded shape {}. {}'.format(
51 | k, model_state_dict[k].shape, state_dict[k].shape, msg))
52 | state_dict[k] = model_state_dict[k]
53 | else:
54 | print('Drop parameter {}.'.format(k) + msg)
55 | for k in model_state_dict:
56 | if not (k in state_dict):
57 | print('No param {}.'.format(k) + msg)
58 | state_dict[k] = model_state_dict[k]
59 | model.load_state_dict(state_dict, strict=False)
60 |
61 | # resume optimizer parameters
62 | if optimizer is not None and resume:
63 | if 'optimizer' in checkpoint:
64 | optimizer.load_state_dict(checkpoint['optimizer'])
65 | start_epoch = checkpoint['epoch']
66 | start_lr = lr
67 | for step in lr_step:
68 | if start_epoch >= step:
69 | start_lr *= 0.1
70 | for param_group in optimizer.param_groups:
71 | param_group['lr'] = start_lr
72 | print('Resumed optimizer with start lr', start_lr)
73 | else:
74 | print('No optimizer parameters in checkpoint.')
75 | if optimizer is not None:
76 | return model, optimizer, start_epoch
77 | else:
78 | return model
79 |
80 |
81 | def save_model(path, epoch, model, optimizer=None):
82 | if isinstance(model, torch.nn.DataParallel):
83 | state_dict = model.module.state_dict()
84 | else:
85 | state_dict = model.state_dict()
86 | data = {'epoch': epoch,
87 | 'state_dict': state_dict}
88 | if not (optimizer is None):
89 | data['optimizer'] = optimizer.state_dict()
90 | torch.save(data, path)
91 |
92 | if __name__ == '__main__':
93 | torch.backends.cudnn.enabled = True
94 | heads = {'hm': 1, 'wh': 2, 'reg': 2}
95 | model_nameAll = ['DSFNet_with_Static', 'DSFNet_with_Dynamic', 'DSFNet']  # only names registered in model_factory
96 | device = 'cuda:0'
97 |
98 | out = {}
99 |
100 | for model_name in model_nameAll:
101 | net = get_det_net(heads, model_name).to(device)
102 | input = torch.rand(1,3,5,512,512).to(device)
103 | flops, params = profile(net, inputs=(input,))
104 | out[model_name] = [flops, params]
105 |
106 | for k,v in out.items():
107 | print('---------------------------------------------')
108 | print(k + ' Number of flops: %.2fG' % (v[0] / 1e9))
109 | print(k + ' Number of params: %.2fM' % (v[1] / 1e6))
--------------------------------------------------------------------------------
/lib/utils/data_parallel.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn.modules import Module
3 | from torch.nn.parallel.scatter_gather import gather
4 | from torch.nn.parallel.replicate import replicate
5 | from torch.nn.parallel.parallel_apply import parallel_apply
6 |
7 |
8 | from .scatter_gather import scatter_kwargs
9 |
10 | class _DataParallel(Module):
11 | r"""Implements data parallelism at the module level.
12 |
13 | This container parallelizes the application of the given module by
14 | splitting the input across the specified devices by chunking in the batch
15 | dimension. In the forward pass, the module is replicated on each device,
16 | and each replica handles a portion of the input. During the backwards
17 | pass, gradients from each replica are summed into the original module.
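    Unlike the stock ``torch.nn.DataParallel``, this variant also accepts a
    ``chunk_sizes`` argument, so the batch may be split unevenly across GPUs.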
18 | 19 | The batch size should be larger than the number of GPUs used. It should 20 | also be an integer multiple of the number of GPUs so that each chunk is the 21 | same size (so that each GPU processes the same number of samples). 22 | 23 | See also: :ref:`cuda-nn-dataparallel-instead` 24 | 25 | Arbitrary positional and keyword inputs are allowed to be passed into 26 | DataParallel EXCEPT Tensors. All variables will be scattered on dim 27 | specified (default 0). Primitive types will be broadcasted, but all 28 | other types will be a shallow copy and can be corrupted if written to in 29 | the model's forward pass. 30 | 31 | Args: 32 | module: module to be parallelized 33 | device_ids: CUDA devices (default: all devices) 34 | output_device: device location of output (default: device_ids[0]) 35 | 36 | Example:: 37 | 38 | >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) 39 | >>> output = net(input_var) 40 | """ 41 | 42 | # TODO: update notes/cuda.rst when this class handles 8+ GPUs well 43 | 44 | def __init__(self, module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 45 | super(_DataParallel, self).__init__() 46 | 47 | if not torch.cuda.is_available(): 48 | self.module = module 49 | self.device_ids = [] 50 | return 51 | 52 | if device_ids is None: 53 | device_ids = list(range(torch.cuda.device_count())) 54 | if output_device is None: 55 | output_device = device_ids[0] 56 | self.dim = dim 57 | self.module = module 58 | self.device_ids = device_ids 59 | self.chunk_sizes = chunk_sizes 60 | self.output_device = output_device 61 | if len(self.device_ids) == 1: 62 | self.module.cuda(device_ids[0]) 63 | 64 | def forward(self, *inputs, **kwargs): 65 | if not self.device_ids: 66 | return self.module(*inputs, **kwargs) 67 | inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids, self.chunk_sizes) 68 | if len(self.device_ids) == 1: 69 | return self.module(*inputs[0], **kwargs[0]) 70 | replicas = self.replicate(self.module, self.device_ids[:len(inputs)]) 71 | outputs = self.parallel_apply(replicas, inputs, kwargs) 72 | return self.gather(outputs, self.output_device) 73 | 74 | def replicate(self, module, device_ids): 75 | return replicate(module, device_ids) 76 | 77 | def scatter(self, inputs, kwargs, device_ids, chunk_sizes): 78 | return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim, chunk_sizes=self.chunk_sizes) 79 | 80 | def parallel_apply(self, replicas, inputs, kwargs): 81 | return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)]) 82 | 83 | def gather(self, outputs, output_device): 84 | return gather(outputs, output_device, dim=self.dim) 85 | 86 | 87 | def data_parallel(module, inputs, device_ids=None, output_device=None, dim=0, module_kwargs=None): 88 | r"""Evaluates module(input) in parallel across the GPUs given in device_ids. 89 | 90 | This is the functional version of the DataParallel module. 91 | 92 | Args: 93 | module: the module to evaluate in parallel 94 | inputs: inputs to the module 95 | device_ids: GPU ids on which to replicate module 96 | output_device: GPU location of the output Use -1 to indicate the CPU. 
97 | (default: device_ids[0]) 98 | Returns: 99 | a Variable containing the result of module(input) located on 100 | output_device 101 | """ 102 | if not isinstance(inputs, tuple): 103 | inputs = (inputs,) 104 | 105 | if device_ids is None: 106 | device_ids = list(range(torch.cuda.device_count())) 107 | 108 | if output_device is None: 109 | output_device = device_ids[0] 110 | 111 | inputs, module_kwargs = scatter_kwargs(inputs, module_kwargs, device_ids, dim) 112 | if len(device_ids) == 1: 113 | return module(*inputs[0], **module_kwargs[0]) 114 | used_device_ids = device_ids[:len(inputs)] 115 | replicas = replicate(module, used_device_ids) 116 | outputs = parallel_apply(replicas, inputs, module_kwargs, used_device_ids) 117 | return gather(outputs, output_device, dim) 118 | 119 | def DataParallel(module, device_ids=None, output_device=None, dim=0, chunk_sizes=None): 120 | if chunk_sizes is None: 121 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 122 | standard_size = True 123 | for i in range(1, len(chunk_sizes)): 124 | if chunk_sizes[i] != chunk_sizes[0]: 125 | standard_size = False 126 | if standard_size: 127 | return torch.nn.DataParallel(module, device_ids, output_device, dim) 128 | return _DataParallel(module, device_ids, output_device, dim, chunk_sizes) -------------------------------------------------------------------------------- /lib/utils/image.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # Modified by Xingyi Zhou 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | import cv2 14 | import random 15 | 16 | def flip(img): 17 | return img[:, :, ::-1].copy() 18 | 19 | def transform_preds(coords, center, scale, output_size): 20 | target_coords = np.zeros(coords.shape) 21 | trans = get_affine_transform(center, scale, 0, output_size, inv=1) 22 | for p in range(coords.shape[0]): 23 | target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) 24 | return target_coords 25 | 26 | 27 | def get_affine_transform(center, 28 | scale, 29 | rot, 30 | output_size, 31 | shift=np.array([0, 0], dtype=np.float32), 32 | inv=0): 33 | if not isinstance(scale, np.ndarray) and not isinstance(scale, list): 34 | scale = np.array([scale, scale], dtype=np.float32) 35 | 36 | scale_tmp = scale 37 | src_w = scale_tmp[0] 38 | dst_w = output_size[0] 39 | dst_h = output_size[1] 40 | 41 | rot_rad = np.pi * rot / 180 42 | src_dir = get_dir([0, src_w * -0.5], rot_rad) 43 | dst_dir = np.array([0, dst_w * -0.5], np.float32) 44 | 45 | src = np.zeros((3, 2), dtype=np.float32) 46 | dst = np.zeros((3, 2), dtype=np.float32) 47 | src[0, :] = center + scale_tmp * shift 48 | src[1, :] = center + src_dir + scale_tmp * shift 49 | dst[0, :] = [dst_w * 0.5, dst_h * 0.5] 50 | dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5], np.float32) + dst_dir 51 | 52 | src[2:, :] = get_3rd_point(src[0, :], src[1, :]) 53 | dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :]) 54 | 55 | if inv: 56 | trans = cv2.getAffineTransform(np.float32(dst), np.float32(src)) 57 | else: 58 | trans = cv2.getAffineTransform(np.float32(src), np.float32(dst)) 59 | 60 | return trans 61 | 62 | 63 | def 
affine_transform(pt, t): 64 | new_pt = np.array([pt[0], pt[1], 1.], dtype=np.float32).T 65 | new_pt = np.dot(t, new_pt) 66 | return new_pt[:2] 67 | 68 | 69 | def get_3rd_point(a, b): 70 | direct = a - b 71 | return b + np.array([-direct[1], direct[0]], dtype=np.float32) 72 | 73 | 74 | def get_dir(src_point, rot_rad): 75 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 76 | 77 | src_result = [0, 0] 78 | src_result[0] = src_point[0] * cs - src_point[1] * sn 79 | src_result[1] = src_point[0] * sn + src_point[1] * cs 80 | 81 | return src_result 82 | 83 | 84 | def crop(img, center, scale, output_size, rot=0): 85 | trans = get_affine_transform(center, scale, rot, output_size) 86 | 87 | dst_img = cv2.warpAffine(img, 88 | trans, 89 | (int(output_size[0]), int(output_size[1])), 90 | flags=cv2.INTER_LINEAR) 91 | 92 | return dst_img 93 | 94 | 95 | def gaussian_radius(det_size, min_overlap=0.7): 96 | 97 | height, width = det_size 98 | 99 | a1 = 1 100 | b1 = (height + width) 101 | c1 = width * height * (1 - min_overlap) / (1 + min_overlap) 102 | sq1 = np.sqrt(b1 ** 2 - 4 * a1 * c1) 103 | r1 = (b1 + sq1) / 2 104 | 105 | a2 = 4 106 | b2 = 2 * (height + width) 107 | c2 = (1 - min_overlap) * width * height 108 | sq2 = np.sqrt(b2 ** 2 - 4 * a2 * c2) 109 | r2 = (b2 + sq2) / 2 110 | 111 | a3 = 4 * min_overlap 112 | b3 = -2 * min_overlap * (height + width) 113 | c3 = (min_overlap - 1) * width * height 114 | sq3 = np.sqrt(b3 ** 2 - 4 * a3 * c3) 115 | r3 = (b3 + sq3) / 2 116 | return min(r1, r2, r3) 117 | 118 | 119 | def gaussian2D(shape, sigma=1): 120 | m, n = [(ss - 1.) / 2. for ss in shape] 121 | y, x = np.ogrid[-m:m+1,-n:n+1] 122 | 123 | h = np.exp(-(x * x + y * y) / (2 * sigma * sigma)) 124 | h[h < np.finfo(h.dtype).eps * h.max()] = 0 125 | return h 126 | 127 | def draw_umich_gaussian(heatmap, center, radius, k=1): 128 | diameter = 2 * radius + 1 129 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 130 | 131 | x, y = int(center[0]), int(center[1]) 132 | 133 | height, width = heatmap.shape[0:2] 134 | 135 | left, right = min(x, radius), min(width - x, radius + 1) 136 | top, bottom = min(y, radius), min(height - y, radius + 1) 137 | 138 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 139 | masked_gaussian = gaussian[radius - top:radius + bottom, radius - left:radius + right] 140 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 141 | np.maximum(masked_heatmap, masked_gaussian * k, out=masked_heatmap) 142 | 143 | return heatmap 144 | 145 | def draw_dense_reg(regmap, heatmap, center, value, radius, is_offset=False): 146 | diameter = 2 * radius + 1 147 | gaussian = gaussian2D((diameter, diameter), sigma=diameter / 6) 148 | value = np.array(value, dtype=np.float32).reshape(-1, 1, 1) 149 | dim = value.shape[0] 150 | reg = np.ones((dim, diameter*2+1, diameter*2+1), dtype=np.float32) * value 151 | if is_offset and dim == 2: 152 | delta = np.arange(diameter*2+1) - radius 153 | reg[0] = reg[0] - delta.reshape(1, -1) 154 | reg[1] = reg[1] - delta.reshape(-1, 1) 155 | 156 | x, y = int(center[0]), int(center[1]) 157 | 158 | height, width = heatmap.shape[0:2] 159 | 160 | left, right = min(x, radius), min(width - x, radius + 1) 161 | top, bottom = min(y, radius), min(height - y, radius + 1) 162 | 163 | masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right] 164 | masked_regmap = regmap[:, y - top:y + bottom, x - left:x + right] 165 | masked_gaussian = gaussian[radius - top:radius + bottom, 166 | radius - left:radius + right] 167 | masked_reg = reg[:, 
radius - top:radius + bottom, 168 | radius - left:radius + right] 169 | if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0: # TODO debug 170 | idx = (masked_gaussian >= masked_heatmap).reshape( 171 | 1, masked_gaussian.shape[0], masked_gaussian.shape[1]) 172 | masked_regmap = (1-idx) * masked_regmap + idx * masked_reg 173 | regmap[:, y - top:y + bottom, x - left:x + right] = masked_regmap 174 | return regmap 175 | 176 | 177 | def draw_msra_gaussian(heatmap, center, sigma): 178 | tmp_size = sigma * 3 179 | mu_x = int(center[0] + 0.5) 180 | mu_y = int(center[1] + 0.5) 181 | w, h = heatmap.shape[0], heatmap.shape[1] 182 | ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] 183 | br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] 184 | if ul[0] >= h or ul[1] >= w or br[0] < 0 or br[1] < 0: 185 | return heatmap 186 | size = 2 * tmp_size + 1 187 | x = np.arange(0, size, 1, np.float32) 188 | y = x[:, np.newaxis] 189 | x0 = y0 = size // 2 190 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 191 | g_x = max(0, -ul[0]), min(br[0], h) - ul[0] 192 | g_y = max(0, -ul[1]), min(br[1], w) - ul[1] 193 | img_x = max(0, ul[0]), min(br[0], h) 194 | img_y = max(0, ul[1]), min(br[1], w) 195 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( 196 | heatmap[img_y[0]:img_y[1], img_x[0]:img_x[1]], 197 | g[g_y[0]:g_y[1], g_x[0]:g_x[1]]) 198 | return heatmap 199 | 200 | def grayscale(image): 201 | return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 202 | 203 | def lighting_(data_rng, image, alphastd, eigval, eigvec): 204 | alpha = data_rng.normal(scale=alphastd, size=(3, )) 205 | image += np.dot(eigvec, eigval * alpha) 206 | 207 | def blend_(alpha, image1, image2): 208 | image1 *= alpha 209 | image2 *= (1 - alpha) 210 | image1 += image2 211 | 212 | def saturation_(data_rng, image, gs, gs_mean, var): 213 | alpha = 1. + data_rng.uniform(low=-var, high=var) 214 | blend_(alpha, image, gs[:, :, None]) 215 | 216 | def brightness_(data_rng, image, gs, gs_mean, var): 217 | alpha = 1. + data_rng.uniform(low=-var, high=var) 218 | image *= alpha 219 | 220 | def contrast_(data_rng, image, gs, gs_mean, var): 221 | alpha = 1. 
+ data_rng.uniform(low=-var, high=var) 222 | blend_(alpha, image, gs_mean) 223 | 224 | def color_aug(data_rng, image, eig_val, eig_vec): 225 | functions = [brightness_, contrast_, saturation_] 226 | random.shuffle(functions) 227 | 228 | gs = grayscale(image) 229 | gs_mean = gs.mean() 230 | for f in functions: 231 | f(data_rng, image, gs, gs_mean, 0.4) 232 | lighting_(data_rng, image, 0.1, eig_val, eig_vec) 233 | -------------------------------------------------------------------------------- /lib/utils/logger.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 6 | import os 7 | import time 8 | import sys 9 | import torch 10 | 11 | class Logger(object): 12 | def __init__(self, opt): 13 | """Create a summary writer logging to log_dir.""" 14 | if not os.path.exists(opt.save_log_dir): 15 | os.makedirs(opt.save_log_dir) 16 | 17 | time_str = time.strftime('%Y-%m-%d-%H-%M') 18 | 19 | args = dict((name, getattr(opt, name)) for name in dir(opt) 20 | if not name.startswith('_')) 21 | file_name = os.path.join(opt.save_log_dir, 'opt.txt') 22 | with open(file_name, 'wt') as opt_file: 23 | opt_file.write('==> torch version: {}\n'.format(torch.__version__)) 24 | opt_file.write('==> cudnn version: {}\n'.format( 25 | torch.backends.cudnn.version())) 26 | opt_file.write('==> Cmd:\n') 27 | opt_file.write(str(sys.argv)) 28 | opt_file.write('\n==> Opt:\n') 29 | for k, v in sorted(args.items()): 30 | opt_file.write(' %s: %s\n' % (str(k), str(v))) 31 | 32 | # log_dir = opt.save_log_dir + '/logs_{}'.format(time_str) 33 | log_dir = opt.save_log_dir 34 | if not os.path.exists(os.path.dirname(log_dir)): 35 | os.mkdir(os.path.dirname(log_dir)) 36 | if not os.path.exists(log_dir): 37 | os.mkdir(log_dir) 38 | self.log = open(log_dir + '/log.txt', 'w') 39 | # try: 40 | # os.system('cp {}/opt.txt {}/'.format(opt.save_log_dir, log_dir)) 41 | # except: 42 | # pass 43 | self.start_line = True 44 | 45 | def write(self, txt): 46 | if self.start_line: 47 | time_str = time.strftime('%Y-%m-%d-%H-%M') 48 | self.log.write('{}: {}'.format(time_str, txt)) 49 | else: 50 | self.log.write(txt) 51 | self.start_line = False 52 | if '\n' in txt: 53 | self.start_line = True 54 | self.log.flush() 55 | 56 | def close(self): 57 | self.log.close() 58 | -------------------------------------------------------------------------------- /lib/utils/opts.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import os 7 | import sys 8 | from datetime import datetime 9 | 10 | class opts(object): 11 | def __init__(self): 12 | self.parser = argparse.ArgumentParser() 13 | # basic experiment setting 14 | self.parser.add_argument('--model_name', default='DSFNet_with_Static', 15 | help='name of the model. DSFNet_with_Static | DSFNet_with_Dynamic | DSFNet') 16 | self.parser.add_argument('--load_model', default='', 17 | help='path to pretrained model') 18 | self.parser.add_argument('--resume', type=bool, default=False, 19 | help='resume an experiment.') 20 | self.parser.add_argument('--down_ratio', type=int, default=1, 21 | help='output stride. 
Currently only supports 1.')
22 | # system
23 | self.parser.add_argument('--gpus', default='0,1',
24 | help='-1 for CPU, use comma for multiple gpus')
25 | self.parser.add_argument('--num_workers', type=int, default=4,
26 | help='dataloader threads. 0 for single-thread.')
27 | self.parser.add_argument('--seed', type=int, default=317,
28 | help='random seed') # from CornerNet
29 |
30 | # train
31 | self.parser.add_argument('--lr', type=float, default=1.25e-4,
32 | help='learning rate for batch size 4.')
33 | self.parser.add_argument('--lr_step', type=str, default='30,45', #30,45
34 | help='drop learning rate by 10.')
35 | self.parser.add_argument('--num_epochs', type=int, default=55, #55
36 | help='total training epochs.')
37 | self.parser.add_argument('--batch_size', type=int, default=4,
38 | help='batch size')
39 | self.parser.add_argument('--val_intervals', type=int, default=5,
40 | help='number of epochs to run validation.')
41 | self.parser.add_argument('--seqLen', type=int, default=5,
42 | help='number of images per sample. Currently only supports 5.')
43 |
44 | # test
45 | self.parser.add_argument('--nms', action='store_true',
46 | help='run nms in testing.')
47 | self.parser.add_argument('--K', type=int, default=256,
48 | help='max number of output objects.')
49 | self.parser.add_argument('--test_large_size', type=bool, default=True,
50 | help='whether or not to test at image size 1024. Only for test.')
51 | self.parser.add_argument('--show_results', type=bool, default=False,
52 | help='whether or not to show the detection results. Only for test.')
53 | self.parser.add_argument('--save_track_results', type=bool, default=False,
54 | help='whether or not to save the tracking results of sort. Only for testTrackingSort.')
55 |
56 | # save
57 | self.parser.add_argument('--save_dir', type=str, default='./weights',
58 | help='save path of the model.')
59 |
60 | # dataset
61 | self.parser.add_argument('--datasetname', type=str, default='rsdata',
62 | help='dataset name.')
63 | self.parser.add_argument('--data_dir', type=str, default= './data/RsCarData/',
64 | help='path of dataset.')
65 |
66 |
67 | def parse(self, args=''):
68 | if args == '':
69 | opt = self.parser.parse_args()
70 | else:
71 | opt = self.parser.parse_args(args)
72 |
73 | opt.gpus_str = opt.gpus
74 | opt.gpus = [int(gpu) for gpu in opt.gpus.split(',')]
75 | opt.gpus = [i for i in range(len(opt.gpus))] if opt.gpus[0] >= 0 else [-1]
76 | opt.lr_step = [int(i) for i in opt.lr_step.split(',')]
77 | opt.dataName = opt.data_dir.split('/')[-2]
78 |
79 | now = datetime.now()
80 | time_str = now.strftime("%Y_%m_%d_%H_%M_%S")
81 |
82 | opt.save_dir = opt.save_dir + '/' + opt.datasetname
83 |
84 | if (not os.path.exists(opt.save_dir)):
85 | os.makedirs(opt.save_dir)
86 |
87 | opt.save_dir = opt.save_dir + '/' + opt.model_name
88 |
89 | if (not os.path.exists(opt.save_dir)):
90 | os.makedirs(opt.save_dir)
91 |
92 | opt.save_results_dir = opt.save_dir+'/results'
93 |
94 | opt.save_dir = opt.save_dir + '/weights' + time_str
95 | opt.save_log_dir = opt.save_dir
96 |
97 | return opt
98 |
--------------------------------------------------------------------------------
/lib/utils/post_process.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 | from lib.utils.image import transform_preds
7 |
8 |
9 | def get_pred_depth(depth):
10 | return depth
11 |
12 | def 
get_alpha(rot): 13 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 14 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 15 | # return rot[:, 0] 16 | idx = rot[:, 1] > rot[:, 5] 17 | alpha1 = np.arctan(rot[:, 2] / rot[:, 3]) + (-0.5 * np.pi) 18 | alpha2 = np.arctan(rot[:, 6] / rot[:, 7]) + ( 0.5 * np.pi) 19 | return alpha1 * idx + alpha2 * (1 - idx) 20 | 21 | 22 | def ddd_post_process_2d(dets, c, s, opt): 23 | # dets: batch x max_dets x dim 24 | # return 1-based class det list 25 | ret = [] 26 | include_wh = dets.shape[2] > 16 27 | for i in range(dets.shape[0]): 28 | top_preds = {} 29 | dets[i, :, :2] = transform_preds( 30 | dets[i, :, 0:2], c[i], s[i], (opt.output_w, opt.output_h)) 31 | classes = dets[i, :, -1] 32 | for j in range(opt.num_classes): 33 | inds = (classes == j) 34 | top_preds[j + 1] = np.concatenate([ 35 | dets[i, inds, :3].astype(np.float32), 36 | get_alpha(dets[i, inds, 3:11])[:, np.newaxis].astype(np.float32), 37 | get_pred_depth(dets[i, inds, 11:12]).astype(np.float32), 38 | dets[i, inds, 12:15].astype(np.float32)], axis=1) 39 | if include_wh: 40 | top_preds[j + 1] = np.concatenate([ 41 | top_preds[j + 1], 42 | transform_preds( 43 | dets[i, inds, 15:17], c[i], s[i], (opt.output_w, opt.output_h)) 44 | .astype(np.float32)], axis=1) 45 | ret.append(top_preds) 46 | return ret 47 | 48 | def ddd_post_process_3d(dets, calibs): 49 | # dets: batch x max_dets x dim 50 | # return 1-based class det list 51 | ret = [] 52 | for i in range(len(dets)): 53 | preds = {} 54 | for cls_ind in dets[i].keys(): 55 | preds[cls_ind] = [] 56 | for j in range(len(dets[i][cls_ind])): 57 | center = dets[i][cls_ind][j][:2] 58 | score = dets[i][cls_ind][j][2] 59 | alpha = dets[i][cls_ind][j][3] 60 | depth = dets[i][cls_ind][j][4] 61 | dimensions = dets[i][cls_ind][j][5:8] 62 | wh = dets[i][cls_ind][j][8:10] 63 | locations, rotation_y = ddd2locrot( 64 | center, alpha, dimensions, depth, calibs[0]) 65 | bbox = [center[0] - wh[0] / 2, center[1] - wh[1] / 2, 66 | center[0] + wh[0] / 2, center[1] + wh[1] / 2] 67 | pred = [alpha] + bbox + dimensions.tolist() + \ 68 | locations.tolist() + [rotation_y, score] 69 | preds[cls_ind].append(pred) 70 | preds[cls_ind] = np.array(preds[cls_ind], dtype=np.float32) 71 | ret.append(preds) 72 | return ret 73 | 74 | def ddd_post_process(dets, c, s, calibs, opt): 75 | # dets: batch x max_dets x dim 76 | # return 1-based class det list 77 | dets = ddd_post_process_2d(dets, c, s, opt) 78 | dets = ddd_post_process_3d(dets, calibs) 79 | return dets 80 | 81 | 82 | def ctdet_post_process(dets, c, s, h, w, num_classes): 83 | # dets: batch x max_dets x dim 84 | # return 1-based class det dict 85 | ret = [] 86 | for i in range(dets.shape[0]): 87 | top_preds = {} 88 | dets[i, :, :2] = transform_preds( 89 | dets[i, :, 0:2], c[i], s[i], (w, h)) 90 | dets[i, :, 2:4] = transform_preds( 91 | dets[i, :, 2:4], c[i], s[i], (w, h)) 92 | classes = dets[i, :, -1] 93 | for j in range(num_classes): 94 | inds = (classes == j) 95 | top_preds[j + 1] = np.concatenate([ 96 | dets[i, inds, :4].astype(np.float32), 97 | dets[i, inds, 4:5].astype(np.float32)], axis=1).tolist() 98 | ret.append(top_preds) 99 | return ret 100 | 101 | 102 | def multi_pose_post_process(dets, c, s, h, w): 103 | # dets: batch x max_dets x 40 104 | # return list of 39 in image coord 105 | num_classes=24 106 | ret = [] 107 | for i in range(dets.shape[0]): 108 | top_preds = {} 109 | bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h)) 110 | pts = transform_preds(dets[i, :, 5:39].reshape(-1, 2), 
c[i], s[i], (w, h)) 111 | bbox = bbox.reshape(-1, 4) 112 | pts = pts.reshape(-1, 34) 113 | score=dets[i, :, 4] 114 | score = np.expand_dims(score,axis=1) 115 | classes = dets[i, :, -1] 116 | for j in range(num_classes): 117 | inds = (classes == j) 118 | top_preds[j + 1] = np.concatenate([ 119 | bbox[ inds, :].astype(np.float32),score[inds], 120 | pts[inds, :].astype(np.float32)], axis=1).tolist() 121 | ret.append(top_preds) 122 | return ret 123 | 124 | 125 | 126 | def multi_pose_post_process_ori(dets, c, s, h, w): 127 | # dets: batch x max_dets x 40 128 | # return list of 39 in image coord 129 | ret = [] 130 | for i in range(dets.shape[0]): 131 | bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h)) 132 | pts = transform_preds(dets[i, :, 5:39].reshape(-1, 2), c[i], s[i], (w, h)) 133 | top_preds = np.concatenate( 134 | [bbox.reshape(-1, 4), dets[i, :, 4:5], 135 | pts.reshape(-1, 34)], axis=1).astype(np.float32).tolist() 136 | ret.append({np.ones(1, dtype=np.int32)[0]: top_preds}) 137 | return ret -------------------------------------------------------------------------------- /lib/utils/scatter_gather.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Variable 3 | from torch.nn.parallel._functions import Scatter, Gather 4 | 5 | 6 | def scatter(inputs, target_gpus, dim=0, chunk_sizes=None): 7 | r""" 8 | Slices variables into approximately equal chunks and 9 | distributes them across given GPUs. Duplicates 10 | references to objects that are not variables. Does not 11 | support Tensors. 12 | """ 13 | def scatter_map(obj): 14 | if isinstance(obj, Variable): 15 | return Scatter.apply(target_gpus, chunk_sizes, dim, obj) 16 | assert not torch.is_tensor(obj), "Tensors not supported in scatter." 17 | if isinstance(obj, tuple): 18 | return list(zip(*map(scatter_map, obj))) 19 | if isinstance(obj, list): 20 | return list(map(list, zip(*map(scatter_map, obj)))) 21 | if isinstance(obj, dict): 22 | return list(map(type(obj), zip(*map(scatter_map, obj.items())))) 23 | return [obj for targets in target_gpus] 24 | 25 | return scatter_map(inputs) 26 | 27 | 28 | def scatter_kwargs(inputs, kwargs, target_gpus, dim=0, chunk_sizes=None): 29 | r"""Scatter with support for kwargs dictionary""" 30 | inputs = scatter(inputs, target_gpus, dim, chunk_sizes) if inputs else [] 31 | kwargs = scatter(kwargs, target_gpus, dim, chunk_sizes) if kwargs else [] 32 | if len(inputs) < len(kwargs): 33 | inputs.extend([() for _ in range(len(kwargs) - len(inputs))]) 34 | elif len(kwargs) < len(inputs): 35 | kwargs.extend([{} for _ in range(len(inputs) - len(kwargs))]) 36 | inputs = tuple(inputs) 37 | kwargs = tuple(kwargs) 38 | return inputs, kwargs 39 | -------------------------------------------------------------------------------- /lib/utils/sort.py: -------------------------------------------------------------------------------- 1 | """ 2 | SORT: A Simple, Online and Realtime Tracker 3 | Copyright (C) 2016-2020 Alex Bewley alex@bewley.ai 4 | 5 | This program is free software: you can redistribute it and/or modify 6 | it under the terms of the GNU General Public License as published by 7 | the Free Software Foundation, either version 3 of the License, or 8 | (at your option) any later version. 9 | 10 | This program is distributed in the hope that it will be useful, 11 | but WITHOUT ANY WARRANTY; without even the implied warranty of 12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the
13 | GNU General Public License for more details.
14 |
15 | You should have received a copy of the GNU General Public License
16 | along with this program. If not, see <http://www.gnu.org/licenses/>.
17 | """
18 | from __future__ import print_function
19 |
20 | import os
21 | import numpy as np
22 | import matplotlib
23 | matplotlib.use('TkAgg')
24 | import matplotlib.pyplot as plt
25 | import matplotlib.patches as patches
26 | from skimage import io
27 |
28 | import glob
29 | import time
30 | import argparse
31 | from filterpy.kalman import KalmanFilter
32 |
33 | np.random.seed(0)
34 |
35 |
36 | def linear_assignment(cost_matrix):
37 | try:
38 | import lap
39 | _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
40 | return np.array([[y[i],i] for i in x if i >= 0])
41 | except ImportError:
42 | from scipy.optimize import linear_sum_assignment
43 | x, y = linear_sum_assignment(cost_matrix)
44 | return np.array(list(zip(x, y)))
45 |
46 |
47 | def iou_batch(bb_test, bb_gt):
48 | """
49 | From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2]
50 | """
51 | bb_gt = np.expand_dims(bb_gt, 0)
52 | bb_test = np.expand_dims(bb_test, 1)
53 |
54 | xx1 = np.maximum(bb_test[..., 0], bb_gt[..., 0])
55 | yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1])
56 | xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2])
57 | yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3])
58 | w = np.maximum(0., xx2 - xx1)
59 | h = np.maximum(0., yy2 - yy1)
60 | wh = w * h
61 | o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1])
62 | + (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1]) - wh)
63 | return(o)
64 |
65 |
66 | def convert_bbox_to_z(bbox):
67 | """
68 | Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form
69 | [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is
70 | the aspect ratio
71 | """
72 | w = bbox[2] - bbox[0]
73 | h = bbox[3] - bbox[1]
74 | x = bbox[0] + w/2.
75 | y = bbox[1] + h/2.
76 | s = w * h #scale is just area
77 | r = w / float(h)
78 | return np.array([x, y, s, r]).reshape((4, 1))
79 |
80 |
81 | def convert_x_to_bbox(x,score=None):
82 | """
83 | Takes a bounding box in the centre form [x,y,s,r] and returns it in the form
84 | [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right
85 | """
86 | w = np.sqrt(x[2] * x[3])
87 | h = x[2] / w
88 | if(score is None):
89 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.]).reshape((1,4))
90 | else:
91 | return np.array([x[0]-w/2.,x[1]-h/2.,x[0]+w/2.,x[1]+h/2.,score]).reshape((1,5))
92 |
93 |
94 | class KalmanBoxTracker(object):
95 | """
96 | This class represents the internal state of individual tracked objects observed as bbox.
97 | """
98 | count = 0
99 | def __init__(self,bbox):
100 | """
101 | Initialises a tracker using initial bounding box.
102 | """
103 | #define constant velocity model
104 | self.kf = KalmanFilter(dim_x=7, dim_z=4)
105 | self.kf.F = np.array([[1,0,0,0,1,0,0],[0,1,0,0,0,1,0],[0,0,1,0,0,0,1],[0,0,0,1,0,0,0], [0,0,0,0,1,0,0],[0,0,0,0,0,1,0],[0,0,0,0,0,0,1]])
106 | self.kf.H = np.array([[1,0,0,0,0,0,0],[0,1,0,0,0,0,0],[0,0,1,0,0,0,0],[0,0,0,1,0,0,0]])
107 |
108 | self.kf.R[2:,2:] *= 10.
109 | self.kf.P[4:,4:] *= 1000. #give high uncertainty to the unobservable initial velocities
110 | self.kf.P *= 10.
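# The state is x = [u, v, s, r, du, dv, ds]: box centre (u, v), scale/area s
# and aspect ratio r (assumed constant), plus the velocities of u, v and s.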
111 | self.kf.Q[-1,-1] *= 0.01
112 | self.kf.Q[4:,4:] *= 0.01
113 |
114 | self.kf.x[:4] = convert_bbox_to_z(bbox)
115 | self.time_since_update = 0
116 | self.id = KalmanBoxTracker.count
117 | KalmanBoxTracker.count += 1
118 | self.history = []
119 | self.hits = 0
120 | self.hit_streak = 0
121 | self.age = 0
122 |
123 | def update(self,bbox):
124 | """
125 | Updates the state vector with observed bbox.
126 | """
127 | self.time_since_update = 0
128 | self.history = []
129 | self.hits += 1
130 | self.hit_streak += 1
131 | self.kf.update(convert_bbox_to_z(bbox))
132 |
133 | def predict(self):
134 | """
135 | Advances the state vector and returns the predicted bounding box estimate.
136 | """
137 | if((self.kf.x[6]+self.kf.x[2])<=0):
138 | self.kf.x[6] *= 0.0
139 | self.kf.predict()
140 | self.age += 1
141 | if(self.time_since_update>0):
142 | self.hit_streak = 0
143 | self.time_since_update += 1
144 | self.history.append(convert_x_to_bbox(self.kf.x))
145 | return self.history[-1]
146 |
147 | def get_state(self):
148 | """
149 | Returns the current bounding box estimate.
150 | """
151 | return convert_x_to_bbox(self.kf.x)
152 |
153 |
154 | def associate_detections_to_trackers(detections,trackers,iou_threshold = 0.3):
155 | """
156 | Assigns detections to tracked object (both represented as bounding boxes)
157 |
158 | Returns 3 lists of matches, unmatched_detections and unmatched_trackers
159 | """
160 | if(len(trackers)==0):
161 | return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)
162 |
163 | iou_matrix = iou_batch(detections, trackers)
164 |
165 | if min(iou_matrix.shape) > 0:
166 | a = (iou_matrix > iou_threshold).astype(np.int32)
167 | if a.sum(1).max() == 1 and a.sum(0).max() == 1:
168 | matched_indices = np.stack(np.where(a), axis=1)
169 | else:
170 | matched_indices = linear_assignment(-iou_matrix)
171 | else:
172 | matched_indices = np.empty(shape=(0,2))
173 |
174 | unmatched_detections = []
175 | for d, det in enumerate(detections):
176 | if(d not in matched_indices[:,0]):
177 | unmatched_detections.append(d)
178 | unmatched_trackers = []
179 | for t, trk in enumerate(trackers):
180 | if(t not in matched_indices[:,1]):
181 | unmatched_trackers.append(t)
182 |
183 | #filter out matched with low IOU
184 | matches = []
185 | for m in matched_indices:
186 | if(iou_matrix[m[0], m[1]] < iou_threshold):
187 | unmatched_detections.append(m[0])
188 | unmatched_trackers.append(m[1])
189 | else:
190 | matches.append(m.reshape(1,2))
191 | if(len(matches)==0):
192 | matches = np.empty((0,2),dtype=int)
193 | else:
194 | matches = np.concatenate(matches,axis=0)
195 |
196 | return matches, np.array(unmatched_detections), np.array(unmatched_trackers)
197 |
198 |
199 | class Sort(object):
200 | def __init__(self, max_age=1, min_hits=3, iou_threshold=0.3):
201 | """
202 | Sets key parameters for SORT
203 | """
204 | self.max_age = max_age
205 | self.min_hits = min_hits
206 | self.iou_threshold = iou_threshold
207 | self.trackers = []
208 | self.frame_count = 0
209 |
210 | def update(self, dets=np.empty((0, 5))):
211 | """
212 | Params:
213 | dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]
214 | Requires: this method must be called once for each frame even with empty detections (use np.empty((0, 5)) for frames without detections).
215 | Returns a similar array, where the last column is the object ID.
216 |
217 | NOTE: The number of objects returned may differ from the number of detections provided.
218 | """
219 | self.frame_count += 1
220 | # get predicted locations from existing trackers.
221 | trks = np.zeros((len(self.trackers), 5))
222 | to_del = []
223 | ret = []
224 | for t, trk in enumerate(trks):
225 | pos = self.trackers[t].predict()[0]
226 | trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]
227 | if np.any(np.isnan(pos)):
228 | to_del.append(t)
229 | trks = np.ma.compress_rows(np.ma.masked_invalid(trks))
230 | for t in reversed(to_del):
231 | self.trackers.pop(t)
232 | matched, unmatched_dets, unmatched_trks = associate_detections_to_trackers(dets, trks, self.iou_threshold)
233 |
234 | # update matched trackers with assigned detections
235 | for m in matched:
236 | self.trackers[m[1]].update(dets[m[0], :])
237 |
238 | # create and initialise new trackers for unmatched detections
239 | for i in unmatched_dets:
240 | trk = KalmanBoxTracker(dets[i,:])
241 | self.trackers.append(trk)
242 |
243 | i = len(self.trackers)
244 | for trk in reversed(self.trackers):
245 | d = trk.get_state()[0]
246 | if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):
247 | ret.append(np.concatenate((d,[trk.id+1])).reshape(1,-1)) # +1 as MOT benchmark requires positive
248 | i -= 1
249 | # remove dead tracklet
250 | if(trk.time_since_update > self.max_age):
251 | self.trackers.pop(i)
252 | if(len(ret)>0):
253 | return np.concatenate(ret)
254 | return np.empty((0,5))
255 |
256 | def parse_args():
257 | """Parse input arguments."""
258 | parser = argparse.ArgumentParser(description='SORT demo')
259 | parser.add_argument('--display', dest='display', help='Display online tracker output (slow) [False]',action='store_true')
260 | parser.add_argument("--seq_path", help="Path to detections.", type=str, default='data')
261 | parser.add_argument("--phase", help="Subdirectory in seq_path.", type=str, default='train')
262 | parser.add_argument("--max_age",
263 | help="Maximum number of frames to keep alive a track without associated detections.",
264 | type=int, default=1)
265 | parser.add_argument("--min_hits",
266 | help="Minimum number of associated detections before track is initialised.",
267 | type=int, default=3)
268 | parser.add_argument("--iou_threshold", help="Minimum IOU for match.", 
type=float, default=0.3)
269 | args = parser.parse_args()
270 | return args
271 |
272 | if __name__ == '__main__':
273 | # all train
274 | args = parse_args()
275 | display = args.display
276 | phase = args.phase
277 | total_time = 0.0
278 | total_frames = 0
279 | colours = np.random.rand(32, 3) #used only for display
280 | if(display):
281 | if not os.path.exists('mot_benchmark'):
282 | print('\n\tERROR: mot_benchmark link not found!\n\n Create a symbolic link to the MOT benchmark\n (https://motchallenge.net/data/2D_MOT_2015/#download). E.g.:\n\n $ ln -s /path/to/MOT2015_challenge/2DMOT2015 mot_benchmark\n\n')
283 | exit()
284 | plt.ion()
285 | fig = plt.figure()
286 | ax1 = fig.add_subplot(111, aspect='equal')
287 |
288 | if not os.path.exists('output'):
289 | os.makedirs('output')
290 | pattern = os.path.join(args.seq_path, phase, '*', 'det', 'det.txt')
291 | for seq_dets_fn in glob.glob(pattern):
292 | mot_tracker = Sort(max_age=args.max_age,
293 | min_hits=args.min_hits,
294 | iou_threshold=args.iou_threshold) #create instance of the SORT tracker
295 | seq_dets = np.loadtxt(seq_dets_fn, delimiter=',')
296 | seq = seq_dets_fn[pattern.find('*'):].split(os.path.sep)[0]
297 |
298 | with open(os.path.join('output', '%s.txt'%(seq)),'w') as out_file:
299 | print("Processing %s."%(seq))
300 | for frame in range(int(seq_dets[:,0].max())):
301 | frame += 1 #detection and frame numbers begin at 1
302 | dets = seq_dets[seq_dets[:, 0]==frame, 2:7]
303 | dets[:, 2:4] += dets[:, 0:2] #convert from [x1,y1,w,h] to [x1,y1,x2,y2]
304 | total_frames += 1
305 |
306 | if(display):
307 | fn = os.path.join('mot_benchmark', phase, seq, 'img1', '%06d.jpg'%(frame))
308 | im = io.imread(fn)
309 | ax1.imshow(im)
310 | plt.title(seq + ' Tracked Targets')
311 |
312 | start_time = time.time()
313 | trackers = mot_tracker.update(dets)
314 | cycle_time = time.time() - start_time
315 | total_time += cycle_time
316 |
317 | for d in trackers:
318 | print('%d,%d,%.2f,%.2f,%.2f,%.2f,1,-1,-1,-1'%(frame,d[4],d[0],d[1],d[2]-d[0],d[3]-d[1]),file=out_file)
319 | if(display):
320 | d = d.astype(np.int32)
321 | ax1.add_patch(patches.Rectangle((d[0],d[1]),d[2]-d[0],d[3]-d[1],fill=False,lw=3,ec=colours[d[4]%32,:]))
322 |
323 | if(display):
324 | fig.canvas.flush_events()
325 | plt.draw()
326 | ax1.cla()
327 |
328 | print("Total Tracking took: %.3f seconds for %d frames or %.1f FPS" % (total_time, total_frames, total_frames / total_time))
329 |
330 | if(display):
331 | print("Note: to get real runtime results run without the option: --display")
332 |
--------------------------------------------------------------------------------
/lib/utils/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 | import torch.nn as nn
7 |
8 | def _sigmoid(x):
9 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)
10 | return y
11 |
12 | # Compute the wh loss.
13 | # ind stores the flattened indices of the targets on the heatmap; _transpose_and_gather_feat
14 | # together with _gather_feat(feat, ind, mask=None) extracts our predicted widths and heights.
15 | # _gather_feat picks the elements of feat addressed by ind.
16 | def _gather_feat(feat, ind, mask=None):
17 | # This removes the distinction between channels: the final inds are taken over all channels at once.
18 | # feat (topk_inds): batch * (cat x K) * 1
19 | # ind (topk_ind): batch * K
20 | dim = feat.size(2)
21 | # First add an extra axis to ind, making it batch * K * 1.
22 | ind = ind.unsqueeze(2).expand(ind.size(0), ind.size(1), dim)
23 | # What is returned is an index:
24 | # feat: batch * K * 1, values in [0, cat x K - 1]
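# A tiny worked example (shapes assumed purely for illustration):
#   feat: 2 x 6 x 1 (batch=2, cat*K=6), ind: 2 x 3 (batch=2, K=3)
#   ind.unsqueeze(2).expand(2, 3, 1) -> 2 x 3 x 1, and feat.gather(1, ind)
#   yields r with r[b, k, 0] = feat[b, ind[b, k, 0], 0], i.e. shape 2 x 3 x 1.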
25 | # More generally:
26 | # feat: A * B * C
27 | # ind: A * D
28 | # First add an axis to ind and expand it to size dim, giving A * D * C; for any i, j, all elements of ind[i, j, :] are identical and equal to ind[i, j] of the original A * D array.
29 | # Then use gather to pull out the values addressed by ind.
30 | # The resulting feat: A * D * C
31 |
32 | feat = feat.gather(1, ind)
33 | if mask is not None:
34 | mask = mask.unsqueeze(2).expand_as(feat)
35 | feat = feat[mask]
36 | feat = feat.view(-1, dim)
37 | return feat
38 |
39 | def _transpose_and_gather_feat(feat, ind):
40 | # First move the per-channel elements of feat to the last axis and call contiguous so the memory layout suits the view below.
41 | # Then reshape feat to batch * (W x H) * C
42 | # and use _gather_feat to pick the elements of feat addressed by ind.
43 | # Returns:
44 | # feat: batch * K * C
45 | # feat[i, j, k] is the j-th peak of the k-th channel in the i-th batch sample.
46 |
47 | feat = feat.permute(0, 2, 3, 1).contiguous()
48 | feat = feat.view(feat.size(0), -1, feat.size(3))
49 | feat = _gather_feat(feat, ind)
50 | return feat
51 |
52 | def flip_tensor(x):
53 | return torch.flip(x, [3])
54 | # tmp = x.detach().cpu().numpy()[..., ::-1].copy()
55 | # return torch.from_numpy(tmp).to(x.device)
56 |
57 | def flip_lr(x, flip_idx):
58 | tmp = x.detach().cpu().numpy()[..., ::-1].copy()
59 | shape = tmp.shape
60 | for e in flip_idx:
61 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \
62 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
63 | return torch.from_numpy(tmp.reshape(shape)).to(x.device)
64 |
65 | def flip_lr_off(x, flip_idx):
66 | tmp = x.detach().cpu().numpy()[..., ::-1].copy()
67 | shape = tmp.shape
68 | tmp = tmp.reshape(tmp.shape[0], 17, 2,
69 | tmp.shape[2], tmp.shape[3])
70 | tmp[:, :, 0, :, :] *= -1
71 | for e in flip_idx:
72 | tmp[:, e[0], ...], tmp[:, e[1], ...] = \
73 | tmp[:, e[1], ...].copy(), tmp[:, e[0], ...].copy()
74 | return torch.from_numpy(tmp.reshape(shape)).to(x.device)
75 |
76 |
77 | class AverageMeter(object):
78 | """Computes and stores the average and current value"""
79 | def __init__(self):
80 | self.reset()
81 |
82 | def reset(self):
83 | self.val = 0
84 | self.avg = 0
85 | self.sum = 0
86 | self.count = 0
87 |
88 | def update(self, val, n=1):
89 | self.val = val
90 | self.sum += val * n
91 | self.count += n
92 | if self.count > 0:
93 | self.avg = self.sum / self.count
--------------------------------------------------------------------------------
/lib/utils/utils_eval.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import xml.dom.minidom as doxml
3 |
4 | class eval_metric():
5 | def __init__(self, dis_th = 0.5, iou_th=0.05, eval_mode = 'dis'):
6 | self.dis_th = dis_th
7 | self.iou_th = iou_th
8 | self.eval_mode = eval_mode
9 | self.area_min_th = 2
10 | self.area_max_th = 80
11 | self.tp = 0
12 | self.fp = 0
13 | self.tn = 0
14 | self.fn = 0
15 |
16 |
17 | def reset(self):
18 | self.tp = 0
19 | self.fp = 0
20 | self.tn = 0
21 | self.fn = 0
22 |
23 | def get_result(self):
24 | precision = self.tp/(self.tp + self.fp+1e-7)
25 | recall = self.tp/(self.tp + self.fn+1e-7)
26 | f1 = 2*recall*precision/(recall+precision+1e-7)
27 |
28 | out = {}
29 | out['recall'] = recall*100
30 | out['prec'] = precision*100
31 | out['f1'] = f1*100
32 | out['tp'] = self.tp
33 | out['fp'] = self.fp
34 | out['fn'] = self.fn
35 |
36 | return out
37 |
38 | def update(self, gt, det):
39 | if (gt.shape[0] > 0):
40 | if (det.shape[0] > 0):
41 | if self.eval_mode == 'iou':
42 | cost_matrix = self.iou_batch(det, gt)
43 | elif self.eval_mode == 'dis':
44 | cost_matrix = self.dist_batch(det, gt)
45 | if min(cost_matrix.shape) > 0:
46 | cost_matrix[cost_matrix > self.dis_th] = self.dis_th + 10
47 | else:
48 | raise Exception('Not a valid eval mode!')
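# The assignment below is one-to-one (Hungarian matching): pairs within
# dis_th (or above iou_th in 'iou' mode) count as TPs; unmatched ground
# truths become FNs and unmatched detections become FPs.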
49 |
50 | if min(cost_matrix.shape) > 0:
51 | # matched_indices = self.linear_assignment(cost_matrix)
52 | # matched_matrix = cost_matrix[matched_indices[:, 0], matched_indices[:, 1]]
53 | if self.eval_mode == 'iou':
54 | matched_indices = self.linear_assignment(-cost_matrix)
55 | matched_matrix = cost_matrix[matched_indices[:, 0], matched_indices[:, 1]]
56 | matched_results = matched_matrix[matched_matrix > self.iou_th]
57 | elif self.eval_mode == 'dis':
58 | matched_indices = self.linear_assignment(cost_matrix)
59 | matched_matrix = cost_matrix[matched_indices[:, 0], matched_indices[:, 1]]
60 | matched_results = matched_matrix[matched_matrix < self.dis_th]
61 | else:
62 | raise Exception('Not a valid eval mode!')
63 | else:
64 | matched_results = np.empty(shape=(0, 1))
65 |
66 | tp = matched_results.shape[0]
67 | fn = gt.shape[0] - tp
68 | fp = det.shape[0] - tp
69 | else:
70 | tp = 0
71 | fn = gt.shape[0]
72 | fp = 0
73 | else:
74 | tp = 0
75 | fn = 0
76 | fp = det.shape[0]
77 |
78 | self.tp += tp
79 | self.fn += fn
80 | self.fp += fp
81 |
82 | def iou_batch(self, bb_test, bb_gt):
83 | """
84 | From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2]
85 | """
86 | bb_gt = np.expand_dims(bb_gt, 0)
87 | bb_test = np.expand_dims(bb_test, 1)
88 |
89 | xx1 = np.maximum(bb_test[..., 0], bb_gt[..., 0])
90 | yy1 = np.maximum(bb_test[..., 1], bb_gt[..., 1])
91 | xx2 = np.minimum(bb_test[..., 2], bb_gt[..., 2])
92 | yy2 = np.minimum(bb_test[..., 3], bb_gt[..., 3])
93 | w = np.maximum(0., xx2 - xx1)
94 | h = np.maximum(0., yy2 - yy1)
95 | wh = w * h
96 | o = wh / ((bb_test[..., 2] - bb_test[..., 0]) * (bb_test[..., 3] - bb_test[..., 1])
97 | + (bb_gt[..., 2] - bb_gt[..., 0]) * (bb_gt[..., 3] - bb_gt[..., 1]) - wh + 1e-7)
98 | return (o)
99 |
100 | def dist_batch(self, bb_test, bb_gt):
101 | """
102 | Computes the centre-to-centre distance between two sets of boxes in the form [x1,y1,x2,y2]
103 | """
104 | bb_gt = np.expand_dims(bb_gt, 0)
105 | bb_test = np.expand_dims(bb_test, 1)
106 |
107 | gt_center = (bb_gt[:, :, :2] + bb_gt[:, :, 2:4]) / 2
108 | det_center = (bb_test[:, :, :2] + bb_test[:, :, 2:4]) / 2
109 | o = np.sqrt(np.sum((gt_center - det_center) ** 2, -1))
110 | return (o)
111 |
112 | def linear_assignment(self, cost_matrix):
113 | try:
114 | import lap
115 | _, x, y = lap.lapjv(cost_matrix, extend_cost=True)
116 | return np.array([[y[i], i] for i in x if i >= 0])
117 | except ImportError:
118 | from scipy.optimize import linear_sum_assignment
119 | x, y = linear_sum_assignment(cost_matrix)
120 | return np.array(list(zip(x, y)))
121 |
122 | def getGtFromXml(self, xml_file):
123 | # tree = ET.parse(xml_file)
124 | tree = doxml.parse(xml_file)
125 | # root = tree.getroot()
126 | annotation = tree.documentElement
127 |
128 | objectlist = annotation.getElementsByTagName('object')
129 |
130 | gt = []
131 |
132 | if (len(objectlist) > 0):
133 | for object in objectlist:
134 | bndbox = object.getElementsByTagName('bndbox')
135 | for box in bndbox:
136 | xmin0 = box.getElementsByTagName('xmin')
137 | xmin = int(xmin0[0].childNodes[0].data)
138 | ymin0 = box.getElementsByTagName('ymin')
139 | ymin = int(ymin0[0].childNodes[0].data)
140 | xmax0 = box.getElementsByTagName('xmax')
141 | xmax = int(xmax0[0].childNodes[0].data)
142 | ymax0 = box.getElementsByTagName('ymax')
143 | ymax = int(ymax0[0].childNodes[0].data)
144 | gt.append([xmin, ymin, xmax, ymax])
145 | return np.array(gt)
--------------------------------------------------------------------------------
/readme/net.bmp:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChaoXiao12/Moving-object-detection-DSFNet/2938297db6d92478ba34f00d85324e6d1cd3a1c5/readme/net.bmp -------------------------------------------------------------------------------- /readme/visualResults.bmp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ChaoXiao12/Moving-object-detection-DSFNet/2938297db6d92478ba34f00d85324e6d1cd3a1c5/readme/visualResults.bmp -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | Cython 3 | numba 4 | progress 5 | matplotlib 6 | easydict 7 | scipy 8 | numpy 9 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import numpy as np 7 | import time 8 | import torch 9 | 10 | from lib.utils.opts import opts 11 | 12 | from lib.models.stNet import get_det_net, load_model, save_model 13 | from lib.dataset.coco import COCO 14 | 15 | from lib.external.nms import soft_nms 16 | 17 | from lib.utils.decode import ctdet_decode 18 | from lib.utils.post_process import ctdet_post_process 19 | 20 | import cv2 21 | 22 | from progress.bar import Bar 23 | 24 | CONFIDENCE_thres = 0.3 25 | COLORS = [(255, 0, 0)] 26 | 27 | FONT = cv2.FONT_HERSHEY_SIMPLEX 28 | 29 | def cv2_demo(frame, detections): 30 | det = [] 31 | for i in range(detections.shape[0]): 32 | if detections[i, 4] >= CONFIDENCE_thres: 33 | pt = detections[i, :] 34 | cv2.rectangle(frame,(int(pt[0])-4, int(pt[1])-4),(int(pt[2])+4, int(pt[3])+4),COLORS[0], 2) 35 | cv2.putText(frame, str(pt[4]), (int(pt[0]), int(pt[1])), FONT, 1, (0, 255, 0), 1) 36 | det.append([int(pt[0]), int(pt[1]),int(pt[2]), int(pt[3]),detections[i, 4]]) 37 | return frame, det 38 | 39 | def process(model, image, return_time): 40 | with torch.no_grad(): 41 | output = model(image)[-1] 42 | hm = output['hm'].sigmoid_() 43 | wh = output['wh'] 44 | reg = output['reg'] 45 | torch.cuda.synchronize() 46 | forward_time = time.time() 47 | dets = ctdet_decode(hm, wh, reg=reg) 48 | if return_time: 49 | return output, dets, forward_time 50 | else: 51 | return output, dets 52 | 53 | def post_process(dets, meta, num_classes=1, scale=1): 54 | dets = dets.detach().cpu().numpy() 55 | dets = dets.reshape(1, -1, dets.shape[2]) 56 | dets = ctdet_post_process( 57 | dets.copy(), [meta['c']], [meta['s']], 58 | meta['out_height'], meta['out_width'], num_classes) 59 | for j in range(1, num_classes + 1): 60 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5) 61 | dets[0][j][:, :4] /= scale 62 | return dets[0] 63 | 64 | def pre_process(image, scale=1): 65 | height, width = image.shape[2:4] 66 | new_height = int(height * scale) 67 | new_width = int(width * scale) 68 | 69 | inp_height, inp_width = height, width 70 | c = np.array([new_width / 2., new_height / 2.], dtype=np.float32) 71 | s = max(height, width) * 1.0 72 | 73 | meta = {'c': c, 's': s, 74 | 'out_height': inp_height , 75 | 'out_width': inp_width} 76 | return meta 77 | 78 | def merge_outputs(detections, num_classes ,max_per_image): 79 | results = {} 80 | for j in range(1, num_classes + 1): 81 | results[j] = np.concatenate( 
82 | [detection[j] for detection in detections], axis=0).astype(np.float32)
83 |
84 | soft_nms(results[j], Nt=0.5, method=2)
85 |
86 | scores = np.hstack(
87 | [results[j][:, 4] for j in range(1, num_classes + 1)])
88 | if len(scores) > max_per_image:
89 | kth = len(scores) - max_per_image
90 | thresh = np.partition(scores, kth)[kth]
91 | for j in range(1, num_classes + 1):
92 | keep_inds = (results[j][:, 4] >= thresh)
93 | results[j] = results[j][keep_inds]
94 | return results
95 |
96 | def test(opt, split, modelPath, show_flag, results_name):
97 |
98 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
99 | opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
100 |
101 | # Logger(opt)
102 | print(opt.model_name)
103 |
104 | dataset = COCO(opt, split)
105 |
106 | data_loader = torch.utils.data.DataLoader(
107 | dataset,
108 | batch_size=1, shuffle=False, num_workers=1, pin_memory=True)
109 |
110 | model = get_det_net({'hm': dataset.num_classes, 'wh': 2, 'reg': 2}, opt.model_name)  # build the model
111 | model = load_model(model, modelPath)
112 | model = model.cuda()
113 | model.eval()
114 |
115 | results = {}
116 |
117 | return_time = False
118 | scale = 1
119 | num_classes = dataset.num_classes
120 | max_per_image = opt.K
121 |
122 | # # save the results as a video
123 | # videoName = '/media/xc/New/xiaochao/code/SpatialTemporalNet/weights/2.avi'
124 | # fps = 10
125 | # size = (256, 256)
126 | # fourcc = cv2.VideoWriter_fourcc(*'XVID')
127 | # videoWriter = cv2.VideoWriter(videoName, fourcc,fps,size)
128 |
129 | num_iters = len(data_loader)
130 | bar = Bar('processing', max=num_iters)
131 | for ind, (img_id,pre_processed_images) in enumerate(data_loader):
132 | # print(ind)
133 | if(ind>len(data_loader)-1):
134 | break
135 |
136 | bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format(
137 | ind, num_iters,total=bar.elapsed_td, eta=bar.eta_td
138 | )
139 |
140 | # read the image
141 | detection = []
142 | meta = pre_process(pre_processed_images['input'], scale)
143 | image = pre_processed_images['input'].cuda()
144 | img = pre_processed_images['imgOri'].squeeze().numpy()
145 |
146 | # detect
147 | output, dets = process(model, image, return_time)
148 |
149 | # post-process
150 | dets = post_process(dets, meta, num_classes)
151 | detection.append(dets)
152 | ret = merge_outputs(detection, num_classes, max_per_image)
153 |
154 | if(show_flag):
155 | frame, det = cv2_demo(img, dets[1])
156 |
157 | cv2.imshow('frame',frame)
158 | cv2.waitKey(5)
159 |
160 | hm1 = output['hm'].squeeze(0).squeeze(0).cpu().detach().numpy()
161 |
162 | cv2.imshow('hm', hm1)
163 | cv2.waitKey(5)
164 |
165 | results[img_id.numpy().astype(np.int32)[0]] = ret
166 | bar.next()
167 | bar.finish()
168 | dataset.run_eval(results, opt.save_results_dir, results_name)
169 |
170 | if __name__ == '__main__':
171 | opt = opts().parse()
172 |
173 | split = 'test'
174 |
175 | show_flag = opt.show_results
176 |
177 | if (not os.path.exists(opt.save_results_dir)):
178 | os.mkdir(opt.save_results_dir)
179 |
180 | if opt.load_model != '':
181 | modelPath = opt.load_model
182 | else:
183 | modelPath = './checkpoints/DSFNet.pth'
184 |
185 | print(modelPath)
186 |
187 | results_name = opt.model_name+'_'+modelPath.split('/')[-1].split('.')[0]
188 |
189 | test(opt, split, modelPath, show_flag, results_name)
--------------------------------------------------------------------------------
/testSaveMat.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
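# testSaveMat.py mirrors test.py but additionally dumps the per-frame
# detections as .mat files, so that evaluation.py can score them offline.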
4 |
5 | import os
6 | import numpy as np
7 | import time
8 | import torch
9 |
10 | from lib.utils.opts import opts
11 |
12 | from lib.models.stNet import get_det_net, load_model, save_model
13 | from lib.dataset.coco import COCO
14 |
15 | from lib.external.nms import soft_nms
16 |
17 | from lib.utils.decode import ctdet_decode
18 | from lib.utils.post_process import ctdet_post_process
19 |
20 | import cv2
21 |
22 | from progress.bar import Bar
23 |
24 |
25 |
26 | import scipy.io as scio
27 |
28 | CONFIDENCE_thres = 0.3
29 | COLORS = [(255, 0, 0)]
30 |
31 | FONT = cv2.FONT_HERSHEY_SIMPLEX
32 |
33 | def cv2_demo(frame, detections):
34 | det = []
35 | for i in range(detections.shape[0]):
36 | if detections[i, 4] >= CONFIDENCE_thres:
37 | pt = detections[i, :]
38 | cv2.rectangle(frame,(int(pt[0])-4, int(pt[1])-4),(int(pt[2])+4, int(pt[3])+4),COLORS[0], 2)
39 | cv2.putText(frame, str(pt[4]), (int(pt[0]), int(pt[1])), FONT, 1, (0, 255, 0), 1)
40 | det.append([int(pt[0]), int(pt[1]),int(pt[2]), int(pt[3]),detections[i, 4]])
41 | return frame, det
42 |
43 | def process(model, image, return_time):
44 | with torch.no_grad():
45 | output = model(image)[-1]
46 | hm = output['hm'].sigmoid_()
47 | wh = output['wh']
48 | reg = output['reg']
49 | torch.cuda.synchronize()
50 | forward_time = time.time()
51 | dets = ctdet_decode(hm, wh, reg=reg)
52 | if return_time:
53 | return output, dets, forward_time
54 | else:
55 | return output, dets
56 |
57 | def post_process(dets, meta, num_classes=1, scale=1):
58 | dets = dets.detach().cpu().numpy()
59 | dets = dets.reshape(1, -1, dets.shape[2])
60 | dets = ctdet_post_process(
61 | dets.copy(), [meta['c']], [meta['s']],
62 | meta['out_height'], meta['out_width'], num_classes)
63 | for j in range(1, num_classes + 1):
64 | dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5)
65 | dets[0][j][:, :4] /= scale
66 | return dets[0]
67 |
68 | def pre_process(image, scale=1):
69 | height, width = image.shape[2:4]
70 | new_height = int(height * scale)
71 | new_width = int(width * scale)
72 |
73 | inp_height, inp_width = height, width
74 | c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
75 | s = max(height, width) * 1.0
76 |
77 | meta = {'c': c, 's': s,
78 | 'out_height': inp_height ,
79 | 'out_width': inp_width}
80 | return meta
81 |
82 | def merge_outputs(detections, num_classes ,max_per_image):
83 | results = {}
84 | for j in range(1, num_classes + 1):
85 | results[j] = np.concatenate(
86 | [detection[j] for detection in detections], axis=0).astype(np.float32)
87 |
88 | soft_nms(results[j], Nt=0.5, method=2)
89 |
90 | scores = np.hstack(
91 | [results[j][:, 4] for j in range(1, num_classes + 1)])
92 | if len(scores) > max_per_image:
93 | kth = len(scores) - max_per_image
94 | thresh = np.partition(scores, kth)[kth]
95 | for j in range(1, num_classes + 1):
96 | keep_inds = (results[j][:, 4] >= thresh)
97 | results[j] = results[j][keep_inds]
98 | return results
99 |
100 | def test(opt, split, modelPath, show_flag, results_name, saveMat=False):
101 |
102 | os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
103 | opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
104 |
105 | # Logger(opt)
106 | print(opt.model_name)
107 |
108 | dataset = COCO(opt, split)
109 |
110 | data_loader = torch.utils.data.DataLoader(
111 | dataset,
112 | batch_size=1, shuffle=False, num_workers=1, pin_memory=True)
113 |
114 | model = get_det_net({'hm': dataset.num_classes, 'wh': 2, 'reg': 2}, opt.model_name)  # build the model (get_det_net takes only heads and model_name)
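# The heads follow the CenterNet convention: 'hm' is the per-class centre
# heatmap, 'wh' the box width/height, and 'reg' the sub-pixel centre offset.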
100 | def test(opt, split, modelPath, show_flag, results_name, saveMat=False):
101 | 
102 |     os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
103 |     opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
104 | 
105 |     # Logger(opt)
106 |     print(opt.model_name)
107 | 
108 |     dataset = COCO(opt, split)
109 | 
110 |     data_loader = torch.utils.data.DataLoader(
111 |         dataset,
112 |         batch_size=1, shuffle=False, num_workers=1, pin_memory=True)
113 | 
114 |     model = get_det_net({'hm': dataset.num_classes, 'wh': 2, 'reg': 2}, opt.model_name, opt)  # build the model
115 |     model = load_model(model, modelPath)
116 |     model = model.cuda()
117 |     model.eval()
118 | 
119 |     results = {}
120 | 
121 |     return_time = False
122 |     scale = 1
123 |     num_classes = dataset.num_classes
124 |     max_per_image = opt.K
125 | 
126 |     if saveMat:
127 |         save_mat_path_upper = os.path.join(opt.save_results_dir, results_name+'_mat')
128 |         if not os.path.exists(save_mat_path_upper):
129 |             os.mkdir(save_mat_path_upper)
130 | 
131 |     num_iters = len(data_loader)
132 |     bar = Bar('processing', max=num_iters)
133 |     for ind, (img_id, pre_processed_images) in enumerate(data_loader):
134 |         # print(ind)
135 |         if ind > num_iters:
136 |             break
137 | 
138 |         bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format(
139 |             ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td
140 |         )
141 | 
142 |         start_time = time.time()
143 | 
144 |         # read image
145 |         detection = []
146 |         meta = pre_process(pre_processed_images['input'], scale)
147 |         image = pre_processed_images['input'].cuda()
148 |         img = pre_processed_images['imgOri'].squeeze().numpy()
149 |         if saveMat:
150 |             file_name = pre_processed_images['file_name']
151 |             mat_name = file_name[0].split('/')[-1].replace('.jpg', '.mat')
152 |             save_mat_folder = os.path.join(save_mat_path_upper, file_name[0].split('/')[2])
153 |             if not os.path.exists(save_mat_folder):
154 |                 os.mkdir(save_mat_folder)
155 | 
156 |         # detection
157 |         output, dets = process(model, image, return_time)
158 | 
159 |         # post-process
160 |         dets = post_process(dets, meta, num_classes)
161 |         detection.append(dets)
162 |         ret = merge_outputs(detection, num_classes, max_per_image)
163 | 
164 |         end_time = time.time()
165 |         # print('process time:', end_time-start_time)
166 | 
167 |         if show_flag:
168 |             frame, det = cv2_demo(img, dets[1])
169 | 
170 |             cv2.imshow('frame', frame)
171 |             cv2.waitKey(5)
172 | 
173 |             hm1 = output['hm'].squeeze(0).squeeze(0).cpu().detach().numpy()
174 | 
175 |             cv2.imshow('hm', hm1)
176 |             cv2.waitKey(5)
177 | 
178 |         if saveMat:
179 |             matsaveName = os.path.join(save_mat_folder, mat_name)
180 |             A = np.array(ret[1])
181 |             scio.savemat(matsaveName, {'A': A})
182 | 
183 |         results[img_id.numpy().astype(np.int32)[0]] = ret
184 |         bar.next()
185 |     bar.finish()
186 |     dataset.run_eval(results, opt.save_results_dir, results_name)
187 | 
188 | if __name__ == '__main__':
189 |     opt = opts().parse()
190 | 
191 |     split = 'test'
192 | 
193 |     show_flag = False
194 | 
195 |     save_flag = 1
196 |     opt.save_dir = opt.save_dir + '/' + opt.datasetname
197 |     if not os.path.exists(opt.save_dir):
198 |         os.mkdir(opt.save_dir)
199 |     opt.save_dir = opt.save_dir + '/' + opt.model_name
200 |     if not os.path.exists(opt.save_dir):
201 |         os.mkdir(opt.save_dir)
202 |     opt.save_results_dir = opt.save_dir + '/results'
203 |     if not os.path.exists(opt.save_results_dir):
204 |         os.mkdir(opt.save_results_dir)
205 | 
206 |     if opt.load_model != '':
207 |         modelPath = opt.load_model
208 |     else:
209 |         modelPath = './checkpoints/DSFNet.pth'
210 | 
211 |     print(modelPath)
212 | 
213 |     results_name = opt.model_name+'_'+modelPath.split('/')[-2]+'_'+modelPath.split('/')[-1].split('.')[0]
214 | 
215 |     test(opt, split, modelPath, show_flag, results_name, save_flag)
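When `saveMat` is set, `test()` writes one `.mat` file per frame, each holding an `N x 5` array `A` of `[x1, y1, x2, y2, score]` rows for the single vehicle class, grouped into one folder per sequence. A quick way to sanity-check such an output file (the path below is hypothetical; the real layout follows `save_mat_path_upper` plus the sequence name as built in `test()`):

```python
# Hypothetical inspection snippet; point it at an actual output file.
import scipy.io as scio

mat = scio.loadmat('./results/DSFNet_mat/001/000001.mat')  # assumed path
A = mat['A']                # N x 5: [x1, y1, x2, y2, score]
print(A.shape)
print(A[A[:, 4] >= 0.3])    # boxes above the same 0.3 display threshold
```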
--------------------------------------------------------------------------------
/testTrackingSort.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import os
6 | import numpy as np
7 | import time
8 | import torch
9 | 
10 | from lib.utils.opts import opts
11 | 
12 | from lib.models.stNet import get_det_net, load_model
13 | from lib.dataset.coco import COCO
14 | 
15 | from lib.external.nms import soft_nms
16 | 
17 | from lib.utils.decode import ctdet_decode
18 | from lib.utils.post_process import ctdet_post_process
19 | 
20 | from lib.utils.sort import *
21 | 
22 | import cv2
23 | 
24 | from progress.bar import Bar
25 | 
26 | CONFIDENCE_thres = 0.3
27 | COLORS = [(255, 0, 0)]
28 | 
29 | FONT = cv2.FONT_HERSHEY_SIMPLEX
30 | 
31 | def cv2_demo(frame, detections):
32 |     det = []
33 |     for i in range(detections.shape[0]):
34 |         if detections[i, 4] >= CONFIDENCE_thres:
35 |             pt = detections[i, :]
36 |             cv2.rectangle(frame, (int(pt[0])-4, int(pt[1])-4), (int(pt[2])+4, int(pt[3])+4), COLORS[0], 2)
37 |             cv2.putText(frame, str(pt[4]), (int(pt[0]), int(pt[1])), FONT, 1, (0, 255, 0), 1)
38 |             det.append([int(pt[0]), int(pt[1]), int(pt[2]), int(pt[3]), detections[i, 4]])
39 |     return frame, det
40 | 
41 | def process(model, image, return_time):
42 |     with torch.no_grad():
43 |         output = model(image)[-1]
44 |         hm = output['hm'].sigmoid_()
45 |         wh = output['wh']
46 |         reg = output['reg']
47 |         torch.cuda.synchronize()
48 |         forward_time = time.time()
49 |         dets = ctdet_decode(hm, wh, reg=reg)
50 |     if return_time:
51 |         return output, dets, forward_time
52 |     else:
53 |         return output, dets
54 | 
55 | def post_process(dets, meta, num_classes=1, scale=1):
56 |     dets = dets.detach().cpu().numpy()
57 |     dets = dets.reshape(1, -1, dets.shape[2])
58 |     dets = ctdet_post_process(
59 |         dets.copy(), [meta['c']], [meta['s']],
60 |         meta['out_height'], meta['out_width'], num_classes)
61 |     for j in range(1, num_classes + 1):
62 |         dets[0][j] = np.array(dets[0][j], dtype=np.float32).reshape(-1, 5)
63 |         dets[0][j][:, :4] /= scale
64 |     return dets[0]
65 | 
66 | def pre_process(image, scale=1):
67 |     height, width = image.shape[2:4]
68 |     new_height = int(height * scale)
69 |     new_width = int(width * scale)
70 | 
71 |     inp_height, inp_width = height, width
72 |     c = np.array([new_width / 2., new_height / 2.], dtype=np.float32)
73 |     s = max(height, width) * 1.0
74 | 
75 |     meta = {'c': c, 's': s,
76 |             'out_height': inp_height,
77 |             'out_width': inp_width}
78 |     return meta
79 | 
80 | def merge_outputs(detections, num_classes, max_per_image):
81 |     results = {}
82 |     for j in range(1, num_classes + 1):
83 |         results[j] = np.concatenate(
84 |             [detection[j] for detection in detections], axis=0).astype(np.float32)
85 | 
86 |         soft_nms(results[j], Nt=0.5, method=2)
87 | 
88 |     scores = np.hstack(
89 |         [results[j][:, 4] for j in range(1, num_classes + 1)])
90 |     if len(scores) > max_per_image:
91 |         kth = len(scores) - max_per_image
92 |         thresh = np.partition(scores, kth)[kth]
93 |         for j in range(1, num_classes + 1):
94 |             keep_inds = (results[j][:, 4] >= thresh)
95 |             results[j] = results[j][keep_inds]
96 |     return results
97 | 
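The helpers above (`cv2_demo` through `merge_outputs`) are shared verbatim with the detection test script; factoring them into a common module would remove the duplication, but they are kept inline here to match the repo. What this file adds is the SORT wiring in `test()` below: one fresh `Sort()` instance per video sequence, fed only detections above `CONFIDENCE_thres`. A minimal sketch of that interface, assuming `lib/utils/sort.py` keeps the original abewley/sort API (`update()` takes `[x1, y1, x2, y2, score]` rows and returns `[x1, y1, x2, y2, track_id]` rows):

```python
# Sketch only; the frame detections are made up.
import numpy as np
from lib.utils.sort import Sort

tracker = Sort()                                # one tracker per sequence
frame1 = np.array([[10., 10., 30., 30., 0.9]])  # [x1, y1, x2, y2, score]
frame2 = np.array([[12., 11., 32., 31., 0.8]])  # same object, shifted
for dets in (frame1, frame2):
    tracks = tracker.update(dets)
    print(tracks)                               # rows end with a track id
```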
98 | def test(opt, split, modelPath, show_flag, results_name):
99 | 
100 |     os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
101 | 
102 |     # Logger(opt)
103 |     print(opt.model_name)
104 | 
105 |     dataset = COCO(opt, split)
106 | 
107 |     data_loader = torch.utils.data.DataLoader(
108 |         dataset,
109 |         batch_size=1, shuffle=False, num_workers=1, pin_memory=True)
110 | 
111 |     model = get_det_net({'hm': dataset.num_classes, 'wh': 2, 'reg': 2}, opt.model_name)  # build the model
112 |     model = load_model(model, modelPath)
113 |     model = model.cuda()
114 |     model.eval()
115 | 
116 |     results = {}
117 |     return_time = False
118 |     scale = 1
119 |     num_classes = dataset.num_classes
120 |     max_per_image = opt.K
121 | 
122 |     file_folder_pre = ''
123 |     im_count = 0
124 | 
125 |     saveTxt = opt.save_track_results
126 |     if saveTxt:
127 |         track_results_save_dir = os.path.join(opt.save_results_dir, 'trackingResults'+opt.model_name)
128 |         if not os.path.exists(track_results_save_dir):
129 |             os.mkdir(track_results_save_dir)
130 | 
131 |     num_iters = len(data_loader)
132 |     bar = Bar('processing', max=num_iters)
133 |     for ind, (img_id, pre_processed_images) in enumerate(data_loader):
134 |         # print(ind)
135 |         if ind > len(data_loader) - 1:
136 |             break
137 | 
138 |         bar.suffix = '[{0}/{1}]|Tot: {total:} |ETA: {eta:} '.format(
139 |             ind, num_iters, total=bar.elapsed_td, eta=bar.eta_td
140 |         )
141 | 
142 |         # set up a fresh tracker whenever a new sequence starts
143 |         file_folder_cur = pre_processed_images['file_name'][0].split('/')[-3]
144 |         if file_folder_cur != file_folder_pre:
145 |             if saveTxt and file_folder_pre != '':
146 |                 fid.close()
147 |             file_folder_pre = file_folder_cur
148 |             mot_tracker = Sort()
149 |             if saveTxt:
150 |                 im_count = 0
151 |                 txt_path = os.path.join(track_results_save_dir, file_folder_cur+'.txt')
152 |                 fid = open(txt_path, 'w+')
153 | 
154 |         # read images
155 |         detection = []
156 |         meta = pre_process(pre_processed_images['input'], scale)
157 |         image = pre_processed_images['input'].cuda()
158 |         img = pre_processed_images['imgOri'].squeeze().numpy()
159 | 
160 |         # detection
161 |         output, dets = process(model, image, return_time)
162 |         # post-process
163 |         dets = post_process(dets, meta, num_classes)
164 |         detection.append(dets)
165 |         ret = merge_outputs(detection, num_classes, max_per_image)
166 | 
167 |         # update tracker with confidence-gated detections
168 |         dets_track = dets[1]
169 |         dets_track_select = np.argwhere(dets_track[:, -1] > CONFIDENCE_thres)
170 |         dets_track = dets_track[dets_track_select[:, 0], :]
171 |         track_bbs_ids = mot_tracker.update(dets_track)
172 | 
173 |         if show_flag:
174 |             frame, det = cv2_demo(img, track_bbs_ids)
175 |             cv2.imshow('frame', frame)
176 |             cv2.waitKey(5)
177 |             hm1 = output['hm'].squeeze(0).squeeze(0).cpu().detach().numpy()
178 |             cv2.imshow('hm', hm1)
179 |             cv2.waitKey(5)
180 | 
181 |         if saveTxt:
182 |             im_count += 1
183 |             track_bbs_ids = track_bbs_ids[::-1, :]
184 |             track_bbs_ids[:, 2:4] = track_bbs_ids[:, 2:4] - track_bbs_ids[:, :2]  # corners -> width/height
185 |             for it in range(track_bbs_ids.shape[0]):
186 |                 fid.write('%d,%d,%0.2f,%0.2f,%0.2f,%0.2f,1,-1,-1,-1\n' % (im_count,
187 |                     track_bbs_ids[it, -1], track_bbs_ids[it, 0], track_bbs_ids[it, 1],
188 |                     track_bbs_ids[it, 2], track_bbs_ids[it, 3]))
189 | 
190 |         results[img_id.numpy().astype(np.int32)[0]] = ret
191 |         bar.next()
192 |     bar.finish()
193 |     dataset.run_eval(results, opt.save_results_dir, results_name)
194 | 
195 | if __name__ == '__main__':
196 |     opt = opts().parse()
197 | 
198 |     split = 'test'
199 |     show_flag = opt.save_track_results
200 |     if not os.path.exists(opt.save_results_dir):
201 |         os.mkdir(opt.save_results_dir)
202 | 
203 |     if opt.load_model != '':
204 |         modelPath = opt.load_model
205 |     else:
206 |         modelPath = './checkpoints/DSFNet.pth'
207 |     print(modelPath)
208 | 
209 |     results_name = opt.model_name+'_'+modelPath.split('/')[-1].split('.')[0]
210 |     test(opt, split, modelPath, show_flag, results_name)
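Two notes on the tracking loop above. First, each output line follows the MOTChallenge-style text convention `frame, track_id, x, y, w, h, conf, -1, -1, -1`, which is why the box corners are converted to width/height before writing. Second, the `fid` handle for the last sequence is only closed implicitly at interpreter exit; closing it after `bar.finish()` (or using a `with` block per sequence) would be safer. A worked example of one output line, with made-up numbers:

```python
# Reproduces the per-line format written above; all values are hypothetical.
track = [15.0, 22.0, 41.0, 54.0, 7]               # x1, y1, x2, y2, track_id
x, y = track[0], track[1]
w, h = track[2] - track[0], track[3] - track[1]   # corners -> width/height
line = '%d,%d,%0.2f,%0.2f,%0.2f,%0.2f,1,-1,-1,-1\n' % (1, track[4], x, y, w, h)
print(line)   # 1,7,15.00,22.00,26.00,32.00,1,-1,-1,-1
```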
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | 
5 | import os
6 | 
7 | import torch
8 | import torch.utils.data
9 | from lib.utils.opts import opts
10 | from lib.utils.logger import Logger
11 | from datetime import datetime
12 | 
13 | from lib.models.stNet import get_det_net, load_model, save_model
14 | from lib.dataset.coco_rsdata import COCO
15 | from lib.Trainer.ctdet import CtdetTrainer
16 | 
17 | def main(opt):
18 |     torch.manual_seed(opt.seed)
19 | 
20 |     os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpus_str
21 |     opt.device = torch.device('cuda' if opt.gpus[0] >= 0 else 'cpu')
22 | 
23 |     val_intervals = opt.val_intervals
24 | 
25 | 
26 |     DataVal = COCO(opt, 'test')
27 | 
28 |     val_loader = torch.utils.data.DataLoader(
29 |         DataVal,
30 |         batch_size=1,
31 |         shuffle=False,
32 |         num_workers=opt.num_workers,
33 |         pin_memory=True
34 |     )
35 | 
36 |     DataTrain = COCO(opt, 'train')
37 | 
38 |     base_s = DataTrain.coco
39 | 
40 |     train_loader = torch.utils.data.DataLoader(
41 |         DataTrain,
42 |         batch_size=opt.batch_size,
43 |         shuffle=True,
44 |         num_workers=opt.num_workers,
45 |         pin_memory=True,
46 |         drop_last=True
47 |     )
48 | 
49 |     print('Creating model...')
50 |     head = {'hm': DataTrain.num_classes, 'wh': 2, 'reg': 2}
51 |     model = get_det_net(head, opt.model_name)  # build the model
52 | 
53 |     print(opt.model_name)
54 | 
55 |     optimizer = torch.optim.Adam(model.parameters(), opt.lr)  # set up the optimizer
56 | 
57 |     start_epoch = 0
58 | 
59 |     if not os.path.exists(opt.save_dir):
60 |         os.mkdir(opt.save_dir)
61 | 
62 |     if not os.path.exists(opt.save_results_dir):
63 |         os.mkdir(opt.save_results_dir)
64 | 
65 |     logger = Logger(opt)
66 | 
67 |     if opt.load_model != '':
68 |         model, optimizer, start_epoch = load_model(
69 |             model, opt.load_model, optimizer, opt.resume, opt.lr, opt.lr_step)  # load a previously trained model
70 | 
71 | 
72 |     trainer = CtdetTrainer(opt, model, optimizer)
73 |     trainer.set_device(opt.gpus, opt.device)
74 | 
75 |     print('Starting training...')
76 | 
77 |     best = -1
78 | 
79 |     for epoch in range(start_epoch + 1, opt.num_epochs + 1):
80 | 
81 |         log_dict_train, _ = trainer.train(epoch, train_loader)
82 | 
83 |         logger.write('epoch: {} |'.format(epoch))
84 | 
85 |         save_model(os.path.join(opt.save_dir, 'model_last.pth'),
86 |                    epoch, model, optimizer)
87 | 
88 |         for k, v in log_dict_train.items():
89 |             logger.write('{} {:8f} | '.format(k, v))
90 |         if val_intervals > 0 and epoch % val_intervals == 0:
91 |             save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
92 |                        epoch, model, optimizer)
93 |             with torch.no_grad():
94 |                 log_dict_val, preds, stats = trainer.val(epoch, val_loader, base_s, DataVal)
95 |             for k, v in log_dict_val.items():
96 |                 logger.write('{} {:8f} | '.format(k, v))
97 |             logger.write('eval results: ')
98 |             for k in stats.tolist():
99 |                 logger.write('{:8f} | '.format(k))
100 |             if log_dict_val['ap50'] > best:
101 |                 best = log_dict_val['ap50']
102 |                 save_model(os.path.join(opt.save_dir, 'model_best.pth'),
103 |                            epoch, model)
104 |         else:
105 |             save_model(os.path.join(opt.save_dir, 'model_last.pth'),
106 |                        epoch, model, optimizer)
107 |         logger.write('\n')
108 |         if epoch in opt.lr_step:
109 |             save_model(os.path.join(opt.save_dir, 'model_{}.pth'.format(epoch)),
110 |                        epoch, model, optimizer)
111 |             lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
112 |             print('Drop LR to', lr)
113 |             for param_group in optimizer.param_groups:
114 |                 param_group['lr'] = lr
115 |     logger.close()
116 | 
117 | if __name__ == '__main__':
118 |     opt = opts().parse()
119 |     main(opt)
--------------------------------------------------------------------------------
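The schedule in the `if epoch in opt.lr_step:` branch is a plain step decay: each time an epoch listed in `--lr_step` is reached, the learning rate drops to `opt.lr * 0.1**(index+1)`, i.e. one extra factor of ten per milestone; `model_best.pth` is refreshed whenever the validation AP@50 (`log_dict_val['ap50']`) improves. A worked example of the decay arithmetic, with hypothetical settings:

```python
# Step-decay arithmetic from train.py; the values here are hypothetical.
base_lr = 1.25e-4
lr_step = [30, 45]

for epoch in lr_step:
    lr = base_lr * (0.1 ** (lr_step.index(epoch) + 1))
    print('epoch %d -> lr %.2e' % (epoch, lr))  # 1.25e-05, then 1.25e-06
```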