├── README.md ├── Run.py ├── Run.py.bk ├── __init__.py ├── __pycache__ ├── options.cpython-37.pyc ├── python_RLS_RTMDNet.cpython-37.pyc ├── tracker.cpython-35.pyc ├── tracker.cpython-37.pyc └── vot.cpython-37.pyc ├── models └── rt-mdnet.pth ├── modules ├── __init__.py ├── __pycache__ │ ├── bbreg.cpython-37.pyc │ ├── data_prov.cpython-37.pyc │ ├── img_cropper.cpython-37.pyc │ ├── model.cpython-37.pyc │ ├── pretrain_options.cpython-37.pyc │ ├── sample_generator.cpython-37.pyc │ └── utils.cpython-37.pyc ├── bbreg.py ├── data_prov.py ├── img_cropper.py ├── model.py ├── prepro_data.py ├── prepro_data_imagenet.py ├── pretrain_options.py ├── roi_align │ ├── .setup.py.swp │ ├── Makefile │ ├── __init__.py │ ├── __pycache__ │ │ └── __init__.cpython-37.pyc │ ├── _ext │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ └── roi_align │ │ │ ├── __init__.py │ │ │ ├── __init__.pyc │ │ │ └── _roi_align.so │ ├── _ext2 │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ │ └── __init__.cpython-37.pyc │ │ └── roi_align.cpython-37m-x86_64-linux-gnu.so │ ├── build.py │ ├── functions │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── roi_align.cpython-37.pyc │ │ ├── roi_align.py │ │ └── roi_align.pyc │ ├── modules │ │ ├── __init__.py │ │ ├── __init__.pyc │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── roi_align.cpython-37.pyc │ │ ├── roi_align.py │ │ └── roi_align.pyc │ ├── setup.py │ ├── src.bak │ │ ├── .roi_align_cuda.cpp.swp │ │ ├── cuda │ │ │ ├── Makefile │ │ │ ├── roi_align.cu.o │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel.h │ │ ├── roi_align_cuda.c │ │ ├── roi_align_cuda.cpp │ │ └── roi_align_cuda.hpp │ ├── src.bak2 │ │ ├── .roi_align_cuda.cpp.swp │ │ ├── cuda │ │ │ ├── Makefile │ │ │ ├── roi_align.cu.o │ │ │ ├── roi_align_kernel.cu │ │ │ └── roi_align_kernel.h │ │ ├── roi_align_cuda.c │ │ ├── roi_align_cuda.cpp │ │ └── roi_align_cuda.hpp │ └── src │ │ ├── cuda │ │ ├── Makefile │ │ ├── 
roi_align.cu.o │ │ ├── roi_align_kernel.cu │ │ └── roi_align_kernel.h │ │ ├── roi_align_cuda.c │ │ ├── roi_align_cuda.cpp │ │ └── roi_align_cuda.hpp ├── sample_generator.py └── utils.py ├── options.py ├── python_RLS_RTMDNet.py ├── python_RLS_RTMDNet_bk.py ├── tracker.py ├── train_mrcnn.py └── vot.py /README.md: -------------------------------------------------------------------------------- 1 | ## RLS-RTMDNet 2 | Code and raw result files of our CVPR2020 oral paper "[Recursive Least-Squares Estimator-Aided Online Learning for Visual Tracking](https://openaccess.thecvf.com/content_CVPR_2020/html/Gao_Recursive_Least-Squares_Estimator-Aided_Online_Learning_for_Visual_Tracking_CVPR_2020_paper.html)" 3 | 4 | Created by [Jin Gao](http://www.nlpr.ia.ac.cn/users/gaojin/) 5 | 6 | ### Introduction 7 | RLS-RTMDNet is dedicated to improving online tracking part of RT-MDNet ([project page](http://cvlab.postech.ac.kr/~chey0313/real_time_mdnet/) and [paper](https://arxiv.org/pdf/1808.08834.pdf)) based on our proposed recursive least-squares estimator-aided online learning method. 8 | 9 | ### Citation 10 | If you're using this code in a publication, please cite our paper. 11 | 12 | @InProceedings{Gao_2020_CVPR, 13 | author = {Gao, Jin and Hu, Weiming and Lu, Yan}, 14 | title = {Recursive Least-squares Estimator-aided Online Learning for Visual Tracking}, 15 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 16 | month = {June}, 17 | year = {2020} 18 | } 19 | 20 | 21 | ### System Requirements 22 | 23 | This code is tested on 64 bit Linux (Ubuntu 16.04 LTS) with the following Anaconda environment: 24 | >> * PyTorch (= 1.2.0) 25 | >> * Python (= 3.7.4) 26 | 27 | ### Online Tracking 28 | 29 | **Pretrained Model** 30 | >> * The off-the-shelf pretrained model in RT-MDNet is used for our testing: [RT-MDNet-ImageNet-pretrained](https://www.dropbox.com/s/lr8uft05zlo21an/rt-mdnet.pth?dl=0). 
# OTB100 sequences whose ground truth is stored in a non-default file:
# sequence name -> (file name appended to seq_path, np.loadtxt delimiter).
_OTB_SPECIAL_GT = {
    'Jogging_1': ('/groundtruth_rect.1.txt', None),
    'Skating2_1': ('/groundtruth_rect.1.txt', None),
    'Jogging_2': ('/groundtruth_rect.2.txt', None),
    'Skating2_2': ('/groundtruth_rect.2.txt', None),
    'Human4': ('/groundtruth_rect.2.txt', ','),
}

# OTB100 sequences whose groundtruth_rect.txt is whitespace-delimited;
# every other OTB100 sequence is read as comma-delimited.
_OTB_WHITESPACE_GT = frozenset([
    'BlurBody', 'BlurCar1', 'BlurCar2', 'BlurCar3', 'BlurCar4', 'BlurFace',
    'BlurOwl', 'Board', 'Box', 'Car4', 'CarScale', 'ClifBar', 'Couple',
    'Crossing', 'Dog', 'FaceOcc1', 'Girl', 'Rubik', 'Singer1', 'Subway',
    'Surfer', 'Sylvester', 'Toy', 'Twinnings', 'Vase', 'Walking', 'Walking2',
    'Woman',
])

# Frame ranges (applied to the sorted image list) for OTB100 sequences that
# only use part of their image directory.
_OTB_FRAME_RANGES = {
    'David': slice(299, None),
    'Football1': slice(0, 74),
    'Freeman3': slice(0, 460),
    'Freeman4': slice(0, 283),
    'Diving': slice(0, 215),
}

# Frame ranges (applied to the sorted image list) for UAV123 sub-sequences.
_UAV123_FRAME_RANGES = {
    'bird1_1': slice(0, 253),
    'bird1_2': slice(774, 1477),
    'bird1_3': slice(1572, 2437),
    'car1_1': slice(0, 751),
    'car1_2': slice(750, 1627),
    'car1_3': slice(1626, 2629),
    'car6_1': slice(0, 487),
    'car6_2': slice(486, 1807),
    'car6_3': slice(1806, 2953),
    'car6_4': slice(2952, 3925),
    'car6_5': slice(3924, 4861),
    'car8_1': slice(0, 1357),
    'car8_2': slice(1356, 2575),
    'car16_1': slice(0, 415),
    'car16_2': slice(414, 1993),
    'group1_1': slice(0, 1333),
    'group1_2': slice(1332, 2515),
    'group1_3': slice(2514, 3925),
    'group1_4': slice(3924, 4873),
    'group2_1': slice(0, 907),
    'group2_2': slice(906, 1771),
    'group2_3': slice(1770, 2683),
    'group3_1': slice(0, 1567),
    'group3_2': slice(1566, 2827),
    'group3_3': slice(2826, 4369),
    'group3_4': slice(4368, 5527),
    'person2_1': slice(0, 1189),
    'person2_2': slice(1188, 2623),
    'person4_1': slice(0, 1501),
    'person4_2': slice(1500, 2743),
    'person5_1': slice(0, 877),
    'person5_2': slice(876, 2101),
    'person7_1': slice(0, 1249),
    'person7_2': slice(1248, 2065),
    'person8_1': slice(0, 1075),
    'person8_2': slice(1074, 1525),
    'person12_1': slice(0, 601),
    'person12_2': slice(600, 1621),
    'person14_1': slice(0, 847),
    'person14_2': slice(846, 1813),
    'person14_3': slice(1812, 2923),
    'person17_1': slice(0, 1501),
    'person17_2': slice(1500, 2347),
    'person19_1': slice(0, 1243),
    'person19_2': slice(1242, 2791),
    'person19_3': slice(2790, 4357),
    'truck4_1': slice(0, 577),
    'truck4_2': slice(576, 1261),
    'uav1_1': slice(0, 1555),
    'uav1_2': slice(1554, 2377),
    'uav1_3': slice(2472, 3469),
    'truck2': slice(0, 385),
}


def _list_jpgs(directory):
    """Return the sorted '<directory>/<name>' paths of the '.jpg' files in *directory*."""
    return sorted(directory + '/' + p for p in os.listdir(directory)
                  if os.path.splitext(p)[1] == '.jpg')


def genConfig(seq_path, set_type):
    """Build the frame list and ground-truth array for one sequence.

    Args:
        seq_path: Directory of the sequence; its last path component is used
            as the sequence name for the per-sequence lookup tables.
        set_type: Dataset layout, either 'OTB100' or 'UAV123'.

    Returns:
        A tuple ``(img_list, gt)``: ``img_list`` is the sorted list of '.jpg'
        paths (restricted to the sequence's frame range when one is
        registered) and ``gt`` is a 2-D array with one row per annotated
        frame. Eight-column (polygon) rows are converted to
        ``[x_min, y_min, width, height]``.

    Raises:
        ValueError: If ``set_type`` is not a supported dataset name.
    """
    seqname = os.path.split(seq_path)[1]

    if set_type == 'OTB100':
        img_list = _list_jpgs(seq_path + '/img')
        if seqname in _OTB_SPECIAL_GT:
            gt_file, delimiter = _OTB_SPECIAL_GT[seqname]
        elif seqname in _OTB_WHITESPACE_GT:
            gt_file, delimiter = '/groundtruth_rect.txt', None
        else:
            gt_file, delimiter = '/groundtruth_rect.txt', ','
        gt = np.loadtxt(seq_path + gt_file, delimiter=delimiter)
        frame_range = _OTB_FRAME_RANGES.get(seqname)
    elif set_type == 'UAV123':
        img_list = _list_jpgs(seq_path)
        gt = np.loadtxt(seq_path + '/anno/UAV123/' + seqname + '.txt', delimiter=',')
        frame_range = _UAV123_FRAME_RANGES.get(seqname)
    else:
        # Previously this fell through to an UnboundLocalError on `gt`.
        raise ValueError(
            "unsupported set_type {!r}; expected 'OTB100' or 'UAV123'".format(set_type))

    if frame_range is not None:
        img_list = img_list[frame_range]

    # np.loadtxt returns a 1-D array for a single-row file; keep gt 2-D so the
    # column test below (and row indexing by callers) works for one frame too.
    if gt.ndim == 1:
        gt = gt[None, :]

    # polygon to rect: columns alternate x, y over the four corners
    if gt.shape[1] == 8:
        xs = gt[:, 0::2]
        ys = gt[:, 1::2]
        x_min = xs.min(axis=1)[:, None]
        y_min = ys.min(axis=1)[:, None]
        x_max = xs.max(axis=1)[:, None]
        y_max = ys.max(axis=1)[:, None]
        gt = np.concatenate((x_min, y_min, x_max - x_min, y_max - y_min), axis=1)

    return img_list, gt
opts['visualize'] = args.visualize 220 | opts['adaptive_align'] = args.adaptive_align 221 | opts['padding'] = args.padding 222 | opts['jitter'] = args.jitter 223 | ################################################################################## 224 | ############################Do not modify opts anymore.########################### 225 | ######################Becuase of synchronization of options####################### 226 | ################################################################################## 227 | print (opts) 228 | 229 | 230 | ## path initialization 231 | dataset_path = '/home/jgao/Recent/' 232 | 233 | 234 | seq_home = dataset_path + opts['set_type'] 235 | seq_list = [f for f in os.listdir(seq_home) if isdir(join(seq_home,f))] 236 | mIoU_max = 0.0 237 | mIoU_min = 1.0 238 | mIoU_avg = 0.0 239 | res_list = [] 240 | for iterloop in range(50): 241 | iou_list=[] 242 | fps_list=dict() 243 | bb_result = dict() 244 | result = dict() 245 | 246 | iou_list_nobb=[] 247 | bb_result_nobb = dict() 248 | for num,seq in enumerate(seq_list): 249 | if num<-1: 250 | continue 251 | seq_path = seq_home + '/' + seq 252 | img_list,gt=genConfig(seq_path,opts['set_type']) 253 | 254 | if os.path.exists(opts['result_path']+str(iterloop)+'replay.npy'): 255 | resultdic = np.load(opts['result_path']+str(iterloop)+'replay.npy', allow_pickle=True) 256 | resultdic = resultdic.tolist() 257 | result_bb = resultdic['bb_result'][seq] 258 | fps = resultdic['fps'][seq] 259 | result_nobb = resultdic['bb_result_nobb'][seq] 260 | iou_result = np.zeros((len(img_list), 1)) 261 | for i in range(1, len(img_list)): 262 | iou_result[i] = overlap_ratio(gt[i], result_bb[i])[0] 263 | else: 264 | iou_result, result_bb, fps, result_nobb = run_mdnet(img_list, gt[0], gt, seq = seq, display=opts['visualize']) 265 | 266 | enable_frameNum = 0. 267 | for iidx in range(len(iou_result)): 268 | if (math.isnan(iou_result[iidx])==False): 269 | enable_frameNum += 1. 
# OTB100 sequences whose ground truth is stored in a non-default file:
# sequence name -> (file name appended to seq_path, np.loadtxt delimiter).
_OTB_SPECIAL_GT = {
    'Jogging_1': ('/groundtruth_rect.1.txt', None),
    'Skating2_1': ('/groundtruth_rect.1.txt', None),
    'Jogging_2': ('/groundtruth_rect.2.txt', None),
    'Skating2_2': ('/groundtruth_rect.2.txt', None),
    'Human4': ('/groundtruth_rect.2.txt', ','),
}

# OTB100 sequences whose groundtruth_rect.txt is whitespace-delimited;
# every other OTB100 sequence is read as comma-delimited.
_OTB_WHITESPACE_GT = frozenset([
    'BlurBody', 'BlurCar1', 'BlurCar2', 'BlurCar3', 'BlurCar4', 'BlurFace',
    'BlurOwl', 'Board', 'Box', 'Car4', 'CarScale', 'ClifBar', 'Couple',
    'Crossing', 'Dog', 'FaceOcc1', 'Girl', 'Rubik', 'Singer1', 'Subway',
    'Surfer', 'Sylvester', 'Toy', 'Twinnings', 'Vase', 'Walking', 'Walking2',
    'Woman',
])

# Frame ranges (applied to the sorted image list) for OTB100 sequences that
# only use part of their image directory.
_OTB_FRAME_RANGES = {
    'David': slice(299, None),
    'Football1': slice(0, 74),
    'Freeman3': slice(0, 460),
    'Freeman4': slice(0, 283),
    'Diving': slice(0, 215),
}

# Frame ranges (applied to the sorted image list) for UAV123 sub-sequences.
_UAV123_FRAME_RANGES = {
    'bird1_1': slice(0, 253),
    'bird1_2': slice(774, 1477),
    'bird1_3': slice(1572, 2437),
    'car1_1': slice(0, 751),
    'car1_2': slice(750, 1627),
    'car1_3': slice(1626, 2629),
    'car6_1': slice(0, 487),
    'car6_2': slice(486, 1807),
    'car6_3': slice(1806, 2953),
    'car6_4': slice(2952, 3925),
    'car6_5': slice(3924, 4861),
    'car8_1': slice(0, 1357),
    'car8_2': slice(1356, 2575),
    'car16_1': slice(0, 415),
    'car16_2': slice(414, 1993),
    'group1_1': slice(0, 1333),
    'group1_2': slice(1332, 2515),
    'group1_3': slice(2514, 3925),
    'group1_4': slice(3924, 4873),
    'group2_1': slice(0, 907),
    'group2_2': slice(906, 1771),
    'group2_3': slice(1770, 2683),
    'group3_1': slice(0, 1567),
    'group3_2': slice(1566, 2827),
    'group3_3': slice(2826, 4369),
    'group3_4': slice(4368, 5527),
    'person2_1': slice(0, 1189),
    'person2_2': slice(1188, 2623),
    'person4_1': slice(0, 1501),
    'person4_2': slice(1500, 2743),
    'person5_1': slice(0, 877),
    'person5_2': slice(876, 2101),
    'person7_1': slice(0, 1249),
    'person7_2': slice(1248, 2065),
    'person8_1': slice(0, 1075),
    'person8_2': slice(1074, 1525),
    'person12_1': slice(0, 601),
    'person12_2': slice(600, 1621),
    'person14_1': slice(0, 847),
    'person14_2': slice(846, 1813),
    'person14_3': slice(1812, 2923),
    'person17_1': slice(0, 1501),
    'person17_2': slice(1500, 2347),
    'person19_1': slice(0, 1243),
    'person19_2': slice(1242, 2791),
    'person19_3': slice(2790, 4357),
    'truck4_1': slice(0, 577),
    'truck4_2': slice(576, 1261),
    'uav1_1': slice(0, 1555),
    'uav1_2': slice(1554, 2377),
    'uav1_3': slice(2472, 3469),
    'truck2': slice(0, 385),
}


def _list_jpgs(directory):
    """Return the sorted '<directory>/<name>' paths of the '.jpg' files in *directory*."""
    return sorted(directory + '/' + p for p in os.listdir(directory)
                  if os.path.splitext(p)[1] == '.jpg')


def genConfig(seq_path, set_type):
    """Build the frame list and ground-truth array for one sequence.

    Args:
        seq_path: Directory of the sequence; its last path component is used
            as the sequence name for the per-sequence lookup tables.
        set_type: Dataset layout, either 'OTB100' or 'UAV123'.

    Returns:
        A tuple ``(img_list, gt)``: ``img_list`` is the sorted list of '.jpg'
        paths (restricted to the sequence's frame range when one is
        registered) and ``gt`` is a 2-D array with one row per annotated
        frame. Eight-column (polygon) rows are converted to
        ``[x_min, y_min, width, height]``.

    Raises:
        ValueError: If ``set_type`` is not a supported dataset name.
    """
    seqname = os.path.split(seq_path)[1]

    if set_type == 'OTB100':
        img_list = _list_jpgs(seq_path + '/img')
        if seqname in _OTB_SPECIAL_GT:
            gt_file, delimiter = _OTB_SPECIAL_GT[seqname]
        elif seqname in _OTB_WHITESPACE_GT:
            gt_file, delimiter = '/groundtruth_rect.txt', None
        else:
            gt_file, delimiter = '/groundtruth_rect.txt', ','
        gt = np.loadtxt(seq_path + gt_file, delimiter=delimiter)
        frame_range = _OTB_FRAME_RANGES.get(seqname)
    elif set_type == 'UAV123':
        img_list = _list_jpgs(seq_path)
        gt = np.loadtxt(seq_path + '/anno/UAV123/' + seqname + '.txt', delimiter=',')
        frame_range = _UAV123_FRAME_RANGES.get(seqname)
    else:
        # Previously this fell through to an UnboundLocalError on `gt`.
        raise ValueError(
            "unsupported set_type {!r}; expected 'OTB100' or 'UAV123'".format(set_type))

    if frame_range is not None:
        img_list = img_list[frame_range]

    # np.loadtxt returns a 1-D array for a single-row file; keep gt 2-D so the
    # column test below (and row indexing by callers) works for one frame too.
    if gt.ndim == 1:
        gt = gt[None, :]

    # polygon to rect: columns alternate x, y over the four corners
    if gt.shape[1] == 8:
        xs = gt[:, 0::2]
        ys = gt[:, 1::2]
        x_min = xs.min(axis=1)[:, None]
        y_min = ys.min(axis=1)[:, None]
        x_max = xs.max(axis=1)[:, None]
        y_max = ys.max(axis=1)[:, None]
        gt = np.concatenate((x_min, y_min, x_max - x_min, y_max - y_min), axis=1)

    return img_list, gt
parser.add_argument("-result_path", default = './result.npy') 204 | parser.add_argument("-visual_log",default=False, action= 'store_true') 205 | parser.add_argument("-visualize",default=False, action='store_true') 206 | parser.add_argument("-adaptive_align",default=True, action='store_false') 207 | parser.add_argument("-padding",default=1.2, type = float) 208 | parser.add_argument("-jitter",default=True, action='store_false') 209 | 210 | args = parser.parse_args() 211 | 212 | ################################################################################## 213 | #########################Just modify opts in this script.######################### 214 | ######################Becuase of synchronization of options####################### 215 | ################################################################################## 216 | ## option setting 217 | opts['model_path']=args.model_path 218 | opts['result_path']=args.result_path 219 | opts['visual_log']=args.visual_log 220 | opts['set_type']=args.set_type 221 | opts['visualize'] = args.visualize 222 | opts['adaptive_align'] = args.adaptive_align 223 | opts['padding'] = args.padding 224 | opts['jitter'] = args.jitter 225 | ################################################################################## 226 | ############################Do not modify opts anymore.########################### 227 | ######################Becuase of synchronization of options####################### 228 | ################################################################################## 229 | print (opts) 230 | 231 | 232 | ## path initialization 233 | dataset_path = '/home/jgao/Recent/' 234 | 235 | 236 | seq_home = dataset_path + opts['set_type'] 237 | seq_list = [f for f in os.listdir(seq_home) if isdir(join(seq_home,f))] 238 | mIoU_max = 0.0 239 | mIoU_min = 1.0 240 | mIoU_avg = 0.0 241 | res_list = [] 242 | for iterloop in range(50): 243 | iou_list=[] 244 | fps_list=dict() 245 | bb_result = dict() 246 | result = dict() 247 | 248 | 
iou_list_nobb=[] 249 | bb_result_nobb = dict() 250 | for num,seq in enumerate(seq_list): 251 | if num<-1: 252 | continue 253 | seq_path = seq_home + '/' + seq 254 | img_list,gt=genConfig(seq_path,opts['set_type']) 255 | res = {} 256 | res['results'] = [] 257 | for innerloop in range(50): 258 | if innerloop == iterloop: 259 | if os.path.exists(opts['result_path']+str(innerloop)+'replay.npy'): 260 | resultdic = np.load(opts['result_path']+str(iterloop)+'replay.npy', allow_pickle=True) 261 | resultdic = resultdic.tolist() 262 | result_bb = resultdic['bb_result'][seq] 263 | fps = resultdic['fps'][seq] 264 | result_nobb = resultdic['bb_result_nobb'][seq] 265 | iou_result = np.zeros((len(img_list), 1)) 266 | for i in range(1, len(img_list)): 267 | iou_result[i] = overlap_ratio(gt[i], result_bb[i])[0] 268 | else: 269 | iou_result, result_bb, fps, result_nobb = run_mdnet(img_list, gt[0], gt, seq = seq, display=opts['visualize']) 270 | 271 | enable_frameNum = 0. 272 | for iidx in range(len(iou_result)): 273 | if (math.isnan(iou_result[iidx])==False): 274 | enable_frameNum += 1. 275 | else: 276 | ## gt is not alowed 277 | iou_result[iidx] = 0. 
278 | 279 | iou_list.append(iou_result.sum()/enable_frameNum) 280 | bb_result[seq] = result_bb 281 | fps_list[seq]=fps 282 | 283 | bb_result_nobb[seq] = result_nobb 284 | print ('{} {} : {} , total mIoU:{}, fps:{}'.format(num,seq,iou_result.mean(), sum(iou_list)/len(iou_list),sum(fps_list.values())/len(fps_list))) 285 | 286 | res['results'].append({'res': result_bb.round().tolist(), 'type': 'rect', 'len': len(result_bb)}) 287 | else: 288 | resultdic = np.load(opts['result_path']+str(innerloop)+'replay.npy', allow_pickle=True) 289 | resultdic = resultdic.tolist() 290 | result_bb = resultdic['bb_result'][seq] 291 | res['results'].append({'res': result_bb.round().tolist(), 'type': 'rect', 'len': len(result_bb)}) 292 | io.savemat('./' + seq + '_RLS_RTMDNet.mat', res) 293 | res_list.append(sum(iou_list) / len(iou_list)) 294 | mIoU_avg += sum(iou_list)/len(iou_list) 295 | if mIoU_max < sum(iou_list)/len(iou_list): 296 | mIoU_max = sum(iou_list)/len(iou_list) 297 | if mIoU_min > sum(iou_list)/len(iou_list): 298 | mIoU_min = sum(iou_list)/len(iou_list) 299 | result['bb_result']=bb_result 300 | result['fps']=fps_list 301 | result['bb_result_nobb']=bb_result_nobb 302 | np.save(opts['result_path'] + str(iterloop) + 'replay', result) 303 | print (mIoU_max) 304 | print (mIoU_min) 305 | print (res_list) 306 | #np.save(opts['result_path']+str(iterloop),result) 307 | mIoU_avg /= 50 308 | print (mIoU_max) 309 | print (mIoU_avg) 310 | print (mIoU_min) 311 | print (res_list) 312 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/__init__.py -------------------------------------------------------------------------------- /__pycache__/options.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/__pycache__/options.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/python_RLS_RTMDNet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/__pycache__/python_RLS_RTMDNet.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/tracker.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/__pycache__/tracker.cpython-35.pyc -------------------------------------------------------------------------------- /__pycache__/tracker.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/__pycache__/tracker.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/vot.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/__pycache__/vot.cpython-37.pyc -------------------------------------------------------------------------------- /models/rt-mdnet.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/models/rt-mdnet.pth -------------------------------------------------------------------------------- /modules/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__init__.py -------------------------------------------------------------------------------- /modules/__pycache__/bbreg.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__pycache__/bbreg.cpython-37.pyc -------------------------------------------------------------------------------- /modules/__pycache__/data_prov.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__pycache__/data_prov.cpython-37.pyc -------------------------------------------------------------------------------- /modules/__pycache__/img_cropper.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__pycache__/img_cropper.cpython-37.pyc -------------------------------------------------------------------------------- /modules/__pycache__/model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__pycache__/model.cpython-37.pyc -------------------------------------------------------------------------------- /modules/__pycache__/pretrain_options.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__pycache__/pretrain_options.cpython-37.pyc -------------------------------------------------------------------------------- /modules/__pycache__/sample_generator.cpython-37.pyc: 
class BBRegressor:
    """Ridge-regression refinement of bounding boxes.

    Boxes are rows of ``[x, y, w, h]`` where ``(x, y)`` is the minimum corner
    (the code derives centres as ``xy + wh / 2``). Regression targets are the
    centre offset normalised by box size and the log ratio of sizes.

    Args:
        img_size: Image extent that predicted boxes are clipped to; it is
            combined element-wise with the ``[x, y]`` columns.
        alpha: Regularisation strength handed to ``Ridge``.
        overlap: ``(low, high)`` bounds on the value returned by
            ``overlap_ratio`` for a sample to be used / a prediction kept.
        scale: ``(low, high)`` bounds on the area ratio for a sample to be
            used / a prediction kept.
    """

    def __init__(self, img_size, alpha=1000, overlap=(0.6, 1), scale=(1, 2)):
        self.img_size = img_size
        self.alpha = alpha
        self.overlap_range = overlap
        self.scale_range = scale
        self.model = Ridge(alpha=self.alpha)

    def _in_range(self, r, s):
        """Return the mask of entries whose overlap *r* and area ratio *s* are in range."""
        # Multiplying boolean masks is an element-wise logical AND.
        return (r >= self.overlap_range[0]) * (r <= self.overlap_range[1]) * \
               (s >= self.scale_range[0]) * (s <= self.scale_range[1])

    def train(self, X, bbox, gt):
        """Fit the regressor on the samples that are close enough to *gt*.

        Args:
            X: Per-sample features; must expose ``.cpu().numpy()``.
            bbox: Array of sample boxes, one ``[x, y, w, h]`` row per sample.
            gt: Ground-truth box, either 1-D or a single-row 2-D array.
        """
        X = X.cpu().numpy()
        bbox = np.copy(bbox)
        gt = np.copy(gt)

        if gt.ndim == 1:
            gt = gt[None, :]

        r = overlap_ratio(bbox, gt)
        s = np.prod(bbox[:, 2:], axis=1) / np.prod(gt[0, 2:])
        idx = self._in_range(r, s)

        X = X[idx]
        bbox = bbox[idx]

        Y = self.get_examples(bbox, gt)

        self.model.fit(X, Y)

    def predict(self, X, bbox):
        """Return refined copies of *bbox*; the input array is left untouched.

        Predictions whose overlap with, or area ratio to, the input box falls
        outside the configured ranges are replaced by the input box. The
        result is then clipped to the image.

        Args:
            X: Per-sample features; must expose ``.cpu().numpy()``.
            bbox: Array of boxes, one ``[x, y, w, h]`` row per sample.

        Returns:
            Array with the same shape as *bbox* holding the refined boxes.
        """
        X = X.cpu().numpy()
        bbox_ = np.copy(bbox)

        Y = self.model.predict(X)

        # Apply the predicted deltas in centre/size form, then convert back
        # to corner/size form.
        bbox_[:, :2] = bbox_[:, :2] + bbox_[:, 2:] / 2
        bbox_[:, :2] = Y[:, :2] * bbox_[:, 2:] + bbox_[:, :2]
        bbox_[:, 2:] = np.exp(Y[:, 2:]) * bbox_[:, 2:]
        bbox_[:, :2] = bbox_[:, :2] - bbox_[:, 2:] / 2

        r = overlap_ratio(bbox, bbox_)
        s = np.prod(bbox[:, 2:], axis=1) / np.prod(bbox_[:, 2:], axis=1)
        idx = np.logical_not(self._in_range(r, s))
        bbox_[idx] = bbox[idx]

        bbox_[:, :2] = np.maximum(bbox_[:, :2], 0)
        # Clip the size against the *refined* origin. The previous code used
        # the un-refined `bbox` origin here, which lets a box that moved
        # right/down extend past the image border.
        bbox_[:, 2:] = np.minimum(bbox_[:, 2:], self.img_size - bbox_[:, :2])

        return bbox_

    def get_examples(self, bbox, gt):
        """Compute regression targets mapping each *bbox* row onto *gt*.

        Neither input array is modified. (The previous implementation wrote
        the centres back into the caller's arrays, which also truncated them
        for integer arrays.)

        Args:
            bbox: 2-D array of ``[x, y, w, h]`` sample boxes.
            gt: 2-D array of ground-truth boxes that broadcasts against
                *bbox* (e.g. a single row).

        Returns:
            Array of ``[dx, dy, dw, dh]`` rows: centre offsets divided by the
            sample size, and the log of the size ratios.
        """
        bbox_center = bbox[:, :2] + bbox[:, 2:] / 2
        gt_center = gt[:, :2] + gt[:, 2:] / 2

        dst_xy = (gt_center - bbox_center) / bbox[:, 2:]
        dst_wh = np.log(gt[:, 2:] / bbox[:, 2:])

        Y = np.concatenate((dst_xy, dst_wh), axis=1)
        return Y
Image.open(self.img_list[0]).convert('RGB') 43 | self.scene_generator = SampleGenerator('gaussian', image.size,trans_f=1.5, scale_f=1.2,valid=True) 44 | self.pos_generator = SampleGenerator('gaussian', image.size, 0.1, 1.2, 1.1, True) 45 | self.neg_generator = SampleGenerator('uniform', image.size, 1, 1.2, 1.1, True) 46 | 47 | self.receptive_field = receptive_field 48 | 49 | self.interval = pretrain_opts['frame_interval'] 50 | self.img_crop_model = imgCropper(pretrain_opts['padded_img_size']) 51 | self.img_crop_model.eval() 52 | if pretrain_opts['use_gpu']: 53 | self.img_crop_model.gpuEnable() 54 | 55 | def __iter__(self): 56 | return self 57 | 58 | def __next__(self): 59 | 60 | next_pointer = min(self.pointer + self.batch_frames, len(self.img_list)) 61 | idx = self.index[self.pointer:next_pointer] 62 | if len(idx) < self.batch_frames: 63 | self.index = np.random.permutation(len(self.img_list)) 64 | next_pointer = self.batch_frames - len(idx) 65 | idx = np.concatenate((idx, self.index[:next_pointer])) 66 | self.pointer = next_pointer 67 | 68 | 69 | n_pos = self.batch_pos 70 | n_neg = self.batch_neg 71 | 72 | scenes = [] 73 | for i, (img_path, bbox) in enumerate(zip(self.img_list[idx], self.gt[idx])): 74 | image = Image.open(img_path).convert('RGB') 75 | #plt.figure("test") 76 | #plt.imshow(image) 77 | #plt.show() 78 | #plt.close() 79 | image = np.asarray(image) 80 | 81 | ishape = image.shape 82 | pos_examples = gen_samples(SampleGenerator('gaussian', (ishape[1],ishape[0]), 0.1, 1.2, 1.1, False), bbox, n_pos, overlap_range=self.overlap_pos) 83 | neg_examples = gen_samples(SampleGenerator('uniform', (ishape[1],ishape[0]), 1, 1.2, 1.1, False), bbox, n_neg, overlap_range=self.overlap_neg) 84 | 85 | # compute padded sample 86 | padded_x1 = (neg_examples[:, 0]-neg_examples[:,2]*(pretrain_opts['padding']-1.)/2.).min() 87 | padded_y1 = (neg_examples[:, 1]-neg_examples[:,3]*(pretrain_opts['padding']-1.)/2.).min() 88 | padded_x2 = (neg_examples[:, 0] + neg_examples[:, 
2]*(pretrain_opts['padding']+1.)/2.).max() 89 | padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3]*(pretrain_opts['padding']+1.)/2.).max() 90 | padded_scene_box = np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1)) 91 | 92 | jitter_scale = 1.1 ** np.clip(3.*np.random.randn(1,1),-2,2) 93 | crop_img_size = (padded_scene_box[2:4] * ((pretrain_opts['img_size'], pretrain_opts['img_size']) / bbox[2:4])).astype('int64') * jitter_scale[0][0] 94 | cropped_image, cur_image_var = self.img_crop_model.crop_image(image, np.reshape(padded_scene_box, (1, 4)), crop_img_size) 95 | cropped_image = cropped_image - 128. 96 | if pretrain_opts['use_gpu']: 97 | cropped_image = cropped_image.data.cpu() 98 | cur_image_var = cur_image_var.cpu() 99 | scenes.append(cropped_image) 100 | ## get current frame and heatmap 101 | 102 | rel_bbox = np.copy(bbox) 103 | rel_bbox[0:2] -= padded_scene_box[0:2] 104 | 105 | jittered_obj_size = jitter_scale[0][0]*float(pretrain_opts['img_size']) 106 | 107 | batch_num = np.zeros((pos_examples.shape[0], 1)) 108 | pos_rois = np.copy(pos_examples) 109 | pos_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)), pos_rois.shape[0], axis=0) 110 | pos_rois = samples2maskroi(pos_rois, self.receptive_field, (jittered_obj_size, jittered_obj_size),bbox[2:4], pretrain_opts['padding']) 111 | pos_rois = np.concatenate((batch_num, pos_rois), axis=1) 112 | 113 | batch_num = np.zeros((neg_examples.shape[0], 1)) 114 | neg_rois = np.copy(neg_examples) 115 | neg_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)), neg_rois.shape[0], axis=0) 116 | neg_rois = samples2maskroi(neg_rois, self.receptive_field, (jittered_obj_size, jittered_obj_size),bbox[2:4], pretrain_opts['padding']) 117 | neg_rois = np.concatenate((batch_num, neg_rois), axis=1) 118 | 119 | if i==0: 120 | total_pos_rois = [torch.from_numpy(np.copy(pos_rois).astype('float32'))] 121 | total_neg_rois = [torch.from_numpy(np.copy(neg_rois).astype('float32'))] 
class RegionExtractor():
    """Iterator yielding batches of cropped regions for a list of samples.

    Each batch is a float32 tensor in (N, 3, crop_size, crop_size) layout with
    128 subtracted from every value.
    """

    def __init__(self, image, samples, crop_size, padding, batch_size, shuffle=False):
        """
        Args:
            image: source image; converted with ``np.asarray``.
            samples: boxes to crop, one per row. Any array-like is accepted;
                it is converted with ``np.asarray`` so that batch indexing
                with an index array also works for plain lists.
            crop_size: side length of each square output crop.
            padding: padding factor forwarded to ``crop_image``.
            batch_size: maximum number of samples per yielded batch.
            shuffle: stored but not used by this class.
        """
        self.image = np.asarray(image)
        self.samples = np.asarray(samples)
        self.crop_size = crop_size
        self.padding = padding
        self.batch_size = batch_size
        # NOTE(review): `shuffle` is never consulted; batches always follow
        # sample order. Left untouched in case callers rely on that order.
        self.shuffle = shuffle

        self.index = np.arange(len(samples))
        self.pointer = 0

        # Per-channel mean of the image (mean over the first two axes).
        self.mean = self.image.mean(0).mean(0).astype('float32')

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next batch, or reset the pointer and stop when done."""
        if self.pointer == len(self.samples):
            # Rewind so the extractor can be iterated again.
            self.pointer = 0
            raise StopIteration

        next_pointer = min(self.pointer + self.batch_size, len(self.samples))
        index = self.index[self.pointer:next_pointer]
        self.pointer = next_pointer

        regions = self.extract_regions(index)
        return torch.from_numpy(regions)

    # Python 2 iterator protocol alias.
    next = __next__

    def extract_regions(self, index):
        """Crop the samples selected by ``index``.

        Returns a float32 numpy array of shape
        (len(index), 3, crop_size, crop_size) with 128 subtracted.
        """
        regions = np.zeros((len(index), self.crop_size, self.crop_size, 3), dtype='uint8')
        for i, sample in enumerate(self.samples[index]):
            regions[i] = crop_image(self.image, sample, self.crop_size, self.padding)

        # HWC -> CHW per region, then centre around zero.
        regions = regions.transpose(0, 3, 1, 2).astype('float32')
        regions = regions - 128.
        return regions
constraint = one to one matching between image and target 56 | ## exception handling 57 | assert(len(target_list) == len(img_list)) 58 | 59 | ## image crop 60 | torch.cuda.synchronize() 61 | start_time = time.time() 62 | cur_images = torch.squeeze(torch.stack(img_list, 0)) 63 | torch.cuda.synchronize() 64 | print ('10 image stacking time:{}'.format(time.time() - start_time)) 65 | 66 | ishape = cur_images.size() 67 | 68 | # Extract sample features and get target location 69 | sample_rois = np.array(target_list) 70 | sample_rois[:,2:4] += sample_rois[:,0:2] 71 | batch_num = np.reshape(np.arange(0,len(sample_rois)),(len(sample_rois),1)) 72 | sample_rois = np.concatenate( (batch_num, sample_rois), axis=1) 73 | sample_rois = Variable(torch.from_numpy(sample_rois.astype('float32'))) 74 | if self.isCuda: 75 | sample_rois = sample_rois.cuda() 76 | cur_images = cur_images.cuda() 77 | 78 | cropped_images = self.forward(cur_images, sample_rois) 79 | 80 | 81 | return cropped_images 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /modules/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import scipy.io 3 | import numpy as np 4 | from collections import OrderedDict 5 | 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.autograd import Variable 9 | import torch 10 | 11 | import time 12 | 13 | import sys 14 | sys.path.insert(0,'./roi_align') 15 | from roi_align.modules.roi_align import RoIAlignAvg,RoIAlignMax 16 | 17 | def append_params(params, module, prefix): 18 | for child in module.children(): 19 | for k,p in child._parameters.items(): 20 | if p is None: continue 21 | 22 | if isinstance(child, nn.BatchNorm2d): 23 | name = prefix + '_bn_' + k 24 | else: 25 | name = prefix + '_' + k 26 | 27 | if name not in params: 28 | params[name] = p 29 | else: 30 | raise RuntimeError("Duplicated param name: %s" % (name)) 31 | 32 | class LRN(nn.Module): 33 
class MDNet(nn.Module):
    """Shared conv1-fc5 layers followed by ``K`` binary (2-way) fc6 branches."""

    def __init__(self, model_path=None,K=1):
        """
        Args:
            model_path: optional ``.pth`` or ``.mat`` file with pretrained
                weights for the shared layers; ``None`` keeps the default
                initialisation.
            K: number of fc6 branches to create.

        Raises:
            RuntimeError: if ``model_path`` has an unsupported extension.
        """
        super(MDNet, self).__init__()
        self.K = K
        self.layers = nn.Sequential(OrderedDict([
                ('conv1', nn.Sequential(nn.Conv2d(3, 96, kernel_size=7, stride=2),
                                        nn.ReLU(),
                                        LRN(),
                                        nn.MaxPool2d(kernel_size=3, stride=2)
                                        )),
                ('conv2', nn.Sequential(nn.Conv2d(96, 256, kernel_size=5, stride=2,dilation=1),
                                        nn.ReLU(),
                                        LRN(),
                                        )),

                ('conv3', nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=1,dilation=3),
                                        nn.ReLU(),
                                        )),
                ('fc4',   nn.Sequential(
                                        nn.Linear(512 * 3 * 3, 512),
                                        nn.ReLU())),
                ('fc5',   nn.Sequential(nn.Dropout(0.5),
                                        nn.Linear(512, 512),
                                        nn.ReLU()))]))

        self.branches = nn.ModuleList([nn.Sequential(nn.Dropout(0.5),
                                                     nn.Linear(512, 2)) for _ in range(K)])

        self.roi_align_model = RoIAlignMax(3, 3, 1. / 8)

        # Receptive field covered by one element of feat_map, where feat_map
        # is the bottom layer of the ROI-align layer.
        self.receptive_field = 75.

        if model_path is not None:
            extension = os.path.splitext(model_path)[1]
            if extension == '.pth':
                self.load_model(model_path)
            elif extension == '.mat':
                self.load_mat_model(model_path)
            else:
                raise RuntimeError("Unkown model format: %s" % (model_path))
        self.build_param_dict()

    def build_param_dict(self):
        """Collect all parameters into ``self.params`` under flat names
        (shared layers by layer name, branch ``k`` under ``fc6_k``)."""
        self.params = OrderedDict()
        for name, module in self.layers.named_children():
            append_params(self.params, module, name)
        for k, module in enumerate(self.branches):
            append_params(self.params, module, 'fc6_%d'%(k))

    def set_learnable_params(self, layers):
        """Enable gradients only for parameters whose name starts with one of
        the prefixes in ``layers``; every other parameter is frozen."""
        for k, p in self.params.items():
            p.requires_grad = any(k.startswith(l) for l in layers)

    def get_learnable_params(self):
        """Return an ordered dict of the parameters with ``requires_grad``."""
        params = OrderedDict()
        for k, p in self.params.items():
            if p.requires_grad:
                params[k] = p
        return params

    def forward(self, x, k=0, in_layer='conv1', out_layer='fc6'):
        """Run ``x`` through the shared layers from ``in_layer`` onwards.

        Returns the activation of ``out_layer`` if it names a shared layer.
        Otherwise branch ``k`` is applied and the raw scores (``'fc6'``) or
        their softmax over dim 1 (``'fc6_softmax'``) are returned. Any other
        ``out_layer`` falls through and returns ``None``, as before.
        """
        run = False
        for name, module in self.layers.named_children():
            if name == in_layer:
                run = True
            if run:
                x = module(x)
                if name == out_layer:
                    return x

        x = self.branches[k](x)
        if out_layer=='fc6':
            return x
        elif out_layer=='fc6_softmax':
            # Explicit dim: the implicit-dim form is deprecated; dim=1 is what
            # the implicit rule selects for 2-D score matrices.
            return F.softmax(x, dim=1)

    @staticmethod
    def _mean_augmented_input(x):
        """Flatten ``x`` per sample, append a constant-1 column and return the
        batch mean as a (1, D+1) tensor on the same device/dtype as ``x``."""
        flat = x.view(x.size(0), -1)
        ones = flat.new_ones(flat.size(0), 1)
        return torch.mean(torch.cat((flat, ones), 1), 0, True)

    def forward_owm(self, x, k=0, in_layer='conv1', out_layer='fc6'):
        """Like :meth:`forward`, additionally recording layer inputs.

        Before every executed shared layer and before the branch, the
        batch-mean of the bias-augmented input is appended to ``h_list``.

        Returns:
            ``(scores, h_list)`` when ``out_layer == 'fc6'``. For a shared
            ``out_layer`` or ``'fc6_softmax'`` only the tensor is returned
            (original behaviour, kept for caller compatibility).
        """
        run = False
        h_list = []
        for name, module in self.layers.named_children():
            if name == in_layer:
                run = True
            if run:
                # Device-agnostic: the ones column follows x instead of being
                # forced onto CUDA.
                h_list.append(self._mean_augmented_input(x))
                x = module(x)
                if name == out_layer:
                    return x
        h_list.append(self._mean_augmented_input(x))
        x = self.branches[k](x)
        if out_layer=='fc6':
            return x, h_list
        elif out_layer=='fc6_softmax':
            return F.softmax(x, dim=1)

    def load_model(self, model_path):
        """Load the ``'shared_layers'`` state dict from a ``.pth`` checkpoint."""
        # map_location='cpu' lets GPU-saved checkpoints load on CPU-only
        # hosts; load_state_dict then copies into the existing parameters.
        states = torch.load(model_path, map_location='cpu')
        shared_layers = states['shared_layers']
        self.layers.load_state_dict(shared_layers)

    def load_mat_model(self, matfile):
        """Copy the three conv layers' weights and biases from a ``.mat`` file."""
        mat = scipy.io.loadmat(matfile)
        mat_layers = list(mat['layers'])[0]

        # copy conv weights
        for i in range(3):
            weight, bias = mat_layers[i*4]['weights'].item()[0]
            self.layers[i][0].weight.data = torch.from_numpy(np.transpose(weight, (3,2,0,1)))
            self.layers[i][0].bias.data = torch.from_numpy(bias[:,0])

    def trainSpatialTransform(self, image, bb):
        """Placeholder; does nothing and returns ``None``."""
        return
prec.item() 214 | 215 | 216 | 217 | -------------------------------------------------------------------------------- /modules/prepro_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pickle 4 | from collections import OrderedDict 5 | 6 | 7 | 8 | # seq_home = '../dataset/' 9 | seqlist_path = '../vot-otb.txt' 10 | output_path = 'data/vot-otb.pkl' 11 | set_type = 'VOT' 12 | seq_home = '/home/ilchae/dataset/tracking/'+set_type +'/' 13 | 14 | if set_type=='OTB': 15 | seqlist_path = '../otb-vot15.txt' 16 | output_path = '../otb-vot15.pkl' 17 | 18 | if set_type == 'VOT': 19 | seqlist_path = '../vot-otb.txt' 20 | output_path = '../vot-otb.pkl' 21 | 22 | with open(seqlist_path,'r') as fp: 23 | seq_list = fp.read().splitlines() 24 | 25 | data = {} 26 | for i,seqname in enumerate(seq_list): 27 | print(seqname) 28 | if set_type=='OTB': 29 | seq_path = seq_home+seqname 30 | img_list = sorted([p for p in os.listdir(seq_path+'/img') if os.path.splitext(p)[1] == '.jpg']) 31 | 32 | if (seqname == 'Jogging') or (seqname == 'Skating2'): 33 | gt = np.loadtxt(seq_path + '/groundtruth_rect.1.txt') 34 | elif seqname == 'Human4' : 35 | gt = np.loadtxt(seq_path + '/groundtruth_rect.2.txt', delimiter=',') 36 | elif (seqname == 'BlurBody') or (seqname == 'BlurCar1') or (seqname == 'BlurCar2') or (seqname == 'BlurCar3') \ 37 | or (seqname == 'BlurCar4') or (seqname == 'BlurFace') or (seqname == 'BlurOwl') or (seqname == 'Board') \ 38 | or (seqname == 'Box') or (seqname == 'Car4') or (seqname == 'CarScale') or (seqname == 'ClifBar') \ 39 | or (seqname == 'Couple') or (seqname == 'Crossing') or (seqname == 'Dog') or (seqname == 'FaceOcc1') \ 40 | or (seqname == 'Girl') or (seqname == 'Rubik') or (seqname == 'Singer1') or (seqname == 'Subway') \ 41 | or (seqname == 'Surfer') or (seqname == 'Sylvester') or (seqname == 'Toy') or (seqname == 'Twinnings') \ 42 | or (seqname == 'Vase') or (seqname == 'Walking') or 
"""Build ``imagenet_refine.pkl`` from the ILSVRC2015 VID training set.

For every training sequence, keep the frames in which the object with
``trackid`` 0 is annotated and is not too large, and store the frame names
with their ``[x, y, w, h]`` boxes in a pickled dict keyed by sequence name.
"""
import os
import numpy as np
import pickle
from collections import OrderedDict

import xml.etree.ElementTree
import xmltodict

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import time

output_path = './imagenet_refine.pkl'

seq_home = '/mnt/jgao/jgao/ILSVRC2015/'
train_list = [p for p in os.listdir(seq_home + 'Data/VID/train')]
seq_list = []
for num, cur_dir in enumerate(train_list):
    seq_list += [cur_dir + '/' + p for p in os.listdir(seq_home + 'Data/VID/train/' + cur_dir)]

# NOTE(review): fig/ax are created but never used below; kept so the
# script's module-level behaviour is unchanged.
fig = plt.figure()
ax = fig.add_subplot(1,1,1)

data = {}
completeNum = 0
for i, seqname in enumerate(seq_list):
    print(seqname)
    seq_path = seq_home + 'Data/VID/train/' + seqname
    gt_path = seq_home + 'Annotations/VID/train/' + seqname
    if not os.path.isdir(seq_path):
        continue
    img_list = sorted([p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.JPEG'])

    enable_gt = []
    enable_img_list = []
    gt_list = sorted([gt_path + '/' + p for p in os.listdir(gt_path) if os.path.splitext(p)[1] == '.xml'])
    for gidx in range(0, len(img_list)):
        with open(gt_list[gidx]) as fd:
            doc = xmltodict.parse(fd.read())

        # xmltodict yields a list for repeated <object> elements and a single
        # mapping for one; frames without any <object> (no object, occlusion,
        # hidden, etc.) are skipped.
        annotation = doc.get('annotation') or {}
        objects = annotation.get('object')
        if not objects:
            continue
        target = objects[0] if isinstance(objects, list) else objects

        # Only the object with track id 0 is kept.
        if int(target['trackid']) != 0:
            continue

        xmin = float(target['bndbox']['xmin'])
        xmax = float(target['bndbox']['xmax'])
        ymin = float(target['bndbox']['ymin'])
        ymax = float(target['bndbox']['ymax'])

        ## discard too big object (wider AND taller than half of the frame)
        if ((float(doc['annotation']['size']['width'])/2.) < (xmax-xmin)) and ((float(doc['annotation']['size']['height'])/2.) < (ymax-ymin)):
            continue

        cur_gt = np.zeros((4))
        cur_gt[0] = xmin
        cur_gt[1] = ymin
        cur_gt[2] = xmax - xmin
        cur_gt[3] = ymax - ymin
        enable_gt.append(cur_gt)

        enable_img_list.append(img_list[gidx])

    # Sequences without a single usable frame are not stored.
    save_enable = len(enable_img_list) != 0
    if save_enable:
        assert len(enable_img_list) == len(enable_gt), "Lengths do not match!!"
        data[seqname] = {'images':enable_img_list, 'gt':np.asarray(enable_gt)}
        completeNum += 1
        print('Complete!')

with open(output_path, 'wb') as fp:
    pickle.dump(data, fp, -1)

print('complete {} videos'.format(completeNum))
31 | pretrain_opts['padded_img_size'] = pretrain_opts['img_size']*int(pretrain_opts['padding_ratio']) 32 | pretrain_opts['frame_interval'] = 2 33 | -------------------------------------------------------------------------------- /modules/roi_align/.setup.py.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/.setup.py.swp -------------------------------------------------------------------------------- /modules/roi_align/Makefile: -------------------------------------------------------------------------------- 1 | all: src/cuda/roi_align.cu.o 2 | python build.py 3 | 4 | src/cuda/roi_align.cu.o: src/cuda/roi_align_kernel.cu 5 | $(MAKE) -C src/cuda 6 | 7 | clean: 8 | $(MAKE) -C src/cuda clean 9 | -------------------------------------------------------------------------------- /modules/roi_align/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/__init__.py -------------------------------------------------------------------------------- /modules/roi_align/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /modules/roi_align/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext/__init__.py -------------------------------------------------------------------------------- /modules/roi_align/_ext/__init__.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext/__init__.pyc -------------------------------------------------------------------------------- /modules/roi_align/_ext/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._roi_align import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /modules/roi_align/_ext/roi_align/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext/roi_align/__init__.pyc -------------------------------------------------------------------------------- /modules/roi_align/_ext/roi_align/_roi_align.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext/roi_align/_roi_align.so -------------------------------------------------------------------------------- /modules/roi_align/_ext2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext2/__init__.py -------------------------------------------------------------------------------- /modules/roi_align/_ext2/__init__.pyc: 
# https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext2/__init__.pyc
# /modules/roi_align/_ext2/__pycache__/__init__.cpython-37.pyc: https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext2/__pycache__/__init__.cpython-37.pyc
# /modules/roi_align/_ext2/roi_align.cpython-37m-x86_64-linux-gnu.so: https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext2/roi_align.cpython-37m-x86_64-linux-gnu.so

# --------------------------------------------------------------------------------
# /modules/roi_align/build.py:
# --------------------------------------------------------------------------------
# Legacy build script: links the pre-compiled CUDA object into `_ext.roi_align`
# through the FFI extension builder.
# NOTE(review): torch.utils.ffi is, as far as I know, unavailable from
# PyTorch 1.0 on, and the Function classes below import `_ext2` (built by
# setup.py), not `_ext` -- confirm whether this script is still needed.
import os
import torch
from torch.utils.ffi import create_extension


# sources = ['src/roi_align.c']
# headers = ['src/roi_align.h']
sources = []
headers = []
defines = []
with_cuda = False

if torch.cuda.is_available():
    print('Including CUDA code.')
    sources += ['src/roi_align_cuda.c']
    headers += ['src/roi_align_cuda.h']
    defines += [('WITH_CUDA', None)]
    with_cuda = True

this_file = os.path.dirname(os.path.realpath(__file__))
print(this_file)
# The CUDA kernels are expected to be compiled beforehand into this object.
extra_objects = ['src/cuda/roi_align.cu.o']
extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]

ffi = create_extension(
    '_ext.roi_align',
    headers=headers,
    sources=sources,
    define_macros=defines,
    relative_to=__file__,
    with_cuda=with_cuda,
    extra_objects=extra_objects
)

if __name__ == '__main__':
    ffi.build()

# /modules/roi_align/functions/__init__.py: https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/functions/__init__.py
# /modules/roi_align/functions/__init__.pyc: https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/functions/__init__.pyc
# /modules/roi_align/functions/__pycache__/__init__.cpython-37.pyc: https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/functions/__pycache__/__init__.cpython-37.pyc
# /modules/roi_align/functions/__pycache__/roi_align.cpython-37.pyc: https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/functions/__pycache__/roi_align.cpython-37.pyc

# --------------------------------------------------------------------------------
# /modules/roi_align/functions/roi_align.py:
# --------------------------------------------------------------------------------
import torch
from torch.autograd import Function
from .._ext2 import roi_align


class _RoIAlignOp(Function):
    """Autograd op shared by every RoIAlign variant.

    Written in the static ``forward(ctx, ...)`` / ``backward(ctx, ...)`` style
    and invoked through ``apply``. The concrete CUDA kernels are selected by
    attribute name on the compiled ``roi_align`` extension, so one
    implementation serves the plain, "ada" and "dense ada" variants.
    """

    @staticmethod
    def forward(ctx, features, rois, aligned_height, aligned_width,
                spatial_scale, forward_name, backward_name):
        """Sample ``features`` at every RoI.

        Args:
            features: 4-D CUDA tensor; its size is unpacked as
                (batch, channels, height, width).
            rois: tensor with one row per RoI (``rois.size(0)`` rows).
                NOTE(review): presumably (batch_idx, x1, y1, x2, y2) per row,
                as in the bundled CUDA sources -- confirm against the caller.
            aligned_height, aligned_width: spatial size of each output sample.
            spatial_scale: scale handed to the kernel unchanged.
            forward_name, backward_name: attribute names of the kernels on
                the ``roi_align`` extension module.

        Returns:
            Tensor of shape (rois.size(0), channels, aligned_height,
            aligned_width), allocated with the dtype/device of ``features``.

        Raises:
            NotImplementedError: if ``features`` is not a CUDA tensor.
        """
        if not features.is_cuda:
            raise NotImplementedError

        # Unpacking also enforces that `features` is 4-D, as before.
        _, num_channels, _, _ = features.size()
        num_rois = rois.size(0)

        ctx.save_for_backward(rois)
        ctx.feature_size = features.size()
        ctx.kernel_args = (aligned_height, aligned_width, spatial_scale)
        ctx.backward_name = backward_name

        output = features.new_zeros(
            (num_rois, num_channels, aligned_height, aligned_width))
        # The kernel fills `output` in place.
        getattr(roi_align, forward_name)(aligned_height, aligned_width,
                                         spatial_scale, features, rois, output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Scatter ``grad_output`` back onto a zero tensor shaped like the features.

        Returns one entry per ``forward`` argument; only ``features`` gets a
        gradient, every other input is non-differentiable.
        """
        assert ctx.feature_size is not None and grad_output.is_cuda

        rois, = ctx.saved_tensors
        aligned_height, aligned_width, spatial_scale = ctx.kernel_args
        batch_size, num_channels, data_height, data_width = ctx.feature_size

        # Allocate from grad_output (not rois) so the gradient always has the
        # dtype/device of the tensor it is the gradient for.
        grad_input = grad_output.new_zeros(
            (batch_size, num_channels, data_height, data_width))
        getattr(roi_align, ctx.backward_name)(aligned_height, aligned_width,
                                              spatial_scale, grad_output,
                                              rois, grad_input)

        return grad_input, None, None, None, None, None, None


class _RoIAlignCallable(object):
    """Keeps the historical ``Cls(height, width, scale)(features, rois)`` call form.

    Subclasses only name the forward/backward kernels to dispatch to.
    """

    _forward_name = None
    _backward_name = None

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def __call__(self, features, rois):
        """Run the differentiable RoIAlign op on ``features`` for ``rois``."""
        return _RoIAlignOp.apply(features, rois,
                                 self.aligned_height, self.aligned_width,
                                 self.spatial_scale,
                                 self._forward_name, self._backward_name)


class RoIAlignFunction(_RoIAlignCallable):
    """RoIAlign backed by ``roi_align_forward_cuda`` / ``roi_align_backward_cuda``."""

    _forward_name = 'roi_align_forward_cuda'
    _backward_name = 'roi_align_backward_cuda'


class RoIAlignAdaFunction(_RoIAlignCallable):
    """RoIAlign backed by the ``roi_align_ada_*_cuda`` kernels."""

    _forward_name = 'roi_align_ada_forward_cuda'
    _backward_name = 'roi_align_ada_backward_cuda'


class RoIAlignDenseAdaFunction(_RoIAlignCallable):
    """RoIAlign backed by the ``roi_align_dense_ada_*_cuda`` kernels."""

    _forward_name = 'roi_align_dense_ada_forward_cuda'
    _backward_name = 'roi_align_dense_ada_backward_cuda'

# /modules/roi_align/functions/roi_align.pyc: https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/functions/roi_align.pyc
# /modules/roi_align/modules/__init__.py: https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/modules/__init__.py
# /modules/roi_align/modules/__init__.pyc: https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/modules/__init__.pyc
# /modules/roi_align/modules/__pycache__/__init__.cpython-37.pyc: https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/modules/__pycache__/__init__.cpython-37.pyc
# /modules/roi_align/modules/__pycache__/roi_align.cpython-37.pyc:
# https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/modules/__pycache__/roi_align.cpython-37.pyc

# --------------------------------------------------------------------------------
# /modules/roi_align/modules/roi_align.py:
# --------------------------------------------------------------------------------
from torch.nn.modules.module import Module
from torch.nn.functional import avg_pool2d, max_pool2d
from ..functions.roi_align import RoIAlignFunction, RoIAlignAdaFunction, RoIAlignDenseAdaFunction

import torch


class _RoIAlignModule(Module):
    """Common state for the RoIAlign layers below.

    Stores the requested output height/width (as ints) and the spatial scale
    (as a float); subclasses decide which sampling function and pooling to use.
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(_RoIAlignModule, self).__init__()

        self.aligned_height = int(aligned_height)
        self.aligned_width = int(aligned_width)
        self.spatial_scale = float(spatial_scale)

    def _sample(self, function_cls, features, rois, margin=0):
        """Sample an (aligned_height + margin) x (aligned_width + margin) grid per RoI."""
        sampler = function_cls(self.aligned_height + margin,
                               self.aligned_width + margin,
                               self.spatial_scale)
        return sampler(features, rois)


class RoIAlign(_RoIAlignModule):
    """Plain RoIAlign: returns the sampled grid without any pooling."""

    def forward(self, features, rois):
        return self._sample(RoIAlignFunction, features, rois)


class RoIAlignAvg(_RoIAlignModule):
    """RoIAlign sampled one row/column larger, then 2x2 average-pooled at stride 1."""

    def forward(self, features, rois):
        sampled = self._sample(RoIAlignFunction, features, rois, margin=1)
        return avg_pool2d(sampled, kernel_size=2, stride=1)


class RoIAlignMax(_RoIAlignModule):
    """RoIAlign sampled four rows/columns larger, then 3x3 max-pooled at stride 2."""

    def forward(self, features, rois):
        sampled = self._sample(RoIAlignFunction, features, rois, margin=4)
        return max_pool2d(sampled, kernel_size=3, stride=2)


class RoIAlignAdaMax(_RoIAlignModule):
    """Same pooling as RoIAlignMax, sampling through RoIAlignAdaFunction."""

    def forward(self, features, rois):
        sampled = self._sample(RoIAlignAdaFunction, features, rois, margin=4)
        return max_pool2d(sampled, kernel_size=3, stride=2)


class RoIAlignDenseAdaMax(_RoIAlignModule):
    """Same pooling as RoIAlignMax, sampling through RoIAlignDenseAdaFunction."""

    def forward(self, features, rois):
        sampled = self._sample(RoIAlignDenseAdaFunction, features, rois, margin=4)
        return max_pool2d(sampled, kernel_size=3, stride=2)

# /modules/roi_align/modules/roi_align.pyc: https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/modules/roi_align.pyc

# --------------------------------------------------------------------------------
# /modules/roi_align/setup.py:
# --------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "src") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | #source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | #source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | #sources = main_file + source_cpu 26 | #extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | #define_macros = [] 30 | 31 | if torch.cuda.is_available() and CUDA_HOME is not None: 32 | extension = CUDAExtension 33 | #sources += source_cuda 34 | sources = main_file; 35 | define_macros = [("WITH_CUDA", None)] 36 | with_cuda = True 37 | #extra_compile_args = {"nvcc": [ 38 | # "-DCUDA_HAS_FP16=1", 39 | # "-D__CUDA_NO_HALF_OPERATORS__", 40 | # "-D__CUDA_NO_HALF_CONVERSIONS__", 41 | # "-D__CUDA_NO_HALF2_OPERATORS__", 42 | #]} 43 | 44 | sources = [os.path.join(extensions_dir, s) for s in sources] 45 | 46 | include_dirs = [extensions_dir] 47 | extra_objects = ['src/cuda/roi_align.cu.o'] 48 | extra_objects = [os.path.join(this_dir, fname) for fname in extra_objects] 49 | ext_modules = [ 50 | extension( 51 | "_ext2.roi_align", 52 | sources, 53 | include_dirs=include_dirs, 54 | define_macros=define_macros, 55 | relative_to=__file__, 56 | with_cuda=with_cuda, 57 | extra_compile_args = extra_compile_args, 58 | extra_objects=extra_objects 59 | #extra_compile_args=extra_compile_args, 60 | ) 61 | ] 62 | 63 | return ext_modules 64 | 65 | 66 | setup( 67 | name="RT_MDNet", 68 | version="1.1", 69 | author="amgao", 70 | 
url="https://github.com/IlchaeJung/RT-MDNet.git", 71 | description="Real time tracking using MDNet in pytorch", 72 | #packages=find_packages(exclude=("configs", "tests",)), 73 | # install_requires=requirements, 74 | ext_modules=get_extensions(), 75 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 76 | ) 77 | -------------------------------------------------------------------------------- /modules/roi_align/src.bak/.roi_align_cuda.cpp.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/src.bak/.roi_align_cuda.cpp.swp -------------------------------------------------------------------------------- /modules/roi_align/src.bak/cuda/Makefile: -------------------------------------------------------------------------------- 1 | all: roi_align_kernel.cu roi_align_kernel.h 2 | nvcc -c -o roi_align.cu.o roi_align_kernel.cu -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52 3 | 4 | clean: 5 | rm roi_align.cu.o 6 | -------------------------------------------------------------------------------- /modules/roi_align/src.bak/cuda/roi_align.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/src.bak/cuda/roi_align.cu.o -------------------------------------------------------------------------------- /modules/roi_align/src.bak/cuda/roi_align_kernel.cu: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern "C" { 3 | #endif 4 | 5 | #include 6 | #include 7 | #include 8 | #include "roi_align_kernel.h" 9 | 10 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 11 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 12 | i += blockDim.x * gridDim.x) 13 | 14 | 15 | __global__ void ROIAlignForward(const int nthreads, const float* 
bottom_data, const float spatial_scale, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) { 16 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 17 | // (n, c, ph, pw) is an element in the aligned output 18 | int n = index; 19 | int pw = n % aligned_width; 20 | n /= aligned_width; 21 | int ph = n % aligned_height; 22 | n /= aligned_height; 23 | int c = n % channels; 24 | n /= channels; 25 | 26 | bottom_rois += n * 5; 27 | float roi_batch_ind = bottom_rois[0]; 28 | float roi_start_w = bottom_rois[1] * spatial_scale; 29 | float roi_start_h = bottom_rois[2] * spatial_scale; 30 | float roi_end_w = bottom_rois[3] * spatial_scale; 31 | float roi_end_h = bottom_rois[4] * spatial_scale; 32 | 33 | // Force malformed ROIs to be 1x1 34 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.); 35 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.); 36 | //This is incorrect, because the coordinates for RoI follows the [L,R) convension. That is, (0, 0, 4, 4) denotes a box of size 4by4. 
37 | /* original code */ 38 | 39 | float bin_size_h = roi_height / (aligned_height - 1.); 40 | float bin_size_w = roi_width / (aligned_width - 1.); 41 | 42 | float h = (float)(ph) * bin_size_h + roi_start_h; 43 | float w = (float)(pw) * bin_size_w + roi_start_w; 44 | 45 | 46 | /*======================from ilchae========================*/ 47 | /* 48 | float bin_size_h = roi_height / (aligned_height + 1.); 49 | float bin_size_w = roi_width / (aligned_width + 1.); 50 | 51 | float h = (float)(ph+1) * bin_size_h + roi_start_h; 52 | float w = (float)(pw+1) * bin_size_w + roi_start_w; 53 | */ 54 | //////////////////////////////////////////////////////////// 55 | 56 | int hstart = fminf(floor(h), height - 2); 57 | int wstart = fminf(floor(w), width - 2); 58 | 59 | 60 | int img_start = roi_batch_ind * channels * height * width; 61 | 62 | // bilinear interpolation 63 | if (h < 0 || h >= height || w < 0 || w >= width) { 64 | top_data[index] = 0.; 65 | } else { 66 | float h_ratio = h - (float)(hstart); 67 | float w_ratio = w - (float)(wstart); 68 | int upleft = img_start + (c * height + hstart) * width + wstart; 69 | int upright = upleft + 1; 70 | int downleft = upleft + width; 71 | int downright = downleft + 1; 72 | 73 | top_data[index] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio) 74 | + bottom_data[upright] * (1. - h_ratio) * w_ratio 75 | + bottom_data[downleft] * h_ratio * (1. 
- w_ratio) 76 | + bottom_data[downright] * h_ratio * w_ratio; 77 | } 78 | } 79 | } 80 | 81 | 82 | int ROIAlignForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) { 83 | const int kThreadsPerBlock = 1024; 84 | const int output_size = num_rois * aligned_height * aligned_width * channels; 85 | cudaError_t err; 86 | 87 | 88 | ROIAlignForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(output_size, bottom_data, spatial_scale, height, width, channels, aligned_height, aligned_width, bottom_rois, top_data); 89 | 90 | err = cudaGetLastError(); 91 | if(cudaSuccess != err) { 92 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 93 | exit( -1 ); 94 | } 95 | 96 | return 1; 97 | } 98 | 99 | 100 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) { 101 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 102 | 103 | // (n, c, ph, pw) is an element in the aligned output 104 | int n = index; 105 | int pw = n % aligned_width; 106 | n /= aligned_width; 107 | int ph = n % aligned_height; 108 | n /= aligned_height; 109 | int c = n % channels; 110 | n /= channels; 111 | 112 | bottom_rois += n * 5; 113 | float roi_batch_ind = bottom_rois[0]; 114 | float roi_start_w = bottom_rois[1] * spatial_scale; 115 | float roi_start_h = bottom_rois[2] * spatial_scale; 116 | float roi_end_w = bottom_rois[3] * spatial_scale; 117 | float roi_end_h = bottom_rois[4] * spatial_scale; 118 | /* int roi_start_w = round(bottom_rois[1] * spatial_scale); */ 119 | /* int roi_start_h = round(bottom_rois[2] * spatial_scale); */ 120 | /* int 
roi_end_w = round(bottom_rois[3] * spatial_scale); */ 121 | /* int roi_end_h = round(bottom_rois[4] * spatial_scale); */ 122 | 123 | // Force malformed ROIs to be 1x1 124 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.); 125 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.); 126 | /* ============ original code =========== */ 127 | 128 | float bin_size_h = roi_height / (aligned_height - 1.); 129 | float bin_size_w = roi_width / (aligned_width - 1.); 130 | 131 | float h = (float)(ph) * bin_size_h + roi_start_h; 132 | float w = (float)(pw) * bin_size_w + roi_start_w; 133 | 134 | 135 | /*======================from ilchae========================*/ 136 | /* 137 | float bin_size_h = roi_height / (aligned_height + 1.); 138 | float bin_size_w = roi_width / (aligned_width + 1.); 139 | 140 | float h = (float)(ph+1) * bin_size_h + roi_start_h; 141 | float w = (float)(pw+1) * bin_size_w + roi_start_w; 142 | */ 143 | //////////////////////////////////////////////////////////// 144 | 145 | 146 | int hstart = fminf(floor(h), height - 2); 147 | int wstart = fminf(floor(w), width - 2); 148 | 149 | int img_start = roi_batch_ind * channels * height * width; 150 | 151 | // bilinear interpolation 152 | if (!(h < 0 || h >= height || w < 0 || w >= width)) { 153 | float h_ratio = h - (float)(hstart); 154 | float w_ratio = w - (float)(wstart); 155 | int upleft = img_start + (c * height + hstart) * width + wstart; 156 | int upright = upleft + 1; 157 | int downleft = upleft + width; 158 | int downright = downleft + 1; 159 | 160 | atomicAdd(bottom_diff + upleft, top_diff[index] * (1. - h_ratio) * (1 - w_ratio)); 161 | atomicAdd(bottom_diff + upright, top_diff[index] * (1. 
- h_ratio) * w_ratio); 162 | atomicAdd(bottom_diff + downleft, top_diff[index] * h_ratio * (1 - w_ratio)); 163 | atomicAdd(bottom_diff + downright, top_diff[index] * h_ratio * w_ratio); 164 | } 165 | } 166 | } 167 | 168 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) { 169 | const int kThreadsPerBlock = 1024; 170 | const int output_size = num_rois * aligned_height * aligned_width * channels; 171 | cudaError_t err; 172 | 173 | ROIAlignBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(output_size, top_diff, spatial_scale, height, width, channels, aligned_height, aligned_width, bottom_diff, bottom_rois); 174 | 175 | err = cudaGetLastError(); 176 | if(cudaSuccess != err) { 177 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 178 | exit( -1 ); 179 | } 180 | 181 | return 1; 182 | } 183 | 184 | __global__ void ROIAlignAdaForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) { 185 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 186 | // (n, c, ph, pw) is an element in the aligned output 187 | int n = index; 188 | int pw = n % aligned_width; 189 | n /= aligned_width; 190 | int ph = n % aligned_height; 191 | n /= aligned_height; 192 | int c = n % channels; 193 | n /= channels; 194 | 195 | bottom_rois += n * 5; 196 | float roi_batch_ind = bottom_rois[0]; 197 | float roi_start_w = bottom_rois[1] * spatial_scale; 198 | float roi_start_h = bottom_rois[2] * spatial_scale; 199 | float roi_end_w = bottom_rois[3] * spatial_scale; 200 | float roi_end_h = bottom_rois[4] * spatial_scale; 201 | 202 
| // Force malformed ROIs to be 1x1 203 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.); 204 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.); 205 | //This is incorrect, because the coordinates for RoI follows the [L,R) convension. That is, (0, 0, 4, 4) denotes a box of size 4by4. 206 | 207 | /* original code */ 208 | 209 | float bin_size_h = roi_height / (float)(aligned_height); 210 | float bin_size_w = roi_width / (float)(aligned_width); 211 | 212 | int stride_w = fmaxf(1,round(bin_size_w)); 213 | int stride_h = fmaxf(1,round(bin_size_h)); 214 | 215 | 216 | float h = (float)(ph) * bin_size_h + roi_start_h; // this is right in geometically 217 | float w = (float)(pw) * bin_size_w + roi_start_w; // this is right in geometically 218 | 219 | 220 | 221 | 222 | int hstart = fminf(floor((float)(ph) * bin_size_h + roi_start_h), height - 2); 223 | int wstart = fminf(floor((float)(pw) * bin_size_w + roi_start_w), width - 2); 224 | 225 | int img_start = roi_batch_ind * channels * height * width; 226 | 227 | // bilinear interpolation 228 | if (h < 0 || h >= height || w < 0 || w >= width) { 229 | top_data[index] = 0.; 230 | } else { 231 | for(int hidx=0; hidx<=stride_h; hidx+=stride_h){ 232 | for(int widx=0; widx<=stride_w; widx+=stride_w){ 233 | if( ((widx+wstart)>=0) && ((widx+wstart)=0) && ((hidx+hstart)>>(output_size, bottom_data, spatial_scale, height, width, channels, aligned_height, aligned_width, bottom_rois, top_data); 254 | 255 | err = cudaGetLastError(); 256 | if(cudaSuccess != err) { 257 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 258 | exit( -1 ); 259 | } 260 | 261 | return 1; 262 | } 263 | 264 | 265 | __global__ void ROIAlignAdaBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) { 266 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 
267 | 268 | // (n, c, ph, pw) is an element in the aligned output 269 | int n = index; 270 | int pw = n % aligned_width; 271 | n /= aligned_width; 272 | int ph = n % aligned_height; 273 | n /= aligned_height; 274 | int c = n % channels; 275 | n /= channels; 276 | 277 | bottom_rois += n * 5; 278 | float roi_batch_ind = bottom_rois[0]; 279 | float roi_start_w = bottom_rois[1] * spatial_scale; 280 | float roi_start_h = bottom_rois[2] * spatial_scale; 281 | float roi_end_w = bottom_rois[3] * spatial_scale; 282 | float roi_end_h = bottom_rois[4] * spatial_scale; 283 | 284 | // Force malformed ROIs to be 1x1 285 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.); 286 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.); 287 | /* ============ original code =========== */ 288 | 289 | float bin_size_h = roi_height / (float)(aligned_height); 290 | float bin_size_w = roi_width / (float)(aligned_width); 291 | 292 | int stride_w = fmaxf(1,round(bin_size_w)); 293 | int stride_h = fmaxf(1,round(bin_size_h)); 294 | 295 | float h = (float)(ph) * bin_size_h + roi_start_h; 296 | float w = (float)(pw) * bin_size_w + roi_start_w; 297 | 298 | int hstart = fminf(floor((float)(ph) * bin_size_h + roi_start_h), height - 2); 299 | int wstart = fminf(floor((float)(pw) * bin_size_w + roi_start_w), width - 2); 300 | 301 | int img_start = roi_batch_ind * channels * height * width; 302 | 303 | // bilinear interpolation 304 | if (!(h < 0 || h >= height || w < 0 || w >= width)) { 305 | for(int hidx=0; hidx<=stride_h; hidx+=stride_h){ 306 | for(int widx=0; widx<=stride_w; widx+=stride_w){ 307 | if( ((hstart+hidx)>=0) && ((hstart+hidx)=0) && ((wstart+widx)>>(output_size, top_diff, spatial_scale, height, width, channels, aligned_height, aligned_width, bottom_diff, bottom_rois); 326 | 327 | err = cudaGetLastError(); 328 | if(cudaSuccess != err) { 329 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 330 | exit( -1 ); 331 | } 332 | 333 | return 1; 334 | } 
335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | __global__ void ROIAlignDenseAdaForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) { 353 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 354 | // (n, c, ph, pw) is an element in the aligned output 355 | int n = index; 356 | int pw = n % aligned_width; 357 | n /= aligned_width; 358 | int ph = n % aligned_height; 359 | n /= aligned_height; 360 | int c = n % channels; 361 | n /= channels; 362 | 363 | bottom_rois += n * 5; 364 | float roi_batch_ind = bottom_rois[0]; 365 | float roi_start_w = bottom_rois[1] * spatial_scale; 366 | float roi_start_h = bottom_rois[2] * spatial_scale; 367 | float roi_end_w = bottom_rois[3] * spatial_scale; 368 | float roi_end_h = bottom_rois[4] * spatial_scale; 369 | 370 | // Force malformed ROIs to be 1x1 371 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.); 372 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.); 373 | //This is incorrect, because the coordinates for RoI follows the [L,R) convension. That is, (0, 0, 4, 4) denotes a box of size 4by4. 
374 | /* original code */ 375 | 376 | float bin_size_h = roi_height / (float)(aligned_height); 377 | float bin_size_w = roi_width / (float)(aligned_width); 378 | 379 | int stride_w = fmaxf(1,round(bin_size_w)); 380 | int stride_h = fmaxf(1,round(bin_size_h)); 381 | 382 | 383 | float h = (float)(ph) * bin_size_h + roi_start_h; // this is right in geometically 384 | float w = (float)(pw) * bin_size_w + roi_start_w; // this is right in geometically 385 | 386 | 387 | 388 | 389 | int hstart = fminf(floor((float)(ph) * bin_size_h + roi_start_h), height - 2); 390 | int wstart = fminf(floor((float)(pw) * bin_size_w + roi_start_w), width - 2); 391 | 392 | int img_start = roi_batch_ind * channels * height * width; 393 | 394 | // bilinear interpolation 395 | if (h < 0 || h >= height || w < 0 || w >= width) { 396 | top_data[index] = 0.; 397 | } else { 398 | 399 | float ratio_sum = 0. ; 400 | for(int hidx=0; hidx<=stride_h; hidx++){ 401 | for(int widx=0; widx<=stride_w; widx++){ 402 | int cur_loc = img_start + (c * height + hstart) * width + wstart + hidx*width + widx; 403 | float h_ratio = 1. - (float)fabsf(h-hstart-hidx)/(float)stride_h; 404 | float w_ratio = 1. 
- (float)fabsf(w-wstart-widx)/(float)stride_w; 405 | 406 | float ratio = h_ratio * w_ratio; 407 | ratio_sum += ratio; 408 | top_data[index]+=bottom_data[cur_loc]*ratio; 409 | } 410 | } 411 | top_data[index]/=ratio_sum; 412 | } 413 | } 414 | } 415 | 416 | 417 | int ROIAlignDenseAdaForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) { 418 | const int kThreadsPerBlock = 1024; 419 | const int output_size = num_rois * aligned_height * aligned_width * channels; 420 | cudaError_t err; 421 | 422 | 423 | ROIAlignDenseAdaForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(output_size, bottom_data, spatial_scale, height, width, channels, aligned_height, aligned_width, bottom_rois, top_data); 424 | 425 | err = cudaGetLastError(); 426 | if(cudaSuccess != err) { 427 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 428 | exit( -1 ); 429 | } 430 | 431 | return 1; 432 | } 433 | 434 | 435 | __global__ void ROIAlignDenseAdaBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) { 436 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 437 | 438 | // (n, c, ph, pw) is an element in the aligned output 439 | int n = index; 440 | int pw = n % aligned_width; 441 | n /= aligned_width; 442 | int ph = n % aligned_height; 443 | n /= aligned_height; 444 | int c = n % channels; 445 | n /= channels; 446 | 447 | bottom_rois += n * 5; 448 | float roi_batch_ind = bottom_rois[0]; 449 | float roi_start_w = bottom_rois[1] * spatial_scale; 450 | float roi_start_h = bottom_rois[2] * spatial_scale; 451 | float roi_end_w = bottom_rois[3] * spatial_scale; 452 | 
float roi_end_h = bottom_rois[4] * spatial_scale; 453 | 454 | // Force malformed ROIs to be 1x1 455 | float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.); 456 | float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.); 457 | /* ============ original code =========== */ 458 | 459 | float bin_size_h = roi_height / (float)(aligned_height); 460 | float bin_size_w = roi_width / (float)(aligned_width); 461 | 462 | int stride_w = fmaxf(1,round(bin_size_w)); 463 | int stride_h = fmaxf(1,round(bin_size_h)); 464 | 465 | float h = (float)(ph) * bin_size_h + roi_start_h; 466 | float w = (float)(pw) * bin_size_w + roi_start_w; 467 | 468 | int hstart = fminf(floor((float)(ph) * bin_size_h + roi_start_h), height - 2); 469 | int wstart = fminf(floor((float)(pw) * bin_size_w + roi_start_w), width - 2); 470 | 471 | int img_start = roi_batch_ind * channels * height * width; 472 | 473 | // bilinear interpolation 474 | if (!(h < 0 || h >= height || w < 0 || w >= width)) { 475 | for(int hidx=0; hidx<=stride_h; hidx+=stride_h){ 476 | for(int widx=0; widx<=stride_w; widx+=stride_w){ 477 | int cur_loc = img_start + (c * height + hstart) * width + wstart + hidx*width + widx; 478 | 479 | //float h_ratio = 1. - (float)fabsf(h-hstart-hidx)/(float)(stride_h); 480 | //float w_ratio = 1. - (float)fabsf(w-wstart-widx)/(float)(stride_w); 481 | 482 | float ratio = 1. / (2.505*(stride_h+1.)*(stride_w+1.)) * expf( -0.5*(powf((h-hstart-hidx)/(float)stride_h,2.) 
+ powf( (w-wstart-widx)/(float)stride_w, 2.)) ) ; 483 | 484 | atomicAdd(bottom_diff + cur_loc, top_diff[index]*ratio); 485 | } 486 | } 487 | } 488 | } 489 | } 490 | 491 | int ROIAlignDenseAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) { 492 | const int kThreadsPerBlock = 1024; 493 | const int output_size = num_rois * aligned_height * aligned_width * channels; 494 | cudaError_t err; 495 | 496 | ROIAlignDenseAdaBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(output_size, top_diff, spatial_scale, height, width, channels, aligned_height, aligned_width, bottom_diff, bottom_rois); 497 | 498 | err = cudaGetLastError(); 499 | if(cudaSuccess != err) { 500 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 501 | exit( -1 ); 502 | } 503 | 504 | return 1; 505 | } 506 | 507 | 508 | 509 | #ifdef __cplusplus 510 | } 511 | #endif 512 | 513 | 514 | -------------------------------------------------------------------------------- /modules/roi_align/src.bak/cuda/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int 
aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | 30 | __global__ void ROIAlignAdaForward(const int nthreads, const float* bottom_data, 31 | const float spatial_scale, const int height, const int width, 32 | const int channels, const int aligned_height, const int aligned_width, 33 | const float* bottom_rois, float* top_data); 34 | 35 | int ROIAlignAdaForwardLaucher( 36 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 37 | const int width, const int channels, const int aligned_height, 38 | const int aligned_width, const float* bottom_rois, 39 | float* top_data, cudaStream_t stream); 40 | 41 | __global__ void ROIAlignAdaBackward(const int nthreads, const float* top_diff, 42 | const float spatial_scale, const int height, const int width, 43 | const int channels, const int aligned_height, const int aligned_width, 44 | float* bottom_diff, const float* bottom_rois); 45 | 46 | int ROIAlignAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 47 | const int height, const int width, const int channels, const int aligned_height, 48 | const int aligned_width, const float* bottom_rois, 49 | float* bottom_diff, cudaStream_t stream); 50 | 51 | 52 | __global__ void ROIAlignDenseAdaForward(const int nthreads, const float* bottom_data, 53 | const 
float spatial_scale, const int height, const int width, 54 | const int channels, const int aligned_height, const int aligned_width, 55 | const float* bottom_rois, float* top_data); 56 | 57 | int ROIAlignDenseAdaForwardLaucher( 58 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 59 | const int width, const int channels, const int aligned_height, 60 | const int aligned_width, const float* bottom_rois, 61 | float* top_data, cudaStream_t stream); 62 | 63 | __global__ void ROIAlignDenseAdaBackward(const int nthreads, const float* top_diff, 64 | const float spatial_scale, const int height, const int width, 65 | const int channels, const int aligned_height, const int aligned_width, 66 | float* bottom_diff, const float* bottom_rois); 67 | 68 | int ROIAlignDenseAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 69 | const int height, const int width, const int channels, const int aligned_height, 70 | const int aligned_width, const float* bottom_rois, 71 | float* bottom_diff, cudaStream_t stream); 72 | 73 | 74 | #ifdef __cplusplus 75 | } 76 | #endif 77 | 78 | #endif 79 | 80 | -------------------------------------------------------------------------------- /modules/roi_align/src.bak/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = 
THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // batch size 25 | //int batch_size = THCudaTensor_size(state, features, 0); 26 | //if (batch_size != 1) 27 | //{ 28 | // return 0; 29 | //} 30 | 31 | 32 | // data height 33 | int data_height = THCudaTensor_size(state, features, 2); 34 | // data width 35 | int data_width = THCudaTensor_size(state, features, 3); 36 | // Number of channels 37 | int num_channels = THCudaTensor_size(state, features, 1); 38 | 39 | cudaStream_t stream = THCState_getCurrentStream(state); 40 | 41 | ROIAlignForwardLaucher( 42 | data_flat, spatial_scale, num_rois, data_height, 43 | data_width, num_channels, aligned_height, 44 | aligned_width, rois_flat, 45 | output_flat, stream); 46 | 47 | return 1; 48 | } 49 | 50 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 51 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 52 | { 53 | // Grab the input tensor 54 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 55 | float * rois_flat = THCudaTensor_data(state, rois); 56 | 57 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | //if (batch_size != 1) 70 | //{ 71 | // return 0; 72 | //} 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIAlignBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, 
num_channels, aligned_height, 84 | aligned_width, rois_flat, 85 | bottom_grad_flat, stream); 86 | 87 | return 1; 88 | } 89 | 90 | 91 | int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 92 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 93 | { 94 | // Grab the input tensor 95 | float * data_flat = THCudaTensor_data(state, features); 96 | float * rois_flat = THCudaTensor_data(state, rois); 97 | 98 | float * output_flat = THCudaTensor_data(state, output); 99 | 100 | // Number of ROIs 101 | int num_rois = THCudaTensor_size(state, rois, 0); 102 | int size_rois = THCudaTensor_size(state, rois, 1); 103 | if (size_rois != 5) 104 | { 105 | return 0; 106 | } 107 | 108 | // batch size 109 | //int batch_size = THCudaTensor_size(state, features, 0); 110 | //if (batch_size != 1) 111 | //{ 112 | // return 0; 113 | //} 114 | 115 | 116 | // data height 117 | int data_height = THCudaTensor_size(state, features, 2); 118 | // data width 119 | int data_width = THCudaTensor_size(state, features, 3); 120 | // Number of channels 121 | int num_channels = THCudaTensor_size(state, features, 1); 122 | 123 | cudaStream_t stream = THCState_getCurrentStream(state); 124 | 125 | ROIAlignAdaForwardLaucher( 126 | data_flat, spatial_scale, num_rois, data_height, 127 | data_width, num_channels, aligned_height, 128 | aligned_width, rois_flat, 129 | output_flat, stream); 130 | 131 | return 1; 132 | } 133 | 134 | int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 135 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 136 | { 137 | // Grab the input tensor 138 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 139 | float * rois_flat = THCudaTensor_data(state, rois); 140 | 141 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 142 | 143 | // Number of ROIs 144 | int num_rois = THCudaTensor_size(state, rois, 0); 145 | int size_rois = 
THCudaTensor_size(state, rois, 1); 146 | if (size_rois != 5) 147 | { 148 | return 0; 149 | } 150 | 151 | // batch size 152 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 153 | //if (batch_size != 1) 154 | //{ 155 | // return 0; 156 | //} 157 | // data height 158 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 159 | // data width 160 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 161 | // Number of channels 162 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 163 | 164 | cudaStream_t stream = THCState_getCurrentStream(state); 165 | ROIAlignAdaBackwardLaucher( 166 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 167 | data_width, num_channels, aligned_height, 168 | aligned_width, rois_flat, 169 | bottom_grad_flat, stream); 170 | 171 | return 1; 172 | } 173 | 174 | 175 | int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 176 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 177 | { 178 | // Grab the input tensor 179 | float * data_flat = THCudaTensor_data(state, features); 180 | float * rois_flat = THCudaTensor_data(state, rois); 181 | 182 | float * output_flat = THCudaTensor_data(state, output); 183 | 184 | // Number of ROIs 185 | int num_rois = THCudaTensor_size(state, rois, 0); 186 | int size_rois = THCudaTensor_size(state, rois, 1); 187 | if (size_rois != 5) 188 | { 189 | return 0; 190 | } 191 | 192 | // batch size 193 | //int batch_size = THCudaTensor_size(state, features, 0); 194 | //if (batch_size != 1) 195 | //{ 196 | // return 0; 197 | //} 198 | 199 | 200 | // data height 201 | int data_height = THCudaTensor_size(state, features, 2); 202 | // data width 203 | int data_width = THCudaTensor_size(state, features, 3); 204 | // Number of channels 205 | int num_channels = THCudaTensor_size(state, features, 1); 206 | 207 | cudaStream_t stream = THCState_getCurrentStream(state); 208 | 209 | ROIAlignDenseAdaForwardLaucher( 210 | 
data_flat, spatial_scale, num_rois, data_height, 211 | data_width, num_channels, aligned_height, 212 | aligned_width, rois_flat, 213 | output_flat, stream); 214 | 215 | return 1; 216 | } 217 | 218 | int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 219 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 220 | { 221 | // Grab the input tensor 222 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 223 | float * rois_flat = THCudaTensor_data(state, rois); 224 | 225 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 226 | 227 | // Number of ROIs 228 | int num_rois = THCudaTensor_size(state, rois, 0); 229 | int size_rois = THCudaTensor_size(state, rois, 1); 230 | if (size_rois != 5) 231 | { 232 | return 0; 233 | } 234 | 235 | // batch size 236 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 237 | //if (batch_size != 1) 238 | //{ 239 | // return 0; 240 | //} 241 | // data height 242 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 243 | // data width 244 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 245 | // Number of channels 246 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 247 | 248 | cudaStream_t stream = THCState_getCurrentStream(state); 249 | ROIAlignDenseAdaBackwardLaucher( 250 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 251 | data_width, num_channels, aligned_height, 252 | aligned_width, rois_flat, 253 | bottom_grad_flat, stream); 254 | 255 | return 1; 256 | } 257 | -------------------------------------------------------------------------------- /modules/roi_align/src.bak/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | //#include 2 | //#include 3 | #include "roi_align_cuda.hpp" 4 | #include 5 | #include "cuda/roi_align_kernel.h" 6 | //#include 7 | 8 | //THC_CLASS at::Context& at::globalContext(); 9 | //THCState *state = 
at::globalContext().getTHCState(); 10 | //ATen_CLASS at::Context& at::globalContext(); 11 | //THCState *state = at::globalContext().thc_state; 12 | 13 | extern "C" int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 14 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 15 | { 16 | // Grab the input tensor 17 | float * data_flat = THCudaTensor_data(state, features); 18 | float * rois_flat = THCudaTensor_data(state, rois); 19 | 20 | float * output_flat = THCudaTensor_data(state, output); 21 | 22 | // Number of ROIs 23 | int num_rois = THCudaTensor_size(state, rois, 0); 24 | int size_rois = THCudaTensor_size(state, rois, 1); 25 | if (size_rois != 5) 26 | { 27 | return 0; 28 | } 29 | 30 | // batch size 31 | //int batch_size = THCudaTensor_size(state, features, 0); 32 | //if (batch_size != 1) 33 | //{ 34 | // return 0; 35 | //} 36 | 37 | 38 | // data height 39 | int data_height = THCudaTensor_size(state, features, 2); 40 | // data width 41 | int data_width = THCudaTensor_size(state, features, 3); 42 | // Number of channels 43 | int num_channels = THCudaTensor_size(state, features, 1); 44 | 45 | cudaStream_t stream = THCState_getCurrentStream(state); 46 | 47 | ROIAlignForwardLaucher( 48 | data_flat, spatial_scale, num_rois, data_height, 49 | data_width, num_channels, aligned_height, 50 | aligned_width, rois_flat, 51 | output_flat, stream); 52 | 53 | return 1; 54 | } 55 | 56 | extern "C" int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 57 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 58 | { 59 | // Grab the input tensor 60 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 61 | float * rois_flat = THCudaTensor_data(state, rois); 62 | 63 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 64 | 65 | // Number of ROIs 66 | int num_rois = THCudaTensor_size(state, rois, 0); 67 | int size_rois = THCudaTensor_size(state, rois, 1); 
68 | if (size_rois != 5) 69 | { 70 | return 0; 71 | } 72 | 73 | // batch size 74 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 75 | //if (batch_size != 1) 76 | //{ 77 | // return 0; 78 | //} 79 | // data height 80 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 81 | // data width 82 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 83 | // Number of channels 84 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 85 | 86 | cudaStream_t stream = THCState_getCurrentStream(state); 87 | ROIAlignBackwardLaucher( 88 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 89 | data_width, num_channels, aligned_height, 90 | aligned_width, rois_flat, 91 | bottom_grad_flat, stream); 92 | 93 | return 1; 94 | } 95 | 96 | 97 | extern "C" int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 98 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 99 | { 100 | // Grab the input tensor 101 | float * data_flat = THCudaTensor_data(state, features); 102 | float * rois_flat = THCudaTensor_data(state, rois); 103 | 104 | float * output_flat = THCudaTensor_data(state, output); 105 | 106 | // Number of ROIs 107 | int num_rois = THCudaTensor_size(state, rois, 0); 108 | int size_rois = THCudaTensor_size(state, rois, 1); 109 | if (size_rois != 5) 110 | { 111 | return 0; 112 | } 113 | 114 | // batch size 115 | //int batch_size = THCudaTensor_size(state, features, 0); 116 | //if (batch_size != 1) 117 | //{ 118 | // return 0; 119 | //} 120 | 121 | 122 | // data height 123 | int data_height = THCudaTensor_size(state, features, 2); 124 | // data width 125 | int data_width = THCudaTensor_size(state, features, 3); 126 | // Number of channels 127 | int num_channels = THCudaTensor_size(state, features, 1); 128 | 129 | cudaStream_t stream = THCState_getCurrentStream(state); 130 | 131 | ROIAlignAdaForwardLaucher( 132 | data_flat, spatial_scale, num_rois, data_height, 133 | data_width, 
num_channels, aligned_height, 134 | aligned_width, rois_flat, 135 | output_flat, stream); 136 | 137 | return 1; 138 | } 139 | 140 | extern "C" int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 141 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 142 | { 143 | // Grab the input tensor 144 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 145 | float * rois_flat = THCudaTensor_data(state, rois); 146 | 147 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 148 | 149 | // Number of ROIs 150 | int num_rois = THCudaTensor_size(state, rois, 0); 151 | int size_rois = THCudaTensor_size(state, rois, 1); 152 | if (size_rois != 5) 153 | { 154 | return 0; 155 | } 156 | 157 | // batch size 158 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 159 | //if (batch_size != 1) 160 | //{ 161 | // return 0; 162 | //} 163 | // data height 164 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 165 | // data width 166 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 167 | // Number of channels 168 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 169 | 170 | cudaStream_t stream = THCState_getCurrentStream(state); 171 | ROIAlignAdaBackwardLaucher( 172 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 173 | data_width, num_channels, aligned_height, 174 | aligned_width, rois_flat, 175 | bottom_grad_flat, stream); 176 | 177 | return 1; 178 | } 179 | 180 | 181 | extern "C" int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 182 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 183 | { 184 | // Grab the input tensor 185 | float * data_flat = THCudaTensor_data(state, features); 186 | float * rois_flat = THCudaTensor_data(state, rois); 187 | 188 | float * output_flat = THCudaTensor_data(state, output); 189 | 190 | // Number of ROIs 191 | int num_rois = 
THCudaTensor_size(state, rois, 0); 192 | int size_rois = THCudaTensor_size(state, rois, 1); 193 | if (size_rois != 5) 194 | { 195 | return 0; 196 | } 197 | 198 | // batch size 199 | //int batch_size = THCudaTensor_size(state, features, 0); 200 | //if (batch_size != 1) 201 | //{ 202 | // return 0; 203 | //} 204 | 205 | 206 | // data height 207 | int data_height = THCudaTensor_size(state, features, 2); 208 | // data width 209 | int data_width = THCudaTensor_size(state, features, 3); 210 | // Number of channels 211 | int num_channels = THCudaTensor_size(state, features, 1); 212 | 213 | cudaStream_t stream = THCState_getCurrentStream(state); 214 | 215 | ROIAlignDenseAdaForwardLaucher( 216 | data_flat, spatial_scale, num_rois, data_height, 217 | data_width, num_channels, aligned_height, 218 | aligned_width, rois_flat, 219 | output_flat, stream); 220 | 221 | return 1; 222 | } 223 | 224 | extern "C" int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 225 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 226 | { 227 | // Grab the input tensor 228 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 229 | float * rois_flat = THCudaTensor_data(state, rois); 230 | 231 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 232 | 233 | // Number of ROIs 234 | int num_rois = THCudaTensor_size(state, rois, 0); 235 | int size_rois = THCudaTensor_size(state, rois, 1); 236 | if (size_rois != 5) 237 | { 238 | return 0; 239 | } 240 | 241 | // batch size 242 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 243 | //if (batch_size != 1) 244 | //{ 245 | // return 0; 246 | //} 247 | // data height 248 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 249 | // data width 250 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 251 | // Number of channels 252 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 253 | 254 | cudaStream_t stream = 
// Global THC state used by every wrapper in roi_align_cuda.cpp.
// NOTE(review): this is a *definition* placed in a header, initialised at
// static-initialisation time from at::globalContext(). Including this header
// from more than one translation unit would define `state` more than once;
// it appears to be included only by roi_align_cuda.cpp -- confirm.
THC_CLASS at::Context& at::globalContext();
THCState *state = at::globalContext().getTHCState();
//ATen_CLASS at::Context& at::globalContext();
//THCState *state = at::globalContext().thc_state;


#ifdef __cplusplus
extern "C" {
#endif

// Plain ROIAlign: forward / backward.
int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);

int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);


// "Ada" variant: forward / backward.
int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);

int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);

// "DenseAda" variant: forward / backward.
int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);

int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);


#ifdef __cplusplus
}
#endif

// Python bindings: each wrapper is exported under its C name, with a short
// label as the docstring.
// NOTE(review): the module definition also lives in this header, so the same
// single-inclusion caveat as for `state` applies. The wrappers take raw
// THCudaTensor* arguments; whether pybind11 can convert Python tensors to
// that type is not visible from here -- confirm.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("roi_align_forward_cuda", &roi_align_forward_cuda, "ROIAlign_forward");
  m.def("roi_align_backward_cuda", &roi_align_backward_cuda, "ROIAlign_backward");
  m.def("roi_align_ada_forward_cuda", &roi_align_ada_forward_cuda, "ROIAlign_Ada_forward");
  m.def("roi_align_ada_backward_cuda", &roi_align_ada_backward_cuda, "ROIAlign_Ada_backward");
  m.def("roi_align_dense_ada_forward_cuda", &roi_align_dense_ada_forward_cuda, "ROIAlign_Dense_Ada_forward");
  m.def("roi_align_dense_ada_backward_cuda", &roi_align_dense_ada_backward_cuda, "ROIAlign_Dense_Ada_backward");
}
3 | 4 | clean: 5 | rm roi_align.cu.o 6 | -------------------------------------------------------------------------------- /modules/roi_align/src.bak2/cuda/roi_align.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/src.bak2/cuda/roi_align.cu.o -------------------------------------------------------------------------------- /modules/roi_align/src.bak2/cuda/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | 30 | __global__ void ROIAlignAdaForward(const int nthreads, const float* bottom_data, 31 
| const float spatial_scale, const int height, const int width, 32 | const int channels, const int aligned_height, const int aligned_width, 33 | const float* bottom_rois, float* top_data); 34 | 35 | int ROIAlignAdaForwardLaucher( 36 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 37 | const int width, const int channels, const int aligned_height, 38 | const int aligned_width, const float* bottom_rois, 39 | float* top_data, cudaStream_t stream); 40 | 41 | __global__ void ROIAlignAdaBackward(const int nthreads, const float* top_diff, 42 | const float spatial_scale, const int height, const int width, 43 | const int channels, const int aligned_height, const int aligned_width, 44 | float* bottom_diff, const float* bottom_rois); 45 | 46 | int ROIAlignAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 47 | const int height, const int width, const int channels, const int aligned_height, 48 | const int aligned_width, const float* bottom_rois, 49 | float* bottom_diff, cudaStream_t stream); 50 | 51 | 52 | __global__ void ROIAlignDenseAdaForward(const int nthreads, const float* bottom_data, 53 | const float spatial_scale, const int height, const int width, 54 | const int channels, const int aligned_height, const int aligned_width, 55 | const float* bottom_rois, float* top_data); 56 | 57 | int ROIAlignDenseAdaForwardLaucher( 58 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 59 | const int width, const int channels, const int aligned_height, 60 | const int aligned_width, const float* bottom_rois, 61 | float* top_data, cudaStream_t stream); 62 | 63 | __global__ void ROIAlignDenseAdaBackward(const int nthreads, const float* top_diff, 64 | const float spatial_scale, const int height, const int width, 65 | const int channels, const int aligned_height, const int aligned_width, 66 | float* bottom_diff, const float* bottom_rois); 67 | 68 
| int ROIAlignDenseAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 69 | const int height, const int width, const int channels, const int aligned_height, 70 | const int aligned_width, const float* bottom_rois, 71 | float* bottom_diff, cudaStream_t stream); 72 | 73 | 74 | #ifdef __cplusplus 75 | } 76 | #endif 77 | 78 | #endif 79 | 80 | -------------------------------------------------------------------------------- /modules/roi_align/src.bak2/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // batch size 25 | //int batch_size = THCudaTensor_size(state, features, 0); 26 | //if (batch_size != 1) 27 | //{ 28 | // return 0; 29 | //} 30 | 31 | 32 | // data height 33 | int data_height = THCudaTensor_size(state, features, 2); 34 | // data width 35 | int data_width = THCudaTensor_size(state, features, 3); 36 | // Number of channels 37 | int num_channels = THCudaTensor_size(state, features, 1); 38 | 39 | cudaStream_t stream = THCState_getCurrentStream(state); 40 | 41 | ROIAlignForwardLaucher( 42 | data_flat, spatial_scale, num_rois, data_height, 43 | data_width, num_channels, aligned_height, 44 | aligned_width, rois_flat, 45 | output_flat, stream); 46 | 47 | return 1; 48 | } 49 | 50 | int 
roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 51 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 52 | { 53 | // Grab the input tensor 54 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 55 | float * rois_flat = THCudaTensor_data(state, rois); 56 | 57 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | //if (batch_size != 1) 70 | //{ 71 | // return 0; 72 | //} 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIAlignBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, aligned_height, 84 | aligned_width, rois_flat, 85 | bottom_grad_flat, stream); 86 | 87 | return 1; 88 | } 89 | 90 | 91 | int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 92 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 93 | { 94 | // Grab the input tensor 95 | float * data_flat = THCudaTensor_data(state, features); 96 | float * rois_flat = THCudaTensor_data(state, rois); 97 | 98 | float * output_flat = THCudaTensor_data(state, output); 99 | 100 | // Number of ROIs 101 | int num_rois = THCudaTensor_size(state, rois, 0); 102 | int size_rois = THCudaTensor_size(state, rois, 1); 103 | if (size_rois != 5) 104 | { 105 | return 0; 106 | } 107 | 108 | // batch size 109 | //int batch_size = THCudaTensor_size(state, features, 0); 
110 | //if (batch_size != 1) 111 | //{ 112 | // return 0; 113 | //} 114 | 115 | 116 | // data height 117 | int data_height = THCudaTensor_size(state, features, 2); 118 | // data width 119 | int data_width = THCudaTensor_size(state, features, 3); 120 | // Number of channels 121 | int num_channels = THCudaTensor_size(state, features, 1); 122 | 123 | cudaStream_t stream = THCState_getCurrentStream(state); 124 | 125 | ROIAlignAdaForwardLaucher( 126 | data_flat, spatial_scale, num_rois, data_height, 127 | data_width, num_channels, aligned_height, 128 | aligned_width, rois_flat, 129 | output_flat, stream); 130 | 131 | return 1; 132 | } 133 | 134 | int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 135 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 136 | { 137 | // Grab the input tensor 138 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 139 | float * rois_flat = THCudaTensor_data(state, rois); 140 | 141 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 142 | 143 | // Number of ROIs 144 | int num_rois = THCudaTensor_size(state, rois, 0); 145 | int size_rois = THCudaTensor_size(state, rois, 1); 146 | if (size_rois != 5) 147 | { 148 | return 0; 149 | } 150 | 151 | // batch size 152 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 153 | //if (batch_size != 1) 154 | //{ 155 | // return 0; 156 | //} 157 | // data height 158 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 159 | // data width 160 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 161 | // Number of channels 162 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 163 | 164 | cudaStream_t stream = THCState_getCurrentStream(state); 165 | ROIAlignAdaBackwardLaucher( 166 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 167 | data_width, num_channels, aligned_height, 168 | aligned_width, rois_flat, 169 | bottom_grad_flat, stream); 170 | 171 | return 1; 
172 | } 173 | 174 | 175 | int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 176 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 177 | { 178 | // Grab the input tensor 179 | float * data_flat = THCudaTensor_data(state, features); 180 | float * rois_flat = THCudaTensor_data(state, rois); 181 | 182 | float * output_flat = THCudaTensor_data(state, output); 183 | 184 | // Number of ROIs 185 | int num_rois = THCudaTensor_size(state, rois, 0); 186 | int size_rois = THCudaTensor_size(state, rois, 1); 187 | if (size_rois != 5) 188 | { 189 | return 0; 190 | } 191 | 192 | // batch size 193 | //int batch_size = THCudaTensor_size(state, features, 0); 194 | //if (batch_size != 1) 195 | //{ 196 | // return 0; 197 | //} 198 | 199 | 200 | // data height 201 | int data_height = THCudaTensor_size(state, features, 2); 202 | // data width 203 | int data_width = THCudaTensor_size(state, features, 3); 204 | // Number of channels 205 | int num_channels = THCudaTensor_size(state, features, 1); 206 | 207 | cudaStream_t stream = THCState_getCurrentStream(state); 208 | 209 | ROIAlignDenseAdaForwardLaucher( 210 | data_flat, spatial_scale, num_rois, data_height, 211 | data_width, num_channels, aligned_height, 212 | aligned_width, rois_flat, 213 | output_flat, stream); 214 | 215 | return 1; 216 | } 217 | 218 | int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 219 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 220 | { 221 | // Grab the input tensor 222 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 223 | float * rois_flat = THCudaTensor_data(state, rois); 224 | 225 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 226 | 227 | // Number of ROIs 228 | int num_rois = THCudaTensor_size(state, rois, 0); 229 | int size_rois = THCudaTensor_size(state, rois, 1); 230 | if (size_rois != 5) 231 | { 232 | return 0; 233 | } 234 | 
235 | // batch size 236 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 237 | //if (batch_size != 1) 238 | //{ 239 | // return 0; 240 | //} 241 | // data height 242 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 243 | // data width 244 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 245 | // Number of channels 246 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 247 | 248 | cudaStream_t stream = THCState_getCurrentStream(state); 249 | ROIAlignDenseAdaBackwardLaucher( 250 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 251 | data_width, num_channels, aligned_height, 252 | aligned_width, rois_flat, 253 | bottom_grad_flat, stream); 254 | 255 | return 1; 256 | } 257 | -------------------------------------------------------------------------------- /modules/roi_align/src.bak2/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | //#include 2 | //#include 3 | #include "roi_align_cuda.hpp" 4 | #include 5 | #include "cuda/roi_align_kernel.h" 6 | //#include 7 | 8 | THC_CLASS at::Context& at::globalContext(); 9 | THCState *state = at::globalContext().getTHCState(); 10 | //ATen_CLASS at::Context& at::globalContext(); 11 | //THCState *state = at::globalContext().thc_state; 12 | //extern THCState *state; 13 | 14 | extern "C" int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 15 | at::Tensor& features_t, at::Tensor& rois_t, at::Tensor& output_t) 16 | { 17 | //printf("%d\n", aligned_height); 18 | THCudaTensor * features(std::move(features_t.unsafeGetTensorImpl())); 19 | THCudaTensor * rois(std::move(rois_t.unsafeGetTensorImpl())); 20 | THCudaTensor * output(std::move(output_t.unsafeGetTensorImpl())); 21 | THCudaTensor * features_ = THCudaTensor_newContiguous(state, features); 22 | THCudaTensor * rois_ = THCudaTensor_newContiguous(state, rois); 23 | THCudaTensor * output_ = THCudaTensor_newContiguous(state, output); 24 | 
THCudaStorage * features__ = THCudaTensor_storage(state, features_); 25 | THCudaStorage * rois__ = THCudaTensor_storage(state, rois_); 26 | THCudaStorage * output__ = THCudaTensor_storage(state, output_); 27 | printf("%d\n", aligned_width); 28 | //features = THCudaTensor_newContiguous(state, features_t.impl_.get()); 29 | // Grab the input tensor 30 | float * data_flat = THCudaStorage_data(state, features__); 31 | printf("%d\n", aligned_height); 32 | float * rois_flat = THCudaStorage_data(state, rois__); 33 | 34 | float * output_flat = THCudaStorage_data(state, output__); 35 | printf("%d\n", aligned_height); 36 | // Number of ROIs 37 | int num_rois = THCudaTensor_size(state, rois, 0); 38 | int size_rois = THCudaTensor_size(state, rois, 1); 39 | if (size_rois != 5) 40 | { 41 | return 0; 42 | } 43 | printf("%f\n", spatial_scale); 44 | // batch size 45 | //int batch_size = THCudaTensor_size(state, features, 0); 46 | //if (batch_size != 1) 47 | //{ 48 | // return 0; 49 | //} 50 | 51 | 52 | // data height 53 | int data_height = THCudaTensor_size(state, features, 2); 54 | // data width 55 | int data_width = THCudaTensor_size(state, features, 3); 56 | // Number of channels 57 | int num_channels = THCudaTensor_size(state, features, 1); 58 | 59 | cudaStream_t stream = THCState_getCurrentStream(state); 60 | 61 | ROIAlignForwardLaucher( 62 | data_flat, spatial_scale, num_rois, data_height, 63 | data_width, num_channels, aligned_height, 64 | aligned_width, rois_flat, 65 | output_flat, stream); 66 | 67 | return 1; 68 | } 69 | 70 | extern "C" int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 71 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 72 | { 73 | // Grab the input tensor 74 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 75 | float * rois_flat = THCudaTensor_data(state, rois); 76 | 77 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 78 | 79 | // Number of ROIs 80 | int 
num_rois = THCudaTensor_size(state, rois, 0); 81 | int size_rois = THCudaTensor_size(state, rois, 1); 82 | if (size_rois != 5) 83 | { 84 | return 0; 85 | } 86 | 87 | // batch size 88 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 89 | //if (batch_size != 1) 90 | //{ 91 | // return 0; 92 | //} 93 | // data height 94 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 95 | // data width 96 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 97 | // Number of channels 98 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 99 | 100 | cudaStream_t stream = THCState_getCurrentStream(state); 101 | ROIAlignBackwardLaucher( 102 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 103 | data_width, num_channels, aligned_height, 104 | aligned_width, rois_flat, 105 | bottom_grad_flat, stream); 106 | 107 | return 1; 108 | } 109 | 110 | 111 | extern "C" int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 112 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 113 | { 114 | // Grab the input tensor 115 | float * data_flat = THCudaTensor_data(state, features); 116 | float * rois_flat = THCudaTensor_data(state, rois); 117 | 118 | float * output_flat = THCudaTensor_data(state, output); 119 | 120 | // Number of ROIs 121 | int num_rois = THCudaTensor_size(state, rois, 0); 122 | int size_rois = THCudaTensor_size(state, rois, 1); 123 | if (size_rois != 5) 124 | { 125 | return 0; 126 | } 127 | 128 | // batch size 129 | //int batch_size = THCudaTensor_size(state, features, 0); 130 | //if (batch_size != 1) 131 | //{ 132 | // return 0; 133 | //} 134 | 135 | 136 | // data height 137 | int data_height = THCudaTensor_size(state, features, 2); 138 | // data width 139 | int data_width = THCudaTensor_size(state, features, 3); 140 | // Number of channels 141 | int num_channels = THCudaTensor_size(state, features, 1); 142 | 143 | cudaStream_t stream = THCState_getCurrentStream(state); 
144 | 145 | ROIAlignAdaForwardLaucher( 146 | data_flat, spatial_scale, num_rois, data_height, 147 | data_width, num_channels, aligned_height, 148 | aligned_width, rois_flat, 149 | output_flat, stream); 150 | 151 | return 1; 152 | } 153 | 154 | extern "C" int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 155 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 156 | { 157 | // Grab the input tensor 158 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 159 | float * rois_flat = THCudaTensor_data(state, rois); 160 | 161 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 162 | 163 | // Number of ROIs 164 | int num_rois = THCudaTensor_size(state, rois, 0); 165 | int size_rois = THCudaTensor_size(state, rois, 1); 166 | if (size_rois != 5) 167 | { 168 | return 0; 169 | } 170 | 171 | // batch size 172 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 173 | //if (batch_size != 1) 174 | //{ 175 | // return 0; 176 | //} 177 | // data height 178 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 179 | // data width 180 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 181 | // Number of channels 182 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 183 | 184 | cudaStream_t stream = THCState_getCurrentStream(state); 185 | ROIAlignAdaBackwardLaucher( 186 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 187 | data_width, num_channels, aligned_height, 188 | aligned_width, rois_flat, 189 | bottom_grad_flat, stream); 190 | 191 | return 1; 192 | } 193 | 194 | 195 | extern "C" int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 196 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 197 | { 198 | // Grab the input tensor 199 | float * data_flat = THCudaTensor_data(state, features); 200 | float * rois_flat = THCudaTensor_data(state, rois); 201 | 202 | float * 
output_flat = THCudaTensor_data(state, output); 203 | 204 | // Number of ROIs 205 | int num_rois = THCudaTensor_size(state, rois, 0); 206 | int size_rois = THCudaTensor_size(state, rois, 1); 207 | if (size_rois != 5) 208 | { 209 | return 0; 210 | } 211 | 212 | // batch size 213 | //int batch_size = THCudaTensor_size(state, features, 0); 214 | //if (batch_size != 1) 215 | //{ 216 | // return 0; 217 | //} 218 | 219 | 220 | // data height 221 | int data_height = THCudaTensor_size(state, features, 2); 222 | // data width 223 | int data_width = THCudaTensor_size(state, features, 3); 224 | // Number of channels 225 | int num_channels = THCudaTensor_size(state, features, 1); 226 | 227 | cudaStream_t stream = THCState_getCurrentStream(state); 228 | 229 | ROIAlignDenseAdaForwardLaucher( 230 | data_flat, spatial_scale, num_rois, data_height, 231 | data_width, num_channels, aligned_height, 232 | aligned_width, rois_flat, 233 | output_flat, stream); 234 | 235 | return 1; 236 | } 237 | 238 | extern "C" int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 239 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 240 | { 241 | // Grab the input tensor 242 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 243 | float * rois_flat = THCudaTensor_data(state, rois); 244 | 245 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 246 | 247 | // Number of ROIs 248 | int num_rois = THCudaTensor_size(state, rois, 0); 249 | int size_rois = THCudaTensor_size(state, rois, 1); 250 | if (size_rois != 5) 251 | { 252 | return 0; 253 | } 254 | 255 | // batch size 256 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 257 | //if (batch_size != 1) 258 | //{ 259 | // return 0; 260 | //} 261 | // data height 262 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 263 | // data width 264 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 265 | // Number of channels 266 | int 
num_channels = THCudaTensor_size(state, bottom_grad, 1); 267 | 268 | cudaStream_t stream = THCState_getCurrentStream(state); 269 | ROIAlignDenseAdaBackwardLaucher( 270 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 271 | data_width, num_channels, aligned_height, 272 | aligned_width, rois_flat, 273 | bottom_grad_flat, stream); 274 | 275 | return 1; 276 | } 277 | 278 | /*PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 279 | m.def("roi_align_forward_cuda", &roi_align_forward_cuda, "ROIAlign_forward"); 280 | m.def("roi_align_backward_cuda", &roi_align_backward_cuda, "ROIAlign_backward"); 281 | m.def("roi_align_ada_forward_cuda", &roi_align_ada_forward_cuda, "ROIAlign_Ada_forward"); 282 | m.def("roi_align_ada_backward_cuda", &roi_align_ada_backward_cuda, "ROIAlign_Ada_backward"); 283 | m.def("roi_align_dense_ada_forward_cuda", &roi_align_dense_ada_forward_cuda, "ROIAlign_Dense_Ada_forward"); 284 | m.def("roi_align_dense_ada_backward_cuda", &roi_align_dense_ada_backward_cuda, "ROIAlign_Dense_Ada_backward"); 285 | }*/ 286 | -------------------------------------------------------------------------------- /modules/roi_align/src.bak2/roi_align_cuda.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | //THC_CLASS at::Context& at::globalContext(); 6 | //THCState *state = at::globalContext().getTHCState(); 7 | //ATen_CLASS at::Context& at::globalContext(); 8 | //THCState *state = at::globalContext().thc_state; 9 | //extern THCState *state; 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 16 | at::Tensor& features, at::Tensor& rois, at::Tensor& output); 17 | 18 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 19 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 20 | 21 | 22 | int roi_align_ada_forward_cuda(int 
aligned_height, int aligned_width, float spatial_scale, 23 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 24 | 25 | int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 26 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 27 | 28 | int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 29 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output); 30 | 31 | int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 32 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad); 33 | 34 | 35 | #ifdef __cplusplus 36 | } 37 | #endif 38 | 39 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 40 | m.def("roi_align_forward_cuda", &roi_align_forward_cuda, "ROIAlign_forward"); 41 | m.def("roi_align_backward_cuda", &roi_align_backward_cuda, "ROIAlign_backward"); 42 | m.def("roi_align_ada_forward_cuda", &roi_align_ada_forward_cuda, "ROIAlign_Ada_forward"); 43 | m.def("roi_align_ada_backward_cuda", &roi_align_ada_backward_cuda, "ROIAlign_Ada_backward"); 44 | m.def("roi_align_dense_ada_forward_cuda", &roi_align_dense_ada_forward_cuda, "ROIAlign_Dense_Ada_forward"); 45 | m.def("roi_align_dense_ada_backward_cuda", &roi_align_dense_ada_backward_cuda, "ROIAlign_Dense_Ada_backward"); 46 | } 47 | -------------------------------------------------------------------------------- /modules/roi_align/src/cuda/Makefile: -------------------------------------------------------------------------------- 1 | all: roi_align_kernel.cu roi_align_kernel.h 2 | nvcc -c -o roi_align.cu.o roi_align_kernel.cu -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52 3 | 4 | clean: 5 | rm roi_align.cu.o 6 | -------------------------------------------------------------------------------- /modules/roi_align/src/cuda/roi_align.cu.o: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/src/cuda/roi_align.cu.o -------------------------------------------------------------------------------- /modules/roi_align/src/cuda/roi_align_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _ROI_ALIGN_KERNEL 2 | #define _ROI_ALIGN_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, 9 | const float spatial_scale, const int height, const int width, 10 | const int channels, const int aligned_height, const int aligned_width, 11 | const float* bottom_rois, float* top_data); 12 | 13 | int ROIAlignForwardLaucher( 14 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 15 | const int width, const int channels, const int aligned_height, 16 | const int aligned_width, const float* bottom_rois, 17 | float* top_data, cudaStream_t stream); 18 | 19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, 20 | const float spatial_scale, const int height, const int width, 21 | const int channels, const int aligned_height, const int aligned_width, 22 | float* bottom_diff, const float* bottom_rois); 23 | 24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 25 | const int height, const int width, const int channels, const int aligned_height, 26 | const int aligned_width, const float* bottom_rois, 27 | float* bottom_diff, cudaStream_t stream); 28 | 29 | 30 | __global__ void ROIAlignAdaForward(const int nthreads, const float* bottom_data, 31 | const float spatial_scale, const int height, const int width, 32 | const int channels, const int aligned_height, const int aligned_width, 33 | const float* bottom_rois, float* top_data); 34 | 35 | int ROIAlignAdaForwardLaucher( 36 | const float* bottom_data, 
const float spatial_scale, const int num_rois, const int height, 37 | const int width, const int channels, const int aligned_height, 38 | const int aligned_width, const float* bottom_rois, 39 | float* top_data, cudaStream_t stream); 40 | 41 | __global__ void ROIAlignAdaBackward(const int nthreads, const float* top_diff, 42 | const float spatial_scale, const int height, const int width, 43 | const int channels, const int aligned_height, const int aligned_width, 44 | float* bottom_diff, const float* bottom_rois); 45 | 46 | int ROIAlignAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 47 | const int height, const int width, const int channels, const int aligned_height, 48 | const int aligned_width, const float* bottom_rois, 49 | float* bottom_diff, cudaStream_t stream); 50 | 51 | 52 | __global__ void ROIAlignDenseAdaForward(const int nthreads, const float* bottom_data, 53 | const float spatial_scale, const int height, const int width, 54 | const int channels, const int aligned_height, const int aligned_width, 55 | const float* bottom_rois, float* top_data); 56 | 57 | int ROIAlignDenseAdaForwardLaucher( 58 | const float* bottom_data, const float spatial_scale, const int num_rois, const int height, 59 | const int width, const int channels, const int aligned_height, 60 | const int aligned_width, const float* bottom_rois, 61 | float* top_data, cudaStream_t stream); 62 | 63 | __global__ void ROIAlignDenseAdaBackward(const int nthreads, const float* top_diff, 64 | const float spatial_scale, const int height, const int width, 65 | const int channels, const int aligned_height, const int aligned_width, 66 | float* bottom_diff, const float* bottom_rois); 67 | 68 | int ROIAlignDenseAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, 69 | const int height, const int width, const int channels, const int aligned_height, 70 | const int aligned_width, const float* 
bottom_rois, 71 | float* bottom_diff, cudaStream_t stream); 72 | 73 | 74 | #ifdef __cplusplus 75 | } 76 | #endif 77 | 78 | #endif 79 | 80 | -------------------------------------------------------------------------------- /modules/roi_align/src/roi_align_cuda.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "cuda/roi_align_kernel.h" 4 | 5 | extern THCState *state; 6 | 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 8 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 9 | { 10 | // Grab the input tensor 11 | float * data_flat = THCudaTensor_data(state, features); 12 | float * rois_flat = THCudaTensor_data(state, rois); 13 | 14 | float * output_flat = THCudaTensor_data(state, output); 15 | 16 | // Number of ROIs 17 | int num_rois = THCudaTensor_size(state, rois, 0); 18 | int size_rois = THCudaTensor_size(state, rois, 1); 19 | if (size_rois != 5) 20 | { 21 | return 0; 22 | } 23 | 24 | // batch size 25 | //int batch_size = THCudaTensor_size(state, features, 0); 26 | //if (batch_size != 1) 27 | //{ 28 | // return 0; 29 | //} 30 | 31 | 32 | // data height 33 | int data_height = THCudaTensor_size(state, features, 2); 34 | // data width 35 | int data_width = THCudaTensor_size(state, features, 3); 36 | // Number of channels 37 | int num_channels = THCudaTensor_size(state, features, 1); 38 | 39 | cudaStream_t stream = THCState_getCurrentStream(state); 40 | 41 | ROIAlignForwardLaucher( 42 | data_flat, spatial_scale, num_rois, data_height, 43 | data_width, num_channels, aligned_height, 44 | aligned_width, rois_flat, 45 | output_flat, stream); 46 | 47 | return 1; 48 | } 49 | 50 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 51 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 52 | { 53 | // Grab the input tensor 54 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 
55 | float * rois_flat = THCudaTensor_data(state, rois); 56 | 57 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 58 | 59 | // Number of ROIs 60 | int num_rois = THCudaTensor_size(state, rois, 0); 61 | int size_rois = THCudaTensor_size(state, rois, 1); 62 | if (size_rois != 5) 63 | { 64 | return 0; 65 | } 66 | 67 | // batch size 68 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 69 | //if (batch_size != 1) 70 | //{ 71 | // return 0; 72 | //} 73 | // data height 74 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 75 | // data width 76 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 77 | // Number of channels 78 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 79 | 80 | cudaStream_t stream = THCState_getCurrentStream(state); 81 | ROIAlignBackwardLaucher( 82 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 83 | data_width, num_channels, aligned_height, 84 | aligned_width, rois_flat, 85 | bottom_grad_flat, stream); 86 | 87 | return 1; 88 | } 89 | 90 | 91 | int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 92 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 93 | { 94 | // Grab the input tensor 95 | float * data_flat = THCudaTensor_data(state, features); 96 | float * rois_flat = THCudaTensor_data(state, rois); 97 | 98 | float * output_flat = THCudaTensor_data(state, output); 99 | 100 | // Number of ROIs 101 | int num_rois = THCudaTensor_size(state, rois, 0); 102 | int size_rois = THCudaTensor_size(state, rois, 1); 103 | if (size_rois != 5) 104 | { 105 | return 0; 106 | } 107 | 108 | // batch size 109 | //int batch_size = THCudaTensor_size(state, features, 0); 110 | //if (batch_size != 1) 111 | //{ 112 | // return 0; 113 | //} 114 | 115 | 116 | // data height 117 | int data_height = THCudaTensor_size(state, features, 2); 118 | // data width 119 | int data_width = THCudaTensor_size(state, features, 3); 120 | // Number of 
channels 121 | int num_channels = THCudaTensor_size(state, features, 1); 122 | 123 | cudaStream_t stream = THCState_getCurrentStream(state); 124 | 125 | ROIAlignAdaForwardLaucher( 126 | data_flat, spatial_scale, num_rois, data_height, 127 | data_width, num_channels, aligned_height, 128 | aligned_width, rois_flat, 129 | output_flat, stream); 130 | 131 | return 1; 132 | } 133 | 134 | int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 135 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 136 | { 137 | // Grab the input tensor 138 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 139 | float * rois_flat = THCudaTensor_data(state, rois); 140 | 141 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 142 | 143 | // Number of ROIs 144 | int num_rois = THCudaTensor_size(state, rois, 0); 145 | int size_rois = THCudaTensor_size(state, rois, 1); 146 | if (size_rois != 5) 147 | { 148 | return 0; 149 | } 150 | 151 | // batch size 152 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 153 | //if (batch_size != 1) 154 | //{ 155 | // return 0; 156 | //} 157 | // data height 158 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 159 | // data width 160 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 161 | // Number of channels 162 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 163 | 164 | cudaStream_t stream = THCState_getCurrentStream(state); 165 | ROIAlignAdaBackwardLaucher( 166 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 167 | data_width, num_channels, aligned_height, 168 | aligned_width, rois_flat, 169 | bottom_grad_flat, stream); 170 | 171 | return 1; 172 | } 173 | 174 | 175 | int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 176 | THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output) 177 | { 178 | // Grab the input tensor 179 | float * data_flat = 
THCudaTensor_data(state, features); 180 | float * rois_flat = THCudaTensor_data(state, rois); 181 | 182 | float * output_flat = THCudaTensor_data(state, output); 183 | 184 | // Number of ROIs 185 | int num_rois = THCudaTensor_size(state, rois, 0); 186 | int size_rois = THCudaTensor_size(state, rois, 1); 187 | if (size_rois != 5) 188 | { 189 | return 0; 190 | } 191 | 192 | // batch size 193 | //int batch_size = THCudaTensor_size(state, features, 0); 194 | //if (batch_size != 1) 195 | //{ 196 | // return 0; 197 | //} 198 | 199 | 200 | // data height 201 | int data_height = THCudaTensor_size(state, features, 2); 202 | // data width 203 | int data_width = THCudaTensor_size(state, features, 3); 204 | // Number of channels 205 | int num_channels = THCudaTensor_size(state, features, 1); 206 | 207 | cudaStream_t stream = THCState_getCurrentStream(state); 208 | 209 | ROIAlignDenseAdaForwardLaucher( 210 | data_flat, spatial_scale, num_rois, data_height, 211 | data_width, num_channels, aligned_height, 212 | aligned_width, rois_flat, 213 | output_flat, stream); 214 | 215 | return 1; 216 | } 217 | 218 | int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 219 | THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad) 220 | { 221 | // Grab the input tensor 222 | float * top_grad_flat = THCudaTensor_data(state, top_grad); 223 | float * rois_flat = THCudaTensor_data(state, rois); 224 | 225 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 226 | 227 | // Number of ROIs 228 | int num_rois = THCudaTensor_size(state, rois, 0); 229 | int size_rois = THCudaTensor_size(state, rois, 1); 230 | if (size_rois != 5) 231 | { 232 | return 0; 233 | } 234 | 235 | // batch size 236 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 237 | //if (batch_size != 1) 238 | //{ 239 | // return 0; 240 | //} 241 | // data height 242 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 243 | // data width 244 | 
int data_width = THCudaTensor_size(state, bottom_grad, 3); 245 | // Number of channels 246 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 247 | 248 | cudaStream_t stream = THCState_getCurrentStream(state); 249 | ROIAlignDenseAdaBackwardLaucher( 250 | top_grad_flat, spatial_scale, batch_size, num_rois, data_height, 251 | data_width, num_channels, aligned_height, 252 | aligned_width, rois_flat, 253 | bottom_grad_flat, stream); 254 | 255 | return 1; 256 | } 257 | -------------------------------------------------------------------------------- /modules/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | //#include 2 | //#include 3 | #include 4 | #include "roi_align_cuda.hpp" 5 | #include 6 | #include "cuda/roi_align_kernel.h" 7 | //#include 8 | extern THCState *state; 9 | 10 | extern "C" int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 11 | at::Tensor& features, at::Tensor& rois, at::Tensor& output) 12 | { 13 | // Number of ROIs 14 | int num_rois = rois.size(0); 15 | int size_rois = rois.size(1); 16 | if (size_rois != 5) 17 | { 18 | return 0; 19 | } 20 | // batch size 21 | //int batch_size = THCudaTensor_size(state, features, 0); 22 | //if (batch_size != 1) 23 | //{ 24 | // return 0; 25 | //} 26 | 27 | 28 | // data height 29 | int data_height = features.size(2); 30 | // data width 31 | int data_width = features.size(3); 32 | // Number of channels 33 | int num_channels = features.size(1); 34 | 35 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 36 | 37 | ROIAlignForwardLaucher( 38 | features.contiguous().data(), spatial_scale, num_rois, data_height, 39 | data_width, num_channels, aligned_height, 40 | aligned_width, rois.contiguous().data(), 41 | output.contiguous().data(), stream); 42 | 43 | return 1; 44 | } 45 | 46 | extern "C" int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 47 | at::Tensor& top_grad, 
at::Tensor& rois, at::Tensor& bottom_grad) 48 | { 49 | 50 | // Number of ROIs 51 | int num_rois = rois.size(0); 52 | int size_rois = rois.size(1); 53 | if (size_rois != 5) 54 | { 55 | return 0; 56 | } 57 | 58 | // batch size 59 | int batch_size = bottom_grad.size(0); 60 | //if (batch_size != 1) 61 | //{ 62 | // return 0; 63 | //} 64 | // data height 65 | int data_height = bottom_grad.size(2); 66 | // data width 67 | int data_width = bottom_grad.size(3); 68 | // Number of channels 69 | int num_channels = bottom_grad.size(1); 70 | 71 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 72 | ROIAlignBackwardLaucher( 73 | top_grad.contiguous().data(), spatial_scale, batch_size, num_rois, data_height, 74 | data_width, num_channels, aligned_height, 75 | aligned_width, rois.contiguous().data(), 76 | bottom_grad.contiguous().data(), stream); 77 | 78 | return 1; 79 | } 80 | 81 | 82 | extern "C" int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 83 | at::Tensor& features, at::Tensor& rois, at::Tensor& output) 84 | { 85 | // Number of ROIs 86 | int num_rois = rois.size(0); 87 | int size_rois = rois.size(1); 88 | if (size_rois != 5) 89 | { 90 | return 0; 91 | } 92 | // batch size 93 | //int batch_size = THCudaTensor_size(state, features, 0); 94 | //if (batch_size != 1) 95 | //{ 96 | // return 0; 97 | //} 98 | 99 | 100 | // data height 101 | int data_height = features.size(2); 102 | // data width 103 | int data_width = features.size(3); 104 | // Number of channels 105 | int num_channels = features.size(1); 106 | 107 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 108 | 109 | ROIAlignAdaForwardLaucher( 110 | features.contiguous().data(), spatial_scale, num_rois, data_height, 111 | data_width, num_channels, aligned_height, 112 | aligned_width, rois.contiguous().data(), 113 | output.contiguous().data(), stream); 114 | 115 | return 1; 116 | } 117 | 118 | extern "C" int roi_align_ada_backward_cuda(int aligned_height, int 
aligned_width, float spatial_scale, 119 | at::Tensor& top_grad, at::Tensor& rois, at::Tensor& bottom_grad) 120 | { 121 | // Number of ROIs 122 | int num_rois = rois.size(0); 123 | int size_rois = rois.size(1); 124 | if (size_rois != 5) 125 | { 126 | return 0; 127 | } 128 | 129 | // batch size 130 | int batch_size = bottom_grad.size(0); 131 | //if (batch_size != 1) 132 | //{ 133 | // return 0; 134 | //} 135 | // data height 136 | int data_height = bottom_grad.size(2); 137 | // data width 138 | int data_width = bottom_grad.size(3); 139 | // Number of channels 140 | int num_channels = bottom_grad.size(1); 141 | 142 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 143 | ROIAlignAdaBackwardLaucher( 144 | top_grad.contiguous().data(), spatial_scale, batch_size, num_rois, data_height, 145 | data_width, num_channels, aligned_height, 146 | aligned_width, rois.contiguous().data(), 147 | bottom_grad.contiguous().data(), stream); 148 | 149 | return 1; 150 | } 151 | 152 | 153 | extern "C" int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 154 | at::Tensor& features, at::Tensor& rois, at::Tensor& output) 155 | { 156 | // Number of ROIs 157 | int num_rois = rois.size(0); 158 | int size_rois = rois.size(1); 159 | if (size_rois != 5) 160 | { 161 | return 0; 162 | } 163 | // batch size 164 | //int batch_size = THCudaTensor_size(state, features, 0); 165 | //if (batch_size != 1) 166 | //{ 167 | // return 0; 168 | //} 169 | 170 | 171 | // data height 172 | int data_height = features.size(2); 173 | // data width 174 | int data_width = features.size(3); 175 | // Number of channels 176 | int num_channels = features.size(1); 177 | 178 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 179 | 180 | ROIAlignDenseAdaForwardLaucher( 181 | features.contiguous().data(), spatial_scale, num_rois, data_height, 182 | data_width, num_channels, aligned_height, 183 | aligned_width, rois.contiguous().data(), 184 | output.contiguous().data(), 
stream); 185 | 186 | return 1; 187 | } 188 | 189 | extern "C" int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 190 | at::Tensor& top_grad, at::Tensor& rois, at::Tensor& bottom_grad) 191 | { 192 | // Grab the input tensor 193 | /*float * top_grad_flat = THCudaTensor_data(state, top_grad); 194 | float * rois_flat = THCudaTensor_data(state, rois); 195 | 196 | float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad); 197 | 198 | // Number of ROIs 199 | int num_rois = THCudaTensor_size(state, rois, 0); 200 | int size_rois = THCudaTensor_size(state, rois, 1); 201 | if (size_rois != 5) 202 | { 203 | return 0; 204 | } 205 | 206 | // batch size 207 | int batch_size = THCudaTensor_size(state, bottom_grad, 0); 208 | //if (batch_size != 1) 209 | //{ 210 | // return 0; 211 | //} 212 | // data height 213 | int data_height = THCudaTensor_size(state, bottom_grad, 2); 214 | // data width 215 | int data_width = THCudaTensor_size(state, bottom_grad, 3); 216 | // Number of channels 217 | int num_channels = THCudaTensor_size(state, bottom_grad, 1); 218 | 219 | cudaStream_t stream = THCState_getCurrentStream(state);*/ 220 | // Number of ROIs 221 | int num_rois = rois.size(0); 222 | int size_rois = rois.size(1); 223 | if (size_rois != 5) 224 | { 225 | return 0; 226 | } 227 | 228 | // batch size 229 | int batch_size = bottom_grad.size(0); 230 | //if (batch_size != 1) 231 | //{ 232 | // return 0; 233 | //} 234 | // data height 235 | int data_height = bottom_grad.size(2); 236 | // data width 237 | int data_width = bottom_grad.size(3); 238 | // Number of channels 239 | int num_channels = bottom_grad.size(1); 240 | 241 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 242 | ROIAlignDenseAdaBackwardLaucher( 243 | top_grad.contiguous().data(), spatial_scale, batch_size, num_rois, data_height, 244 | data_width, num_channels, aligned_height, 245 | aligned_width, rois.contiguous().data(), 246 | bottom_grad.contiguous().data(), 
stream); 247 | 248 | return 1; 249 | } 250 | 251 | /*PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 252 | m.def("roi_align_forward_cuda", &roi_align_forward_cuda, "ROIAlign_forward"); 253 | m.def("roi_align_backward_cuda", &roi_align_backward_cuda, "ROIAlign_backward"); 254 | m.def("roi_align_ada_forward_cuda", &roi_align_ada_forward_cuda, "ROIAlign_Ada_forward"); 255 | m.def("roi_align_ada_backward_cuda", &roi_align_ada_backward_cuda, "ROIAlign_Ada_backward"); 256 | m.def("roi_align_dense_ada_forward_cuda", &roi_align_dense_ada_forward_cuda, "ROIAlign_Dense_Ada_forward"); 257 | m.def("roi_align_dense_ada_backward_cuda", &roi_align_dense_ada_backward_cuda, "ROIAlign_Dense_Ada_backward"); 258 | }*/ 259 | -------------------------------------------------------------------------------- /modules/roi_align/src/roi_align_cuda.hpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | //THC_CLASS at::Context& at::globalContext(); 6 | //THCState *state = at::globalContext().getTHCState(); 7 | //ATen_CLASS at::Context& at::globalContext(); 8 | //THCState *state = at::globalContext().thc_state; 9 | //extern THCState *state; 10 | 11 | #ifdef __cplusplus 12 | extern "C" { 13 | #endif 14 | 15 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 16 | at::Tensor& features, at::Tensor& rois, at::Tensor& output); 17 | 18 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 19 | at::Tensor& top_grad, at::Tensor& rois, at::Tensor& bottom_grad); 20 | 21 | 22 | int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale, 23 | at::Tensor& features, at::Tensor& rois, at::Tensor& output); 24 | 25 | int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale, 26 | at::Tensor& top_grad, at::Tensor& rois, at::Tensor& bottom_grad); 27 | 28 | int roi_align_dense_ada_forward_cuda(int 
def gen_samples(generator, bbox, n, overlap_range=None, scale_range=None):
    """Draw up to ``n`` boxes around ``bbox``, optionally filtered.

    Args:
        generator: callable ``generator(bbox, count)`` returning an
            ``(count, 4)`` array of ``[x, y, w, h]`` boxes.
        bbox: reference box ``[x, y, w, h]``.
        n: number of boxes requested.
        overlap_range: optional ``[lo, hi]`` bounds (inclusive) on the overlap
            ratio between each candidate and ``bbox``.
        scale_range: optional ``[lo, hi]`` bounds (inclusive) on the area of
            each candidate relative to the area of ``bbox``.

    Returns:
        The generator's output unchanged when no filter is given. Otherwise
        the accepted boxes, which may be fewer than ``n`` because at most
        three oversampling rounds (factors 2, 4, 8) are attempted; ``None``
        when ``n`` is not positive.
    """
    if overlap_range is None and scale_range is None:
        return generator(bbox, n)

    collected = None
    missing = n
    oversample = 2
    while missing > 0 and oversample < 16:
        # Ask for more candidates than needed, since filtering discards some.
        candidates = generator(bbox, int(missing * oversample))
        keep = np.ones(len(candidates), dtype=bool)

        if overlap_range is not None:
            iou = overlap_ratio(candidates, bbox)
            keep &= (iou >= overlap_range[0]) & (iou <= overlap_range[1])

        if scale_range is not None:
            area_ratio = np.prod(candidates[:, 2:], axis=1) / np.prod(bbox[2:])
            keep &= (area_ratio >= scale_range[0]) & (area_ratio <= scale_range[1])

        accepted = candidates[keep, :]
        accepted = accepted[:min(int(missing), len(accepted))]

        collected = accepted if collected is None else np.concatenate([collected, accepted])
        missing = n - len(collected)
        oversample *= 2

    return collected
def overlap_ratio(rect1, rect2):
    '''
    Compute the overlap ratio (intersection over union) between two rects.

    - rect: 1d array-like of [x,y,w,h] or
            2d array-like of N x [x,y,w,h]

    A 1d rect is broadcast against every row of the other argument.
    Returns a 1d array of ratios clipped to [0, 1]. Pairs whose union area
    is not positive (e.g. two zero-area boxes) yield 0 instead of NaN.
    '''

    # Accept lists/tuples as well as ndarrays.
    rect1 = np.asarray(rect1)
    rect2 = np.asarray(rect2)

    if rect1.ndim == 1:
        rect1 = rect1[None, :]
    if rect2.ndim == 1:
        rect2 = rect2[None, :]

    left = np.maximum(rect1[:, 0], rect2[:, 0])
    right = np.minimum(rect1[:, 0] + rect1[:, 2], rect2[:, 0] + rect2[:, 2])
    top = np.maximum(rect1[:, 1], rect2[:, 1])
    bottom = np.minimum(rect1[:, 1] + rect1[:, 3], rect2[:, 1] + rect2[:, 3])

    intersect = np.maximum(0, right - left) * np.maximum(0, bottom - top)
    union = rect1[:, 2] * rect1[:, 3] + rect2[:, 2] * rect2[:, 3] - intersect

    # Divide only where the union is positive; everything else stays 0, which
    # avoids the 0/0 -> NaN (and RuntimeWarning) of a plain division.
    # Float inputs keep their precision, integer inputs are promoted.
    iou = np.zeros(union.shape, dtype=np.result_type(intersect, union, np.float32))
    np.divide(intersect, union, out=iou, where=union > 0)
    return np.clip(iou, 0, 1)
def samples2maskroi(samples,receptive_field, cshape,padded_scene_size,padding_ratio):
    """Convert [x, y, w, h] boxes into [x1, y1, x2, y2] ROIs on the resized scene.

    Args:
        samples: N x 4 array-like of boxes in original-image coordinates.
            The input is never modified.
        receptive_field: value subtracted from the scaled x2 / y2.
        cshape: two-element array-like, numerator of the resize ratio.
        padded_scene_size: two-element array-like, denominator of the ratio.
        padding_ratio: each box is enlarged around its centre so that its
            width and height are multiplied by this factor.

    Returns:
        N x 4 float array of ROIs. x2 / y2 are forced to be at least one unit
        larger than x1 / y1, so no ROI collapses after the receptive field is
        subtracted.
    """
    # ratios between original image and resized_image
    # (true_divide so that integer tuples/arrays are not floor-divided)
    cur_resize_ratio = np.true_divide(np.asarray(cshape), np.asarray(padded_scene_size))

    # Work on a floating-point copy: the in-place arithmetic below would fail
    # with a casting error on an integer array.
    rois = np.array(samples)
    if not np.issubdtype(rois.dtype, np.floating):
        rois = rois.astype(float)

    # xywh -> x1y1x2y2
    rois[:, 2:4] += rois[:, 0:2]

    # padding application
    rois_paddings = (rois[:, 2:4] - rois[:, 0:2]) * (padding_ratio - 1.) / 2.
    rois[:, 0:2] -= rois_paddings
    rois[:, 2:4] += rois_paddings

    # rescale to the resized scene; the receptive field is taken off x2, y2
    rois[:, 0] *= cur_resize_ratio[0]
    rois[:, 1] *= cur_resize_ratio[1]
    rois[:, 2] = np.maximum(rois[:, 0] + 1, rois[:, 2] * cur_resize_ratio[0] - receptive_field)
    rois[:, 3] = np.maximum(rois[:, 1] + 1, rois[:, 3] * cur_resize_ratio[1] - receptive_field)

    return rois
class NCCTracker(object):
    """Template tracker based on normalised cross-correlation.

    The template is cropped once from the first frame. Every later frame is
    searched inside a square window centred on the last known position.
    """

    def __init__(self, image, region):
        """Crop the template from ``image`` at ``region``.

        Args:
            image: array indexed as ``image[row, col]``.
            region: object with ``x``, ``y``, ``width`` and ``height``
                attributes describing the target (top-left corner and size).
        """
        # The search window is twice the larger side of the target.
        self.window = max(region.width, region.height) * 2

        # Clip the target box to the image before cropping the template.
        left = max(region.x, 0)
        top = max(region.y, 0)

        right = min(region.x + region.width, image.shape[1] - 1)
        bottom = min(region.y + region.height, image.shape[0] - 1)

        self.template = image[int(top):int(bottom), int(left):int(right)]
        # ``position`` is the target centre, ``size`` is (width, height).
        self.position = (region.x + region.width / 2, region.y + region.height / 2)
        self.size = (region.width, region.height)

    def track(self, image):
        """Locate the template in ``image``.

        Returns:
            ``(region, confidence)`` where ``region`` is a ``vot.Rectangle``
            given by its top-left corner and size, and ``confidence`` is the
            best normalised correlation score. When the search window clipped
            to the image is smaller than the template, the previous position
            is returned with confidence ``0.0``.
        """
        half_window = float(self.window) / 2

        left = max(round(self.position[0] - half_window), 0)
        top = max(round(self.position[1] - half_window), 0)

        right = min(round(self.position[0] + half_window), image.shape[1] - 1)
        bottom = min(round(self.position[1] + half_window), image.shape[0] - 1)

        if right - left < self.template.shape[1] or bottom - top < self.template.shape[0]:
            # No room to match: report the last known box. ``position`` is the
            # centre, so the corner is centre - size / 2. A confidence is
            # returned as well so callers can always unpack two values.
            last_known = vot.Rectangle(self.position[0] - self.size[0] / 2,
                                       self.position[1] - self.size[1] / 2,
                                       self.size[0], self.size[1])
            return last_known, 0.0

        cut = image[int(top):int(bottom), int(left):int(right)]

        matches = cv2.matchTemplate(cut, self.template, cv2.TM_CCOEFF_NORMED)
        _, max_val, _, max_loc = cv2.minMaxLoc(matches)

        # max_loc is the top-left corner of the best match inside ``cut``.
        self.position = (left + max_loc[0] + float(self.size[0]) / 2, top + max_loc[1] + float(self.size[1]) / 2)

        return vot.Rectangle(left + max_loc[0], top + max_loc[1], self.size[0], self.size[1]), max_val
os.path.splitext(p)[1] == '.jpg']) 38 | 39 | if (seqname == 'Jogging') or (seqname == 'Skating2'): 40 | gt = np.loadtxt(seq_path + '/groundtruth_rect.1.txt') 41 | elif seqname =='Human4': 42 | gt = np.loadtxt(seq_path + '/groundtruth_rect.2.txt', delimiter=',') 43 | elif (seqname == 'BlurBody') or (seqname == 'BlurCar1') or (seqname == 'BlurCar2') or (seqname == 'BlurCar3') \ 44 | or (seqname == 'BlurCar4') or (seqname == 'BlurFace') or (seqname == 'BlurOwl') or (seqname == 'Board') \ 45 | or (seqname == 'Box') or (seqname == 'Car4') or (seqname == 'CarScale') or (seqname == 'ClifBar') \ 46 | or (seqname == 'Couple') or (seqname == 'Crossing') or (seqname == 'Dog') or (seqname == 'FaceOcc1') \ 47 | or (seqname == 'Girl') or (seqname == 'Rubik') or (seqname == 'Singer1') or (seqname == 'Subway') \ 48 | or (seqname == 'Surfer') or (seqname == 'Sylvester') or (seqname == 'Toy') or (seqname == 'Twinnings') \ 49 | or (seqname == 'Vase') or (seqname == 'Walking') or (seqname == 'Walking2') or (seqname == 'Woman') : 50 | gt = np.loadtxt(seq_path + '/groundtruth_rect.txt') 51 | elif (seqname == 'Freeman4') or (seqname == 'Diving') or (seqname =='Freeman3') or (seqname =='Football1'): 52 | gt = np.loadtxt(seq_path + '/groundtruth_rect_revise.txt', delimiter=',') 53 | else: 54 | gt = np.loadtxt(seq_path + '/groundtruth_rect.txt', delimiter=',') 55 | 56 | if seqname == 'David': 57 | img_list = img_list[300:] 58 | # gt = gt[300:,:] 59 | if seqname == 'Football1': 60 | img_list = img_list[0:73] 61 | if seqname == 'Freeman3': 62 | img_list = img_list[0:459] 63 | if seqname == 'Freeman4': 64 | img_list = img_list[0:282] 65 | 66 | elif set_type=='VOT/2016': 67 | img_list = sorted([seq_path + '/'+p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.jpg']) 68 | gt = np.loadtxt(seq_path + '/groundtruth.txt', delimiter=',') 69 | 70 | ##polygon to rect 71 | if gt.shape[1] == 8: 72 | x_min = np.min(gt[:, [0, 2, 4, 6]], axis=1)[:, None] 73 | y_min = np.min(gt[:, [1, 3, 5, 7]], 
def train_mdnet():
    """Run multi-domain pre-training driven entirely by ``pretrain_opts``.

    Steps, in order:
      1. Pick the image root and the pickled sequence list from
         ``pretrain_opts['set_type']`` ('OTB', 'VOT' or 'IMAGENET').
      2. Build an ``MDNet`` with one domain per sequence (K domains) and,
         when ``adaptive_align`` is set, swap in ``RoIAlignAdaMax``.
      3. Build one ``RegionDataset`` per sequence.
      4. For ``n_cycles`` cycles, visit the sequences in random order,
         accumulate gradients of ``cls_loss + 0.1 * interclass_loss`` and step
         the optimizer once every ``seqbatch_size`` sequences.
      5. After each cycle, save the shared layers to
         ``pretrain_opts['model_path']`` when the mean precision improved.

    Takes no arguments and returns nothing; results are printed and written
    to disk.

    NOTE(review): this function uses Python 2 syntax (``print`` statements,
    ``dict.iteritems``, ``.next()``).
    """

    ## set image directory
    # NOTE(review): any other set_type leaves img_home / data_path unbound,
    # so open() below would raise NameError.
    if pretrain_opts['set_type'] == 'OTB':
        img_home = '/home/ilchae/dataset/tracking/OTB/'
        data_path = './otb-vot15.pkl'
    if pretrain_opts['set_type'] == 'VOT':
        img_home = '/home/ilchae/dataset/tracking/VOT/'
        data_path = './vot-otb.pkl'
    if pretrain_opts['set_type'] == 'IMAGENET':
        img_home = '/mnt/jgao/jgao/ILSVRC2015/Data/VID/train/'
        data_path = './modules/imagenet_refine.pkl'

    ## Init dataset ##
    # NOTE: pickle.load executes arbitrary code; only load trusted files.
    with open(data_path, 'rb') as fp:
        data = pickle.load(fp)


    # One domain (classification branch) per sequence.
    K = len(data)

    ## Init model ##
    model = MDNet(pretrain_opts['init_model_path'], K)
    if pretrain_opts['adaptive_align']:
        # Re-create the ROI-align layer with the same geometry but the
        # adaptive variant.
        align_h = model.roi_align_model.aligned_height
        align_w = model.roi_align_model.aligned_width
        spatial_s = model.roi_align_model.spatial_scale
        model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s)

    if pretrain_opts['use_gpu']:
        model = model.cuda()
    model.set_learnable_params(pretrain_opts['ft_layers'])
    model.train()

    dataset = [None] * K
    for k, (seqname, seq) in enumerate(data.iteritems()):
        img_list = seq['images']
        gt = seq['gt']
        if pretrain_opts['set_type'] == 'OTB':
            img_dir = os.path.join(img_home, seqname+'/img')
        if pretrain_opts['set_type'] == 'VOT':
            img_dir = img_home + seqname
        if pretrain_opts['set_type'] == 'IMAGENET':
            img_dir = img_home + seqname
        dataset[k]=RegionDataset(img_dir,img_list,gt,model.receptive_field,pretrain_opts)
        #print(img_dir)
        #print(img_list)


    ## Init criterion and optimizer ##
    binaryCriterion = BinaryLoss()
    interDomainCriterion = nn.CrossEntropyLoss()
    evaluator = Precision()
    optimizer = set_optimizer(model, pretrain_opts['lr'])

    best_score = 0.
    # Counts backward passes since the last optimizer step.
    batch_cur_idx = 0
    for i in range(pretrain_opts['n_cycles']):
        print "==== Start Cycle %d ====" % (i)
        k_list = np.random.permutation(K)
        # Per-domain statistics; domains skipped in this cycle keep 0.
        prec = np.zeros(K)
        #totalTripleLoss = np.zeros(K)
        totalInterClassLoss = np.zeros(K)
        for j, k in enumerate(k_list):
            tic = time.time()
            # Any failure while fetching a batch silently skips the sequence.
            try:
                cropped_scenes, pos_rois, neg_rois= dataset[k].next()
            except:
                #print "______except1_______"
                continue

            # Feature extraction; any exception here also skips the sequence.
            try:
                for sidx in range(0, len(cropped_scenes)):
                    cur_scene = cropped_scenes[sidx]
                    cur_pos_rois = pos_rois[sidx]
                    cur_neg_rois = neg_rois[sidx]

                    cur_scene = Variable(cur_scene)
                    cur_pos_rois = Variable(cur_pos_rois)
                    cur_neg_rois = Variable(cur_neg_rois)
                    if pretrain_opts['use_gpu']:
                        cur_scene = cur_scene.cuda()
                        cur_pos_rois = cur_pos_rois.cuda()
                        cur_neg_rois = cur_neg_rois.cuda()
                    cur_feat_map = model(cur_scene, k, out_layer='conv3')

                    # NOTE(review): if the frame with sidx == 0 is skipped
                    # here, pos_feats / neg_feats are not re-initialised for
                    # this sequence; the later append presumably fails and is
                    # swallowed by the bare except below -- confirm.
                    if cur_pos_rois.size(0) == 0 or cur_neg_rois.size(0) == 0:
                        print "______except num rois_______"
                        continue

                    #print "rois %2d, %2d" % \
                    #      (cur_pos_rois.size(0), cur_neg_rois.size(0))
                    # Pool one feature vector per ROI and flatten it.
                    cur_pos_feats = model.roi_align_model(cur_feat_map, cur_pos_rois)
                    cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1)
                    cur_neg_feats = model.roi_align_model(cur_feat_map, cur_neg_rois)
                    cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1)

                    if sidx == 0:
                        pos_feats = [cur_pos_feats]
                        neg_feats = [cur_neg_feats]
                    else:
                        pos_feats.append(cur_pos_feats)
                        neg_feats.append(cur_neg_feats)
                # Merge the per-frame features into one (num_rois, feat_dim) batch.
                feat_dim = cur_neg_feats.size(1)
                pos_feats = torch.stack(pos_feats,dim=0).view(-1,feat_dim)
                neg_feats = torch.stack(neg_feats,dim=0).view(-1,feat_dim)
            except:
                #print "______except2_______"
                continue


            pos_score = model(pos_feats, k, in_layer='fc4')
            neg_score = model(neg_feats, k, in_layer='fc4')

            cls_loss = binaryCriterion(pos_score, neg_score)

            ## inter frame classification
            # Column 0 of total_interclass_score holds the current domain's
            # score, so an all-zero label marks it as the correct class.
            interclass_label = Variable(torch.zeros((pos_score.size(0))).long())
            # NOTE(review): `opts` is not defined in this function (the rest
            # uses `pretrain_opts`); presumably it arrives through one of the
            # star-imports -- verify.
            if opts['use_gpu']:
                interclass_label = interclass_label.cuda()
            total_interclass_score = pos_score[:,1].contiguous()
            total_interclass_score = total_interclass_score.view((pos_score.size(0),1))

            # Score the positives with up to 100 randomly chosen other domains.
            K_perm = np.random.permutation(K)
            K_perm = K_perm[0:100]
            for cidx in K_perm:
                if k == cidx:
                    continue
                else:
                    interclass_score = model(pos_feats, cidx, in_layer='fc4')
                    total_interclass_score = torch.cat((total_interclass_score,interclass_score[:,1].contiguous().view((interclass_score.size(0),1))),dim=1)

            interclass_loss = interDomainCriterion(total_interclass_score, interclass_label)
            totalInterClassLoss[k] = interclass_loss.item()

            # Gradients accumulate across sequences until the step below.
            (cls_loss+0.1*interclass_loss).backward()

            batch_cur_idx+=1
            if (batch_cur_idx%pretrain_opts['seqbatch_size'])==0:
                torch.nn.utils.clip_grad_norm(model.parameters(), pretrain_opts['grad_clip'])
                optimizer.step()
                model.zero_grad()
                batch_cur_idx = 0

            ## evaluator
            prec[k] = evaluator(pos_score, neg_score)
            ## computation latency
            toc = time.time() - tic

            print "Cycle %2d, K %2d (%2d), BinLoss %.3f, Prec %.3f, interLoss %.3f, Time %.3f" % \
                      (i, j, k, cls_loss.item(), prec[k], totalInterClassLoss[k], toc)

        cur_score = prec.mean()
        # NOTE(review): total_iou is never assigned in this function, so
        # unless it exists as a global this always falls back to 0.
        try:
            total_miou = sum(total_iou)/len(total_iou)
        except:
            total_miou = 0.
        # NOTE(review): the "Triple Loss" field prints cur_score, i.e. the
        # mean precision again.
        print "Mean Precision: %.3f Triple Loss: %.3f Inter Loss: %.3f IoU: %.3f" % (prec.mean(), cur_score, totalInterClassLoss.mean(),total_miou)
        # Checkpoint only when the mean precision improves.
        if cur_score > best_score:
            best_score = cur_score
            # NOTE(review): the model moves to CPU only when 'use_cpu' is
            # set, but moves back to GPU whenever 'use_gpu' is set -- confirm
            # the intended pairing.
            if pretrain_opts['use_cpu']:
                model = model.cpu()
            states = {'shared_layers': model.layers.state_dict()}
            print "Save model to %s" % pretrain_opts['model_path']
            torch.save(states, pretrain_opts['model_path'])
            if pretrain_opts['use_gpu']:
                model = model.cuda()
args.set_type 274 | pretrain_opts['padding_ratio']=args.padding_ratio 275 | pretrain_opts['padded_img_size']=pretrain_opts['img_size']*int(pretrain_opts['padding_ratio']) 276 | pretrain_opts['model_path']=args.model_path 277 | pretrain_opts['frame_interval'] = args.frame_interval 278 | pretrain_opts['init_model_path'] = args.init_model_path 279 | pretrain_opts['batch_frames'] = args.batch_frames 280 | pretrain_opts['lr'] = args.lr 281 | pretrain_opts['batch_pos'] = args.batch_pos # original = 64 282 | pretrain_opts['batch_neg'] = args.batch_neg # original = 192 283 | pretrain_opts['n_cycles'] = args.n_cycles 284 | pretrain_opts['adaptive_align']=False 285 | pretrain_opts['seqbatch_size'] = args.seqbatch_size 286 | pretrain_opts['use_gpu'] = True 287 | pretrain_opts['use_cpu'] = False 288 | ################################################################################## 289 | ############################Do not modify opts anymore.########################### 290 | ######################Becuase of synchronization of options####################### 291 | ################################################################################## 292 | 293 | print pretrain_opts 294 | train_mdnet() 295 | 296 | -------------------------------------------------------------------------------- /vot.py: -------------------------------------------------------------------------------- 1 | """ 2 | \file vot.py 3 | 4 | @brief Python utility functions for VOT integration 5 | 6 | @author Luka Cehovin, Alessio Dore 7 | 8 | @date 2016 9 | 10 | """ 11 | 12 | import sys 13 | import copy 14 | import collections 15 | 16 | try: 17 | import trax 18 | except ImportError: 19 | raise Exception('TraX support not found. 
class VOT(object):
    """ TraX-backed helper for plugging a Python tracker into VOT """

    def __init__(self, region_format, channels=None):
        """ Start the TraX server and consume the initialization request

        Args:
            region_format: one of trax.Region.RECTANGLE / trax.Region.POLYGON
            channels: None (color only), 'rgbd', 'rgbt' or 'ir'

        Raises:
            Exception: if channels is none of the values above
        """
        assert region_format in [trax.Region.RECTANGLE, trax.Region.POLYGON]

        if channels is None:
            channels = ['color']
        else:
            presets = (
                ('rgbd', ['color', 'depth']),
                ('rgbt', ['color', 'ir']),
                ('ir', ['ir']),
            )
            for preset_name, preset_channels in presets:
                if channels == preset_name:
                    channels = preset_channels
                    break
            else:
                raise Exception('Illegal configuration {}.'.format(channels))

        self._trax = trax.Server([region_format], [trax.Image.PATH], channels)

        request = self._trax.wait()
        assert request.type == 'initialize'

        # Keep the initial region in this module's own tuple types.
        if isinstance(request.region, trax.Polygon):
            self._region = Polygon([Point(x[0], x[1]) for x in request.region])
        else:
            self._region = Rectangle(*request.region.bounds())

        # The first frame arrives with the initialization request; it is kept
        # until the first call to frame().
        self._image = self._image_paths(request)

        self._trax.status(request.region)

    @staticmethod
    def _image_paths(request):
        """ Return the request's image path(s): a single path when there is
        only one channel, otherwise the list of paths """
        paths = [entry.path() for _, entry in request.image.items()]
        if len(paths) == 1:
            return paths[0]
        return paths

    def region(self):
        """
        Return the initialization region received from the client

        Returns:
            initialization region
        """
        return self._region

    def report(self, region, confidence = None):
        """
        Report the tracking results to the client

        Arguments:
            region: region for the frame (Rectangle or Polygon)
            confidence: optional score, sent as the 'confidence' property
        """
        assert isinstance(region, (Rectangle, Polygon))

        if isinstance(region, Polygon):
            tregion = trax.Polygon.create([(p.x, p.y) for p in region.points])
        else:
            tregion = trax.Rectangle.create(region.x, region.y, region.width, region.height)

        properties = {} if confidence is None else {'confidence': confidence}
        self._trax.status(tregion, properties)

    def frame(self):
        """
        Get a frame (image path) from client

        Returns:
            path of the image (or list of paths, one per channel), or None
            when the client sends anything other than a frame request
        """
        # The first frame was already received during initialization.
        if hasattr(self, "_image"):
            pending = self._image
            del self._image
            return pending

        request = self._trax.wait()
        if request.type != 'frame':
            return None
        return self._image_paths(request)

    def quit(self):
        """ Shut the TraX server down if it was ever started """
        if hasattr(self, '_trax'):
            self._trax.quit()

    def __del__(self):
        self.quit()