├── README.md
├── Run.py
├── Run.py.bk
├── __init__.py
├── __pycache__
    ├── options.cpython-37.pyc
    ├── python_RLS_RTMDNet.cpython-37.pyc
    ├── tracker.cpython-35.pyc
    ├── tracker.cpython-37.pyc
    └── vot.cpython-37.pyc
├── models
    └── rt-mdnet.pth
├── modules
    ├── __init__.py
    ├── __pycache__
    │   ├── bbreg.cpython-37.pyc
    │   ├── data_prov.cpython-37.pyc
    │   ├── img_cropper.cpython-37.pyc
    │   ├── model.cpython-37.pyc
    │   ├── pretrain_options.cpython-37.pyc
    │   ├── sample_generator.cpython-37.pyc
    │   └── utils.cpython-37.pyc
    ├── bbreg.py
    ├── data_prov.py
    ├── img_cropper.py
    ├── model.py
    ├── prepro_data.py
    ├── prepro_data_imagenet.py
    ├── pretrain_options.py
    ├── roi_align
    │   ├── .setup.py.swp
    │   ├── Makefile
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   └── __init__.cpython-37.pyc
    │   ├── _ext
    │   │   ├── __init__.py
    │   │   ├── __init__.pyc
    │   │   └── roi_align
    │   │   │   ├── __init__.py
    │   │   │   ├── __init__.pyc
    │   │   │   └── _roi_align.so
    │   ├── _ext2
    │   │   ├── __init__.py
    │   │   ├── __init__.pyc
    │   │   ├── __pycache__
    │   │   │   └── __init__.cpython-37.pyc
    │   │   └── roi_align.cpython-37m-x86_64-linux-gnu.so
    │   ├── build.py
    │   ├── functions
    │   │   ├── __init__.py
    │   │   ├── __init__.pyc
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-37.pyc
    │   │   │   └── roi_align.cpython-37.pyc
    │   │   ├── roi_align.py
    │   │   └── roi_align.pyc
    │   ├── modules
    │   │   ├── __init__.py
    │   │   ├── __init__.pyc
    │   │   ├── __pycache__
    │   │   │   ├── __init__.cpython-37.pyc
    │   │   │   └── roi_align.cpython-37.pyc
    │   │   ├── roi_align.py
    │   │   └── roi_align.pyc
    │   ├── setup.py
    │   ├── src.bak
    │   │   ├── .roi_align_cuda.cpp.swp
    │   │   ├── cuda
    │   │   │   ├── Makefile
    │   │   │   ├── roi_align.cu.o
    │   │   │   ├── roi_align_kernel.cu
    │   │   │   └── roi_align_kernel.h
    │   │   ├── roi_align_cuda.c
    │   │   ├── roi_align_cuda.cpp
    │   │   └── roi_align_cuda.hpp
    │   ├── src.bak2
    │   │   ├── .roi_align_cuda.cpp.swp
    │   │   ├── cuda
    │   │   │   ├── Makefile
    │   │   │   ├── roi_align.cu.o
    │   │   │   ├── roi_align_kernel.cu
    │   │   │   └── roi_align_kernel.h
    │   │   ├── roi_align_cuda.c
    │   │   ├── roi_align_cuda.cpp
    │   │   └── roi_align_cuda.hpp
    │   └── src
    │   │   ├── cuda
    │   │       ├── Makefile
    │   │       ├── roi_align.cu.o
    │   │       ├── roi_align_kernel.cu
    │   │       └── roi_align_kernel.h
    │   │   ├── roi_align_cuda.c
    │   │   ├── roi_align_cuda.cpp
    │   │   └── roi_align_cuda.hpp
    ├── sample_generator.py
    └── utils.py
├── options.py
├── python_RLS_RTMDNet.py
├── python_RLS_RTMDNet_bk.py
├── tracker.py
├── train_mrcnn.py
└── vot.py


/README.md:
--------------------------------------------------------------------------------
 1 | ## RLS-RTMDNet
 2 | Code and raw result files of our CVPR2020 oral paper "[Recursive Least-Squares Estimator-Aided Online Learning for Visual Tracking](https://openaccess.thecvf.com/content_CVPR_2020/html/Gao_Recursive_Least-Squares_Estimator-Aided_Online_Learning_for_Visual_Tracking_CVPR_2020_paper.html)"
 3 | 
 4 | Created by [Jin Gao](http://www.nlpr.ia.ac.cn/users/gaojin/)
 5 | 
 6 | ### Introduction
 7 | RLS-RTMDNet is dedicated to improving the online tracking part of RT-MDNet ([project page](http://cvlab.postech.ac.kr/~chey0313/real_time_mdnet/) and [paper](https://arxiv.org/pdf/1808.08834.pdf)) based on our proposed recursive least-squares estimator-aided online learning method.
 8 | 
 9 | ### Citation
10 | If you're using this code in a publication, please cite our paper.
11 | 
12 | 	@InProceedings{Gao_2020_CVPR,
13 |    	author = {Gao, Jin and Hu, Weiming and Lu, Yan},
14 |     	title = {Recursive Least-squares Estimator-aided Online Learning for Visual Tracking},
15 |     	booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
16 |     	month = {June},
17 |     	year = {2020}
18 |   	}
19 |   
20 | 
21 | ### System Requirements
22 | 
23 | This code is tested on 64-bit Linux (Ubuntu 16.04 LTS) with the following Anaconda environment:
24 | >> * PyTorch (= 1.2.0)
25 | >> * Python (= 3.7.4)
26 |   
27 | ### Online Tracking
28 | 
29 | **Pretrained Model**
30 | >> * The off-the-shelf pretrained model in RT-MDNet is used for our testing: [RT-MDNet-ImageNet-pretrained](https://www.dropbox.com/s/lr8uft05zlo21an/rt-mdnet.pth?dl=0).
31 | 
32 | **Demo**
33 | >> * 'Run.py' for OTB and UAV123
34 | >> * 'python_RLS_RTMDNet.py' for VOT16/17.
35 |   
36 | 


--------------------------------------------------------------------------------
/Run.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from os.path import join, isdir
  3 | from tracker import *
  4 | import numpy as np
  5 | 
  6 | import argparse
  7 | 
  8 | import pickle
  9 | 
 10 | import math
 11 | import warnings
 12 | warnings.filterwarnings('ignore')
 13 | torch.cuda.set_device(1)
 14 | 
 15 | def genConfig(seq_path, set_type):
 16 | 
 17 |     path, seqname = os.path.split(seq_path)
 18 | 
 19 | 
 20 |     if set_type == 'OTB100':
 21 |         ############################################  have to refine #############################################
 22 | 
 23 |         img_list = sorted([seq_path + '/img/' + p for p in os.listdir(seq_path + '/img') if os.path.splitext(p)[1] == '.jpg'])
 24 | 
 25 |         if (seqname == 'Jogging_1') or (seqname == 'Skating2_1'):
 26 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.1.txt')
 27 |         elif (seqname == 'Jogging_2') or (seqname == 'Skating2_2'):
 28 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.2.txt')
 29 |         elif seqname =='Human4':
 30 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.2.txt', delimiter=',')
 31 |         elif (seqname == 'BlurBody')  or (seqname == 'BlurCar1') or (seqname == 'BlurCar2') or (seqname == 'BlurCar3') \
 32 |                 or (seqname == 'BlurCar4') or (seqname == 'BlurFace') or (seqname == 'BlurOwl') or (seqname == 'Board') \
 33 |                 or (seqname == 'Box')   or (seqname == 'Car4')  or (seqname == 'CarScale') or (seqname == 'ClifBar') \
 34 |                 or (seqname == 'Couple')  or (seqname == 'Crossing')  or (seqname == 'Dog') or (seqname == 'FaceOcc1') \
 35 |                 or (seqname == 'Girl') or (seqname == 'Rubik') or (seqname == 'Singer1') or (seqname == 'Subway') \
 36 |                 or (seqname == 'Surfer') or (seqname == 'Sylvester') or (seqname == 'Toy') or (seqname == 'Twinnings') \
 37 |                 or (seqname == 'Vase') or (seqname == 'Walking') or (seqname == 'Walking2') or (seqname == 'Woman')   :
 38 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.txt')
 39 |         else:
 40 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.txt', delimiter=',')
 41 | 
 42 |         if seqname == 'David':
 43 |             img_list = img_list[299:]
 44 |           
 45 |         if seqname == 'Football1':
 46 |             img_list = img_list[0:74]
 47 |         if seqname == 'Freeman3':
 48 |             img_list = img_list[0:460]
 49 |         if seqname == 'Freeman4':
 50 |             img_list = img_list[0:283]
 51 |         if seqname == 'Diving':
 52 |             img_list = img_list[0:215]
 53 | 
 54 |     elif set_type == 'UAV123':
 55 |         img_list = sorted([seq_path + '/' + p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.jpg'])
 56 | 
 57 |         gt = np.loadtxt(seq_path + '/anno/UAV123/' + seqname + '.txt', delimiter=',')
 58 |             
 59 | 
 60 |         if seqname == 'bird1_1':
 61 |             img_list = img_list[0:253]
 62 |         if seqname == 'bird1_2':
 63 |             img_list = img_list[774:1477]
 64 |         if seqname == 'bird1_3':
 65 |             img_list = img_list[1572:2437]
 66 | 
 67 |         if seqname == 'car1_1':
 68 |             img_list = img_list[0:751]
 69 |         if seqname == 'car1_2':
 70 |             img_list = img_list[750:1627]
 71 |         if seqname == 'car1_3':
 72 |             img_list = img_list[1626:2629]
 73 | 
 74 |         if seqname == 'car6_1':
 75 |             img_list = img_list[0:487]
 76 |         if seqname == 'car6_2':
 77 |             img_list = img_list[486:1807]
 78 |         if seqname == 'car6_3':
 79 |             img_list = img_list[1806:2953]
 80 |         if seqname == 'car6_4':
 81 |             img_list = img_list[2952:3925]
 82 |         if seqname == 'car6_5':
 83 |             img_list = img_list[3924:4861]
 84 |         
 85 |         if seqname == 'car8_1':
 86 |             img_list = img_list[0:1357]
 87 |         if seqname == 'car8_2':
 88 |             img_list = img_list[1356:2575]
 89 | 
 90 |         if seqname == 'car16_1':
 91 |             img_list = img_list[0:415]
 92 |         if seqname == 'car16_2':
 93 |             img_list = img_list[414:1993]
 94 | 
 95 | 
 96 |         if seqname == 'group1_1':
 97 |             img_list = img_list[0:1333]
 98 |         if seqname == 'group1_2':
 99 |             img_list = img_list[1332:2515]
100 |         if seqname == 'group1_3':
101 |             img_list = img_list[2514:3925]
102 |         if seqname == 'group1_4':
103 |             img_list = img_list[3924:4873]
104 | 
105 |         if seqname == 'group2_1':
106 |             img_list = img_list[0:907]
107 |         if seqname == 'group2_2':
108 |             img_list = img_list[906:1771]
109 |         if seqname == 'group2_3':
110 |             img_list = img_list[1770:2683]
111 | 
112 |         if seqname == 'group3_1':
113 |             img_list = img_list[0:1567]
114 |         if seqname == 'group3_2':
115 |             img_list = img_list[1566:2827]
116 |         if seqname == 'group3_3':
117 |             img_list = img_list[2826:4369]
118 |         if seqname == 'group3_4':
119 |             img_list = img_list[4368:5527]
120 | 
121 |         if seqname == 'person2_1':
122 |             img_list = img_list[0:1189]
123 |         if seqname == 'person2_2':
124 |             img_list = img_list[1188:2623]
125 | 
126 |         if seqname == 'person4_1':
127 |             img_list = img_list[0:1501]
128 |         if seqname == 'person4_2':
129 |             img_list = img_list[1500:2743]
130 | 
131 |         if seqname == 'person5_1':
132 |             img_list = img_list[0:877]
133 |         if seqname == 'person5_2':
134 |             img_list = img_list[876:2101]
135 | 
136 |         if seqname == 'person7_1':
137 |             img_list = img_list[0:1249]
138 |         if seqname == 'person7_2':
139 |             img_list = img_list[1248:2065]
140 | 
141 |         if seqname == 'person8_1':
142 |             img_list = img_list[0:1075]
143 |         if seqname == 'person8_2':
144 |             img_list = img_list[1074:1525]
145 | 
146 |         if seqname == 'person12_1':
147 |             img_list = img_list[0:601]
148 |         if seqname == 'person12_2':
149 |             img_list = img_list[600:1621]
150 | 
151 |         if seqname == 'person14_1':
152 |             img_list = img_list[0:847]
153 |         if seqname == 'person14_2':
154 |             img_list = img_list[846:1813]
155 |         if seqname == 'person14_3':
156 |             img_list = img_list[1812:2923]
157 | 
158 |         if seqname == 'person17_1':
159 |             img_list = img_list[0:1501]
160 |         if seqname == 'person17_2':
161 |             img_list = img_list[1500:2347]
162 | 
163 |         if seqname == 'person19_1':
164 |             img_list = img_list[0:1243]
165 |         if seqname == 'person19_2':
166 |             img_list = img_list[1242:2791]
167 |         if seqname == 'person19_3':
168 |             img_list = img_list[2790:4357]
169 | 
170 |         if seqname == 'truck4_1':
171 |             img_list = img_list[0:577]
172 |         if seqname == 'truck4_2':
173 |             img_list = img_list[576:1261]
174 | 
175 |         if seqname == 'uav1_1':
176 |             img_list = img_list[0:1555]
177 |         if seqname == 'uav1_2':
178 |             img_list = img_list[1554:2377]
179 |         if seqname == 'uav1_3':
180 |             img_list = img_list[2472:3469]
181 | 
182 |         if seqname == 'truck2':
183 |             img_list = img_list[0:385]
184 | 
185 |         ##polygon to rect
186 |     if gt.shape[1] == 8:
187 |         x_min = np.min(gt[:, [0, 2, 4, 6]], axis=1)[:, None]
188 |         y_min = np.min(gt[:, [1, 3, 5, 7]], axis=1)[:, None]
189 |         x_max = np.max(gt[:, [0, 2, 4, 6]], axis=1)[:, None]
190 |         y_max = np.max(gt[:, [1, 3, 5, 7]], axis=1)[:, None]
191 |         gt = np.concatenate((x_min, y_min, x_max - x_min, y_max - y_min), axis=1)
192 | 
193 |     return img_list, gt
194 | 
195 | 
if __name__ == "__main__":

    # ---- command-line options -------------------------------------------
    cli = argparse.ArgumentParser()
    cli.add_argument("-set_type", default='OTB100')
    cli.add_argument("-model_path", default='./models/rt-mdnet.pth')
    cli.add_argument("-result_path", default='./result.npy')
    cli.add_argument("-visual_log", default=False, action='store_true')
    cli.add_argument("-visualize", default=False, action='store_true')
    cli.add_argument("-adaptive_align", default=True, action='store_false')
    cli.add_argument("-padding", default=1.2, type=float)
    cli.add_argument("-jitter", default=True, action='store_false')

    args = cli.parse_args()

    # This is the only place where the shared `opts` dictionary is overridden;
    # it must not be modified after this point so that all users of the
    # options stay synchronized.
    for opt_name in ('model_path', 'result_path', 'visual_log', 'set_type',
                     'visualize', 'adaptive_align', 'padding', 'jitter'):
        opts[opt_name] = getattr(args, opt_name)
    print (opts)

    # ---- dataset location -----------------------------------------------
    dataset_path = '/home/jgao/Recent/'
    seq_home = dataset_path + opts['set_type']
    seq_list = []
    for entry in os.listdir(seq_home):
        if isdir(join(seq_home, entry)):
            seq_list.append(entry)

    # ---- repeated evaluation over the whole benchmark -------------------
    num_runs = 50
    mIoU_max, mIoU_min, mIoU_avg = 0.0, 1.0, 0.0
    res_list = []
    for iterloop in range(num_runs):
        iou_list = []
        fps_list = dict()
        bb_result = dict()

        iou_list_nobb = []
        bb_result_nobb = dict()
        for num, seq in enumerate(seq_list):
            # Never true as written; raise the threshold to skip the first
            # sequences of a run.
            if num < -1:
                continue
            seq_path = seq_home + '/' + seq
            img_list, gt = genConfig(seq_path, opts['set_type'])

            replay_file = opts['result_path'] + str(iterloop) + 'replay.npy'
            if os.path.exists(replay_file):
                # A finished run is replayed from disk instead of re-tracked.
                resultdic = np.load(replay_file, allow_pickle=True).tolist()
                result_bb = resultdic['bb_result'][seq]
                fps = resultdic['fps'][seq]
                result_nobb = resultdic['bb_result_nobb'][seq]
                iou_result = np.zeros((len(img_list), 1))
                for i in range(1, len(img_list)):
                    iou_result[i] = overlap_ratio(gt[i], result_bb[i])[0]
            else:
                iou_result, result_bb, fps, result_nobb = run_mdnet(
                    img_list, gt[0], gt, seq=seq, display=opts['visualize'])

            # Frames with a NaN overlap (no usable ground truth) are zeroed
            # and left out of the per-sequence average.
            enable_frameNum = 0.
            for iidx in range(len(iou_result)):
                if math.isnan(iou_result[iidx]):
                    iou_result[iidx] = 0.
                else:
                    enable_frameNum += 1.

            iou_list.append(iou_result.sum() / enable_frameNum)
            bb_result[seq] = result_bb
            fps_list[seq] = fps
            bb_result_nobb[seq] = result_nobb

            running_mIoU = sum(iou_list) / len(iou_list)
            running_fps = sum(fps_list.values()) / len(fps_list)
            print ('{} {} : {} , total mIoU:{}, fps:{}'.format(num, seq, iou_result.mean(), running_mIoU, running_fps))

        run_mIoU = sum(iou_list) / len(iou_list)
        res_list.append(run_mIoU)
        mIoU_avg += run_mIoU
        if mIoU_max < run_mIoU:
            mIoU_max = run_mIoU
        if mIoU_min > run_mIoU:
            mIoU_min = run_mIoU

        # np.save appends '.npy', producing the replay file checked above.
        result = dict()
        result['bb_result'] = bb_result
        result['fps'] = fps_list
        result['bb_result_nobb'] = bb_result_nobb
        np.save(opts['result_path'] + str(iterloop) + 'replay', result)
        print (mIoU_max)
        print (mIoU_min)
        print (res_list)

    mIoU_avg /= num_runs
    print (mIoU_max)
    print (mIoU_avg)
    print (mIoU_min)
    print (res_list)
300 | 


--------------------------------------------------------------------------------
/Run.py.bk:
--------------------------------------------------------------------------------
  1 | import os
  2 | from os.path import join, isdir
  3 | from tracker import *
  4 | import numpy as np
  5 | 
  6 | import argparse
  7 | 
  8 | from scipy import io
  9 | 
 10 | import pickle
 11 | 
 12 | import math
 13 | import warnings
 14 | warnings.filterwarnings('ignore')
 15 | torch.cuda.set_device(2)
 16 | 
 17 | def genConfig(seq_path, set_type):
 18 | 
 19 |     path, seqname = os.path.split(seq_path)
 20 | 
 21 | 
 22 |     if set_type == 'OTB100':
 23 |         ############################################  have to refine #############################################
 24 | 
 25 |         img_list = sorted([seq_path + '/img/' + p for p in os.listdir(seq_path + '/img') if os.path.splitext(p)[1] == '.jpg'])
 26 | 
 27 |         if (seqname == 'Jogging_1') or (seqname == 'Skating2_1'):
 28 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.1.txt')
 29 |         elif (seqname == 'Jogging_2') or (seqname == 'Skating2_2'):
 30 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.2.txt')
 31 |         elif seqname =='Human4':
 32 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.2.txt', delimiter=',')
 33 |         elif (seqname == 'BlurBody')  or (seqname == 'BlurCar1') or (seqname == 'BlurCar2') or (seqname == 'BlurCar3') \
 34 |                 or (seqname == 'BlurCar4') or (seqname == 'BlurFace') or (seqname == 'BlurOwl') or (seqname == 'Board') \
 35 |                 or (seqname == 'Box')   or (seqname == 'Car4')  or (seqname == 'CarScale') or (seqname == 'ClifBar') \
 36 |                 or (seqname == 'Couple')  or (seqname == 'Crossing')  or (seqname == 'Dog') or (seqname == 'FaceOcc1') \
 37 |                 or (seqname == 'Girl') or (seqname == 'Rubik') or (seqname == 'Singer1') or (seqname == 'Subway') \
 38 |                 or (seqname == 'Surfer') or (seqname == 'Sylvester') or (seqname == 'Toy') or (seqname == 'Twinnings') \
 39 |                 or (seqname == 'Vase') or (seqname == 'Walking') or (seqname == 'Walking2') or (seqname == 'Woman')   :
 40 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.txt')
 41 |         else:
 42 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.txt', delimiter=',')
 43 | 
 44 |         if seqname == 'David':
 45 |             img_list = img_list[299:]
 46 |           
 47 |         if seqname == 'Football1':
 48 |             img_list = img_list[0:74]
 49 |         if seqname == 'Freeman3':
 50 |             img_list = img_list[0:460]
 51 |         if seqname == 'Freeman4':
 52 |             img_list = img_list[0:283]
 53 |         if seqname == 'Diving':
 54 |             img_list = img_list[0:215]
 55 | 
 56 |     elif set_type == 'UAV123':
 57 |         img_list = sorted([seq_path + '/' + p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.jpg'])
 58 | 
 59 |         gt = np.loadtxt(seq_path + '/anno/UAV123/' + seqname + '.txt', delimiter=',')
 60 |             
 61 | 
 62 |         if seqname == 'bird1_1':
 63 |             img_list = img_list[0:253]
 64 |         if seqname == 'bird1_2':
 65 |             img_list = img_list[774:1477]
 66 |         if seqname == 'bird1_3':
 67 |             img_list = img_list[1572:2437]
 68 | 
 69 |         if seqname == 'car1_1':
 70 |             img_list = img_list[0:751]
 71 |         if seqname == 'car1_2':
 72 |             img_list = img_list[750:1627]
 73 |         if seqname == 'car1_3':
 74 |             img_list = img_list[1626:2629]
 75 | 
 76 |         if seqname == 'car6_1':
 77 |             img_list = img_list[0:487]
 78 |         if seqname == 'car6_2':
 79 |             img_list = img_list[486:1807]
 80 |         if seqname == 'car6_3':
 81 |             img_list = img_list[1806:2953]
 82 |         if seqname == 'car6_4':
 83 |             img_list = img_list[2952:3925]
 84 |         if seqname == 'car6_5':
 85 |             img_list = img_list[3924:4861]
 86 |         
 87 |         if seqname == 'car8_1':
 88 |             img_list = img_list[0:1357]
 89 |         if seqname == 'car8_2':
 90 |             img_list = img_list[1356:2575]
 91 | 
 92 |         if seqname == 'car16_1':
 93 |             img_list = img_list[0:415]
 94 |         if seqname == 'car16_2':
 95 |             img_list = img_list[414:1993]
 96 | 
 97 | 
 98 |         if seqname == 'group1_1':
 99 |             img_list = img_list[0:1333]
100 |         if seqname == 'group1_2':
101 |             img_list = img_list[1332:2515]
102 |         if seqname == 'group1_3':
103 |             img_list = img_list[2514:3925]
104 |         if seqname == 'group1_4':
105 |             img_list = img_list[3924:4873]
106 | 
107 |         if seqname == 'group2_1':
108 |             img_list = img_list[0:907]
109 |         if seqname == 'group2_2':
110 |             img_list = img_list[906:1771]
111 |         if seqname == 'group2_3':
112 |             img_list = img_list[1770:2683]
113 | 
114 |         if seqname == 'group3_1':
115 |             img_list = img_list[0:1567]
116 |         if seqname == 'group3_2':
117 |             img_list = img_list[1566:2827]
118 |         if seqname == 'group3_3':
119 |             img_list = img_list[2826:4369]
120 |         if seqname == 'group3_4':
121 |             img_list = img_list[4368:5527]
122 | 
123 |         if seqname == 'person2_1':
124 |             img_list = img_list[0:1189]
125 |         if seqname == 'person2_2':
126 |             img_list = img_list[1188:2623]
127 | 
128 |         if seqname == 'person4_1':
129 |             img_list = img_list[0:1501]
130 |         if seqname == 'person4_2':
131 |             img_list = img_list[1500:2743]
132 | 
133 |         if seqname == 'person5_1':
134 |             img_list = img_list[0:877]
135 |         if seqname == 'person5_2':
136 |             img_list = img_list[876:2101]
137 | 
138 |         if seqname == 'person7_1':
139 |             img_list = img_list[0:1249]
140 |         if seqname == 'person7_2':
141 |             img_list = img_list[1248:2065]
142 | 
143 |         if seqname == 'person8_1':
144 |             img_list = img_list[0:1075]
145 |         if seqname == 'person8_2':
146 |             img_list = img_list[1074:1525]
147 | 
148 |         if seqname == 'person12_1':
149 |             img_list = img_list[0:601]
150 |         if seqname == 'person12_2':
151 |             img_list = img_list[600:1621]
152 | 
153 |         if seqname == 'person14_1':
154 |             img_list = img_list[0:847]
155 |         if seqname == 'person14_2':
156 |             img_list = img_list[846:1813]
157 |         if seqname == 'person14_3':
158 |             img_list = img_list[1812:2923]
159 | 
160 |         if seqname == 'person17_1':
161 |             img_list = img_list[0:1501]
162 |         if seqname == 'person17_2':
163 |             img_list = img_list[1500:2347]
164 | 
165 |         if seqname == 'person19_1':
166 |             img_list = img_list[0:1243]
167 |         if seqname == 'person19_2':
168 |             img_list = img_list[1242:2791]
169 |         if seqname == 'person19_3':
170 |             img_list = img_list[2790:4357]
171 | 
172 |         if seqname == 'truck4_1':
173 |             img_list = img_list[0:577]
174 |         if seqname == 'truck4_2':
175 |             img_list = img_list[576:1261]
176 | 
177 |         if seqname == 'uav1_1':
178 |             img_list = img_list[0:1555]
179 |         if seqname == 'uav1_2':
180 |             img_list = img_list[1554:2377]
181 |         if seqname == 'uav1_3':
182 |             img_list = img_list[2472:3469]
183 | 
184 |         if seqname == 'truck2':
185 |             img_list = img_list[0:385]
186 | 
187 |         ##polygon to rect
188 |     if gt.shape[1] == 8:
189 |         x_min = np.min(gt[:, [0, 2, 4, 6]], axis=1)[:, None]
190 |         y_min = np.min(gt[:, [1, 3, 5, 7]], axis=1)[:, None]
191 |         x_max = np.max(gt[:, [0, 2, 4, 6]], axis=1)[:, None]
192 |         y_max = np.max(gt[:, [1, 3, 5, 7]], axis=1)[:, None]
193 |         gt = np.concatenate((x_min, y_min, x_max - x_min, y_max - y_min), axis=1)
194 | 
195 |     return img_list, gt
196 | 
197 | 
if __name__ == "__main__":

    # ---- command-line options -------------------------------------------
    cli = argparse.ArgumentParser()
    cli.add_argument("-set_type", default='OTB100')
    cli.add_argument("-model_path", default='./models/rt-mdnet.pth')
    cli.add_argument("-result_path", default='./result.npy')
    cli.add_argument("-visual_log", default=False, action='store_true')
    cli.add_argument("-visualize", default=False, action='store_true')
    cli.add_argument("-adaptive_align", default=True, action='store_false')
    cli.add_argument("-padding", default=1.2, type=float)
    cli.add_argument("-jitter", default=True, action='store_false')

    args = cli.parse_args()

    # This is the only place where the shared `opts` dictionary is overridden;
    # it must not be modified after this point so that all users of the
    # options stay synchronized.
    for opt_name in ('model_path', 'result_path', 'visual_log', 'set_type',
                     'visualize', 'adaptive_align', 'padding', 'jitter'):
        opts[opt_name] = getattr(args, opt_name)
    print (opts)

    # ---- dataset location -----------------------------------------------
    dataset_path = '/home/jgao/Recent/'
    seq_home = dataset_path + opts['set_type']
    seq_list = []
    for entry in os.listdir(seq_home):
        if isdir(join(seq_home, entry)):
            seq_list.append(entry)

    # ---- repeated evaluation plus per-sequence .mat export --------------
    num_runs = 50
    mIoU_max, mIoU_min, mIoU_avg = 0.0, 1.0, 0.0
    res_list = []
    for iterloop in range(num_runs):
        iou_list = []
        fps_list = dict()
        bb_result = dict()

        iou_list_nobb = []
        bb_result_nobb = dict()
        for num, seq in enumerate(seq_list):
            # Never true as written; raise the threshold to skip the first
            # sequences of a run.
            if num < -1:
                continue
            seq_path = seq_home + '/' + seq
            img_list, gt = genConfig(seq_path, opts['set_type'])

            # One entry per run is collected, in run order, for the .mat file.
            res = {'results': []}
            for innerloop in range(num_runs):
                if innerloop != iterloop:
                    # Boxes of every other run are read from that run's
                    # replay file, which must already exist on disk.
                    resultdic = np.load(opts['result_path'] + str(innerloop) + 'replay.npy', allow_pickle=True)
                    resultdic = resultdic.tolist()
                    result_bb = resultdic['bb_result'][seq]
                    res['results'].append({'res': result_bb.round().tolist(), 'type': 'rect', 'len': len(result_bb)})
                    continue

                # Current run: replay from disk when available, else track.
                replay_file = opts['result_path'] + str(iterloop) + 'replay.npy'
                if os.path.exists(replay_file):
                    resultdic = np.load(replay_file, allow_pickle=True).tolist()
                    result_bb = resultdic['bb_result'][seq]
                    fps = resultdic['fps'][seq]
                    result_nobb = resultdic['bb_result_nobb'][seq]
                    iou_result = np.zeros((len(img_list), 1))
                    for i in range(1, len(img_list)):
                        iou_result[i] = overlap_ratio(gt[i], result_bb[i])[0]
                else:
                    iou_result, result_bb, fps, result_nobb = run_mdnet(
                        img_list, gt[0], gt, seq=seq, display=opts['visualize'])

                # Frames with a NaN overlap (no usable ground truth) are
                # zeroed and left out of the per-sequence average.
                enable_frameNum = 0.
                for iidx in range(len(iou_result)):
                    if math.isnan(iou_result[iidx]):
                        iou_result[iidx] = 0.
                    else:
                        enable_frameNum += 1.

                iou_list.append(iou_result.sum() / enable_frameNum)
                bb_result[seq] = result_bb
                fps_list[seq] = fps
                bb_result_nobb[seq] = result_nobb

                running_mIoU = sum(iou_list) / len(iou_list)
                running_fps = sum(fps_list.values()) / len(fps_list)
                print ('{} {} : {} , total mIoU:{}, fps:{}'.format(num, seq, iou_result.mean(), running_mIoU, running_fps))

                res['results'].append({'res': result_bb.round().tolist(), 'type': 'rect', 'len': len(result_bb)})

            io.savemat('./' + seq + '_RLS_RTMDNet.mat', res)

        run_mIoU = sum(iou_list) / len(iou_list)
        res_list.append(run_mIoU)
        mIoU_avg += run_mIoU
        if mIoU_max < run_mIoU:
            mIoU_max = run_mIoU
        if mIoU_min > run_mIoU:
            mIoU_min = run_mIoU

        # np.save appends '.npy', producing the replay file checked above.
        result = dict()
        result['bb_result'] = bb_result
        result['fps'] = fps_list
        result['bb_result_nobb'] = bb_result_nobb
        np.save(opts['result_path'] + str(iterloop) + 'replay', result)
        print (mIoU_max)
        print (mIoU_min)
        print (res_list)

    mIoU_avg /= num_runs
    print (mIoU_max)
    print (mIoU_avg)
    print (mIoU_min)
    print (res_list)
312 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/__init__.py


--------------------------------------------------------------------------------
/__pycache__/options.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/__pycache__/options.cpython-37.pyc


--------------------------------------------------------------------------------
/__pycache__/python_RLS_RTMDNet.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/__pycache__/python_RLS_RTMDNet.cpython-37.pyc


--------------------------------------------------------------------------------
/__pycache__/tracker.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/__pycache__/tracker.cpython-35.pyc


--------------------------------------------------------------------------------
/__pycache__/tracker.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/__pycache__/tracker.cpython-37.pyc


--------------------------------------------------------------------------------
/__pycache__/vot.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/__pycache__/vot.cpython-37.pyc


--------------------------------------------------------------------------------
/models/rt-mdnet.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/models/rt-mdnet.pth


--------------------------------------------------------------------------------
/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__init__.py


--------------------------------------------------------------------------------
/modules/__pycache__/bbreg.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__pycache__/bbreg.cpython-37.pyc


--------------------------------------------------------------------------------
/modules/__pycache__/data_prov.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__pycache__/data_prov.cpython-37.pyc


--------------------------------------------------------------------------------
/modules/__pycache__/img_cropper.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__pycache__/img_cropper.cpython-37.pyc


--------------------------------------------------------------------------------
/modules/__pycache__/model.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__pycache__/model.cpython-37.pyc


--------------------------------------------------------------------------------
/modules/__pycache__/pretrain_options.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__pycache__/pretrain_options.cpython-37.pyc


--------------------------------------------------------------------------------
/modules/__pycache__/sample_generator.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__pycache__/sample_generator.cpython-37.pyc


--------------------------------------------------------------------------------
/modules/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/__pycache__/utils.cpython-37.pyc


--------------------------------------------------------------------------------
/modules/bbreg.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from sklearn.linear_model import Ridge
 3 | import numpy as np
 4 | 
 5 | from utils import *
 6 | 
 7 | class BBRegressor():
 8 |     def __init__(self, img_size, alpha=1000, overlap=[0.6, 1], scale=[1, 2]):
 9 |         self.img_size = img_size
10 |         self.alpha = alpha
11 |         self.overlap_range = overlap
12 |         self.scale_range = scale
13 |         self.model = Ridge(alpha=self.alpha)
14 | 
15 |     def train(self, X, bbox, gt):
16 |         X = X.cpu().numpy()
17 |         bbox = np.copy(bbox)
18 |         gt = np.copy(gt)
19 |         
20 |         if gt.ndim==1:
21 |             gt = gt[None,:]
22 | 
23 |         r = overlap_ratio(bbox, gt)
24 |         s = np.prod(bbox[:,2:], axis=1) / np.prod(gt[0,2:])
25 |         idx = (r >= self.overlap_range[0]) * (r <= self.overlap_range[1]) * \
26 |               (s >= self.scale_range[0]) * (s <= self.scale_range[1])
27 | 
28 |         X = X[idx]
29 |         bbox = bbox[idx]
30 | 
31 |         Y = self.get_examples(bbox, gt)
32 |         
33 |         self.model.fit(X, Y)
34 | 
35 |     def predict(self, X, bbox):
36 |         X = X.cpu().numpy()
37 |         bbox_ = np.copy(bbox)
38 | 
39 |         Y = self.model.predict(X)
40 |     
41 |         bbox_[:,:2] = bbox_[:,:2] + bbox_[:,2:]/2
42 |         bbox_[:,:2] = Y[:,:2] * bbox_[:,2:] + bbox_[:,:2]
43 |         bbox_[:,2:] = np.exp(Y[:,2:]) * bbox_[:,2:]
44 |         bbox_[:,:2] = bbox_[:,:2] - bbox_[:,2:]/2
45 |         
46 |         r = overlap_ratio(bbox, bbox_)
47 |         s = np.prod(bbox[:,2:], axis=1) / np.prod(bbox_[:,2:], axis=1)
48 |         idx = (r >= self.overlap_range[0]) * (r <= self.overlap_range[1]) * \
49 |               (s >= self.scale_range[0]) * (s <= self.scale_range[1])
50 |         idx = np.logical_not(idx)
51 |         bbox_[idx] = bbox[idx]
52 |  
53 |         bbox_[:,:2] = np.maximum(bbox_[:,:2], 0)
54 |         bbox_[:,2:] = np.minimum(bbox_[:,2:], self.img_size - bbox[:,:2])
55 | 
56 |         return bbox_
57 |     
58 |     def get_examples(self, bbox, gt):
59 |         bbox[:,:2] = bbox[:,:2] + bbox[:,2:]/2
60 |         gt[:,:2] = gt[:,:2] + gt[:,2:]/2
61 | 
62 |         dst_xy = (gt[:,:2] - bbox[:,:2]) / bbox[:,2:]
63 |         dst_wh = np.log(gt[:,2:] / bbox[:,2:])
64 | 
65 |         Y = np.concatenate((dst_xy, dst_wh), axis=1)
66 |         return Y
67 | 
68 | 


--------------------------------------------------------------------------------
/modules/data_prov.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import numpy as np
  3 | from PIL import Image
  4 | 
  5 | import torch
  6 | import torch.utils.data as data
  7 | import matplotlib.pyplot as plt
  8 | from utils import *
  9 | 
 10 | import matplotlib.patches as patches
 11 | 
 12 | import os
 13 | from sample_generator import *
 14 | 
 15 | import sys
 16 | from pretrain_options import *
 17 | 
 18 | from img_cropper import *
 19 | 
 20 | 
 21 | 
 22 | class RegionDataset(data.Dataset):
 23 |     def __init__(self, img_dir, img_list, gt, receptive_field, opts):
 24 | 
 25 |         self.img_list = np.array([os.path.join(img_dir, img) for img in img_list])
 26 |         self.gt = gt
 27 | 
 28 |         self.batch_frames = pretrain_opts['batch_frames']
 29 |         self.batch_pos = pretrain_opts['batch_pos']
 30 |         self.batch_neg = pretrain_opts['batch_neg']
 31 | 
 32 |         self.overlap_pos = pretrain_opts['overlap_pos']
 33 |         self.overlap_neg = pretrain_opts['overlap_neg']
 34 | 
 35 | 
 36 |         self.crop_size = pretrain_opts['img_size']
 37 |         self.padding = pretrain_opts['padding']
 38 | 
 39 |         self.index = np.random.permutation(len(self.img_list))
 40 |         self.pointer = 0
 41 | 
 42 |         image = Image.open(self.img_list[0]).convert('RGB')
 43 |         self.scene_generator = SampleGenerator('gaussian', image.size,trans_f=1.5, scale_f=1.2,valid=True)
 44 |         self.pos_generator = SampleGenerator('gaussian', image.size, 0.1, 1.2, 1.1, True)
 45 |         self.neg_generator = SampleGenerator('uniform', image.size, 1, 1.2, 1.1, True)
 46 | 
 47 |         self.receptive_field = receptive_field
 48 | 
 49 |         self.interval = pretrain_opts['frame_interval']
 50 |         self.img_crop_model = imgCropper(pretrain_opts['padded_img_size'])
 51 |         self.img_crop_model.eval()
 52 |         if pretrain_opts['use_gpu']:
 53 |             self.img_crop_model.gpuEnable()
 54 | 
 55 |     def __iter__(self):
 56 |         return self
 57 | 
 58 |     def __next__(self):
 59 | 
 60 |         next_pointer = min(self.pointer + self.batch_frames, len(self.img_list))
 61 |         idx = self.index[self.pointer:next_pointer]
 62 |         if len(idx) < self.batch_frames:
 63 |             self.index = np.random.permutation(len(self.img_list))
 64 |             next_pointer = self.batch_frames - len(idx)
 65 |             idx = np.concatenate((idx, self.index[:next_pointer]))
 66 |         self.pointer = next_pointer
 67 | 
 68 | 
 69 |         n_pos = self.batch_pos
 70 |         n_neg = self.batch_neg
 71 | 
 72 |         scenes = []
 73 |         for i, (img_path, bbox) in enumerate(zip(self.img_list[idx], self.gt[idx])):
 74 |             image = Image.open(img_path).convert('RGB')
 75 |             #plt.figure("test")
 76 |             #plt.imshow(image)
 77 |             #plt.show()
 78 |             #plt.close()
 79 |             image = np.asarray(image)
 80 | 
 81 |             ishape = image.shape
 82 |             pos_examples = gen_samples(SampleGenerator('gaussian', (ishape[1],ishape[0]), 0.1, 1.2, 1.1, False), bbox, n_pos, overlap_range=self.overlap_pos)
 83 |             neg_examples = gen_samples(SampleGenerator('uniform', (ishape[1],ishape[0]), 1, 1.2, 1.1, False), bbox, n_neg, overlap_range=self.overlap_neg)
 84 | 
 85 |             # compute padded sample
 86 |             padded_x1 = (neg_examples[:, 0]-neg_examples[:,2]*(pretrain_opts['padding']-1.)/2.).min()
 87 |             padded_y1 = (neg_examples[:, 1]-neg_examples[:,3]*(pretrain_opts['padding']-1.)/2.).min()
 88 |             padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2]*(pretrain_opts['padding']+1.)/2.).max()
 89 |             padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3]*(pretrain_opts['padding']+1.)/2.).max()
 90 |             padded_scene_box = np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1))
 91 | 
 92 |             jitter_scale = 1.1 ** np.clip(3.*np.random.randn(1,1),-2,2)
 93 |             crop_img_size = (padded_scene_box[2:4] * ((pretrain_opts['img_size'], pretrain_opts['img_size']) / bbox[2:4])).astype('int64') * jitter_scale[0][0]
 94 |             cropped_image, cur_image_var = self.img_crop_model.crop_image(image, np.reshape(padded_scene_box, (1, 4)), crop_img_size)
 95 |             cropped_image = cropped_image - 128.
 96 |             if pretrain_opts['use_gpu']:
 97 |                 cropped_image = cropped_image.data.cpu()
 98 |                 cur_image_var = cur_image_var.cpu()
 99 |             scenes.append(cropped_image)
100 |             ## get current frame and heatmap
101 | 
102 |             rel_bbox = np.copy(bbox)
103 |             rel_bbox[0:2] -= padded_scene_box[0:2]
104 | 
105 |             jittered_obj_size = jitter_scale[0][0]*float(pretrain_opts['img_size'])
106 | 
107 |             batch_num = np.zeros((pos_examples.shape[0], 1))
108 |             pos_rois = np.copy(pos_examples)
109 |             pos_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)), pos_rois.shape[0], axis=0)
110 |             pos_rois = samples2maskroi(pos_rois, self.receptive_field, (jittered_obj_size, jittered_obj_size),bbox[2:4], pretrain_opts['padding'])
111 |             pos_rois = np.concatenate((batch_num, pos_rois), axis=1)
112 | 
113 |             batch_num = np.zeros((neg_examples.shape[0], 1))
114 |             neg_rois = np.copy(neg_examples)
115 |             neg_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)), neg_rois.shape[0], axis=0)
116 |             neg_rois = samples2maskroi(neg_rois, self.receptive_field, (jittered_obj_size, jittered_obj_size),bbox[2:4], pretrain_opts['padding'])
117 |             neg_rois = np.concatenate((batch_num, neg_rois), axis=1)
118 | 
119 |             if i==0:
120 |                 total_pos_rois = [torch.from_numpy(np.copy(pos_rois).astype('float32'))]
121 |                 total_neg_rois = [torch.from_numpy(np.copy(neg_rois).astype('float32'))]
122 |             else:
123 |                 total_pos_rois.append(torch.from_numpy(np.copy(pos_rois).astype('float32')))
124 |                 total_neg_rois.append(torch.from_numpy(np.copy(neg_rois).astype('float32')))
125 | 
126 |         return scenes,total_pos_rois, total_neg_rois
127 | 
128 |     next = __next__
129 | 
130 |     def extract_regions(self, image, samples):
131 |         regions = np.zeros((len(samples), self.crop_size, self.crop_size, 3), dtype='uint8')
132 |         for i, sample in enumerate(samples):
133 |             regions[i] = crop_image(image, sample, self.crop_size, self.padding, True)
134 | 
135 |         regions = regions.transpose(0, 3, 1, 2)
136 |         regions = regions.astype('float32') - 128.
137 |         return regions
138 | 
139 | 
class RegionExtractor():
    """Iterate over crops of one image in batches of ``batch_size`` samples.

    Iteration yields float32 tensors built from ``crop_image`` results and
    rewinds itself when exhausted, so the same object can be looped again.
    The ``shuffle`` flag is stored but the sample order is never permuted.
    """

    def __init__(self, image, samples, crop_size, padding, batch_size, shuffle=False):
        self.image = np.asarray(image)
        self.samples = samples
        self.crop_size, self.padding = crop_size, padding
        self.batch_size, self.shuffle = batch_size, shuffle

        self.pointer = 0
        self.index = np.arange(len(samples))

        # Mean over the first two axes of the image array, kept as float32.
        self.mean = self.image.mean(0).mean(0).astype('float32')

    def __iter__(self):
        return self

    def __next__(self):
        total = len(self.samples)
        if self.pointer == total:
            # Rewind before signalling the end so the extractor is reusable.
            self.pointer = 0
            raise StopIteration

        start = self.pointer
        stop = min(start + self.batch_size, total)
        self.pointer = stop

        batch = self.extract_regions(self.index[start:stop])
        return torch.from_numpy(batch)
    next = __next__

    def extract_regions(self, index):
        """Return the crops selected by ``index`` as a float32 (N, 3, S, S) array minus 128."""
        side = self.crop_size
        # Crops are stored as uint8 first, exactly as the cropper output is cast on assignment.
        crops = np.zeros((len(index), side, side, 3), dtype='uint8')
        for row, box in enumerate(self.samples[index]):
            crops[row] = crop_image(self.image, box, side, self.padding)

        return crops.transpose(0, 3, 1, 2).astype('float32') - 128.
180 | 


--------------------------------------------------------------------------------
/modules/img_cropper.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.insert(0,'./modules')
 3 | from roi_align.modules.roi_align import RoIAlign
 4 | import torch.nn as nn
 5 | import torch.nn.functional as F
 6 | from torch.autograd import Variable
 7 | import torch
 8 | import numpy as np
 9 | 
10 | import time
11 | 
12 | import matplotlib.pyplot as plt
13 | import matplotlib.patches as patches
14 | 
class imgCropper(nn.Module):
    """Crop and resize image regions by delegating to a ``RoIAlign`` module.

    Boxes are given with origin in the first two columns and size in the last
    two; they are converted to corner form and prefixed with a batch-index
    column before being handed to the ROI-align module.
    """

    def __init__(self, img_size):
        super(imgCropper, self).__init__()
        self.isCuda = False
        self.img_size = img_size
        self.roi_align_model = RoIAlign(img_size,img_size, 1. )

    def gpuEnable(self):
        """Move the ROI-align module to the GPU and route later inputs there too."""
        self.roi_align_model = self.roi_align_model.cuda()
        self.isCuda = True

    def forward(self, image, roi):
        aligned_image_var = self.roi_align_model(image, roi)
        return aligned_image_var

    @staticmethod
    def _sync_if_cuda():
        """Synchronise CUDA for timing purposes; a no-op on hosts without CUDA."""
        if torch.cuda.is_available():
            torch.cuda.synchronize()

    def crop_image(self,image, box, result_size):
        """Crop several boxes out of a single image.

        Args:
            image: array indexed as (rows, cols, channels).
            box: 2-D array, one box per row; not modified.
            result_size: two values written to the ROI-align module's
                ``aligned_width`` and ``aligned_height`` (in that order).

        Returns:
            ``(cropped_image, cur_image_var)``: the ROI-align output and the
            full image as a float tensor with a leading batch axis.
        """
        ishape = image.shape
        cur_image_var = np.reshape(image, (1, ishape[0], ishape[1], ishape[2]))
        cur_image_var = cur_image_var.transpose(0, 3, 1, 2)
        cur_image_var = cur_image_var.astype('float32')
        cur_image_var = Variable(torch.from_numpy(cur_image_var).float())

        # origin/size -> corner form, then prepend batch index 0 (single image)
        roi = np.copy(box)
        roi[:,2:4] += roi[:,0:2]
        roi = np.concatenate((np.zeros((roi.shape[0], 1)), roi), axis=1)
        roi = Variable(torch.from_numpy(roi).float())

        if self.isCuda:
            cur_image_var = cur_image_var.cuda()
            roi = roi.cuda()

        # NOTE: this permanently changes the output size of the shared module.
        self.roi_align_model.aligned_width = result_size[0]
        self.roi_align_model.aligned_height = result_size[1]
        cropped_image = self.forward(cur_image_var, roi)

        return cropped_image, cur_image_var

    def crop_several_image(self,img_list,target_list):
        """Crop one target box from each image (one-to-one pairing).

        Args:
            img_list: list of image tensors, stacked and squeezed into a batch.
            target_list: one box per image, same length as ``img_list``.

        Returns:
            The ROI-align output for the stacked images.
        """
        assert(len(target_list) == len(img_list))

        # The synchronisation only makes the timing below meaningful; it used
        # to be unconditional, which raised on machines without CUDA.
        self._sync_if_cuda()
        start_time = time.time()
        cur_images = torch.squeeze(torch.stack(img_list, 0))
        self._sync_if_cuda()
        print ('10 image stacking time:{}'.format(time.time() - start_time))

        # origin/size -> corner form; row i is paired with image i of the batch
        sample_rois = np.array(target_list)
        sample_rois[:,2:4] += sample_rois[:,0:2]
        batch_num = np.reshape(np.arange(0,len(sample_rois)),(len(sample_rois),1))
        sample_rois = np.concatenate( (batch_num, sample_rois), axis=1)
        sample_rois = Variable(torch.from_numpy(sample_rois.astype('float32')))
        if self.isCuda:
            sample_rois = sample_rois.cuda()
            cur_images = cur_images.cuda()

        cropped_images = self.forward(cur_images, sample_rois)

        return cropped_images
82 | 
83 | 
84 | 
85 | 


--------------------------------------------------------------------------------
/modules/model.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import scipy.io
  3 | import numpy as np
  4 | from collections import OrderedDict
  5 | 
  6 | import torch.nn as nn
  7 | import torch.nn.functional as F
  8 | from torch.autograd import Variable
  9 | import torch
 10 | 
 11 | import time
 12 | 
 13 | import sys
 14 | sys.path.insert(0,'./roi_align')
 15 | from roi_align.modules.roi_align import RoIAlignAvg,RoIAlignMax
 16 | 
 17 | def append_params(params, module, prefix):
 18 |     for child in module.children():
 19 |         for k,p in child._parameters.items():
 20 |             if p is None: continue
 21 | 
 22 |             if isinstance(child, nn.BatchNorm2d):
 23 |                 name = prefix + '_bn_' + k
 24 |             else:
 25 |                 name = prefix + '_' + k
 26 | 
 27 |             if name not in params:
 28 |                 params[name] = p
 29 |             else:
 30 |                 raise RuntimeError("Duplicated param name: %s" % (name))
 31 | 
 32 | class LRN(nn.Module):
 33 |     def __init__(self, local_size=1, alpha=0.0001, beta=0.75, ACROSS_CHANNELS=False):
 34 |         super(LRN, self).__init__()
 35 |         self.ACROSS_CHANNELS = ACROSS_CHANNELS
 36 |         if self.ACROSS_CHANNELS:
 37 |             self.average = nn.AvgPool3d(kernel_size=(local_size, 1, 1),
 38 |                                         stride=1,
 39 |                                         padding=(int((local_size - 1.0) / 2), 0, 0))
 40 |         else:
 41 |             self.average = nn.AvgPool2d(kernel_size=local_size,
 42 |                                         stride=1,
 43 |                                         padding=int((local_size - 1.0) / 2))
 44 |         self.alpha = alpha
 45 |         self.beta = beta
 46 | 
 47 |     def forward(self, x):
 48 |         if self.ACROSS_CHANNELS:
 49 |             div = x.pow(2).unsqueeze(1)
 50 |             div = self.average(div).squeeze(1)
 51 |             div = div.mul(self.alpha).add(2.0).pow(self.beta)
 52 |         else:
 53 |             div = x.pow(2)
 54 |             div = self.average(div)
 55 |             div = div.mul(self.alpha).add(2.0).pow(self.beta)
 56 |         x = x.div(div)
 57 |         return x
 58 | 
 59 | 
 60 | class MDNet(nn.Module):
 61 |     def __init__(self, model_path=None,K=1):
 62 |         super(MDNet, self).__init__()
 63 |         self.K = K
 64 |         self.layers = nn.Sequential(OrderedDict([
 65 |                 ('conv1', nn.Sequential(nn.Conv2d(3, 96, kernel_size=7, stride=2),
 66 |                                         nn.ReLU(),
 67 |                                         LRN(),
 68 |                                         nn.MaxPool2d(kernel_size=3, stride=2)
 69 |                                         )),
 70 |                 ('conv2', nn.Sequential(nn.Conv2d(96, 256, kernel_size=5, stride=2,dilation=1),
 71 |                                         nn.ReLU(),
 72 |                                         LRN(),
 73 |                                         )),
 74 | 
 75 |                 ('conv3', nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=1,dilation=3),
 76 |                                         nn.ReLU(),
 77 |                                         )),
 78 |                 ('fc4',   nn.Sequential(
 79 |                                         nn.Linear(512 * 3 * 3, 512),
 80 |                                         nn.ReLU())),
 81 |                 ('fc5',   nn.Sequential(nn.Dropout(0.5),
 82 |                                         nn.Linear(512, 512),
 83 |                                         nn.ReLU()))]))
 84 | 
 85 |         self.branches = nn.ModuleList([nn.Sequential(nn.Dropout(0.5),
 86 |                                                      nn.Linear(512, 2)) for _ in range(K)])
 87 | 
 88 |         self.roi_align_model = RoIAlignMax(3, 3, 1. / 8)
 89 | 
 90 |         self.receptive_field = 75.  # it is receptive fieald that a element of feat_map covers. feat_map is bottom layer of ROI_align_layer
 91 | 
 92 |         if model_path is not None:
 93 |             if os.path.splitext(model_path)[1] == '.pth':
 94 |                 self.load_model(model_path)
 95 |             elif os.path.splitext(model_path)[1] == '.mat':
 96 |                 self.load_mat_model(model_path)
 97 |             else:
 98 |                 raise RuntimeError("Unkown model format: %s" % (model_path))
 99 |         self.build_param_dict()
100 | 
101 |     def build_param_dict(self):
102 |         self.params = OrderedDict()
103 |         for name, module in self.layers.named_children():
104 |             append_params(self.params, module, name)
105 |         for k, module in enumerate(self.branches):
106 |             append_params(self.params, module, 'fc6_%d'%(k))
107 | 
108 |     def set_learnable_params(self, layers):
109 |         for k, p in self.params.items():
110 |             if any([k.startswith(l) for l in layers]):
111 |                 p.requires_grad = True
112 |             else:
113 |                 p.requires_grad = False
114 | 
115 | 
116 |     def get_learnable_params(self):
117 |         params = OrderedDict()
118 |         for k, p in self.params.items():
119 |             if p.requires_grad:
120 |                 params[k] = p
121 |         return params
122 | 
123 |     def forward(self, x, k=0, in_layer='conv1', out_layer='fc6'):
124 | 
125 |         run = False
126 |         for name, module in self.layers.named_children():
127 |             if name == in_layer:
128 |                 run = True
129 |             if run:
130 |                 x = module(x)
131 |                 if name == out_layer:
132 |                     return x
133 | 
134 | 
135 |         x = self.branches[k](x)
136 |         if out_layer=='fc6':
137 |             return x
138 |         elif out_layer=='fc6_softmax':
139 |             return F.softmax(x)
140 | 
141 |     def forward_owm(self, x, k=0, in_layer='conv1', out_layer='fc6'):
142 | 
143 |         run = False
144 |         h_list = []
145 |         for name, module in self.layers.named_children():
146 |             if name == in_layer:
147 |                 run = True
148 |             if run:
149 |                 #print x.size(0)
150 |                 #print torch.cat((x.view(x.size(0), -1), torch.ones(1, 1).repeat(x.size(0), 1).cuda()), 1)
151 |                 h_list.append(torch.mean(torch.cat((x.view(x.size(0), -1), torch.ones(1, 1).repeat(x.size(0),1).cuda()), 1), 0, True))
152 |                 x = module(x)
153 |                 if name == out_layer:
154 |                     return x
155 |         h_list.append(torch.mean(torch.cat((x.view(x.size(0), -1), torch.ones(1, 1).repeat(x.size(0), 1).cuda()), 1), 0, True))
156 |         x = self.branches[k](x)
157 |         if out_layer=='fc6':
158 |             return x, h_list
159 |         elif out_layer=='fc6_softmax':
160 |             return F.softmax(x)
161 | 
162 |     def load_model(self, model_path):
163 |         states = torch.load(model_path)
164 |         shared_layers = states['shared_layers']
165 |         self.layers.load_state_dict(shared_layers)
166 | 
167 |     def load_mat_model(self, matfile):
168 |         mat = scipy.io.loadmat(matfile)
169 |         mat_layers = list(mat['layers'])[0]
170 | 
171 |         # copy conv weights
172 |         for i in range(3):
173 |             weight, bias = mat_layers[i*4]['weights'].item()[0]
174 |             self.layers[i][0].weight.data = torch.from_numpy(np.transpose(weight, (3,2,0,1)))
175 |             self.layers[i][0].bias.data = torch.from_numpy(bias[:,0])
176 | 
177 |     def trainSpatialTransform(self, image, bb):
178 | 
179 |         return
180 | 
181 | 
class BinaryLoss(nn.Module):
    """Mean two-class cross-entropy over positive and negative score batches.

    Column 1 of each score row is read as the positive-class score and
    column 0 as the negative-class score.
    """

    def __init__(self):
        super(BinaryLoss, self).__init__()

    def forward(self, pos_score, neg_score):
        # dim=1 is spelled out: calling log_softmax without `dim` is
        # deprecated, and the [:, c] indexing below fixes axis 1 as the
        # class axis.
        pos_loss = -F.log_softmax(pos_score, dim=1)[:,1]
        neg_loss = -F.log_softmax(neg_score, dim=1)[:,0]

        loss = (pos_loss.sum() + neg_loss.sum())/(pos_loss.size(0) + neg_loss.size(0))
        return loss
192 | 
193 | 
class Accuracy():
    """Callable returning ``(positive accuracy, negative accuracy)`` as Python floats.

    A positive row counts as correct when column 1 exceeds column 0; a
    negative row when column 1 is below column 0.
    """

    def __call__(self, pos_score, neg_score):
        pos_hits = pos_score[:, 1].gt(pos_score[:, 0]).sum().float()
        neg_hits = neg_score[:, 1].lt(neg_score[:, 0]).sum().float()

        # The small epsilon keeps an empty batch from dividing by zero.
        pos_rate = pos_hits / (pos_score.size(0) + 1e-8)
        neg_rate = neg_hits / (neg_score.size(0) + 1e-8)

        return pos_rate.item(), neg_rate.item()
204 | 
205 | 
class Precision():
    """Callable returning the share of positives among the top-scoring samples.

    Positive and negative column-1 scores are pooled; of the ``n_pos``
    highest-scoring entries, the fraction that came from ``pos_score`` is
    returned as a Python float.
    """

    def __call__(self, pos_score, neg_score):
        n_pos = pos_score.size(0)
        pooled = torch.cat((pos_score[:, 1], neg_score[:, 1]), 0)
        # Positives occupy the first n_pos slots, so an index below n_pos is a hit.
        _, top_idx = torch.topk(pooled, n_pos)
        hits = (top_idx < n_pos).float().sum()

        return (hits / (n_pos + 1e-8)).item()
214 | 
215 | 
216 | 
217 | 


--------------------------------------------------------------------------------
/modules/prepro_data.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | import pickle
 4 | from collections import OrderedDict
 5 | 
 6 | 
 7 | 
 8 | # seq_home = '../dataset/'
 9 | seqlist_path = '../vot-otb.txt'
10 | output_path = 'data/vot-otb.pkl'
11 | set_type = 'VOT'
12 | seq_home = '/home/ilchae/dataset/tracking/'+set_type +'/'
13 | 
14 | if set_type=='OTB':
15 |     seqlist_path = '../otb-vot15.txt'
16 |     output_path = '../otb-vot15.pkl'
17 | 
18 | if set_type == 'VOT':
19 |     seqlist_path = '../vot-otb.txt'
20 |     output_path = '../vot-otb.pkl'
21 | 
22 | with open(seqlist_path,'r') as fp:
23 |     seq_list = fp.read().splitlines()
24 | 
25 | data = {}
26 | for i,seqname in enumerate(seq_list):
27 |     print(seqname)
28 |     if set_type=='OTB':
29 |         seq_path = seq_home+seqname
30 |         img_list = sorted([p for p in os.listdir(seq_path+'/img') if os.path.splitext(p)[1] == '.jpg'])
31 | 
32 |         if (seqname == 'Jogging') or (seqname == 'Skating2'):
33 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.1.txt')
34 |         elif seqname == 'Human4' :
35 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.2.txt', delimiter=',')
36 |         elif (seqname == 'BlurBody') or (seqname == 'BlurCar1') or (seqname == 'BlurCar2') or (seqname == 'BlurCar3') \
37 |                 or (seqname == 'BlurCar4') or (seqname == 'BlurFace') or (seqname == 'BlurOwl') or (seqname == 'Board') \
38 |                 or (seqname == 'Box') or (seqname == 'Car4') or (seqname == 'CarScale') or (seqname == 'ClifBar') \
39 |                 or (seqname == 'Couple') or (seqname == 'Crossing') or (seqname == 'Dog') or (seqname == 'FaceOcc1') \
40 |                 or (seqname == 'Girl') or (seqname == 'Rubik') or (seqname == 'Singer1') or (seqname == 'Subway') \
41 |                 or (seqname == 'Surfer') or (seqname == 'Sylvester') or (seqname == 'Toy') or (seqname == 'Twinnings') \
42 |                 or (seqname == 'Vase') or (seqname == 'Walking') or (seqname == 'Walking2') or (seqname == 'Woman') :
43 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.txt')
44 |         elif (seqname == 'Diving'):
45 |             gt = np.loadtxt(seq_path + '/groundtruth_rect_ilchae.txt', delimiter=',')
46 |         else:
47 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.txt', delimiter=',')
48 | 
49 |         if (seqname == 'David') or (seqname == 'Football1') or (seqname == 'Freeman3') or (seqname == 'Freeman4'):
50 |             continue
51 | 
52 |     if set_type =='VOT':
53 |         img_list = sorted([p for p in os.listdir(seq_home + seqname) if os.path.splitext(p)[1] == '.jpg'])
54 |         gt = np.loadtxt(seq_home + seqname + '/groundtruth.txt', delimiter=',')
55 | 
56 |     if set_type == 'IMAGENET':
57 |         img_list = []
58 |         gt = []
59 | 
60 |     assert len(img_list) == len(gt), "Lengths do not match!!"
61 | 
62 |     if gt.shape[1]==8:
63 |         x_min = np.min(gt[:,[0,2,4,6]],axis=1)[:,None]
64 |         y_min = np.min(gt[:,[1,3,5,7]],axis=1)[:,None]
65 |         x_max = np.max(gt[:,[0,2,4,6]],axis=1)[:,None]
66 |         y_max = np.max(gt[:,[1,3,5,7]],axis=1)[:,None]
67 |         gt = np.concatenate((x_min, y_min, x_max-x_min, y_max-y_min),axis=1)
68 | 
69 |     data[seqname] = {'images':img_list, 'gt':gt}
70 | 
71 | with open(output_path, 'wb') as fp:
72 |     pickle.dump(data, fp, -1)
73 | 


--------------------------------------------------------------------------------
/modules/prepro_data_imagenet.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | import pickle
 4 | from collections import OrderedDict
 5 | 
 6 | import xml.etree.ElementTree
 7 | import xmltodict
 8 | import numpy as np
 9 | 
10 | import  matplotlib.pyplot as plt
11 | import matplotlib.patches as patches
12 | from PIL import Image
13 | import time
14 | 
# Destination of the pickled {sequence name: {'images': [...], 'gt': ndarray}} table.
output_path = './imagenet_refine.pkl'

# Dataset root; frames are listed under Data/VID/train and the per-frame XML
# annotations under Annotations/VID/train.
seq_home = '/mnt/jgao/jgao/ILSVRC2015/'
train_list = os.listdir(seq_home + 'Data/VID/train')
seq_list = []
for cur_dir in train_list:
    seq_list += [cur_dir + '/' + p for p in os.listdir(seq_home + 'Data/VID/train/' + cur_dir)]

# NOTE(review): nothing in this script draws on the figure/axes; they are kept
# only so the module-level side effects stay as they were.
fig = plt.figure()
ax = fig.add_subplot(1,1,1)

data = {}
completeNum = 0
for seqname in seq_list:
    print(seqname)
    seq_path = seq_home + 'Data/VID/train/' + seqname
    gt_path = seq_home +'Annotations/VID/train/' + seqname
    if not os.path.isdir(seq_path):
        continue

    img_list = sorted([p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.JPEG'])
    gt_list = sorted([gt_path + '/' + p for p in os.listdir(gt_path) if os.path.splitext(p)[1] == '.xml'])

    # Frames (and their boxes) that survive the filters below.
    enable_gt = []
    enable_img_list = []
    for gidx, img_name in enumerate(img_list):
        # Frame i is paired with the i-th annotation file of the sorted listing.
        with open(gt_list[gidx]) as fd:
            doc = xmltodict.parse(fd.read())

        try:
            objects = doc['annotation']['object']
        except (KeyError, TypeError):
            ## no object, occlusion and hidden etc.
            continue
        # xmltodict yields a list for repeated <object> elements and a single
        # mapping otherwise; only the first object is considered.
        obj = objects[0] if isinstance(objects, list) else objects

        # Compare by value: `is not 0` tested object identity, not equality.
        if int(obj['trackid']) != 0:
            continue

        xmin = float(obj['bndbox']['xmin'])
        xmax = float(obj['bndbox']['xmax'])
        ymin = float(obj['bndbox']['ymin'])
        ymax = float(obj['bndbox']['ymax'])

        ## discard too big object: wider AND taller than half of the frame
        frame_size = doc['annotation']['size']
        if ((float(frame_size['width'])/2.) < (xmax-xmin)) and ((float(frame_size['height'])/2.) < (ymax-ymin)):
            continue

        # Stored as (xmin, ymin, width, height).
        enable_gt.append(np.array([xmin, ymin, xmax - xmin, ymax - ymin], dtype=np.float64))
        enable_img_list.append(img_name)

    # Sequences without any usable frame are left out of the output.
    if enable_img_list:
        assert len(enable_img_list) == len(enable_gt), "Lengths do not match!!"
        data[seqname] = {'images':enable_img_list, 'gt':np.asarray(enable_gt)}
        completeNum += 1
        print('Complete!')

with open(output_path, 'wb') as fp:
    pickle.dump(data, fp, -1)

print('complete {} videos'.format(completeNum))
93 | 


--------------------------------------------------------------------------------
/modules/pretrain_options.py:
--------------------------------------------------------------------------------
 1 | from collections import OrderedDict
 2 | 
 3 | pretrain_opts = OrderedDict()
 4 | pretrain_opts['use_gpu'] = True
 5 | 
 6 | pretrain_opts['init_model_path'] = './models/imagenet-vgg-m.mat'
 7 | pretrain_opts['model_path'] = './models/rt_mdnet.pth'
 8 | 
 9 | pretrain_opts['batch_frames'] = 8
10 | pretrain_opts['batch_pos'] = 64
11 | pretrain_opts['batch_neg'] = 196
12 | 
13 | pretrain_opts['overlap_pos'] = [0.7, 1]
14 | pretrain_opts['overlap_neg'] = [0, 0.5]
15 | 
16 | pretrain_opts['img_size'] = 107
17 | 
18 | 
19 | pretrain_opts['lr'] = 0.0001
20 | pretrain_opts['w_decay'] = 0.0005
21 | pretrain_opts['momentum'] = 0.9
22 | pretrain_opts['grad_clip'] = 10
23 | pretrain_opts['ft_layers'] = ['conv','fc']
24 | pretrain_opts['lr_mult'] = {'fc':1}
25 | pretrain_opts['n_cycles'] = 1000
26 | 
27 | 
28 | ##################################### from RCNN #############################################
29 | pretrain_opts['padding'] = 1.2
30 | pretrain_opts['padding_ratio']=5.
31 | pretrain_opts['padded_img_size'] = pretrain_opts['img_size']*int(pretrain_opts['padding_ratio'])
32 | pretrain_opts['frame_interval'] = 2
33 | 


--------------------------------------------------------------------------------
/modules/roi_align/.setup.py.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/.setup.py.swp


--------------------------------------------------------------------------------
/modules/roi_align/Makefile:
--------------------------------------------------------------------------------
# Builds the roi_align extension: the CUDA object is compiled by the Makefile
# in src/cuda, then build.py is run.
# `all` and `clean` name actions, not files, so declare them phony; otherwise a
# stray file called "all" or "clean" would silently disable the target.
.PHONY: all clean

all: src/cuda/roi_align.cu.o
	python build.py

src/cuda/roi_align.cu.o: src/cuda/roi_align_kernel.cu
	$(MAKE) -C src/cuda

clean:
	$(MAKE) -C src/cuda clean
9 | 


--------------------------------------------------------------------------------
/modules/roi_align/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/__init__.py


--------------------------------------------------------------------------------
/modules/roi_align/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/modules/roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext/__init__.py


--------------------------------------------------------------------------------
/modules/roi_align/_ext/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext/__init__.pyc


--------------------------------------------------------------------------------
/modules/roi_align/_ext/roi_align/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_align import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/modules/roi_align/_ext/roi_align/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext/roi_align/__init__.pyc


--------------------------------------------------------------------------------
/modules/roi_align/_ext/roi_align/_roi_align.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext/roi_align/_roi_align.so


--------------------------------------------------------------------------------
/modules/roi_align/_ext2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext2/__init__.py


--------------------------------------------------------------------------------
/modules/roi_align/_ext2/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext2/__init__.pyc


--------------------------------------------------------------------------------
/modules/roi_align/_ext2/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext2/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/modules/roi_align/_ext2/roi_align.cpython-37m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/_ext2/roi_align.cpython-37m-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/modules/roi_align/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | 
 6 | # sources = ['src/roi_align.c']
 7 | # headers = ['src/roi_align.h']
 8 | sources = []
 9 | headers = []
10 | defines = []
11 | with_cuda = False
12 | 
13 | if torch.cuda.is_available():
14 |     print('Including CUDA code.')
15 |     sources += ['src/roi_align_cuda.c']
16 |     headers += ['src/roi_align_cuda.h']
17 |     defines += [('WITH_CUDA', None)]
18 |     with_cuda = True
19 | 
20 | this_file = os.path.dirname(os.path.realpath(__file__))
21 | print(this_file)
22 | extra_objects = ['src/cuda/roi_align.cu.o']
23 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
24 | 
25 | ffi = create_extension(
26 |     '_ext.roi_align',
27 |     headers=headers,
28 |     sources=sources,
29 |     define_macros=defines,
30 |     relative_to=__file__,
31 |     with_cuda=with_cuda,
32 |     extra_objects=extra_objects
33 | )
34 | 
35 | if __name__ == '__main__':
36 |     ffi.build()
37 | 


--------------------------------------------------------------------------------
/modules/roi_align/functions/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/functions/__init__.py


--------------------------------------------------------------------------------
/modules/roi_align/functions/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/functions/__init__.pyc


--------------------------------------------------------------------------------
/modules/roi_align/functions/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/functions/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/modules/roi_align/functions/__pycache__/roi_align.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/functions/__pycache__/roi_align.cpython-37.pyc


--------------------------------------------------------------------------------
/modules/roi_align/functions/roi_align.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch.autograd import Function
  3 | from .._ext2 import roi_align
  4 | 
  5 | 
  6 | # TODO use save_for_backward instead
  7 | class RoIAlignFunction(Function):
  8 |     def __init__(self, aligned_height, aligned_width, spatial_scale):
  9 |         self.aligned_width = int(aligned_width)
 10 |         self.aligned_height = int(aligned_height)
 11 |         self.spatial_scale = float(spatial_scale)
 12 |         self.rois = None
 13 |         self.feature_size = None
 14 | 
 15 |     def forward(self, features, rois):
 16 |         self.rois = rois
 17 |         self.feature_size = features.size()
 18 | 
 19 |         batch_size, num_channels, data_height, data_width = features.size()
 20 |         num_rois = rois.size(0)
 21 | 
 22 |         output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_()
 23 |         #help(roi_align.roi_align_forward_cuda)
 24 |         #print(features.type())
 25 |         #print(rois.type())
 26 |         #print(output.type())
 27 |         if features.is_cuda:
 28 |             success = roi_align.roi_align_forward_cuda(self.aligned_height,
 29 |                                              self.aligned_width,
 30 |                                              self.spatial_scale, features,
 31 |                                              rois, output)
 32 |         else:
 33 |             raise NotImplementedError
 34 | 
 35 |         return output
 36 | 
 37 |     def backward(self, grad_output):
 38 |         assert(self.feature_size is not None and grad_output.is_cuda)
 39 | 
 40 |         batch_size, num_channels, data_height, data_width = self.feature_size
 41 | 
 42 |         grad_input = self.rois.new(batch_size, num_channels, data_height,
 43 |                                   data_width).zero_()
 44 |         roi_align.roi_align_backward_cuda(self.aligned_height,
 45 |                                           self.aligned_width,
 46 |                                           self.spatial_scale, grad_output,
 47 |                                           self.rois, grad_input)
 48 | 
 49 |         # print grad_input
 50 | 
 51 |         return grad_input, None
 52 | 
 53 | 
 54 | # TODO use save_for_backward instead
 55 | class RoIAlignAdaFunction(Function):
 56 |     def __init__(self, aligned_height, aligned_width, spatial_scale):
 57 |         self.aligned_width = int(aligned_width)
 58 |         self.aligned_height = int(aligned_height)
 59 |         self.spatial_scale = float(spatial_scale)
 60 |         self.rois = None
 61 |         self.feature_size = None
 62 | 
 63 |     def forward(self, features, rois):
 64 |         self.rois = rois
 65 |         self.feature_size = features.size()
 66 | 
 67 |         batch_size, num_channels, data_height, data_width = features.size()
 68 |         num_rois = rois.size(0)
 69 | 
 70 |         output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_()
 71 |         if features.is_cuda:
 72 |             success = roi_align.roi_align_ada_forward_cuda(self.aligned_height,
 73 |                                              self.aligned_width,
 74 |                                              self.spatial_scale, features,
 75 |                                              rois, output)
 76 |         else:
 77 |             raise NotImplementedError
 78 | 
 79 |         return output
 80 | 
 81 |     def backward(self, grad_output):
 82 |         assert(self.feature_size is not None and grad_output.is_cuda)
 83 | 
 84 |         batch_size, num_channels, data_height, data_width = self.feature_size
 85 | 
 86 |         grad_input = self.rois.new(batch_size, num_channels, data_height,
 87 |                                   data_width).zero_()
 88 |         roi_align.roi_align_ada_backward_cuda(self.aligned_height,
 89 |                                           self.aligned_width,
 90 |                                           self.spatial_scale, grad_output,
 91 |                                           self.rois, grad_input)
 92 | 
 93 |         # print grad_input
 94 | 
 95 |         return grad_input, None
 96 | 
 97 | 
 98 | # TODO use save_for_backward instead
 99 | class RoIAlignDenseAdaFunction(Function):
100 |     def __init__(self, aligned_height, aligned_width, spatial_scale):
101 |         self.aligned_width = int(aligned_width)
102 |         self.aligned_height = int(aligned_height)
103 |         self.spatial_scale = float(spatial_scale)
104 |         self.rois = None
105 |         self.feature_size = None
106 | 
107 |     def forward(self, features, rois):
108 |         self.rois = rois
109 |         self.feature_size = features.size()
110 | 
111 |         batch_size, num_channels, data_height, data_width = features.size()
112 |         num_rois = rois.size(0)
113 | 
114 |         output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_()
115 |         if features.is_cuda:
116 |             success = roi_align.roi_align_dense_ada_forward_cuda(self.aligned_height,
117 |                                              self.aligned_width,
118 |                                              self.spatial_scale, features,
119 |                                              rois, output)
120 |         else:
121 |             raise NotImplementedError
122 | 
123 |         return output
124 | 
125 |     def backward(self, grad_output):
126 |         assert(self.feature_size is not None and grad_output.is_cuda)
127 | 
128 |         batch_size, num_channels, data_height, data_width = self.feature_size
129 | 
130 |         grad_input = self.rois.new(batch_size, num_channels, data_height,
131 |                                   data_width).zero_()
132 |         roi_align.roi_align_dense_ada_backward_cuda(self.aligned_height,
133 |                                           self.aligned_width,
134 |                                           self.spatial_scale, grad_output,
135 |                                           self.rois, grad_input)
136 | 
137 |         # print grad_input
138 | 
139 |         return grad_input, None
140 | 


--------------------------------------------------------------------------------
/modules/roi_align/functions/roi_align.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/functions/roi_align.pyc


--------------------------------------------------------------------------------
/modules/roi_align/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/modules/__init__.py


--------------------------------------------------------------------------------
/modules/roi_align/modules/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/modules/__init__.pyc


--------------------------------------------------------------------------------
/modules/roi_align/modules/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/modules/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/modules/roi_align/modules/__pycache__/roi_align.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/modules/__pycache__/roi_align.cpython-37.pyc


--------------------------------------------------------------------------------
/modules/roi_align/modules/roi_align.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | from torch.nn.functional import avg_pool2d, max_pool2d
 3 | from ..functions.roi_align import RoIAlignFunction, RoIAlignAdaFunction, RoIAlignDenseAdaFunction
 4 | 
 5 | import torch
 6 | 
 7 | 
 8 | class RoIAlign(Module):
 9 |     def __init__(self, aligned_height, aligned_width, spatial_scale):
10 |         super(RoIAlign, self).__init__()
11 | 
12 |         self.aligned_width = int(aligned_width)
13 |         self.aligned_height = int(aligned_height)
14 |         self.spatial_scale = float(spatial_scale)
15 | 
16 |     def forward(self, features, rois):
17 |         return RoIAlignFunction(self.aligned_height, self.aligned_width,
18 |                                 self.spatial_scale)(features, rois)
19 | 
class RoIAlignAvg(Module):
    """RoIAlign on a grid one cell larger per side, then 2x2 average pooling."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignAvg, self).__init__()

        # Normalise the configuration once so forward() can pass it straight on.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Sample an (H+1) x (W+1) grid and average it with kernel 2, stride 1."""
        align = RoIAlignFunction(
            self.aligned_height + 1,
            self.aligned_width + 1,
            self.spatial_scale,
        )
        sampled = align(features, rois)
        return avg_pool2d(sampled, kernel_size=2, stride=1)
32 | 
class RoIAlignMax(Module):
    """RoIAlign on a grid four cells larger per side, then 3x3 max pooling."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignMax, self).__init__()

        # Normalise the configuration once so forward() can pass it straight on.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Sample an (H+4) x (W+4) grid and max-pool it with kernel 3, stride 2."""
        align = RoIAlignFunction(
            self.aligned_height + 4,
            self.aligned_width + 4,
            self.spatial_scale,
        )
        sampled = align(features, rois)
        return max_pool2d(sampled, kernel_size=3, stride=2)
45 | 
46 | 
class RoIAlignAdaMax(Module):
    """``RoIAlignAdaFunction`` on a grid four cells larger per side, then 3x3 max pooling."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignAdaMax, self).__init__()

        # Normalise the configuration once so forward() can pass it straight on.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Sample an (H+4) x (W+4) grid and max-pool it with kernel 3, stride 2."""
        align = RoIAlignAdaFunction(
            self.aligned_height + 4,
            self.aligned_width + 4,
            self.spatial_scale,
        )
        sampled = align(features, rois)
        return max_pool2d(sampled, kernel_size=3, stride=2)
59 | 
60 | 
class RoIAlignDenseAdaMax(Module):
    """``RoIAlignDenseAdaFunction`` on a grid four cells larger per side, then 3x3 max pooling."""

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignDenseAdaMax, self).__init__()

        # Normalise the configuration once so forward() can pass it straight on.
        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        """Sample an (H+4) x (W+4) grid and max-pool it with kernel 3, stride 2."""
        align = RoIAlignDenseAdaFunction(
            self.aligned_height + 4,
            self.aligned_width + 4,
            self.spatial_scale,
        )
        sampled = align(features, rois)
        # No ReLU is applied to the sampled features before pooling.
        return max_pool2d(sampled, kernel_size=3, stride=2)


--------------------------------------------------------------------------------
/modules/roi_align/modules/roi_align.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/modules/roi_align.pyc


--------------------------------------------------------------------------------
/modules/roi_align/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #!/usr/bin/env python
 3 | 
 4 | import glob
 5 | import os
 6 | 
 7 | import torch
 8 | from setuptools import find_packages
 9 | from setuptools import setup
10 | from torch.utils.cpp_extension import CUDA_HOME
11 | from torch.utils.cpp_extension import CppExtension
12 | from torch.utils.cpp_extension import CUDAExtension
13 | 
14 | requirements = ["torch", "torchvision"]
15 | 
16 | 
def get_extensions():
    """Describe the ``_ext2.roi_align`` CUDA extension for ``setup()``.

    The extension is built from every ``*.cpp`` file in ``src/`` and linked
    against the pre-compiled kernel object ``src/cuda/roi_align.cu.o``.

    Returns:
        A one-element list containing the ``CUDAExtension``.

    Raises:
        RuntimeError: if CUDA is not usable. The extension links a CUDA object,
            so there is no CPU-only variant to fall back to.
    """
    if not torch.cuda.is_available() or CUDA_HOME is None:
        # Previously this path fell through to names that were only bound in
        # the CUDA branch and died with an UnboundLocalError.
        raise RuntimeError(
            "Building _ext2.roi_align requires CUDA: torch.cuda.is_available() "
            "must be True and CUDA_HOME must be set."
        )

    this_dir = os.path.dirname(os.path.abspath(__file__))
    extensions_dir = os.path.join(this_dir, "src")

    # glob already returns paths rooted at extensions_dir, so no further
    # joining is needed.
    sources = glob.glob(os.path.join(extensions_dir, "*.cpp"))

    # The CUDA kernels are compiled separately (see src/cuda) and linked in as
    # a ready-made object file.
    extra_objects = [os.path.join(this_dir, "src/cuda/roi_align.cu.o")]

    # `relative_to` / `with_cuda` were torch.utils.ffi options; setuptools'
    # Extension does not know them and only warned about them, so they are no
    # longer passed.
    return [
        CUDAExtension(
            "_ext2.roi_align",
            sources,
            include_dirs=[extensions_dir],
            define_macros=[("WITH_CUDA", None)],
            extra_compile_args={"cxx": []},
            extra_objects=extra_objects,
        )
    ]
64 | 
65 | 
# Package metadata; package discovery and install_requires are intentionally
# left out, only the compiled extension is registered.
_package_info = dict(
    name="RT_MDNet",
    version="1.1",
    author="amgao",
    url="https://github.com/IlchaeJung/RT-MDNet.git",
    description="Real time tracking using MDNet in pytorch",
)

setup(
    ext_modules=get_extensions(),
    cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
    **_package_info
)
77 | 


--------------------------------------------------------------------------------
/modules/roi_align/src.bak/.roi_align_cuda.cpp.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/src.bak/.roi_align_cuda.cpp.swp


--------------------------------------------------------------------------------
/modules/roi_align/src.bak/cuda/Makefile:
--------------------------------------------------------------------------------
# Compiles the RoIAlign CUDA kernels into roi_align.cu.o.
# `all` and `clean` are actions rather than files, so mark them phony.
.PHONY: all clean

all: roi_align_kernel.cu roi_align_kernel.h
	nvcc -c -o roi_align.cu.o roi_align_kernel.cu -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52

# -f keeps `make clean` from failing when the object was never built.
clean:
	rm -f roi_align.cu.o
6 | 


--------------------------------------------------------------------------------
/modules/roi_align/src.bak/cuda/roi_align.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/src.bak/cuda/roi_align.cu.o


--------------------------------------------------------------------------------
/modules/roi_align/src.bak/cuda/roi_align_kernel.cu:
--------------------------------------------------------------------------------
  1 | #ifdef __cplusplus
  2 | extern "C" {
  3 | #endif
  4 | 
  5 | #include <stdio.h>
  6 | #include <math.h>
  7 | #include <float.h>
  8 | #include "roi_align_kernel.h"
  9 | 
 10 | #define CUDA_1D_KERNEL_LOOP(i, n)                            \
 11 |     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
 12 |             i += blockDim.x * gridDim.x)
 13 | 
 14 | 
 15 |     __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) {
 16 |         CUDA_1D_KERNEL_LOOP(index, nthreads) {
 17 |             // (n, c, ph, pw) is an element in the aligned output
 18 |             int n = index;
 19 |             int pw = n % aligned_width;
 20 |             n /= aligned_width;
 21 |             int ph = n % aligned_height;
 22 |             n /= aligned_height;
 23 |             int c = n % channels;
 24 |             n /= channels;
 25 | 
 26 |             bottom_rois += n * 5;
 27 |             float roi_batch_ind = bottom_rois[0];
 28 |             float roi_start_w = bottom_rois[1] * spatial_scale;
 29 |             float roi_start_h = bottom_rois[2] * spatial_scale;
 30 |             float roi_end_w = bottom_rois[3] * spatial_scale;
 31 |             float roi_end_h = bottom_rois[4] * spatial_scale;
 32 | 
 33 |             // Force malformed ROIs to be 1x1
 34 |             float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
 35 |             float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
 36 |             //This is incorrect, because the coordinates for RoI follows the [L,R) convension. That is, (0, 0, 4, 4) denotes a box of size 4by4.
 37 |             /* original code */
 38 | 
 39 |             float bin_size_h = roi_height / (aligned_height - 1.);
 40 |             float bin_size_w = roi_width / (aligned_width - 1.);
 41 | 
 42 |             float h = (float)(ph) * bin_size_h + roi_start_h;
 43 |             float w = (float)(pw) * bin_size_w + roi_start_w;
 44 | 
 45 | 
 46 |             /*======================from ilchae========================*/
 47 |             /*
 48 |             float bin_size_h = roi_height / (aligned_height + 1.);
 49 |             float bin_size_w = roi_width / (aligned_width + 1.);
 50 | 
 51 |             float h = (float)(ph+1) * bin_size_h + roi_start_h;
 52 |             float w = (float)(pw+1) * bin_size_w + roi_start_w;
 53 |             */
 54 |             ////////////////////////////////////////////////////////////
 55 | 
 56 |             int hstart = fminf(floor(h), height - 2);
 57 |             int wstart = fminf(floor(w), width - 2);
 58 | 
 59 | 
 60 |             int img_start = roi_batch_ind * channels * height * width;
 61 | 
 62 |             // bilinear interpolation
 63 |             if (h < 0 || h >= height || w < 0 || w >= width) {
 64 |                 top_data[index] = 0.;
 65 |             } else {
 66 |                 float h_ratio = h - (float)(hstart);
 67 |                 float w_ratio = w - (float)(wstart);
 68 |                 int upleft = img_start + (c * height + hstart) * width + wstart;
 69 |                 int upright = upleft + 1;
 70 |                 int downleft = upleft + width;
 71 |                 int downright = downleft + 1;
 72 | 
 73 |                 top_data[index] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
 74 |                     + bottom_data[upright] * (1. - h_ratio) * w_ratio
 75 |                     + bottom_data[downleft] * h_ratio * (1. - w_ratio)
 76 |                     + bottom_data[downright] * h_ratio * w_ratio;
 77 |             }
 78 |         }
 79 |     }
 80 | 
 81 | 
 82 |     int ROIAlignForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) {
 83 |         // Launches ROIAlignForward on `stream` with enough 1024-thread blocks to cover every element of the
 84 |         // num_rois x channels x aligned_height x aligned_width output. Returns 1. A launch error reported by
 85 |         // cudaGetLastError() is printed to stderr and the process exits with status -1.
 86 |         const int kThreadsPerBlock = 1024;
 87 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
 88 |         if (output_size == 0) return 1;  // empty output: a zero-block grid is an invalid launch configuration and used to end in exit(-1)
 89 |         const int num_blocks = (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock;
 90 |         ROIAlignForward<<<num_blocks, kThreadsPerBlock, 0, stream>>>(output_size, bottom_data, spatial_scale, height, width, channels, aligned_height, aligned_width, bottom_rois, top_data);
 91 |         const cudaError_t err = cudaGetLastError();
 92 |         if (err != cudaSuccess) {
 93 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
 94 |             exit( -1 );
 95 |         }
 96 |         return 1;
 97 |     }
 98 | 
 99 | 
100 |     __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) {
101 |         // Backward pass of ROIAlign. For every element (n, c, ph, pw) of the aligned output, the incoming
102 |         // gradient top_diff[index] is distributed over the 2x2 feature-map cells around its sampling point,
103 |         // weighted bilinearly.
104 |         //
105 |         //   nthreads                       number of aligned output elements (loop bound)
106 |         //   top_diff                       gradient w.r.t. the aligned output, flat (n, c, ph, pw) order
107 |         //   spatial_scale                  multiplier applied to the ROI coordinates
108 |         //   height, width                  spatial size of one feature-map channel
109 |         //   channels                       number of channels per feature map
110 |         //   aligned_height, aligned_width  spatial size of the aligned output
111 |         //   bottom_diff                    gradient w.r.t. the feature map; only ever added to (atomicAdd),
112 |         //                                  never cleared here -- presumably zeroed by the caller, confirm
113 |         //   bottom_rois                    5 floats per ROI: batch index, start_w, start_h, end_w, end_h
114 |         CUDA_1D_KERNEL_LOOP(index, nthreads) {
115 |             // Decompose the flat index into (n, c, ph, pw).
116 |             const int pw = index % aligned_width;
117 |             const int ph = (index / aligned_width) % aligned_height;
118 |             const int c = (index / aligned_width / aligned_height) % channels;
119 |             const int n = index / aligned_width / aligned_height / channels;
120 | 
121 |             // Address this ROI through a local pointer. Advancing the `bottom_rois` parameter in place, as
122 |             // this code used to, compounds the offset whenever the loop body runs more than once in the
123 |             // same thread.
124 |             const float* roi = bottom_rois + n * 5;
125 |             const float roi_batch_ind = roi[0];
126 |             const float roi_start_w = roi[1] * spatial_scale;
127 |             const float roi_start_h = roi[2] * spatial_scale;
128 |             const float roi_end_w = roi[3] * spatial_scale;
129 |             const float roi_end_h = roi[4] * spatial_scale;
130 | 
131 |             // ROI extent; a negative extent (malformed ROI) is clamped to 0.
132 |             const float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
133 |             const float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
134 | 
135 |             // Sampling points are spread evenly from the ROI start (p = 0) to start + extent (p = aligned - 1).
136 |             // NOTE(review): an aligned size of 1 makes the divisor below zero.
137 |             // Disabled alternative credited to "ilchae": bin = extent / (aligned + 1), sample at (p + 1) * bin.
138 |             const float bin_size_h = roi_height / (aligned_height - 1.);
139 |             const float bin_size_w = roi_width / (aligned_width - 1.);
140 |             const float h = (float)(ph) * bin_size_h + roi_start_h;
141 |             const float w = (float)(pw) * bin_size_w + roi_start_w;
142 | 
143 |             // Top-left cell of the 2x2 neighbourhood, capped at (height - 2, width - 2) so that the +1
144 |             // neighbours stay inside a map that is at least 2 cells high and wide.
145 |             const int hstart = fminf(floor(h), height - 2);
146 |             const int wstart = fminf(floor(w), width - 2);
147 |             const int img_start = roi_batch_ind * channels * height * width;
148 | 
149 |             // Sampling points outside the feature map contribute no gradient.
150 |             if (!(h < 0 || h >= height || w < 0 || w >= width)) {
151 |                 const float h_ratio = h - (float)(hstart);
152 |                 const float w_ratio = w - (float)(wstart);
153 |                 const int upleft = img_start + (c * height + hstart) * width + wstart;
154 |                 const int upright = upleft + 1;
155 |                 const int downleft = upleft + width;
156 |                 const int downright = downleft + 1;
157 |                 const float grad = top_diff[index];
158 | 
159 |                 // Bilinear weights: the closer corner receives the larger share.
160 |                 atomicAdd(bottom_diff + upleft, grad * (1. - h_ratio) * (1 - w_ratio));
161 |                 atomicAdd(bottom_diff + upright, grad * (1. - h_ratio) * w_ratio);
162 |                 atomicAdd(bottom_diff + downleft, grad * h_ratio * (1 - w_ratio));
163 |                 atomicAdd(bottom_diff + downright, grad * h_ratio * w_ratio);
164 |             }
165 |         }
166 |     }
167 | 
168 |     int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) {
169 |         // Launches ROIAlignBackward on `stream` with enough 1024-thread blocks to cover every element of the aligned
170 |         // output gradient. Returns 1; a launch error is printed to stderr and exits the process. `batch_size` is not used.
171 |         const int kThreadsPerBlock = 1024;
172 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
173 |         if (output_size == 0) return 1;  // empty output: a zero-block grid is an invalid launch configuration and used to end in exit(-1)
174 |         const int num_blocks = (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock;
175 |         ROIAlignBackward<<<num_blocks, kThreadsPerBlock, 0, stream>>>(output_size, top_diff, spatial_scale, height, width, channels, aligned_height, aligned_width, bottom_diff, bottom_rois);
176 |         const cudaError_t err = cudaGetLastError();
177 |         if (err != cudaSuccess) {
178 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
179 |             exit( -1 );
180 |         }
181 |         return 1;
182 |     }
183 | 
184 |     __global__ void ROIAlignAdaForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) {
185 |         // Forward pass of the "Ada" ROIAlign variant. Every aligned output element (n, c, ph, pw) is interpolated
186 |         // linearly between the feature-map cells at offsets 0 and `stride` from a top-left cell, in both
187 |         // directions; `stride` is the bin size rounded to an integer, but at least 1.
188 |         //   nthreads                       number of aligned output elements (loop bound)
189 |         //   bottom_data                    feature maps, indexed below as (batch, channel, y, x)
190 |         //   spatial_scale                  multiplier applied to the ROI coordinates
191 |         //   height, width, channels        size of one feature map
192 |         //   aligned_height, aligned_width  spatial size of the aligned output
193 |         //   bottom_rois                    5 floats per ROI: batch index, start_w, start_h, end_w, end_h
194 |         //   top_data                       aligned output, flat (n, c, ph, pw) order; every element is overwritten
195 |         CUDA_1D_KERNEL_LOOP(index, nthreads) {
196 |             // Decompose the flat index into (n, c, ph, pw).
197 |             const int pw = index % aligned_width;
198 |             const int ph = (index / aligned_width) % aligned_height;
199 |             const int c = (index / aligned_width / aligned_height) % channels;
200 |             const int n = index / aligned_width / aligned_height / channels;
201 |             // Local ROI pointer: advancing the `bottom_rois` parameter in place, as this code used to,
202 |             // compounds the offset whenever the loop body runs more than once in the same thread.
203 |             const float* roi = bottom_rois + n * 5;
204 |             const float roi_batch_ind = roi[0];
205 |             const float roi_start_w = roi[1] * spatial_scale;
206 |             const float roi_start_h = roi[2] * spatial_scale;
207 |             const float roi_end_w = roi[3] * spatial_scale;
208 |             const float roi_end_h = roi[4] * spatial_scale;
209 |             // ROI extent; a negative extent (malformed ROI) is clamped to 0.
210 |             // Original author's caveat: RoI coordinates follow the [L, R) convention, i.e. (0, 0, 4, 4) is a 4x4 box, so this is incorrect.
211 |             const float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
212 |             const float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
213 |             // One bin per output cell; the interpolation stride is the rounded bin size, never below 1.
214 |             const float bin_size_h = roi_height / (float)(aligned_height);
215 |             const float bin_size_w = roi_width / (float)(aligned_width);
216 |             const int stride_w = fmaxf(1,round(bin_size_w));
217 |             const int stride_h = fmaxf(1,round(bin_size_h));
218 |             // Sampling point: the start corner of bin (ph, pw); the top-left cell is capped at (height - 2, width - 2).
219 |             const float h = (float)(ph) * bin_size_h + roi_start_h;
220 |             const float w = (float)(pw) * bin_size_w + roi_start_w;
221 |             const int hstart = fminf(floor(h), height - 2);
222 |             const int wstart = fminf(floor(w), width - 2);
223 |             const int img_start = roi_batch_ind * channels * height * width;
224 | 
225 |             if (h < 0 || h >= height || w < 0 || w >= width) {
226 |                 top_data[index] = 0.;
227 |             } else {
228 |                 // Sum in a local variable and store once, so the result no longer depends on top_data
229 |                 // having been zeroed before the launch.
230 |                 float acc = 0.f;
231 |                 for (int hidx = 0; hidx <= stride_h; hidx += stride_h) {
232 |                     for (int widx = 0; widx <= stride_w; widx += stride_w) {
233 |                         const int y = hstart + hidx;
234 |                         const int x = wstart + widx;
235 |                         if (y < 0 || y >= height || x < 0 || x >= width) continue;  // tap outside the map
236 |                         const float h_ratio = 1. - (float)fabsf(h-hstart-hidx)/(float)stride_h;
237 |                         const float w_ratio = 1. - (float)fabsf(w-wstart-widx)/(float)stride_w;
238 |                         acc += bottom_data[img_start + (c * height + y) * width + x]*h_ratio*w_ratio;
239 |                     }
240 |                 }
241 |                 top_data[index] = acc;
242 |             }
243 |         }
244 |     }
245 | 
246 | 
247 |     int ROIAlignAdaForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) {
248 |         // Launches ROIAlignAdaForward on `stream` with enough 1024-thread blocks to cover every element of the
249 |         // num_rois x channels x aligned_height x aligned_width output. Returns 1. A launch error reported by
250 |         // cudaGetLastError() is printed to stderr and the process exits with status -1.
251 |         const int kThreadsPerBlock = 1024;
252 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
253 |         if (output_size == 0) return 1;  // empty output: a zero-block grid is an invalid launch configuration and used to end in exit(-1)
254 |         const int num_blocks = (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock;
255 |         ROIAlignAdaForward<<<num_blocks, kThreadsPerBlock, 0, stream>>>(output_size, bottom_data, spatial_scale, height, width, channels, aligned_height, aligned_width, bottom_rois, top_data);
256 |         const cudaError_t err = cudaGetLastError();
257 |         if (err != cudaSuccess) {
258 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
259 |             exit( -1 );
260 |         }
261 |         return 1;
262 |     }
263 | 
264 | 
265 |     __global__ void ROIAlignAdaBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) {
266 |         // Backward pass of the "Ada" ROIAlign variant: distributes top_diff[index] over the feature-map cells at
267 |         // offsets 0 and `stride` from a top-left cell, in both directions, with linear weights. `stride` is the
268 |         // bin size rounded to an integer, but at least 1.
269 |         //   nthreads, top_diff             element count and gradient of the aligned output, flat (n, c, ph, pw) order
270 |         //   spatial_scale, bottom_rois     ROIs as 5 floats (batch index, start_w, start_h, end_w, end_h), scaled by spatial_scale
271 |         //   height, width, channels, aligned_height, aligned_width   feature-map and aligned-output sizes
272 |         //   bottom_diff                    feature-map gradient; only added to via atomicAdd (presumably zeroed by the caller; confirm)
273 |         CUDA_1D_KERNEL_LOOP(index, nthreads) {
274 |             // Decompose the flat index into (n, c, ph, pw).
275 |             const int pw = index % aligned_width;
276 |             const int ph = (index / aligned_width) % aligned_height;
277 |             const int c = (index / aligned_width / aligned_height) % channels;
278 |             const int n = index / aligned_width / aligned_height / channels;
279 | 
280 |             // Local ROI pointer: advancing the `bottom_rois` parameter in place, as this code used to,
281 |             // compounds the offset whenever the loop body runs more than once in the same thread.
282 |             const float* roi = bottom_rois + n * 5;
283 |             const float roi_batch_ind = roi[0];
284 |             const float roi_start_w = roi[1] * spatial_scale;
285 |             const float roi_start_h = roi[2] * spatial_scale;
286 |             const float roi_end_w = roi[3] * spatial_scale;
287 |             const float roi_end_h = roi[4] * spatial_scale;
288 |             // ROI extent; a negative extent (malformed ROI) is clamped to 0.
289 |             const float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
290 |             const float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
291 |             // One bin per output cell; the interpolation stride is the rounded bin size, never below 1.
292 |             const float bin_size_h = roi_height / (float)(aligned_height);
293 |             const float bin_size_w = roi_width / (float)(aligned_width);
294 |             const int stride_w = fmaxf(1,round(bin_size_w));
295 |             const int stride_h = fmaxf(1,round(bin_size_h));
296 |             const float h = (float)(ph) * bin_size_h + roi_start_h;
297 |             const float w = (float)(pw) * bin_size_w + roi_start_w;
298 |             const int hstart = fminf(floor(h), height - 2);
299 |             const int wstart = fminf(floor(w), width - 2);
300 |             const int img_start = roi_batch_ind * channels * height * width;
301 | 
302 |             // Sampling points outside the feature map contribute no gradient.
303 |             if (!(h < 0 || h >= height || w < 0 || w >= width)) {
304 |                 const float grad = top_diff[index];
305 |                 for (int hidx = 0; hidx <= stride_h; hidx += stride_h) {
306 |                     for (int widx = 0; widx <= stride_w; widx += stride_w) {
307 |                         const int y = hstart + hidx;
308 |                         const int x = wstart + widx;
309 |                         if (y < 0 || y >= height || x < 0 || x >= width) continue;  // tap outside the map
310 |                         const int cur_loc = img_start + (c * height + y) * width + x;
311 |                         const float h_ratio = 1. - (float)fabsf(h-hstart-hidx)/(float)(stride_h);
312 |                         const float w_ratio = 1. - (float)fabsf(w-wstart-widx)/(float)(stride_w);
313 |                         atomicAdd(bottom_diff + cur_loc, grad*h_ratio*w_ratio);
314 |                     }
315 |                 }
316 |             }
317 |         }
318 |     }
319 | 
320 |     int ROIAlignAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) {
321 |         // Launches ROIAlignAdaBackward on `stream` with enough 1024-thread blocks to cover every element of the aligned
322 |         // output gradient. Returns 1; a launch error is printed to stderr and exits the process. `batch_size` is not used.
323 |         const int kThreadsPerBlock = 1024;
324 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
325 |         if (output_size == 0) return 1;  // empty output: a zero-block grid is an invalid launch configuration and used to end in exit(-1)
326 |         const int num_blocks = (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock;
327 |         ROIAlignAdaBackward<<<num_blocks, kThreadsPerBlock, 0, stream>>>(output_size, top_diff, spatial_scale, height, width, channels, aligned_height, aligned_width, bottom_diff, bottom_rois);
328 |         const cudaError_t err = cudaGetLastError();
329 |         if (err != cudaSuccess) {
330 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
331 |             exit( -1 );
332 |         }
333 |         return 1;
334 |     }
335 | 
336 | 
337 | 
338 | 
339 | 
340 | 
341 | 
342 | 
343 | 
344 | 
345 | 
346 | 
347 | 
348 | 
349 | 
350 | 
351 | 
352 |     __global__ void ROIAlignDenseAdaForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) {
353 |         // Forward pass of the "DenseAda" ROIAlign variant. Every aligned output element (n, c, ph, pw) is the
354 |         // weighted average of all feature-map cells at offsets 0..stride (inclusive) from a top-left cell, in
355 |         // both directions; `stride` is the bin size rounded to an integer, but at least 1.
356 |         //   nthreads                       number of aligned output elements (loop bound)
357 |         //   bottom_data                    feature maps, indexed below as (batch, channel, y, x)
358 |         //   spatial_scale                  multiplier applied to the ROI coordinates
359 |         //   height, width, channels        size of one feature map
360 |         //   aligned_height, aligned_width  spatial size of the aligned output
361 |         //   bottom_rois                    5 floats per ROI: batch index, start_w, start_h, end_w, end_h
362 |         //   top_data                       aligned output, flat (n, c, ph, pw) order; every element is overwritten
363 |         CUDA_1D_KERNEL_LOOP(index, nthreads) {
364 |             // Decompose the flat index into (n, c, ph, pw).
365 |             const int pw = index % aligned_width;
366 |             const int ph = (index / aligned_width) % aligned_height;
367 |             const int c = (index / aligned_width / aligned_height) % channels;
368 |             const int n = index / aligned_width / aligned_height / channels;
369 |             // Local ROI pointer: advancing the `bottom_rois` parameter in place, as this code used to,
370 |             // compounds the offset whenever the loop body runs more than once in the same thread.
371 |             const float* roi = bottom_rois + n * 5;
372 |             const float roi_batch_ind = roi[0];
373 |             const float roi_start_w = roi[1] * spatial_scale;
374 |             const float roi_start_h = roi[2] * spatial_scale;
375 |             const float roi_end_w = roi[3] * spatial_scale;
376 |             const float roi_end_h = roi[4] * spatial_scale;
377 |             // ROI extent; a negative extent (malformed ROI) is clamped to 0.
378 |             // Original author's caveat: RoI coordinates follow the [L, R) convention, i.e. (0, 0, 4, 4) is a 4x4 box, so this is incorrect.
379 |             const float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
380 |             const float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
381 |             // One bin per output cell; the window stride is the rounded bin size, never below 1.
382 |             const float bin_size_h = roi_height / (float)(aligned_height);
383 |             const float bin_size_w = roi_width / (float)(aligned_width);
384 |             const int stride_w = fmaxf(1,round(bin_size_w));
385 |             const int stride_h = fmaxf(1,round(bin_size_h));
386 |             // Sampling point: the start corner of bin (ph, pw); the top-left cell is capped at (height - 2, width - 2).
387 |             const float h = (float)(ph) * bin_size_h + roi_start_h;
388 |             const float w = (float)(pw) * bin_size_w + roi_start_w;
389 |             const int hstart = fminf(floor(h), height - 2);
390 |             const int wstart = fminf(floor(w), width - 2);
391 |             const int img_start = roi_batch_ind * channels * height * width;
392 | 
393 |             if (h < 0 || h >= height || w < 0 || w >= width) {
394 |                 top_data[index] = 0.;
395 |             } else {
396 |                 // Average over the window cells that lie inside the map (cells outside it used to be read
397 |                 // unchecked). Summing locally removes the dependence on top_data being zeroed beforehand.
398 |                 float acc = 0.f;
399 |                 float ratio_sum = 0.f;
400 |                 for (int hidx = 0; hidx <= stride_h; hidx++) {
401 |                     for (int widx = 0; widx <= stride_w; widx++) {
402 |                         if (hstart + hidx < 0 || hstart + hidx >= height || wstart + widx < 0 || wstart + widx >= width) continue;
403 |                         const float h_ratio = 1. - (float)fabsf(h-hstart-hidx)/(float)stride_h;
404 |                         const float w_ratio = 1. - (float)fabsf(w-wstart-widx)/(float)stride_w;
405 |                         const float ratio = h_ratio * w_ratio;
406 |                         ratio_sum += ratio;
407 |                         acc += bottom_data[img_start + (c * height + hstart + hidx) * width + wstart + widx]*ratio;
408 |                     }
409 |                 }
410 |                 // A zero weight sum would turn the division into NaN/inf; write 0 instead.
411 |                 top_data[index] = (ratio_sum != 0.f) ? acc / ratio_sum : 0.f;
412 |             }
413 |         }
414 |     }
415 | 
416 | 
417 |     int ROIAlignDenseAdaForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) {
418 |         // Launches ROIAlignDenseAdaForward on `stream` with enough 1024-thread blocks to cover every element of the
419 |         // num_rois x channels x aligned_height x aligned_width output. Returns 1. A launch error reported by
420 |         // cudaGetLastError() is printed to stderr and the process exits with status -1.
421 |         const int kThreadsPerBlock = 1024;
422 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
423 |         if (output_size == 0) return 1;  // empty output: a zero-block grid is an invalid launch configuration and used to end in exit(-1)
424 |         const int num_blocks = (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock;
425 |         ROIAlignDenseAdaForward<<<num_blocks, kThreadsPerBlock, 0, stream>>>(output_size, bottom_data, spatial_scale, height, width, channels, aligned_height, aligned_width, bottom_rois, top_data);
426 |         const cudaError_t err = cudaGetLastError();
427 |         if (err != cudaSuccess) {
428 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
429 |             exit( -1 );
430 |         }
431 |         return 1;
432 |     }
433 | 
434 | 
435 |     __global__ void ROIAlignDenseAdaBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) {
436 |         // Backward pass of the "DenseAda" ROIAlign variant, written as the adjoint of the weighted average that
437 |         // ROIAlignDenseAdaForward computes: out = sum_i(r_i * x_i) / sum_i(r_i) over the cells at offsets 0..stride
438 |         // in both directions, so each such cell inside the map receives top_diff[index] * r_i / sum_i(r_i).
439 |         // The earlier version visited only offsets 0 and stride, used Gaussian weights that did not match the
440 |         // forward pass, and wrote to cells without checking that they lie inside the feature map.
441 |         // bottom_diff is only added to (atomicAdd); bottom_rois holds 5 floats per ROI (batch index, start_w, start_h, end_w, end_h).
442 |         CUDA_1D_KERNEL_LOOP(index, nthreads) {
443 |             const int pw = index % aligned_width;  // flat index -> (n, c, ph, pw)
444 |             const int ph = (index / aligned_width) % aligned_height;
445 |             const int c = (index / aligned_width / aligned_height) % channels;
446 |             const int n = index / aligned_width / aligned_height / channels;
447 |             const float* roi = bottom_rois + n * 5;  // local pointer: advancing the parameter itself compounds across loop iterations
448 |             const float roi_batch_ind = roi[0];
449 |             const float roi_start_w = roi[1] * spatial_scale;
450 |             const float roi_start_h = roi[2] * spatial_scale;
451 |             const float roi_end_w = roi[3] * spatial_scale;
452 |             const float roi_end_h = roi[4] * spatial_scale;
453 |             const float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);  // negative extents are clamped to 0
454 |             const float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
455 |             const float bin_size_h = roi_height / (float)(aligned_height);
456 |             const float bin_size_w = roi_width / (float)(aligned_width);
457 |             const int stride_w = fmaxf(1,round(bin_size_w));  // window stride: rounded bin size, never below 1
458 |             const int stride_h = fmaxf(1,round(bin_size_h));
459 |             const float h = (float)(ph) * bin_size_h + roi_start_h;
460 |             const float w = (float)(pw) * bin_size_w + roi_start_w;
461 |             const int hstart = fminf(floor(h), height - 2);
462 |             const int wstart = fminf(floor(w), width - 2);
463 |             const int img_start = roi_batch_ind * channels * height * width;
464 |             if (!(h < 0 || h >= height || w < 0 || w >= width)) {  // points outside the map contribute no gradient
465 |                 // Pass 1: total weight of the window cells that lie inside the map (the forward normaliser).
466 |                 float ratio_sum = 0.f;
467 |                 for (int hidx = 0; hidx <= stride_h; hidx++) {
468 |                     for (int widx = 0; widx <= stride_w; widx++) {
469 |                         if (hstart + hidx < 0 || hstart + hidx >= height || wstart + widx < 0 || wstart + widx >= width) continue;
470 |                         const float h_ratio = 1. - (float)fabsf(h-hstart-hidx)/(float)stride_h;
471 |                         const float w_ratio = 1. - (float)fabsf(w-wstart-widx)/(float)stride_w;
472 |                         ratio_sum += h_ratio * w_ratio;
473 |                     }
474 |                 }
475 |                 // Pass 2: scatter the normalised gradient; skipped when the weight sum is zero (nothing to divide by).
476 |                 if (ratio_sum != 0.f) {
477 |                     const float grad = top_diff[index] / ratio_sum;
478 |                     for (int hidx = 0; hidx <= stride_h; hidx++) {
479 |                         for (int widx = 0; widx <= stride_w; widx++) {
480 |                             if (hstart + hidx < 0 || hstart + hidx >= height || wstart + widx < 0 || wstart + widx >= width) continue;
481 |                             const float h_ratio = 1. - (float)fabsf(h-hstart-hidx)/(float)stride_h;
482 |                             const float w_ratio = 1. - (float)fabsf(w-wstart-widx)/(float)stride_w;
483 |                             atomicAdd(bottom_diff + img_start + (c * height + hstart + hidx) * width + wstart + widx, grad * h_ratio * w_ratio);
484 |                         }
485 |                     }
486 |                 }
487 |             }
488 |         }
489 |     }
490 | 
491 |     int ROIAlignDenseAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width, const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) {
492 |         // Launches ROIAlignDenseAdaBackward on `stream` with enough 1024-thread blocks to cover every element of the aligned
493 |         // output gradient. Returns 1; a launch error is printed to stderr and exits the process. `batch_size` is not used.
494 |         const int kThreadsPerBlock = 1024;
495 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
496 |         if (output_size == 0) return 1;  // empty output: a zero-block grid is an invalid launch configuration and used to end in exit(-1)
497 |         const int num_blocks = (output_size + kThreadsPerBlock - 1) / kThreadsPerBlock;
498 |         ROIAlignDenseAdaBackward<<<num_blocks, kThreadsPerBlock, 0, stream>>>(output_size, top_diff, spatial_scale, height, width, channels, aligned_height, aligned_width, bottom_diff, bottom_rois);
499 |         const cudaError_t err = cudaGetLastError();
500 |         if (err != cudaSuccess) {
501 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
502 |             exit( -1 );
503 |         }
504 |         return 1;
505 |     }
506 | 
507 | 
508 | 
509 | #ifdef __cplusplus
510 | }
511 | #endif
512 | 
513 | 
514 | 


--------------------------------------------------------------------------------
/modules/roi_align/src.bak/cuda/roi_align_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _ROI_ALIGN_KERNEL
 2 | #define _ROI_ALIGN_KERNEL
 3 | /* Declarations of the ROIAlign CUDA kernels and their host-side launchers: plain, "Ada" and "DenseAda" variants. */
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | /* Forward kernel: each aligned output element is a bilinear blend of four feature-map cells (0 when the sampling point is outside the map). */
 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data,
 9 |     const float spatial_scale, const int height, const int width,
10 |     const int channels, const int aligned_height, const int aligned_width,
11 |     const float* bottom_rois, float* top_data);
12 | /* Launches ROIAlignForward on `stream`; returns 1, or prints the CUDA error and exits the process if the launch fails. */
13 | int ROIAlignForwardLaucher(
14 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
15 |     const int width, const int channels, const int aligned_height,
16 |     const int aligned_width, const float* bottom_rois,
17 |     float* top_data, cudaStream_t stream);
18 | /* Backward kernel: adds each output gradient to the four surrounding cells of bottom_diff with atomicAdd. */
19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff,
20 |     const float spatial_scale, const int height, const int width,
21 |     const int channels, const int aligned_height, const int aligned_width,
22 |     float* bottom_diff, const float* bottom_rois);
23 | /* Launches ROIAlignBackward on `stream`; batch_size is accepted but not used by the launcher. */
24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
25 |     const int height, const int width, const int channels, const int aligned_height,
26 |     const int aligned_width, const float* bottom_rois,
27 |     float* bottom_diff, cudaStream_t stream);
28 | 
29 | /* "Ada" forward kernel: interpolates between cells at offsets 0 and stride, where stride is the rounded bin size (at least 1). */
30 | __global__ void ROIAlignAdaForward(const int nthreads, const float* bottom_data,
31 |     const float spatial_scale, const int height, const int width,
32 |     const int channels, const int aligned_height, const int aligned_width,
33 |     const float* bottom_rois, float* top_data);
34 | /* Launches ROIAlignAdaForward on `stream`; returns 1, or prints the CUDA error and exits the process if the launch fails. */
35 | int ROIAlignAdaForwardLaucher(
36 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
37 |     const int width, const int channels, const int aligned_height,
38 |     const int aligned_width, const float* bottom_rois,
39 |     float* top_data, cudaStream_t stream);
40 | /* "Ada" backward kernel: adds each output gradient to the cells at offsets 0 and stride of bottom_diff with atomicAdd. */
41 | __global__ void ROIAlignAdaBackward(const int nthreads, const float* top_diff,
42 |     const float spatial_scale, const int height, const int width,
43 |     const int channels, const int aligned_height, const int aligned_width,
44 |     float* bottom_diff, const float* bottom_rois);
45 | /* Launches ROIAlignAdaBackward on `stream`; batch_size is accepted but not used by the launcher. */
46 | int ROIAlignAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
47 |     const int height, const int width, const int channels, const int aligned_height,
48 |     const int aligned_width, const float* bottom_rois,
49 |     float* bottom_diff, cudaStream_t stream);
50 | 
51 | /* "DenseAda" forward kernel: weighted average over every cell at offsets 0..stride in both directions. */
52 | __global__ void ROIAlignDenseAdaForward(const int nthreads, const float* bottom_data,
53 |     const float spatial_scale, const int height, const int width,
54 |     const int channels, const int aligned_height, const int aligned_width,
55 |     const float* bottom_rois, float* top_data);
56 | /* Launches ROIAlignDenseAdaForward on `stream`; returns 1, or prints the CUDA error and exits the process if the launch fails. */
57 | int ROIAlignDenseAdaForwardLaucher(
58 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
59 |     const int width, const int channels, const int aligned_height,
60 |     const int aligned_width, const float* bottom_rois,
61 |     float* top_data, cudaStream_t stream);
62 | /* "DenseAda" backward kernel: adds its gradient contributions to bottom_diff with atomicAdd. */
63 | __global__ void ROIAlignDenseAdaBackward(const int nthreads, const float* top_diff,
64 |     const float spatial_scale, const int height, const int width,
65 |     const int channels, const int aligned_height, const int aligned_width,
66 |     float* bottom_diff, const float* bottom_rois);
67 | /* Launches ROIAlignDenseAdaBackward on `stream`; batch_size is accepted but not used by the launcher. */
68 | int ROIAlignDenseAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
69 |     const int height, const int width, const int channels, const int aligned_height,
70 |     const int aligned_width, const float* bottom_rois,
71 |     float* bottom_diff, cudaStream_t stream);
72 | 
73 | 
74 | #ifdef __cplusplus
75 | }
76 | #endif
77 | 
78 | #endif
79 | 
80 | 


--------------------------------------------------------------------------------
/modules/roi_align/src.bak/roi_align_cuda.c:
--------------------------------------------------------------------------------
  1 | #include <THC/THC.h>
  2 | #include <math.h>
  3 | #include "cuda/roi_align_kernel.h"
  4 | 
  5 | extern THCState *state;
  6 | 
  7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
  8 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
  9 | {
 10 |     /*
 11 |      * Host entry point of the ROIAlign forward pass for THCudaTensor inputs.
 12 |      *
 13 |      *   aligned_height, aligned_width  spatial size of the aligned output
 14 |      *   spatial_scale                  forwarded unchanged to the kernel launcher
 15 |      *   features                       sizes 1, 2 and 3 are read as channels, height and width
 16 |      *   rois                           size 0 is the ROI count; size 1 must be 5
 17 |      *   output                         buffer handed to the launcher as its destination
 18 |      *
 19 |      * Returns 0 without launching anything when rois does not have 5 columns,
 20 |      * and 1 otherwise. The batch size of features is not checked (a former
 21 |      * batch_size == 1 restriction is disabled).
 22 |      */
 23 | 
 24 |     const int roi_count = THCudaTensor_size(state, rois, 0);
 25 |     const int roi_columns = THCudaTensor_size(state, rois, 1);
 26 |     if (roi_columns != 5)
 27 |         return 0;
 28 | 
 29 |     /* Raw data pointers of the three tensors. */
 30 |     float * feature_ptr = THCudaTensor_data(state, features);
 31 |     float * roi_ptr = THCudaTensor_data(state, rois);
 32 |     float * output_ptr = THCudaTensor_data(state, output);
 33 | 
 34 |     /* Feature-map geometry. */
 35 |     const int channel_count = THCudaTensor_size(state, features, 1);
 36 |     const int feature_height = THCudaTensor_size(state, features, 2);
 37 |     const int feature_width = THCudaTensor_size(state, features, 3);
 38 | 
 39 |     /* Launch on the stream returned by THCState_getCurrentStream. */
 40 |     cudaStream_t stream = THCState_getCurrentStream(state);
 41 |     ROIAlignForwardLaucher(
 42 |         feature_ptr, spatial_scale, roi_count,
 43 |         feature_height, feature_width, channel_count,
 44 |         aligned_height, aligned_width,
 45 |         roi_ptr, output_ptr, stream);
 46 | 
 47 |     return 1;
 48 | }
 49 | 
 50 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 51 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
 52 | {
 53 |     /*
 54 |      * Host entry point of the ROIAlign backward pass for THCudaTensor inputs.
 55 |      *
 56 |      *   aligned_height, aligned_width  spatial size of the aligned output
 57 |      *   spatial_scale                  forwarded unchanged to the kernel launcher
 58 |      *   top_grad                       gradient of the aligned output
 59 |      *   rois                           size 0 is the ROI count; size 1 must be 5
 60 |      *   bottom_grad                    sizes 0..3 are read as batch, channels, height and width
 61 |      *
 62 |      * Returns 0 without launching anything when rois does not have 5 columns,
 63 |      * and 1 otherwise. The batch size is passed on but not restricted to 1.
 64 |      */
 65 |     const int roi_count = THCudaTensor_size(state, rois, 0);
 66 |     const int roi_columns = THCudaTensor_size(state, rois, 1);
 67 |     if (roi_columns != 5)
 68 |         return 0;
 69 | 
 70 |     /* Raw data pointers of the three tensors. */
 71 |     float * top_grad_ptr = THCudaTensor_data(state, top_grad);
 72 |     float * roi_ptr = THCudaTensor_data(state, rois);
 73 |     float * bottom_grad_ptr = THCudaTensor_data(state, bottom_grad);
 74 | 
 75 |     /* Geometry of the gradient buffer. */
 76 |     const int batch_size = THCudaTensor_size(state, bottom_grad, 0);
 77 |     const int channel_count = THCudaTensor_size(state, bottom_grad, 1);
 78 |     const int feature_height = THCudaTensor_size(state, bottom_grad, 2);
 79 |     const int feature_width = THCudaTensor_size(state, bottom_grad, 3);
 80 | 
 81 |     cudaStream_t stream = THCState_getCurrentStream(state);
 82 |     ROIAlignBackwardLaucher(
 83 |         top_grad_ptr, spatial_scale, batch_size, roi_count,
 84 |         feature_height, feature_width, channel_count,
 85 |         aligned_height, aligned_width,
 86 |         roi_ptr, bottom_grad_ptr, stream);
 87 |     return 1;
 88 | }
 89 | 
 90 | 
 91 | int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 92 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
 93 | {
 94 |     /*
 95 |      * Host entry point of the "Ada" ROIAlign forward pass for THCudaTensor inputs.
 96 |      *
 97 |      *   aligned_height, aligned_width  spatial size of the aligned output
 98 |      *   spatial_scale                  forwarded unchanged to the kernel launcher
 99 |      *   features                       sizes 1, 2 and 3 are read as channels, height and width
100 |      *   rois                           size 0 is the ROI count; size 1 must be 5
101 |      *   output                         buffer handed to the launcher as its destination
102 |      *
103 |      * Returns 0 without launching anything when rois does not have 5 columns,
104 |      * and 1 otherwise. The batch size of features is not checked (a former
105 |      * batch_size == 1 restriction is disabled).
106 |      */
107 | 
108 |     const int roi_count = THCudaTensor_size(state, rois, 0);
109 |     const int roi_columns = THCudaTensor_size(state, rois, 1);
110 |     if (roi_columns != 5)
111 |         return 0;
112 | 
113 |     /* Raw data pointers of the three tensors. */
114 |     float * feature_ptr = THCudaTensor_data(state, features);
115 |     float * roi_ptr = THCudaTensor_data(state, rois);
116 |     float * output_ptr = THCudaTensor_data(state, output);
117 | 
118 |     /* Feature-map geometry. */
119 |     const int channel_count = THCudaTensor_size(state, features, 1);
120 |     const int feature_height = THCudaTensor_size(state, features, 2);
121 |     const int feature_width = THCudaTensor_size(state, features, 3);
122 | 
123 |     /* Launch on the stream returned by THCState_getCurrentStream. */
124 |     cudaStream_t stream = THCState_getCurrentStream(state);
125 |     ROIAlignAdaForwardLaucher(
126 |         feature_ptr, spatial_scale, roi_count,
127 |         feature_height, feature_width, channel_count,
128 |         aligned_height, aligned_width,
129 |         roi_ptr, output_ptr, stream);
130 | 
131 |     return 1;
132 | }
133 | 
134 | int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
135 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
136 | {
137 |     /*
138 |      * Thin wrapper around ROIAlignAdaBackwardLaucher, which is called with the
139 |      * stream returned by THCState_getCurrentStream(state).
140 |      *
141 |      * top_grad, rois and bottom_grad are passed on as raw float pointers.
142 |      *
143 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
144 |      */
145 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
146 |     int roi_count = THCudaTensor_size(state, rois, 0);
147 |     int roi_cols = THCudaTensor_size(state, rois, 1);
148 |     if (roi_cols != 5)
149 |     {
150 |         return 0;
151 |     }
152 | 
153 |     // Dims 0, 2, 3 and 1 of bottom_grad; no constraint is put on dim 0.
154 |     int grad_batch = THCudaTensor_size(state, bottom_grad, 0);
155 |     int grad_height = THCudaTensor_size(state, bottom_grad, 2);
156 |     int grad_width = THCudaTensor_size(state, bottom_grad, 3);
157 |     int grad_channels = THCudaTensor_size(state, bottom_grad, 1);
158 | 
159 |     // Raw float pointers into the three tensors.
160 |     float * top_ptr = THCudaTensor_data(state, top_grad);
161 |     float * roi_ptr = THCudaTensor_data(state, rois);
162 |     float * bottom_ptr = THCudaTensor_data(state, bottom_grad);
163 | 
164 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
165 |     ROIAlignAdaBackwardLaucher(
166 |         top_ptr, spatial_scale, grad_batch, roi_count,
167 |         grad_height, grad_width, grad_channels,
168 |         aligned_height, aligned_width,
169 |         roi_ptr, bottom_ptr, cur_stream);
170 | 
171 |     return 1;
172 | }
173 | 
174 | 
175 | int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
176 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
177 | {
178 |     /*
179 |      * Thin wrapper around ROIAlignDenseAdaForwardLaucher, which is called with
180 |      * the stream returned by THCState_getCurrentStream(state).
181 |      *
182 |      * aligned_height, aligned_width and spatial_scale are forwarded as given;
183 |      * features, rois and output are passed on as raw float pointers.
184 |      *
185 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
186 |      */
187 | 
188 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
189 |     int roi_count = THCudaTensor_size(state, rois, 0);
190 |     int roi_cols = THCudaTensor_size(state, rois, 1);
191 |     if (roi_cols != 5)
192 |     {
193 |         return 0;
194 |     }
195 | 
196 |     // Height, width and channel count come from dims 2, 3 and 1 of features;
197 |     // dim 0 of features is never read here.
198 |     int feat_height = THCudaTensor_size(state, features, 2);
199 |     int feat_width = THCudaTensor_size(state, features, 3);
200 |     int feat_channels = THCudaTensor_size(state, features, 1);
201 | 
202 |     // Raw float pointers into the three tensors.
203 |     float * feat_ptr = THCudaTensor_data(state, features);
204 |     float * roi_ptr = THCudaTensor_data(state, rois);
205 |     float * out_ptr = THCudaTensor_data(state, output);
206 | 
207 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
208 | 
209 |     ROIAlignDenseAdaForwardLaucher(
210 |         feat_ptr, spatial_scale, roi_count,
211 |         feat_height, feat_width, feat_channels,
212 |         aligned_height, aligned_width,
213 |         roi_ptr, out_ptr, cur_stream);
214 | 
215 |     return 1;
216 | }
217 | 
218 | int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
219 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
220 | {
221 |     /*
222 |      * Thin wrapper around ROIAlignDenseAdaBackwardLaucher, which is called with
223 |      * the stream returned by THCState_getCurrentStream(state).
224 |      *
225 |      * top_grad, rois and bottom_grad are passed on as raw float pointers.
226 |      *
227 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
228 |      */
229 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
230 |     int roi_count = THCudaTensor_size(state, rois, 0);
231 |     int roi_cols = THCudaTensor_size(state, rois, 1);
232 |     if (roi_cols != 5)
233 |     {
234 |         return 0;
235 |     }
236 | 
237 |     // Dims 0, 2, 3 and 1 of bottom_grad; no constraint is put on dim 0.
238 |     int grad_batch = THCudaTensor_size(state, bottom_grad, 0);
239 |     int grad_height = THCudaTensor_size(state, bottom_grad, 2);
240 |     int grad_width = THCudaTensor_size(state, bottom_grad, 3);
241 |     int grad_channels = THCudaTensor_size(state, bottom_grad, 1);
242 | 
243 |     // Raw float pointers into the three tensors.
244 |     float * top_ptr = THCudaTensor_data(state, top_grad);
245 |     float * roi_ptr = THCudaTensor_data(state, rois);
246 |     float * bottom_ptr = THCudaTensor_data(state, bottom_grad);
247 | 
248 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
249 |     ROIAlignDenseAdaBackwardLaucher(
250 |         top_ptr, spatial_scale, grad_batch, roi_count,
251 |         grad_height, grad_width, grad_channels,
252 |         aligned_height, aligned_width,
253 |         roi_ptr, bottom_ptr, cur_stream);
254 | 
255 |     return 1;
256 | }
257 | 


--------------------------------------------------------------------------------
/modules/roi_align/src.bak/roi_align_cuda.cpp:
--------------------------------------------------------------------------------
  1 | //#include <THC/THC.h>
  2 | //#include <ATen/ATen.h>
  3 | #include "roi_align_cuda.hpp"
  4 | #include <math.h>
  5 | #include "cuda/roi_align_kernel.h"
  6 | //#include <torch/extension.h>
  7 | 
  8 | //THC_CLASS at::Context& at::globalContext();
  9 | //THCState *state = at::globalContext().getTHCState();
 10 | //ATen_CLASS at::Context& at::globalContext();
 11 | //THCState *state = at::globalContext().thc_state;
 12 | 
 13 | extern "C" int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 14 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
 15 | {
 16 |     /*
 17 |      * Thin wrapper around ROIAlignForwardLaucher, which is called with the
 18 |      * stream returned by THCState_getCurrentStream(state).
 19 |      *
 20 |      * aligned_height, aligned_width and spatial_scale are forwarded as given;
 21 |      * features, rois and output are passed on as raw float pointers.
 22 |      *
 23 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
 24 |      */
 25 | 
 26 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
 27 |     int roi_count = THCudaTensor_size(state, rois, 0);
 28 |     int roi_cols = THCudaTensor_size(state, rois, 1);
 29 |     if (roi_cols != 5)
 30 |     {
 31 |         return 0;
 32 |     }
 33 | 
 34 |     // Height, width and channel count come from dims 2, 3 and 1 of features;
 35 |     // dim 0 of features is never read here.
 36 |     int feat_height = THCudaTensor_size(state, features, 2);
 37 |     int feat_width = THCudaTensor_size(state, features, 3);
 38 |     int feat_channels = THCudaTensor_size(state, features, 1);
 39 | 
 40 |     // Raw float pointers into the three tensors.
 41 |     float * feat_ptr = THCudaTensor_data(state, features);
 42 |     float * roi_ptr = THCudaTensor_data(state, rois);
 43 |     float * out_ptr = THCudaTensor_data(state, output);
 44 | 
 45 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
 46 | 
 47 |     ROIAlignForwardLaucher(
 48 |         feat_ptr, spatial_scale, roi_count,
 49 |         feat_height, feat_width, feat_channels,
 50 |         aligned_height, aligned_width,
 51 |         roi_ptr, out_ptr, cur_stream);
 52 | 
 53 |     return 1;
 54 | }
 55 | 
 56 | extern "C" int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 57 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
 58 | {
 59 |     /*
 60 |      * Thin wrapper around ROIAlignBackwardLaucher, which is called with the
 61 |      * stream returned by THCState_getCurrentStream(state).
 62 |      *
 63 |      * top_grad, rois and bottom_grad are passed on as raw float pointers.
 64 |      *
 65 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
 66 |      */
 67 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
 68 |     int roi_count = THCudaTensor_size(state, rois, 0);
 69 |     int roi_cols = THCudaTensor_size(state, rois, 1);
 70 |     if (roi_cols != 5)
 71 |     {
 72 |         return 0;
 73 |     }
 74 | 
 75 |     // Dims 0, 2, 3 and 1 of bottom_grad; no constraint is put on dim 0.
 76 |     int grad_batch = THCudaTensor_size(state, bottom_grad, 0);
 77 |     int grad_height = THCudaTensor_size(state, bottom_grad, 2);
 78 |     int grad_width = THCudaTensor_size(state, bottom_grad, 3);
 79 |     int grad_channels = THCudaTensor_size(state, bottom_grad, 1);
 80 | 
 81 |     // Raw float pointers into the three tensors.
 82 |     float * top_ptr = THCudaTensor_data(state, top_grad);
 83 |     float * roi_ptr = THCudaTensor_data(state, rois);
 84 |     float * bottom_ptr = THCudaTensor_data(state, bottom_grad);
 85 | 
 86 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
 87 |     ROIAlignBackwardLaucher(
 88 |         top_ptr, spatial_scale, grad_batch, roi_count,
 89 |         grad_height, grad_width, grad_channels,
 90 |         aligned_height, aligned_width,
 91 |         roi_ptr, bottom_ptr, cur_stream);
 92 | 
 93 |     return 1;
 94 | }
 95 | 
 96 | 
 97 | extern "C" int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 98 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
 99 | {
100 |     /*
101 |      * Thin wrapper around ROIAlignAdaForwardLaucher, which is called with the
102 |      * stream returned by THCState_getCurrentStream(state).
103 |      *
104 |      * aligned_height, aligned_width and spatial_scale are forwarded as given;
105 |      * features, rois and output are passed on as raw float pointers.
106 |      *
107 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
108 |      */
109 | 
110 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
111 |     int roi_count = THCudaTensor_size(state, rois, 0);
112 |     int roi_cols = THCudaTensor_size(state, rois, 1);
113 |     if (roi_cols != 5)
114 |     {
115 |         return 0;
116 |     }
117 | 
118 |     // Height, width and channel count come from dims 2, 3 and 1 of features;
119 |     // dim 0 of features is never read here.
120 |     int feat_height = THCudaTensor_size(state, features, 2);
121 |     int feat_width = THCudaTensor_size(state, features, 3);
122 |     int feat_channels = THCudaTensor_size(state, features, 1);
123 | 
124 |     // Raw float pointers into the three tensors.
125 |     float * feat_ptr = THCudaTensor_data(state, features);
126 |     float * roi_ptr = THCudaTensor_data(state, rois);
127 |     float * out_ptr = THCudaTensor_data(state, output);
128 | 
129 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
130 | 
131 |     ROIAlignAdaForwardLaucher(
132 |         feat_ptr, spatial_scale, roi_count,
133 |         feat_height, feat_width, feat_channels,
134 |         aligned_height, aligned_width,
135 |         roi_ptr, out_ptr, cur_stream);
136 | 
137 |     return 1;
138 | }
139 | 
140 | extern "C" int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
141 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
142 | {
143 |     /*
144 |      * Thin wrapper around ROIAlignAdaBackwardLaucher, which is called with the
145 |      * stream returned by THCState_getCurrentStream(state).
146 |      *
147 |      * top_grad, rois and bottom_grad are passed on as raw float pointers.
148 |      *
149 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
150 |      */
151 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
152 |     int roi_count = THCudaTensor_size(state, rois, 0);
153 |     int roi_cols = THCudaTensor_size(state, rois, 1);
154 |     if (roi_cols != 5)
155 |     {
156 |         return 0;
157 |     }
158 | 
159 |     // Dims 0, 2, 3 and 1 of bottom_grad; no constraint is put on dim 0.
160 |     int grad_batch = THCudaTensor_size(state, bottom_grad, 0);
161 |     int grad_height = THCudaTensor_size(state, bottom_grad, 2);
162 |     int grad_width = THCudaTensor_size(state, bottom_grad, 3);
163 |     int grad_channels = THCudaTensor_size(state, bottom_grad, 1);
164 | 
165 |     // Raw float pointers into the three tensors.
166 |     float * top_ptr = THCudaTensor_data(state, top_grad);
167 |     float * roi_ptr = THCudaTensor_data(state, rois);
168 |     float * bottom_ptr = THCudaTensor_data(state, bottom_grad);
169 | 
170 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
171 |     ROIAlignAdaBackwardLaucher(
172 |         top_ptr, spatial_scale, grad_batch, roi_count,
173 |         grad_height, grad_width, grad_channels,
174 |         aligned_height, aligned_width,
175 |         roi_ptr, bottom_ptr, cur_stream);
176 | 
177 |     return 1;
178 | }
179 | 
180 | 
181 | extern "C" int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
182 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
183 | {
184 |     /*
185 |      * Thin wrapper around ROIAlignDenseAdaForwardLaucher, which is called with
186 |      * the stream returned by THCState_getCurrentStream(state).
187 |      *
188 |      * aligned_height, aligned_width and spatial_scale are forwarded as given;
189 |      * features, rois and output are passed on as raw float pointers.
190 |      *
191 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
192 |      */
193 | 
194 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
195 |     int roi_count = THCudaTensor_size(state, rois, 0);
196 |     int roi_cols = THCudaTensor_size(state, rois, 1);
197 |     if (roi_cols != 5)
198 |     {
199 |         return 0;
200 |     }
201 | 
202 |     // Height, width and channel count come from dims 2, 3 and 1 of features;
203 |     // dim 0 of features is never read here.
204 |     int feat_height = THCudaTensor_size(state, features, 2);
205 |     int feat_width = THCudaTensor_size(state, features, 3);
206 |     int feat_channels = THCudaTensor_size(state, features, 1);
207 | 
208 |     // Raw float pointers into the three tensors.
209 |     float * feat_ptr = THCudaTensor_data(state, features);
210 |     float * roi_ptr = THCudaTensor_data(state, rois);
211 |     float * out_ptr = THCudaTensor_data(state, output);
212 | 
213 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
214 | 
215 |     ROIAlignDenseAdaForwardLaucher(
216 |         feat_ptr, spatial_scale, roi_count,
217 |         feat_height, feat_width, feat_channels,
218 |         aligned_height, aligned_width,
219 |         roi_ptr, out_ptr, cur_stream);
220 | 
221 |     return 1;
222 | }
223 | 
224 | extern "C" int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
225 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
226 | {
227 |     /*
228 |      * Thin wrapper around ROIAlignDenseAdaBackwardLaucher, which is called with
229 |      * the stream returned by THCState_getCurrentStream(state).
230 |      *
231 |      * top_grad, rois and bottom_grad are passed on as raw float pointers.
232 |      *
233 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
234 |      */
235 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
236 |     int roi_count = THCudaTensor_size(state, rois, 0);
237 |     int roi_cols = THCudaTensor_size(state, rois, 1);
238 |     if (roi_cols != 5)
239 |     {
240 |         return 0;
241 |     }
242 | 
243 |     // Dims 0, 2, 3 and 1 of bottom_grad; no constraint is put on dim 0.
244 |     int grad_batch = THCudaTensor_size(state, bottom_grad, 0);
245 |     int grad_height = THCudaTensor_size(state, bottom_grad, 2);
246 |     int grad_width = THCudaTensor_size(state, bottom_grad, 3);
247 |     int grad_channels = THCudaTensor_size(state, bottom_grad, 1);
248 | 
249 |     // Raw float pointers into the three tensors.
250 |     float * top_ptr = THCudaTensor_data(state, top_grad);
251 |     float * roi_ptr = THCudaTensor_data(state, rois);
252 |     float * bottom_ptr = THCudaTensor_data(state, bottom_grad);
253 | 
254 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
255 |     ROIAlignDenseAdaBackwardLaucher(
256 |         top_ptr, spatial_scale, grad_batch, roi_count,
257 |         grad_height, grad_width, grad_channels,
258 |         aligned_height, aligned_width,
259 |         roi_ptr, bottom_ptr, cur_stream);
260 | 
261 |     return 1;
262 | }
263 | 
264 | /*PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
265 |   m.def("roi_align_forward_cuda", &roi_align_forward_cuda, "ROIAlign_forward");
266 |   m.def("roi_align_backward_cuda", &roi_align_backward_cuda, "ROIAlign_backward");
267 |   m.def("roi_align_ada_forward_cuda", &roi_align_ada_forward_cuda, "ROIAlign_Ada_forward");
268 |   m.def("roi_align_ada_backward_cuda", &roi_align_ada_backward_cuda, "ROIAlign_Ada_backward");
269 |   m.def("roi_align_dense_ada_forward_cuda", &roi_align_dense_ada_forward_cuda, "ROIAlign_Dense_Ada_forward");
270 |   m.def("roi_align_dense_ada_backward_cuda", &roi_align_dense_ada_backward_cuda, "ROIAlign_Dense_Ada_backward");
271 | }*/
272 | 


--------------------------------------------------------------------------------
/modules/roi_align/src.bak/roi_align_cuda.hpp:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <ATen/ATen.h>
 3 | #include <torch/extension.h>
 4 | // THC state handle handed to the THCudaTensor_* / THCState_* calls in roi_align_cuda.cpp.
 5 | THC_CLASS at::Context& at::globalContext();
 6 | THCState *state = at::globalContext().getTHCState();  // NOTE(review): defined (not only declared) in a header -- confirm there is a single includer
 7 | //ATen_CLASS at::Context& at::globalContext();
 8 | //THCState *state = at::globalContext().thc_state;
 9 | 
10 | // C-linkage declarations of the six wrappers defined in roi_align_cuda.cpp.
11 | #ifdef __cplusplus
12 | extern "C" {
13 | #endif
14 | // Wrappers around ROIAlignForwardLaucher / ROIAlignBackwardLaucher.
15 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
16 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);
17 | 
18 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
19 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
20 | 
21 | // Wrappers around ROIAlignAdaForwardLaucher / ROIAlignAdaBackwardLaucher.
22 | int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
23 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);
24 | 
25 | int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
26 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
27 | // Wrappers around ROIAlignDenseAdaForwardLaucher / ROIAlignDenseAdaBackwardLaucher.
28 | int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
29 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);
30 | 
31 | int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
32 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
33 | 
34 | // Every wrapper above returns 0 when dim 1 of rois is not 5 and 1 otherwise.
35 | #ifdef __cplusplus
36 | }
37 | #endif
38 | // Python bindings: registers each wrapper under its own C name with a short description string.
39 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {  // NOTE(review): module body lives in a header -- confirm it is included exactly once
40 |   m.def("roi_align_forward_cuda", &roi_align_forward_cuda, "ROIAlign_forward");
41 |   m.def("roi_align_backward_cuda", &roi_align_backward_cuda, "ROIAlign_backward");
42 |   m.def("roi_align_ada_forward_cuda", &roi_align_ada_forward_cuda, "ROIAlign_Ada_forward");
43 |   m.def("roi_align_ada_backward_cuda", &roi_align_ada_backward_cuda, "ROIAlign_Ada_backward");
44 |   m.def("roi_align_dense_ada_forward_cuda", &roi_align_dense_ada_forward_cuda, "ROIAlign_Dense_Ada_forward");
45 |   m.def("roi_align_dense_ada_backward_cuda", &roi_align_dense_ada_backward_cuda, "ROIAlign_Dense_Ada_backward");
46 | }
47 | 


--------------------------------------------------------------------------------
/modules/roi_align/src.bak2/.roi_align_cuda.cpp.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/src.bak2/.roi_align_cuda.cpp.swp


--------------------------------------------------------------------------------
/modules/roi_align/src.bak2/cuda/Makefile:
--------------------------------------------------------------------------------
1 | # Builds roi_align.cu.o from the CUDA kernel source; `make ARCH=sm_61` overrides the GPU architecture.
2 | ARCH ?= sm_52
3 | # all/clean name actions, not files, so they must run even if such files exist.
4 | .PHONY: all clean
5 | all: roi_align.cu.o
6 | # Real file target: the object is only recompiled when the kernel source or header changes.
7 | roi_align.cu.o: roi_align_kernel.cu roi_align_kernel.h
8 | 	nvcc -c -o $@ $< -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=$(ARCH)
9 | clean:
10 | 	rm -f roi_align.cu.o


--------------------------------------------------------------------------------
/modules/roi_align/src.bak2/cuda/roi_align.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/src.bak2/cuda/roi_align.cu.o


--------------------------------------------------------------------------------
/modules/roi_align/src.bak2/cuda/roi_align_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _ROI_ALIGN_KERNEL
 2 | #define _ROI_ALIGN_KERNEL
 3 | 
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | // ROIAlign pair: CUDA kernel declarations plus the launcher functions that take a stream.
 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data,
 9 |     const float spatial_scale, const int height, const int width,
10 |     const int channels, const int aligned_height, const int aligned_width,
11 |     const float* bottom_rois, float* top_data);
12 | // Called by roi_align_forward_cuda() in roi_align_cuda.c ("Laucher" spelling is part of the name).
13 | int ROIAlignForwardLaucher(
14 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
15 |     const int width, const int channels, const int aligned_height,
16 |     const int aligned_width, const float* bottom_rois,
17 |     float* top_data, cudaStream_t stream);
18 | // Backward kernel: note that bottom_diff comes before bottom_rois in this parameter list.
19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff,
20 |     const float spatial_scale, const int height, const int width,
21 |     const int channels, const int aligned_height, const int aligned_width,
22 |     float* bottom_diff, const float* bottom_rois);
23 | // Called by roi_align_backward_cuda(); unlike the forward launcher it also takes batch_size.
24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
25 |     const int height, const int width, const int channels, const int aligned_height,
26 |     const int aligned_width, const float* bottom_rois,
27 |     float* bottom_diff, cudaStream_t stream);
28 | 
29 | // ROIAlignAda pair: same parameter lists as the ROIAlign declarations, under different names.
30 | __global__ void ROIAlignAdaForward(const int nthreads, const float* bottom_data,
31 |     const float spatial_scale, const int height, const int width,
32 |     const int channels, const int aligned_height, const int aligned_width,
33 |     const float* bottom_rois, float* top_data);
34 | // Called by roi_align_ada_forward_cuda() in roi_align_cuda.c.
35 | int ROIAlignAdaForwardLaucher(
36 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
37 |     const int width, const int channels, const int aligned_height,
38 |     const int aligned_width, const float* bottom_rois,
39 |     float* top_data, cudaStream_t stream);
40 | // Backward kernel: bottom_diff comes before bottom_rois in this parameter list.
41 | __global__ void ROIAlignAdaBackward(const int nthreads, const float* top_diff,
42 |     const float spatial_scale, const int height, const int width,
43 |     const int channels, const int aligned_height, const int aligned_width,
44 |     float* bottom_diff, const float* bottom_rois);
45 | // Called by roi_align_ada_backward_cuda(); also takes batch_size.
46 | int ROIAlignAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
47 |     const int height, const int width, const int channels, const int aligned_height,
48 |     const int aligned_width, const float* bottom_rois,
49 |     float* bottom_diff, cudaStream_t stream);
50 | 
51 | // ROIAlignDenseAda pair: same parameter lists as the ROIAlign declarations, under different names.
52 | __global__ void ROIAlignDenseAdaForward(const int nthreads, const float* bottom_data,
53 |     const float spatial_scale, const int height, const int width,
54 |     const int channels, const int aligned_height, const int aligned_width,
55 |     const float* bottom_rois, float* top_data);
56 | // Called by roi_align_dense_ada_forward_cuda() in roi_align_cuda.c.
57 | int ROIAlignDenseAdaForwardLaucher(
58 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
59 |     const int width, const int channels, const int aligned_height,
60 |     const int aligned_width, const float* bottom_rois,
61 |     float* top_data, cudaStream_t stream);
62 | // Backward kernel: bottom_diff comes before bottom_rois in this parameter list.
63 | __global__ void ROIAlignDenseAdaBackward(const int nthreads, const float* top_diff,
64 |     const float spatial_scale, const int height, const int width,
65 |     const int channels, const int aligned_height, const int aligned_width,
66 |     float* bottom_diff, const float* bottom_rois);
67 | // Called by roi_align_dense_ada_backward_cuda(); also takes batch_size.
68 | int ROIAlignDenseAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
69 |     const int height, const int width, const int channels, const int aligned_height,
70 |     const int aligned_width, const float* bottom_rois,
71 |     float* bottom_diff, cudaStream_t stream);
72 | 
73 | 
74 | #ifdef __cplusplus
75 | }
76 | #endif
77 | 
78 | #endif
79 | 
80 | 


--------------------------------------------------------------------------------
/modules/roi_align/src.bak2/roi_align_cuda.c:
--------------------------------------------------------------------------------
  1 | #include <THC/THC.h>
  2 | #include <math.h>
  3 | #include "cuda/roi_align_kernel.h"
  4 | // THC state handle used by every call below; only declared here, so it must be defined elsewhere.
  5 | extern THCState *state;
  6 | 
  7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
  8 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
  9 | {
 10 |     /*
 11 |      * Thin wrapper around ROIAlignForwardLaucher, which is called with the
 12 |      * stream returned by THCState_getCurrentStream(state).
 13 |      *
 14 |      * aligned_height, aligned_width and spatial_scale are forwarded as given;
 15 |      * features, rois and output are passed on as raw float pointers.
 16 |      *
 17 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
 18 |      */
 19 | 
 20 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
 21 |     int roi_count = THCudaTensor_size(state, rois, 0);
 22 |     int roi_cols = THCudaTensor_size(state, rois, 1);
 23 |     if (roi_cols != 5)
 24 |     {
 25 |         return 0;
 26 |     }
 27 | 
 28 |     // Height, width and channel count come from dims 2, 3 and 1 of features;
 29 |     // dim 0 of features is never read here.
 30 |     int feat_height = THCudaTensor_size(state, features, 2);
 31 |     int feat_width = THCudaTensor_size(state, features, 3);
 32 |     int feat_channels = THCudaTensor_size(state, features, 1);
 33 | 
 34 |     // Raw float pointers into the three tensors.
 35 |     float * feat_ptr = THCudaTensor_data(state, features);
 36 |     float * roi_ptr = THCudaTensor_data(state, rois);
 37 |     float * out_ptr = THCudaTensor_data(state, output);
 38 | 
 39 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
 40 | 
 41 |     ROIAlignForwardLaucher(
 42 |         feat_ptr, spatial_scale, roi_count,
 43 |         feat_height, feat_width, feat_channels,
 44 |         aligned_height, aligned_width,
 45 |         roi_ptr, out_ptr, cur_stream);
 46 | 
 47 |     return 1;
 48 | }
 49 | 
 50 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 51 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
 52 | {
 53 |     /*
 54 |      * Thin wrapper around ROIAlignBackwardLaucher, which is called with the
 55 |      * stream returned by THCState_getCurrentStream(state).
 56 |      *
 57 |      * top_grad, rois and bottom_grad are passed on as raw float pointers.
 58 |      *
 59 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
 60 |      */
 61 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
 62 |     int roi_count = THCudaTensor_size(state, rois, 0);
 63 |     int roi_cols = THCudaTensor_size(state, rois, 1);
 64 |     if (roi_cols != 5)
 65 |     {
 66 |         return 0;
 67 |     }
 68 | 
 69 |     // Dims 0, 2, 3 and 1 of bottom_grad; no constraint is put on dim 0.
 70 |     int grad_batch = THCudaTensor_size(state, bottom_grad, 0);
 71 |     int grad_height = THCudaTensor_size(state, bottom_grad, 2);
 72 |     int grad_width = THCudaTensor_size(state, bottom_grad, 3);
 73 |     int grad_channels = THCudaTensor_size(state, bottom_grad, 1);
 74 | 
 75 |     // Raw float pointers into the three tensors.
 76 |     float * top_ptr = THCudaTensor_data(state, top_grad);
 77 |     float * roi_ptr = THCudaTensor_data(state, rois);
 78 |     float * bottom_ptr = THCudaTensor_data(state, bottom_grad);
 79 | 
 80 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
 81 |     ROIAlignBackwardLaucher(
 82 |         top_ptr, spatial_scale, grad_batch, roi_count,
 83 |         grad_height, grad_width, grad_channels,
 84 |         aligned_height, aligned_width,
 85 |         roi_ptr, bottom_ptr, cur_stream);
 86 | 
 87 |     return 1;
 88 | }
 89 | 
 90 | 
 91 | int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 92 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
 93 | {
 94 |     /*
 95 |      * Thin wrapper around ROIAlignAdaForwardLaucher, which is called with the
 96 |      * stream returned by THCState_getCurrentStream(state).
 97 |      *
 98 |      * aligned_height, aligned_width and spatial_scale are forwarded as given;
 99 |      * features, rois and output are passed on as raw float pointers.
100 |      *
101 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
102 |      */
103 | 
104 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
105 |     int roi_count = THCudaTensor_size(state, rois, 0);
106 |     int roi_cols = THCudaTensor_size(state, rois, 1);
107 |     if (roi_cols != 5)
108 |     {
109 |         return 0;
110 |     }
111 | 
112 |     // Height, width and channel count come from dims 2, 3 and 1 of features;
113 |     // dim 0 of features is never read here.
114 |     int feat_height = THCudaTensor_size(state, features, 2);
115 |     int feat_width = THCudaTensor_size(state, features, 3);
116 |     int feat_channels = THCudaTensor_size(state, features, 1);
117 | 
118 |     // Raw float pointers into the three tensors.
119 |     float * feat_ptr = THCudaTensor_data(state, features);
120 |     float * roi_ptr = THCudaTensor_data(state, rois);
121 |     float * out_ptr = THCudaTensor_data(state, output);
122 | 
123 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
124 | 
125 |     ROIAlignAdaForwardLaucher(
126 |         feat_ptr, spatial_scale, roi_count,
127 |         feat_height, feat_width, feat_channels,
128 |         aligned_height, aligned_width,
129 |         roi_ptr, out_ptr, cur_stream);
130 | 
131 |     return 1;
132 | }
133 | 
134 | int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
135 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
136 | {
137 |     /*
138 |      * Thin wrapper around ROIAlignAdaBackwardLaucher, which is called with the
139 |      * stream returned by THCState_getCurrentStream(state).
140 |      *
141 |      * top_grad, rois and bottom_grad are passed on as raw float pointers.
142 |      *
143 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
144 |      */
145 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
146 |     int roi_count = THCudaTensor_size(state, rois, 0);
147 |     int roi_cols = THCudaTensor_size(state, rois, 1);
148 |     if (roi_cols != 5)
149 |     {
150 |         return 0;
151 |     }
152 | 
153 |     // Dims 0, 2, 3 and 1 of bottom_grad; no constraint is put on dim 0.
154 |     int grad_batch = THCudaTensor_size(state, bottom_grad, 0);
155 |     int grad_height = THCudaTensor_size(state, bottom_grad, 2);
156 |     int grad_width = THCudaTensor_size(state, bottom_grad, 3);
157 |     int grad_channels = THCudaTensor_size(state, bottom_grad, 1);
158 | 
159 |     // Raw float pointers into the three tensors.
160 |     float * top_ptr = THCudaTensor_data(state, top_grad);
161 |     float * roi_ptr = THCudaTensor_data(state, rois);
162 |     float * bottom_ptr = THCudaTensor_data(state, bottom_grad);
163 | 
164 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
165 |     ROIAlignAdaBackwardLaucher(
166 |         top_ptr, spatial_scale, grad_batch, roi_count,
167 |         grad_height, grad_width, grad_channels,
168 |         aligned_height, aligned_width,
169 |         roi_ptr, bottom_ptr, cur_stream);
170 | 
171 |     return 1;
172 | }
173 | 
174 | 
175 | int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
176 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
177 | {
178 |     /*
179 |      * Thin wrapper around ROIAlignDenseAdaForwardLaucher, which is called with
180 |      * the stream returned by THCState_getCurrentStream(state).
181 |      *
182 |      * aligned_height, aligned_width and spatial_scale are forwarded as given;
183 |      * features, rois and output are passed on as raw float pointers.
184 |      *
185 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
186 |      */
187 | 
188 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
189 |     int roi_count = THCudaTensor_size(state, rois, 0);
190 |     int roi_cols = THCudaTensor_size(state, rois, 1);
191 |     if (roi_cols != 5)
192 |     {
193 |         return 0;
194 |     }
195 | 
196 |     // Height, width and channel count come from dims 2, 3 and 1 of features;
197 |     // dim 0 of features is never read here.
198 |     int feat_height = THCudaTensor_size(state, features, 2);
199 |     int feat_width = THCudaTensor_size(state, features, 3);
200 |     int feat_channels = THCudaTensor_size(state, features, 1);
201 | 
202 |     // Raw float pointers into the three tensors.
203 |     float * feat_ptr = THCudaTensor_data(state, features);
204 |     float * roi_ptr = THCudaTensor_data(state, rois);
205 |     float * out_ptr = THCudaTensor_data(state, output);
206 | 
207 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
208 | 
209 |     ROIAlignDenseAdaForwardLaucher(
210 |         feat_ptr, spatial_scale, roi_count,
211 |         feat_height, feat_width, feat_channels,
212 |         aligned_height, aligned_width,
213 |         roi_ptr, out_ptr, cur_stream);
214 | 
215 |     return 1;
216 | }
217 | 
218 | int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
219 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
220 | {
221 |     /*
222 |      * Thin wrapper around ROIAlignDenseAdaBackwardLaucher, which is called with
223 |      * the stream returned by THCState_getCurrentStream(state).
224 |      *
225 |      * top_grad, rois and bottom_grad are passed on as raw float pointers.
226 |      *
227 |      * Returns 0 without launching when dim 1 of rois is not 5, otherwise 1.
228 |      */
229 |     // ROI tensor: dim 0 gives the number of ROIs, dim 1 has to be 5.
230 |     int roi_count = THCudaTensor_size(state, rois, 0);
231 |     int roi_cols = THCudaTensor_size(state, rois, 1);
232 |     if (roi_cols != 5)
233 |     {
234 |         return 0;
235 |     }
236 | 
237 |     // Dims 0, 2, 3 and 1 of bottom_grad; no constraint is put on dim 0.
238 |     int grad_batch = THCudaTensor_size(state, bottom_grad, 0);
239 |     int grad_height = THCudaTensor_size(state, bottom_grad, 2);
240 |     int grad_width = THCudaTensor_size(state, bottom_grad, 3);
241 |     int grad_channels = THCudaTensor_size(state, bottom_grad, 1);
242 | 
243 |     // Raw float pointers into the three tensors.
244 |     float * top_ptr = THCudaTensor_data(state, top_grad);
245 |     float * roi_ptr = THCudaTensor_data(state, rois);
246 |     float * bottom_ptr = THCudaTensor_data(state, bottom_grad);
247 | 
248 |     cudaStream_t cur_stream = THCState_getCurrentStream(state);
249 |     ROIAlignDenseAdaBackwardLaucher(
250 |         top_ptr, spatial_scale, grad_batch, roi_count,
251 |         grad_height, grad_width, grad_channels,
252 |         aligned_height, aligned_width,
253 |         roi_ptr, bottom_ptr, cur_stream);
254 | 
255 |     return 1;
256 | }
257 | 


--------------------------------------------------------------------------------
/modules/roi_align/src.bak2/roi_align_cuda.cpp:
--------------------------------------------------------------------------------
  1 | //#include <THC/THC.h>
  2 | //#include <ATen/ATen.h>
  3 | #include "roi_align_cuda.hpp"
  4 | #include <math.h>
  5 | #include "cuda/roi_align_kernel.h"
  6 | //#include <torch/extension.h>
  7 | 
  8 | THC_CLASS at::Context& at::globalContext();  // re-declaration of the ATen global-context accessor used on the next line
  9 | THCState *state = at::globalContext().getTHCState();  // file-level THC state handle passed to every THC call in the wrappers below
 10 | //ATen_CLASS at::Context& at::globalContext();
 11 | //THCState *state = at::globalContext().thc_state;
 12 | //extern THCState *state;
 13 | 
 14 | extern "C" int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 15 |                         at::Tensor& features_t, at::Tensor& rois_t, at::Tensor& output_t)
 16 | {
 17 |     // ROIAlign forward wrapper: unwraps the three at::Tensor arguments into THC tensors and
 18 |     // launches ROIAlignForwardLaucher on the current THC stream.
 19 |     // Returns 1 once the launch call has been issued and 0 when rois does not have 5 columns;
 20 |     // any value returned by the launcher itself is discarded.
 21 | 
 22 |     THCudaTensor * features(std::move(features_t.unsafeGetTensorImpl()));
 23 |     THCudaTensor * rois(std::move(rois_t.unsafeGetTensorImpl()));
 24 |     THCudaTensor * output(std::move(output_t.unsafeGetTensorImpl()));
 25 | 
 26 |     // Number of ROIs (dim 0) and values per ROI (dim 1). Validated before any contiguous
 27 |     // tensor is created so the early return has nothing to release.
 28 |     // Presumably each row is (batch_index, x1, y1, x2, y2) -- TODO confirm against the kernel.
 29 |     int num_rois = THCudaTensor_size(state, rois, 0);
 30 |     int size_rois = THCudaTensor_size(state, rois, 1);
 31 |     if (size_rois != 5)
 32 |     {
 33 |         return 0;
 34 |     }
 35 | 
 36 |     // Feature-map extents come from dims 1..3 of features (presumably NCHW -- TODO confirm).
 37 |     int num_channels = THCudaTensor_size(state, features, 1);
 38 |     int data_height = THCudaTensor_size(state, features, 2);
 39 |     int data_width = THCudaTensor_size(state, features, 3);
 40 | 
 41 |     // newContiguous hands back an owned reference, so each of these is released below.
 42 |     THCudaTensor * features_ = THCudaTensor_newContiguous(state, features);
 43 |     THCudaTensor * rois_ = THCudaTensor_newContiguous(state, rois);
 44 |     THCudaTensor * output_ = THCudaTensor_newContiguous(state, output);
 45 | 
 46 |     // Take the pointers from the tensors rather than from their storages: a storage pointer
 47 |     // is the start of the allocation and ignores the tensor's storage offset.
 48 |     float * data_flat = THCudaTensor_data(state, features_);
 49 |     float * rois_flat = THCudaTensor_data(state, rois_);
 50 |     float * output_flat = THCudaTensor_data(state, output_);
 51 | 
 52 |     // Launch on the stream currently selected in the THC state.
 53 |     cudaStream_t stream = THCState_getCurrentStream(state);
 54 | 
 55 |     ROIAlignForwardLaucher(
 56 |         data_flat, spatial_scale, num_rois, data_height,
 57 |         data_width, num_channels, aligned_height,
 58 |         aligned_width, rois_flat,
 59 |         output_flat, stream);
 60 | 
 61 |     // Drop the references taken above. freeCopyTo first copies the result back into output
 62 |     // when output_ is a separate contiguous copy, so a non-contiguous output still gets it.
 63 |     THCudaTensor_free(state, features_);
 64 |     THCudaTensor_free(state, rois_);
 65 |     THCudaTensor_freeCopyTo(state, output_, output);
 66 | 
 67 |     return 1;
 68 | }
 69 | 
 70 | extern "C" int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 71 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
 72 | {
 73 |     // Backward wrapper for the plain ROIAlign op: reads raw float pointers and sizes from the THC tensors and passes them to ROIAlignBackwardLaucher. Uses the file-level `state`.
 74 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
 75 |     float * rois_flat = THCudaTensor_data(state, rois);
 76 | 
 77 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
 78 | 
 79 |     // Number of ROIs (dim 0 of rois) and values per ROI (dim 1); presumably each row is (batch_index, x1, y1, x2, y2) -- TODO confirm against the kernel
 80 |     int num_rois = THCudaTensor_size(state, rois, 0);
 81 |     int size_rois = THCudaTensor_size(state, rois, 1);
 82 |     if (size_rois != 5)
 83 |     {
 84 |         return 0;  // rois must be exactly 5 values wide
 85 |     }
 86 | 
 87 |     // batch size (dim 0 of bottom_grad); the batch_size == 1 restriction below is commented out
 88 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
 89 |     //if (batch_size != 1)
 90 |     //{
 91 |      //   return 0;
 92 |     //}
 93 |     // data height (dim 2 of bottom_grad)
 94 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
 95 |     // data width (dim 3 of bottom_grad)
 96 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
 97 |     // Number of channels (dim 1 of bottom_grad)
 98 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
 99 | 
100 |     cudaStream_t stream = THCState_getCurrentStream(state);  // stream currently selected in the THC state
101 |     ROIAlignBackwardLaucher(
102 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
103 |         data_width, num_channels, aligned_height,
104 |         aligned_width, rois_flat,
105 |         bottom_grad_flat, stream);
106 | 
107 |     return 1;  // any value returned by the launcher is discarded
108 | }
109 | 
110 | 
111 | extern "C" int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
112 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
113 | {
114 |     // Forward wrapper for the Ada ROIAlign op: reads raw float pointers and sizes from the THC tensors and passes them to ROIAlignAdaForwardLaucher. Uses the file-level `state`.
115 |     float * data_flat = THCudaTensor_data(state, features);
116 |     float * rois_flat = THCudaTensor_data(state, rois);
117 | 
118 |     float * output_flat = THCudaTensor_data(state, output);
119 | 
120 |     // Number of ROIs (dim 0 of rois) and values per ROI (dim 1); presumably each row is (batch_index, x1, y1, x2, y2) -- TODO confirm against the kernel
121 |     int num_rois = THCudaTensor_size(state, rois, 0);
122 |     int size_rois = THCudaTensor_size(state, rois, 1);
123 |     if (size_rois != 5)
124 |     {
125 |         return 0;  // rois must be exactly 5 values wide
126 |     }
127 | 
128 |     // batch size (unused: the batch_size == 1 restriction below is commented out)
129 |     //int batch_size = THCudaTensor_size(state, features, 0);
130 |     //if (batch_size != 1)
131 |     //{
132 |     //    return 0;
133 |     //}
134 | 
135 | 
136 |     // data height (dim 2 of features)
137 |     int data_height = THCudaTensor_size(state, features, 2);
138 |     // data width (dim 3 of features)
139 |     int data_width = THCudaTensor_size(state, features, 3);
140 |     // Number of channels (dim 1 of features)
141 |     int num_channels = THCudaTensor_size(state, features, 1);
142 | 
143 |     cudaStream_t stream = THCState_getCurrentStream(state);  // stream currently selected in the THC state
144 | 
145 |     ROIAlignAdaForwardLaucher(
146 |         data_flat, spatial_scale, num_rois, data_height,
147 |         data_width, num_channels, aligned_height,
148 |         aligned_width, rois_flat,
149 |         output_flat, stream);
150 | 
151 |     return 1;  // any value returned by the launcher is discarded
152 | }
153 | 
154 | extern "C" int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
155 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
156 | {
157 |     // Backward wrapper for the Ada ROIAlign op: reads raw float pointers and sizes from the THC tensors and passes them to ROIAlignAdaBackwardLaucher. Uses the file-level `state`.
158 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
159 |     float * rois_flat = THCudaTensor_data(state, rois);
160 | 
161 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
162 | 
163 |     // Number of ROIs (dim 0 of rois) and values per ROI (dim 1); presumably each row is (batch_index, x1, y1, x2, y2) -- TODO confirm against the kernel
164 |     int num_rois = THCudaTensor_size(state, rois, 0);
165 |     int size_rois = THCudaTensor_size(state, rois, 1);
166 |     if (size_rois != 5)
167 |     {
168 |         return 0;  // rois must be exactly 5 values wide
169 |     }
170 | 
171 |     // batch size (dim 0 of bottom_grad); the batch_size == 1 restriction below is commented out
172 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
173 |     //if (batch_size != 1)
174 |     //{
175 |      //   return 0;
176 |     //}
177 |     // data height (dim 2 of bottom_grad)
178 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
179 |     // data width (dim 3 of bottom_grad)
180 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
181 |     // Number of channels (dim 1 of bottom_grad)
182 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
183 | 
184 |     cudaStream_t stream = THCState_getCurrentStream(state);  // stream currently selected in the THC state
185 |     ROIAlignAdaBackwardLaucher(
186 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
187 |         data_width, num_channels, aligned_height,
188 |         aligned_width, rois_flat,
189 |         bottom_grad_flat, stream);
190 | 
191 |     return 1;  // any value returned by the launcher is discarded
192 | }
193 | 
194 | 
195 | extern "C" int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
196 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
197 | {
198 |     // Forward wrapper for the dense-Ada ROIAlign op: reads raw float pointers and sizes from the THC tensors and passes them to ROIAlignDenseAdaForwardLaucher. Uses the file-level `state`.
199 |     float * data_flat = THCudaTensor_data(state, features);
200 |     float * rois_flat = THCudaTensor_data(state, rois);
201 | 
202 |     float * output_flat = THCudaTensor_data(state, output);
203 | 
204 |     // Number of ROIs (dim 0 of rois) and values per ROI (dim 1); presumably each row is (batch_index, x1, y1, x2, y2) -- TODO confirm against the kernel
205 |     int num_rois = THCudaTensor_size(state, rois, 0);
206 |     int size_rois = THCudaTensor_size(state, rois, 1);
207 |     if (size_rois != 5)
208 |     {
209 |         return 0;  // rois must be exactly 5 values wide
210 |     }
211 | 
212 |     // batch size (unused: the batch_size == 1 restriction below is commented out)
213 |     //int batch_size = THCudaTensor_size(state, features, 0);
214 |     //if (batch_size != 1)
215 |     //{
216 |     //    return 0;
217 |     //}
218 | 
219 | 
220 |     // data height (dim 2 of features)
221 |     int data_height = THCudaTensor_size(state, features, 2);
222 |     // data width (dim 3 of features)
223 |     int data_width = THCudaTensor_size(state, features, 3);
224 |     // Number of channels (dim 1 of features)
225 |     int num_channels = THCudaTensor_size(state, features, 1);
226 | 
227 |     cudaStream_t stream = THCState_getCurrentStream(state);  // stream currently selected in the THC state
228 | 
229 |     ROIAlignDenseAdaForwardLaucher(
230 |         data_flat, spatial_scale, num_rois, data_height,
231 |         data_width, num_channels, aligned_height,
232 |         aligned_width, rois_flat,
233 |         output_flat, stream);
234 | 
235 |     return 1;  // any value returned by the launcher is discarded
236 | }
237 | 
238 | extern "C" int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
239 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
240 | {
241 |     // Backward wrapper for the dense-Ada ROIAlign op: reads raw float pointers and sizes from the THC tensors and passes them to ROIAlignDenseAdaBackwardLaucher. Uses the file-level `state`.
242 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
243 |     float * rois_flat = THCudaTensor_data(state, rois);
244 | 
245 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
246 | 
247 |     // Number of ROIs (dim 0 of rois) and values per ROI (dim 1); presumably each row is (batch_index, x1, y1, x2, y2) -- TODO confirm against the kernel
248 |     int num_rois = THCudaTensor_size(state, rois, 0);
249 |     int size_rois = THCudaTensor_size(state, rois, 1);
250 |     if (size_rois != 5)
251 |     {
252 |         return 0;  // rois must be exactly 5 values wide
253 |     }
254 | 
255 |     // batch size (dim 0 of bottom_grad); the batch_size == 1 restriction below is commented out
256 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
257 |     //if (batch_size != 1)
258 |     //{
259 |      //   return 0;
260 |     //}
261 |     // data height (dim 2 of bottom_grad)
262 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
263 |     // data width (dim 3 of bottom_grad)
264 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
265 |     // Number of channels (dim 1 of bottom_grad)
266 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
267 | 
268 |     cudaStream_t stream = THCState_getCurrentStream(state);  // stream currently selected in the THC state
269 |     ROIAlignDenseAdaBackwardLaucher(
270 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
271 |         data_width, num_channels, aligned_height,
272 |         aligned_width, rois_flat,
273 |         bottom_grad_flat, stream);
274 | 
275 |     return 1;  // any value returned by the launcher is discarded
276 | }
277 | 
278 | /*PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
279 |   m.def("roi_align_forward_cuda", &roi_align_forward_cuda, "ROIAlign_forward");
280 |   m.def("roi_align_backward_cuda", &roi_align_backward_cuda, "ROIAlign_backward");
281 |   m.def("roi_align_ada_forward_cuda", &roi_align_ada_forward_cuda, "ROIAlign_Ada_forward");
282 |   m.def("roi_align_ada_backward_cuda", &roi_align_ada_backward_cuda, "ROIAlign_Ada_backward");
283 |   m.def("roi_align_dense_ada_forward_cuda", &roi_align_dense_ada_forward_cuda, "ROIAlign_Dense_Ada_forward");
284 |   m.def("roi_align_dense_ada_backward_cuda", &roi_align_dense_ada_backward_cuda, "ROIAlign_Dense_Ada_backward");
285 | }*/
286 | 


--------------------------------------------------------------------------------
/modules/roi_align/src.bak2/roi_align_cuda.hpp:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <ATen/ATen.h>
 3 | #include <torch/extension.h>
 4 | 
 5 | //THC_CLASS at::Context& at::globalContext();
 6 | //THCState *state = at::globalContext().getTHCState();
 7 | //ATen_CLASS at::Context& at::globalContext();
 8 | //THCState *state = at::globalContext().thc_state;
 9 | //extern THCState *state;
10 | 
11 | #ifdef __cplusplus
12 | extern "C" {
13 | #endif
14 | // Entry points of the ROIAlign CUDA extension; each returns int. NOTE(review): the first declaration takes at::Tensor references, so this header is C++-only despite the __cplusplus guards -- confirm the guards are still wanted.
15 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
16 |                         at::Tensor& features, at::Tensor& rois, at::Tensor& output);
17 | // Plain ROIAlign backward, on raw THCudaTensor pointers (unlike the forward declaration above).
18 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
19 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
20 | 
21 | // "Ada" variant, forward.
22 | int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
23 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);
24 | // "Ada" variant, backward.
25 | int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
26 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
27 | // "Dense Ada" variant, forward.
28 | int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
29 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);
30 | // "Dense Ada" variant, backward.
31 | int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
32 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
33 | 
34 | 
35 | #ifdef __cplusplus
36 | }
37 | #endif
38 | 
39 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {  // exposes the six wrappers to Python under their C names. NOTE(review): this definition lives in a header with no include guard -- confirm it is only ever included from one translation unit
40 |   m.def("roi_align_forward_cuda", &roi_align_forward_cuda, "ROIAlign_forward");
41 |   m.def("roi_align_backward_cuda", &roi_align_backward_cuda, "ROIAlign_backward");
42 |   m.def("roi_align_ada_forward_cuda", &roi_align_ada_forward_cuda, "ROIAlign_Ada_forward");
43 |   m.def("roi_align_ada_backward_cuda", &roi_align_ada_backward_cuda, "ROIAlign_Ada_backward");
44 |   m.def("roi_align_dense_ada_forward_cuda", &roi_align_dense_ada_forward_cuda, "ROIAlign_Dense_Ada_forward");
45 |   m.def("roi_align_dense_ada_backward_cuda", &roi_align_dense_ada_backward_cuda, "ROIAlign_Dense_Ada_backward");
46 | }
47 | 


--------------------------------------------------------------------------------
/modules/roi_align/src/cuda/Makefile:
--------------------------------------------------------------------------------
1 | all: roi_align_kernel.cu roi_align_kernel.h  # compiles the CUDA kernels into roi_align.cu.o; 'all' names no file, so the recipe runs on every invocation
2 | 	nvcc -c -o roi_align.cu.o roi_align_kernel.cu -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC -arch=sm_52
3 | .PHONY: all clean  # neither target produces a file of that name; keeps a stray file called 'all' or 'clean' from masking them
4 | clean:  # -f makes clean succeed even when the object file has not been built yet
5 | 	rm -f roi_align.cu.o
6 | 


--------------------------------------------------------------------------------
/modules/roi_align/src/cuda/roi_align.cu.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Amgao/RLS-RTMDNet/a8b53aabf2ac4c5576222c95ee254d2faa433fba/modules/roi_align/src/cuda/roi_align.cu.o


--------------------------------------------------------------------------------
/modules/roi_align/src/cuda/roi_align_kernel.h:
--------------------------------------------------------------------------------
 1 | #ifndef _ROI_ALIGN_KERNEL
 2 | #define _ROI_ALIGN_KERNEL
 3 | // Declarations for three ROIAlign variants (plain, Ada, DenseAda): for each direction one __global__ kernel and one int-returning launcher that takes a cudaStream_t.
 4 | #ifdef __cplusplus
 5 | extern "C" {
 6 | #endif
 7 | // Plain ROIAlign, forward kernel.
 8 | __global__ void ROIAlignForward(const int nthreads, const float* bottom_data,
 9 |     const float spatial_scale, const int height, const int width,
10 |     const int channels, const int aligned_height, const int aligned_width,
11 |     const float* bottom_rois, float* top_data);
12 | // Launcher declared alongside ROIAlignForward (presumably launches it -- the definition is not in this header); takes num_rois instead of nthreads and the stream last.
13 | int ROIAlignForwardLaucher(
14 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
15 |     const int width, const int channels, const int aligned_height,
16 |     const int aligned_width, const float* bottom_rois,
17 |     float* top_data, cudaStream_t stream);
18 | // Plain ROIAlign, backward kernel. Pointer order here is (bottom_diff, bottom_rois).
19 | __global__ void ROIAlignBackward(const int nthreads, const float* top_diff,
20 |     const float spatial_scale, const int height, const int width,
21 |     const int channels, const int aligned_height, const int aligned_width,
22 |     float* bottom_diff, const float* bottom_rois);
23 | // Backward launcher: additionally takes batch_size, and orders the pointers (bottom_rois, bottom_diff), the reverse of the kernel above.
24 | int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
25 |     const int height, const int width, const int channels, const int aligned_height,
26 |     const int aligned_width, const float* bottom_rois,
27 |     float* bottom_diff, cudaStream_t stream);
28 | 
29 | // Ada variant, forward kernel; same parameter list as ROIAlignForward.
30 | __global__ void ROIAlignAdaForward(const int nthreads, const float* bottom_data,
31 |     const float spatial_scale, const int height, const int width,
32 |     const int channels, const int aligned_height, const int aligned_width,
33 |     const float* bottom_rois, float* top_data);
34 | // Ada forward launcher; same parameter list as ROIAlignForwardLaucher.
35 | int ROIAlignAdaForwardLaucher(
36 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
37 |     const int width, const int channels, const int aligned_height,
38 |     const int aligned_width, const float* bottom_rois,
39 |     float* top_data, cudaStream_t stream);
40 | // Ada variant, backward kernel; same parameter list as ROIAlignBackward.
41 | __global__ void ROIAlignAdaBackward(const int nthreads, const float* top_diff,
42 |     const float spatial_scale, const int height, const int width,
43 |     const int channels, const int aligned_height, const int aligned_width,
44 |     float* bottom_diff, const float* bottom_rois);
45 | // Ada backward launcher; same parameter list as ROIAlignBackwardLaucher.
46 | int ROIAlignAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
47 |     const int height, const int width, const int channels, const int aligned_height,
48 |     const int aligned_width, const float* bottom_rois,
49 |     float* bottom_diff, cudaStream_t stream);
50 | 
51 | // DenseAda variant, forward kernel; same parameter list as ROIAlignForward.
52 | __global__ void ROIAlignDenseAdaForward(const int nthreads, const float* bottom_data,
53 |     const float spatial_scale, const int height, const int width,
54 |     const int channels, const int aligned_height, const int aligned_width,
55 |     const float* bottom_rois, float* top_data);
56 | // DenseAda forward launcher; same parameter list as ROIAlignForwardLaucher.
57 | int ROIAlignDenseAdaForwardLaucher(
58 |     const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
59 |     const int width, const int channels, const int aligned_height,
60 |     const int aligned_width, const float* bottom_rois,
61 |     float* top_data, cudaStream_t stream);
62 | // DenseAda variant, backward kernel; same parameter list as ROIAlignBackward.
63 | __global__ void ROIAlignDenseAdaBackward(const int nthreads, const float* top_diff,
64 |     const float spatial_scale, const int height, const int width,
65 |     const int channels, const int aligned_height, const int aligned_width,
66 |     float* bottom_diff, const float* bottom_rois);
67 | // DenseAda backward launcher; same parameter list as ROIAlignBackwardLaucher.
68 | int ROIAlignDenseAdaBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
69 |     const int height, const int width, const int channels, const int aligned_height,
70 |     const int aligned_width, const float* bottom_rois,
71 |     float* bottom_diff, cudaStream_t stream);
72 | 
73 | 
74 | #ifdef __cplusplus
75 | }
76 | #endif
77 | 
78 | #endif
79 | 
80 | 


--------------------------------------------------------------------------------
/modules/roi_align/src/roi_align_cuda.c:
--------------------------------------------------------------------------------
  1 | #include <THC/THC.h>
  2 | #include <math.h>
  3 | #include "cuda/roi_align_kernel.h"
  4 | 
  5 | extern THCState *state;  // THC state handle defined outside this file; every wrapper below passes it to the THC calls
  6 | 
  7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
  8 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
  9 | {
 10 |     // Forward wrapper for the plain ROIAlign op: reads raw float pointers and sizes from the THC tensors and passes them to ROIAlignForwardLaucher. Uses the extern `state`.
 11 |     float * data_flat = THCudaTensor_data(state, features);
 12 |     float * rois_flat = THCudaTensor_data(state, rois);
 13 | 
 14 |     float * output_flat = THCudaTensor_data(state, output);
 15 | 
 16 |     // Number of ROIs (dim 0 of rois) and values per ROI (dim 1); presumably each row is (batch_index, x1, y1, x2, y2) -- TODO confirm against the kernel
 17 |     int num_rois = THCudaTensor_size(state, rois, 0);
 18 |     int size_rois = THCudaTensor_size(state, rois, 1);
 19 |     if (size_rois != 5)
 20 |     {
 21 |         return 0;  // rois must be exactly 5 values wide
 22 |     }
 23 | 
 24 |     // batch size (unused: the batch_size == 1 restriction below is commented out)
 25 |     //int batch_size = THCudaTensor_size(state, features, 0);
 26 |     //if (batch_size != 1)
 27 |     //{
 28 |     //    return 0;
 29 |     //}
 30 | 
 31 | 
 32 |     // data height (dim 2 of features)
 33 |     int data_height = THCudaTensor_size(state, features, 2);
 34 |     // data width (dim 3 of features)
 35 |     int data_width = THCudaTensor_size(state, features, 3);
 36 |     // Number of channels (dim 1 of features)
 37 |     int num_channels = THCudaTensor_size(state, features, 1);
 38 | 
 39 |     cudaStream_t stream = THCState_getCurrentStream(state);  // stream currently selected in the THC state
 40 | 
 41 |     ROIAlignForwardLaucher(
 42 |         data_flat, spatial_scale, num_rois, data_height,
 43 |         data_width, num_channels, aligned_height,
 44 |         aligned_width, rois_flat,
 45 |         output_flat, stream);
 46 | 
 47 |     return 1;  // the launcher's int result is discarded
 48 | }
 49 | 
 50 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 51 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
 52 | {
 53 |     // Backward wrapper for the plain ROIAlign op: reads raw float pointers and sizes from the THC tensors and passes them to ROIAlignBackwardLaucher. Uses the extern `state`.
 54 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
 55 |     float * rois_flat = THCudaTensor_data(state, rois);
 56 | 
 57 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
 58 | 
 59 |     // Number of ROIs (dim 0 of rois) and values per ROI (dim 1); presumably each row is (batch_index, x1, y1, x2, y2) -- TODO confirm against the kernel
 60 |     int num_rois = THCudaTensor_size(state, rois, 0);
 61 |     int size_rois = THCudaTensor_size(state, rois, 1);
 62 |     if (size_rois != 5)
 63 |     {
 64 |         return 0;  // rois must be exactly 5 values wide
 65 |     }
 66 | 
 67 |     // batch size (dim 0 of bottom_grad); the batch_size == 1 restriction below is commented out
 68 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
 69 |     //if (batch_size != 1)
 70 |     //{
 71 |      //   return 0;
 72 |     //}
 73 |     // data height (dim 2 of bottom_grad)
 74 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
 75 |     // data width (dim 3 of bottom_grad)
 76 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
 77 |     // Number of channels (dim 1 of bottom_grad)
 78 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
 79 | 
 80 |     cudaStream_t stream = THCState_getCurrentStream(state);  // stream currently selected in the THC state
 81 |     ROIAlignBackwardLaucher(
 82 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
 83 |         data_width, num_channels, aligned_height,
 84 |         aligned_width, rois_flat,
 85 |         bottom_grad_flat, stream);
 86 | 
 87 |     return 1;  // the launcher's int result is discarded
 88 | }
 89 | 
 90 | 
 91 | int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 92 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
 93 | {
 94 |     // Forward wrapper for the Ada ROIAlign op: reads raw float pointers and sizes from the THC tensors and passes them to ROIAlignAdaForwardLaucher. Uses the extern `state`.
 95 |     float * data_flat = THCudaTensor_data(state, features);
 96 |     float * rois_flat = THCudaTensor_data(state, rois);
 97 | 
 98 |     float * output_flat = THCudaTensor_data(state, output);
 99 | 
100 |     // Number of ROIs (dim 0 of rois) and values per ROI (dim 1); presumably each row is (batch_index, x1, y1, x2, y2) -- TODO confirm against the kernel
101 |     int num_rois = THCudaTensor_size(state, rois, 0);
102 |     int size_rois = THCudaTensor_size(state, rois, 1);
103 |     if (size_rois != 5)
104 |     {
105 |         return 0;  // rois must be exactly 5 values wide
106 |     }
107 | 
108 |     // batch size (unused: the batch_size == 1 restriction below is commented out)
109 |     //int batch_size = THCudaTensor_size(state, features, 0);
110 |     //if (batch_size != 1)
111 |     //{
112 |     //    return 0;
113 |     //}
114 | 
115 | 
116 |     // data height (dim 2 of features)
117 |     int data_height = THCudaTensor_size(state, features, 2);
118 |     // data width (dim 3 of features)
119 |     int data_width = THCudaTensor_size(state, features, 3);
120 |     // Number of channels (dim 1 of features)
121 |     int num_channels = THCudaTensor_size(state, features, 1);
122 | 
123 |     cudaStream_t stream = THCState_getCurrentStream(state);  // stream currently selected in the THC state
124 | 
125 |     ROIAlignAdaForwardLaucher(
126 |         data_flat, spatial_scale, num_rois, data_height,
127 |         data_width, num_channels, aligned_height,
128 |         aligned_width, rois_flat,
129 |         output_flat, stream);
130 | 
131 |     return 1;  // the launcher's int result is discarded
132 | }
133 | 
134 | int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
135 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
136 | {
137 |     // Backward wrapper for the Ada ROIAlign op: reads raw float pointers and sizes from the THC tensors and passes them to ROIAlignAdaBackwardLaucher. Uses the extern `state`.
138 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
139 |     float * rois_flat = THCudaTensor_data(state, rois);
140 | 
141 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
142 | 
143 |     // Number of ROIs (dim 0 of rois) and values per ROI (dim 1); presumably each row is (batch_index, x1, y1, x2, y2) -- TODO confirm against the kernel
144 |     int num_rois = THCudaTensor_size(state, rois, 0);
145 |     int size_rois = THCudaTensor_size(state, rois, 1);
146 |     if (size_rois != 5)
147 |     {
148 |         return 0;  // rois must be exactly 5 values wide
149 |     }
150 | 
151 |     // batch size (dim 0 of bottom_grad); the batch_size == 1 restriction below is commented out
152 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
153 |     //if (batch_size != 1)
154 |     //{
155 |      //   return 0;
156 |     //}
157 |     // data height (dim 2 of bottom_grad)
158 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
159 |     // data width (dim 3 of bottom_grad)
160 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
161 |     // Number of channels (dim 1 of bottom_grad)
162 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
163 | 
164 |     cudaStream_t stream = THCState_getCurrentStream(state);  // stream currently selected in the THC state
165 |     ROIAlignAdaBackwardLaucher(
166 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
167 |         data_width, num_channels, aligned_height,
168 |         aligned_width, rois_flat,
169 |         bottom_grad_flat, stream);
170 | 
171 |     return 1;  // the launcher's int result is discarded
172 | }
173 | 
174 | 
175 | int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
176 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
177 | {
178 |     // Forward wrapper for the dense-Ada ROIAlign op: reads raw float pointers and sizes from the THC tensors and passes them to ROIAlignDenseAdaForwardLaucher. Uses the extern `state`.
179 |     float * data_flat = THCudaTensor_data(state, features);
180 |     float * rois_flat = THCudaTensor_data(state, rois);
181 | 
182 |     float * output_flat = THCudaTensor_data(state, output);
183 | 
184 |     // Number of ROIs (dim 0 of rois) and values per ROI (dim 1); presumably each row is (batch_index, x1, y1, x2, y2) -- TODO confirm against the kernel
185 |     int num_rois = THCudaTensor_size(state, rois, 0);
186 |     int size_rois = THCudaTensor_size(state, rois, 1);
187 |     if (size_rois != 5)
188 |     {
189 |         return 0;  // rois must be exactly 5 values wide
190 |     }
191 | 
192 |     // batch size (unused: the batch_size == 1 restriction below is commented out)
193 |     //int batch_size = THCudaTensor_size(state, features, 0);
194 |     //if (batch_size != 1)
195 |     //{
196 |     //    return 0;
197 |     //}
198 | 
199 | 
200 |     // data height (dim 2 of features)
201 |     int data_height = THCudaTensor_size(state, features, 2);
202 |     // data width (dim 3 of features)
203 |     int data_width = THCudaTensor_size(state, features, 3);
204 |     // Number of channels (dim 1 of features)
205 |     int num_channels = THCudaTensor_size(state, features, 1);
206 | 
207 |     cudaStream_t stream = THCState_getCurrentStream(state);  // stream currently selected in the THC state
208 | 
209 |     ROIAlignDenseAdaForwardLaucher(
210 |         data_flat, spatial_scale, num_rois, data_height,
211 |         data_width, num_channels, aligned_height,
212 |         aligned_width, rois_flat,
213 |         output_flat, stream);
214 | 
215 |     return 1;  // the launcher's int result is discarded
216 | }
217 | 
218 | int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
219 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
220 | {
221 |     // Backward wrapper for the dense-Ada ROIAlign op: reads raw float pointers and sizes from the THC tensors and passes them to ROIAlignDenseAdaBackwardLaucher. Uses the extern `state`.
222 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
223 |     float * rois_flat = THCudaTensor_data(state, rois);
224 | 
225 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
226 | 
227 |     // Number of ROIs (dim 0 of rois) and values per ROI (dim 1); presumably each row is (batch_index, x1, y1, x2, y2) -- TODO confirm against the kernel
228 |     int num_rois = THCudaTensor_size(state, rois, 0);
229 |     int size_rois = THCudaTensor_size(state, rois, 1);
230 |     if (size_rois != 5)
231 |     {
232 |         return 0;  // rois must be exactly 5 values wide
233 |     }
234 | 
235 |     // batch size (dim 0 of bottom_grad); the batch_size == 1 restriction below is commented out
236 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
237 |     //if (batch_size != 1)
238 |     //{
239 |      //   return 0;
240 |     //}
241 |     // data height (dim 2 of bottom_grad)
242 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
243 |     // data width (dim 3 of bottom_grad)
244 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
245 |     // Number of channels (dim 1 of bottom_grad)
246 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
247 | 
248 |     cudaStream_t stream = THCState_getCurrentStream(state);  // stream currently selected in the THC state
249 |     ROIAlignDenseAdaBackwardLaucher(
250 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
251 |         data_width, num_channels, aligned_height,
252 |         aligned_width, rois_flat,
253 |         bottom_grad_flat, stream);
254 | 
255 |     return 1;  // the launcher's int result is discarded
256 | }
257 | 


--------------------------------------------------------------------------------
/modules/roi_align/src/roi_align_cuda.cpp:
--------------------------------------------------------------------------------
  1 | //#include <THC/THC.h>
  2 | //#include <ATen/ATen.h>
  3 | #include <ATen/cuda/CUDAContext.h>
  4 | #include "roi_align_cuda.hpp"
  5 | #include <math.h>
  6 | #include "cuda/roi_align_kernel.h"
  7 | //#include <torch/extension.h>
  8 | extern THCState *state;  // NOTE(review): not referenced by any wrapper visible in this part of the file -- confirm whether it is still needed
  9 | 
 10 | extern "C" int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 11 |                         at::Tensor& features, at::Tensor& rois, at::Tensor& output)
 12 | {
 13 |     // ROIAlign forward: validates rois, then calls ROIAlignForwardLaucher with the current CUDA stream.
 14 |     // Returns 1 after the launch call and 0 when rois does not have 5 columns; the launcher's result is discarded.
 15 |     int num_rois = rois.size(0);
 16 |     int size_rois = rois.size(1);
 17 |     if (size_rois != 5)
 18 |     {
 19 |         return 0;
 20 |     }
 21 |     // Feature-map extents come from dims 1..3 of features (presumably NCHW -- TODO confirm).
 22 |     int num_channels = features.size(1);
 23 |     int data_height = features.size(2);
 24 |     int data_width = features.size(3);
 25 | 
 26 |     // Hold the contiguous tensors in named locals so the exact buffer the kernel writes to is known afterwards.
 27 |     at::Tensor features_c = features.contiguous();
 28 |     at::Tensor rois_c = rois.contiguous();
 29 |     at::Tensor output_c = output.contiguous();
 30 |     cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 31 | 
 32 |     ROIAlignForwardLaucher(
 33 |         features_c.data<float>(), spatial_scale, num_rois, data_height,
 34 |         data_width, num_channels, aligned_height,
 35 |         aligned_width, rois_c.data<float>(),
 36 |         output_c.data<float>(), stream);
 37 | 
 38 |     // contiguous() returns a copy for a non-contiguous tensor; write the result back so the caller's output receives it.
 39 |     if (!output.is_contiguous())
 40 |     {
 41 |         output.copy_(output_c);
 42 |     }
 43 |     return 1;
 44 | }
 45 | 
 46 | extern "C" int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 47 |                         at::Tensor& top_grad, at::Tensor& rois, at::Tensor& bottom_grad)
 48 | {
 49 |     // ROIAlign backward: validates rois, then calls ROIAlignBackwardLaucher with the current CUDA stream.
 50 |     // Returns 1 after the launch call and 0 when rois does not have 5 columns; the launcher's result is discarded.
 51 |     int num_rois = rois.size(0);
 52 |     int size_rois = rois.size(1);
 53 |     if (size_rois != 5)
 54 |     {
 55 |         return 0;
 56 |     }
 57 |     // Gradient extents come from dims 0..3 of bottom_grad (presumably NCHW -- TODO confirm); any batch size is accepted.
 58 |     int batch_size = bottom_grad.size(0);
 59 |     int num_channels = bottom_grad.size(1);
 60 |     int data_height = bottom_grad.size(2);
 61 |     int data_width = bottom_grad.size(3);
 62 |     // Hold the contiguous tensors in named locals so the exact buffer the kernel writes to is known afterwards.
 63 |     at::Tensor top_grad_c = top_grad.contiguous();
 64 |     at::Tensor rois_c = rois.contiguous();
 65 |     at::Tensor bottom_grad_c = bottom_grad.contiguous();
 66 |     cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 67 |     ROIAlignBackwardLaucher(
 68 |         top_grad_c.data<float>(), spatial_scale, batch_size, num_rois, data_height,
 69 |         data_width, num_channels, aligned_height,
 70 |         aligned_width, rois_c.data<float>(),
 71 |         bottom_grad_c.data<float>(), stream);
 72 | 
 73 |     // contiguous() returns a copy for a non-contiguous tensor; write the gradient back so the caller's bottom_grad receives it.
 74 |     if (!bottom_grad.is_contiguous())
 75 |     {
 76 |         bottom_grad.copy_(bottom_grad_c);
 77 |     }
 78 |     return 1;
 79 | }
 80 | 
 81 | 
 82 | extern "C" int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 83 |                         at::Tensor& features, at::Tensor& rois, at::Tensor& output)
 84 | {
 85 |     // Number of ROIs
 86 |     int num_rois = rois.size(0);
 87 |     int size_rois = rois.size(1);
 88 |     if (size_rois != 5)
 89 |     {
 90 |         return 0;
 91 |     }
 92 |     // batch size
 93 |     //int batch_size = THCudaTensor_size(state, features, 0);
 94 |     //if (batch_size != 1)
 95 |     //{
 96 |     //    return 0;
 97 |     //}
 98 | 
 99 | 
100 |     // data height
101 |     int data_height = features.size(2);
102 |     // data width
103 |     int data_width = features.size(3);
104 |     // Number of channels
105 |     int num_channels = features.size(1);
106 | 
107 |     cudaStream_t stream = at::cuda::getCurrentCUDAStream();
108 | 
109 |     ROIAlignAdaForwardLaucher(
110 |         features.contiguous().data<float>(), spatial_scale, num_rois, data_height,
111 |         data_width, num_channels, aligned_height,
112 |         aligned_width, rois.contiguous().data<float>(),
113 |         output.contiguous().data<float>(), stream);
114 | 
115 |     return 1;
116 | }
117 | 
118 | extern "C" int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
119 |                         at::Tensor& top_grad, at::Tensor& rois, at::Tensor& bottom_grad)
120 | {
121 |     // Number of ROIs
122 |     int num_rois = rois.size(0);
123 |     int size_rois = rois.size(1);
124 |     if (size_rois != 5)
125 |     {
126 |         return 0;
127 |     }
128 | 
129 |     // batch size
130 |     int batch_size = bottom_grad.size(0);
131 |     //if (batch_size != 1)
132 |     //{
133 |      //   return 0;
134 |     //}
135 |     // data height
136 |     int data_height = bottom_grad.size(2);
137 |     // data width
138 |     int data_width = bottom_grad.size(3);
139 |     // Number of channels
140 |     int num_channels = bottom_grad.size(1);
141 | 
142 |     cudaStream_t stream = at::cuda::getCurrentCUDAStream();
143 |     ROIAlignAdaBackwardLaucher(
144 |         top_grad.contiguous().data<float>(), spatial_scale, batch_size, num_rois, data_height,
145 |         data_width, num_channels, aligned_height,
146 |         aligned_width, rois.contiguous().data<float>(),
147 |         bottom_grad.contiguous().data<float>(), stream);
148 | 
149 |     return 1;
150 | }
151 | 
152 | 
153 | extern "C" int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
154 |                         at::Tensor& features, at::Tensor& rois, at::Tensor& output)
155 | {
156 |     // Number of ROIs
157 |     int num_rois = rois.size(0);
158 |     int size_rois = rois.size(1);
159 |     if (size_rois != 5)
160 |     {
161 |         return 0;
162 |     }
163 |     // batch size
164 |     //int batch_size = THCudaTensor_size(state, features, 0);
165 |     //if (batch_size != 1)
166 |     //{
167 |     //    return 0;
168 |     //}
169 | 
170 | 
171 |     // data height
172 |     int data_height = features.size(2);
173 |     // data width
174 |     int data_width = features.size(3);
175 |     // Number of channels
176 |     int num_channels = features.size(1);
177 | 
178 |     cudaStream_t stream = at::cuda::getCurrentCUDAStream();
179 | 
180 |     ROIAlignDenseAdaForwardLaucher(
181 |         features.contiguous().data<float>(), spatial_scale, num_rois, data_height,
182 |         data_width, num_channels, aligned_height,
183 |         aligned_width, rois.contiguous().data<float>(),
184 |         output.contiguous().data<float>(), stream);
185 | 
186 |     return 1;
187 | }
188 | 
189 | extern "C" int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
190 |                         at::Tensor& top_grad, at::Tensor& rois, at::Tensor& bottom_grad)
191 | {
192 |     // Grab the input tensor
193 |     /*float * top_grad_flat = THCudaTensor_data(state, top_grad);
194 |     float * rois_flat = THCudaTensor_data(state, rois);
195 | 
196 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
197 | 
198 |     // Number of ROIs
199 |     int num_rois = THCudaTensor_size(state, rois, 0);
200 |     int size_rois = THCudaTensor_size(state, rois, 1);
201 |     if (size_rois != 5)
202 |     {
203 |         return 0;
204 |     }
205 | 
206 |     // batch size
207 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
208 |     //if (batch_size != 1)
209 |     //{
210 |      //   return 0;
211 |     //}
212 |     // data height
213 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
214 |     // data width
215 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
216 |     // Number of channels
217 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
218 | 
219 |     cudaStream_t stream = THCState_getCurrentStream(state);*/
220 |     // Number of ROIs
221 |     int num_rois = rois.size(0);
222 |     int size_rois = rois.size(1);
223 |     if (size_rois != 5)
224 |     {
225 |         return 0;
226 |     }
227 | 
228 |     // batch size
229 |     int batch_size = bottom_grad.size(0);
230 |     //if (batch_size != 1)
231 |     //{
232 |      //   return 0;
233 |     //}
234 |     // data height
235 |     int data_height = bottom_grad.size(2);
236 |     // data width
237 |     int data_width = bottom_grad.size(3);
238 |     // Number of channels
239 |     int num_channels = bottom_grad.size(1);
240 | 
241 |     cudaStream_t stream = at::cuda::getCurrentCUDAStream();
242 |     ROIAlignDenseAdaBackwardLaucher(
243 |         top_grad.contiguous().data<float>(), spatial_scale, batch_size, num_rois, data_height,
244 |         data_width, num_channels, aligned_height,
245 |         aligned_width, rois.contiguous().data<float>(),
246 |         bottom_grad.contiguous().data<float>(), stream);
247 | 
248 |     return 1;
249 | }
250 | 
251 | /*PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
252 |   m.def("roi_align_forward_cuda", &roi_align_forward_cuda, "ROIAlign_forward");
253 |   m.def("roi_align_backward_cuda", &roi_align_backward_cuda, "ROIAlign_backward");
254 |   m.def("roi_align_ada_forward_cuda", &roi_align_ada_forward_cuda, "ROIAlign_Ada_forward");
255 |   m.def("roi_align_ada_backward_cuda", &roi_align_ada_backward_cuda, "ROIAlign_Ada_backward");
256 |   m.def("roi_align_dense_ada_forward_cuda", &roi_align_dense_ada_forward_cuda, "ROIAlign_Dense_Ada_forward");
257 |   m.def("roi_align_dense_ada_backward_cuda", &roi_align_dense_ada_backward_cuda, "ROIAlign_Dense_Ada_backward");
258 | }*/
259 | 


--------------------------------------------------------------------------------
/modules/roi_align/src/roi_align_cuda.hpp:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <ATen/ATen.h>
 3 | #include <torch/extension.h>
 4 | 
 5 | //THC_CLASS at::Context& at::globalContext();
 6 | //THCState *state = at::globalContext().getTHCState();
 7 | //ATen_CLASS at::Context& at::globalContext();
 8 | //THCState *state = at::globalContext().thc_state;
 9 | //extern THCState *state;
10 | 
// Declarations of the host-side ROIAlign entry points implemented in
// roi_align_cuda.cpp. All six share one shape: pooled output size
// (aligned_height x aligned_width), a spatial scale, and three tensors.
// Each implementation returns 0 when `rois` does not have 5 columns and
// 1 after launching its kernel.
// NOTE(review): the functions are given C linkage but take C++ reference
// parameters (at::Tensor&), so they are not callable from plain C; confirm the
// extern "C" wrapper is still wanted.
#ifdef __cplusplus
extern "C" {
#endif

// Plain ROIAlign: forward reads `features` and fills `output`.
int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        at::Tensor& features, at::Tensor& rois, at::Tensor& output);

// Plain ROIAlign: backward reads `top_grad` and fills `bottom_grad`.
int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        at::Tensor& top_grad, at::Tensor& rois, at::Tensor& bottom_grad);


// "Ada" variant, forward pass.
int roi_align_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        at::Tensor& features, at::Tensor& rois, at::Tensor& output);

// "Ada" variant, backward pass.
int roi_align_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        at::Tensor& top_grad, at::Tensor& rois, at::Tensor& bottom_grad);

// "Dense ada" variant, forward pass.
int roi_align_dense_ada_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        at::Tensor& features, at::Tensor& rois, at::Tensor& output);

// "Dense ada" variant, backward pass.
int roi_align_dense_ada_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
                        at::Tensor& top_grad, at::Tensor& rois, at::Tensor& bottom_grad);


#ifdef __cplusplus
}
#endif
38 | 
// Python bindings: exposes the six entry points declared above under their
// C++ names, each with a short docstring.
// NOTE(review): this module definition lives in a header, so every translation
// unit that includes the header defines the module init function. This
// presumably works because only one .cpp includes it - confirm, or move the
// block into the .cpp (a commented-out copy already exists there).
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("roi_align_forward_cuda", &roi_align_forward_cuda, "ROIAlign_forward");
  m.def("roi_align_backward_cuda", &roi_align_backward_cuda, "ROIAlign_backward");
  m.def("roi_align_ada_forward_cuda", &roi_align_ada_forward_cuda, "ROIAlign_Ada_forward");
  m.def("roi_align_ada_backward_cuda", &roi_align_ada_backward_cuda, "ROIAlign_Ada_backward");
  m.def("roi_align_dense_ada_forward_cuda", &roi_align_dense_ada_forward_cuda, "ROIAlign_Dense_Ada_forward");
  m.def("roi_align_dense_ada_backward_cuda", &roi_align_dense_ada_backward_cuda, "ROIAlign_Dense_Ada_backward");
}
47 | 


--------------------------------------------------------------------------------
/modules/sample_generator.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | #from PIL import Image
 3 | 
 4 | from utils import *
 5 | 
 6 | def gen_samples(generator, bbox, n, overlap_range=None, scale_range=None):
 7 | 
 8 |     if overlap_range is None and scale_range is None:
 9 |         return generator(bbox, n)
10 | 
11 |     else:
12 |         samples = None
13 |         remain = n
14 |         factor = 2
15 |         while remain > 0 and factor < 16:
16 |             samples_ = generator(bbox, int(remain*factor))
17 | 
18 |             idx = np.ones(len(samples_), dtype=bool)
19 |             if overlap_range is not None:
20 |                 r = overlap_ratio(samples_, bbox)
21 |                 idx *= (r >= overlap_range[0]) * (r <= overlap_range[1])
22 |             if scale_range is not None:
23 |                 s = np.prod(samples_[:,2:], axis=1) / np.prod(bbox[2:])
24 |                 idx *= (s >= scale_range[0]) * (s <= scale_range[1])
25 | 
26 |             samples_ = samples_[idx,:]
27 |             samples_ = samples_[:min(int(remain), len(samples_))]
28 |             if samples is None:
29 |                 samples = samples_
30 |             else:
31 |                 samples = np.concatenate([samples, samples_])
32 |             remain = n - len(samples)
33 |             factor = factor*2
34 | 
35 |         return samples
36 | 
37 | 
class SampleGenerator():
    """Random bounding-box sampler around a target box.

    ``type`` selects the perturbation scheme ('gaussian', 'uniform' or
    'whole'); ``img_size`` is (w, h). ``trans_f`` / ``scale_f`` control the
    translation and scale spread, ``aspect_f`` (optional) adds an aspect-ratio
    jitter, and ``valid`` keeps every box completely inside the image.
    """

    def __init__(self, type, img_size, trans_f=1, scale_f=1, aspect_f=None, valid=False):
        self.type, self.img_size = type, np.array(img_size)  # img_size: (w, h)
        self.trans_f, self.scale_f = trans_f, scale_f
        self.aspect_f, self.valid = aspect_f, valid

    def __call__(self, bb, n):
        """Return an (n, 4) float32 array of [min_x, min_y, w, h] samples.

        bb -- target bbox (min_x, min_y, w, h)
        n  -- number of samples to draw
        """
        box = np.array(bb, dtype='float32')
        wh = box[2:]

        # Work in (center_x, center_y, w, h); start with n copies of the target.
        seed = np.array([box[0]+box[2]/2, box[1]+box[3]/2, box[2], box[3]], dtype='float32')
        boxes = np.tile(seed[None,:],(n,1))

        # Aspect-ratio jitter: width and height get opposite exponents.
        if self.aspect_f is not None:
            tilt = np.random.rand(n,1)*2-1
            boxes[:,2:] *= self.aspect_f ** np.concatenate([tilt, -tilt],axis=1)

        kind = self.type
        if kind=='gaussian':
            # Clipped normal noise on position and scale.
            boxes[:,:2] += self.trans_f * np.mean(wh) * np.clip(0.5*np.random.randn(n,2),-1,1)
            boxes[:,2:] *= self.scale_f ** np.clip(0.5*np.random.randn(n,1),-1,1)

        elif kind=='uniform':
            # Uniform noise in [-1, 1) on position and scale exponent.
            boxes[:,:2] += self.trans_f * np.mean(wh) * (np.random.rand(n,2)*2-1)
            boxes[:,2:] *= self.scale_f ** (np.random.rand(n,1)*2-1)

        elif kind=='whole':
            # Centers picked from a shuffled regular grid over the image.
            side = int(2*np.sqrt(n))
            grid = np.dstack(np.meshgrid(np.linspace(0,1,side),np.linspace(0,1,side))).reshape(-1,2)
            grid = np.random.permutation(grid)[:n]
            boxes[:,:2] = wh/2 + grid * (self.img_size-wh/2-1)
            boxes[:,2:] *= self.scale_f ** (np.random.rand(n,1)*2-1)

        # Sizes are limited to [5, img_size - 5].
        boxes[:,2:] = np.clip(boxes[:,2:], 5, self.img_size-5.)
        if not self.valid:
            # Only the center has to stay within the image.
            boxes[:,:2] = np.clip(boxes[:,:2], 0, self.img_size)
        else:
            # The whole box has to stay within the image.
            boxes[:,:2] = np.clip(boxes[:,:2], boxes[:,2:]/2, self.img_size-boxes[:,2:]/2-1)

        # Back to (min_x, min_y, w, h).
        boxes[:,:2] -= boxes[:,2:]/2

        return boxes

    def set_trans_f(self, trans_f):
        """Replace the translation factor."""
        self.trans_f = trans_f

    def get_trans_f(self):
        """Return the current translation factor."""
        return self.trans_f
94 | 
95 | 


--------------------------------------------------------------------------------
/modules/utils.py:
--------------------------------------------------------------------------------
 1 | #from scipy.misc import imresize
 2 | from PIL import Image
 3 | import numpy as np
 4 | 
 5 | 
 6 | ##################################################################################
 7 | ############################Do not modify opts anymore.###########################
 8 | ######################Becuase of synchronization of options#######################
 9 | ##################################################################################
10 | 
def overlap_ratio(rect1, rect2):
    '''
    Compute overlap ratio (intersection over union) between two rects
    - rect: 1d array of [x,y,w,h] or
            2d array of N x [x,y,w,h]
      (any array-like is accepted; 1d inputs are broadcast against 2d ones)

    Returns a 1d array of ratios in [0, 1]. Pairs whose union area is not
    positive (e.g. two zero-area boxes) get a ratio of 0 instead of NaN/inf.
    '''

    rect1 = np.asarray(rect1)
    rect2 = np.asarray(rect2)

    if rect1.ndim==1:
        rect1 = rect1[None,:]
    if rect2.ndim==1:
        rect2 = rect2[None,:]

    left = np.maximum(rect1[:,0], rect2[:,0])
    right = np.minimum(rect1[:,0]+rect1[:,2], rect2[:,0]+rect2[:,2])
    top = np.maximum(rect1[:,1], rect2[:,1])
    bottom = np.minimum(rect1[:,1]+rect1[:,3], rect2[:,1]+rect2[:,3])

    intersect = np.maximum(0,right - left) * np.maximum(0,bottom - top)
    union = rect1[:,2]*rect1[:,3] + rect2[:,2]*rect2[:,3] - intersect

    # Degenerate boxes make the union zero; silence the division warning and
    # overwrite those entries explicitly below.
    with np.errstate(divide='ignore', invalid='ignore'):
        iou = np.clip(intersect / union, 0, 1)
    iou[union <= 0] = 0
    return iou
32 | 
33 | 
def crop_image(img, bbox, img_size=[107,107], padding=16, valid=False):
    """Crop ``bbox`` (plus padding) out of ``img`` and resize it to ``img_size``.

    img      -- H x W x 3 uint8 image array
    bbox     -- [x, y, w, h] of the region to crop
    img_size -- output size as [w, h]
    padding  -- context added on each side, expressed in output pixels
    valid    -- when True the crop window is clamped to the image; otherwise
                parts outside the image are filled with gray (128)

    Returns the resized crop as a numpy array.
    """
    ## img_size = [w,h]
    x,y,w,h = np.array(bbox,dtype='float32')

    half_w, half_h = w/2, h/2
    center_x, center_y = x + half_w, y + half_h

    if padding > 0:
        # Convert the padding from output pixels to source pixels.
        pad_w = padding * w/img_size[0]
        pad_h = padding * h/img_size[1]
        half_w += pad_w
        half_h += pad_h

    img_h, img_w, _ = img.shape
    # +0.5 before int() rounds to the nearest pixel for non-negative values.
    min_x = int(center_x - half_w + 0.5)
    min_y = int(center_y - half_h + 0.5)
    max_x = int(center_x + half_w + 0.5)
    max_y = int(center_y + half_h + 0.5)

    if valid:
        min_x = max(0, min_x)
        min_y = max(0, min_y)
        max_x = min(img_w, max_x)
        max_y = min(img_h, max_y)

    if min_x >=0 and min_y >= 0 and max_x <= img_w and max_y <= img_h:
        cropped = img[min_y:max_y, min_x:max_x, :]

    else:
        # Window sticks out of the image: paste the visible part onto gray.
        min_x_val = max(0, min_x)
        min_y_val = max(0, min_y)
        max_x_val = min(img_w, max_x)
        max_y_val = min(img_h, max_y)

        cropped = 128 * np.ones((max_y-min_y, max_x-min_x, 3), dtype='uint8')
        cropped[min_y_val-min_y:max_y_val-min_y, min_x_val-min_x:max_x_val-min_x, :] \
            = img[min_y_val:max_y_val, min_x_val:max_x_val, :]

    # PIL's resize expects (width, height); img_size is [w, h], so pass it in
    # that order (the two were previously swapped, which only went unnoticed
    # because the default size is square).
    scaled = np.array(Image.fromarray(cropped).resize((img_size[0],img_size[1])))
    return scaled
74 | 
def samples2maskroi(samples,receptive_field, cshape,padded_scene_size,padding_ratio):
    """Map [x,y,w,h] samples to [x1,y1,x2,y2] rois on the resized scene.

    The input is copied, converted to corner form, grown symmetrically by
    ``padding_ratio``, scaled per axis by ``cshape / padded_scene_size`` and
    finally has ``receptive_field`` subtracted from the far corner, which is
    kept at least one unit beyond the near corner.
    """
    # per-axis ratio between the resized scene and the padded original
    scale = cshape / padded_scene_size
    boxes = np.copy(samples)

    # xywh -> x1y1x2y2
    boxes[:, 2:4] += boxes[:, 0:2]

    # grow every box around its center
    margin = (boxes[:,2:4]-boxes[:,0:2])*(padding_ratio-1.)/2.
    boxes[:,0:2] -= margin
    boxes[:,2:4] += margin

    # x axis uses columns (0, 2), y axis uses columns (1, 3)
    for axis in (0, 1):
        near, far = axis, axis + 2
        boxes[:, near] *= scale[axis]
        boxes[:, far] = np.maximum(boxes[:,near]+1,boxes[:, far]*scale[axis] - receptive_field)

    return boxes
99 | 


--------------------------------------------------------------------------------
/options.py:
--------------------------------------------------------------------------------
 1 | from collections import OrderedDict
 2 | 
 3 | opts = OrderedDict()
 4 | opts['use_gpu'] = True
 5 | 
 6 | 
 7 | opts['model_path'] = './models/model_imagenet_seqbatch50_final.pth'
 8 | 
 9 | opts['img_size'] = 107
10 | opts['padding'] = 1.2
11 | opts['jitter'] = True
12 | opts['result_path']='./result.npy'
13 | opts['adaptive_align']=True
14 | opts['batch_pos'] = 32
15 | opts['batch_neg'] = 96
16 | opts['batch_neg_cand'] = 1024
17 | opts['batch_test'] = 256
18 | 
19 | opts['n_samples'] = 256
20 | opts['trans_f'] = 0.6
21 | opts['scale_f'] = 1.05
22 | opts['trans_f_expand'] = 1.4
23 | 
24 | opts['n_bbreg'] = 1000
25 | opts['overlap_bbreg'] = [0.6, 1]
26 | opts['scale_bbreg'] = [1, 2]
27 | 
28 | opts['lr_init'] = 0.0001 # original = 0.0001
29 | opts['maxiter_init'] = 50 # original = 30
30 | opts['n_pos_init'] = 500
31 | opts['n_neg_init'] = 5000
32 | opts['overlap_pos_init'] = [0.7, 1]
33 | opts['overlap_neg_init'] = [0, 0.5]
34 | 
35 | opts['lr_update'] = 0.0003 # original = 0.0002
36 | opts['maxiter_update'] = 15 # original = 15
37 | opts['n_pos_update'] = 50
38 | opts['n_neg_update'] = 200
39 | opts['overlap_pos_update'] = [0.7, 1]
40 | opts['overlap_neg_update'] = [0, 0.3]
41 | 
42 | opts['success_thr'] = 0. # original = 0
43 | opts['n_frames_short'] = 20
44 | opts['n_frames_long'] = 100
45 | opts['long_interval'] = 10
46 | 
47 | opts['w_decay'] = 0.0005 # original = 0.0005
48 | opts['momentum'] = 0.9
49 | opts['grad_clip'] = 10 # original = 10
50 | opts['lr_mult'] = {'fc6':10}
51 | opts['ft_layers'] = ['fc']
52 | 
53 | 
54 | 
55 | 
56 | 
57 | 


--------------------------------------------------------------------------------
/python_RLS_RTMDNet_bk.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python
 2 | 
 3 | import vot
 4 | import sys
 5 | import time
 6 | import cv2
 7 | import numpy
 8 | import collections
 9 | 
class NCCTracker(object):
    """Template tracker based on normalized cross-correlation.

    The template is cut from the first frame; every later frame is searched
    inside a square window (twice the larger target side) centered on the
    last known position.
    """

    def __init__(self, image, region):
        """Store the template cut from ``image`` at ``region``.

        ``region`` must expose ``x``, ``y``, ``width`` and ``height``.
        """
        self.window = max(region.width, region.height) * 2

        # Clamp the template rectangle to the image.
        left = max(region.x, 0)
        top = max(region.y, 0)

        right = min(region.x + region.width, image.shape[1] - 1)
        bottom = min(region.y + region.height, image.shape[0] - 1)

        self.template = image[int(top):int(bottom), int(left):int(right)]
        # position is the target CENTER, size is (width, height)
        self.position = (region.x + region.width / 2, region.y + region.height / 2)
        self.size = (region.width, region.height)

    def track(self, image):
        """Locate the template in ``image``.

        Always returns ``(vot.Rectangle, confidence)``. When the clamped
        search window is smaller than the template no matching is possible;
        the last known box is returned with confidence 0.0 and the stored
        position is left unchanged.
        """
        half_window = float(self.window) / 2

        left = max(round(self.position[0] - half_window), 0)
        top = max(round(self.position[1] - half_window), 0)

        right = min(round(self.position[0] + half_window), image.shape[1] - 1)
        bottom = min(round(self.position[1] + half_window), image.shape[0] - 1)

        if right - left < self.template.shape[1] or bottom - top < self.template.shape[0]:
            # position is the center, so the top-left corner is center - size/2
            # (this used to add size/2). The confidence is returned as well so
            # callers can always unpack two values.
            last_box = vot.Rectangle(self.position[0] - float(self.size[0]) / 2,
                                     self.position[1] - float(self.size[1]) / 2,
                                     self.size[0], self.size[1])
            return last_box, 0.0

        cut = image[int(top):int(bottom), int(left):int(right)]

        matches = cv2.matchTemplate(cut, self.template, cv2.TM_CCOEFF_NORMED)
        min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(matches)

        # max_loc is the top-left corner of the best match inside the cut.
        self.position = (left + max_loc[0] + float(self.size[0]) / 2, top + max_loc[1] + float(self.size[1]) / 2)

        return vot.Rectangle(left + max_loc[0], top + max_loc[1], self.size[0], self.size[1]), max_val
44 | 
# Driver script: open the VOT session, initialise the tracker on the first
# frame, then report one (region, confidence) pair per remaining frame.
# NOTE(review): presumably "rectangle" selects the region format exchanged
# with the toolkit - confirm against the vot module.
handle = vot.VOT("rectangle")
selection = handle.region()

# First frame; a falsy value means there is nothing to track.
imagefile = handle.frame()
if not imagefile:
    sys.exit(0)

# Frames are read as single-channel grayscale images.
image = cv2.imread(imagefile, cv2.IMREAD_GRAYSCALE)
tracker = NCCTracker(image, selection)
while True:
    # A falsy frame path ends the loop.
    imagefile = handle.frame()
    if not imagefile:
        break
    image = cv2.imread(imagefile, cv2.IMREAD_GRAYSCALE)
    # track() is expected to return a (region, confidence) pair here.
    region, confidence = tracker.track(image)
    handle.report(region, confidence)
61 | 
62 | 


--------------------------------------------------------------------------------
/train_mrcnn.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import pickle
  4 | import time
  5 | 
  6 | import torch
  7 | import torch.optim as optim
  8 | from torch.autograd import Variable
  9 | 
 10 | sys.path.insert(0,'./modules')
 11 | from data_prov import *
 12 | from model import *
 13 | from pretrain_options import *
 14 | from tracker import *
 15 | import numpy as np
 16 | 
 17 | import argparse
 18 | 
 19 | torch.cuda.set_device(1)
 20 | def set_optimizer(model, lr_base, lr_mult=pretrain_opts['lr_mult'], momentum=pretrain_opts['momentum'], w_decay=pretrain_opts['w_decay']):
 21 |     params = model.get_learnable_params()
 22 |     param_list = []
 23 |     for k, p in params.iteritems():
 24 |         lr = lr_base
 25 |         for l, m in lr_mult.iteritems():
 26 |             if k.startswith(l):
 27 |                 lr = lr_base * m
 28 |         param_list.append({'params': [p], 'lr': lr})
 29 |     optimizer = optim.SGD(param_list, lr=lr, momentum=momentum, weight_decay=w_decay)
 30 |     return optimizer
 31 | 
 32 | def genConfig(seq_path, set_type):
 33 | 
 34 |     path, seqname = os.path.split(seq_path)
 35 | 
 36 |     if set_type == 'OTB':
 37 |         img_list = sorted([seq_path + '/img/' + p for p in os.listdir(seq_path + '/img') if os.path.splitext(p)[1] == '.jpg'])
 38 | 
 39 |         if (seqname == 'Jogging') or (seqname == 'Skating2'):
 40 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.1.txt')
 41 |         elif seqname =='Human4':
 42 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.2.txt', delimiter=',')
 43 |         elif (seqname == 'BlurBody')  or (seqname == 'BlurCar1') or (seqname == 'BlurCar2') or (seqname == 'BlurCar3') \
 44 |                 or (seqname == 'BlurCar4') or (seqname == 'BlurFace') or (seqname == 'BlurOwl') or (seqname == 'Board') \
 45 |                 or (seqname == 'Box')   or (seqname == 'Car4')  or (seqname == 'CarScale') or (seqname == 'ClifBar') \
 46 |                 or (seqname == 'Couple')  or (seqname == 'Crossing')  or (seqname == 'Dog') or (seqname == 'FaceOcc1') \
 47 |                 or (seqname == 'Girl') or (seqname == 'Rubik') or (seqname == 'Singer1') or (seqname == 'Subway') \
 48 |                 or (seqname == 'Surfer') or (seqname == 'Sylvester') or (seqname == 'Toy') or (seqname == 'Twinnings') \
 49 |                 or (seqname == 'Vase') or (seqname == 'Walking') or (seqname == 'Walking2') or (seqname == 'Woman')   :
 50 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.txt')
 51 |         elif (seqname == 'Freeman4') or (seqname == 'Diving') or (seqname =='Freeman3') or (seqname =='Football1'):
 52 |             gt = np.loadtxt(seq_path + '/groundtruth_rect_revise.txt', delimiter=',')
 53 |         else:
 54 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.txt', delimiter=',')
 55 | 
 56 |         if seqname == 'David':
 57 |             img_list = img_list[300:]
 58 |             # gt = gt[300:,:]
 59 |         if seqname == 'Football1':
 60 |             img_list = img_list[0:73]
 61 |         if seqname == 'Freeman3':
 62 |             img_list = img_list[0:459]
 63 |         if seqname == 'Freeman4':
 64 |             img_list = img_list[0:282]
 65 | 
 66 |     elif set_type=='VOT/2016':
 67 |         img_list = sorted([seq_path + '/'+p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.jpg'])
 68 |         gt = np.loadtxt(seq_path + '/groundtruth.txt', delimiter=',')
 69 | 
 70 |         ##polygon to rect
 71 |     if gt.shape[1] == 8:
 72 |         x_min = np.min(gt[:, [0, 2, 4, 6]], axis=1)[:, None]
 73 |         y_min = np.min(gt[:, [1, 3, 5, 7]], axis=1)[:, None]
 74 |         x_max = np.max(gt[:, [0, 2, 4, 6]], axis=1)[:, None]
 75 |         y_max = np.max(gt[:, [1, 3, 5, 7]], axis=1)[:, None]
 76 |         gt = np.concatenate((x_min, y_min, x_max - x_min, y_max - y_min), axis=1)
 77 | 
 78 |     return img_list, gt
 79 | 
 80 | 
 81 | def train_mdnet():
 82 | 
 83 |     ## set image directory
 84 |     if pretrain_opts['set_type'] == 'OTB':
 85 |         img_home = '/home/ilchae/dataset/tracking/OTB/'
 86 |         data_path = './otb-vot15.pkl'
 87 |     if pretrain_opts['set_type'] == 'VOT':
 88 |         img_home = '/home/ilchae/dataset/tracking/VOT/'
 89 |         data_path = './vot-otb.pkl'
 90 |     if pretrain_opts['set_type'] == 'IMAGENET':
 91 |         img_home = '/mnt/jgao/jgao/ILSVRC2015/Data/VID/train/'
 92 |         data_path = './modules/imagenet_refine.pkl'
 93 | 
 94 |     ## Init dataset ##
 95 |     with open(data_path, 'rb') as fp:
 96 |         data = pickle.load(fp)
 97 | 
 98 | 
 99 |     K = len(data)
100 | 
101 |     ## Init model ##
102 |     model = MDNet(pretrain_opts['init_model_path'], K)
103 |     if pretrain_opts['adaptive_align']:
104 |         align_h = model.roi_align_model.aligned_height
105 |         align_w = model.roi_align_model.aligned_width
106 |         spatial_s = model.roi_align_model.spatial_scale
107 |         model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s)
108 | 
109 |     if pretrain_opts['use_gpu']:
110 |         model = model.cuda()
111 |     model.set_learnable_params(pretrain_opts['ft_layers'])
112 |     model.train()
113 | 
114 |     dataset = [None] * K
115 |     for k, (seqname, seq) in enumerate(data.iteritems()):
116 |         img_list = seq['images']
117 |         gt = seq['gt']
118 |         if pretrain_opts['set_type'] == 'OTB':
119 |             img_dir = os.path.join(img_home, seqname+'/img')
120 |         if pretrain_opts['set_type'] == 'VOT':
121 |             img_dir = img_home + seqname
122 |         if pretrain_opts['set_type'] == 'IMAGENET':
123 |             img_dir = img_home + seqname
124 |         dataset[k]=RegionDataset(img_dir,img_list,gt,model.receptive_field,pretrain_opts)
125 |         #print(img_dir)
126 |         #print(img_list)
127 | 
128 | 
129 |     ## Init criterion and optimizer ##
130 |     binaryCriterion = BinaryLoss()
131 |     interDomainCriterion = nn.CrossEntropyLoss()
132 |     evaluator = Precision()
133 |     optimizer = set_optimizer(model, pretrain_opts['lr'])
134 | 
135 |     best_score = 0.
136 |     batch_cur_idx = 0
137 |     for i in range(pretrain_opts['n_cycles']):
138 |         print "==== Start Cycle %d ====" % (i)
139 |         k_list = np.random.permutation(K)
140 |         prec = np.zeros(K)
141 |         #totalTripleLoss = np.zeros(K)
142 |         totalInterClassLoss = np.zeros(K)
143 |         for j, k in enumerate(k_list):
144 |             tic = time.time()
145 |             try:
146 |                 cropped_scenes, pos_rois, neg_rois= dataset[k].next()
147 |             except:
148 |                 #print "______except1_______"
149 |                 continue
150 | 
151 |             try:
152 |                 for sidx in range(0, len(cropped_scenes)):
153 |                     cur_scene = cropped_scenes[sidx]
154 |                     cur_pos_rois = pos_rois[sidx]
155 |                     cur_neg_rois = neg_rois[sidx]
156 | 
157 |                     cur_scene = Variable(cur_scene)
158 |                     cur_pos_rois = Variable(cur_pos_rois)
159 |                     cur_neg_rois = Variable(cur_neg_rois)
160 |                     if pretrain_opts['use_gpu']:
161 |                         cur_scene = cur_scene.cuda()
162 |                         cur_pos_rois = cur_pos_rois.cuda()
163 |                         cur_neg_rois = cur_neg_rois.cuda()
164 |                     cur_feat_map = model(cur_scene, k, out_layer='conv3')
165 | 
166 |                     if cur_pos_rois.size(0) == 0 or cur_neg_rois.size(0) == 0:
167 |                         print "______except num rois_______"
168 |                         continue
169 | 
170 |                     #print "rois %2d, %2d" % \
171 |                     #  (cur_pos_rois.size(0), cur_neg_rois.size(0))
172 |                     cur_pos_feats = model.roi_align_model(cur_feat_map, cur_pos_rois)
173 |                     cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1)
174 |                     cur_neg_feats = model.roi_align_model(cur_feat_map, cur_neg_rois)
175 |                     cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1)
176 | 
177 |                     if sidx == 0:
178 |                         pos_feats = [cur_pos_feats]
179 |                         neg_feats = [cur_neg_feats]
180 |                     else:
181 |                         pos_feats.append(cur_pos_feats)
182 |                         neg_feats.append(cur_neg_feats)
183 |                 feat_dim = cur_neg_feats.size(1)
184 |                 pos_feats = torch.stack(pos_feats,dim=0).view(-1,feat_dim)
185 |                 neg_feats = torch.stack(neg_feats,dim=0).view(-1,feat_dim)
186 |             except:
187 |                 #print "______except2_______"
188 |                 continue
189 | 
190 | 
191 |             pos_score = model(pos_feats, k, in_layer='fc4')
192 |             neg_score = model(neg_feats, k, in_layer='fc4')
193 | 
194 |             cls_loss = binaryCriterion(pos_score, neg_score)
195 | 
196 |             ## inter frame classification
197 | 
198 |             interclass_label = Variable(torch.zeros((pos_score.size(0))).long())
199 |             if opts['use_gpu']:
200 |                 interclass_label = interclass_label.cuda()
201 |             total_interclass_score = pos_score[:,1].contiguous()
202 |             total_interclass_score = total_interclass_score.view((pos_score.size(0),1))
203 | 
204 |             K_perm = np.random.permutation(K)
205 |             K_perm = K_perm[0:100]
206 |             for cidx in K_perm:
207 |                 if k == cidx:
208 |                     continue
209 |                 else:
210 |                     interclass_score = model(pos_feats, cidx, in_layer='fc4')
211 |                     total_interclass_score = torch.cat((total_interclass_score,interclass_score[:,1].contiguous().view((interclass_score.size(0),1))),dim=1)
212 | 
213 |             interclass_loss = interDomainCriterion(total_interclass_score, interclass_label)
214 |             totalInterClassLoss[k] = interclass_loss.item()
215 | 
216 |             (cls_loss+0.1*interclass_loss).backward()
217 | 
218 |             batch_cur_idx+=1
219 |             if (batch_cur_idx%pretrain_opts['seqbatch_size'])==0:
220 |                 torch.nn.utils.clip_grad_norm(model.parameters(), pretrain_opts['grad_clip'])
221 |                 optimizer.step()
222 |                 model.zero_grad()
223 |                 batch_cur_idx = 0
224 | 
225 |             ## evaluator
226 |             prec[k] = evaluator(pos_score, neg_score)
227 |             ## computation latency
228 |             toc = time.time() - tic
229 | 
230 |             print "Cycle %2d, K %2d (%2d), BinLoss %.3f, Prec %.3f, interLoss %.3f, Time %.3f" % \
231 |                       (i, j, k, cls_loss.item(), prec[k], totalInterClassLoss[k], toc)
232 | 
233 |         cur_score = prec.mean()
234 |         try:
235 |             total_miou = sum(total_iou)/len(total_iou)
236 |         except:
237 |             total_miou = 0.
238 |         print "Mean Precision: %.3f Triple Loss: %.3f Inter Loss: %.3f IoU: %.3f" % (prec.mean(), cur_score, totalInterClassLoss.mean(),total_miou)
239 |         if cur_score > best_score:
240 |             best_score = cur_score
241 |             if pretrain_opts['use_cpu']:
242 |                 model = model.cpu()
243 |             states = {'shared_layers': model.layers.state_dict()}
244 |             print "Save model to %s" % pretrain_opts['model_path']
245 |             torch.save(states, pretrain_opts['model_path'])
246 |             if pretrain_opts['use_gpu']:
247 |                 model = model.cuda()
248 | 
249 | 
if __name__ == "__main__":
    # Script entry point: read command-line overrides, copy them into the
    # shared ``pretrain_opts`` dict, print the resulting options and start
    # training via ``train_mdnet()``.

    parser = argparse.ArgumentParser()
    parser.add_argument("-set_type", default='VOT')
    parser.add_argument("-padding_ratio", default=5., type=float)
    parser.add_argument("-model_path", default="./models/rt_mdnet.pth", help="model path")
    parser.add_argument("-frame_interval", default=2, type=int, help="frame interval in batch. ex) interval=1 -> [1 2 3 4 5], interval=2 ->[1 3 5]")
    parser.add_argument("-init_model_path", default="./models/imagenet-vgg-m.mat")
    parser.add_argument("-batch_frames", default=8, type=int)
    parser.add_argument("-lr", default=0.0001, type=float)
    parser.add_argument("-batch_pos", default=64, type=int)
    parser.add_argument("-batch_neg", default=196, type=int)
    parser.add_argument("-n_cycles", default=1000, type=int)
    # Defaults to True; passing the flag stores False.
    parser.add_argument("-adaptive_align", default=True, action='store_false')
    parser.add_argument("-seqbatch_size", default=50, type=int)

    args = parser.parse_args()

    ##################################################################################
    #########################Just modify opts in this script.#########################
    ######################Because of synchronization of options#######################
    ##################################################################################
    ##option setting
    pretrain_opts['set_type'] = args.set_type
    pretrain_opts['padding_ratio'] = args.padding_ratio
    # int() drops any fractional part of the padding ratio before scaling.
    pretrain_opts['padded_img_size'] = pretrain_opts['img_size'] * int(pretrain_opts['padding_ratio'])
    pretrain_opts['model_path'] = args.model_path
    pretrain_opts['frame_interval'] = args.frame_interval
    pretrain_opts['init_model_path'] = args.init_model_path
    pretrain_opts['batch_frames'] = args.batch_frames
    pretrain_opts['lr'] = args.lr
    pretrain_opts['batch_pos'] = args.batch_pos  # original = 64
    pretrain_opts['batch_neg'] = args.batch_neg  # original = 192
    pretrain_opts['n_cycles'] = args.n_cycles
    # NOTE(review): the -adaptive_align flag is parsed above but never read;
    # the option is pinned to False here. Confirm this is intentional before
    # wiring in args.adaptive_align -- that would flip the default to True.
    pretrain_opts['adaptive_align'] = False
    pretrain_opts['seqbatch_size'] = args.seqbatch_size
    # Device selection is fixed here rather than exposed on the command line.
    pretrain_opts['use_gpu'] = True
    pretrain_opts['use_cpu'] = False
    ##################################################################################
    ############################Do not modify opts anymore.###########################
    ######################Because of synchronization of options#######################
    ##################################################################################

    # Parenthesised single-argument form: prints the same under Python 2 and
    # is also valid Python 3 syntax.
    print(pretrain_opts)
    train_mdnet()
295 | 
296 | 


--------------------------------------------------------------------------------
/vot.py:
--------------------------------------------------------------------------------
  1 | """
  2 | @file vot.py
  3 | 
  4 | @brief Python utility functions for VOT integration
  5 | 
  6 | @author Luka Cehovin, Alessio Dore
  7 | 
  8 | @date 2016
  9 | 
 10 | """
 11 | 
 12 | import sys
 13 | import copy
 14 | import collections
 15 | 
 16 | try:
 17 |     import trax
 18 | except ImportError:
 19 |     raise Exception('TraX support not found. Please add trax module to Python path.')
 20 | 
 21 | Rectangle = collections.namedtuple('Rectangle', ['x', 'y', 'width', 'height'])
 22 | Point = collections.namedtuple('Point', ['x', 'y'])
 23 | Polygon = collections.namedtuple('Polygon', ['points'])
 24 | 
 25 | class VOT(object):
 26 |     """ Base class for Python VOT integration """
 27 |     def __init__(self, region_format, channels=None):
 28 |         """ Constructor
 29 | 
 30 |         Args:
 31 |             region_format: Region format options
 32 |         """
 33 |         assert(region_format in [trax.Region.RECTANGLE, trax.Region.POLYGON])
 34 | 
 35 |         if channels is None:
 36 |             channels = ['color']
 37 |         elif channels == 'rgbd':
 38 |             channels = ['color', 'depth']
 39 |         elif channels == 'rgbt':
 40 |             channels = ['color', 'ir']
 41 |         elif channels == 'ir':
 42 |             channels = ['ir']
 43 |         else:
 44 |             raise Exception('Illegal configuration {}.'.format(channels))
 45 | 
 46 |         self._trax = trax.Server([region_format], [trax.Image.PATH], channels)
 47 | 
 48 |         request = self._trax.wait()
 49 |         assert(request.type == 'initialize')
 50 |         if isinstance(request.region, trax.Polygon):
 51 |             self._region = Polygon([Point(x[0], x[1]) for x in request.region])
 52 |         else:
 53 |             self._region = Rectangle(*request.region.bounds())
 54 |         self._image = [x.path() for k, x in request.image.items()]
 55 |         if len(self._image) == 1:
 56 |             self._image = self._image[0]
 57 |         
 58 |         self._trax.status(request.region)
 59 | 
 60 |     def region(self):
 61 |         """
 62 |         Send configuration message to the client and receive the initialization
 63 |         region and the path of the first image
 64 | 
 65 |         Returns:
 66 |             initialization region
 67 |         """
 68 | 
 69 |         return self._region
 70 | 
 71 |     def report(self, region, confidence = None):
 72 |         """
 73 |         Report the tracking results to the client
 74 | 
 75 |         Arguments:
 76 |             region: region for the frame
 77 |         """
 78 |         assert(isinstance(region, Rectangle) or isinstance(region, Polygon))
 79 |         if isinstance(region, Polygon):
 80 |             tregion = trax.Polygon.create([(x.x, x.y) for x in region.points])
 81 |         else:
 82 |             tregion = trax.Rectangle.create(region.x, region.y, region.width, region.height)
 83 |         properties = {}
 84 |         if not confidence is None:
 85 |             properties['confidence'] = confidence
 86 |         self._trax.status(tregion, properties)
 87 | 
 88 |     def frame(self):
 89 |         """
 90 |         Get a frame (image path) from client
 91 | 
 92 |         Returns:
 93 |             absolute path of the image
 94 |         """
 95 |         if hasattr(self, "_image"):
 96 |             image = self._image
 97 |             del self._image
 98 |             return image
 99 | 
100 |         request = self._trax.wait()
101 | 
102 |         if request.type == 'frame':
103 |             image = [x.path() for k, x in request.image.items()]
104 |             if len(image) == 1:
105 |                 return image[0]
106 |             return image
107 |         else:
108 |             return None
109 | 
110 | 
111 |     def quit(self):
112 |         if hasattr(self, '_trax'):
113 |             self._trax.quit()
114 | 
115 |     def __del__(self):
116 |         self.quit()
117 | 
118 | 


--------------------------------------------------------------------------------