├── ComponentAnalysis.png
├── MFGNet-rgbt-tracking-master
    ├── 210_test1.py
    ├── 234.pkl
    ├── 234.txt
    ├── 50.pkl
    ├── 50.txt
    ├── __init__.py
    ├── daTANet_module
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── generator.cpython-37.pyc
    │   │   ├── ops.cpython-37.pyc
    │   │   ├── resnet.cpython-37.pyc
    │   │   └── utils.cpython-37.pyc
    │   ├── generator.py
    │   ├── ops.py
    │   ├── resnet.py
    │   ├── testing.py
    │   ├── testing_234.py
    │   ├── train.py
    │   └── utils.py
    ├── models
    │   └── readme.txt
    ├── modules
    │   ├── bbreg.py
    │   ├── bbreg.pyc
    │   ├── data_prov.py
    │   ├── data_prov.pyc
    │   ├── img_cropper.py
    │   ├── img_cropper.pyc
    │   ├── model.py
    │   ├── model.pyc
    │   ├── prepro_data.py
    │   ├── prepro_data_imagenet.py
    │   ├── pretrain_options.py
    │   ├── pretrain_options.pyc
    │   └── roi_align
    │   │   ├── build
    │   │       ├── lib.linux-x86_64-3.7
    │   │       │   └── roi_align_cuda.cpython-37m-x86_64-linux-gnu.so
    │   │       └── temp.linux-x86_64-3.7
    │   │       │   └── src
    │   │       │       ├── roi_align_cuda.o
    │   │       │       └── roi_align_kernel_c.o
    │   │   └── functions
    │   │       ├── roi_align.py
    │   │       └── roi_align.pyc
    ├── test_234_dataset.py
    ├── tracker.py
    ├── tracker_backup.py
    └── train.py
├── README.md
├── environments.txt
├── pipelinev5.png
├── results_on_rgbt210_234.png
├── rgbt_balancebike.gif
├── rgbt_car10.gif
├── rgbt_flower1.gif
└── rgbt_kite4.gif


/ComponentAnalysis.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/ComponentAnalysis.png


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/210_test1.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 
  3 | from os.path import join, isdir
  4 | from tracker import *
  5 | import numpy as np
  6 | import argparse
  7 | import pickle
  8 | import math
  9 | import pdb 
 10 | import torchvision.transforms as transforms
 11 | import random
 12 | import warnings
 13 | warnings.filterwarnings("ignore") 
 14 | 
 15 | 
 16 | 
 17 | def genConfig(seq_path, set_type):
 18 | 
 19 |     path, seqname = os.path.split(seq_path)
 20 | 
 21 |     if set_type == 'OTB100':
 22 |         img_list = sorted([seq_path + '/img/' + p for p in os.listdir(seq_path + '/img') if os.path.splitext(p)[1] == '.png'])
 23 |         gt = np.loadtxt(seq_path + '/groundtruth_rect.txt', delimiter=',')
 24 | 
 25 |     #####################################################################
 26 |     #####               For the RGBT dataset 
 27 |     ##################################################################### 
 28 |     elif set_type == 'dataset234': 
 29 |         img_list_v = sorted([seq_path + '/visible/' + p for p in os.listdir(seq_path + '/visible') if os.path.splitext(p)[1] == '.jpg'])
 30 |         img_list_i = sorted([seq_path + '/infrared/' + p for p in os.listdir(seq_path + '/infrared') if os.path.splitext(p)[1] == '.jpg'])
 31 |         gt = np.loadtxt(seq_path + '/visible.txt', delimiter=',')
 32 | 
 33 |     elif set_type == 'dataset210': 
 34 |         img_list_v = sorted([seq_path + '/visible/' + p for p in os.listdir(seq_path + '/visible') if os.path.splitext(p)[1] == '.jpg'])
 35 |         img_list_i = sorted([seq_path + '/infrared/' + p for p in os.listdir(seq_path + '/infrared') if os.path.splitext(p)[1] == '.jpg'])
 36 |         gt = np.loadtxt(seq_path + '/init.txt', delimiter=',')
 37 | 
 38 | 
 39 |     return img_list_v, img_list_i, gt 
 40 | 
 41 | 
 42 | 
 43 | 
 44 | if __name__ == "__main__":
 45 | 
 46 |     parser = argparse.ArgumentParser()
 47 |     parser.add_argument("-set_type", default = 'dataset210')
 48 |     parser.add_argument("-model_path", default = './models/test_CBAM_dfg_rtmdnet_trained_on_50.pth')
 49 |     parser.add_argument("-result_path", default = './result.npy')
 50 |     parser.add_argument("-visual_log",default=False, action= 'store_true')
 51 |     parser.add_argument("-visualize",default=False, action='store_true')
 52 |     parser.add_argument("-adaptive_align",default=True, action='store_false')
 53 |     parser.add_argument("-padding",default=1.2, type = float)
 54 |     parser.add_argument("-jitter",default=True, action='store_false')
 55 |     
 56 |     args = parser.parse_args()
 57 | 
 58 |     ##################################################################################
 59 |     #########################Just modify opts in this script.#########################
 60 |     ######################Becuase of synchronization of options#######################
 61 |     ##################################################################################
 62 |     ## option setting
 63 |     opts['model_path']=args.model_path
 64 |     opts['result_path']=args.result_path
 65 |     opts['visual_log']=args.visual_log
 66 |     opts['set_type']=args.set_type
 67 |     opts['visualize'] = args.visualize
 68 |     opts['adaptive_align'] = args.adaptive_align
 69 |     opts['padding'] = args.padding
 70 |     opts['jitter'] = args.jitter
 71 |     ##################################################################################
 72 |     ############################Do not modify opts anymore.###########################
 73 |     ######################Becuase of synchronization of options#######################
 74 |     ##################################################################################
 75 |     print(opts)
 76 | 
 77 | 
 78 |     ## path initialization
 79 |     dataset_path = '/disc2/naipeng.ye/wangxiao/acm_mm2020_experiments/'
 80 |     result_home = '/disc2/naipeng.ye/wangxiao/acm_mm2020_experiments/daTANet-cbam-dfg-rgbt-RTMDNet-master-train-on-50/trackingResults_rgbt210/'
 81 | 
 82 |     seq_home = dataset_path + opts['set_type']
 83 |     seq_list = [f for f in os.listdir(seq_home) if isdir(join(seq_home,f))]
 84 |     seq_list = np.sort(seq_list) 
 85 | 
 86 |     iou_list=[]
 87 |     fps_list=dict()
 88 |     bb_result = dict()
 89 |     result = dict()
 90 | 
 91 |     iou_list_nobb=[]
 92 |     bb_result_nobb = dict()
 93 |     for num, seq in enumerate(seq_list):
 94 | 
 95 |         if num<-1:
 96 |             continue
 97 | 
 98 |         already_done = os.listdir(result_home) 
 99 |         
100 |         if seq+"_rgbt210-daTANet-cbam-dfg-v1.txt" in already_done: 
101 |             print("==>> Skip this video: ", seq) 
102 |         else: 
103 |             txtName = seq + '_rgbt210-daTANet-cbam-dfg-v1.txt'
104 |             fid = open(result_home + txtName, 'w')
105 |             
106 |             seq_path = seq_home + '/' + seq
107 |             img_list_v, img_list_i, gt = genConfig(seq_path, opts['set_type'])
108 | 
109 |             iou_result, result_bb, fps, result_nobb = run_mdnet(img_list_v, img_list_i, gt[0], gt, seq = seq, display=opts['visualize'])
110 | 
111 |             enable_frameNum = 0.
112 |             for iidx in range(len(iou_result)):
113 |                 if (math.isnan(iou_result[iidx])==False): 
114 |                     enable_frameNum += 1.
115 |                 else:
116 |                     ## gt is not alowed
117 |                     iou_result[iidx] = 0.
118 | 
119 |             iou_list.append(iou_result.sum()/enable_frameNum)
120 |             bb_result[seq] = result_bb
121 |             fps_list[seq]=fps
122 | 
123 |             bb_result_nobb[seq] = result_nobb
124 |             print('{} {} : {} , total mIoU:{}, fps:{}'.format(num,seq,iou_result.mean(), sum(iou_list)/len(iou_list),sum(fps_list.values())/len(fps_list)))
125 | 
126 | 
127 |             for iidex in range(len(result_bb)):
128 |                line = result_bb[iidex]
129 | 
130 |                # pdb.set_trace() 
131 |                fid.write(str(line[0]))
132 |                fid.write(',')
133 |                fid.write(str(line[1]))
134 |                fid.write(',')
135 |                fid.write(str(line[2]))
136 |                fid.write(',')
137 |                fid.write(str(line[3]))
138 |                fid.write('\n')
139 |             fid.close()
140 | 
141 | 
142 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/234.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/234.pkl


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/234.txt:
--------------------------------------------------------------------------------
  1 | afterrain
  2 | aftertree
  3 | baby
  4 | baginhand
  5 | baketballwaliking
  6 | balancebike
  7 | basketball2
  8 | bicyclecity
  9 | bike
 10 | bikeman
 11 | bikemove1
 12 | biketwo
 13 | blackwoman
 14 | bluebike
 15 | blueCar
 16 | boundaryandfast
 17 | bus6
 18 | call
 19 | car
 20 | car10
 21 | car3
 22 | car37
 23 | car4
 24 | car41
 25 | car66
 26 | caraftertree
 27 | carLight
 28 | carnotfar
 29 | carnotmove
 30 | carred
 31 | child
 32 | child1
 33 | child3
 34 | child4
 35 | children2
 36 | children3
 37 | children4
 38 | crossroad
 39 | crouch
 40 | cycle2
 41 | cycle3
 42 | cycle4
 43 | cycle5
 44 | diamond
 45 | dog
 46 | dog1
 47 | dog10
 48 | dog11
 49 | elecbike
 50 | elecbike10
 51 | elecbike2
 52 | elecbike3
 53 | elecbikechange2
 54 | elecbikeinfrontcar
 55 | elecbikewithhat
 56 | elecbikewithlight
 57 | elecbikewithlight1
 58 | face1
 59 | flower1
 60 | flower2
 61 | fog
 62 | fog6
 63 | glass
 64 | glass2
 65 | graycar2
 66 | green
 67 | greentruck
 68 | greyman
 69 | greywoman
 70 | guidepost
 71 | hotglass
 72 | hotkettle
 73 | inglassandmobile
 74 | jump
 75 | kettle
 76 | kite2
 77 | kite4
 78 | luggage
 79 | man22
 80 | man23
 81 | man24
 82 | man26
 83 | man28
 84 | man29
 85 | man3
 86 | man4
 87 | man45
 88 | man5
 89 | man55
 90 | man68
 91 | man69
 92 | man7
 93 | man8
 94 | man88
 95 | man9
 96 | manafterrain
 97 | mancross
 98 | car20
 99 | cycle1
100 | floor-1
101 | man2
102 | mancross1
103 | manypeople
104 | redcar
105 | threeman
106 | twoelecbike1
107 | walkingwithbag2
108 | mancrossandup
109 | mandrivecar
110 | manfaraway
111 | maninblack
112 | maninglass
113 | maningreen2
114 | maninred
115 | manlight
116 | manoccpart
117 | manonboundary
118 | manonelecbike
119 | manontricycle
120 | manout2
121 | manup
122 | manwithbag
123 | manwithbag4
124 | manwithbasketball
125 | manwithluggage
126 | manwithumbrella
127 | manypeople1
128 | manypeople2
129 | mobile
130 | night2
131 | nightcar
132 | nightrun
133 | nightthreepeople
134 | notmove
135 | oldman
136 | oldman2
137 | oldwoman
138 | orangeman1
139 | people
140 | people1
141 | people3
142 | playsoccer
143 | push
144 | rainingwaliking
145 | raningcar
146 | redbag
147 | redcar2
148 | redmanchange
149 | rmo
150 | run
151 | run1
152 | run2
153 | scooter
154 | shake
155 | shoeslight
156 | single1
157 | single3
158 | soccer
159 | soccer2
160 | soccerinhand
161 | straw
162 | stroller
163 | supbus
164 | supbus2
165 | takeout
166 | tallman
167 | threeman2
168 | threepeople
169 | threewoman2
170 | together
171 | toy1
172 | toy3
173 | toy4
174 | tree2
175 | tree3
176 | tree5
177 | trees
178 | tricycle
179 | tricycle1
180 | tricycle2
181 | tricycle6
182 | tricycle9
183 | tricyclefaraway
184 | tricycletwo
185 | twoelecbike
186 | twoman
187 | twoman1
188 | twoman2
189 | twoperson
190 | twowoman
191 | twowoman1
192 | walking40
193 | walking41
194 | walkingman
195 | walkingman1
196 | walkingman12
197 | walkingman20
198 | walkingman41
199 | walkingmantiny
200 | walkingnight
201 | walkingtogether
202 | walkingtogether1
203 | walkingtogetherright
204 | walkingwithbag1
205 | walkingwoman
206 | whitebag
207 | whitecar
208 | whitecar3
209 | whitecar4
210 | whitecarafterrain
211 | whiteman1
212 | whitesuv
213 | woamn46
214 | woamnwithbike
215 | woman
216 | woman1
217 | woman100
218 | woman2
219 | woman3
220 | woman4
221 | woman48
222 | woman6
223 | woman89
224 | woman96
225 | woman99
226 | womancross
227 | womanfaraway
228 | womaninblackwithbike
229 | womanleft
230 | womanpink
231 | womanred
232 | womanrun
233 | womanwithbag6
234 | yellowcar
235 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/50.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/50.pkl


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/50.txt:
--------------------------------------------------------------------------------
 1 | Minibus1
 2 | BlackCar
 3 | BlackSwan1
 4 | BlueCar
 5 | BusScale
 6 | BusScale1
 7 | carNig
 8 | Crossing
 9 | crowdNig
10 | Cycling
11 | DarkNig
12 | Exposure2
13 | Exposure4
14 | fastCar2
15 | FastCarNig
16 | FastMotor
17 | FastMotorNig
18 | Football
19 | GarageHover
20 | Gathering
21 | GoTogether
22 | Jogging
23 | LightOcc
24 | Minibus
25 | MinibusNig
26 | MinibusNigOcc
27 | Motorbike
28 | Motorbike1
29 | MotorNig
30 | occBike
31 | OccCar-1
32 | OccCar-2
33 | Otcbvs
34 | Otcbvs1
35 | Pool
36 | Quarreling
37 | RainyCar1
38 | RainyCar2
39 | RainyMotor1
40 | RainyMotor2
41 | RainyPeople
42 | Running
43 | Torabi
44 | Torabi1
45 | Tricycle
46 | tunnel
47 | Walking
48 | WalkingNig
49 | WalkingNig1
50 | WalkingOcc
51 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/__init__.py:
--------------------------------------------------------------------------------
1 | #### 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/daTANet_module/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import *
2 | from .resnet18_vggm import *
3 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/generator.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/generator.cpython-37.pyc


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/ops.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/ops.cpython-37.pyc


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/resnet.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/resnet.cpython-37.pyc


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/utils.cpython-37.pyc


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/daTANet_module/generator.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.utils.model_zoo as model_zoo
  4 | import torchvision.models as models
  5 | # import torchvision.ops as torchops
  6 | 
  7 | import math
  8 | from torch.autograd import Variable
  9 | from ops import * 
 10 | import pdb 
 11 | 
 12 | from torch.nn.parameter import Parameter
 13 | import torch.nn.functional as F
 14 | from torch.nn.modules.utils import _single, _pair, _triple
 15 | 
 16 | from resnet import resnet18 
 17 | import numpy as np
 18 | import cv2 
 19 | import pdb 
 20 | 
 21 | 
 22 | def make_conv_layers(cfg):
 23 |     layers = []
 24 |     in_channels = 3
 25 |     for v in cfg:
 26 |         if v == 'M':
 27 |             layers += [maxpool2d()]
 28 |         else:
 29 |             conv = conv2d(in_channels, v)
 30 |             layers += [conv, relu(inplace=True)]
 31 |             in_channels = v
 32 |     return nn.Sequential(*layers)
 33 | 
 34 | 
 35 | def make_deconv_layers(cfg):
 36 |     layers = []
 37 |     in_channels = 4115   
 38 |     for v in cfg:
 39 |         if v == 'U':
 40 |             layers += [nn.Upsample(scale_factor=2)]
 41 |         else:
 42 |             deconv = deconv2d(in_channels, v)
 43 |             layers += [deconv]
 44 |             in_channels = v
 45 |     return nn.Sequential(*layers)
 46 | 
 47 | 
 48 | cfg = {
 49 |     'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512],
 50 |     'D': [512, 512, 512, 'U', 512, 512, 512, 'U', 256, 256, 256, 'U', 128, 128, 'U', 64, 64]
 51 | }
 52 | 
 53 | class _ConvNd(nn.Module):
 54 | 
 55 |     def __init__(self, in_channels, out_channels, kernel_size, stride,
 56 |                  padding, dilation, transposed, output_padding, groups, bias):
 57 |         super(_ConvNd, self).__init__()
 58 |         if in_channels % groups != 0:
 59 |             raise ValueError('in_channels must be divisible by groups')
 60 |         if out_channels % groups != 0:
 61 |             raise ValueError('out_channels must be divisible by groups')
 62 |         self.in_channels = in_channels
 63 |         self.out_channels = out_channels
 64 |         self.kernel_size = kernel_size
 65 |         self.stride = stride
 66 |         self.padding = padding
 67 |         self.dilation = dilation
 68 |         self.transposed = transposed
 69 |         self.output_padding = output_padding
 70 |         self.groups = groups
 71 |         
 72 |         if bias:
 73 |             self.bias = Parameter(torch.Tensor(out_channels))
 74 |         else:
 75 |             self.register_parameter('bias', None)
 76 |         self.reset_parameters()
 77 |         
 78 |     
 79 |     def reset_parameters(self):
 80 |         n = self.in_channels
 81 |         for k in self.kernel_size:
 82 |             n *= k
 83 |         stdv = 1. / math.sqrt(n)
 84 |         if self.bias is not None:
 85 |             self.bias.data.uniform_(-stdv, stdv)
 86 | 
 87 |     def __repr__(self):
 88 |         s = ('{name}({in_channels}, {out_channels}, kernel_size={kernel_size}'
 89 |              ', stride={stride}')
 90 |         if self.padding != (0,) * len(self.padding):
 91 |             s += ', padding={padding}'
 92 |         if self.dilation != (1,) * len(self.dilation):
 93 |             s += ', dilation={dilation}'
 94 |         if self.output_padding != (0,) * len(self.output_padding):
 95 |             s += ', output_padding={output_padding}'
 96 |         if self.groups != 1:
 97 |             s += ', groups={groups}'
 98 |         if self.bias is None:
 99 |             s += ', bias=False'
100 |         s += ')'
101 |         return s.format(name=self.__class__.__name__, **self.__dict__)
102 | 
103 | 
104 | class AdaptiveConv2d(_ConvNd):
105 | 
106 |     def __init__(self, in_channels, out_channels, kernel_size, stride=1,
107 |                  padding=0, dilation=1, groups=1, bias=True):
108 |         kernel_size = _pair(kernel_size)
109 |         stride = _pair(stride)
110 |         padding = _pair(padding)
111 |         dilation = _pair(dilation)
112 |         super(AdaptiveConv2d, self).__init__(
113 |                 in_channels, out_channels, kernel_size, stride, padding, dilation,
114 |                 False, _pair(0), groups, bias)
115 | 
116 |     def forward(self, input, dynamic_weight):
117 |         # Get batch num
118 |         batch_num = input.size(0)
119 | 
120 |         # Reshape input tensor from size (N, C, H, W) to (1, N*C, H, W)
121 |         input = input.view(1, -1, input.size(2), input.size(3))
122 | 
123 |         # Reshape dynamic_weight tensor from size (N, C, H, W) to (1, N*C, H, W)
124 |         dynamic_weight = dynamic_weight.view(-1, 1, dynamic_weight.size(2), dynamic_weight.size(3))
125 | 
126 |         # Do convolution
127 |         conv_rlt = F.conv2d(input, dynamic_weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
128 | 
129 |         # Reshape conv_rlt tensor from (1, N*C, H, W) to (N, C, H, W)
130 |         conv_rlt = conv_rlt.view(batch_num, -1, conv_rlt.size(2), conv_rlt.size(3))
131 | 
132 |         return conv_rlt
133 | 
134 | 
135 | def encoder():
136 |     return make_conv_layers(cfg['E'])
137 | 
138 | def decoder():
139 |     return make_deconv_layers(cfg['D'])
140 | 
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | #############################################################################
148 | ####                        naive RGBT generator  
149 | #############################################################################
150 | 
151 | class naive_Generator(nn.Module):
152 |     def __init__(self):
153 |         super(naive_Generator, self).__init__()
154 |         self.encoder = resnet18()
155 |         self.decoder = decoder()
156 |         self.mymodules = nn.ModuleList([deconv2d(64, 1, kernel_size=1, padding = 0), nn.Sigmoid()]) 
157 | 
158 | 
159 |     def forward(self, tarObject, gray_tarObject, batch_imgClip, batch_grayClip): 
160 | 
161 |         _, x_2, x_3 = self.encoder(tarObject)  
162 |         _, gray_x_2, gray_x_3 = self.encoder(gray_tarObject)  
163 | 
164 |         _, frame1_feat2_v, frame1_feat3_v = self.encoder(batch_imgClip[0])  
165 |         _, frame2_feat2_v, frame2_feat3_v = self.encoder(batch_imgClip[1])  
166 |         _, frame3_feat2_v, frame3_feat3_v = self.encoder(batch_imgClip[2])  
167 | 
168 |         _, frame1_feat2_i, frame1_feat3_i = self.encoder(batch_grayClip[0])  
169 |         _, frame2_feat2_i, frame2_feat3_i = self.encoder(batch_grayClip[1])  
170 |         _, frame3_feat2_i, frame3_feat3_i = self.encoder(batch_grayClip[2])  
171 | 
172 | 
173 |         x_3 = nn.functional.interpolate(x_3, size=[x_2.shape[2], x_2.shape[3]])
174 |         target_feats_v = torch.cat((x_2, x_3), dim=1)
175 |         gray_x_3 = nn.functional.interpolate(gray_x_3, size=[gray_x_2.shape[2], gray_x_2.shape[3]])
176 |         target_feats_i = torch.cat((gray_x_2, gray_x_3), dim=1)
177 |         target_feats   = target_feats_v + target_feats_i 
178 | 
179 | 
180 |         frame1_feat3_v = nn.functional.interpolate(frame1_feat3_v, size=[frame1_feat2_v.shape[2], frame1_feat2_v.shape[3]])
181 |         frame1_feats_v = torch.cat((frame1_feat2_v, frame1_feat3_v), dim=1)
182 |         frame1_feat3_i = nn.functional.interpolate(frame1_feat3_i, size=[frame1_feat2_i.shape[2], frame1_feat2_i.shape[3]])
183 |         frame1_feats_i = torch.cat((frame1_feat2_i, frame1_feat3_i), dim=1) 
184 |         frame1_feats   = frame1_feats_v + frame1_feats_i 
185 | 
186 |         frame2_feat3_v = nn.functional.interpolate(frame2_feat3_v, size=[frame2_feat2_v.shape[2], frame2_feat2_v.shape[3]])
187 |         frame2_feats_v = torch.cat((frame2_feat2_v, frame2_feat3_v), dim=1)
188 |         frame2_feat3_i = nn.functional.interpolate(frame2_feat3_i, size=[frame2_feat2_i.shape[2], frame2_feat2_i.shape[3]])
189 |         frame2_feats_i = torch.cat((frame2_feat2_i, frame2_feat3_i), dim=1)
190 |         frame2_feats   = frame2_feats_v + frame2_feats_i 
191 | 
192 | 
193 |         frame3_feat3_v = nn.functional.interpolate(frame3_feat3_v, size=[frame3_feat2_v.shape[2], frame3_feat2_v.shape[3]])
194 |         frame3_feats_v = torch.cat((frame3_feat2_v, frame3_feat3_v), dim=1)
195 |         frame3_feat3_i = nn.functional.interpolate(frame3_feat3_i, size=[frame3_feat2_i.shape[2], frame3_feat2_i.shape[3]])
196 |         frame3_feats_i = torch.cat((frame3_feat2_i, frame3_feat3_i), dim=1)
197 |         frame3_feats   = frame3_feats_v + frame3_feats_i 
198 | 
199 |         ##### 
200 |         feat_temp1 = torch.cat((target_feats, frame1_feats), dim=1) 
201 |         feat_temp2 = torch.cat((frame2_feats, frame3_feats), dim=1) 
202 |         feat_final = torch.cat((feat_temp1, feat_temp2), dim=1) 
203 |         #### feat_final: torch.Size([3, 3072, 19, 19]) 
204 | 
205 |         # pdb.set_trace() 
206 |         output = self.decoder(feat_final)
207 |         output = self.mymodules[0](output)
208 |         output = self.mymodules[1](output)
209 |         
210 |         return output 
211 | 
212 | 
213 | class Recurrent_net(nn.Module):
214 |     def __init__(self, size, in_channel, out_channel):
215 |         super(Recurrent_net, self).__init__()
216 |         self.size = size
217 |         self.in_channel = in_channel
218 |         self.out_channel = out_channel
219 |         self.vertical = nn.LSTM(input_size=in_channel, hidden_size=256, batch_first=True, bidirectional=True)  # each row
220 |         self.horizontal = nn.LSTM(input_size=512, hidden_size=256, batch_first=True, bidirectional=True)  # each column
221 |         self.conv = nn.Conv2d(512, out_channel, 1)
222 | 
223 |     def forward(self, *input):
224 |         x = input[0]
225 |         temp = []
226 |         x = torch.transpose(x, 1, 3)  # batch, width, height, in_channel
227 |         for i in range(self.size):
228 |             h, _ = self.vertical(x[:, :, i, :])
229 |             temp.append(h)  # batch, width, 512
230 |         x = torch.stack(temp, dim=2)  # batch, width, height, 512
231 |         temp = []
232 |         for i in range(self.size):
233 |             h, _ = self.horizontal(x[:, i, :, :])
234 |             temp.append(h)  # batch, width, 512
235 |         x = torch.stack(temp, dim=3)  # batch, height, 512, width
236 |         x = torch.transpose(x, 1, 2)  # batch, 512, height, width
237 |         x = self.conv(x)
238 |         return x
239 | 
240 | 
241 | #############################################################################
242 | ####              Direction-aware RGBT Target-aware Attention Module 
243 | #############################################################################
244 | 
245 | class daGenerator(nn.Module):
246 |     def __init__(self):
247 |         super(daGenerator, self).__init__()
248 |         self.encoder = resnet18()
249 |         self.decoder = decoder()
250 |         self.mymodules = nn.ModuleList([deconv2d(64, 1, kernel_size=1, padding = 0), nn.Sigmoid()]) 
251 | 
252 |         self.conv1x1_1 = nn.Conv2d(3072, 1024, kernel_size = 1, stride =1, padding=0, bias=False)
253 |         self.conv1x1_2 = nn.Conv2d(3072, 19,   kernel_size = 1, stride =1, padding=0, bias=False)
254 | 
255 |         self.spatial_renet  = Recurrent_net(19, 1024, 1024)
256 |         self.temporal_renet = Recurrent_net(19, 19,   19)
257 | 
258 |     def forward(self, tarObject, gray_tarObject, batch_imgClip, batch_grayClip): 
259 | 
260 |         _, x_2, x_3 = self.encoder(tarObject)  
261 |         _, gray_x_2, gray_x_3 = self.encoder(gray_tarObject)  
262 | 
263 |         ## batch_imgClip: torch.Size([10, 3, 3, 300, 300]) 
264 |         _, frame1_feat2_v, frame1_feat3_v = self.encoder(batch_imgClip[:, 0])  ## torch.Size([10, 256, 19, 19]) 
265 |         _, frame2_feat2_v, frame2_feat3_v = self.encoder(batch_imgClip[:, 1])  
266 |         _, frame3_feat2_v, frame3_feat3_v = self.encoder(batch_imgClip[:, 2])  
267 | 
268 |         _, frame1_feat2_i, frame1_feat3_i = self.encoder(batch_grayClip[:, 0])  
269 |         _, frame2_feat2_i, frame2_feat3_i = self.encoder(batch_grayClip[:, 1])  
270 |         _, frame3_feat2_i, frame3_feat3_i = self.encoder(batch_grayClip[:, 2])  
271 | 
272 |         x_3 = nn.functional.interpolate(x_3, size=[x_2.shape[2], x_2.shape[3]])
273 |         target_feats_v = torch.cat((x_2, x_3), dim=1)
274 |         gray_x_3 = nn.functional.interpolate(gray_x_3, size=[gray_x_2.shape[2], gray_x_2.shape[3]])
275 |         target_feats_i = torch.cat((gray_x_2, gray_x_3), dim=1)
276 |         target_feats   = target_feats_v + target_feats_i 
277 | 
278 | 
279 |         frame1_feat3_v = nn.functional.interpolate(frame1_feat3_v, size=[frame1_feat2_v.shape[2], frame1_feat2_v.shape[3]])
280 |         frame1_feats_v = torch.cat((frame1_feat2_v, frame1_feat3_v), dim=1)
281 |         frame1_feat3_i = nn.functional.interpolate(frame1_feat3_i, size=[frame1_feat2_i.shape[2], frame1_feat2_i.shape[3]])
282 |         frame1_feats_i = torch.cat((frame1_feat2_i, frame1_feat3_i), dim=1) 
283 |         frame1_feats   = frame1_feats_v + frame1_feats_i 
284 | 
285 |         frame2_feat3_v = nn.functional.interpolate(frame2_feat3_v, size=[frame2_feat2_v.shape[2], frame2_feat2_v.shape[3]])
286 |         frame2_feats_v = torch.cat((frame2_feat2_v, frame2_feat3_v), dim=1)
287 |         frame2_feat3_i = nn.functional.interpolate(frame2_feat3_i, size=[frame2_feat2_i.shape[2], frame2_feat2_i.shape[3]])
288 |         frame2_feats_i = torch.cat((frame2_feat2_i, frame2_feat3_i), dim=1)
289 |         frame2_feats   = frame2_feats_v + frame2_feats_i 
290 | 
291 | 
292 |         frame3_feat3_v = nn.functional.interpolate(frame3_feat3_v, size=[frame3_feat2_v.shape[2], frame3_feat2_v.shape[3]])
293 |         frame3_feats_v = torch.cat((frame3_feat2_v, frame3_feat3_v), dim=1)
294 |         frame3_feat3_i = nn.functional.interpolate(frame3_feat3_i, size=[frame3_feat2_i.shape[2], frame3_feat2_i.shape[3]])
295 |         frame3_feats_i = torch.cat((frame3_feat2_i, frame3_feat3_i), dim=1)
296 |         frame3_feats   = frame3_feats_v + frame3_feats_i 
297 | 
298 |         ##### 
299 |         feat_temp1 = torch.cat((target_feats, frame1_feats), dim=1) 
300 |         feat_temp2 = torch.cat((frame2_feats, frame3_feats), dim=1) 
301 |         feat_final = torch.cat((feat_temp1, feat_temp2), dim=1) 
302 |         #### feat_final: torch.Size([3, 3072, 19, 19]) 
303 | 
304 |         feat_temp = self.conv1x1_1(feat_final) ## torch.Size([3, 1024, 19, 19]) 
305 |         feat_encoded1 = self.spatial_renet(feat_temp)
306 | 
307 | 
308 |         feat_encoded2 = self.conv1x1_2(feat_final) ## torch.Size([3, 19, 19, 19]) 
309 |         feat_encoded2 = torch.transpose(feat_encoded2, 1, 2) 
310 |         feat_encoded2 = self.temporal_renet(feat_encoded2) 
311 |         feat_encoded2 = torch.transpose(feat_encoded2, 1, 2) 
312 | 
313 |         feat_final1 = torch.cat((feat_encoded1, feat_encoded2), dim=1) 
314 |         feat_final1 = torch.cat((feat_final1, feat_final), dim=1)  
315 | 
316 |         # pdb.set_trace() 
317 |         output = self.decoder(feat_final1)
318 |         output = self.mymodules[0](output)
319 |         output = self.mymodules[1](output)
320 |         
321 |         return output 
322 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/daTANet_module/ops.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | def conv2d(in_channels, out_channels, kernel_size = 3, padding = 1):
 4 |     return nn.Conv2d(in_channels, out_channels, kernel_size = kernel_size, padding = padding)
 5 | 
 6 | def deconv2d(in_channels, out_channels, kernel_size = 3, padding = 1):
 7 |     return nn.ConvTranspose2d(in_channels, out_channels, kernel_size = kernel_size, padding = padding)
 8 | 
 9 | def relu(inplace = True): # Change to True?
10 |     return nn.ReLU(inplace)
11 | 
12 | def maxpool2d():
13 |     return nn.MaxPool2d(2)
14 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/daTANet_module/resnet.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import torch.nn as nn
  3 | from collections import OrderedDict
  4 | import torch.utils.model_zoo as model_zoo
  5 | from torchvision.models.resnet import BasicBlock, Bottleneck, model_urls
  6 | 
  7 | 
  8 | class ResNet(nn.Module):
  9 |     """ ResNet network module. Allows extracting specific feature blocks."""
 10 |     def __init__(self, block, layers, output_layers, num_classes=1000, inplanes=64):
 11 |         self.inplanes = inplanes
 12 |         super(ResNet, self).__init__()
 13 |         self.output_layers = output_layers
 14 |         self.conv1 = nn.Conv2d(3, inplanes, kernel_size=7, stride=2, padding=3, bias=False)
 15 |         self.bn1 = nn.BatchNorm2d(inplanes)
 16 |         self.relu = nn.ReLU(inplace=True)
 17 |         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
 18 |         self.layer1 = self._make_layer(block, inplanes,   layers[0])
 19 |         self.layer2 = self._make_layer(block, inplanes*2, layers[1], stride=2)
 20 |         self.layer3 = self._make_layer(block, inplanes*4, layers[2], stride=2)
 21 |         self.layer4 = self._make_layer(block, inplanes*8, layers[3], stride=2)
 22 |         # self.avgpool = nn.AvgPool2d(7, stride=1)
 23 |         self.avgpool = nn.AdaptiveAvgPool2d((1,1))
 24 |         self.fc = nn.Linear(inplanes*8 * block.expansion, num_classes)
 25 | 
 26 |         for m in self.modules():
 27 |             if isinstance(m, nn.Conv2d):
 28 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
 29 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
 30 |             elif isinstance(m, nn.BatchNorm2d):
 31 |                 m.weight.data.fill_(1)
 32 |                 m.bias.data.zero_()
 33 | 
 34 |     def _make_layer(self, block, planes, blocks, stride=1):
 35 |         downsample = None
 36 |         if stride != 1 or self.inplanes != planes * block.expansion:
 37 |             downsample = nn.Sequential(
 38 |                 nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False),
 39 |                 nn.BatchNorm2d(planes * block.expansion),
 40 |             )
 41 | 
 42 |         layers = []
 43 |         layers.append(block(self.inplanes, planes, stride, downsample))
 44 |         self.inplanes = planes * block.expansion
 45 |         for i in range(1, blocks):
 46 |             layers.append(block(self.inplanes, planes))
 47 | 
 48 |         return nn.Sequential(*layers)
 49 | 
 50 |     def _add_output_and_check(self, name, x, outputs, output_layers):
 51 |         if name in output_layers:
 52 |             outputs[name] = x
 53 |         return len(output_layers) == len(outputs)
 54 | 
 55 |     def forward(self, x, output_layers=None):
 56 |         """ Forward pass with input x. The output_layers specify the feature blocks which must be returned """
 57 |         # outputs = OrderedDict()
 58 | 
 59 |         # if output_layers is None:
 60 |         #     output_layers = self.output_layers
 61 | 
 62 |         x = self.conv1(x)
 63 |         x = self.bn1(x)
 64 |         x = self.relu(x)
 65 | 
 66 |         # if self._add_output_and_check('conv1', x, outputs, output_layers):
 67 |         #     return outputs
 68 | 
 69 |         x = self.maxpool(x)
 70 | 
 71 |         x = self.layer1(x)
 72 | 
 73 |         # if self._add_output_and_check('layer1', x, outputs, output_layers):
 74 |         #     return outputs
 75 | 
 76 |         x2_feat = self.layer2(x)
 77 | 
 78 |         # if self._add_output_and_check('layer2', x, outputs, output_layers):
 79 |         #     return outputs
 80 | 
 81 |         x3_feat = self.layer3(x2_feat)
 82 | 
 83 |         # if self._add_output_and_check('layer3', x, outputs, output_layers):
 84 |         #     return outputs
 85 | 
 86 |         x4_feat = self.layer4(x3_feat)
 87 | 
 88 |         # if self._add_output_and_check('layer4', x, outputs, output_layers):
 89 |         #     return outputs
 90 | 
 91 |         # x = self.avgpool(x)
 92 |         # x = x.view(x.size(0), -1)
 93 |         # x = self.fc(x)
 94 | 
 95 |         # if self._add_output_and_check('fc', x, outputs, output_layers):
 96 |         #     return outputs
 97 | 
 98 |         # if len(output_layers) == 1 and output_layers[0] == 'default':
 99 |         #     return x
100 | 
101 |         # raise ValueError('output_layer is wrong.')
102 | 
103 |         return x2_feat, x3_feat, x4_feat 
104 | 
105 | 
106 | 
107 | 
108 | 
109 | 
110 | def resnet18(output_layers=None, pretrained=True):
111 |     """Constructs a ResNet-18 model.
112 |     """
113 | 
114 |     if output_layers is None:
115 |         output_layers = ['default']
116 |     else:
117 |         for l in output_layers:
118 |             if l not in ['conv1', 'layer1', 'layer2', 'layer3', 'layer4', 'fc']:
119 |                 raise ValueError('Unknown layer: {}'.format(l))
120 | 
121 |     model = ResNet(BasicBlock, [2, 2, 2, 2], output_layers)
122 | 
123 |     if pretrained:
124 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
125 |     return model
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 
133 | def resnet50(output_layers=None, pretrained=False):
134 |     """Constructs a ResNet-50 model.
135 |     """
136 | 
137 |     if output_layers is None:
138 |         output_layers = ['default']
139 |     else:
140 |         for l in output_layers:
141 |             if l not in ['conv1', 'layer1', 'layer2', 'layer3', 'layer4', 'fc']:
142 |                 raise ValueError('Unknown layer: {}'.format(l))
143 | 
144 |     model = ResNet(Bottleneck, [3, 4, 6, 3], output_layers)
145 |     if pretrained:
146 |         model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
147 |     return model


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/daTANet_module/testing.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import torch
  3 | import torch.nn as nn
  4 | import numpy as np
  5 | import time
  6 | import os
  7 | os.environ["CUDA_VISIBLE_DEVICES"]="2"
  8 | from tqdm import tqdm
  9 | from torch.autograd import Variable
 10 | import torchvision.transforms as transforms
 11 | import random
 12 | from generator import daGenerator
 13 | from utils import *
 14 | import pdb 
 15 | import os.path
 16 | 
 17 | generator_path = './20200525_directionAware_TANet_rgbt_model.pkl'
 18 | Generator = daGenerator()
 19 | Generator.load_state_dict(torch.load(generator_path))
 20 | Generator.cuda()
 21 | 
 22 | def to_variable(x, requires_grad=True):
 23 |     if torch.cuda.is_available():
 24 |         x = x.cuda()
 25 |     return Variable(x,requires_grad)
 26 | 
 27 | counter = 0
 28 | start_time = time.time()
 29 | 
 30 | attentionSave_path = "/home/wangxiao/experiments/directionAware_rgbt_TANet_module/rgbt_210_Attention/"
 31 | 					   
 32 | dataset_path = "/wangxiao/dataset/RGB-T210/"
 33 | 
 34 | 
 35 | video_files = os.listdir(dataset_path) 
 36 | video_files.sort()
 37 | count = 0 
 38 | 
 39 | for videoidx in range(len(video_files)): 
 40 |     videoName = video_files[videoidx]     
 41 |     already_Done = os.listdir(attentionSave_path)
 42 |     
 43 |     if videoName in already_Done: 
 44 |         print("==>> Skip this video .... ")
 45 |     else:
 46 | 	    dataset_img_path_v = dataset_path + videoName + "/visible/" 
 47 | 	    dataset_img_files_v = os.listdir(dataset_img_path_v)
 48 | 	    dataset_img_path_i = dataset_path + videoName + "/infrared/" 
 49 | 	    dataset_img_files_i = os.listdir(dataset_img_path_i) 
 50 | 
 51 | 	    dataset_img_files_v.sort()
 52 | 	    dataset_img_files_i.sort()
 53 | 
 54 | 	    cursor = 0
 55 | 	    batch_size = 1 
 56 | 	    clip_len = 3 
 57 | 	    size = len(dataset_img_files_v) 
 58 | 	    to_tensor = transforms.ToTensor()  
 59 | 	    targetObject_v = torch.zeros(batch_size, 3, 300, 300)
 60 | 	    targetObject_i = torch.zeros(batch_size, 3, 300, 300) 
 61 | 
 62 | 	    gt_path = dataset_path + videoName + "/init.txt"
 63 | 	    gt_files = np.loadtxt(gt_path, delimiter=',')
 64 | 	    initBBox = gt_files[0] 
 65 | 
 66 | 	    initImg_path_v = dataset_img_files_v[0] 
 67 | 	    initImg_path_i = dataset_img_files_i[0] 
 68 | 
 69 | 	    initImage_v = cv2.imread(dataset_img_path_v + initImg_path_v) 
 70 | 	    initImage_i = cv2.imread(dataset_img_path_i + initImg_path_i) 
 71 | 
 72 | 	    tarObject_v = initImage_v[int(initBBox[1]):int(initBBox[1]+initBBox[3]), int(initBBox[0]):int(initBBox[0]+initBBox[2]), :] 
 73 | 	    tarObject_i = initImage_i[int(initBBox[1]):int(initBBox[1]+initBBox[3]), int(initBBox[0]):int(initBBox[0]+initBBox[2]), :] 
 74 | 
 75 | 	    tarObject_v = cv2.resize(tarObject_v, (300, 300), interpolation=cv2.INTER_LINEAR)
 76 | 	    tarObject_i = cv2.resize(tarObject_i, (300, 300), interpolation=cv2.INTER_LINEAR)
 77 | 
 78 | 	    targetObject_v[0] = to_tensor(tarObject_v) 
 79 | 	    targetObject_i[0] = to_tensor(tarObject_i) 
 80 | 	    # cv2.imwrite('./tarObject_v.png', tarObject_v)
 81 | 
 82 | 	    # pdb.set_trace() 
 83 | 	    for idx in range(1, len(dataset_img_files_v)):
 84 | 
 85 | 	        batch_imgClip_v = torch.zeros(batch_size, clip_len, 3, 300, 300) 
 86 | 	        batch_imgClip_i = torch.zeros(batch_size, clip_len, 3, 300, 300) 
 87 | 	        
 88 | 	        #### initialize continuous 3 images 
 89 | 	        if cursor < 1: 
 90 | 	            v_prev_file = dataset_img_files_v[cursor]
 91 | 	            i_prev_file = dataset_img_files_i[cursor]
 92 | 	        else: 
 93 | 	            v_prev_file = dataset_img_files_v[cursor-1]
 94 | 	            i_prev_file = dataset_img_files_i[cursor-1]
 95 | 
 96 | 	        v_curr_file = dataset_img_files_v[cursor]
 97 | 	        i_curr_file = dataset_img_files_i[cursor]
 98 | 
 99 | 	        if cursor == size: 
100 | 	            v_late_file = dataset_img_files_v[size-1]
101 | 	            i_late_file = dataset_img_files_i[size-1]
102 | 	        else: 
103 | 	            v_late_file = dataset_img_files_v[cursor]
104 | 	            i_late_file = dataset_img_files_i[cursor]
105 | 	        	
106 | 
107 | 	        v_prev_img_path 	= os.path.join(dataset_img_path_v, v_prev_file)
108 | 	        i_prev_img_path 	= os.path.join(dataset_img_path_i, i_prev_file)
109 | 	        v_current_img_path  = os.path.join(dataset_img_path_v, v_curr_file)
110 | 	        i_current_img_path  = os.path.join(dataset_img_path_i, i_curr_file)  
111 | 	        v_late_img_path 	= os.path.join(dataset_img_path_v, v_late_file)
112 | 	        i_late_img_path 	= os.path.join(dataset_img_path_i, i_late_file)   
113 | 
114 | 	        v_inputimage_prev 	 = cv2.imread(v_prev_img_path) 
115 | 	        i_inputimage_prev 	 = cv2.imread(i_prev_img_path) 
116 | 	        v_inputimage_current = cv2.imread(v_current_img_path) 
117 | 	        i_inputimage_current = cv2.imread(i_current_img_path) 
118 | 	        v_inputimage_late 	 = cv2.imread(v_late_img_path) 
119 | 	        i_inputimage_late 	 = cv2.imread(i_late_img_path) 
120 | 
121 | 	        v_inputimage_prev = cv2.resize(v_inputimage_prev, (300, 300), interpolation=cv2.INTER_LINEAR)
122 | 	        i_inputimage_prev = cv2.resize(i_inputimage_prev, (300, 300), interpolation=cv2.INTER_LINEAR)
123 | 	        v_inputimage_current = cv2.resize(v_inputimage_current, (300, 300), interpolation=cv2.INTER_LINEAR)
124 | 	        i_inputimage_current = cv2.resize(i_inputimage_current, (300, 300), interpolation=cv2.INTER_LINEAR)
125 | 	        v_inputimage_late = cv2.resize(v_inputimage_late, (300, 300), interpolation=cv2.INTER_LINEAR)
126 | 	        i_inputimage_late = cv2.resize(i_inputimage_late, (300, 300), interpolation=cv2.INTER_LINEAR)
127 | 
128 | 
129 | 	        batch_imgClip_v[0, 0] = to_tensor(v_inputimage_prev) 
130 | 	        batch_imgClip_v[0, 1] = to_tensor(v_inputimage_current) 
131 | 	        batch_imgClip_v[0, 2] = to_tensor(v_inputimage_late) 
132 | 
133 | 	        batch_imgClip_i[0, 0] = to_tensor(i_inputimage_prev) 
134 | 	        batch_imgClip_i[0, 1] = to_tensor(i_inputimage_current) 
135 | 	        batch_imgClip_i[0, 2] = to_tensor(i_inputimage_late) 
136 | 
137 | 	        # pdb.set_trace() 
138 | 
139 | 	        cursor += 1
140 | 	        attention_map = Generator(targetObject_v.cuda(), targetObject_i.cuda(), batch_imgClip_v.cuda(), batch_imgClip_i.cuda()) 
141 | 	        attention_map = nn.functional.interpolate(attention_map, size=[v_inputimage_prev.shape[0], v_inputimage_prev.shape[1]]) 
142 | 
143 | 	        # pdb.set_trace() 
144 | 	        new_Savepath = attentionSave_path + videoName 
145 | 
146 | 	        if os.path.exists(new_Savepath):  
147 | 	            print(" ")
148 | 	        else: 
149 | 	            os.mkdir(new_Savepath) 
150 | 
151 | 	        pilTrans = transforms.ToPILImage()
152 | 	        pilImg = pilTrans(attention_map[0].detach().cpu()) 
153 | 
154 | 	        new_path = new_Savepath + "/" + str(cursor+1) + "_attentionMap.jpg"
155 | 	        print('==>> Image saved to ', new_path)
156 | 	        pilImg.save(new_path)
157 | 
158 | 
159 | 
160 | 
161 | 
162 | 
163 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/daTANet_module/testing_234.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import torch
  3 | import torch.nn as nn
  4 | import numpy as np
  5 | import time
  6 | import os
  7 | os.environ["CUDA_VISIBLE_DEVICES"]="2"
  8 | from tqdm import tqdm
  9 | from torch.autograd import Variable
 10 | import torchvision.transforms as transforms
 11 | import random
 12 | from generator import daGenerator
 13 | from utils import *
 14 | import pdb 
 15 | import os.path
 16 | 
 17 | generator_path = './20200525_directionAware_TANet_rgbt_model.pkl'
 18 | Generator = daGenerator()
 19 | Generator.load_state_dict(torch.load(generator_path))
 20 | Generator.cuda()
 21 | 
 22 | def to_variable(x, requires_grad=True):
 23 |     if torch.cuda.is_available():
 24 |         x = x.cuda()
 25 |     return Variable(x,requires_grad)
 26 | 
 27 | counter = 0
 28 | start_time = time.time()
 29 | 
 30 | attentionSave_path = "/home/wangxiao/experiments/directionAware_rgbt_TANet_module/rgbt_234_Attention/"
 31 | dataset_path = "/DATA/wangxiao/dataset/RGB-T234/"
 32 | 
 33 | 
 34 | video_files = os.listdir(dataset_path) 
 35 | video_files.sort()
 36 | count = 0 
 37 | 
 38 | for videoidx in range(len(video_files)): 
 39 |     videoName = video_files[videoidx]     
 40 |     already_Done = os.listdir(attentionSave_path)
 41 |     
 42 |     if videoName in already_Done: 
 43 |         print("==>> Skip this video .... ")
 44 |     else:
 45 | 	    dataset_img_path_v = dataset_path + videoName + "/visible/" 
 46 | 	    dataset_img_files_v = os.listdir(dataset_img_path_v)
 47 | 	    dataset_img_path_i = dataset_path + videoName + "/infrared/" 
 48 | 	    dataset_img_files_i = os.listdir(dataset_img_path_i) 
 49 | 
 50 | 	    dataset_img_files_v.sort()
 51 | 	    dataset_img_files_i.sort()
 52 | 
 53 | 	    cursor = 0
 54 | 	    batch_size = 1 
 55 | 	    clip_len = 3 
 56 | 	    size = len(dataset_img_files_v) 
 57 | 	    to_tensor = transforms.ToTensor()  
 58 | 	    targetObject_v = torch.zeros(batch_size, 3, 300, 300)
 59 | 	    targetObject_i = torch.zeros(batch_size, 3, 300, 300) 
 60 | 
 61 | 	    gt_path = dataset_path + videoName + "/init.txt"
 62 | 	    gt_files = np.loadtxt(gt_path, delimiter=',')
 63 | 	    initBBox = gt_files[0] 
 64 | 
 65 | 	    initImg_path_v = dataset_img_files_v[0] 
 66 | 	    initImg_path_i = dataset_img_files_i[0] 
 67 | 
 68 | 	    initImage_v = cv2.imread(dataset_img_path_v + initImg_path_v) 
 69 | 	    initImage_i = cv2.imread(dataset_img_path_i + initImg_path_i) 
 70 | 
 71 | 	    tarObject_v = initImage_v[int(initBBox[1]):int(initBBox[1]+initBBox[3]), int(initBBox[0]):int(initBBox[0]+initBBox[2]), :] 
 72 | 	    tarObject_i = initImage_i[int(initBBox[1]):int(initBBox[1]+initBBox[3]), int(initBBox[0]):int(initBBox[0]+initBBox[2]), :] 
 73 | 
 74 | 	    tarObject_v = cv2.resize(tarObject_v, (300, 300), interpolation=cv2.INTER_LINEAR)
 75 | 	    tarObject_i = cv2.resize(tarObject_i, (300, 300), interpolation=cv2.INTER_LINEAR)
 76 | 
 77 | 	    targetObject_v[0] = to_tensor(tarObject_v) 
 78 | 	    targetObject_i[0] = to_tensor(tarObject_i) 
 79 | 	    # cv2.imwrite('./tarObject_v.png', tarObject_v)
 80 | 
 81 | 	    # pdb.set_trace() 
 82 | 	    for idx in range(1, len(dataset_img_files_v)):
 83 | 
 84 | 	        batch_imgClip_v = torch.zeros(batch_size, clip_len, 3, 300, 300) 
 85 | 	        batch_imgClip_i = torch.zeros(batch_size, clip_len, 3, 300, 300) 
 86 | 	        
 87 | 	        #### initialize continuous 3 images 
 88 | 	        if cursor < 1: 
 89 | 	            v_prev_file = dataset_img_files_v[cursor]
 90 | 	            i_prev_file = dataset_img_files_i[cursor]
 91 | 	        else: 
 92 | 	            v_prev_file = dataset_img_files_v[cursor-1]
 93 | 	            i_prev_file = dataset_img_files_i[cursor-1]
 94 | 
 95 | 	        v_curr_file = dataset_img_files_v[cursor]
 96 | 	        i_curr_file = dataset_img_files_i[cursor]
 97 | 
 98 | 	        if cursor == size: 
 99 | 	            v_late_file = dataset_img_files_v[size-1]
100 | 	            i_late_file = dataset_img_files_i[size-1]
101 | 	        else: 
102 | 	            v_late_file = dataset_img_files_v[cursor]
103 | 	            i_late_file = dataset_img_files_i[cursor]
104 | 	        	
105 | 
106 | 	        v_prev_img_path 	= os.path.join(dataset_img_path_v, v_prev_file)
107 | 	        i_prev_img_path 	= os.path.join(dataset_img_path_i, i_prev_file)
108 | 	        v_current_img_path  = os.path.join(dataset_img_path_v, v_curr_file)
109 | 	        i_current_img_path  = os.path.join(dataset_img_path_i, i_curr_file)  
110 | 	        v_late_img_path 	= os.path.join(dataset_img_path_v, v_late_file)
111 | 	        i_late_img_path 	= os.path.join(dataset_img_path_i, i_late_file)   
112 | 
113 | 	        v_inputimage_prev 	 = cv2.imread(v_prev_img_path) 
114 | 	        i_inputimage_prev 	 = cv2.imread(i_prev_img_path) 
115 | 	        v_inputimage_current = cv2.imread(v_current_img_path) 
116 | 	        i_inputimage_current = cv2.imread(i_current_img_path) 
117 | 	        v_inputimage_late 	 = cv2.imread(v_late_img_path) 
118 | 	        i_inputimage_late 	 = cv2.imread(i_late_img_path) 
119 | 
120 | 	        v_inputimage_prev = cv2.resize(v_inputimage_prev, (300, 300), interpolation=cv2.INTER_LINEAR)
121 | 	        i_inputimage_prev = cv2.resize(i_inputimage_prev, (300, 300), interpolation=cv2.INTER_LINEAR)
122 | 	        v_inputimage_current = cv2.resize(v_inputimage_current, (300, 300), interpolation=cv2.INTER_LINEAR)
123 | 	        i_inputimage_current = cv2.resize(i_inputimage_current, (300, 300), interpolation=cv2.INTER_LINEAR)
124 | 	        v_inputimage_late = cv2.resize(v_inputimage_late, (300, 300), interpolation=cv2.INTER_LINEAR)
125 | 	        i_inputimage_late = cv2.resize(i_inputimage_late, (300, 300), interpolation=cv2.INTER_LINEAR)
126 | 
127 | 
128 | 	        batch_imgClip_v[0, 0] = to_tensor(v_inputimage_prev) 
129 | 	        batch_imgClip_v[0, 1] = to_tensor(v_inputimage_current) 
130 | 	        batch_imgClip_v[0, 2] = to_tensor(v_inputimage_late) 
131 | 
132 | 	        batch_imgClip_i[0, 0] = to_tensor(i_inputimage_prev) 
133 | 	        batch_imgClip_i[0, 1] = to_tensor(i_inputimage_current) 
134 | 	        batch_imgClip_i[0, 2] = to_tensor(i_inputimage_late) 
135 | 
136 | 	        # pdb.set_trace() 
137 | 
138 | 	        cursor += 1
139 | 	        attention_map = Generator(targetObject_v.cuda(), targetObject_i.cuda(), batch_imgClip_v.cuda(), batch_imgClip_i.cuda()) 
140 | 	        attention_map = nn.functional.interpolate(attention_map, size=[v_inputimage_prev.shape[0], v_inputimage_prev.shape[1]]) 
141 | 
142 | 	        # pdb.set_trace() 
143 | 	        new_Savepath = attentionSave_path + videoName 
144 | 
145 | 	        if os.path.exists(new_Savepath):  
146 | 	            print(" ")
147 | 	        else: 
148 | 	            os.mkdir(new_Savepath) 
149 | 
150 | 	        pilTrans = transforms.ToPILImage()
151 | 	        pilImg = pilTrans(attention_map[0].detach().cpu()) 
152 | 
153 | 	        new_path = new_Savepath + "/" + str(cursor+1) + "_attentionMap.jpg"
154 | 	        print('==>> Image saved to ', new_path)
155 | 	        pilImg.save(new_path)
156 | 
157 | 
158 | 
159 | 
160 | 
161 | 
162 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/daTANet_module/train.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | import torch
  3 | import torch.nn as nn
  4 | import numpy as np
  5 | import time
  6 | import os
  7 | os.environ["CUDA_VISIBLE_DEVICES"]="2"
  8 | 
  9 | from torch.autograd import Variable
 10 | from generator import daGenerator
 11 | from utils import *
 12 | import pdb
 13 | import warnings
 14 | warnings.filterwarnings("ignore")
 15 | import torchvision.transforms as transforms
 16 | import random
 17 | 
 18 | batch_size = 5   
 19 | lr = 1e-5 
 20 | 
 21 | generator = daGenerator()
 22 | 
 23 | # generatorPath = "./directionAware_TANet_rgbt_model.pkl"
 24 | # generator_weights = torch.load(generatorPath)
 25 | # generator.load_state_dict(generator_weights)
 26 | 
 27 | if torch.cuda.is_available():
 28 |     generator.cuda()
 29 | 
 30 | criterion = nn.BCELoss()
 31 | g_optim = torch.optim.Adagrad(generator.parameters(), lr=lr)
 32 | num_epoch = 30   
 33 | 
 34 | def to_variable(x, requires_grad=True):
 35 |     if torch.cuda.is_available():
 36 |         x = x.cuda()
 37 |     return Variable(x,requires_grad)
 38 | 
 39 | start_time = time.time()
 40 | DIR_TO_SAVE = "./generator_output/"
 41 | if not os.path.exists(DIR_TO_SAVE):
 42 |     os.makedirs(DIR_TO_SAVE)
 43 | 
 44 | generator.train()
 45 | 
 46 | 
 47 | attention_path = "/home/wangxiao/targetAttention_train_dataset/"
 48 | 
 49 | video_files = os.listdir(attention_path) 
 50 | random.shuffle(video_files)
 51 | video_files = video_files[:300]
 52 | 
 53 | 
 54 | 
 55 | count = 0 
 56 | 
 57 | 
 58 | 
 59 | for current_epoch in range(num_epoch):
 60 |     g_cost_avg = 0
 61 |     
 62 |     for videoidx in range(len(video_files)): 
 63 |         videoName = video_files[videoidx] 
 64 | 
 65 |         dataset_img_path = attention_path + videoName + "/image/" 
 66 |         dataset_img_files = os.listdir(dataset_img_path)
 67 | 
 68 |         dataset_mask_path = attention_path + videoName + "/mask/" 
 69 |         dataset_tarObject_path = attention_path + videoName + "/tarObject/" 
 70 | 
 71 |         numBatches = len(dataset_img_files) / batch_size 
 72 |         cursor = 0
 73 | 
 74 |         # pdb.set_trace() 
 75 |         for idx in range(int(numBatches)):
 76 | 
 77 |             size = len(dataset_img_files) 
 78 | 
 79 |             if cursor + batch_size > size:
 80 |                 cursor = 0
 81 |                 # np.random.shuffle(dataset_img_files)
 82 |                 np.sort(dataset_img_files)
 83 |             
 84 |             batch_img = torch.zeros(batch_size, 3, 300, 300)
 85 |             batch_map = torch.zeros(batch_size, 1, 300, 300)
 86 |             targetObject_img = torch.zeros(batch_size, 3, 300, 300)
 87 |             targetObject_gray = torch.zeros(batch_size, 3, 300, 300)
 88 | 
 89 |             clip_len = 3 
 90 |             batch_imgClip = torch.zeros(batch_size, clip_len, 3, 300, 300) 
 91 |             batch_grayClip = torch.zeros(batch_size, clip_len, 3, 300, 300) 
 92 |                      
 93 |             to_tensor = transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0. 
 94 | 
 95 |             for batchidx in range(batch_size):
 96 | 
 97 |                 #### initialize continuous 3 images 
 98 |                 if cursor < 1: 
 99 |                     prev_file = dataset_img_files[cursor]
100 |                 else: 
101 |                     prev_file = dataset_img_files[cursor-1]
102 | 
103 |                 curr_file = dataset_img_files[cursor]
104 | 
105 |                 if cursor == size: 
106 |                     late_file = dataset_img_files[size-1]
107 |                 else: 
108 |                     late_file = dataset_img_files[cursor]
109 |                 
110 |                 imgIndex = curr_file[-12:]
111 | 
112 |                 prev_imgIndex = prev_file[-12:]
113 |                 late_imgIndex = late_file[-12:]
114 |                 # print(videoName, " ", imgIndex) 
115 | 
116 |                 targetObject_img_path = os.path.join(dataset_tarObject_path, videoName + '_target-00000001.jpg')
117 |                 full_img_path = os.path.join(dataset_img_path, videoName + "_image-" + imgIndex)
118 | 
119 |                 prev_full_img_path = os.path.join(dataset_img_path, videoName + "_image-" + prev_imgIndex) 
120 |                 late_full_img_path = os.path.join(dataset_img_path, videoName + "_image-" + late_imgIndex) 
121 | 
122 |                 full_map_path = os.path.join(dataset_mask_path, videoName + "_mask-" + imgIndex)
123 |                 cursor += 1
124 | 
125 |                 inputimage = cv2.imread(full_img_path) 
126 |                 prev_inputimage = cv2.imread(prev_full_img_path) 
127 |                 late_inputimage = cv2.imread(late_full_img_path) 
128 | 
129 | 
130 |                 #### for the gray image: 
131 |                 gray_prev_inputimage = cv2.cvtColor(prev_inputimage, cv2.COLOR_BGR2GRAY) 
132 |                 gray_prev_inputimage = to_tensor(gray_prev_inputimage)
133 |                 gray_prev_inputimage = torch.stack([gray_prev_inputimage, gray_prev_inputimage, gray_prev_inputimage], 1)[0] 
134 |                 batch_grayClip[batchidx, 0] = gray_prev_inputimage 
135 | 
136 |                 gray_inputimage = cv2.cvtColor(inputimage, cv2.COLOR_BGR2GRAY) 
137 |                 gray_inputimage = to_tensor(gray_inputimage)
138 |                 gray_inputimage = torch.stack([gray_inputimage, gray_inputimage, gray_inputimage], 1)[0] 
139 |                 batch_grayClip[batchidx, 1] = gray_inputimage 
140 | 
141 |                 gray_late_inputimage = cv2.cvtColor(prev_inputimage, cv2.COLOR_BGR2GRAY) 
142 |                 gray_late_inputimage = to_tensor(gray_late_inputimage)
143 |                 gray_late_inputimage = torch.stack([gray_late_inputimage, gray_late_inputimage, gray_late_inputimage], 1)[0] 
144 |                 batch_grayClip[batchidx, 2] = gray_late_inputimage 
145 | 
146 | 
147 |                 # pdb.set_trace() 
148 |                 batch_img[batchidx] = to_tensor(inputimage)
149 |                 batch_imgClip[batchidx, 0] = to_tensor(prev_inputimage) 
150 |                 batch_imgClip[batchidx, 1] = to_tensor(inputimage) 
151 |                 batch_imgClip[batchidx, 2] = to_tensor(late_inputimage) 
152 |                 
153 |                 targetObjectimage = cv2.imread(targetObject_img_path)
154 |                 targetObject_img[batchidx] = to_tensor(targetObjectimage)
155 |                 
156 |                 gray_targetObjectimage = cv2.cvtColor(targetObjectimage, cv2.COLOR_BGR2GRAY) 
157 |                 gray_targetObjectimage = to_tensor(gray_targetObjectimage)
158 |                 gray_targetObjectimage = torch.stack([gray_targetObjectimage, gray_targetObjectimage, gray_targetObjectimage], 1)[0] 
159 |                 targetObject_gray[batchidx] = gray_targetObjectimage 
160 | 
161 | 
162 |                 saliencyimage = cv2.imread(full_map_path, 0)
163 |                 saliencyimage = np.expand_dims(saliencyimage, axis=2)
164 |                 batch_map[batchidx] = to_tensor(saliencyimage)
165 | 
166 | 
167 | 
168 |             batch_img = to_variable(batch_img, requires_grad=True)
169 |             batch_map = to_variable(batch_map, requires_grad=False)
170 |             targetObject_img = to_variable(targetObject_img, requires_grad=True)
171 |             targetObject_gray = to_variable(targetObject_gray, requires_grad=True) 
172 |             batch_imgClip = to_variable(batch_imgClip, requires_grad=True) 
173 |             batch_grayClip = to_variable(batch_grayClip, requires_grad=True) 
174 | 
175 |             val_batchImg = batch_img
176 |             val_targetObjectImg = targetObject_img 
177 |             val_gray_targetObjectimage = targetObject_gray 
178 |             val_imgClip = batch_imgClip  
179 |             val_batch_grayClip = batch_grayClip 
180 | 
181 |             count = count + 1
182 | 
183 |             g_optim.zero_grad()
184 |             attention_map = generator(targetObject_img, targetObject_gray, batch_imgClip, batch_grayClip)
185 | 
186 |             batch_map = nn.functional.interpolate(batch_map, size=[attention_map.shape[2], attention_map.shape[3]]) 
187 | 
188 | 
189 |             # pdb.set_trace()
190 |             g_gen_loss = criterion(attention_map, batch_map)
191 |             g_loss = torch.sum(g_gen_loss)
192 |             g_cost_avg += g_loss.item()
193 |             g_loss.backward()
194 |             g_optim.step()
195 | 
196 | 
197 |             print("==>> Epoch [%d/%d], g_gen_loss: %.4f, vidIndex [%d/%d], LR: %.6f, time: %4.4f" % \
198 |                 (current_epoch, num_epoch, g_loss.item(), videoidx, len(video_files), lr, time.time()-start_time))
199 | 
200 | 
201 |         # validation 
202 |         out = generator(val_targetObjectImg, val_gray_targetObjectimage, val_imgClip, val_batch_grayClip)
203 |         map_out = out.cpu().data.squeeze(0)
204 |         for iiidex in range(batch_size): 
205 |            new_path = DIR_TO_SAVE + str(current_epoch) + str(iiidex) + ".jpg"
206 |            pilTrans = transforms.ToPILImage()
207 |            pilImg = pilTrans(map_out[iiidex]) 
208 |            # print('==>> Image saved to ', new_path)
209 |            pilImg.save(new_path)
210 | 
211 | 
212 |         g_cost_avg /= numBatches
213 | 
214 |     # pdb.set_trace()
215 |     # Save weights 
216 |     if current_epoch % 1 == 0:
217 |         print("==>> save checkpoints ... ", ' ==>> Train_loss->', (g_cost_avg))
218 |         torch.save(generator.state_dict(), '20200525_directionAware_TANet_rgbt_model.pkl')
219 | 
220 | 
221 | 
222 | 
223 | 
224 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/daTANet_module/utils.py:
--------------------------------------------------------------------------------
 1 | import cv2
 2 | import torchvision.transforms as transforms
 3 | import numpy as np
 4 | import matplotlib.pyplot as plt
 5 | import torch
 6 | from torch.autograd import Variable
 7 | import torch.nn as nn
 8 | import pdb 
 9 | 
10 | from PIL import Image
11 | 
12 | def to_variable(x,requires_grad=True):
13 |     if torch.cuda.is_available():
14 |         x = x.cuda()
15 |     return Variable(x,requires_grad)
16 | 
17 | def show(img):
18 |     #print(img.shape)
19 |     pilTrans = transforms.ToPILImage()
20 |     pilImg = pilTrans(img)
21 |     s = np.array(pilImg)
22 |     plt.figure()
23 |     plt.imshow(s)
24 |     
25 | def show_gray(img):
26 |     print(img.shape)
27 |     pilTrans = transforms.ToPILImage()
28 |     pilImg = pilTrans(img)
29 |     s = np.array(pilImg)
30 |     plt.figure()
31 |     plt.imshow(s)
32 |     
33 | def save_gray(img, path):
34 |     pilTrans = transforms.ToPILImage()
35 |     pilImg = pilTrans(img)
36 |     print('Image saved to ', path)
37 |     pilImg.save(path)
38 | 
39 | 
40 | 
41 | 
42 | def predict(model, img, validation_targetObject):
43 |     to_tensor = transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0.
44 |     im = to_tensor(img)
45 |     val_targetObject = to_tensor(validation_targetObject)
46 |     #show(im)
47 |     inp = to_variable(im.unsqueeze(0), False)
48 |     inp = nn.functional.interpolate(inp, size=[300, 300])
49 | 
50 |     val_targetObject_ = to_variable(val_targetObject.unsqueeze(0), False) 
51 |     val_targetObject_ = nn.functional.interpolate(val_targetObject_, size=[100, 100]) 
52 | 
53 |     #print(inp.size())
54 | 
55 |     out = model(inp, val_targetObject_)
56 |     out = nn.functional.interpolate(out, size=[im.shape[1], im.shape[2]]) 
57 | 
58 |     map_out = out.cpu().data.squeeze(0)
59 |     pilTrans = transforms.ToPILImage()
60 |     pilImg = pilTrans(map_out)
61 |     dynamic_atttentonMAP = np.asarray(pilImg)
62 | 
63 |     return dynamic_atttentonMAP 
64 | 
65 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/models/readme.txt:
--------------------------------------------------------------------------------
1 | Download our pre-trained model from Google drive, or train this network yourself. 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/bbreg.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | from sklearn.linear_model import Ridge
 3 | import numpy as np
 4 | 
 5 | from utils import *
 6 | 
 7 | class BBRegressor():
 8 |     def __init__(self, img_size, alpha=1000, overlap=[0.6, 1], scale=[1, 2]):
 9 |         self.img_size = img_size
10 |         self.alpha = alpha
11 |         self.overlap_range = overlap
12 |         self.scale_range = scale
13 |         self.model = Ridge(alpha=self.alpha)
14 | 
15 |     def train(self, X, bbox, gt):
16 |         X = X.cpu().numpy()
17 |         bbox = np.copy(bbox)
18 |         gt = np.copy(gt)
19 |         
20 |         if gt.ndim==1:
21 |             gt = gt[None,:]
22 | 
23 |         r = overlap_ratio(bbox, gt)
24 |         s = np.prod(bbox[:,2:], axis=1) / np.prod(gt[0,2:])
25 |         idx = (r >= self.overlap_range[0]) * (r <= self.overlap_range[1]) * \
26 |               (s >= self.scale_range[0]) * (s <= self.scale_range[1])
27 | 
28 |         X = X[idx]
29 |         bbox = bbox[idx]
30 | 
31 |         Y = self.get_examples(bbox, gt)
32 |         
33 |         self.model.fit(X, Y)
34 | 
35 |     def predict(self, X, bbox):
36 |         X = X.cpu().numpy()
37 |         bbox_ = np.copy(bbox)
38 | 
39 |         Y = self.model.predict(X)
40 |     
41 |         bbox_[:,:2] = bbox_[:,:2] + bbox_[:,2:]/2
42 |         bbox_[:,:2] = Y[:,:2] * bbox_[:,2:] + bbox_[:,:2]
43 |         bbox_[:,2:] = np.exp(Y[:,2:]) * bbox_[:,2:]
44 |         bbox_[:,:2] = bbox_[:,:2] - bbox_[:,2:]/2
45 |         
46 |         r = overlap_ratio(bbox, bbox_)
47 |         s = np.prod(bbox[:,2:], axis=1) / np.prod(bbox_[:,2:], axis=1)
48 |         idx = (r >= self.overlap_range[0]) * (r <= self.overlap_range[1]) * \
49 |               (s >= self.scale_range[0]) * (s <= self.scale_range[1])
50 |         idx = np.logical_not(idx)
51 |         bbox_[idx] = bbox[idx]
52 |  
53 |         bbox_[:,:2] = np.maximum(bbox_[:,:2], 0)
54 |         bbox_[:,2:] = np.minimum(bbox_[:,2:], self.img_size - bbox[:,:2])
55 | 
56 |         return bbox_
57 |     
58 |     def get_examples(self, bbox, gt):
59 |         bbox[:,:2] = bbox[:,:2] + bbox[:,2:]/2
60 |         gt[:,:2] = gt[:,:2] + gt[:,2:]/2
61 | 
62 |         dst_xy = (gt[:,:2] - bbox[:,:2]) / bbox[:,2:]
63 |         dst_wh = np.log(gt[:,2:] / bbox[:,2:])
64 | 
65 |         Y = np.concatenate((dst_xy, dst_wh), axis=1)
66 |         return Y
67 | 
68 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/bbreg.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/bbreg.pyc


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/data_prov.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import numpy as np
  3 | from PIL import Image
  4 | 
  5 | import torch
  6 | import torch.utils.data as data
  7 | import matplotlib.pyplot as plt
  8 | from utils import *
  9 | 
 10 | import matplotlib.patches as patches
 11 | 
 12 | import os
 13 | from sample_generator import *
 14 | 
 15 | import sys
 16 | from pretrain_options import *
 17 | 
 18 | from img_cropper import *
 19 | import pdb 
 20 | 
 21 | 
 22 | 
 23 | class RegionDataset(data.Dataset):
 24 |     def __init__(self, img_dir, img_list_v, img_list_i, videoPath_v, videoPath_i, gt, receptive_field, opts):
 25 |         
 26 |         # self.img_list_v = np.array([os.path.join(img_dir, img) for img in img_list_v])
 27 |         # self.img_list_i = np.array([os.path.join(img_dir, img) for img in img_list_i])
 28 | 
 29 |         self.img_list_v = np.array([img_dir+ '/v/' +img for img in img_list_v])
 30 |         self.img_list_i = np.array([img_dir+ '/i/' +img for img in img_list_i])
 31 | 
 32 |         self.videoPath_v = videoPath_v 
 33 |         self.videoPath_i = videoPath_i 
 34 | 
 35 |         self.gt = gt
 36 | 
 37 |         self.batch_frames = pretrain_opts['batch_frames']
 38 |         self.batch_pos = pretrain_opts['batch_pos']
 39 |         self.batch_neg = pretrain_opts['batch_neg']
 40 | 
 41 |         self.overlap_pos = pretrain_opts['overlap_pos']
 42 |         self.overlap_neg = pretrain_opts['overlap_neg']
 43 | 
 44 | 
 45 |         self.crop_size = pretrain_opts['img_size']
 46 |         self.padding = pretrain_opts['padding']
 47 | 
 48 |         self.index = np.random.permutation(len(self.img_list_v))
 49 |         self.pointer = 0
 50 |         
 51 |         image_v = Image.open(self.img_list_v[0]).convert('RGB')
 52 |         self.scene_generator = SampleGenerator('gaussian', image_v.size,trans_f=1.5, scale_f=1.2,valid=True)
 53 |         self.pos_generator = SampleGenerator('gaussian', image_v.size, 0.1, 1.2, 1.1, True)
 54 |         self.neg_generator = SampleGenerator('uniform', image_v.size, 1, 1.2, 1.1, True)
 55 | 
 56 |         self.receptive_field = receptive_field
 57 | 
 58 |         self.interval = pretrain_opts['frame_interval']
 59 |         self.img_crop_model = imgCropper(pretrain_opts['padded_img_size'])
 60 |         self.img_crop_model.eval()
 61 |         if pretrain_opts['use_gpu']:
 62 |             self.img_crop_model.gpuEnable()
 63 | 
 64 |     def __iter__(self):
 65 |         return self
 66 | 
 67 |     def __next__(self):
 68 | 
 69 |         next_pointer = min(self.pointer + self.batch_frames, len(self.img_list_v))
 70 |         idx = self.index[self.pointer:next_pointer]
 71 |         if len(idx) < self.batch_frames:
 72 |             self.index = np.random.permutation(len(self.img_list_v))
 73 |             next_pointer = self.batch_frames - len(idx)
 74 |             idx = np.concatenate((idx, self.index[:next_pointer]))
 75 |         self.pointer = next_pointer
 76 | 
 77 | 
 78 |         n_pos = self.batch_pos
 79 |         n_neg = self.batch_neg
 80 | 
 81 |         scenes_i = []
 82 |         scenes_v = [] 
 83 |         for i, (img_path_v, img_path_i, bbox) in enumerate(zip(self.img_list_v[idx], self.img_list_i[idx], self.gt[idx])):
 84 |             image_v = Image.open(img_path_v).convert('RGB')
 85 |             image_v = np.asarray(image_v)
 86 |             
 87 |             image_i = Image.open(img_path_i).convert('RGB')
 88 |             image_i = np.asarray(image_i)
 89 | 
 90 |             bbox[2] = bbox[2] - bbox[0]
 91 |             bbox[3] = bbox[3] - bbox[1]                        
 92 | 
 93 |             ishape = image_v.shape
 94 |             pos_examples = gen_samples(SampleGenerator('gaussian', (ishape[1],ishape[0]), 0.1, 1.2, 1.1, False), bbox, n_pos, overlap_range=self.overlap_pos)
 95 |             neg_examples = gen_samples(SampleGenerator('uniform', (ishape[1],ishape[0]), 1, 1.2, 1.1, False), bbox, n_neg, overlap_range=self.overlap_neg)
 96 | 
 97 |             # compute padded sample
 98 |             padded_x1 = (neg_examples[:, 0]-neg_examples[:,2]*(pretrain_opts['padding']-1.)/2.).min()
 99 |             padded_y1 = (neg_examples[:, 1]-neg_examples[:,3]*(pretrain_opts['padding']-1.)/2.).min()
100 |             padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2]*(pretrain_opts['padding']+1.)/2.).max()
101 |             padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3]*(pretrain_opts['padding']+1.)/2.).max()
102 |             padded_scene_box = np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1))
103 | 
104 |             jitter_scale = 1.1 ** np.clip(3.*np.random.randn(1,1),-2,2)
105 |             crop_img_size = (padded_scene_box[2:4] * ((pretrain_opts['img_size'], pretrain_opts['img_size']) / bbox[2:4])).astype('int64') * jitter_scale[0][0]
106 |             cropped_image_v, cur_image_var_v = self.img_crop_model.crop_image(image_v, np.reshape(padded_scene_box, (1, 4)), crop_img_size)
107 |             cropped_image_v = cropped_image_v - 128.
108 | 
109 |             cropped_image_i, cur_image_var_i = self.img_crop_model.crop_image(image_i, np.reshape(padded_scene_box, (1, 4)), crop_img_size)
110 |             cropped_image_i = cropped_image_i - 128.
111 | 
112 |             if pretrain_opts['use_gpu']:
113 |                 cropped_image_i = cropped_image_i.data.cpu()
114 |                 cur_image_var_i = cur_image_var_i.cpu()
115 | 
116 |                 cropped_image_v = cropped_image_v.data.cpu()
117 |                 cur_image_var_v = cur_image_var_v.cpu()
118 | 
119 |             scenes_v.append(cropped_image_v)
120 |             scenes_i.append(cropped_image_i)
121 | 
122 |             ## get current frame and heatmap
123 |             rel_bbox = np.copy(bbox)
124 |             rel_bbox[0:2] -= padded_scene_box[0:2]
125 | 
126 |             jittered_obj_size = jitter_scale[0][0]*float(pretrain_opts['img_size'])
127 | 
128 |             batch_num = np.zeros((pos_examples.shape[0], 1))
129 |             pos_rois = np.copy(pos_examples)
130 |             pos_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)), pos_rois.shape[0], axis=0)
131 |             pos_rois = samples2maskroi(pos_rois, self.receptive_field, (jittered_obj_size, jittered_obj_size),bbox[2:4], pretrain_opts['padding'])
132 |             pos_rois = np.concatenate((batch_num, pos_rois), axis=1)
133 | 
134 |             batch_num = np.zeros((neg_examples.shape[0], 1))
135 |             neg_rois = np.copy(neg_examples)
136 |             neg_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)), neg_rois.shape[0], axis=0)
137 |             neg_rois = samples2maskroi(neg_rois, self.receptive_field, (jittered_obj_size, jittered_obj_size),bbox[2:4], pretrain_opts['padding'])
138 |             neg_rois = np.concatenate((batch_num, neg_rois), axis=1)
139 | 
140 |             if i==0:
141 |                 total_pos_rois = [torch.from_numpy(np.copy(pos_rois).astype('float32'))]
142 |                 total_neg_rois = [torch.from_numpy(np.copy(neg_rois).astype('float32'))]
143 |             else:
144 |                 total_pos_rois.append(torch.from_numpy(np.copy(pos_rois).astype('float32')))
145 |                 total_neg_rois.append(torch.from_numpy(np.copy(neg_rois).astype('float32')))
146 | 
147 |         return scenes_v, scenes_i, total_pos_rois, total_neg_rois
148 | 
149 | 
150 | 
151 |     next = __next__
152 | 
153 |     def extract_regions(self, image, samples):
154 |         regions = np.zeros((len(samples), self.crop_size, self.crop_size, 3), dtype='uint8')
155 |         for i, sample in enumerate(samples):
156 |             regions[i] = crop_image(image, sample, self.crop_size, self.padding, True)
157 | 
158 |         regions = regions.transpose(0, 3, 1, 2)
159 |         regions = regions.astype('float32') - 128.
160 |         return regions
161 | 
162 | 
163 | 
164 | 
165 | 
166 | class RegionExtractor():
167 |     def __init__(self, image, samples, crop_size, padding, batch_size, shuffle=False):
168 | 
169 |         self.image = np.asarray(image)
170 |         self.samples = samples
171 |         self.crop_size = crop_size
172 |         self.padding = padding
173 |         self.batch_size = batch_size
174 |         self.shuffle = shuffle
175 | 
176 |         self.index = np.arange(len(samples))
177 |         self.pointer = 0
178 | 
179 |         self.mean = self.image.mean(0).mean(0).astype('float32')
180 | 
181 |     def __iter__(self):
182 |         return self
183 | 
184 |     def __next__(self):
185 |         if self.pointer == len(self.samples):
186 |             self.pointer = 0
187 |             raise StopIteration
188 |         else:
189 |             next_pointer = min(self.pointer + self.batch_size, len(self.samples))
190 |             index = self.index[self.pointer:next_pointer]
191 |             self.pointer = next_pointer
192 | 
193 |             regions = self.extract_regions(index)
194 |             regions = torch.from_numpy(regions)
195 |             return regions
196 |     next = __next__
197 | 
198 |     def extract_regions(self, index):
199 |         regions = np.zeros((len(index),self.crop_size,self.crop_size,3),dtype='uint8')
200 |         for i, sample in enumerate(self.samples[index]):
201 |             regions[i] = crop_image(self.image, sample, self.crop_size, self.padding)
202 | 
203 |         regions = regions.transpose(0,3,1,2).astype('float32')
204 |         regions = regions - 128.
205 |         return regions
206 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/data_prov.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/data_prov.pyc


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/img_cropper.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.insert(0,'./modules')
 3 | from roi_align import RoIAlign
 4 | import torch.nn as nn
 5 | import torch.nn.functional as F
 6 | from torch.autograd import Variable
 7 | import torch
 8 | import numpy as np
 9 | 
10 | import time
11 | 
12 | import matplotlib.pyplot as plt
13 | import matplotlib.patches as patches
14 | 
15 | class imgCropper(nn.Module):
16 |     def __init__(self, img_size):
17 |         super(imgCropper, self).__init__()
18 |         self.isCuda = False
19 |         self.img_size = img_size
20 |         self.roi_align_model = RoIAlign(img_size,img_size, 1. )
21 | 
22 |     def gpuEnable(self):
23 |         self.roi_align_model = self.roi_align_model.cuda()
24 |         self.isCuda = True
25 | 
26 |     def forward(self, image, roi):
27 |         aligned_image_var = self.roi_align_model(image, roi)
28 |         return aligned_image_var
29 | 
30 |     def crop_image(self,image, box, result_size):
31 |         ## constraint = several box from common 1 image
32 |         ishape = image.shape
33 |         cur_image_var = np.reshape(image, (1, ishape[0], ishape[1], ishape[2]))
34 |         cur_image_var = cur_image_var.transpose(0, 3, 1, 2)
35 |         cur_image_var = cur_image_var.astype('float32')
36 |         cur_image_var = Variable(torch.from_numpy(cur_image_var).float())
37 | 
38 | 
39 |         roi = np.copy(box)
40 |         roi[:,2:4] += roi[:,0:2]
41 |         roi = np.concatenate((np.zeros((roi.shape[0], 1)), roi), axis=1)
42 |         roi = Variable(torch.from_numpy(roi).float())
43 | 
44 |         if self.isCuda:
45 |             cur_image_var = cur_image_var.cuda()
46 |             roi = roi.cuda()
47 | 
48 |         self.roi_align_model.aligned_width = result_size[0]
49 |         self.roi_align_model.aligned_height = result_size[1]
50 |         cropped_image = self.forward(cur_image_var, roi)
51 | 
52 |         return cropped_image, cur_image_var
53 | 
54 |     def crop_several_image(self,img_list,target_list):
55 |         ## constraint = one to one matching between image and target
56 |         ## exception handling
57 |         assert(len(target_list) == len(img_list))
58 | 
59 |         ## image crop
60 |         torch.cuda.synchronize()
61 |         start_time = time.time()
62 |         cur_images = torch.squeeze(torch.stack(img_list, 0))
63 |         torch.cuda.synchronize()
64 |         print('10 image stacking time:{}'.format(time.time() - start_time))
65 | 
66 |         ishape = cur_images.size()
67 | 
68 |         # Extract sample features and get target location
69 |         sample_rois = np.array(target_list)
70 |         sample_rois[:,2:4] += sample_rois[:,0:2]
71 |         batch_num = np.reshape(np.arange(0,len(sample_rois)),(len(sample_rois),1))
72 |         sample_rois = np.concatenate( (batch_num, sample_rois), axis=1)
73 |         sample_rois = Variable(torch.from_numpy(sample_rois.astype('float32')))
74 |         if self.isCuda:
75 |             sample_rois = sample_rois.cuda()
76 |             cur_images = cur_images.cuda()
77 | 
78 |         cropped_images = self.forward(cur_images, sample_rois)
79 | 
80 | 
81 |         return cropped_images
82 | 
83 | 
84 | 
85 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/img_cropper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/img_cropper.pyc


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/model.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import scipy.io
  3 | import numpy as np
  4 | from collections import OrderedDict
  5 | import torch.nn as nn
  6 | import torch.nn.functional as F
  7 | from torch.autograd import Variable
  8 | import torch
  9 | import time
 10 | import sys
 11 | sys.path.insert(0,'./roi_align')
 12 | from roi_align import RoIAlignAvg,RoIAlignMax
 13 | import pdb 
 14 | import math
 15 | import torch
 16 | from torch.nn.parameter import Parameter
 17 | from torch.nn.modules.utils import _single, _pair, _triple
 18 | 
 19 | 
 20 | 
 21 | class _ConvNd(nn.Module):
 22 | 
 23 |     def __init__(self, in_channels, out_channels, kernel_size, stride,
 24 |                  padding, dilation, transposed, output_padding, groups, bias):
 25 |         super(_ConvNd, self).__init__()
 26 |         if in_channels % groups != 0:
 27 |             raise ValueError('in_channels must be divisible by groups')
 28 |         if out_channels % groups != 0:
 29 |             raise ValueError('out_channels must be divisible by groups')
 30 |         self.in_channels = in_channels
 31 |         self.out_channels = out_channels
 32 |         self.kernel_size = kernel_size
 33 |         self.stride = stride
 34 |         self.padding = padding
 35 |         self.dilation = dilation
 36 |         self.transposed = transposed
 37 |         self.output_padding = output_padding
 38 |         self.groups = groups
 39 |         
 40 |         if bias:
 41 |             self.bias = Parameter(torch.Tensor(out_channels))
 42 |         else:
 43 |             self.register_parameter('bias', None)
 44 |         self.reset_parameters()
 45 |         
 46 |     
 47 |     def reset_parameters(self):
 48 |         n = self.in_channels
 49 |         for k in self.kernel_size:
 50 |             n *= k
 51 |         stdv = 1. / math.sqrt(n)
 52 |         if self.bias is not None:
 53 |             self.bias.data.uniform_(-stdv, stdv)
 54 | 
 55 |     def __repr__(self):
 56 |         s = ('{name}({in_channels}, {out_channels}, kernel_size={kernel_size}'
 57 |              ', stride={stride}')
 58 |         if self.padding != (0,) * len(self.padding):
 59 |             s += ', padding={padding}'
 60 |         if self.dilation != (1,) * len(self.dilation):
 61 |             s += ', dilation={dilation}'
 62 |         if self.output_padding != (0,) * len(self.output_padding):
 63 |             s += ', output_padding={output_padding}'
 64 |         if self.groups != 1:
 65 |             s += ', groups={groups}'
 66 |         if self.bias is None:
 67 |             s += ', bias=False'
 68 |         s += ')'
 69 |         return s.format(name=self.__class__.__name__, **self.__dict__)
 70 | 
 71 | 
 72 | 
 73 | 
 74 | 
 75 | class AdaptiveConv2d(_ConvNd):
 76 | 
 77 |     def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True):
 78 |         kernel_size = _pair(kernel_size)
 79 |         stride = _pair(stride)
 80 |         padding = _pair(padding)
 81 |         dilation = _pair(dilation)
 82 |         super(AdaptiveConv2d, self).__init__(
 83 |                 in_channels, out_channels, kernel_size, stride, padding, dilation,
 84 |                 False, _pair(0), groups, bias)
 85 | 
 86 |     def forward(self, input, dynamic_weight):
 87 |         # Get batch num
 88 |         batch_num = input.size(0)
 89 | 
 90 |         # Reshape input tensor from size (N, C, H, W) to (1, N*C, H, W)
 91 |         input = input.view(1, -1, input.size(2), input.size(3))
 92 | 
 93 |         # Reshape dynamic_weight tensor from size (N, C, H, W) to (1, N*C, H, W)
 94 |         dynamic_weight = dynamic_weight.view(-1, 1, dynamic_weight.size(2), dynamic_weight.size(3))
 95 | 
 96 |         # Do convolution
 97 |         conv_rlt = F.conv2d(input, dynamic_weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
 98 | 
 99 |         # Reshape conv_rlt tensor from (1, N*C, H, W) to (N, C, H, W)
100 |         conv_rlt = conv_rlt.view(batch_num, -1, conv_rlt.size(2), conv_rlt.size(3))
101 | 
102 |         return conv_rlt
103 | 
104 | 
105 | def append_params(params, module, prefix):
106 |     for child in module.children():
107 |         for k,p in child._parameters.items():
108 |             if p is None: continue
109 | 
110 |             if isinstance(child, nn.BatchNorm2d):
111 |                 name = prefix + '_bn_' + k
112 |             else:
113 |                 name = prefix + '_' + k
114 | 
115 |             if name not in params:
116 |                 params[name] = p
117 |             else:
118 |                 raise RuntimeError("Duplicated param name: %s" % (name))
119 | 
120 | class LRN(nn.Module):
121 |     def __init__(self, local_size=1, alpha=0.0001, beta=0.75, ACROSS_CHANNELS=False):
122 |         super(LRN, self).__init__()
123 |         self.ACROSS_CHANNELS = ACROSS_CHANNELS
124 |         if self.ACROSS_CHANNELS:
125 |             self.average = nn.AvgPool3d(kernel_size=(local_size, 1, 1),
126 |                                         stride=1,
127 |                                         padding=(int((local_size - 1.0) / 2), 0, 0))
128 |         else:
129 |             self.average = nn.AvgPool2d(kernel_size=local_size,
130 |                                         stride=1,
131 |                                         padding=int((local_size - 1.0) / 2))
132 |         self.alpha = alpha
133 |         self.beta = beta
134 | 
135 |     def forward(self, x):
136 |         if self.ACROSS_CHANNELS:
137 |             div = x.pow(2).unsqueeze(1)
138 |             div = self.average(div).squeeze(1)
139 |             div = div.mul(self.alpha).add(2.0).pow(self.beta)
140 |         else:
141 |             div = x.pow(2)
142 |             div = self.average(div)
143 |             div = div.mul(self.alpha).add(2.0).pow(self.beta)
144 |         x = x.div(div)
145 |         return x
146 | 
147 | 
148 | class ChannelAttention(nn.Module):
149 |     def __init__(self, in_planes, ratio=16):
150 |         super(ChannelAttention, self).__init__()
151 |         self.avg_pool = nn.AdaptiveAvgPool2d(1)
152 |         self.max_pool = nn.AdaptiveMaxPool2d(1)
153 | 
154 |         self.fc1   = nn.Conv2d(in_planes, in_planes // 16, 1, bias=False)
155 |         self.relu1 = nn.ReLU()
156 |         self.fc2   = nn.Conv2d(in_planes // 16, in_planes, 1, bias=False)
157 | 
158 |         self.sigmoid = nn.Sigmoid()
159 | 
160 |     def forward(self, x):
161 |         avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x))))
162 |         max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x))))
163 |         out = avg_out + max_out
164 |         return self.sigmoid(out)
165 | 
166 | 
167 | 
168 | 
169 | class SpatialAttention(nn.Module):
170 |     def __init__(self, kernel_size=7):
171 |         super(SpatialAttention, self).__init__()
172 | 
173 |         assert kernel_size in (3, 7), 'kernel size must be 3 or 7'
174 |         padding = 3 if kernel_size == 7 else 1
175 | 
176 |         self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
177 |         self.sigmoid = nn.Sigmoid()
178 | 
179 |     def forward(self, x):
180 |         avg_out = torch.mean(x, dim=1, keepdim=True)
181 |         max_out, _ = torch.max(x, dim=1, keepdim=True)
182 |         x = torch.cat([avg_out, max_out], dim=1)
183 |         x = self.conv1(x)
184 |         return self.sigmoid(x)
185 | 
186 | 
187 | 
188 | 
189 | class MDNet(nn.Module):
190 |     def __init__(self, model_path=None,K=1):
191 |         super(MDNet, self).__init__()
192 |         self.K = K
193 |         self.layers = nn.Sequential(OrderedDict([
194 |                 ('conv1', nn.Sequential(nn.Conv2d(3, 96, kernel_size=7, stride=2),
195 |                                         nn.ReLU(),
196 |                                         LRN(),
197 |                                         nn.MaxPool2d(kernel_size=3, stride=2)
198 |                                         )),
199 |                 ('conv2', nn.Sequential(nn.Conv2d(96, 256, kernel_size=5, stride=2,dilation=1),
200 |                                         nn.ReLU(),
201 |                                         LRN(),
202 |                                         )),
203 | 
204 |                 ('conv3', nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=1,dilation=3),
205 |                                         nn.ReLU(),
206 |                                         )),
207 |                 ('fc4',   nn.Sequential(nn.Linear(512 * 3 * 3 * 2, 512),
208 |                                         nn.ReLU())),
209 |                 ('fc5',   nn.Sequential(nn.Dropout(0.5),
210 |                                         nn.Linear(512, 512),
211 |                                         nn.ReLU()))]))
212 | 
213 |         self.branches = nn.ModuleList([nn.Sequential(nn.Dropout(0.5), nn.Linear(512, 2)) for _ in range(K)]) 
214 | 
215 |         self.sigmoid = nn.Sigmoid()
216 | 
217 |         self.roi_align_model = RoIAlignMax(3, 3, 1. / 8) 
218 | 
219 |         self.conv1x1_Tk = nn.Conv2d(1024, 512, 1, 1) 
220 |         self.conv1x1_Tq = nn.Conv2d(1024, 9,   1, 1)
221 |         self.conv1x1_Vk = nn.Conv2d(1024, 512, 1, 1)
222 |         self.conv1x1_Vq = nn.Conv2d(1024, 9,   1, 1)
223 | 
224 |         self.conv1x1_Tk = self.conv1x1_Tk.cuda() 
225 |         self.conv1x1_Tq = self.conv1x1_Tq.cuda() 
226 |         self.conv1x1_Vk = self.conv1x1_Vk.cuda() 
227 |         self.conv1x1_Vq = self.conv1x1_Vq.cuda() 
228 | 
229 |         self.channel_attention = ChannelAttention(1024)
230 |         self.spatial_attention = SpatialAttention()
231 | 
232 | 
233 |         # self.BatchNorm2D = nn.BatchNorm2d(100)
234 | 
235 |         self.receptive_field = 75.  # it is receptive fieald that a element of feat_map covers. feat_map is bottom layer of ROI_align_layer
236 | 
237 |         if model_path is not None:
238 |             if os.path.splitext(model_path)[1] == '.pth':
239 |                 self.load_model(model_path)
240 |             elif os.path.splitext(model_path)[1] == '.mat':
241 |                 self.load_mat_model(model_path)
242 |             else:
243 |                 raise RuntimeError("Unkown model format: %s" % (model_path))
244 |         self.build_param_dict()
245 | 
246 |     def build_param_dict(self):
247 |         self.params = OrderedDict()
248 |         for name, module in self.layers.named_children():
249 |             append_params(self.params, module, name)
250 |         for k, module in enumerate(self.branches):
251 |             append_params(self.params, module, 'fc6_%d'%(k))
252 |         for name, module in self.conv1x1_Tk.named_children():
253 |             append_params(self.params, module, name)
254 |         for name, module in self.conv1x1_Tq.named_children():
255 |             append_params(self.params, module, name)
256 |         for name, module in self.conv1x1_Vk.named_children():
257 |             append_params(self.params, module, name)
258 |         for name, module in self.conv1x1_Vq.named_children():
259 |             append_params(self.params, module, name)
260 |         for name, module in self.channel_attention.named_children():
261 |             append_params(self.params, module, name)
262 |         for name, module in self.spatial_attention.named_children():
263 |             append_params(self.params, module, name)
264 | 
265 | 
266 | 
267 |     def set_learnable_params(self, layers):
268 |         for k, p in self.params.items():
269 |             if any([k.startswith(l) for l in layers]):
270 |                 p.requires_grad = True
271 |             else:
272 |                 p.requires_grad = False
273 | 
274 | 
275 |     def get_learnable_params(self):
276 |         params = OrderedDict()
277 |         for k, p in self.params.items():
278 |             if p.requires_grad:
279 |                 params[k] = p
280 |         return params
281 | 
282 | 
283 | 
284 | 
285 |     ###########################################################################################
286 |     ####                        the forward function 
287 |     ###########################################################################################
288 | 
289 |     def forward(self, x_v, x_i, k=0, in_layer='conv1', out_layer='fc6'):
290 | 
291 |         run = False
292 |         for name, module in self.layers.named_children():
293 |             if name == in_layer:
294 |                 run = True
295 |             if run:
296 |                 x_v = module(x_v) 
297 |                 x_i = module(x_i) 
298 |                 
299 | 
300 |                 if name == "conv3": 
301 | 
302 |                     rgbt_feats = torch.cat((x_v, x_i), dim=1)   ## torch.Size([1, 192, 62, 91]) 
303 | 
304 |                     # pdb.set_trace() 
305 | 
306 |                     rgbt_feats = self.channel_attention(rgbt_feats) * rgbt_feats
307 |                     rgbt_feats = self.spatial_attention(rgbt_feats) * rgbt_feats
308 |                     
309 |                     Tk_feats = self.conv1x1_Tk(rgbt_feats)  ## torch.Size([1, 96, 117, 71])
310 |                     Tq_feats = self.conv1x1_Tq(rgbt_feats)  ## torch.Size([1, 9, 117, 71]) 
311 |                     Vk_feats = self.conv1x1_Vk(rgbt_feats) 
312 |                     Vq_feats = self.conv1x1_Vq(rgbt_feats) 
313 |                     
314 |                     # pdb.set_trace() 
315 | 
316 |                     Tk_feats = torch.squeeze(Tk_feats, dim=0)
317 |                     Tk_feats = Tk_feats.view(-1, Tk_feats.shape[1]*Tk_feats.shape[2])   ## torch.Size([96, 4150]) 
318 | 
319 |                     Tq_feats = torch.squeeze(Tq_feats, dim=0)
320 |                     Tq_feats = Tq_feats.view(-1, Tq_feats.shape[1]*Tq_feats.shape[2])
321 | 
322 |                     Vk_feats = torch.squeeze(Vk_feats, dim=0)
323 |                     Vk_feats = Vk_feats.view(-1, Vk_feats.shape[1]*Vk_feats.shape[2])
324 | 
325 |                     Vq_feats = torch.squeeze(Vq_feats, dim=0)
326 |                     Vq_feats = Vq_feats.view(-1, Vq_feats.shape[1]*Vq_feats.shape[2])
327 | 
328 |                     #### T_output.shape: torch.Size([96, 9]) 
329 |                     T_output = torch.matmul(Tk_feats, torch.transpose(Tq_feats, 1, 0)) 
330 |                     V_output = torch.matmul(Vk_feats, torch.transpose(Vq_feats, 1, 0))  
331 | 
332 |                     # pdb.set_trace() 
333 |                     T_filters = torch.reshape(T_output, (1, T_output.shape[0], 3, 3))  ## (96, 3, 3) 
334 |                     V_filters = torch.reshape(V_output, (1, V_output.shape[0], 3, 3))  ## (96, 3, 3) 
335 | 
336 | 
337 |                     # pdb.set_trace() 
338 | 
339 |                     adaptive_conv_T = AdaptiveConv2d(x_i.size(1), x_i.size(1), 3, padding=1, groups=x_i.size(1), bias=False)
340 |                     adaptive_conv_V = AdaptiveConv2d(x_v.size(1), x_v.size(1), 3, padding=1, groups=x_v.size(1), bias=False)
341 | 
342 |                     dynamic_T_feats = adaptive_conv_T(x_v, T_filters)
343 |                     dynamic_V_feats = adaptive_conv_V(x_i, V_filters)
344 | 
345 |                     dynamic_T_feats = self.sigmoid(dynamic_T_feats) 
346 |                     dynamic_V_feats = self.sigmoid(dynamic_V_feats) 
347 | 
348 |                     x_v = x_v + dynamic_V_feats  
349 |                     x_i = x_i + dynamic_T_feats 
350 | 
351 |                     fuse_x_v_i = torch.cat((x_v, x_i), dim=1)
352 |                     
353 |                     # pdb.set_trace()
354 | 
355 |                     # augmented_feats, p1 = self.attn1(fuse_x_v_i)
356 | 
357 |                 if name == out_layer:
358 |                     return x_v, x_i, fuse_x_v_i  
359 | 
360 | 
361 |         # pdb.set_trace() 
362 | 
363 |         x_v = self.branches[k](x_v) 
364 | 
365 | 
366 |         if out_layer=='fc6':
367 |             return x_v
368 |         elif out_layer=='fc6_softmax':
369 |             return F.softmax(x_v)
370 | 
371 | 
372 | 
373 |     def load_model(self, model_path):
374 |         states = torch.load(model_path)
375 |         shared_layers = states['shared_layers']
376 |         self.layers.load_state_dict(shared_layers)
377 | 
378 |     def load_mat_model(self, matfile):
379 |         mat = scipy.io.loadmat(matfile)
380 |         mat_layers = list(mat['layers'])[0]
381 | 
382 |         # copy conv weights
383 |         for i in range(3):
384 |             weight, bias = mat_layers[i*4]['weights'].item()[0]
385 |             self.layers[i][0].weight.data = torch.from_numpy(np.transpose(weight, (3,2,0,1)))
386 |             self.layers[i][0].bias.data = torch.from_numpy(bias[:,0])
387 | 
388 |     def trainSpatialTransform(self, image, bb):
389 | 
390 |         return
391 | 
392 | 
393 | class BinaryLoss(nn.Module):
394 |     def __init__(self):
395 |         super(BinaryLoss, self).__init__()
396 | 
397 |     def forward(self, pos_score, neg_score):
398 |         pos_loss = -F.log_softmax(pos_score)[:,1]
399 |         neg_loss = -F.log_softmax(neg_score)[:,0]
400 | 
401 |         loss = (pos_loss.sum() + neg_loss.sum())/(pos_loss.size(0) + neg_loss.size(0))
402 |         return loss
403 | 
404 | 
405 | class Accuracy():
406 |     def __call__(self, pos_score, neg_score):
407 | 
408 |         pos_correct = (pos_score[:,1] > pos_score[:,0]).sum().float()
409 |         neg_correct = (neg_score[:,1] < neg_score[:,0]).sum().float()
410 | 
411 |         pos_acc = pos_correct / (pos_score.size(0) + 1e-8)
412 |         neg_acc = neg_correct / (neg_score.size(0) + 1e-8)
413 | 
414 |         return pos_acc.item(), neg_acc.item()
415 | 
416 | 
417 | class Precision():
418 |     def __call__(self, pos_score, neg_score):
419 | 
420 |         scores = torch.cat((pos_score[:,1], neg_score[:,1]), 0)
421 |         topk = torch.topk(scores, pos_score.size(0))[1]
422 |         prec = (topk < pos_score.size(0)).float().sum() / (pos_score.size(0)+1e-8)
423 | 
424 |         return prec.item()
425 | 
426 | 
427 | 
428 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/model.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/model.pyc


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/prepro_data.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | import pickle
 4 | from collections import OrderedDict
 5 | 
 6 | 
 7 | 
 8 | # seq_home = '../dataset/'
 9 | seqlist_path = '../vot-otb.txt'
10 | output_path = 'data/vot-otb.pkl'
11 | set_type = 'VOT'
12 | seq_home = '/home/ilchae/dataset/tracking/'+set_type +'/'
13 | 
14 | if set_type=='OTB':
15 |     seqlist_path = '../otb-vot15.txt'
16 |     output_path = '../otb-vot15.pkl'
17 | 
18 | if set_type == 'VOT':
19 |     seqlist_path = '../vot-otb.txt'
20 |     output_path = '../vot-otb.pkl'
21 | 
22 | with open(seqlist_path,'r') as fp:
23 |     seq_list = fp.read().splitlines()
24 | 
25 | data = {}
26 | for i,seqname in enumerate(seq_list):
27 |     print(seqname)
28 |     if set_type=='OTB':
29 |         seq_path = seq_home+seqname
30 |         img_list = sorted([p for p in os.listdir(seq_path+'/img') if os.path.splitext(p)[1] == '.jpg'])
31 | 
32 |         if (seqname == 'Jogging') or (seqname == 'Skating2'):
33 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.1.txt')
34 |         elif seqname == 'Human4' :
35 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.2.txt', delimiter=',')
36 |         elif (seqname == 'BlurBody') or (seqname == 'BlurCar1') or (seqname == 'BlurCar2') or (seqname == 'BlurCar3') \
37 |                 or (seqname == 'BlurCar4') or (seqname == 'BlurFace') or (seqname == 'BlurOwl') or (seqname == 'Board') \
38 |                 or (seqname == 'Box') or (seqname == 'Car4') or (seqname == 'CarScale') or (seqname == 'ClifBar') \
39 |                 or (seqname == 'Couple') or (seqname == 'Crossing') or (seqname == 'Dog') or (seqname == 'FaceOcc1') \
40 |                 or (seqname == 'Girl') or (seqname == 'Rubik') or (seqname == 'Singer1') or (seqname == 'Subway') \
41 |                 or (seqname == 'Surfer') or (seqname == 'Sylvester') or (seqname == 'Toy') or (seqname == 'Twinnings') \
42 |                 or (seqname == 'Vase') or (seqname == 'Walking') or (seqname == 'Walking2') or (seqname == 'Woman') :
43 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.txt')
44 |         elif (seqname == 'Diving'):
45 |             gt = np.loadtxt(seq_path + '/groundtruth_rect_ilchae.txt', delimiter=',')
46 |         else:
47 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.txt', delimiter=',')
48 | 
49 |         if (seqname == 'David') or (seqname == 'Football1') or (seqname == 'Freeman3') or (seqname == 'Freeman4'):
50 |             continue
51 | 
52 |     if set_type =='VOT':
53 |         img_list = sorted([p for p in os.listdir(seq_home + seqname) if os.path.splitext(p)[1] == '.jpg'])
54 |         gt = np.loadtxt(seq_home + seqname + '/groundtruth.txt', delimiter=',')
55 | 
56 |     if set_type == 'IMAGENET':
57 |         img_list = []
58 |         gt = []
59 | 
60 |     assert len(img_list) == len(gt), "Lengths do not match!!"
61 | 
62 |     if gt.shape[1]==8:
63 |         x_min = np.min(gt[:,[0,2,4,6]],axis=1)[:,None]
64 |         y_min = np.min(gt[:,[1,3,5,7]],axis=1)[:,None]
65 |         x_max = np.max(gt[:,[0,2,4,6]],axis=1)[:,None]
66 |         y_max = np.max(gt[:,[1,3,5,7]],axis=1)[:,None]
67 |         gt = np.concatenate((x_min, y_min, x_max-x_min, y_max-y_min),axis=1)
68 | 
69 |     data[seqname] = {'images':img_list, 'gt':gt}
70 | 
71 | with open(output_path, 'wb') as fp:
72 |     pickle.dump(data, fp, -1)
73 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/prepro_data_imagenet.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | import pickle
 4 | from collections import OrderedDict
 5 | 
 6 | import xml.etree.ElementTree
 7 | import xmltodict
 8 | import numpy as np
 9 | 
10 | import  matplotlib.pyplot as plt
11 | import matplotlib.patches as patches
12 | from PIL import Image
13 | import time
14 | 
15 | output_path = './imagenet_refine.pkl'
16 | 
17 | 
18 | 
19 | seq_home = '/home/ilchae/dataset/ILSVRC/'
20 | train_list = [p for p in os.listdir(seq_home + 'Data/VID/train')]
21 | seq_list = []
22 | for num, cur_dir in enumerate(train_list):
23 |     seq_list += [cur_dir + '/' + p for p in os.listdir(seq_home + 'Data/VID/train/' + cur_dir)]
24 | 
25 | fig = plt.figure()
26 | ax = fig.add_subplot(1,1,1)
27 | 
28 | data = {}
29 | completeNum = 0
30 | for i,seqname in enumerate(seq_list):
31 |     print(seqname)
32 |     seq_path = seq_home + 'Data/VID/train/' + seqname
33 |     gt_path = seq_home +'Annotations/VID/train/' + seqname
34 |     img_list = sorted([p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.JPEG'])
35 | 
36 |     # gt = np.zeros((len(img_list),4))
37 |     enable_gt = []
38 |     enable_img_list = []
39 |     gt_list = sorted([gt_path + '/' + p for p in os.listdir(gt_path) if os.path.splitext(p)[1] == '.xml'])
40 |     save_enable = True
41 |     for gidx in range(0,len(img_list)):
42 |         with open(gt_list[gidx]) as fd:
43 |             doc = xmltodict.parse(fd.read())
44 |         try:
45 |             try:
46 |                 object =doc['annotation']['object'][0]
47 |             except:
48 |                 object = doc['annotation']['object']
49 |         except:
50 |             ## no object, occlusion and hidden etc.
51 |             continue
52 | 
53 |         if (int(object['trackid']) is not 0):
54 |             continue
55 | 
56 |         xmin = float(object['bndbox']['xmin'])
57 |         xmax = float(object['bndbox']['xmax'])
58 |         ymin = float(object['bndbox']['ymin'])
59 |         ymax = float(object['bndbox']['ymax'])
60 | 
61 |         ## discard too big object
62 |         if ((float(doc['annotation']['size']['width'])/2.) < (xmax-xmin) ) and ((float(doc['annotation']['size']['height'])/2.) < (ymax-ymin) ):
63 |             continue
64 | 
65 |         # gt[gidx,0] = xmin
66 |         # gt[gidx,1] = ymin
67 |         # gt[gidx,2] = xmax - xmin
68 |         # gt[gidx,3] = ymax - ymin
69 | 
70 |         cur_gt = np.zeros((4))
71 |         cur_gt[0] = xmin
72 |         cur_gt[1] = ymin
73 |         cur_gt[2] = xmax - xmin
74 |         cur_gt[3] = ymax - ymin
75 |         enable_gt.append(cur_gt)
76 | 
77 |         enable_img_list.append(img_list[gidx])
78 | 
79 |     if len(enable_img_list) == 0:
80 |         save_enable = False
81 |     if save_enable:
82 |         assert len(enable_img_list) == len(enable_gt), "Lengths do not match!!"
83 |         data[seqname] = {'images':enable_img_list, 'gt':np.asarray(enable_gt)}
84 |         completeNum += 1
85 |         print('Complete!')
86 | 
87 | with open(output_path, 'wb') as fp:
88 |     pickle.dump(data, fp, -1)
89 | 
90 | print('complete {} videos'.format(completeNum))
91 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/pretrain_options.py:
--------------------------------------------------------------------------------
 1 | from collections import OrderedDict
 2 | 
 3 | pretrain_opts = OrderedDict()
 4 | pretrain_opts['use_gpu'] = True
 5 | 
 6 | pretrain_opts['init_model_path'] = './models/imagenet-vgg-m.mat'
 7 | pretrain_opts['model_path'] = './models/CBAM_dfg_rtmdnet_trained_on_50.pth'
 8 | 
 9 | pretrain_opts['batch_frames'] = 8
10 | pretrain_opts['batch_pos'] = 64
11 | pretrain_opts['batch_neg'] = 196
12 | 
13 | pretrain_opts['overlap_pos'] = [0.7, 1]
14 | pretrain_opts['overlap_neg'] = [0, 0.5]
15 | 
16 | pretrain_opts['img_size'] = 107
17 | 
18 | 
19 | pretrain_opts['lr'] = 0.0001
20 | pretrain_opts['w_decay'] = 0.0005
21 | pretrain_opts['momentum'] = 0.9
22 | pretrain_opts['grad_clip'] = 10
23 | pretrain_opts['ft_layers'] = ['conv','fc']
24 | pretrain_opts['lr_mult'] = {'fc':1}
25 | pretrain_opts['n_cycles'] = 1000
26 | 
27 | 
28 | ##################################### from RCNN #############################################
29 | pretrain_opts['padding'] = 1.2
30 | pretrain_opts['padding_ratio']=5.
31 | pretrain_opts['padded_img_size'] = pretrain_opts['img_size']*int(pretrain_opts['padding_ratio'])
32 | pretrain_opts['frame_interval'] = 2
33 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/pretrain_options.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/pretrain_options.pyc


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/roi_align/build/lib.linux-x86_64-3.7/roi_align_cuda.cpython-37m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/roi_align/build/lib.linux-x86_64-3.7/roi_align_cuda.cpython-37m-x86_64-linux-gnu.so


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/roi_align/build/temp.linux-x86_64-3.7/src/roi_align_cuda.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/roi_align/build/temp.linux-x86_64-3.7/src/roi_align_cuda.o


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/roi_align/build/temp.linux-x86_64-3.7/src/roi_align_kernel_c.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/roi_align/build/temp.linux-x86_64-3.7/src/roi_align_kernel_c.o


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/roi_align/functions/roi_align.py:
--------------------------------------------------------------------------------
  1 | from torch.autograd import Function
  2 | 
  3 | from .. import roi_align_cuda
  4 | 
  5 | 
  6 | class RoIAlignFunction(Function):
  7 | 
  8 |     @staticmethod
  9 |     def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0):
 10 |         if isinstance(out_size, int):
 11 |             out_h = out_size
 12 |             out_w = out_size
 13 |         elif isinstance(out_size, tuple):
 14 |             
 15 |             assert len(out_size) == 2
 16 |             assert isinstance(out_size[0], int)
 17 |             assert isinstance(out_size[1], int)
 18 |             out_h, out_w = out_size
 19 |         else:
 20 |             raise TypeError(
 21 |                 '"out_size" must be an integer or tuple of integers')
 22 |         ctx.spatial_scale = spatial_scale
 23 |         ctx.sample_num = sample_num
 24 |         ctx.save_for_backward(rois)
 25 |         ctx.feature_size = features.size()
 26 | 
 27 |         batch_size, num_channels, data_height, data_width = features.size()
 28 |         num_rois = rois.size(0)
 29 | 
 30 |         output = features.new_zeros(num_rois, num_channels, out_h, out_w)
 31 |         if features.is_cuda:
 32 |             roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale,
 33 |                                    sample_num, output)
 34 |         else:
 35 |             raise NotImplementedError
 36 | 
 37 |         return output
 38 | 
 39 |     @staticmethod
 40 |     def backward(ctx, grad_output):
 41 |         feature_size = ctx.feature_size
 42 |         spatial_scale = ctx.spatial_scale
 43 |         sample_num = ctx.sample_num
 44 |         rois = ctx.saved_tensors[0]
 45 |         assert (feature_size is not None and grad_output.is_cuda)
 46 | 
 47 |         batch_size, num_channels, data_height, data_width = feature_size
 48 |         out_w = grad_output.size(3)
 49 |         out_h = grad_output.size(2)
 50 | 
 51 |         grad_input = grad_rois = None
 52 |         if ctx.needs_input_grad[0]:
 53 |             grad_input = rois.new_zeros(batch_size, num_channels, data_height,
 54 |                                         data_width)
 55 |             roi_align_cuda.backward(grad_output.contiguous(), rois, out_h,
 56 |                                     out_w, spatial_scale, sample_num,
 57 |                                     grad_input)
 58 | 
 59 |         return grad_input, grad_rois, None, None, None
 60 | class RoIAlignAdaFunction(Function):
 61 | 
 62 |     @staticmethod
 63 |     def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0):
 64 |         if isinstance(out_size, int):
 65 |             out_h = out_size
 66 |             out_w = out_size
 67 |         elif isinstance(out_size, tuple):
 68 |             assert len(out_size) == 2
 69 |             assert isinstance(out_size[0], int)
 70 |             assert isinstance(out_size[1], int)
 71 |             out_h, out_w = out_size
 72 |         else:
 73 |             raise TypeError(
 74 |                 '"out_size" must be an integer or tuple of integers')
 75 |         ctx.spatial_scale = spatial_scale
 76 |         ctx.sample_num = sample_num
 77 |         ctx.save_for_backward(rois)
 78 |         ctx.feature_size = features.size()
 79 | 
 80 |         batch_size, num_channels, data_height, data_width = features.size()
 81 |         num_rois = rois.size(0)
 82 | 
 83 |         output = features.new_zeros(num_rois, num_channels, out_h, out_w)
 84 |         if features.is_cuda:
 85 |             roi_align_cuda.ada_forward(features, rois, out_h, out_w, spatial_scale,
 86 |                                    sample_num, output)
 87 |         else:
 88 |             raise NotImplementedError
 89 | 
 90 |         return output
 91 | 
 92 |     @staticmethod
 93 |     def backward(ctx, grad_output):
 94 |         feature_size = ctx.feature_size
 95 |         spatial_scale = ctx.spatial_scale
 96 |         sample_num = ctx.sample_num
 97 |         rois = ctx.saved_tensors[0]
 98 |         assert (feature_size is not None and grad_output.is_cuda)
 99 | 
100 |         batch_size, num_channels, data_height, data_width = feature_size
101 |         out_w = grad_output.size(3)
102 |         out_h = grad_output.size(2)
103 | 
104 |         grad_input = grad_rois = None
105 |         if ctx.needs_input_grad[0]:
106 |             grad_input = rois.new_zeros(batch_size, num_channels, data_height,
107 |                                         data_width)
108 |             roi_align_cuda.ada_backward(grad_output.contiguous(), rois, out_h,
109 |                                     out_w, spatial_scale, sample_num,
110 |                                     grad_input)
111 | 
112 |         return grad_input, grad_rois, None, None, None
113 | 
114 | roi_align = RoIAlignFunction.apply
115 | roi_align_ada = RoIAlignAdaFunction.apply
116 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/modules/roi_align/functions/roi_align.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/roi_align/functions/roi_align.pyc


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/test_234_dataset.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 
  3 | from os.path import join, isdir
  4 | from tracker import *
  5 | import numpy as np
  6 | import argparse
  7 | import pickle
  8 | import math
  9 | import pdb 
 10 | import torchvision.transforms as transforms
 11 | import random
 12 | import warnings
 13 | warnings.filterwarnings("ignore") 
 14 | 
 15 | 
 16 | 
 17 | def genConfig(seq_path, set_type):
 18 | 
 19 |     path, seqname = os.path.split(seq_path)
 20 | 
 21 |     if set_type == 'OTB100':
 22 |         img_list = sorted([seq_path + '/img/' + p for p in os.listdir(seq_path + '/img') if os.path.splitext(p)[1] == '.png'])
 23 |         gt = np.loadtxt(seq_path + '/groundtruth_rect.txt', delimiter=',')
 24 | 
 25 |     #####################################################################
 26 |     #####               For the RGBT dataset 
 27 |     ##################################################################### 
 28 |     elif set_type == 'dataset234': 
 29 |         img_list_v = sorted([seq_path + '/visible/' + p for p in os.listdir(seq_path + '/visible') if os.path.splitext(p)[1] == '.jpg'])
 30 |         img_list_i = sorted([seq_path + '/infrared/' + p for p in os.listdir(seq_path + '/infrared') if os.path.splitext(p)[1] == '.jpg'])
 31 |         gt = np.loadtxt(seq_path + '/visible.txt', delimiter=',')
 32 | 
 33 |     elif set_type == 'dataset210': 
 34 |         img_list_v = sorted([seq_path + '/visible/' + p for p in os.listdir(seq_path + '/visible') if os.path.splitext(p)[1] == '.jpg'])
 35 |         img_list_i = sorted([seq_path + '/infrared/' + p for p in os.listdir(seq_path + '/infrared') if os.path.splitext(p)[1] == '.jpg'])
 36 |         gt = np.loadtxt(seq_path + '/init.txt', delimiter=',')
 37 | 
 38 | 
 39 |     return img_list_v, img_list_i, gt 
 40 | 
 41 | 
 42 | 
 43 | 
 44 | if __name__ == "__main__":
 45 | 
 46 |     parser = argparse.ArgumentParser()
 47 |     parser.add_argument("-set_type", default = 'dataset234')
 48 |     parser.add_argument("-model_path", default = './models/test_CBAM_dfg_rtmdnet_trained_on_50.pth')
 49 |     parser.add_argument("-result_path", default = './result.npy')
 50 |     parser.add_argument("-visual_log",default=False, action= 'store_true')
 51 |     parser.add_argument("-visualize",default=False, action='store_true')
 52 |     parser.add_argument("-adaptive_align",default=True, action='store_false')
 53 |     parser.add_argument("-padding",default=1.2, type = float)
 54 |     parser.add_argument("-jitter",default=True, action='store_false')
 55 |     
 56 |     args = parser.parse_args()
 57 | 
 58 |     ##################################################################################
 59 |     #########################Just modify opts in this script.#########################
 60 |     ######################Becuase of synchronization of options#######################
 61 |     ##################################################################################
 62 |     ## option setting
 63 |     opts['model_path']=args.model_path
 64 |     opts['result_path']=args.result_path
 65 |     opts['visual_log']=args.visual_log
 66 |     opts['set_type']=args.set_type
 67 |     opts['visualize'] = args.visualize
 68 |     opts['adaptive_align'] = args.adaptive_align
 69 |     opts['padding'] = args.padding
 70 |     opts['jitter'] = args.jitter
 71 |     ##################################################################################
 72 |     ############################Do not modify opts anymore.###########################
 73 |     ######################Becuase of synchronization of options#######################
 74 |     ##################################################################################
 75 |     print(opts)
 76 | 
 77 | 
 78 |     ## path initialization
 79 |     dataset_path = '/wangxiao/experiments/'
 80 |     result_home = '/wangxiao/experiments/trackingResults/'
 81 | 
 82 |     seq_home = dataset_path + opts['set_type']
 83 |     seq_list = [f for f in os.listdir(seq_home) if isdir(join(seq_home,f))]
 84 |     seq_list = np.sort(seq_list) 
 85 | 
 86 |     iou_list=[]
 87 |     fps_list=dict()
 88 |     bb_result = dict()
 89 |     result = dict()
 90 | 
 91 |     iou_list_nobb=[]
 92 |     bb_result_nobb = dict()
 93 |     for num, seq in enumerate(seq_list):
 94 | 
 95 |         if num<-1:
 96 |             continue
 97 | 
 98 |         already_done = os.listdir(result_home) 
 99 |         
100 |         if seq+"_rgbt234.txt" in already_done: 
101 |             print("==>> Skip this video: ", seq) 
102 |         else: 
103 |             txtName = seq + '_rgbt234.txt'
104 |             fid = open(result_home + txtName, 'w')
105 |             
106 |             seq_path = seq_home + '/' + seq
107 |             img_list_v, img_list_i, gt = genConfig(seq_path, opts['set_type'])
108 | 
109 |             iou_result, result_bb, fps, result_nobb = run_mdnet(img_list_v, img_list_i, gt[0], gt, seq = seq, display=opts['visualize'])
110 | 
111 |             enable_frameNum = 0.
112 |             for iidx in range(len(iou_result)):
113 |                 if (math.isnan(iou_result[iidx])==False): 
114 |                     enable_frameNum += 1.
115 |                 else:
116 |                     ## gt is not alowed
117 |                     iou_result[iidx] = 0.
118 | 
119 |             iou_list.append(iou_result.sum()/enable_frameNum)
120 |             bb_result[seq] = result_bb
121 |             fps_list[seq]=fps
122 | 
123 |             bb_result_nobb[seq] = result_nobb
124 |             print('{} {} : {} , total mIoU:{}, fps:{}'.format(num,seq,iou_result.mean(), sum(iou_list)/len(iou_list),sum(fps_list.values())/len(fps_list)))
125 | 
126 | 
127 |             for iidex in range(len(result_bb)):
128 |                line = result_bb[iidex]
129 | 
130 |                # pdb.set_trace() 
131 |                fid.write(str(line[0]))
132 |                fid.write(',')
133 |                fid.write(str(line[1]))
134 |                fid.write(',')
135 |                fid.write(str(line[2]))
136 |                fid.write(',')
137 |                fid.write(str(line[3]))
138 |                fid.write('\n')
139 |             fid.close()
140 | 
141 | 
142 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/tracker.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import sys
  3 | import time
  4 | 
  5 | ## for drawing package
  6 | import matplotlib.pyplot as plt
  7 | import matplotlib.patches as patches
  8 | 
  9 | import torch.optim as optim
 10 | from torch.autograd import Variable
 11 | from random import randint
 12 | 
 13 | sys.path.insert(0,'./modules')
 14 | from sample_generator import *
 15 | from data_prov import *
 16 | from model import *
 17 | from bbreg import *
 18 | from options import *
 19 | from img_cropper import *
 20 | from roi_align import RoIAlignAvg,RoIAlignMax,RoIAlignAdaMax 
 21 | 
 22 | 
 23 | 
 24 | # sys.path.insert(0,'./naive_rgbt_TANet_module/')
 25 | # from generator import naive_Generator
 26 | # from utils import *
 27 | import pdb
 28 | import warnings
 29 | warnings.filterwarnings("ignore")
 30 | import torchvision.transforms as transforms
 31 | import random
 32 | import cv2 
 33 | from skimage import measure, draw 
 34 | 
 35 | 
 36 | # generator_path = './naive_rgbt_TANet_module/naive_TANet_rgbt_model.pkl'
 37 | 
 38 | # Generator = naive_Generator() 
 39 | # Generator.load_state_dict(torch.load(generator_path))
 40 | # Generator.cuda()
 41 | 
 42 | 
 43 | np.random.seed(123)
 44 | torch.manual_seed(456)
 45 | torch.cuda.manual_seed(789)
 46 | 
 47 | # torch.set_default_tensor_type(torch.cuda.FloatTensor)
 48 | 
 49 | ##################################################################################
 50 | ############################Do not modify opts anymore.###########################
 51 | ######################Becuase of synchronization of options#######################
 52 | ##################################################################################
 53 | 
 54 | def set_optimizer(model, lr_base, lr_mult=opts['lr_mult'], momentum=opts['momentum'], w_decay=opts['w_decay']):
 55 |     params = model.get_learnable_params()
 56 |     param_list = []
 57 |     for k, p in params.items():
 58 |         lr = lr_base
 59 |         for l, m in lr_mult.items():
 60 |             if k.startswith(l):
 61 |                 lr = lr_base * m
 62 |         param_list.append({'params': [p], 'lr':lr})
 63 |     optimizer = optim.SGD(param_list, lr = lr, momentum=momentum, weight_decay=w_decay)
 64 |     return optimizer
 65 | 
 66 | 
 67 | def train(model, criterion, optimizer, pos_feats, neg_feats, maxiter, in_layer='fc4'):
 68 |     model.train()
 69 | 
 70 |     batch_pos = opts['batch_pos']
 71 |     batch_neg = opts['batch_neg']
 72 |     batch_test = opts['batch_test']
 73 |     batch_neg_cand = max(opts['batch_neg_cand'], batch_neg)
 74 | 
 75 |     pos_idx = np.random.permutation(pos_feats.size(0))
 76 |     neg_idx = np.random.permutation(neg_feats.size(0))
 77 |     while(len(pos_idx) < batch_pos*maxiter):
 78 |         pos_idx = np.concatenate([pos_idx, np.random.permutation(pos_feats.size(0))])
 79 |     while(len(neg_idx) < batch_neg_cand*maxiter):
 80 |         neg_idx = np.concatenate([neg_idx, np.random.permutation(neg_feats.size(0))])
 81 |     pos_pointer = 0
 82 |     neg_pointer = 0
 83 | 
 84 | 
 85 | 
 86 |     for iter in range(maxiter):
 87 | 
 88 |         # select pos idx
 89 |         pos_next = pos_pointer + batch_pos
 90 |         pos_cur_idx = pos_idx[pos_pointer:pos_next]
 91 |         pos_cur_idx = pos_feats.new(pos_cur_idx).long()
 92 |         pos_pointer = pos_next
 93 | 
 94 |         # select neg idx
 95 |         neg_next = neg_pointer + batch_neg_cand
 96 |         neg_cur_idx = neg_idx[neg_pointer:neg_next]
 97 |         neg_cur_idx = neg_feats.new(neg_cur_idx).long()
 98 |         neg_pointer = neg_next
 99 | 
100 |         # create batch
101 |         batch_pos_feats = Variable(pos_feats.index_select(0, pos_cur_idx))
102 |         batch_neg_feats = Variable(neg_feats.index_select(0, neg_cur_idx))
103 | 
104 |         # hard negative mining
105 |         if batch_neg_cand > batch_neg:
106 |             model.eval() ## model transfer into evaluation mode
107 |             for start in range(0,batch_neg_cand,batch_test):
108 |                 end = min(start+batch_test,batch_neg_cand)
109 | 
110 |                 if batch_neg_feats[start:end].shape[1] == 9216: 
111 |                     temp_neg_feats = batch_neg_feats[start:end] 
112 |                 else: 
113 |                     temp_neg_feats = torch.cat((batch_neg_feats[start:end], batch_neg_feats[start:end]), dim=1)  
114 | 
115 |                 score = model(temp_neg_feats, temp_neg_feats, in_layer=in_layer)
116 |                 if start==0:
117 |                     neg_cand_score = score.data[:,1].clone()
118 |                 else:
119 |                     neg_cand_score = torch.cat((neg_cand_score, score.data[:,1].clone()),0)
120 | 
121 |             _, top_idx = neg_cand_score.topk(batch_neg)
122 |             batch_neg_feats = batch_neg_feats.index_select(0, Variable(top_idx))
123 |             model.train() ## model transfer into train mode
124 | 
125 |         # forward
126 |         if batch_pos_feats.shape[1] == 9216: 
127 |             temp_pos_feats = batch_pos_feats 
128 |         else: 
129 |             temp_pos_feats = torch.cat((batch_pos_feats, batch_pos_feats), dim=1)  
130 | 
131 |         if batch_neg_feats.shape[1] == 9216: 
132 |             temp_neg_feats = batch_neg_feats 
133 |         else: 
134 |             temp_neg_feats = torch.cat((batch_neg_feats, batch_neg_feats), dim=1)  
135 | 
136 |         # pdb.set_trace() 
137 |         pos_score = model(temp_pos_feats, temp_pos_feats, in_layer=in_layer)
138 |         neg_score = model(temp_neg_feats, temp_neg_feats, in_layer=in_layer)
139 | 
140 |         # optimize
141 |         loss = criterion(pos_score, neg_score)
142 |         model.zero_grad()
143 |         loss.backward()
144 |         torch.nn.utils.clip_grad_norm(model.parameters(), opts['grad_clip'])
145 |         optimizer.step()
146 | 
147 |         if opts['visual_log']:
148 |             print("Iter %d, Loss %.4f" % (iter, loss.data[0]))
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 
158 | 
159 | 
160 | def run_mdnet(img_list_v, img_list_i, init_bbox, gt=None, seq='seq_name ex)Basketball', savefig_dir='', display=False):
161 |     
162 |     # Init bbox
163 |     target_bbox = np.array(init_bbox)
164 |     result = np.zeros((len(img_list_v),4))
165 |     result_bb = np.zeros((len(img_list_v),4))
166 |     result[0] = np.copy(target_bbox)
167 |     result_bb[0] = np.copy(target_bbox)
168 | 
169 |     iou_result = np.zeros((len(img_list_v),1))
170 | 
171 |     # execution time array
172 |     exec_time_result = np.zeros((len(img_list_v),1))
173 | 
174 |     # Init model
175 |     model = MDNet(opts['model_path'])
176 |     if opts['adaptive_align']:
177 |         align_h = model.roi_align_model.aligned_height
178 |         align_w = model.roi_align_model.aligned_width
179 |         spatial_s = model.roi_align_model.spatial_scale
180 |         model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s)
181 |     if opts['use_gpu']:
182 |         model = model.cuda()
183 | 
184 |     model.set_learnable_params(opts['ft_layers'])
185 | 
186 |     # Init image crop model
187 |     img_crop_model = imgCropper(1.)
188 |     if opts['use_gpu']:
189 |         img_crop_model.gpuEnable()
190 | 
191 |     # Init criterion and optimizer
192 |     criterion = BinaryLoss()
193 |     init_optimizer = set_optimizer(model, opts['lr_init'])
194 |     update_optimizer = set_optimizer(model, opts['lr_update'])
195 | 
196 |     tic = time.time()
197 |     # Load first image
198 |     cur_image_v = Image.open(img_list_v[0]).convert('RGB')
199 |     cur_image_v = np.asarray(cur_image_v)
200 | 
201 |     cur_image_i = Image.open(img_list_i[0]).convert('RGB')
202 |     cur_image_i = np.asarray(cur_image_i)
203 | 
204 | 
205 |     init_targetObject_v = cur_image_v[int(init_bbox[0]):int(init_bbox[0]+init_bbox[2]), int(init_bbox[1]):int(init_bbox[1]+init_bbox[3]), :]   
206 |     init_targetObject_i = cur_image_i[int(init_bbox[0]):int(init_bbox[0]+init_bbox[2]), int(init_bbox[1]):int(init_bbox[1]+init_bbox[3]), :] 
207 | 
208 | 
209 |     # Draw pos/neg samples
210 |     ishape = cur_image_v.shape
211 |     pos_examples = gen_samples(SampleGenerator('gaussian', (ishape[1],ishape[0]), 0.1, 1.2), target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
212 |     neg_examples = gen_samples(SampleGenerator('uniform', (ishape[1],ishape[0]), 1, 2, 1.1), target_bbox, opts['n_neg_init'], opts['overlap_neg_init'])
213 |     neg_examples = np.random.permutation(neg_examples)
214 | 
215 |     cur_bbreg_examples = gen_samples(SampleGenerator('uniform', (ishape[1],ishape[0]), 0.3, 1.5, 1.1), target_bbox, opts['n_bbreg'], opts['overlap_bbreg'], opts['scale_bbreg'])
216 | 
217 |     # compute padded sample
218 |     padded_x1 = (neg_examples[:,0]-neg_examples[:,2]*(opts['padding']-1.)/2.).min()
219 |     padded_y1 = (neg_examples[:,1]-neg_examples[:,3]*(opts['padding']-1.)/2.).min()
220 |     padded_x2 = (neg_examples[:,0]+neg_examples[:,2]*(opts['padding']+1.)/2.).max()
221 |     padded_y2 = (neg_examples[:,1]+neg_examples[:,3]*(opts['padding']+1.)/2.).max()
222 |     padded_scene_box = np.reshape(np.asarray((padded_x1,padded_y1,padded_x2-padded_x1,padded_y2-padded_y1)),(1,4))
223 | 
224 |     scene_boxes = np.reshape(np.copy(padded_scene_box), (1,4))
225 |     if opts['jitter']:
226 |         ## horizontal shift
227 |         jittered_scene_box_horizon = np.copy(padded_scene_box)
228 |         jittered_scene_box_horizon[0,0] -= 4.
229 |         jitter_scale_horizon = 1.
230 | 
231 |         ## vertical shift
232 |         jittered_scene_box_vertical = np.copy(padded_scene_box)
233 |         jittered_scene_box_vertical[0,1] -= 4.
234 |         jitter_scale_vertical = 1.
235 | 
236 |         jittered_scene_box_reduce1 = np.copy(padded_scene_box)
237 |         jitter_scale_reduce1 = 1.1 ** (-1)
238 | 
239 |         ## vertical shift
240 |         jittered_scene_box_enlarge1 = np.copy(padded_scene_box)
241 |         jitter_scale_enlarge1 = 1.1 ** (1)
242 | 
243 |         ## scale reduction
244 |         jittered_scene_box_reduce2 = np.copy(padded_scene_box)
245 |         jitter_scale_reduce2 = 1.1**(-2)
246 |         ## scale enlarge
247 |         jittered_scene_box_enlarge2 = np.copy(padded_scene_box)
248 |         jitter_scale_enlarge2 = 1.1 ** (2)
249 | 
250 |         scene_boxes = np.concatenate([scene_boxes, jittered_scene_box_horizon, jittered_scene_box_vertical,jittered_scene_box_reduce1,jittered_scene_box_enlarge1,jittered_scene_box_reduce2,jittered_scene_box_enlarge2],axis=0)
251 |         jitter_scale = [1.,jitter_scale_horizon,jitter_scale_vertical,jitter_scale_reduce1,jitter_scale_enlarge1,jitter_scale_reduce2,jitter_scale_enlarge2]
252 |     else:
253 |         jitter_scale = [1.]
254 | 
255 |     model.eval()
256 |     for bidx in range(0,scene_boxes.shape[0]):
257 |         crop_img_size = (scene_boxes[bidx, 2:4] * ((opts['img_size'],opts['img_size'])/target_bbox[2:4])).astype('int64')*jitter_scale[bidx]
258 |         cropped_image_v, cur_image_var_v = img_crop_model.crop_image(cur_image_v, np.reshape(scene_boxes[bidx],(1,4)), crop_img_size)
259 |         cropped_image_v = cropped_image_v - 128.
260 | 
261 |         cropped_image_i, cur_image_var_i = img_crop_model.crop_image(cur_image_i, np.reshape(scene_boxes[bidx],(1,4)), crop_img_size)
262 |         cropped_image_i = cropped_image_i - 128.
263 | 
264 | 
265 |         feat_map_v, feat_map_i, fused_feats = model(cropped_image_v, cropped_image_i, out_layer='conv3')
266 | 
267 |         rel_target_bbox = np.copy(target_bbox)
268 |         rel_target_bbox[0:2] -= scene_boxes[bidx,0:2]
269 | 
270 |         batch_num = np.zeros((pos_examples.shape[0], 1))
271 |         cur_pos_rois = np.copy(pos_examples)
272 |         cur_pos_rois[:,0:2] -= np.repeat(np.reshape(scene_boxes[bidx,0:2],(1,2)),cur_pos_rois.shape[0],axis=0)
273 |         scaled_obj_size = float(opts['img_size'])*jitter_scale[bidx]
274 |         cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), target_bbox[2:4], opts['padding'])
275 |         cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
276 |         cur_pos_rois = Variable(torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()
277 | 
278 |         # pdb.set_trace() 
279 |         cur_pos_feats = model.roi_align_model(fused_feats, cur_pos_rois)
280 |         cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1).data.clone()
281 | 
282 |         # cur_pos_feats_i = model.roi_align_model(feat_map_i, cur_pos_rois)
283 |         # cur_pos_feats_i = cur_pos_feats_i.view(cur_pos_feats_i.size(0), -1).data.clone()
284 | 
285 | 
286 |         batch_num = np.zeros((neg_examples.shape[0], 1))
287 |         cur_neg_rois = np.copy(neg_examples)
288 |         cur_neg_rois[:,0:2] -= np.repeat(np.reshape(scene_boxes[bidx,0:2],(1,2)),cur_neg_rois.shape[0],axis=0)
289 |         cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field, (scaled_obj_size,scaled_obj_size), target_bbox[2:4], opts['padding'])
290 |         cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
291 |         cur_neg_rois = Variable(torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()
292 | 
293 |         cur_neg_feats = model.roi_align_model(fused_feats, cur_neg_rois)
294 |         cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1).data.clone()
295 | 
296 |         # cur_neg_feats_i = model.roi_align_model(feat_map_i, cur_neg_rois)
297 |         # cur_neg_feats_i = cur_neg_feats_i.view(cur_neg_feats_i.size(0), -1).data.clone()
298 | 
299 | 
300 |         ## bbreg rois
301 |         batch_num = np.zeros((cur_bbreg_examples.shape[0], 1))
302 |         cur_bbreg_rois = np.copy(cur_bbreg_examples)
303 |         cur_bbreg_rois[:,0:2] -= np.repeat(np.reshape(scene_boxes[bidx,0:2],(1,2)), cur_bbreg_rois.shape[0],axis=0)
304 |         scaled_obj_size = float(opts['img_size'])*jitter_scale[bidx]
305 |         cur_bbreg_rois = samples2maskroi(cur_bbreg_rois, model.receptive_field,(scaled_obj_size,scaled_obj_size), target_bbox[2:4], opts['padding'])
306 |         cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois), axis=1)
307 |         cur_bbreg_rois = Variable(torch.from_numpy(cur_bbreg_rois.astype('float32'))).cuda()
308 | 
309 |         cur_bbreg_feats = model.roi_align_model(fused_feats, cur_bbreg_rois)
310 |         cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0), -1).data.clone()
311 | 
312 |         # cur_bbreg_feats_i = model.roi_align_model(feat_map_i, cur_bbreg_rois)
313 |         # cur_bbreg_feats_i = cur_bbreg_feats_i.view(cur_bbreg_feats_i.size(0), -1).data.clone()
314 | 
315 | 
316 |         feat_dim = cur_pos_feats.size(-1)
317 | 
318 |         if bidx==0:
319 |             pos_feats = cur_pos_feats
320 |             neg_feats = cur_neg_feats
321 |             ##bbreg feature
322 |             bbreg_feats = cur_bbreg_feats
323 |             bbreg_examples = cur_bbreg_examples
324 |         else:
325 |             pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
326 |             neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
327 |             ##bbreg feature
328 |             bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0)
329 |             bbreg_examples = np.concatenate((bbreg_examples, cur_bbreg_examples), axis=0)
330 | 
331 |     if pos_feats.size(0) > opts['n_pos_init']:
332 |         pos_idx = np.asarray(range(pos_feats.size(0)))
333 |         np.random.shuffle(pos_idx)
334 |         pos_feats = pos_feats[pos_idx[0:opts['n_pos_init']],:]
335 |     if neg_feats.size(0) > opts['n_neg_init']:
336 |         neg_idx = np.asarray(range(neg_feats.size(0)))
337 |         np.random.shuffle(neg_idx)
338 |         neg_feats = neg_feats[neg_idx[0:opts['n_neg_init']], :]
339 | 
340 |     ##bbreg
341 |     if bbreg_feats.size(0) > opts['n_bbreg']:
342 |         bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
343 |         np.random.shuffle(bbreg_idx)
344 |         bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
345 |         bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']],:]
346 |         #print bbreg_examples.shape
347 | 
348 | 
349 |     # init_target_feats = pos_feats[:400]    
350 | 
351 | 
352 |     ## open images and crop patch from obj
353 |     extra_obj_size = np.array((opts['img_size'],opts['img_size']))
354 |     extra_crop_img_size = extra_obj_size * (opts['padding']+0.6)
355 |     replicateNum = 100    
356 |     for iidx in range(replicateNum):
357 |         extra_target_bbox = np.copy(target_bbox)
358 | 
359 |         extra_scene_box = np.copy(extra_target_bbox)
360 |         extra_scene_box_center = extra_scene_box[0:2] + extra_scene_box[2:4] / 2.
361 |         extra_scene_box_size = extra_scene_box[2:4] * (opts['padding'] + 0.6)
362 |         extra_scene_box[0:2] = extra_scene_box_center - extra_scene_box_size / 2.
363 |         extra_scene_box[2:4] = extra_scene_box_size
364 | 
365 |         extra_shift_offset = np.clip(2. * np.random.randn(2), -4, 4)
366 |         cur_extra_scale = 1.1 ** np.clip(np.random.randn(1), -2, 2)
367 | 
368 | 
369 |         extra_scene_box[0] += extra_shift_offset[0]
370 |         extra_scene_box[1] += extra_shift_offset[1]
371 |         extra_scene_box[2:4] *= cur_extra_scale[0]
372 | 
373 |         scaled_obj_size = float(opts['img_size']) / cur_extra_scale[0]
374 | 
375 |         cur_extra_cropped_image_v, _ = img_crop_model.crop_image(cur_image_v, np.reshape(extra_scene_box,(1,4)), extra_crop_img_size)
376 |         cur_extra_cropped_image_v = cur_extra_cropped_image_v.detach() 
377 | 
378 |         cur_extra_cropped_image_i, _ = img_crop_model.crop_image(cur_image_i, np.reshape(extra_scene_box,(1,4)), extra_crop_img_size)
379 |         cur_extra_cropped_image_i = cur_extra_cropped_image_i.detach() 
380 | 
381 |         # extra_target_bbox = np.array(list(map(int, extra_target_bbox)))
382 |         cur_extra_pos_examples = gen_samples(SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),extra_target_bbox, opts['n_pos_init']//replicateNum, opts['overlap_pos_init'])
383 |         cur_extra_neg_examples = gen_samples(SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 2, 1.1),extra_target_bbox, opts['n_neg_init']/replicateNum//4, opts['overlap_neg_init'])
384 | 
385 |         ##bbreg sample
386 |         cur_extra_bbreg_examples = gen_samples(SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5, 1.1),extra_target_bbox, opts['n_bbreg']/replicateNum//4, opts['overlap_bbreg'], opts['scale_bbreg'])
387 | 
388 |         batch_num = iidx*np.ones((cur_extra_pos_examples.shape[0], 1))
389 |         cur_extra_pos_rois = np.copy(cur_extra_pos_examples)
390 |         cur_extra_pos_rois[:, 0:2] -= np.repeat(np.reshape(extra_scene_box[0:2], (1, 2)),
391 |                                                     cur_extra_pos_rois.shape[0], axis=0)
392 |         cur_extra_pos_rois = samples2maskroi(cur_extra_pos_rois, model.receptive_field,(scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4], opts['padding'])
393 |         cur_extra_pos_rois = np.concatenate((batch_num, cur_extra_pos_rois), axis=1)
394 | 
395 |         batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1))
396 |         cur_extra_neg_rois = np.copy(cur_extra_neg_examples)
397 |         cur_extra_neg_rois[:, 0:2] -= np.repeat(np.reshape(extra_scene_box[0:2], (1, 2)),cur_extra_neg_rois.shape[0], axis=0)
398 |         cur_extra_neg_rois = samples2maskroi(cur_extra_neg_rois, model.receptive_field,(scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4], opts['padding'])
399 |         cur_extra_neg_rois = np.concatenate((batch_num, cur_extra_neg_rois), axis=1)
400 | 
401 |         ## bbreg rois
402 |         batch_num = iidx * np.ones((cur_extra_bbreg_examples.shape[0], 1))
403 |         cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples)
404 |         cur_extra_bbreg_rois[:,0:2] -= np.repeat(np.reshape(extra_scene_box[0:2],(1,2)),cur_extra_bbreg_rois.shape[0],axis=0)
405 |         cur_extra_bbreg_rois = samples2maskroi(cur_extra_bbreg_rois, model.receptive_field,(scaled_obj_size,scaled_obj_size), extra_target_bbox[2:4], opts['padding'])
406 |         cur_extra_bbreg_rois = np.concatenate((batch_num, cur_extra_bbreg_rois), axis=1)
407 | 
408 | 
409 | 
410 |         if iidx==0:
411 |             extra_cropped_image_v = cur_extra_cropped_image_v 
412 |             extra_cropped_image_i = cur_extra_cropped_image_i 
413 | 
414 |             extra_pos_rois = np.copy(cur_extra_pos_rois)
415 |             extra_neg_rois = np.copy(cur_extra_neg_rois)
416 |             ##bbreg rois
417 |             extra_bbreg_rois = np.copy(cur_extra_bbreg_rois)
418 |             extra_bbreg_examples = np.copy(cur_extra_bbreg_examples)
419 |         else:
420 |             extra_cropped_image_v = torch.cat((extra_cropped_image_v, cur_extra_cropped_image_v),dim=0)
421 |             extra_cropped_image_i = torch.cat((extra_cropped_image_i, cur_extra_cropped_image_i),dim=0)
422 | 
423 |             extra_pos_rois = np.concatenate( (extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0)
424 |             extra_neg_rois = np.concatenate( (extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0)
425 |             ##bbreg rois
426 |             extra_bbreg_rois = np.concatenate( (extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0 )
427 |             extra_bbreg_examples = np.concatenate( (extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)), axis=0 )
428 | 
429 | 
430 |     extra_pos_rois = Variable(torch.from_numpy(extra_pos_rois.astype('float32'))).cuda()
431 |     extra_neg_rois = Variable(torch.from_numpy(extra_neg_rois.astype('float32'))).cuda()
432 |     ##bbreg rois
433 |     extra_bbreg_rois = Variable(torch.from_numpy(extra_bbreg_rois.astype('float32'))).cuda()
434 | 
435 |     extra_cropped_image_v -= 128.
436 |     extra_cropped_image_i -= 128.    
437 | 
438 |     # pdb.set_trace() 
439 | 
440 |     for iidxxx in range(replicateNum):
441 |         temp_extra_cropped_image_v = torch.unsqueeze(extra_cropped_image_v[iidxxx], dim=0) 
442 |         temp_extra_cropped_image_i = torch.unsqueeze(extra_cropped_image_i[iidxxx], dim=0) 
443 |         temp_extra_feat_maps_v, temp_extra_feat_maps_i, temp_extra_feat_maps = model(temp_extra_cropped_image_v, temp_extra_cropped_image_i, out_layer='conv3') 
444 |         temp_extra_feat_maps = torch.squeeze(temp_extra_feat_maps, dim=0) 
445 |         # temp_extra_feat_maps_i = torch.squeeze(temp_extra_feat_maps_i, dim=0) 
446 | 
447 |         if iidxxx == 0: 
448 |             extra_feat_maps = torch.zeros(replicateNum, temp_extra_feat_maps.shape[0], temp_extra_feat_maps.shape[1], temp_extra_feat_maps.shape[2])
449 |             # extra_feat_maps_i = torch.zeros(replicateNum, temp_extra_feat_maps_i.shape[0], temp_extra_feat_maps_i.shape[1], temp_extra_feat_maps_i.shape[2])
450 | 
451 |         extra_feat_maps[iidxxx] = temp_extra_feat_maps 
452 |         # extra_feat_maps_i[iidxxx] = temp_extra_feat_maps_i 
453 | 
454 |     extra_feat_maps = extra_feat_maps.cuda() 
455 | 
456 | 
457 |     # Draw pos/neg samples
458 |     ishape = cur_image_v.shape
459 | 
460 |     # pdb.set_trace() 
461 |     extra_pos_feats = model.roi_align_model(extra_feat_maps, extra_pos_rois)
462 |     extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0), -1).data.clone()
463 | 
464 | 
465 |     extra_neg_feats = model.roi_align_model(extra_feat_maps, extra_neg_rois)
466 |     extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0), -1).data.clone()
467 | 
468 |     ##bbreg feat
469 |     extra_bbreg_feats = model.roi_align_model(extra_feat_maps, extra_bbreg_rois)
470 |     extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0), -1).data.clone()
471 | 
472 |     ## concatenate extra features to original_features
473 |     pos_feats = torch.cat((pos_feats, extra_pos_feats),dim=0)
474 |     neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0)
475 |     ## concatenate extra bbreg feats to original_bbreg_feats
476 |     bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0)
477 |     bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples), axis=0)
478 | 
479 |     torch.cuda.empty_cache()
480 |     model.zero_grad()
481 | 
482 |     # Initial training
483 |     train(model, criterion, init_optimizer, pos_feats, neg_feats, opts['maxiter_init'])
484 | 
485 |     ##bbreg train
486 |     if bbreg_feats.size(0) > opts['n_bbreg']:
487 |         bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
488 |         np.random.shuffle(bbreg_idx)
489 |         bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
490 |         bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]
491 | 
492 |     bbreg = BBRegressor((ishape[1], ishape[0]))
493 |     bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
494 | 
495 | 
496 |     if pos_feats.size(0) > opts['n_pos_update']:
497 |         pos_idx = np.asarray(range(pos_feats.size(0)))
498 |         np.random.shuffle(pos_idx)
499 |         pos_feats_all = [pos_feats.index_select(0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())]
500 |     if neg_feats.size(0) > opts['n_neg_update']:
501 |         neg_idx = np.asarray(range(neg_feats.size(0)))
502 |         np.random.shuffle(neg_idx)
503 |         neg_feats_all = [neg_feats.index_select(0, torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())]
504 | 
505 | 
506 |     spf_total = time.time()-tic
507 | 
508 |     # Display
509 |     savefig = savefig_dir != ''
510 |     if display or savefig:
511 |         dpi = 80.0
512 |         figsize = (cur_image_v.shape[1]/dpi, cur_image_v.shape[0]/dpi)
513 | 
514 |         fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
515 |         ax = plt.Axes(fig, [0., 0., 1., 1.])
516 |         ax.set_axis_off()
517 |         fig.add_axes(ax)
518 |         im = ax.imshow(cur_image_v)
519 | 
520 |         if gt is not None:
521 |             gt_rect = plt.Rectangle(tuple(gt[0,:2]),gt[0,2],gt[0,3], linewidth=3, edgecolor="#00ff00", zorder=1, fill=False)
522 |             ax.add_patch(gt_rect)
523 | 
524 |         rect = plt.Rectangle(tuple(result_bb[0,:2]),result_bb[0,2],result_bb[0,3], linewidth=3, edgecolor="#ff0000", zorder=1, fill=False)
525 |         ax.add_patch(rect)
526 | 
527 |         if display:
528 |             plt.pause(.01)
529 |             plt.draw()
530 |         if savefig:
531 |             fig.savefig(os.path.join(savefig_dir,'0000.jpg'),dpi=dpi)
532 | 
533 | 
534 |     #####################################################################
535 |     ####                        Main loop 
536 |     ##################################################################### 
537 |     failure_count = 0 
538 |     trans_f = opts['trans_f']
539 | 
540 |     for i in range(1, len(img_list_v)):
541 | 
542 |         tic = time.time()
543 |         # Load image
544 |         cur_image_v = Image.open(img_list_v[i]).convert('RGB')
545 |         cur_image_v = np.asarray(cur_image_v)
546 |         cur_image_i = Image.open(img_list_i[i]).convert('RGB')
547 |         cur_image_i = np.asarray(cur_image_i)
548 | 
549 |         # Estimate target bbox
550 |         ishape = cur_image_v.shape
551 |         samples = gen_samples(SampleGenerator('gaussian', (ishape[1], ishape[0]), trans_f, opts['scale_f'], valid=True), target_bbox, opts['n_samples'])
552 | 
553 | 
554 | 
555 |         #########################################################################
556 |         ####                Target-Aware Attention Prediction   
557 |         #########################################################################
558 | 
559 |         attention_path = "/daTANet_rgbt_234_Attention/" + seq + "/"  
560 |         attentionImage_name = str(i+1) + "_attentionMap.jpg"
561 | 
562 |         # pdb.set_trace() 
563 |         attentionFlag = os.path.exists(attention_path + attentionImage_name)
564 |         # print("==>> attentionFlag ", attentionFlag)
565 | 
566 |         if failure_count >= 6 and attentionFlag: 
567 |             
568 |             attentionMap = Image.open(attention_path+attentionImage_name).convert('RGB')
569 |             attentionMap = np.asarray(attentionMap)
570 |             # pdb.set_trace() 
571 | 
572 |             dynamic_atttentonMAP = cv2.resize(attentionMap, (cur_image_v.shape[1], cur_image_v.shape[0]), interpolation=cv2.INTER_LINEAR)
573 |             ret, static_atttentonMAP = cv2.threshold(dynamic_atttentonMAP, 100, 255, cv2.THRESH_BINARY) 
574 |             # cv2.imwrite('static_atttentonMAP.png', static_atttentonMAP) 
575 | 
576 |             # pdb.set_trace() 
577 | 
578 |             label_image = measure.label(static_atttentonMAP)
579 |             props = measure.regionprops(label_image)
580 | 
581 |             atttenton_BBox = [] 
582 |             attention_centerLoc = [] 
583 |             similarity_glob_target_max = 0 
584 |             global_samples = [] 
585 | 
586 |             #### for each candidate search region 
587 |             # for iii in range(len(props)): 
588 | 
589 |             if len(props) > 1:
590 |                 attNum = 1 
591 |             else: 
592 |                 attNum = len(props) 
593 | 
594 |             for iii in range(attNum):
595 |                 center_position = props[iii].centroid 
596 |                 center_position = [int(center_position[1]), int(center_position[0])]   
597 | 
598 |                 centerPos_prev_x = target_bbox[0] + target_bbox[2] / 2 
599 |                 centerPos_prev_y = target_bbox[1] + target_bbox[3] / 2 
600 | 
601 |                 if math.fabs(center_position[0] - centerPos_prev_x) < 30 and math.fabs(center_position[1] - centerPos_prev_y) < 30: 
602 | 
603 |                     bbox = props[iii].bbox 
604 | 
605 |                     new_bbox2 = np.zeros((4)) 
606 |                     new_bbox2[0] = center_position[0] - target_bbox[2]/2
607 |                     new_bbox2[1] = center_position[1] - target_bbox[3]/2
608 |                     new_bbox2[2] = target_bbox[2]
609 |                     new_bbox2[3] = target_bbox[3]
610 | 
611 | 
612 |                     # if new_bbox[2] > 10 and new_bbox[3] > 10: 
613 |                     # switch_candidate_samples2 = sample_generator(new_bbox2, 100)
614 |                     switch_samples2 = gen_samples(SampleGenerator('gaussian', (ishape[1], ishape[0]), trans_f, opts['scale_f'], valid=True), new_bbox2, 256)
615 |                     # global_samples.append(switch_samples2)  
616 |                     # pdb.set_trace() 
617 |                     # samples = np.concatenate((switch_samples2, samples)) 
618 |                     samples = switch_samples2 
619 |             
620 |             # print("==>> Using Global Proposals and samples: ", samples.shape[0])  
621 |             # samples = np.concatenate((switch_samples2, samples)) 
622 | 
623 | 
624 |         padded_x1 = (samples[:, 0] - samples[:, 2]*(opts['padding']-1.)/2.).min()
625 |         padded_y1 = (samples[:, 1] - samples[:, 3]*(opts['padding']-1.)/2.).min()
626 |         padded_x2 = (samples[:, 0] + samples[:, 2]*(opts['padding']+1.)/2.).max()
627 |         padded_y2 = (samples[:, 1] + samples[:, 3]*(opts['padding']+1.)/2.).max()
628 |         padded_scene_box = np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1))
629 | 
630 |         if padded_scene_box[0] > cur_image_v.shape[1]:
631 |             padded_scene_box[0] = cur_image_v.shape[1]-1
632 |         if padded_scene_box[1] > cur_image_v.shape[0]:
633 |             padded_scene_box[1] = cur_image_v.shape[0]-1
634 |         if padded_scene_box[0] + padded_scene_box[2] < 0:
635 |             padded_scene_box[2] = -padded_scene_box[0]+1
636 |         if padded_scene_box[1] + padded_scene_box[3] < 0:
637 |             padded_scene_box[3] = -padded_scene_box[1]+1
638 | 
639 | 
640 |         crop_img_size = (padded_scene_box[2:4] * ((opts['img_size'], opts['img_size']) / target_bbox[2:4])).astype('int64')
641 |         cropped_image_v, cur_image_var_v = img_crop_model.crop_image(cur_image_v, np.reshape(padded_scene_box,(1,4)), crop_img_size)
642 |         cropped_image_v = cropped_image_v - 128.
643 |         cropped_image_i, cur_image_var_i = img_crop_model.crop_image(cur_image_i, np.reshape(padded_scene_box,(1,4)), crop_img_size)
644 |         cropped_image_i = cropped_image_i - 128.
645 | 
646 |         model.eval()
647 |         feat_map_v, feat_map_i, feat_map = model(cropped_image_v, cropped_image_i, out_layer='conv3')
648 | 
649 |         # relative target bbox with padded_scene_box
650 |         rel_target_bbox = np.copy(target_bbox)
651 |         rel_target_bbox[0:2] -= padded_scene_box[0:2]
652 | 
653 | 
654 |         # Extract sample features and get target location
655 |         batch_num = np.zeros((samples.shape[0], 1))
656 |         sample_rois = np.copy(samples)
657 |         sample_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)), sample_rois.shape[0], axis=0)
658 |         sample_rois = samples2maskroi(sample_rois, model.receptive_field, (opts['img_size'],opts['img_size']), target_bbox[2:4],opts['padding'])
659 |         sample_rois = np.concatenate((batch_num, sample_rois), axis=1)
660 |         sample_rois = Variable(torch.from_numpy(sample_rois.astype('float32'))).cuda()
661 | 
662 |         sample_feats = model.roi_align_model(feat_map, sample_rois)
663 |         sample_feats = sample_feats.view(sample_feats.size(0), -1).clone()
664 | 
665 |         sample_scores = model(sample_feats, sample_feats, in_layer='fc4')
666 |         top_scores, top_idx = sample_scores[:, 1].topk(5)
667 |         top_idx = top_idx.data.cpu().numpy()
668 |         target_score = top_scores.data.mean()
669 |         target_bbox = samples[top_idx].mean(axis=0)
670 | 
671 |         success = target_score > opts['success_thr']
672 | 
673 |         # # Expand search area at failure
674 |         if success:
675 |             trans_f = opts['trans_f']
676 |         else:
677 |             trans_f = opts['trans_f_expand']
678 | 
679 |         ## Bbox regression
680 |         if success:
681 |             bbreg_feats = sample_feats[top_idx,:]
682 |             bbreg_samples = samples[top_idx]
683 |             bbreg_samples = bbreg.predict(bbreg_feats.data, bbreg_samples)
684 |             bbreg_bbox = bbreg_samples.mean(axis=0) 
685 | 
686 |             if failure_count >= 3: 
687 |             	failure_count = failure_count - 3  
688 |             else: 
689 |             	failure_count = 0 
690 |         else:
691 |             bbreg_bbox = target_bbox
692 |             failure_count = failure_count + 1 
693 | 
694 |         # Save result
695 |         result[i] = target_bbox
696 |         result_bb[i] = bbreg_bbox
697 |         iou_result[i] = 1.
698 | 
699 |         # Data collect
700 |         if success:
701 | 
702 |             # Draw pos/neg samples
703 |             pos_examples = gen_samples(
704 |                 SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2), target_bbox,
705 |                 opts['n_pos_update'],
706 |                 opts['overlap_pos_update'])
707 |             neg_examples = gen_samples(
708 |                 SampleGenerator('uniform', (ishape[1], ishape[0]), 1.5, 1.2), target_bbox,
709 |                 opts['n_neg_update'],
710 |                 opts['overlap_neg_update'])
711 | 
712 |             padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] * (opts['padding'] - 1.) / 2.).min()
713 |             padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] * (opts['padding'] - 1.) / 2.).min()
714 |             padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] * (opts['padding'] + 1.) / 2.).max()
715 |             padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] * (opts['padding'] + 1.) / 2.).max()
716 |             padded_scene_box = np.reshape(np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1)),(1,4))
717 | 
718 |             scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
719 |             jitter_scale = [1.]
720 | 
721 |             for bidx in range(0, scene_boxes.shape[0]):
722 |                 crop_img_size = (scene_boxes[bidx, 2:4] * ((opts['img_size'], opts['img_size']) / target_bbox[2:4])).astype('int64') * jitter_scale[bidx]
723 |                 cropped_image_v, cur_image_var_v = img_crop_model.crop_image(cur_image_v, np.reshape(scene_boxes[bidx], (1, 4)),crop_img_size)
724 |                 cropped_image_v = cropped_image_v - 128.
725 |                 cropped_image_i, cur_image_var_i = img_crop_model.crop_image(cur_image_i, np.reshape(scene_boxes[bidx], (1, 4)),crop_img_size)
726 |                 cropped_image_i = cropped_image_i - 128.
727 | 
728 |                 feat_map_v, feat_map_i, feat_map = model(cropped_image_v, cropped_image_i, out_layer='conv3')
729 | 
730 |                 rel_target_bbox = np.copy(target_bbox)
731 |                 rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]
732 | 
733 |                 batch_num = np.zeros((pos_examples.shape[0], 1))
734 |                 cur_pos_rois = np.copy(pos_examples)
735 |                 cur_pos_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)), cur_pos_rois.shape[0],axis=0)
736 |                 scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
737 |                 cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size),target_bbox[2:4], opts['padding'])
738 |                 cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
739 |                 cur_pos_rois = Variable(torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()
740 | 
741 |                 cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois)
742 |                 cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1).data.clone()
743 | 
744 |                 batch_num = np.zeros((neg_examples.shape[0], 1))
745 |                 cur_neg_rois = np.copy(neg_examples)
746 |                 cur_neg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)), cur_neg_rois.shape[0], axis=0)
747 |                 cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), target_bbox[2:4], opts['padding'])
748 |                 cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
749 |                 cur_neg_rois = Variable(torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()
750 | 
751 |                 cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois)
752 |                 cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1).data.clone()
753 | 
754 | 
755 |                 feat_dim = cur_pos_feats.size(-1)
756 | 
757 |                 if bidx == 0:
758 |                     pos_feats = cur_pos_feats ##index select
759 |                     neg_feats = cur_neg_feats
760 |                 else:
761 |                     pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
762 |                     neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
763 | 
764 |             if pos_feats.size(0) > opts['n_pos_update']:
765 |                 pos_idx = np.asarray(range(pos_feats.size(0)))
766 |                 np.random.shuffle(pos_idx)
767 |                 pos_feats = pos_feats.index_select(0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())
768 |             if neg_feats.size(0) > opts['n_neg_update']:
769 |                 neg_idx = np.asarray(range(neg_feats.size(0)))
770 |                 np.random.shuffle(neg_idx)
771 |                 neg_feats = neg_feats.index_select(0,torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())
772 | 
773 |             pos_feats_all.append(pos_feats)
774 |             neg_feats_all.append(neg_feats)
775 | 
776 |             if len(pos_feats_all) > opts['n_frames_long']:
777 |                 del pos_feats_all[0]
778 |             if len(neg_feats_all) > opts['n_frames_short']:
779 |                 del neg_feats_all[0]
780 | 
781 |         # Short term update
782 |         if not success:
783 |             nframes = min(opts['n_frames_short'],len(pos_feats_all))
784 |             pos_data = torch.stack(pos_feats_all[-nframes:],0).view(-1,feat_dim)
785 |             neg_data = torch.stack(neg_feats_all,0).view(-1,feat_dim)
786 |             train(model, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update'])
787 | 
788 |         # Long term update
789 |         elif i % opts['long_interval'] == 0:
790 |             pos_data = torch.stack(pos_feats_all,0).view(-1,feat_dim)
791 |             neg_data = torch.stack(neg_feats_all,0).view(-1,feat_dim)
792 |             train(model, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update'])
793 | 
794 |         spf = time.time()-tic
795 |         spf_total += spf
796 | 
797 |         # Display
798 |         if display or savefig:
799 |             im.set_data(cur_image_v)
800 | 
801 |             if gt is not None:
802 |                 gt_rect.set_xy(gt[i,:2])
803 |                 gt_rect.set_width(gt[i,2])
804 |                 gt_rect.set_height(gt[i,3])
805 | 
806 |             rect.set_xy(result_bb[i,:2])
807 |             rect.set_width(result_bb[i,2])
808 |             rect.set_height(result_bb[i,3])
809 | 
810 |             if display:
811 |                 plt.pause(.01)
812 |                 plt.draw()
813 |             if savefig:
814 |                 fig.savefig(os.path.join(savefig_dir,'%04d.jpg'%(i)),dpi=dpi)
815 | 
816 |         if opts['visual_log']:
817 |             if gt is None:
818 |                 print("Frame %d/%d, Score %.3f, Time %.3f" % \
819 |                     (i, len(img_list), target_score, spf))
820 |             else:
821 |                 print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
822 |                     (i, len(img_list), overlap_ratio(gt[i],result_bb[i])[0], target_score, spf)) 
823 | 
824 |         print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
825 |             (i, len(img_list_v), overlap_ratio(gt[i], result_bb[i])[0], target_score, spf))
826 | 
827 |         iou_result[i]= overlap_ratio(gt[i],result_bb[i])[0]
828 | 
829 | 
830 |     fps = len(img_list_v) / spf_total
831 | 
832 |     # pdb.set_trace() 
833 |     # print("==>> epochID %d, L1-Loss %.4f, Time %.3f" % (epochID, total_l1_Loss/len(img_list_v), spf_total)) 
834 | 
835 | 
836 |     return iou_result, result_bb, fps, result
837 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/tracker_backup.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import sys
  3 | import time
  4 | 
  5 | ## for drawing package
  6 | import matplotlib.pyplot as plt
  7 | import matplotlib.patches as patches
  8 | 
  9 | import torch.optim as optim
 10 | from torch.autograd import Variable
 11 | from random import randint
 12 | 
 13 | sys.path.insert(0,'./modules')
 14 | from sample_generator import *
 15 | from data_prov import *
 16 | from model import *
 17 | from bbreg import *
 18 | from options import *
 19 | from img_cropper import *
 20 | from roi_align import RoIAlignAvg,RoIAlignMax,RoIAlignAdaMax 
 21 | 
 22 | 
 23 | 
 24 | # sys.path.insert(0,'./naive_rgbt_TANet_module/')
 25 | # from generator import naive_Generator
 26 | # from utils import *
 27 | import pdb
 28 | import warnings
 29 | warnings.filterwarnings("ignore")
 30 | import torchvision.transforms as transforms
 31 | import random
 32 | import cv2 
 33 | from skimage import measure, draw 
 34 | 
 35 | 
 36 | # generator_path = './naive_rgbt_TANet_module/naive_TANet_rgbt_model.pkl'
 37 | 
 38 | # Generator = naive_Generator() 
 39 | # Generator.load_state_dict(torch.load(generator_path))
 40 | # Generator.cuda()
 41 | 
 42 | 
 43 | np.random.seed(123)
 44 | torch.manual_seed(456)
 45 | torch.cuda.manual_seed(789)
 46 | 
 47 | # torch.set_default_tensor_type(torch.cuda.FloatTensor)
 48 | 
 49 | ##################################################################################
 50 | ############################Do not modify opts anymore.###########################
 51 | ######################Becuase of synchronization of options#######################
 52 | ##################################################################################
 53 | 
 54 | def set_optimizer(model, lr_base, lr_mult=opts['lr_mult'], momentum=opts['momentum'], w_decay=opts['w_decay']):
 55 |     params = model.get_learnable_params()
 56 |     param_list = []
 57 |     for k, p in params.items():
 58 |         lr = lr_base
 59 |         for l, m in lr_mult.items():
 60 |             if k.startswith(l):
 61 |                 lr = lr_base * m
 62 |         param_list.append({'params': [p], 'lr':lr})
 63 |     optimizer = optim.SGD(param_list, lr = lr, momentum=momentum, weight_decay=w_decay)
 64 |     return optimizer
 65 | 
 66 | 
 67 | def train(model, criterion, optimizer, pos_feats, neg_feats, maxiter, in_layer='fc4'):
 68 |     model.train()
 69 | 
 70 |     batch_pos = opts['batch_pos']
 71 |     batch_neg = opts['batch_neg']
 72 |     batch_test = opts['batch_test']
 73 |     batch_neg_cand = max(opts['batch_neg_cand'], batch_neg)
 74 | 
 75 |     pos_idx = np.random.permutation(pos_feats.size(0))
 76 |     neg_idx = np.random.permutation(neg_feats.size(0))
 77 |     while(len(pos_idx) < batch_pos*maxiter):
 78 |         pos_idx = np.concatenate([pos_idx, np.random.permutation(pos_feats.size(0))])
 79 |     while(len(neg_idx) < batch_neg_cand*maxiter):
 80 |         neg_idx = np.concatenate([neg_idx, np.random.permutation(neg_feats.size(0))])
 81 |     pos_pointer = 0
 82 |     neg_pointer = 0
 83 | 
 84 | 
 85 | 
 86 |     for iter in range(maxiter):
 87 | 
 88 |         # select pos idx
 89 |         pos_next = pos_pointer + batch_pos
 90 |         pos_cur_idx = pos_idx[pos_pointer:pos_next]
 91 |         pos_cur_idx = pos_feats.new(pos_cur_idx).long()
 92 |         pos_pointer = pos_next
 93 | 
 94 |         # select neg idx
 95 |         neg_next = neg_pointer + batch_neg_cand
 96 |         neg_cur_idx = neg_idx[neg_pointer:neg_next]
 97 |         neg_cur_idx = neg_feats.new(neg_cur_idx).long()
 98 |         neg_pointer = neg_next
 99 | 
100 |         # create batch
101 |         batch_pos_feats = Variable(pos_feats.index_select(0, pos_cur_idx))
102 |         batch_neg_feats = Variable(neg_feats.index_select(0, neg_cur_idx))
103 | 
104 |         # hard negative mining
105 |         if batch_neg_cand > batch_neg:
106 |             model.eval() ## model transfer into evaluation mode
107 |             for start in range(0,batch_neg_cand,batch_test):
108 |                 end = min(start+batch_test,batch_neg_cand)
109 | 
110 |                 if batch_neg_feats[start:end].shape[1] == 9216: 
111 |                     temp_neg_feats = batch_neg_feats[start:end] 
112 |                 else: 
113 |                     temp_neg_feats = torch.cat((batch_neg_feats[start:end], batch_neg_feats[start:end]), dim=1)  
114 | 
115 |                 score = model(temp_neg_feats, temp_neg_feats, in_layer=in_layer)
116 |                 if start==0:
117 |                     neg_cand_score = score.data[:,1].clone()
118 |                 else:
119 |                     neg_cand_score = torch.cat((neg_cand_score, score.data[:,1].clone()),0)
120 | 
121 |             _, top_idx = neg_cand_score.topk(batch_neg)
122 |             batch_neg_feats = batch_neg_feats.index_select(0, Variable(top_idx))
123 |             model.train() ## model transfer into train mode
124 | 
125 |         # forward
126 |         if batch_pos_feats.shape[1] == 9216: 
127 |             temp_pos_feats = batch_pos_feats 
128 |         else: 
129 |             temp_pos_feats = torch.cat((batch_pos_feats, batch_pos_feats), dim=1)  
130 | 
131 |         if batch_neg_feats.shape[1] == 9216: 
132 |             temp_neg_feats = batch_neg_feats 
133 |         else: 
134 |             temp_neg_feats = torch.cat((batch_neg_feats, batch_neg_feats), dim=1)  
135 | 
136 |         # pdb.set_trace() 
137 |         pos_score = model(temp_pos_feats, temp_pos_feats, in_layer=in_layer)
138 |         neg_score = model(temp_neg_feats, temp_neg_feats, in_layer=in_layer)
139 | 
140 |         # optimize
141 |         loss = criterion(pos_score, neg_score)
142 |         model.zero_grad()
143 |         loss.backward()
144 |         torch.nn.utils.clip_grad_norm(model.parameters(), opts['grad_clip'])
145 |         optimizer.step()
146 | 
147 |         if opts['visual_log']:
148 |             print("Iter %d, Loss %.4f" % (iter, loss.data[0]))
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 
155 | 
156 | 
157 | 
158 | 
159 | 
160 | def run_mdnet(img_list_v, img_list_i, init_bbox, gt=None, seq='seq_name ex)Basketball', savefig_dir='', display=False):
161 |     
162 |     # Init bbox
163 |     target_bbox = np.array(init_bbox)
164 |     result = np.zeros((len(img_list_v),4))
165 |     result_bb = np.zeros((len(img_list_v),4))
166 |     result[0] = np.copy(target_bbox)
167 |     result_bb[0] = np.copy(target_bbox)
168 | 
169 |     iou_result = np.zeros((len(img_list_v),1))
170 | 
171 |     # execution time array
172 |     exec_time_result = np.zeros((len(img_list_v),1))
173 | 
174 |     # Init model
175 |     model = MDNet(opts['model_path'])
176 |     if opts['adaptive_align']:
177 |         align_h = model.roi_align_model.aligned_height
178 |         align_w = model.roi_align_model.aligned_width
179 |         spatial_s = model.roi_align_model.spatial_scale
180 |         model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s)
181 |     if opts['use_gpu']:
182 |         model = model.cuda()
183 | 
184 |     model.set_learnable_params(opts['ft_layers'])
185 | 
186 |     # Init image crop model
187 |     img_crop_model = imgCropper(1.)
188 |     if opts['use_gpu']:
189 |         img_crop_model.gpuEnable()
190 | 
191 |     # Init criterion and optimizer
192 |     criterion = BinaryLoss()
193 |     init_optimizer = set_optimizer(model, opts['lr_init'])
194 |     update_optimizer = set_optimizer(model, opts['lr_update'])
195 | 
196 |     tic = time.time()
197 |     # Load first image
198 |     cur_image_v = Image.open(img_list_v[0]).convert('RGB')
199 |     cur_image_v = np.asarray(cur_image_v)
200 | 
201 |     cur_image_i = Image.open(img_list_i[0]).convert('RGB')
202 |     cur_image_i = np.asarray(cur_image_i)
203 | 
204 | 
205 |     init_targetObject_v = cur_image_v[int(init_bbox[0]):int(init_bbox[0]+init_bbox[2]), int(init_bbox[1]):int(init_bbox[1]+init_bbox[3]), :]   
206 |     init_targetObject_i = cur_image_i[int(init_bbox[0]):int(init_bbox[0]+init_bbox[2]), int(init_bbox[1]):int(init_bbox[1]+init_bbox[3]), :] 
207 | 
208 | 
209 |     # Draw pos/neg samples
210 |     ishape = cur_image_v.shape
211 |     pos_examples = gen_samples(SampleGenerator('gaussian', (ishape[1],ishape[0]), 0.1, 1.2), target_bbox, opts['n_pos_init'], opts['overlap_pos_init'])
212 |     neg_examples = gen_samples(SampleGenerator('uniform', (ishape[1],ishape[0]), 1, 2, 1.1), target_bbox, opts['n_neg_init'], opts['overlap_neg_init'])
213 |     neg_examples = np.random.permutation(neg_examples)
214 | 
215 |     cur_bbreg_examples = gen_samples(SampleGenerator('uniform', (ishape[1],ishape[0]), 0.3, 1.5, 1.1), target_bbox, opts['n_bbreg'], opts['overlap_bbreg'], opts['scale_bbreg'])
216 | 
217 |     # compute padded sample
218 |     padded_x1 = (neg_examples[:,0]-neg_examples[:,2]*(opts['padding']-1.)/2.).min()
219 |     padded_y1 = (neg_examples[:,1]-neg_examples[:,3]*(opts['padding']-1.)/2.).min()
220 |     padded_x2 = (neg_examples[:,0]+neg_examples[:,2]*(opts['padding']+1.)/2.).max()
221 |     padded_y2 = (neg_examples[:,1]+neg_examples[:,3]*(opts['padding']+1.)/2.).max()
222 |     padded_scene_box = np.reshape(np.asarray((padded_x1,padded_y1,padded_x2-padded_x1,padded_y2-padded_y1)),(1,4))
223 | 
224 |     scene_boxes = np.reshape(np.copy(padded_scene_box), (1,4))
225 |     if opts['jitter']:
226 |         ## horizontal shift
227 |         jittered_scene_box_horizon = np.copy(padded_scene_box)
228 |         jittered_scene_box_horizon[0,0] -= 4.
229 |         jitter_scale_horizon = 1.
230 | 
231 |         ## vertical shift
232 |         jittered_scene_box_vertical = np.copy(padded_scene_box)
233 |         jittered_scene_box_vertical[0,1] -= 4.
234 |         jitter_scale_vertical = 1.
235 | 
236 |         jittered_scene_box_reduce1 = np.copy(padded_scene_box)
237 |         jitter_scale_reduce1 = 1.1 ** (-1)
238 | 
239 |         ## vertical shift
240 |         jittered_scene_box_enlarge1 = np.copy(padded_scene_box)
241 |         jitter_scale_enlarge1 = 1.1 ** (1)
242 | 
243 |         ## scale reduction
244 |         jittered_scene_box_reduce2 = np.copy(padded_scene_box)
245 |         jitter_scale_reduce2 = 1.1**(-2)
246 |         ## scale enlarge
247 |         jittered_scene_box_enlarge2 = np.copy(padded_scene_box)
248 |         jitter_scale_enlarge2 = 1.1 ** (2)
249 | 
250 |         scene_boxes = np.concatenate([scene_boxes, jittered_scene_box_horizon, jittered_scene_box_vertical,jittered_scene_box_reduce1,jittered_scene_box_enlarge1,jittered_scene_box_reduce2,jittered_scene_box_enlarge2],axis=0)
251 |         jitter_scale = [1.,jitter_scale_horizon,jitter_scale_vertical,jitter_scale_reduce1,jitter_scale_enlarge1,jitter_scale_reduce2,jitter_scale_enlarge2]
252 |     else:
253 |         jitter_scale = [1.]
254 | 
255 |     model.eval()
256 |     for bidx in range(0,scene_boxes.shape[0]):
257 |         crop_img_size = (scene_boxes[bidx, 2:4] * ((opts['img_size'],opts['img_size'])/target_bbox[2:4])).astype('int64')*jitter_scale[bidx]
258 |         cropped_image_v, cur_image_var_v = img_crop_model.crop_image(cur_image_v, np.reshape(scene_boxes[bidx],(1,4)), crop_img_size)
259 |         cropped_image_v = cropped_image_v - 128.
260 | 
261 |         cropped_image_i, cur_image_var_i = img_crop_model.crop_image(cur_image_i, np.reshape(scene_boxes[bidx],(1,4)), crop_img_size)
262 |         cropped_image_i = cropped_image_i - 128.
263 | 
264 | 
265 |         feat_map_v, feat_map_i, fused_feats = model(cropped_image_v, cropped_image_i, out_layer='conv3')
266 | 
267 |         rel_target_bbox = np.copy(target_bbox)
268 |         rel_target_bbox[0:2] -= scene_boxes[bidx,0:2]
269 | 
270 |         batch_num = np.zeros((pos_examples.shape[0], 1))
271 |         cur_pos_rois = np.copy(pos_examples)
272 |         cur_pos_rois[:,0:2] -= np.repeat(np.reshape(scene_boxes[bidx,0:2],(1,2)),cur_pos_rois.shape[0],axis=0)
273 |         scaled_obj_size = float(opts['img_size'])*jitter_scale[bidx]
274 |         cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), target_bbox[2:4], opts['padding'])
275 |         cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
276 |         cur_pos_rois = Variable(torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()
277 | 
278 |         # pdb.set_trace() 
279 |         cur_pos_feats = model.roi_align_model(fused_feats, cur_pos_rois)
280 |         cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1).data.clone()
281 | 
282 |         # cur_pos_feats_i = model.roi_align_model(feat_map_i, cur_pos_rois)
283 |         # cur_pos_feats_i = cur_pos_feats_i.view(cur_pos_feats_i.size(0), -1).data.clone()
284 | 
285 | 
286 |         batch_num = np.zeros((neg_examples.shape[0], 1))
287 |         cur_neg_rois = np.copy(neg_examples)
288 |         cur_neg_rois[:,0:2] -= np.repeat(np.reshape(scene_boxes[bidx,0:2],(1,2)),cur_neg_rois.shape[0],axis=0)
289 |         cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field, (scaled_obj_size,scaled_obj_size), target_bbox[2:4], opts['padding'])
290 |         cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
291 |         cur_neg_rois = Variable(torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()
292 | 
293 |         cur_neg_feats = model.roi_align_model(fused_feats, cur_neg_rois)
294 |         cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1).data.clone()
295 | 
296 |         # cur_neg_feats_i = model.roi_align_model(feat_map_i, cur_neg_rois)
297 |         # cur_neg_feats_i = cur_neg_feats_i.view(cur_neg_feats_i.size(0), -1).data.clone()
298 | 
299 | 
300 |         ## bbreg rois
301 |         batch_num = np.zeros((cur_bbreg_examples.shape[0], 1))
302 |         cur_bbreg_rois = np.copy(cur_bbreg_examples)
303 |         cur_bbreg_rois[:,0:2] -= np.repeat(np.reshape(scene_boxes[bidx,0:2],(1,2)), cur_bbreg_rois.shape[0],axis=0)
304 |         scaled_obj_size = float(opts['img_size'])*jitter_scale[bidx]
305 |         cur_bbreg_rois = samples2maskroi(cur_bbreg_rois, model.receptive_field,(scaled_obj_size,scaled_obj_size), target_bbox[2:4], opts['padding'])
306 |         cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois), axis=1)
307 |         cur_bbreg_rois = Variable(torch.from_numpy(cur_bbreg_rois.astype('float32'))).cuda()
308 | 
309 |         cur_bbreg_feats = model.roi_align_model(fused_feats, cur_bbreg_rois)
310 |         cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0), -1).data.clone()
311 | 
312 |         # cur_bbreg_feats_i = model.roi_align_model(feat_map_i, cur_bbreg_rois)
313 |         # cur_bbreg_feats_i = cur_bbreg_feats_i.view(cur_bbreg_feats_i.size(0), -1).data.clone()
314 | 
315 | 
316 |         feat_dim = cur_pos_feats.size(-1)
317 | 
318 |         if bidx==0:
319 |             pos_feats = cur_pos_feats
320 |             neg_feats = cur_neg_feats
321 |             ##bbreg feature
322 |             bbreg_feats = cur_bbreg_feats
323 |             bbreg_examples = cur_bbreg_examples
324 |         else:
325 |             pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
326 |             neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
327 |             ##bbreg feature
328 |             bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0)
329 |             bbreg_examples = np.concatenate((bbreg_examples, cur_bbreg_examples), axis=0)
330 | 
331 |     if pos_feats.size(0) > opts['n_pos_init']:
332 |         pos_idx = np.asarray(range(pos_feats.size(0)))
333 |         np.random.shuffle(pos_idx)
334 |         pos_feats = pos_feats[pos_idx[0:opts['n_pos_init']],:]
335 |     if neg_feats.size(0) > opts['n_neg_init']:
336 |         neg_idx = np.asarray(range(neg_feats.size(0)))
337 |         np.random.shuffle(neg_idx)
338 |         neg_feats = neg_feats[neg_idx[0:opts['n_neg_init']], :]
339 | 
340 |     ##bbreg
341 |     if bbreg_feats.size(0) > opts['n_bbreg']:
342 |         bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
343 |         np.random.shuffle(bbreg_idx)
344 |         bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
345 |         bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']],:]
346 |         #print bbreg_examples.shape
347 | 
348 | 
349 |     # init_target_feats = pos_feats[:400]    
350 | 
351 | 
352 |     ## open images and crop patch from obj
353 |     extra_obj_size = np.array((opts['img_size'],opts['img_size']))
354 |     extra_crop_img_size = extra_obj_size * (opts['padding']+0.6)
355 |     replicateNum = 100    
356 |     for iidx in range(replicateNum):
357 |         extra_target_bbox = np.copy(target_bbox)
358 | 
359 |         extra_scene_box = np.copy(extra_target_bbox)
360 |         extra_scene_box_center = extra_scene_box[0:2] + extra_scene_box[2:4] / 2.
361 |         extra_scene_box_size = extra_scene_box[2:4] * (opts['padding'] + 0.6)
362 |         extra_scene_box[0:2] = extra_scene_box_center - extra_scene_box_size / 2.
363 |         extra_scene_box[2:4] = extra_scene_box_size
364 | 
365 |         extra_shift_offset = np.clip(2. * np.random.randn(2), -4, 4)
366 |         cur_extra_scale = 1.1 ** np.clip(np.random.randn(1), -2, 2)
367 | 
368 | 
369 |         extra_scene_box[0] += extra_shift_offset[0]
370 |         extra_scene_box[1] += extra_shift_offset[1]
371 |         extra_scene_box[2:4] *= cur_extra_scale[0]
372 | 
373 |         scaled_obj_size = float(opts['img_size']) / cur_extra_scale[0]
374 | 
375 |         cur_extra_cropped_image_v, _ = img_crop_model.crop_image(cur_image_v, np.reshape(extra_scene_box,(1,4)), extra_crop_img_size)
376 |         cur_extra_cropped_image_v = cur_extra_cropped_image_v.detach() 
377 | 
378 |         cur_extra_cropped_image_i, _ = img_crop_model.crop_image(cur_image_i, np.reshape(extra_scene_box,(1,4)), extra_crop_img_size)
379 |         cur_extra_cropped_image_i = cur_extra_cropped_image_i.detach() 
380 | 
381 |         # extra_target_bbox = np.array(list(map(int, extra_target_bbox)))
382 |         cur_extra_pos_examples = gen_samples(SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),extra_target_bbox, opts['n_pos_init']//replicateNum, opts['overlap_pos_init'])
383 |         cur_extra_neg_examples = gen_samples(SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 2, 1.1),extra_target_bbox, opts['n_neg_init']/replicateNum//4, opts['overlap_neg_init'])
384 | 
385 |         ##bbreg sample
386 |         cur_extra_bbreg_examples = gen_samples(SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5, 1.1),extra_target_bbox, opts['n_bbreg']/replicateNum//4, opts['overlap_bbreg'], opts['scale_bbreg'])
387 | 
388 |         batch_num = iidx*np.ones((cur_extra_pos_examples.shape[0], 1))
389 |         cur_extra_pos_rois = np.copy(cur_extra_pos_examples)
390 |         cur_extra_pos_rois[:, 0:2] -= np.repeat(np.reshape(extra_scene_box[0:2], (1, 2)),
391 |                                                     cur_extra_pos_rois.shape[0], axis=0)
392 |         cur_extra_pos_rois = samples2maskroi(cur_extra_pos_rois, model.receptive_field,(scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4], opts['padding'])
393 |         cur_extra_pos_rois = np.concatenate((batch_num, cur_extra_pos_rois), axis=1)
394 | 
395 |         batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1))
396 |         cur_extra_neg_rois = np.copy(cur_extra_neg_examples)
397 |         cur_extra_neg_rois[:, 0:2] -= np.repeat(np.reshape(extra_scene_box[0:2], (1, 2)),cur_extra_neg_rois.shape[0], axis=0)
398 |         cur_extra_neg_rois = samples2maskroi(cur_extra_neg_rois, model.receptive_field,(scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4], opts['padding'])
399 |         cur_extra_neg_rois = np.concatenate((batch_num, cur_extra_neg_rois), axis=1)
400 | 
401 |         ## bbreg rois
402 |         batch_num = iidx * np.ones((cur_extra_bbreg_examples.shape[0], 1))
403 |         cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples)
404 |         cur_extra_bbreg_rois[:,0:2] -= np.repeat(np.reshape(extra_scene_box[0:2],(1,2)),cur_extra_bbreg_rois.shape[0],axis=0)
405 |         cur_extra_bbreg_rois = samples2maskroi(cur_extra_bbreg_rois, model.receptive_field,(scaled_obj_size,scaled_obj_size), extra_target_bbox[2:4], opts['padding'])
406 |         cur_extra_bbreg_rois = np.concatenate((batch_num, cur_extra_bbreg_rois), axis=1)
407 | 
408 | 
409 | 
410 |         if iidx==0:
411 |             extra_cropped_image_v = cur_extra_cropped_image_v 
412 |             extra_cropped_image_i = cur_extra_cropped_image_i 
413 | 
414 |             extra_pos_rois = np.copy(cur_extra_pos_rois)
415 |             extra_neg_rois = np.copy(cur_extra_neg_rois)
416 |             ##bbreg rois
417 |             extra_bbreg_rois = np.copy(cur_extra_bbreg_rois)
418 |             extra_bbreg_examples = np.copy(cur_extra_bbreg_examples)
419 |         else:
420 |             extra_cropped_image_v = torch.cat((extra_cropped_image_v, cur_extra_cropped_image_v),dim=0)
421 |             extra_cropped_image_i = torch.cat((extra_cropped_image_i, cur_extra_cropped_image_i),dim=0)
422 | 
423 |             extra_pos_rois = np.concatenate( (extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0)
424 |             extra_neg_rois = np.concatenate( (extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0)
425 |             ##bbreg rois
426 |             extra_bbreg_rois = np.concatenate( (extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0 )
427 |             extra_bbreg_examples = np.concatenate( (extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)), axis=0 )
428 | 
429 | 
430 |     extra_pos_rois = Variable(torch.from_numpy(extra_pos_rois.astype('float32'))).cuda()
431 |     extra_neg_rois = Variable(torch.from_numpy(extra_neg_rois.astype('float32'))).cuda()
432 |     ##bbreg rois
433 |     extra_bbreg_rois = Variable(torch.from_numpy(extra_bbreg_rois.astype('float32'))).cuda()
434 | 
435 |     extra_cropped_image_v -= 128.
436 |     extra_cropped_image_i -= 128.    
437 | 
438 |     # pdb.set_trace() 
439 | 
440 |     for iidxxx in range(replicateNum):
441 |         temp_extra_cropped_image_v = torch.unsqueeze(extra_cropped_image_v[iidxxx], dim=0) 
442 |         temp_extra_cropped_image_i = torch.unsqueeze(extra_cropped_image_i[iidxxx], dim=0) 
443 |         temp_extra_feat_maps_v, temp_extra_feat_maps_i, temp_extra_feat_maps = model(temp_extra_cropped_image_v, temp_extra_cropped_image_i, out_layer='conv3') 
444 |         temp_extra_feat_maps = torch.squeeze(temp_extra_feat_maps, dim=0) 
445 |         # temp_extra_feat_maps_i = torch.squeeze(temp_extra_feat_maps_i, dim=0) 
446 | 
447 |         if iidxxx == 0: 
448 |             extra_feat_maps = torch.zeros(replicateNum, temp_extra_feat_maps.shape[0], temp_extra_feat_maps.shape[1], temp_extra_feat_maps.shape[2])
449 |             # extra_feat_maps_i = torch.zeros(replicateNum, temp_extra_feat_maps_i.shape[0], temp_extra_feat_maps_i.shape[1], temp_extra_feat_maps_i.shape[2])
450 | 
451 |         extra_feat_maps[iidxxx] = temp_extra_feat_maps 
452 |         # extra_feat_maps_i[iidxxx] = temp_extra_feat_maps_i 
453 | 
454 |     extra_feat_maps = extra_feat_maps.cuda() 
455 | 
456 | 
457 |     # Draw pos/neg samples
458 |     ishape = cur_image_v.shape
459 | 
460 |     # pdb.set_trace() 
461 |     extra_pos_feats = model.roi_align_model(extra_feat_maps, extra_pos_rois)
462 |     extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0), -1).data.clone()
463 | 
464 | 
465 |     extra_neg_feats = model.roi_align_model(extra_feat_maps, extra_neg_rois)
466 |     extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0), -1).data.clone()
467 | 
468 |     ##bbreg feat
469 |     extra_bbreg_feats = model.roi_align_model(extra_feat_maps, extra_bbreg_rois)
470 |     extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0), -1).data.clone()
471 | 
472 |     ## concatenate extra features to original_features
473 |     pos_feats = torch.cat((pos_feats, extra_pos_feats),dim=0)
474 |     neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0)
475 |     ## concatenate extra bbreg feats to original_bbreg_feats
476 |     bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0)
477 |     bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples), axis=0)
478 | 
479 |     torch.cuda.empty_cache()
480 |     model.zero_grad()
481 | 
482 |     # Initial training
483 |     train(model, criterion, init_optimizer, pos_feats, neg_feats, opts['maxiter_init'])
484 | 
485 |     ##bbreg train
486 |     if bbreg_feats.size(0) > opts['n_bbreg']:
487 |         bbreg_idx = np.asarray(range(bbreg_feats.size(0)))
488 |         np.random.shuffle(bbreg_idx)
489 |         bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :]
490 |         bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :]
491 | 
492 |     bbreg = BBRegressor((ishape[1], ishape[0]))
493 |     bbreg.train(bbreg_feats, bbreg_examples, target_bbox)
494 | 
495 | 
496 |     if pos_feats.size(0) > opts['n_pos_update']:
497 |         pos_idx = np.asarray(range(pos_feats.size(0)))
498 |         np.random.shuffle(pos_idx)
499 |         pos_feats_all = [pos_feats.index_select(0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())]
500 |     if neg_feats.size(0) > opts['n_neg_update']:
501 |         neg_idx = np.asarray(range(neg_feats.size(0)))
502 |         np.random.shuffle(neg_idx)
503 |         neg_feats_all = [neg_feats.index_select(0, torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())]
504 | 
505 | 
506 |     spf_total = time.time()-tic
507 | 
508 |     # Display
509 |     savefig = savefig_dir != ''
510 |     if display or savefig:
511 |         dpi = 80.0
512 |         figsize = (cur_image_v.shape[1]/dpi, cur_image_v.shape[0]/dpi)
513 | 
514 |         fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi)
515 |         ax = plt.Axes(fig, [0., 0., 1., 1.])
516 |         ax.set_axis_off()
517 |         fig.add_axes(ax)
518 |         im = ax.imshow(cur_image_v)
519 | 
520 |         if gt is not None:
521 |             gt_rect = plt.Rectangle(tuple(gt[0,:2]),gt[0,2],gt[0,3], linewidth=3, edgecolor="#00ff00", zorder=1, fill=False)
522 |             ax.add_patch(gt_rect)
523 | 
524 |         rect = plt.Rectangle(tuple(result_bb[0,:2]),result_bb[0,2],result_bb[0,3], linewidth=3, edgecolor="#ff0000", zorder=1, fill=False)
525 |         ax.add_patch(rect)
526 | 
527 |         if display:
528 |             plt.pause(.01)
529 |             plt.draw()
530 |         if savefig:
531 |             fig.savefig(os.path.join(savefig_dir,'0000.jpg'),dpi=dpi)
532 | 
533 | 
534 |     #####################################################################
535 |     ####                        Main loop 
536 |     ##################################################################### 
537 |     failure_count = 0 
538 |     trans_f = opts['trans_f']
539 | 
540 |     for i in range(1, len(img_list_v)):
541 | 
542 |         tic = time.time()
543 |         # Load image
544 |         cur_image_v = Image.open(img_list_v[i]).convert('RGB')
545 |         cur_image_v = np.asarray(cur_image_v)
546 |         cur_image_i = Image.open(img_list_i[i]).convert('RGB')
547 |         cur_image_i = np.asarray(cur_image_i)
548 | 
549 |         # Estimate target bbox
550 |         ishape = cur_image_v.shape
551 |         samples = gen_samples(SampleGenerator('gaussian', (ishape[1], ishape[0]), trans_f, opts['scale_f'], valid=True), target_bbox, opts['n_samples'])
552 | 
553 | 
554 | 
555 |         #########################################################################
556 |         ####                Target-Aware Attention Prediction   
557 |         #########################################################################
558 | 
559 |         attention_path = "daTANet_rgbt_234_Attention/" + seq + "/"  
560 |         attentionImage_name = str(i+1) + "_attentionMap.jpg"
561 | 
562 |         # pdb.set_trace() 
563 |         attentionFlag = os.path.exists(attention_path + attentionImage_name)
564 |         # print("==>> attentionFlag ", attentionFlag)
565 | 
566 |         if failure_count >= 8 and attentionFlag: 
567 |             
568 |             attentionMap = Image.open(attention_path+attentionImage_name).convert('RGB')
569 |             attentionMap = np.asarray(attentionMap)
570 |             # pdb.set_trace() 
571 | 
572 |             dynamic_atttentonMAP = cv2.resize(attentionMap, (cur_image_v.shape[1], cur_image_v.shape[0]), interpolation=cv2.INTER_LINEAR)
573 |             ret, static_atttentonMAP = cv2.threshold(dynamic_atttentonMAP, 100, 255, cv2.THRESH_BINARY) 
574 |             # cv2.imwrite('static_atttentonMAP.png', static_atttentonMAP) 
575 | 
576 |             # pdb.set_trace() 
577 | 
578 |             label_image = measure.label(static_atttentonMAP)
579 |             props = measure.regionprops(label_image)
580 | 
581 |             atttenton_BBox = [] 
582 |             attention_centerLoc = [] 
583 |             similarity_glob_target_max = 0 
584 |             global_samples = [] 
585 | 
586 |             #### for each candidate search region 
587 |             # for iii in range(len(props)): 
588 | 
589 |             if len(props) > 1:
590 |                 attNum = 1 
591 |             else: 
592 |                 attNum = len(props) 
593 | 
594 |             for iii in range(attNum):
595 |                 center_position = props[iii].centroid 
596 |                 center_position = [int(center_position[1]), int(center_position[0])]   
597 | 
598 |                 centerPos_prev_x = target_bbox[0] + target_bbox[2] / 2 
599 |                 centerPos_prev_y = target_bbox[1] + target_bbox[3] / 2 
600 | 
601 |                 if math.fabs(center_position[0] - centerPos_prev_x) < 30 and math.fabs(center_position[1] - centerPos_prev_y) < 30: 
602 | 
603 |                     bbox = props[iii].bbox 
604 | 
605 |                     new_bbox2 = np.zeros((4)) 
606 |                     new_bbox2[0] = center_position[0] - target_bbox[2]/2
607 |                     new_bbox2[1] = center_position[1] - target_bbox[3]/2
608 |                     new_bbox2[2] = target_bbox[2]
609 |                     new_bbox2[3] = target_bbox[3]
610 | 
611 | 
612 |                     # if new_bbox[2] > 10 and new_bbox[3] > 10: 
613 |                     # switch_candidate_samples2 = sample_generator(new_bbox2, 100)
614 |                     switch_samples2 = gen_samples(SampleGenerator('gaussian', (ishape[1], ishape[0]), trans_f, opts['scale_f'], valid=True), new_bbox2, 256)
615 |                     # global_samples.append(switch_samples2)  
616 |                     # pdb.set_trace() 
617 |                     # samples = np.concatenate((switch_samples2, samples)) 
618 |                     samples = switch_samples2 
619 |             
620 |             # print("==>> Using Global Proposals and samples: ", samples.shape[0])  
621 |             # samples = np.concatenate((switch_samples2, samples)) 
622 | 
623 | 
624 |         padded_x1 = (samples[:, 0] - samples[:, 2]*(opts['padding']-1.)/2.).min()
625 |         padded_y1 = (samples[:, 1] - samples[:, 3]*(opts['padding']-1.)/2.).min()
626 |         padded_x2 = (samples[:, 0] + samples[:, 2]*(opts['padding']+1.)/2.).max()
627 |         padded_y2 = (samples[:, 1] + samples[:, 3]*(opts['padding']+1.)/2.).max()
628 |         padded_scene_box = np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1))
629 | 
630 |         if padded_scene_box[0] > cur_image_v.shape[1]:
631 |             padded_scene_box[0] = cur_image_v.shape[1]-1
632 |         if padded_scene_box[1] > cur_image_v.shape[0]:
633 |             padded_scene_box[1] = cur_image_v.shape[0]-1
634 |         if padded_scene_box[0] + padded_scene_box[2] < 0:
635 |             padded_scene_box[2] = -padded_scene_box[0]+1
636 |         if padded_scene_box[1] + padded_scene_box[3] < 0:
637 |             padded_scene_box[3] = -padded_scene_box[1]+1
638 | 
639 | 
640 |         crop_img_size = (padded_scene_box[2:4] * ((opts['img_size'], opts['img_size']) / target_bbox[2:4])).astype('int64')
641 |         cropped_image_v, cur_image_var_v = img_crop_model.crop_image(cur_image_v, np.reshape(padded_scene_box,(1,4)), crop_img_size)
642 |         cropped_image_v = cropped_image_v - 128.
643 |         cropped_image_i, cur_image_var_i = img_crop_model.crop_image(cur_image_i, np.reshape(padded_scene_box,(1,4)), crop_img_size)
644 |         cropped_image_i = cropped_image_i - 128.
645 | 
646 |         model.eval()
647 |         feat_map_v, feat_map_i, feat_map = model(cropped_image_v, cropped_image_i, out_layer='conv3')
648 | 
649 |         # relative target bbox with padded_scene_box
650 |         rel_target_bbox = np.copy(target_bbox)
651 |         rel_target_bbox[0:2] -= padded_scene_box[0:2]
652 | 
653 | 
654 |         # Extract sample features and get target location
655 |         batch_num = np.zeros((samples.shape[0], 1))
656 |         sample_rois = np.copy(samples)
657 |         sample_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)), sample_rois.shape[0], axis=0)
658 |         sample_rois = samples2maskroi(sample_rois, model.receptive_field, (opts['img_size'],opts['img_size']), target_bbox[2:4],opts['padding'])
659 |         sample_rois = np.concatenate((batch_num, sample_rois), axis=1)
660 |         sample_rois = Variable(torch.from_numpy(sample_rois.astype('float32'))).cuda()
661 | 
662 |         sample_feats = model.roi_align_model(feat_map, sample_rois)
663 |         sample_feats = sample_feats.view(sample_feats.size(0), -1).clone()
664 | 
665 |         sample_scores = model(sample_feats, sample_feats, in_layer='fc4')
666 |         top_scores, top_idx = sample_scores[:, 1].topk(5)
667 |         top_idx = top_idx.data.cpu().numpy()
668 |         target_score = top_scores.data.mean()
669 |         target_bbox = samples[top_idx].mean(axis=0)
670 | 
671 |         success = target_score > opts['success_thr']
672 | 
673 |         # # Expand search area at failure
674 |         if success:
675 |             trans_f = opts['trans_f']
676 |         else:
677 |             trans_f = opts['trans_f_expand']
678 | 
679 |         ## Bbox regression
680 |         if success:
681 |             bbreg_feats = sample_feats[top_idx,:]
682 |             bbreg_samples = samples[top_idx]
683 |             bbreg_samples = bbreg.predict(bbreg_feats.data, bbreg_samples)
684 |             bbreg_bbox = bbreg_samples.mean(axis=0) 
685 | 
686 |             if failure_count >= 3: 
687 |             	failure_count = failure_count - 3  
688 |             else: 
689 |             	failure_count = 0 
690 |         else:
691 |             bbreg_bbox = target_bbox
692 |             failure_count = failure_count + 1 
693 | 
694 |         # Save result
695 |         result[i] = target_bbox
696 |         result_bb[i] = bbreg_bbox
697 |         iou_result[i] = 1.
698 | 
699 |         # Data collect
700 |         if success:
701 | 
702 |             # Draw pos/neg samples
703 |             pos_examples = gen_samples(
704 |                 SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2), target_bbox,
705 |                 opts['n_pos_update'],
706 |                 opts['overlap_pos_update'])
707 |             neg_examples = gen_samples(
708 |                 SampleGenerator('uniform', (ishape[1], ishape[0]), 1.5, 1.2), target_bbox,
709 |                 opts['n_neg_update'],
710 |                 opts['overlap_neg_update'])
711 | 
712 |             padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] * (opts['padding'] - 1.) / 2.).min()
713 |             padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] * (opts['padding'] - 1.) / 2.).min()
714 |             padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] * (opts['padding'] + 1.) / 2.).max()
715 |             padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] * (opts['padding'] + 1.) / 2.).max()
716 |             padded_scene_box = np.reshape(np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1)),(1,4))
717 | 
718 |             scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4))
719 |             jitter_scale = [1.]
720 | 
721 |             for bidx in range(0, scene_boxes.shape[0]):
722 |                 crop_img_size = (scene_boxes[bidx, 2:4] * ((opts['img_size'], opts['img_size']) / target_bbox[2:4])).astype('int64') * jitter_scale[bidx]
723 |                 cropped_image_v, cur_image_var_v = img_crop_model.crop_image(cur_image_v, np.reshape(scene_boxes[bidx], (1, 4)),crop_img_size)
724 |                 cropped_image_v = cropped_image_v - 128.
725 |                 cropped_image_i, cur_image_var_i = img_crop_model.crop_image(cur_image_i, np.reshape(scene_boxes[bidx], (1, 4)),crop_img_size)
726 |                 cropped_image_i = cropped_image_i - 128.
727 | 
728 |                 feat_map_v, feat_map_i, feat_map = model(cropped_image_v, cropped_image_i, out_layer='conv3')
729 | 
730 |                 rel_target_bbox = np.copy(target_bbox)
731 |                 rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2]
732 | 
733 |                 batch_num = np.zeros((pos_examples.shape[0], 1))
734 |                 cur_pos_rois = np.copy(pos_examples)
735 |                 cur_pos_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)), cur_pos_rois.shape[0],axis=0)
736 |                 scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx]
737 |                 cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size),target_bbox[2:4], opts['padding'])
738 |                 cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1)
739 |                 cur_pos_rois = Variable(torch.from_numpy(cur_pos_rois.astype('float32'))).cuda()
740 | 
741 |                 cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois)
742 |                 cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1).data.clone()
743 | 
744 |                 batch_num = np.zeros((neg_examples.shape[0], 1))
745 |                 cur_neg_rois = np.copy(neg_examples)
746 |                 cur_neg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)), cur_neg_rois.shape[0], axis=0)
747 |                 cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), target_bbox[2:4], opts['padding'])
748 |                 cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1)
749 |                 cur_neg_rois = Variable(torch.from_numpy(cur_neg_rois.astype('float32'))).cuda()
750 | 
751 |                 cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois)
752 |                 cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1).data.clone()
753 | 
754 | 
755 |                 feat_dim = cur_pos_feats.size(-1)
756 | 
757 |                 if bidx == 0:
758 |                     pos_feats = cur_pos_feats ##index select
759 |                     neg_feats = cur_neg_feats
760 |                 else:
761 |                     pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0)
762 |                     neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0)
763 | 
764 |             if pos_feats.size(0) > opts['n_pos_update']:
765 |                 pos_idx = np.asarray(range(pos_feats.size(0)))
766 |                 np.random.shuffle(pos_idx)
767 |                 pos_feats = pos_feats.index_select(0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())
768 |             if neg_feats.size(0) > opts['n_neg_update']:
769 |                 neg_idx = np.asarray(range(neg_feats.size(0)))
770 |                 np.random.shuffle(neg_idx)
771 |                 neg_feats = neg_feats.index_select(0,torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())
772 | 
773 |             pos_feats_all.append(pos_feats)
774 |             neg_feats_all.append(neg_feats)
775 | 
776 |             if len(pos_feats_all) > opts['n_frames_long']:
777 |                 del pos_feats_all[0]
778 |             if len(neg_feats_all) > opts['n_frames_short']:
779 |                 del neg_feats_all[0]
780 | 
781 |         # Short term update
782 |         if not success:
783 |             nframes = min(opts['n_frames_short'],len(pos_feats_all))
784 |             pos_data = torch.stack(pos_feats_all[-nframes:],0).view(-1,feat_dim)
785 |             neg_data = torch.stack(neg_feats_all,0).view(-1,feat_dim)
786 |             train(model, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update'])
787 | 
788 |         # Long term update
789 |         elif i % opts['long_interval'] == 0:
790 |             pos_data = torch.stack(pos_feats_all,0).view(-1,feat_dim)
791 |             neg_data = torch.stack(neg_feats_all,0).view(-1,feat_dim)
792 |             train(model, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update'])
793 | 
794 |         spf = time.time()-tic
795 |         spf_total += spf
796 | 
797 |         # Display
798 |         if display or savefig:
799 |             im.set_data(cur_image_v)
800 | 
801 |             if gt is not None:
802 |                 gt_rect.set_xy(gt[i,:2])
803 |                 gt_rect.set_width(gt[i,2])
804 |                 gt_rect.set_height(gt[i,3])
805 | 
806 |             rect.set_xy(result_bb[i,:2])
807 |             rect.set_width(result_bb[i,2])
808 |             rect.set_height(result_bb[i,3])
809 | 
810 |             if display:
811 |                 plt.pause(.01)
812 |                 plt.draw()
813 |             if savefig:
814 |                 fig.savefig(os.path.join(savefig_dir,'%04d.jpg'%(i)),dpi=dpi)
815 | 
816 |         if opts['visual_log']:
817 |             if gt is None:
818 |                 print("Frame %d/%d, Score %.3f, Time %.3f" % \
819 |                     (i, len(img_list), target_score, spf))
820 |             else:
821 |                 print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
822 |                     (i, len(img_list), overlap_ratio(gt[i],result_bb[i])[0], target_score, spf)) 
823 | 
824 |         print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \
825 |             (i, len(img_list_v), overlap_ratio(gt[i], result_bb[i])[0], target_score, spf))
826 | 
827 |         iou_result[i]= overlap_ratio(gt[i],result_bb[i])[0]
828 | 
829 | 
830 |     fps = len(img_list_v) / spf_total
831 | 
832 |     # pdb.set_trace() 
833 |     # print("==>> epochID %d, L1-Loss %.4f, Time %.3f" % (epochID, total_l1_Loss/len(img_list_v), spf_total)) 
834 | 
835 | 
836 |     return iou_result, result_bb, fps, result
837 | 


--------------------------------------------------------------------------------
/MFGNet-rgbt-tracking-master/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | os.environ["CUDA_VISIBLE_DEVICES"]="1"  
  3 | import sys
  4 | import pickle
  5 | import time
  6 | 
  7 | import torch
  8 | import torch.optim as optim
  9 | from torch.autograd import Variable
 10 | import warnings
 11 | warnings.filterwarnings("ignore")
 12 | sys.path.insert(0,'./modules')
 13 | from data_prov import *
 14 | from model import *
 15 | from pretrain_options import *
 16 | from tracker import *
 17 | import numpy as np
 18 | 
 19 | import argparse
 20 | import pdb 
 21 | 
 22 | 
 23 | def set_optimizer(model, lr_base, lr_mult=pretrain_opts['lr_mult'], momentum=pretrain_opts['momentum'], w_decay=pretrain_opts['w_decay']):
 24 |     params = model.get_learnable_params()
 25 |     param_list = []
 26 |     for k, p in params.items():
 27 |         lr = lr_base
 28 |         for l, m in lr_mult.items():
 29 |             if k.startswith(l):
 30 |                 lr = lr_base * m
 31 |         param_list.append({'params': [p], 'lr': lr})
 32 |     optimizer = optim.SGD(param_list, lr=lr, momentum=momentum, weight_decay=w_decay)
 33 |     return optimizer
 34 | 
 35 | def genConfig(seq_path, set_type):
 36 | 
 37 |     path, seqname = os.path.split(seq_path)
 38 | 
 39 |     if set_type == 'OTB':
 40 |         img_list = sorted([seq_path + '/img/' + p for p in os.listdir(seq_path + '/img') if os.path.splitext(p)[1] == '.jpg'])
 41 | 
 42 |         if (seqname == 'Jogging') or (seqname == 'Skating2'):
 43 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.1.txt')
 44 |         elif seqname =='Human4':
 45 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.2.txt', delimiter=',')
 46 |         elif (seqname == 'BlurBody')  or (seqname == 'BlurCar1') or (seqname == 'BlurCar2') or (seqname == 'BlurCar3') \
 47 |                 or (seqname == 'BlurCar4') or (seqname == 'BlurFace') or (seqname == 'BlurOwl') or (seqname == 'Board') \
 48 |                 or (seqname == 'Box')   or (seqname == 'Car4')  or (seqname == 'CarScale') or (seqname == 'ClifBar') \
 49 |                 or (seqname == 'Couple')  or (seqname == 'Crossing')  or (seqname == 'Dog') or (seqname == 'FaceOcc1') \
 50 |                 or (seqname == 'Girl') or (seqname == 'Rubik') or (seqname == 'Singer1') or (seqname == 'Subway') \
 51 |                 or (seqname == 'Surfer') or (seqname == 'Sylvester') or (seqname == 'Toy') or (seqname == 'Twinnings') \
 52 |                 or (seqname == 'Vase') or (seqname == 'Walking') or (seqname == 'Walking2') or (seqname == 'Woman')   :
 53 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.txt')
 54 |         elif (seqname == 'Freeman4') or (seqname == 'Diving') or (seqname =='Freeman3') or (seqname =='Football1'):
 55 |             gt = np.loadtxt(seq_path + '/groundtruth_rect_revise.txt', delimiter=',')
 56 |         else:
 57 |             gt = np.loadtxt(seq_path + '/groundtruth_rect.txt', delimiter=',')
 58 | 
 59 |         if seqname == 'David':
 60 |             img_list = img_list[300:]
 61 |             # gt = gt[300:,:]
 62 |         if seqname == 'Football1':
 63 |             img_list = img_list[0:73]
 64 |         if seqname == 'Freeman3':
 65 |             img_list = img_list[0:459]
 66 |         if seqname == 'Freeman4':
 67 |             img_list = img_list[0:282]
 68 | 
 69 |     elif set_type=='VOT/2016':
 70 |         img_list = sorted([seq_path + '/'+p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.jpg'])
 71 |         gt = np.loadtxt(seq_path + '/groundtruth.txt', delimiter=',')
 72 | 
 73 |     elif set_type=='RGBT234':
 74 |         img_list = sorted([seq_path + '/'+p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.jpg'])
 75 |         gt = np.loadtxt(seq_path + '/groundtruth.txt', delimiter=',')
 76 | 
 77 |     elif set_type=='GTOT50':
 78 |         img_list = sorted([seq_path + '/'+p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.jpg'])
 79 |         gt = np.loadtxt(seq_path + '/groundtruth.txt', delimiter=',')
 80 | 
 81 |     ##polygon to rect
 82 |     if gt.shape[1] == 8:
 83 |         x_min = np.min(gt[:, [0, 2, 4, 6]], axis=1)[:, None]
 84 |         y_min = np.min(gt[:, [1, 3, 5, 7]], axis=1)[:, None]
 85 |         x_max = np.max(gt[:, [0, 2, 4, 6]], axis=1)[:, None]
 86 |         y_max = np.max(gt[:, [1, 3, 5, 7]], axis=1)[:, None]
 87 |         gt = np.concatenate((x_min, y_min, x_max - x_min, y_max - y_min), axis=1)
 88 | 
 89 |     return img_list, gt
 90 | 
 91 | 
 92 | 
 93 | 
 94 | 
 95 | 
 96 | 
 97 | def train_mdnet():
 98 | 
 99 |     ## set image directory
100 |     if pretrain_opts['set_type'] == 'GTOT50': 
101 |         img_home = '/DATA/wangxiao/Multi_Modal_RGBT_dataset_CSR/'
102 |         data_path = '/DATA/wangxiao/dfg-rgbt-RTMDNet-master-train-on-50/50.pkl'
103 | 
104 |     ## Init dataset ##
105 |     with open(data_path, 'rb') as fp:
106 |         data = pickle.load(fp)
107 | 
108 | 
109 |     K = len(data)
110 | 
111 |     ## Init model ##
112 |     model = MDNet(pretrain_opts['init_model_path'], K)
113 |     if pretrain_opts['adaptive_align']:
114 |         align_h = model.roi_align_model.aligned_height
115 |         align_w = model.roi_align_model.aligned_width
116 |         spatial_s = model.roi_align_model.spatial_scale
117 |         model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s)
118 | 
119 |     if pretrain_opts['use_gpu']:
120 |         model = model.cuda()
121 |     model.set_learnable_params(pretrain_opts['ft_layers'])
122 |     model.train()
123 |     
124 |     dataset = [None] * K
125 |     for k, (seqname, seq) in enumerate(data.items()):
126 |         img_list_v = seq['images_v']
127 |         img_list_i = seq['images_i']
128 |         videoPath_v = seq['v_videoPath']
129 |         videoPath_i = seq['i_videoPath']
130 |         # seqName = seq['seqName']
131 |         gt = seq['gt_i'] 
132 | 
133 |         if pretrain_opts['set_type'] == 'GTOT50':
134 |             img_dir = img_home + seqname
135 | 
136 |         dataset[k] = RegionDataset(img_dir, img_list_v, img_list_i, videoPath_v, videoPath_i, gt, model.receptive_field, pretrain_opts)
137 | 
138 | 
139 |     ## Init criterion and optimizer ##
140 |     binaryCriterion = BinaryLoss()
141 |     interDomainCriterion = nn.CrossEntropyLoss()
142 |     evaluator = Precision()
143 |     optimizer = set_optimizer(model, pretrain_opts['lr'])
144 | 
145 |     best_score = 0.
146 |     batch_cur_idx = 0
147 |     for i in range(pretrain_opts['n_cycles']):
148 |         print("==== Start Cycle %d ====" % (i))
149 |         k_list = np.random.permutation(K)
150 |         prec = np.zeros(K)
151 |         totalTripleLoss = np.zeros(K)
152 |         totalInterClassLoss = np.zeros(K)
153 | 
154 |         # pdb.set_trace() 
155 |         # k_list = k_list[:3]
156 |         for j, k in enumerate(k_list):
157 |             tic = time.time()
158 | 
159 |                 
160 |             cropped_scenes_v, cropped_scenes_i, pos_rois, neg_rois= dataset[k].next()
161 | 
162 | 
163 |             for sidx in range(0, len(cropped_scenes_v)):
164 |                 cur_scene_v = cropped_scenes_v[sidx]
165 |                 cur_scene_i = cropped_scenes_i[sidx]
166 |                 cur_pos_rois = pos_rois[sidx]
167 |                 cur_neg_rois = neg_rois[sidx]
168 | 
169 |                 cur_scene_v = Variable(cur_scene_v)
170 |                 cur_scene_i = Variable(cur_scene_i)
171 |                 cur_pos_rois = Variable(cur_pos_rois)
172 |                 cur_neg_rois = Variable(cur_neg_rois)
173 |                 if pretrain_opts['use_gpu']:
174 |                     cur_scene_v = cur_scene_v.cuda()
175 |                     cur_scene_i = cur_scene_i.cuda()
176 |                     cur_pos_rois = cur_pos_rois.cuda()
177 |                     cur_neg_rois = cur_neg_rois.cuda()
178 |                 
179 |                 # pdb.set_trace() 
180 |                 cur_feat_map_v, cur_feat_map_i, augmented_feats = model(cur_scene_v, cur_scene_i, k, out_layer='conv3')
181 |                	
182 |                 cur_pos_feats = model.roi_align_model(augmented_feats, cur_pos_rois)
183 |                 cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1)
184 |                 cur_neg_feats = model.roi_align_model(augmented_feats, cur_neg_rois)
185 |                 cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1)
186 | 
187 |                 # pdb.set_trace() 
188 | 
189 |                 if sidx == 0:
190 |                     pos_feats = [cur_pos_feats]
191 |                     neg_feats = [cur_neg_feats]
192 |                 else:
193 |                     pos_feats.append(cur_pos_feats)
194 |                     neg_feats.append(cur_neg_feats)
195 |                     
196 |             feat_dim = cur_neg_feats.size(1)
197 |             pos_feats = torch.stack(pos_feats,dim=0).view(-1,feat_dim)
198 |             neg_feats = torch.stack(neg_feats,dim=0).view(-1,feat_dim)
199 | 
200 | 
201 |             pos_score = model(pos_feats, pos_feats, k, in_layer='fc4')
202 |             neg_score = model(neg_feats, neg_feats, k, in_layer='fc4')
203 | 
204 |             cls_loss = binaryCriterion(pos_score, neg_score)
205 | 
206 |             ## inter frame classification
207 | 
208 |             interclass_label = Variable(torch.zeros((pos_score.size(0))).long())
209 |             if opts['use_gpu']:
210 |                 interclass_label = interclass_label.cuda()
211 |             total_interclass_score = pos_score[:,1].contiguous()
212 |             total_interclass_score = total_interclass_score.view((pos_score.size(0),1))
213 | 
214 |             K_perm = np.random.permutation(K)
215 |             K_perm = K_perm[0:100]
216 |             for cidx in K_perm:
217 |                 if k == cidx:
218 |                     continue
219 |                 else:
220 |                     interclass_score = model(pos_feats, pos_feats, cidx, in_layer='fc4')
221 |                     total_interclass_score = torch.cat((total_interclass_score,interclass_score[:,1].contiguous().view((interclass_score.size(0),1))),dim=1)
222 | 
223 |             interclass_loss = interDomainCriterion(total_interclass_score, interclass_label)
224 |             totalInterClassLoss[k] = interclass_loss.item()
225 | 
226 |             (cls_loss+0.1*interclass_loss).backward()
227 | 
228 |             batch_cur_idx+=1
229 |             if (batch_cur_idx%pretrain_opts['seqbatch_size'])==0:
230 |                 torch.nn.utils.clip_grad_norm(model.parameters(), pretrain_opts['grad_clip'])
231 |                 optimizer.step()
232 |                 model.zero_grad()
233 |                 batch_cur_idx = 0
234 | 
235 |             ## evaulator
236 |             prec[k] = evaluator(pos_score, neg_score)
237 |             ## computation latency
238 |             toc = time.time() - tic
239 | 
240 |             print("Cycle %2d, K %2d (%2d), BinLoss %.3f, Prec %.3f, interLoss %.3f, Time %.3f" % \
241 |                       (i, j, k, cls_loss.item(), prec[k], totalInterClassLoss[k], toc))
242 | 
243 |         cur_score = prec.mean()
244 |         try:
245 |             total_miou = sum(total_iou)/len(total_iou)
246 |         except:
247 |             total_miou = 0.
248 |         
249 |         print("Mean Precision: %.3f Inter Loss: %.3f IoU: %.3f" % (prec.mean(), totalInterClassLoss.mean(),total_miou))
250 |         
251 |         if cur_score > best_score:
252 |             best_score = cur_score
253 |             if pretrain_opts['use_gpu']:
254 |                 model = model.cpu()
255 |             states = {'shared_layers': model.layers.state_dict()}
256 |             print("Save model to %s" % pretrain_opts['model_path'])
257 |             torch.save(states, pretrain_opts['model_path'])
258 |             # torch.save(states, '/home/wangxiao/Downloads/ACM-MM-GML_RGBT_tracking/rgbt-RTMDNet-master/models/rgbt_rtmdnet.pth') 
259 |             if pretrain_opts['use_gpu']:
260 |                 model = model.cuda()
261 | 
262 | 
263 | if __name__ == "__main__":
264 | 
265 |     parser = argparse.ArgumentParser()
266 |     parser.add_argument("-set_type", default = 'GTOT50' )
267 |     parser.add_argument("-padding_ratio", default = 5., type =float)
268 |     parser.add_argument("-model_path", default ="./models/CBAM_dfg_rtmdnet_trained_on_50.pth", help = "model path")
269 |     parser.add_argument("-frame_interval", default = 1, type=int, help="frame interval in batch. ex) interval=1 -> [1 2 3 4 5], interval=2 ->[1 3 5]")
270 |     parser.add_argument("-init_model_path", default="./models/imagenet-vgg-m.mat")
271 |     parser.add_argument("-batch_frames", default = 8, type = int)
272 |     parser.add_argument("-lr", default=0.0001, type = float)
273 |     parser.add_argument("-batch_pos",default = 64, type = int)
274 |     parser.add_argument("-batch_neg", default = 196, type = int)
275 |     parser.add_argument("-n_cycles", default = 1000, type = int )
276 |     parser.add_argument("-adaptive_align", default = True, action = 'store_false')
277 |     parser.add_argument("-seqbatch_size", default=50, type=int)
278 | 
279 |     args = parser.parse_args()
280 | 
281 |     ##################################################################################
282 |     #########################Just modify opts in this script.#########################
283 |     ######################Becuase of synchronization of options#######################
284 |     ##################################################################################
285 |     ##option setting
286 |     pretrain_opts['set_type'] = args.set_type
287 |     pretrain_opts['padding_ratio']=args.padding_ratio
288 |     pretrain_opts['padded_img_size']=pretrain_opts['img_size']*int(pretrain_opts['padding_ratio'])
289 |     pretrain_opts['model_path']=args.model_path
290 |     pretrain_opts['frame_interval'] = args.frame_interval
291 |     pretrain_opts['init_model_path'] = args.init_model_path
292 |     pretrain_opts['batch_frames'] = args.batch_frames
293 |     pretrain_opts['lr'] = args.lr
294 |     pretrain_opts['batch_pos'] = args.batch_pos  # original = 64
295 |     pretrain_opts['batch_neg'] = args.batch_neg  # original = 192
296 |     pretrain_opts['n_cycles'] = args.n_cycles
297 |     pretrain_opts['adaptive_align']=args.adaptive_align
298 |     pretrain_opts['seqbatch_size'] = args.seqbatch_size
299 |     ##################################################################################
300 |     ############################Do not modify opts anymore.###########################
301 |     ######################Becuase of synchronization of options#######################
302 |     ##################################################################################
303 | 
304 |     print(pretrain_opts)
305 |     train_mdnet()
306 | 
307 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MFGNet_RGBT_Tracking_PyTorch
 2 | Official Implementation of MFGNet-RGBT-Tracker ("Dynamic Modality-Aware Filter Generation for RGB-T Tracking") with PyTorch  [[Project](https://sites.google.com/view/mfgrgbttrack/)]   [[Paper]()] 
 3 | 
 4 | 
 5 | 
 6 | Many RGB-T trackers attempt to attain robust feature representation by utilizing an adaptive weighting scheme (or attention mechanism). Different from these works, we propose a new dynamic modality-aware filter generation module (named MFGNet) to boost the message communication between visible and thermal data by adaptively adjusting the convolutional kernels for various input images in practical tracking. Our experimental results demonstrate the advantages of our proposed MFGNet for RGB-T tracking. 
 7 | 
 8 | 
 9 | 
10 | 
11 | ![rgbt_car10](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/pipelinev5.png) 
12 | 
13 | 
14 | 
15 | ## Demo:
16 | (Red: Ours, Blue: Ground Truth, Green: RT-MDNet)  
17 | 
18 | ![rgbt_car10](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/rgbt_car10.gif) 
19 | 
20 | ![rgbt_balancebike](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/rgbt_balancebike.gif) 
21 | 
22 | ![rgbt_flower1](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/rgbt_flower1.gif)
23 | 
24 | ![rgbt_kite4](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/rgbt_kite4.gif)
25 | 
26 | 
27 | ## Install: 
28 | This code is developed based on Python 3.7, PyTorch 1.0, CUDA 10.1, Ubuntu 16.04, Tesla P100 * 4. Install anything it warnings. 
29 | 
30 | RoI align module needs to compile first: 
31 | 
32 | CUDA_HOME=/usr/local/cuda-10.1 python setup.py build_ext --inplace 
33 | 
34 | 
35 | 
36 | ## Train and Test: 
37 | 1. generate the "50.pkl" with prepro_rgbt.py as the training data; 
38 | 
39 | 2. train the tracker with train.py; 
40 | 
41 | 3. train the rgbt_TANet with train_rgbtTANet.py; 
42 | 
43 | 4. Obtain the attention maps and run the test.py for rgbt-tracking. 
44 | 
45 | 
46 | 
47 | ## Results: 
48 | 
49 | ![rgbt_kite4](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/results_on_rgbt210_234.png)
50 | 
51 | ![rgbt_kite4](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/ComponentAnalysis.png)
52 | 
53 | you can also download our pre-trained models and raw results for comprison: [[Pretrained Models]()]  [[Raw Results]()] 
54 | 
55 | 
56 | 
57 | ## Acknowledgement: 
58 | * https://github.com/BossBobxuan/RT-MDNet 
59 | * https://github.com/NieXC/pytorch-mula 
60 | * https://github.com/luuuyi/CBAM.PyTorch 
61 | 
62 | 
63 | 
64 | 
65 | ## Citation: 
66 | If you use this code for your research, please cite the following paper: 
67 | ~~~
68 | @article{wang2020dfgrgbttrack,
69 |   title={Dynamic Modality-Aware Filter Generation for RGB-T Tracking},
70 |   author={Xiao Wang, Xiujun Shu, Shiliang Zhang, Bo Jiang, Yaowei Wang, Yonghong Tian, Feng Wu},
71 |   journal={arXiv preprint},
72 |   year={2020}
73 | }
74 | ~~~
75 | 
76 | If you have any questions, feel free to contact me via email: wangx03@pcl.ac.cn 
77 | 
78 | 
79 | 
80 | 
81 | 


--------------------------------------------------------------------------------
/environments.txt:
--------------------------------------------------------------------------------
 1 | Package         Version
 2 | --------------- -------------------
 3 | certifi         2019.9.11
 4 | cffi            1.13.1
 5 | cycler          0.10.0
 6 | decorator       4.4.1
 7 | imageio         2.6.1
 8 | joblib          0.14.0
 9 | kiwisolver      1.1.0
10 | matplotlib      3.1.1
11 | mkl-fft         1.0.14
12 | mkl-random      1.1.0
13 | mkl-service     2.3.0
14 | networkx        2.4
15 | numpy           1.17.3
16 | olefile         0.46
17 | opencv-python   4.1.1.26
18 | Pillow          6.2.0
19 | pip             19.3.1
20 | pycparser       2.19
21 | pyparsing       2.4.2
22 | python-dateutil 2.8.0
23 | PyWavelets      1.1.1
24 | scikit-image    0.16.2
25 | scikit-learn    0.21.3
26 | scipy           1.1.0
27 | setuptools      41.6.0.post20191030
28 | six             1.12.0
29 | sklearn         0.0
30 | torch           1.0.1
31 | torchvision     0.4.1a0+d94043a
32 | tqdm            4.46.0
33 | wheel           0.33.6
34 | 


--------------------------------------------------------------------------------
/pipelinev5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/pipelinev5.png


--------------------------------------------------------------------------------
/results_on_rgbt210_234.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/results_on_rgbt210_234.png


--------------------------------------------------------------------------------
/rgbt_balancebike.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/rgbt_balancebike.gif


--------------------------------------------------------------------------------
/rgbt_car10.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/rgbt_car10.gif


--------------------------------------------------------------------------------
/rgbt_flower1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/rgbt_flower1.gif


--------------------------------------------------------------------------------
/rgbt_kite4.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/rgbt_kite4.gif


--------------------------------------------------------------------------------