├── ComponentAnalysis.png ├── MFGNet-rgbt-tracking-master ├── 210_test1.py ├── 234.pkl ├── 234.txt ├── 50.pkl ├── 50.txt ├── __init__.py ├── daTANet_module │ ├── __init__.py │ ├── __pycache__ │ │ ├── generator.cpython-37.pyc │ │ ├── ops.cpython-37.pyc │ │ ├── resnet.cpython-37.pyc │ │ └── utils.cpython-37.pyc │ ├── generator.py │ ├── ops.py │ ├── resnet.py │ ├── testing.py │ ├── testing_234.py │ ├── train.py │ └── utils.py ├── models │ └── readme.txt ├── modules │ ├── bbreg.py │ ├── bbreg.pyc │ ├── data_prov.py │ ├── data_prov.pyc │ ├── img_cropper.py │ ├── img_cropper.pyc │ ├── model.py │ ├── model.pyc │ ├── prepro_data.py │ ├── prepro_data_imagenet.py │ ├── pretrain_options.py │ ├── pretrain_options.pyc │ └── roi_align │ │ ├── build │ │ ├── lib.linux-x86_64-3.7 │ │ │ └── roi_align_cuda.cpython-37m-x86_64-linux-gnu.so │ │ └── temp.linux-x86_64-3.7 │ │ │ └── src │ │ │ ├── roi_align_cuda.o │ │ │ └── roi_align_kernel_c.o │ │ └── functions │ │ ├── roi_align.py │ │ └── roi_align.pyc ├── test_234_dataset.py ├── tracker.py ├── tracker_backup.py └── train.py ├── README.md ├── environments.txt ├── pipelinev5.png ├── results_on_rgbt210_234.png ├── rgbt_balancebike.gif ├── rgbt_car10.gif ├── rgbt_flower1.gif └── rgbt_kite4.gif /ComponentAnalysis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/ComponentAnalysis.png -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/210_test1.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 3 | from os.path import join, isdir 4 | from tracker import * 5 | import numpy as np 6 | import argparse 7 | import pickle 8 | import math 9 | import pdb 10 | import torchvision.transforms as transforms 11 | import random 12 | import warnings 13 | warnings.filterwarnings("ignore") 14 | 15 | 16 | 17 | def genConfig(seq_path, set_type): 18 | 19 | path, seqname = os.path.split(seq_path) 20 | 21 | if set_type == 'OTB100': 22 | img_list = sorted([seq_path + '/img/' + p for p in os.listdir(seq_path + '/img') if os.path.splitext(p)[1] == '.png']) 23 | gt = np.loadtxt(seq_path + '/groundtruth_rect.txt', delimiter=',') 24 | 25 | ##################################################################### 26 | ##### For the RGBT dataset 27 | ##################################################################### 28 | elif set_type == 'dataset234': 29 | img_list_v = sorted([seq_path + '/visible/' + p for p in os.listdir(seq_path + '/visible') if os.path.splitext(p)[1] == '.jpg']) 30 | img_list_i = sorted([seq_path + '/infrared/' + p for p in os.listdir(seq_path + '/infrared') if os.path.splitext(p)[1] == '.jpg']) 31 | gt = np.loadtxt(seq_path + '/visible.txt', delimiter=',') 32 | 33 | elif set_type == 'dataset210': 34 | img_list_v = sorted([seq_path + '/visible/' + p for p in os.listdir(seq_path + '/visible') if os.path.splitext(p)[1] == '.jpg']) 35 | img_list_i = sorted([seq_path + '/infrared/' + p for p in os.listdir(seq_path + '/infrared') if os.path.splitext(p)[1] == '.jpg']) 36 | gt = np.loadtxt(seq_path + '/init.txt', delimiter=',') 37 | 38 | 39 | return img_list_v, img_list_i, gt 40 | 41 | 42 | 43 | 44 | if __name__ == "__main__": 45 | 46 | parser = argparse.ArgumentParser() 47 | parser.add_argument("-set_type", default = 'dataset210') 48 | parser.add_argument("-model_path", default = './models/test_CBAM_dfg_rtmdnet_trained_on_50.pth') 49 | parser.add_argument("-result_path", default = './result.npy') 50 | parser.add_argument("-visual_log",default=False, action= 'store_true') 51 | parser.add_argument("-visualize",default=False, action='store_true') 52 | parser.add_argument("-adaptive_align",default=True, action='store_false') 53 | parser.add_argument("-padding",default=1.2, type = float) 54 | parser.add_argument("-jitter",default=True, action='store_false') 55 | 56 | args = parser.parse_args() 57 | 58 | ################################################################################## 59 | #########################Just modify opts in this script.######################### 60 | ######################Becuase of synchronization of options####################### 61 | ################################################################################## 62 | ## option setting 63 | opts['model_path']=args.model_path 64 | opts['result_path']=args.result_path 65 | opts['visual_log']=args.visual_log 66 | opts['set_type']=args.set_type 67 | opts['visualize'] = args.visualize 68 | opts['adaptive_align'] = args.adaptive_align 69 | opts['padding'] = args.padding 70 | opts['jitter'] = args.jitter 71 | ################################################################################## 72 | ############################Do not modify opts anymore.########################### 73 | ######################Becuase of synchronization of options####################### 74 | ################################################################################## 75 | print(opts) 76 | 77 | 78 | ## path initialization 79 | dataset_path = '/disc2/naipeng.ye/wangxiao/acm_mm2020_experiments/' 80 | result_home = '/disc2/naipeng.ye/wangxiao/acm_mm2020_experiments/daTANet-cbam-dfg-rgbt-RTMDNet-master-train-on-50/trackingResults_rgbt210/' 81 | 82 | seq_home = dataset_path + opts['set_type'] 83 | seq_list = [f for f in os.listdir(seq_home) if isdir(join(seq_home,f))] 84 | seq_list = np.sort(seq_list) 85 | 86 | iou_list=[] 87 | fps_list=dict() 88 | bb_result = dict() 89 | result = dict() 90 | 91 | iou_list_nobb=[] 92 | bb_result_nobb = dict() 93 | for num, seq in enumerate(seq_list): 94 | 95 | if num<-1: 96 | continue 97 | 98 | already_done = os.listdir(result_home) 99 | 100 | if seq+"_rgbt210-daTANet-cbam-dfg-v1.txt" in already_done: 101 | print("==>> Skip this video: ", seq) 102 | else: 103 | txtName = seq + '_rgbt210-daTANet-cbam-dfg-v1.txt' 104 | fid = open(result_home + txtName, 'w') 105 | 106 | seq_path = seq_home + '/' + seq 107 | img_list_v, img_list_i, gt = genConfig(seq_path, opts['set_type']) 108 | 109 | iou_result, result_bb, fps, result_nobb = run_mdnet(img_list_v, img_list_i, gt[0], gt, seq = seq, display=opts['visualize']) 110 | 111 | enable_frameNum = 0. 112 | for iidx in range(len(iou_result)): 113 | if (math.isnan(iou_result[iidx])==False): 114 | enable_frameNum += 1. 115 | else: 116 | ## gt is not alowed 117 | iou_result[iidx] = 0. 118 | 119 | iou_list.append(iou_result.sum()/enable_frameNum) 120 | bb_result[seq] = result_bb 121 | fps_list[seq]=fps 122 | 123 | bb_result_nobb[seq] = result_nobb 124 | print('{} {} : {} , total mIoU:{}, fps:{}'.format(num,seq,iou_result.mean(), sum(iou_list)/len(iou_list),sum(fps_list.values())/len(fps_list))) 125 | 126 | 127 | for iidex in range(len(result_bb)): 128 | line = result_bb[iidex] 129 | 130 | # pdb.set_trace() 131 | fid.write(str(line[0])) 132 | fid.write(',') 133 | fid.write(str(line[1])) 134 | fid.write(',') 135 | fid.write(str(line[2])) 136 | fid.write(',') 137 | fid.write(str(line[3])) 138 | fid.write('\n') 139 | fid.close() 140 | 141 | 142 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/234.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/234.pkl -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/234.txt: -------------------------------------------------------------------------------- 1 | afterrain 2 | aftertree 3 | baby 4 | baginhand 5 | baketballwaliking 6 | balancebike 7 | basketball2 8 | bicyclecity 9 | bike 10 | bikeman 11 | bikemove1 12 | biketwo 13 | blackwoman 14 | bluebike 15 | blueCar 16 | boundaryandfast 17 | bus6 18 | call 19 | car 20 | car10 21 | car3 22 | car37 23 | car4 24 | car41 25 | car66 26 | caraftertree 27 | carLight 28 | carnotfar 29 | carnotmove 30 | carred 31 | child 32 | child1 33 | child3 34 | child4 35 | children2 36 | children3 37 | children4 38 | crossroad 39 | crouch 40 | cycle2 41 | cycle3 42 | cycle4 43 | cycle5 44 | diamond 45 | dog 46 | dog1 47 | dog10 48 | dog11 49 | elecbike 50 | elecbike10 51 | elecbike2 52 | elecbike3 53 | elecbikechange2 54 | elecbikeinfrontcar 55 | elecbikewithhat 56 | elecbikewithlight 57 | elecbikewithlight1 58 | face1 59 | flower1 60 | flower2 61 | fog 62 | fog6 63 | glass 64 | glass2 65 | graycar2 66 | green 67 | greentruck 68 | greyman 69 | greywoman 70 | guidepost 71 | hotglass 72 | hotkettle 73 | inglassandmobile 74 | jump 75 | kettle 76 | kite2 77 | kite4 78 | luggage 79 | man22 80 | man23 81 | man24 82 | man26 83 | man28 84 | man29 85 | man3 86 | man4 87 | man45 88 | man5 89 | man55 90 | man68 91 | man69 92 | man7 93 | man8 94 | man88 95 | man9 96 | manafterrain 97 | mancross 98 | car20 99 | cycle1 100 | floor-1 101 | man2 102 | mancross1 103 | manypeople 104 | redcar 105 | threeman 106 | twoelecbike1 107 | walkingwithbag2 108 | mancrossandup 109 | mandrivecar 110 | manfaraway 111 | maninblack 112 | maninglass 113 | maningreen2 114 | maninred 115 | manlight 116 | manoccpart 117 | manonboundary 118 | manonelecbike 119 | manontricycle 120 | manout2 121 | manup 122 | manwithbag 123 | manwithbag4 124 | manwithbasketball 125 | manwithluggage 126 | manwithumbrella 127 | manypeople1 128 | manypeople2 129 | mobile 130 | night2 131 | nightcar 132 | nightrun 133 | nightthreepeople 134 | notmove 135 | oldman 136 | oldman2 137 | oldwoman 138 | orangeman1 139 | people 140 | people1 141 | people3 142 | playsoccer 143 | push 144 | rainingwaliking 145 | raningcar 146 | redbag 147 | redcar2 148 | redmanchange 149 | rmo 150 | run 151 | run1 152 | run2 153 | scooter 154 | shake 155 | shoeslight 156 | single1 157 | single3 158 | soccer 159 | soccer2 160 | soccerinhand 161 | straw 162 | stroller 163 | supbus 164 | supbus2 165 | takeout 166 | tallman 167 | threeman2 168 | threepeople 169 | threewoman2 170 | together 171 | toy1 172 | toy3 173 | toy4 174 | tree2 175 | tree3 176 | tree5 177 | trees 178 | tricycle 179 | tricycle1 180 | tricycle2 181 | tricycle6 182 | tricycle9 183 | tricyclefaraway 184 | tricycletwo 185 | twoelecbike 186 | twoman 187 | twoman1 188 | twoman2 189 | twoperson 190 | twowoman 191 | twowoman1 192 | walking40 193 | walking41 194 | walkingman 195 | walkingman1 196 | walkingman12 197 | walkingman20 198 | walkingman41 199 | walkingmantiny 200 | walkingnight 201 | walkingtogether 202 | walkingtogether1 203 | walkingtogetherright 204 | walkingwithbag1 205 | walkingwoman 206 | whitebag 207 | whitecar 208 | whitecar3 209 | whitecar4 210 | whitecarafterrain 211 | whiteman1 212 | whitesuv 213 | woamn46 214 | woamnwithbike 215 | woman 216 | woman1 217 | woman100 218 | woman2 219 | woman3 220 | woman4 221 | woman48 222 | woman6 223 | woman89 224 | woman96 225 | woman99 226 | womancross 227 | womanfaraway 228 | womaninblackwithbike 229 | womanleft 230 | womanpink 231 | womanred 232 | womanrun 233 | womanwithbag6 234 | yellowcar 235 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/50.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/50.pkl -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/50.txt: -------------------------------------------------------------------------------- 1 | Minibus1 2 | BlackCar 3 | BlackSwan1 4 | BlueCar 5 | BusScale 6 | BusScale1 7 | carNig 8 | Crossing 9 | crowdNig 10 | Cycling 11 | DarkNig 12 | Exposure2 13 | Exposure4 14 | fastCar2 15 | FastCarNig 16 | FastMotor 17 | FastMotorNig 18 | Football 19 | GarageHover 20 | Gathering 21 | GoTogether 22 | Jogging 23 | LightOcc 24 | Minibus 25 | MinibusNig 26 | MinibusNigOcc 27 | Motorbike 28 | Motorbike1 29 | MotorNig 30 | occBike 31 | OccCar-1 32 | OccCar-2 33 | Otcbvs 34 | Otcbvs1 35 | Pool 36 | Quarreling 37 | RainyCar1 38 | RainyCar2 39 | RainyMotor1 40 | RainyMotor2 41 | RainyPeople 42 | Running 43 | Torabi 44 | Torabi1 45 | Tricycle 46 | tunnel 47 | Walking 48 | WalkingNig 49 | WalkingNig1 50 | WalkingOcc 51 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/__init__.py: -------------------------------------------------------------------------------- 1 | #### -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/daTANet_module/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .resnet18_vggm import * 3 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/generator.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/generator.cpython-37.pyc -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/ops.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/ops.cpython-37.pyc -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/resnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/resnet.cpython-37.pyc -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/daTANet_module/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/daTANet_module/generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | import torchvision.models as models 5 | # import torchvision.ops as torchops 6 | 7 | import math 8 | from torch.autograd import Variable 9 | from ops import * 10 | import pdb 11 | 12 | from torch.nn.parameter import Parameter 13 | import torch.nn.functional as F 14 | from torch.nn.modules.utils import _single, _pair, _triple 15 | 16 | from resnet import resnet18 17 | import numpy as np 18 | import cv2 19 | import pdb 20 | 21 | 22 | def make_conv_layers(cfg): 23 | layers = [] 24 | in_channels = 3 25 | for v in cfg: 26 | if v == 'M': 27 | layers += [maxpool2d()] 28 | else: 29 | conv = conv2d(in_channels, v) 30 | layers += [conv, relu(inplace=True)] 31 | in_channels = v 32 | return nn.Sequential(*layers) 33 | 34 | 35 | def make_deconv_layers(cfg): 36 | layers = [] 37 | in_channels = 4115 38 | for v in cfg: 39 | if v == 'U': 40 | layers += [nn.Upsample(scale_factor=2)] 41 | else: 42 | deconv = deconv2d(in_channels, v) 43 | layers += [deconv] 44 | in_channels = v 45 | return nn.Sequential(*layers) 46 | 47 | 48 | cfg = { 49 | 'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512], 50 | 'D': [512, 512, 512, 'U', 512, 512, 512, 'U', 256, 256, 256, 'U', 128, 128, 'U', 64, 64] 51 | } 52 | 53 | class _ConvNd(nn.Module): 54 | 55 | def __init__(self, in_channels, out_channels, kernel_size, stride, 56 | padding, dilation, transposed, output_padding, groups, bias): 57 | super(_ConvNd, self).__init__() 58 | if in_channels % groups != 0: 59 | raise ValueError('in_channels must be divisible by groups') 60 | if out_channels % groups != 0: 61 | raise ValueError('out_channels must be divisible by groups') 62 | self.in_channels = in_channels 63 | self.out_channels = out_channels 64 | self.kernel_size = kernel_size 65 | self.stride = stride 66 | self.padding = padding 67 | self.dilation = dilation 68 | self.transposed = transposed 69 | self.output_padding = output_padding 70 | self.groups = groups 71 | 72 | if bias: 73 | self.bias = Parameter(torch.Tensor(out_channels)) 74 | else: 75 | self.register_parameter('bias', None) 76 | self.reset_parameters() 77 | 78 | 79 | def reset_parameters(self): 80 | n = self.in_channels 81 | for k in self.kernel_size: 82 | n *= k 83 | stdv = 1. / math.sqrt(n) 84 | if self.bias is not None: 85 | self.bias.data.uniform_(-stdv, stdv) 86 | 87 | def __repr__(self): 88 | s = ('{name}({in_channels}, {out_channels}, kernel_size={kernel_size}' 89 | ', stride={stride}') 90 | if self.padding != (0,) * len(self.padding): 91 | s += ', padding={padding}' 92 | if self.dilation != (1,) * len(self.dilation): 93 | s += ', dilation={dilation}' 94 | if self.output_padding != (0,) * len(self.output_padding): 95 | s += ', output_padding={output_padding}' 96 | if self.groups != 1: 97 | s += ', groups={groups}' 98 | if self.bias is None: 99 | s += ', bias=False' 100 | s += ')' 101 | return s.format(name=self.__class__.__name__, **self.__dict__) 102 | 103 | 104 | class AdaptiveConv2d(_ConvNd): 105 | 106 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 107 | padding=0, dilation=1, groups=1, bias=True): 108 | kernel_size = _pair(kernel_size) 109 | stride = _pair(stride) 110 | padding = _pair(padding) 111 | dilation = _pair(dilation) 112 | super(AdaptiveConv2d, self).__init__( 113 | in_channels, out_channels, kernel_size, stride, padding, dilation, 114 | False, _pair(0), groups, bias) 115 | 116 | def forward(self, input, dynamic_weight): 117 | # Get batch num 118 | batch_num = input.size(0) 119 | 120 | # Reshape input tensor from size (N, C, H, W) to (1, N*C, H, W) 121 | input = input.view(1, -1, input.size(2), input.size(3)) 122 | 123 | # Reshape dynamic_weight tensor from size (N, C, H, W) to (1, N*C, H, W) 124 | dynamic_weight = dynamic_weight.view(-1, 1, dynamic_weight.size(2), dynamic_weight.size(3)) 125 | 126 | # Do convolution 127 | conv_rlt = F.conv2d(input, dynamic_weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 128 | 129 | # Reshape conv_rlt tensor from (1, N*C, H, W) to (N, C, H, W) 130 | conv_rlt = conv_rlt.view(batch_num, -1, conv_rlt.size(2), conv_rlt.size(3)) 131 | 132 | return conv_rlt 133 | 134 | 135 | def encoder(): 136 | return make_conv_layers(cfg['E']) 137 | 138 | def decoder(): 139 | return make_deconv_layers(cfg['D']) 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | ############################################################################# 148 | #### naive RGBT generator 149 | ############################################################################# 150 | 151 | class naive_Generator(nn.Module): 152 | def __init__(self): 153 | super(naive_Generator, self).__init__() 154 | self.encoder = resnet18() 155 | self.decoder = decoder() 156 | self.mymodules = nn.ModuleList([deconv2d(64, 1, kernel_size=1, padding = 0), nn.Sigmoid()]) 157 | 158 | 159 | def forward(self, tarObject, gray_tarObject, batch_imgClip, batch_grayClip): 160 | 161 | _, x_2, x_3 = self.encoder(tarObject) 162 | _, gray_x_2, gray_x_3 = self.encoder(gray_tarObject) 163 | 164 | _, frame1_feat2_v, frame1_feat3_v = self.encoder(batch_imgClip[0]) 165 | _, frame2_feat2_v, frame2_feat3_v = self.encoder(batch_imgClip[1]) 166 | _, frame3_feat2_v, frame3_feat3_v = self.encoder(batch_imgClip[2]) 167 | 168 | _, frame1_feat2_i, frame1_feat3_i = self.encoder(batch_grayClip[0]) 169 | _, frame2_feat2_i, frame2_feat3_i = self.encoder(batch_grayClip[1]) 170 | _, frame3_feat2_i, frame3_feat3_i = self.encoder(batch_grayClip[2]) 171 | 172 | 173 | x_3 = nn.functional.interpolate(x_3, size=[x_2.shape[2], x_2.shape[3]]) 174 | target_feats_v = torch.cat((x_2, x_3), dim=1) 175 | gray_x_3 = nn.functional.interpolate(gray_x_3, size=[gray_x_2.shape[2], gray_x_2.shape[3]]) 176 | target_feats_i = torch.cat((gray_x_2, gray_x_3), dim=1) 177 | target_feats = target_feats_v + target_feats_i 178 | 179 | 180 | frame1_feat3_v = nn.functional.interpolate(frame1_feat3_v, size=[frame1_feat2_v.shape[2], frame1_feat2_v.shape[3]]) 181 | frame1_feats_v = torch.cat((frame1_feat2_v, frame1_feat3_v), dim=1) 182 | frame1_feat3_i = nn.functional.interpolate(frame1_feat3_i, size=[frame1_feat2_i.shape[2], frame1_feat2_i.shape[3]]) 183 | frame1_feats_i = torch.cat((frame1_feat2_i, frame1_feat3_i), dim=1) 184 | frame1_feats = frame1_feats_v + frame1_feats_i 185 | 186 | frame2_feat3_v = nn.functional.interpolate(frame2_feat3_v, size=[frame2_feat2_v.shape[2], frame2_feat2_v.shape[3]]) 187 | frame2_feats_v = torch.cat((frame2_feat2_v, frame2_feat3_v), dim=1) 188 | frame2_feat3_i = nn.functional.interpolate(frame2_feat3_i, size=[frame2_feat2_i.shape[2], frame2_feat2_i.shape[3]]) 189 | frame2_feats_i = torch.cat((frame2_feat2_i, frame2_feat3_i), dim=1) 190 | frame2_feats = frame2_feats_v + frame2_feats_i 191 | 192 | 193 | frame3_feat3_v = nn.functional.interpolate(frame3_feat3_v, size=[frame3_feat2_v.shape[2], frame3_feat2_v.shape[3]]) 194 | frame3_feats_v = torch.cat((frame3_feat2_v, frame3_feat3_v), dim=1) 195 | frame3_feat3_i = nn.functional.interpolate(frame3_feat3_i, size=[frame3_feat2_i.shape[2], frame3_feat2_i.shape[3]]) 196 | frame3_feats_i = torch.cat((frame3_feat2_i, frame3_feat3_i), dim=1) 197 | frame3_feats = frame3_feats_v + frame3_feats_i 198 | 199 | ##### 200 | feat_temp1 = torch.cat((target_feats, frame1_feats), dim=1) 201 | feat_temp2 = torch.cat((frame2_feats, frame3_feats), dim=1) 202 | feat_final = torch.cat((feat_temp1, feat_temp2), dim=1) 203 | #### feat_final: torch.Size([3, 3072, 19, 19]) 204 | 205 | # pdb.set_trace() 206 | output = self.decoder(feat_final) 207 | output = self.mymodules[0](output) 208 | output = self.mymodules[1](output) 209 | 210 | return output 211 | 212 | 213 | class Recurrent_net(nn.Module): 214 | def __init__(self, size, in_channel, out_channel): 215 | super(Recurrent_net, self).__init__() 216 | self.size = size 217 | self.in_channel = in_channel 218 | self.out_channel = out_channel 219 | self.vertical = nn.LSTM(input_size=in_channel, hidden_size=256, batch_first=True, bidirectional=True) # each row 220 | self.horizontal = nn.LSTM(input_size=512, hidden_size=256, batch_first=True, bidirectional=True) # each column 221 | self.conv = nn.Conv2d(512, out_channel, 1) 222 | 223 | def forward(self, *input): 224 | x = input[0] 225 | temp = [] 226 | x = torch.transpose(x, 1, 3) # batch, width, height, in_channel 227 | for i in range(self.size): 228 | h, _ = self.vertical(x[:, :, i, :]) 229 | temp.append(h) # batch, width, 512 230 | x = torch.stack(temp, dim=2) # batch, width, height, 512 231 | temp = [] 232 | for i in range(self.size): 233 | h, _ = self.horizontal(x[:, i, :, :]) 234 | temp.append(h) # batch, width, 512 235 | x = torch.stack(temp, dim=3) # batch, height, 512, width 236 | x = torch.transpose(x, 1, 2) # batch, 512, height, width 237 | x = self.conv(x) 238 | return x 239 | 240 | 241 | ############################################################################# 242 | #### Direction-aware RGBT Target-aware Attention Module 243 | ############################################################################# 244 | 245 | class daGenerator(nn.Module): 246 | def __init__(self): 247 | super(daGenerator, self).__init__() 248 | self.encoder = resnet18() 249 | self.decoder = decoder() 250 | self.mymodules = nn.ModuleList([deconv2d(64, 1, kernel_size=1, padding = 0), nn.Sigmoid()]) 251 | 252 | self.conv1x1_1 = nn.Conv2d(3072, 1024, kernel_size = 1, stride =1, padding=0, bias=False) 253 | self.conv1x1_2 = nn.Conv2d(3072, 19, kernel_size = 1, stride =1, padding=0, bias=False) 254 | 255 | self.spatial_renet = Recurrent_net(19, 1024, 1024) 256 | self.temporal_renet = Recurrent_net(19, 19, 19) 257 | 258 | def forward(self, tarObject, gray_tarObject, batch_imgClip, batch_grayClip): 259 | 260 | _, x_2, x_3 = self.encoder(tarObject) 261 | _, gray_x_2, gray_x_3 = self.encoder(gray_tarObject) 262 | 263 | ## batch_imgClip: torch.Size([10, 3, 3, 300, 300]) 264 | _, frame1_feat2_v, frame1_feat3_v = self.encoder(batch_imgClip[:, 0]) ## torch.Size([10, 256, 19, 19]) 265 | _, frame2_feat2_v, frame2_feat3_v = self.encoder(batch_imgClip[:, 1]) 266 | _, frame3_feat2_v, frame3_feat3_v = self.encoder(batch_imgClip[:, 2]) 267 | 268 | _, frame1_feat2_i, frame1_feat3_i = self.encoder(batch_grayClip[:, 0]) 269 | _, frame2_feat2_i, frame2_feat3_i = self.encoder(batch_grayClip[:, 1]) 270 | _, frame3_feat2_i, frame3_feat3_i = self.encoder(batch_grayClip[:, 2]) 271 | 272 | x_3 = nn.functional.interpolate(x_3, size=[x_2.shape[2], x_2.shape[3]]) 273 | target_feats_v = torch.cat((x_2, x_3), dim=1) 274 | gray_x_3 = nn.functional.interpolate(gray_x_3, size=[gray_x_2.shape[2], gray_x_2.shape[3]]) 275 | target_feats_i = torch.cat((gray_x_2, gray_x_3), dim=1) 276 | target_feats = target_feats_v + target_feats_i 277 | 278 | 279 | frame1_feat3_v = nn.functional.interpolate(frame1_feat3_v, size=[frame1_feat2_v.shape[2], frame1_feat2_v.shape[3]]) 280 | frame1_feats_v = torch.cat((frame1_feat2_v, frame1_feat3_v), dim=1) 281 | frame1_feat3_i = nn.functional.interpolate(frame1_feat3_i, size=[frame1_feat2_i.shape[2], frame1_feat2_i.shape[3]]) 282 | frame1_feats_i = torch.cat((frame1_feat2_i, frame1_feat3_i), dim=1) 283 | frame1_feats = frame1_feats_v + frame1_feats_i 284 | 285 | frame2_feat3_v = nn.functional.interpolate(frame2_feat3_v, size=[frame2_feat2_v.shape[2], frame2_feat2_v.shape[3]]) 286 | frame2_feats_v = torch.cat((frame2_feat2_v, frame2_feat3_v), dim=1) 287 | frame2_feat3_i = nn.functional.interpolate(frame2_feat3_i, size=[frame2_feat2_i.shape[2], frame2_feat2_i.shape[3]]) 288 | frame2_feats_i = torch.cat((frame2_feat2_i, frame2_feat3_i), dim=1) 289 | frame2_feats = frame2_feats_v + frame2_feats_i 290 | 291 | 292 | frame3_feat3_v = nn.functional.interpolate(frame3_feat3_v, size=[frame3_feat2_v.shape[2], frame3_feat2_v.shape[3]]) 293 | frame3_feats_v = torch.cat((frame3_feat2_v, frame3_feat3_v), dim=1) 294 | frame3_feat3_i = nn.functional.interpolate(frame3_feat3_i, size=[frame3_feat2_i.shape[2], frame3_feat2_i.shape[3]]) 295 | frame3_feats_i = torch.cat((frame3_feat2_i, frame3_feat3_i), dim=1) 296 | frame3_feats = frame3_feats_v + frame3_feats_i 297 | 298 | ##### 299 | feat_temp1 = torch.cat((target_feats, frame1_feats), dim=1) 300 | feat_temp2 = torch.cat((frame2_feats, frame3_feats), dim=1) 301 | feat_final = torch.cat((feat_temp1, feat_temp2), dim=1) 302 | #### feat_final: torch.Size([3, 3072, 19, 19]) 303 | 304 | feat_temp = self.conv1x1_1(feat_final) ## torch.Size([3, 1024, 19, 19]) 305 | feat_encoded1 = self.spatial_renet(feat_temp) 306 | 307 | 308 | feat_encoded2 = self.conv1x1_2(feat_final) ## torch.Size([3, 19, 19, 19]) 309 | feat_encoded2 = torch.transpose(feat_encoded2, 1, 2) 310 | feat_encoded2 = self.temporal_renet(feat_encoded2) 311 | feat_encoded2 = torch.transpose(feat_encoded2, 1, 2) 312 | 313 | feat_final1 = torch.cat((feat_encoded1, feat_encoded2), dim=1) 314 | feat_final1 = torch.cat((feat_final1, feat_final), dim=1) 315 | 316 | # pdb.set_trace() 317 | output = self.decoder(feat_final1) 318 | output = self.mymodules[0](output) 319 | output = self.mymodules[1](output) 320 | 321 | return output 322 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/daTANet_module/ops.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | def conv2d(in_channels, out_channels, kernel_size = 3, padding = 1): 4 | return nn.Conv2d(in_channels, out_channels, kernel_size = kernel_size, padding = padding) 5 | 6 | def deconv2d(in_channels, out_channels, kernel_size = 3, padding = 1): 7 | return nn.ConvTranspose2d(in_channels, out_channels, kernel_size = kernel_size, padding = padding) 8 | 9 | def relu(inplace = True): # Change to True? 10 | return nn.ReLU(inplace) 11 | 12 | def maxpool2d(): 13 | return nn.MaxPool2d(2) 14 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/daTANet_module/resnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch.nn as nn 3 | from collections import OrderedDict 4 | import torch.utils.model_zoo as model_zoo 5 | from torchvision.models.resnet import BasicBlock, Bottleneck, model_urls 6 | 7 | 8 | class ResNet(nn.Module): 9 | """ ResNet network module. Allows extracting specific feature blocks.""" 10 | def __init__(self, block, layers, output_layers, num_classes=1000, inplanes=64): 11 | self.inplanes = inplanes 12 | super(ResNet, self).__init__() 13 | self.output_layers = output_layers 14 | self.conv1 = nn.Conv2d(3, inplanes, kernel_size=7, stride=2, padding=3, bias=False) 15 | self.bn1 = nn.BatchNorm2d(inplanes) 16 | self.relu = nn.ReLU(inplace=True) 17 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 18 | self.layer1 = self._make_layer(block, inplanes, layers[0]) 19 | self.layer2 = self._make_layer(block, inplanes*2, layers[1], stride=2) 20 | self.layer3 = self._make_layer(block, inplanes*4, layers[2], stride=2) 21 | self.layer4 = self._make_layer(block, inplanes*8, layers[3], stride=2) 22 | # self.avgpool = nn.AvgPool2d(7, stride=1) 23 | self.avgpool = nn.AdaptiveAvgPool2d((1,1)) 24 | self.fc = nn.Linear(inplanes*8 * block.expansion, num_classes) 25 | 26 | for m in self.modules(): 27 | if isinstance(m, nn.Conv2d): 28 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 29 | m.weight.data.normal_(0, math.sqrt(2. / n)) 30 | elif isinstance(m, nn.BatchNorm2d): 31 | m.weight.data.fill_(1) 32 | m.bias.data.zero_() 33 | 34 | def _make_layer(self, block, planes, blocks, stride=1): 35 | downsample = None 36 | if stride != 1 or self.inplanes != planes * block.expansion: 37 | downsample = nn.Sequential( 38 | nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), 39 | nn.BatchNorm2d(planes * block.expansion), 40 | ) 41 | 42 | layers = [] 43 | layers.append(block(self.inplanes, planes, stride, downsample)) 44 | self.inplanes = planes * block.expansion 45 | for i in range(1, blocks): 46 | layers.append(block(self.inplanes, planes)) 47 | 48 | return nn.Sequential(*layers) 49 | 50 | def _add_output_and_check(self, name, x, outputs, output_layers): 51 | if name in output_layers: 52 | outputs[name] = x 53 | return len(output_layers) == len(outputs) 54 | 55 | def forward(self, x, output_layers=None): 56 | """ Forward pass with input x. The output_layers specify the feature blocks which must be returned """ 57 | # outputs = OrderedDict() 58 | 59 | # if output_layers is None: 60 | # output_layers = self.output_layers 61 | 62 | x = self.conv1(x) 63 | x = self.bn1(x) 64 | x = self.relu(x) 65 | 66 | # if self._add_output_and_check('conv1', x, outputs, output_layers): 67 | # return outputs 68 | 69 | x = self.maxpool(x) 70 | 71 | x = self.layer1(x) 72 | 73 | # if self._add_output_and_check('layer1', x, outputs, output_layers): 74 | # return outputs 75 | 76 | x2_feat = self.layer2(x) 77 | 78 | # if self._add_output_and_check('layer2', x, outputs, output_layers): 79 | # return outputs 80 | 81 | x3_feat = self.layer3(x2_feat) 82 | 83 | # if self._add_output_and_check('layer3', x, outputs, output_layers): 84 | # return outputs 85 | 86 | x4_feat = self.layer4(x3_feat) 87 | 88 | # if self._add_output_and_check('layer4', x, outputs, output_layers): 89 | # return outputs 90 | 91 | # x = self.avgpool(x) 92 | # x = x.view(x.size(0), -1) 93 | # x = self.fc(x) 94 | 95 | # if self._add_output_and_check('fc', x, outputs, output_layers): 96 | # return outputs 97 | 98 | # if len(output_layers) == 1 and output_layers[0] == 'default': 99 | # return x 100 | 101 | # raise ValueError('output_layer is wrong.') 102 | 103 | return x2_feat, x3_feat, x4_feat 104 | 105 | 106 | 107 | 108 | 109 | 110 | def resnet18(output_layers=None, pretrained=True): 111 | """Constructs a ResNet-18 model. 112 | """ 113 | 114 | if output_layers is None: 115 | output_layers = ['default'] 116 | else: 117 | for l in output_layers: 118 | if l not in ['conv1', 'layer1', 'layer2', 'layer3', 'layer4', 'fc']: 119 | raise ValueError('Unknown layer: {}'.format(l)) 120 | 121 | model = ResNet(BasicBlock, [2, 2, 2, 2], output_layers) 122 | 123 | if pretrained: 124 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 125 | return model 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | def resnet50(output_layers=None, pretrained=False): 134 | """Constructs a ResNet-50 model. 135 | """ 136 | 137 | if output_layers is None: 138 | output_layers = ['default'] 139 | else: 140 | for l in output_layers: 141 | if l not in ['conv1', 'layer1', 'layer2', 'layer3', 'layer4', 'fc']: 142 | raise ValueError('Unknown layer: {}'.format(l)) 143 | 144 | model = ResNet(Bottleneck, [3, 4, 6, 3], output_layers) 145 | if pretrained: 146 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 147 | return model -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/daTANet_module/testing.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | import time 6 | import os 7 | os.environ["CUDA_VISIBLE_DEVICES"]="2" 8 | from tqdm import tqdm 9 | from torch.autograd import Variable 10 | import torchvision.transforms as transforms 11 | import random 12 | from generator import daGenerator 13 | from utils import * 14 | import pdb 15 | import os.path 16 | 17 | generator_path = './20200525_directionAware_TANet_rgbt_model.pkl' 18 | Generator = daGenerator() 19 | Generator.load_state_dict(torch.load(generator_path)) 20 | Generator.cuda() 21 | 22 | def to_variable(x, requires_grad=True): 23 | if torch.cuda.is_available(): 24 | x = x.cuda() 25 | return Variable(x,requires_grad) 26 | 27 | counter = 0 28 | start_time = time.time() 29 | 30 | attentionSave_path = "/home/wangxiao/experiments/directionAware_rgbt_TANet_module/rgbt_210_Attention/" 31 | 32 | dataset_path = "/wangxiao/dataset/RGB-T210/" 33 | 34 | 35 | video_files = os.listdir(dataset_path) 36 | video_files.sort() 37 | count = 0 38 | 39 | for videoidx in range(len(video_files)): 40 | videoName = video_files[videoidx] 41 | already_Done = os.listdir(attentionSave_path) 42 | 43 | if videoName in already_Done: 44 | print("==>> Skip this video .... ") 45 | else: 46 | dataset_img_path_v = dataset_path + videoName + "/visible/" 47 | dataset_img_files_v = os.listdir(dataset_img_path_v) 48 | dataset_img_path_i = dataset_path + videoName + "/infrared/" 49 | dataset_img_files_i = os.listdir(dataset_img_path_i) 50 | 51 | dataset_img_files_v.sort() 52 | dataset_img_files_i.sort() 53 | 54 | cursor = 0 55 | batch_size = 1 56 | clip_len = 3 57 | size = len(dataset_img_files_v) 58 | to_tensor = transforms.ToTensor() 59 | targetObject_v = torch.zeros(batch_size, 3, 300, 300) 60 | targetObject_i = torch.zeros(batch_size, 3, 300, 300) 61 | 62 | gt_path = dataset_path + videoName + "/init.txt" 63 | gt_files = np.loadtxt(gt_path, delimiter=',') 64 | initBBox = gt_files[0] 65 | 66 | initImg_path_v = dataset_img_files_v[0] 67 | initImg_path_i = dataset_img_files_i[0] 68 | 69 | initImage_v = cv2.imread(dataset_img_path_v + initImg_path_v) 70 | initImage_i = cv2.imread(dataset_img_path_i + initImg_path_i) 71 | 72 | tarObject_v = initImage_v[int(initBBox[1]):int(initBBox[1]+initBBox[3]), int(initBBox[0]):int(initBBox[0]+initBBox[2]), :] 73 | tarObject_i = initImage_i[int(initBBox[1]):int(initBBox[1]+initBBox[3]), int(initBBox[0]):int(initBBox[0]+initBBox[2]), :] 74 | 75 | tarObject_v = cv2.resize(tarObject_v, (300, 300), interpolation=cv2.INTER_LINEAR) 76 | tarObject_i = cv2.resize(tarObject_i, (300, 300), interpolation=cv2.INTER_LINEAR) 77 | 78 | targetObject_v[0] = to_tensor(tarObject_v) 79 | targetObject_i[0] = to_tensor(tarObject_i) 80 | # cv2.imwrite('./tarObject_v.png', tarObject_v) 81 | 82 | # pdb.set_trace() 83 | for idx in range(1, len(dataset_img_files_v)): 84 | 85 | batch_imgClip_v = torch.zeros(batch_size, clip_len, 3, 300, 300) 86 | batch_imgClip_i = torch.zeros(batch_size, clip_len, 3, 300, 300) 87 | 88 | #### initialize continuous 3 images 89 | if cursor < 1: 90 | v_prev_file = dataset_img_files_v[cursor] 91 | i_prev_file = dataset_img_files_i[cursor] 92 | else: 93 | v_prev_file = dataset_img_files_v[cursor-1] 94 | i_prev_file = dataset_img_files_i[cursor-1] 95 | 96 | v_curr_file = dataset_img_files_v[cursor] 97 | i_curr_file = dataset_img_files_i[cursor] 98 | 99 | if cursor == size: 100 | v_late_file = dataset_img_files_v[size-1] 101 | i_late_file = dataset_img_files_i[size-1] 102 | else: 103 | v_late_file = dataset_img_files_v[cursor] 104 | i_late_file = dataset_img_files_i[cursor] 105 | 106 | 107 | v_prev_img_path = os.path.join(dataset_img_path_v, v_prev_file) 108 | i_prev_img_path = os.path.join(dataset_img_path_i, i_prev_file) 109 | v_current_img_path = os.path.join(dataset_img_path_v, v_curr_file) 110 | i_current_img_path = os.path.join(dataset_img_path_i, i_curr_file) 111 | v_late_img_path = os.path.join(dataset_img_path_v, v_late_file) 112 | i_late_img_path = os.path.join(dataset_img_path_i, i_late_file) 113 | 114 | v_inputimage_prev = cv2.imread(v_prev_img_path) 115 | i_inputimage_prev = cv2.imread(i_prev_img_path) 116 | v_inputimage_current = cv2.imread(v_current_img_path) 117 | i_inputimage_current = cv2.imread(i_current_img_path) 118 | v_inputimage_late = cv2.imread(v_late_img_path) 119 | i_inputimage_late = cv2.imread(i_late_img_path) 120 | 121 | v_inputimage_prev = cv2.resize(v_inputimage_prev, (300, 300), interpolation=cv2.INTER_LINEAR) 122 | i_inputimage_prev = cv2.resize(i_inputimage_prev, (300, 300), interpolation=cv2.INTER_LINEAR) 123 | v_inputimage_current = cv2.resize(v_inputimage_current, (300, 300), interpolation=cv2.INTER_LINEAR) 124 | i_inputimage_current = cv2.resize(i_inputimage_current, (300, 300), interpolation=cv2.INTER_LINEAR) 125 | v_inputimage_late = cv2.resize(v_inputimage_late, (300, 300), interpolation=cv2.INTER_LINEAR) 126 | i_inputimage_late = cv2.resize(i_inputimage_late, (300, 300), interpolation=cv2.INTER_LINEAR) 127 | 128 | 129 | batch_imgClip_v[0, 0] = to_tensor(v_inputimage_prev) 130 | batch_imgClip_v[0, 1] = to_tensor(v_inputimage_current) 131 | batch_imgClip_v[0, 2] = to_tensor(v_inputimage_late) 132 | 133 | batch_imgClip_i[0, 0] = to_tensor(i_inputimage_prev) 134 | batch_imgClip_i[0, 1] = to_tensor(i_inputimage_current) 135 | batch_imgClip_i[0, 2] = to_tensor(i_inputimage_late) 136 | 137 | # pdb.set_trace() 138 | 139 | cursor += 1 140 | attention_map = Generator(targetObject_v.cuda(), targetObject_i.cuda(), batch_imgClip_v.cuda(), batch_imgClip_i.cuda()) 141 | attention_map = nn.functional.interpolate(attention_map, size=[v_inputimage_prev.shape[0], v_inputimage_prev.shape[1]]) 142 | 143 | # pdb.set_trace() 144 | new_Savepath = attentionSave_path + videoName 145 | 146 | if os.path.exists(new_Savepath): 147 | print(" ") 148 | else: 149 | os.mkdir(new_Savepath) 150 | 151 | pilTrans = transforms.ToPILImage() 152 | pilImg = pilTrans(attention_map[0].detach().cpu()) 153 | 154 | new_path = new_Savepath + "/" + str(cursor+1) + "_attentionMap.jpg" 155 | print('==>> Image saved to ', new_path) 156 | pilImg.save(new_path) 157 | 158 | 159 | 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/daTANet_module/testing_234.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | import time 6 | import os 7 | os.environ["CUDA_VISIBLE_DEVICES"]="2" 8 | from tqdm import tqdm 9 | from torch.autograd import Variable 10 | import torchvision.transforms as transforms 11 | import random 12 | from generator import daGenerator 13 | from utils import * 14 | import pdb 15 | import os.path 16 | 17 | generator_path = './20200525_directionAware_TANet_rgbt_model.pkl' 18 | Generator = daGenerator() 19 | Generator.load_state_dict(torch.load(generator_path)) 20 | Generator.cuda() 21 | 22 | def to_variable(x, requires_grad=True): 23 | if torch.cuda.is_available(): 24 | x = x.cuda() 25 | return Variable(x,requires_grad) 26 | 27 | counter = 0 28 | start_time = time.time() 29 | 30 | attentionSave_path = "/home/wangxiao/experiments/directionAware_rgbt_TANet_module/rgbt_234_Attention/" 31 | dataset_path = "/DATA/wangxiao/dataset/RGB-T234/" 32 | 33 | 34 | video_files = os.listdir(dataset_path) 35 | video_files.sort() 36 | count = 0 37 | 38 | for videoidx in range(len(video_files)): 39 | videoName = video_files[videoidx] 40 | already_Done = os.listdir(attentionSave_path) 41 | 42 | if videoName in already_Done: 43 | print("==>> Skip this video .... ") 44 | else: 45 | dataset_img_path_v = dataset_path + videoName + "/visible/" 46 | dataset_img_files_v = os.listdir(dataset_img_path_v) 47 | dataset_img_path_i = dataset_path + videoName + "/infrared/" 48 | dataset_img_files_i = os.listdir(dataset_img_path_i) 49 | 50 | dataset_img_files_v.sort() 51 | dataset_img_files_i.sort() 52 | 53 | cursor = 0 54 | batch_size = 1 55 | clip_len = 3 56 | size = len(dataset_img_files_v) 57 | to_tensor = transforms.ToTensor() 58 | targetObject_v = torch.zeros(batch_size, 3, 300, 300) 59 | targetObject_i = torch.zeros(batch_size, 3, 300, 300) 60 | 61 | gt_path = dataset_path + videoName + "/init.txt" 62 | gt_files = np.loadtxt(gt_path, delimiter=',') 63 | initBBox = gt_files[0] 64 | 65 | initImg_path_v = dataset_img_files_v[0] 66 | initImg_path_i = dataset_img_files_i[0] 67 | 68 | initImage_v = cv2.imread(dataset_img_path_v + initImg_path_v) 69 | initImage_i = cv2.imread(dataset_img_path_i + initImg_path_i) 70 | 71 | tarObject_v = initImage_v[int(initBBox[1]):int(initBBox[1]+initBBox[3]), int(initBBox[0]):int(initBBox[0]+initBBox[2]), :] 72 | tarObject_i = initImage_i[int(initBBox[1]):int(initBBox[1]+initBBox[3]), int(initBBox[0]):int(initBBox[0]+initBBox[2]), :] 73 | 74 | tarObject_v = cv2.resize(tarObject_v, (300, 300), interpolation=cv2.INTER_LINEAR) 75 | tarObject_i = cv2.resize(tarObject_i, (300, 300), interpolation=cv2.INTER_LINEAR) 76 | 77 | targetObject_v[0] = to_tensor(tarObject_v) 78 | targetObject_i[0] = to_tensor(tarObject_i) 79 | # cv2.imwrite('./tarObject_v.png', tarObject_v) 80 | 81 | # pdb.set_trace() 82 | for idx in range(1, len(dataset_img_files_v)): 83 | 84 | batch_imgClip_v = torch.zeros(batch_size, clip_len, 3, 300, 300) 85 | batch_imgClip_i = torch.zeros(batch_size, clip_len, 3, 300, 300) 86 | 87 | #### initialize continuous 3 images 88 | if cursor < 1: 89 | v_prev_file = dataset_img_files_v[cursor] 90 | i_prev_file = dataset_img_files_i[cursor] 91 | else: 92 | v_prev_file = dataset_img_files_v[cursor-1] 93 | i_prev_file = dataset_img_files_i[cursor-1] 94 | 95 | v_curr_file = dataset_img_files_v[cursor] 96 | i_curr_file = dataset_img_files_i[cursor] 97 | 98 | if cursor == size: 99 | v_late_file = dataset_img_files_v[size-1] 100 | i_late_file = dataset_img_files_i[size-1] 101 | else: 102 | v_late_file = dataset_img_files_v[cursor] 103 | i_late_file = dataset_img_files_i[cursor] 104 | 105 | 106 | v_prev_img_path = os.path.join(dataset_img_path_v, v_prev_file) 107 | i_prev_img_path = os.path.join(dataset_img_path_i, i_prev_file) 108 | v_current_img_path = os.path.join(dataset_img_path_v, v_curr_file) 109 | i_current_img_path = os.path.join(dataset_img_path_i, i_curr_file) 110 | v_late_img_path = os.path.join(dataset_img_path_v, v_late_file) 111 | i_late_img_path = os.path.join(dataset_img_path_i, i_late_file) 112 | 113 | v_inputimage_prev = cv2.imread(v_prev_img_path) 114 | i_inputimage_prev = cv2.imread(i_prev_img_path) 115 | v_inputimage_current = cv2.imread(v_current_img_path) 116 | i_inputimage_current = cv2.imread(i_current_img_path) 117 | v_inputimage_late = cv2.imread(v_late_img_path) 118 | i_inputimage_late = cv2.imread(i_late_img_path) 119 | 120 | v_inputimage_prev = cv2.resize(v_inputimage_prev, (300, 300), interpolation=cv2.INTER_LINEAR) 121 | i_inputimage_prev = cv2.resize(i_inputimage_prev, (300, 300), interpolation=cv2.INTER_LINEAR) 122 | v_inputimage_current = cv2.resize(v_inputimage_current, (300, 300), interpolation=cv2.INTER_LINEAR) 123 | i_inputimage_current = cv2.resize(i_inputimage_current, (300, 300), interpolation=cv2.INTER_LINEAR) 124 | v_inputimage_late = cv2.resize(v_inputimage_late, (300, 300), interpolation=cv2.INTER_LINEAR) 125 | i_inputimage_late = cv2.resize(i_inputimage_late, (300, 300), interpolation=cv2.INTER_LINEAR) 126 | 127 | 128 | batch_imgClip_v[0, 0] = to_tensor(v_inputimage_prev) 129 | batch_imgClip_v[0, 1] = to_tensor(v_inputimage_current) 130 | batch_imgClip_v[0, 2] = to_tensor(v_inputimage_late) 131 | 132 | batch_imgClip_i[0, 0] = to_tensor(i_inputimage_prev) 133 | batch_imgClip_i[0, 1] = to_tensor(i_inputimage_current) 134 | batch_imgClip_i[0, 2] = to_tensor(i_inputimage_late) 135 | 136 | # pdb.set_trace() 137 | 138 | cursor += 1 139 | attention_map = Generator(targetObject_v.cuda(), targetObject_i.cuda(), batch_imgClip_v.cuda(), batch_imgClip_i.cuda()) 140 | attention_map = nn.functional.interpolate(attention_map, size=[v_inputimage_prev.shape[0], v_inputimage_prev.shape[1]]) 141 | 142 | # pdb.set_trace() 143 | new_Savepath = attentionSave_path + videoName 144 | 145 | if os.path.exists(new_Savepath): 146 | print(" ") 147 | else: 148 | os.mkdir(new_Savepath) 149 | 150 | pilTrans = transforms.ToPILImage() 151 | pilImg = pilTrans(attention_map[0].detach().cpu()) 152 | 153 | new_path = new_Savepath + "/" + str(cursor+1) + "_attentionMap.jpg" 154 | print('==>> Image saved to ', new_path) 155 | pilImg.save(new_path) 156 | 157 | 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/daTANet_module/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | import time 6 | import os 7 | os.environ["CUDA_VISIBLE_DEVICES"]="2" 8 | 9 | from torch.autograd import Variable 10 | from generator import daGenerator 11 | from utils import * 12 | import pdb 13 | import warnings 14 | warnings.filterwarnings("ignore") 15 | import torchvision.transforms as transforms 16 | import random 17 | 18 | batch_size = 5 19 | lr = 1e-5 20 | 21 | generator = daGenerator() 22 | 23 | # generatorPath = "./directionAware_TANet_rgbt_model.pkl" 24 | # generator_weights = torch.load(generatorPath) 25 | # generator.load_state_dict(generator_weights) 26 | 27 | if torch.cuda.is_available(): 28 | generator.cuda() 29 | 30 | criterion = nn.BCELoss() 31 | g_optim = torch.optim.Adagrad(generator.parameters(), lr=lr) 32 | num_epoch = 30 33 | 34 | def to_variable(x, requires_grad=True): 35 | if torch.cuda.is_available(): 36 | x = x.cuda() 37 | return Variable(x,requires_grad) 38 | 39 | start_time = time.time() 40 | DIR_TO_SAVE = "./generator_output/" 41 | if not os.path.exists(DIR_TO_SAVE): 42 | os.makedirs(DIR_TO_SAVE) 43 | 44 | generator.train() 45 | 46 | 47 | attention_path = "/home/wangxiao/targetAttention_train_dataset/" 48 | 49 | video_files = os.listdir(attention_path) 50 | random.shuffle(video_files) 51 | video_files = video_files[:300] 52 | 53 | 54 | 55 | count = 0 56 | 57 | 58 | 59 | for current_epoch in range(num_epoch): 60 | g_cost_avg = 0 61 | 62 | for videoidx in range(len(video_files)): 63 | videoName = video_files[videoidx] 64 | 65 | dataset_img_path = attention_path + videoName + "/image/" 66 | dataset_img_files = os.listdir(dataset_img_path) 67 | 68 | dataset_mask_path = attention_path + videoName + "/mask/" 69 | dataset_tarObject_path = attention_path + videoName + "/tarObject/" 70 | 71 | numBatches = len(dataset_img_files) / batch_size 72 | cursor = 0 73 | 74 | # pdb.set_trace() 75 | for idx in range(int(numBatches)): 76 | 77 | size = len(dataset_img_files) 78 | 79 | if cursor + batch_size > size: 80 | cursor = 0 81 | # np.random.shuffle(dataset_img_files) 82 | np.sort(dataset_img_files) 83 | 84 | batch_img = torch.zeros(batch_size, 3, 300, 300) 85 | batch_map = torch.zeros(batch_size, 1, 300, 300) 86 | targetObject_img = torch.zeros(batch_size, 3, 300, 300) 87 | targetObject_gray = torch.zeros(batch_size, 3, 300, 300) 88 | 89 | clip_len = 3 90 | batch_imgClip = torch.zeros(batch_size, clip_len, 3, 300, 300) 91 | batch_grayClip = torch.zeros(batch_size, clip_len, 3, 300, 300) 92 | 93 | to_tensor = transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0. 94 | 95 | for batchidx in range(batch_size): 96 | 97 | #### initialize continuous 3 images 98 | if cursor < 1: 99 | prev_file = dataset_img_files[cursor] 100 | else: 101 | prev_file = dataset_img_files[cursor-1] 102 | 103 | curr_file = dataset_img_files[cursor] 104 | 105 | if cursor == size: 106 | late_file = dataset_img_files[size-1] 107 | else: 108 | late_file = dataset_img_files[cursor] 109 | 110 | imgIndex = curr_file[-12:] 111 | 112 | prev_imgIndex = prev_file[-12:] 113 | late_imgIndex = late_file[-12:] 114 | # print(videoName, " ", imgIndex) 115 | 116 | targetObject_img_path = os.path.join(dataset_tarObject_path, videoName + '_target-00000001.jpg') 117 | full_img_path = os.path.join(dataset_img_path, videoName + "_image-" + imgIndex) 118 | 119 | prev_full_img_path = os.path.join(dataset_img_path, videoName + "_image-" + prev_imgIndex) 120 | late_full_img_path = os.path.join(dataset_img_path, videoName + "_image-" + late_imgIndex) 121 | 122 | full_map_path = os.path.join(dataset_mask_path, videoName + "_mask-" + imgIndex) 123 | cursor += 1 124 | 125 | inputimage = cv2.imread(full_img_path) 126 | prev_inputimage = cv2.imread(prev_full_img_path) 127 | late_inputimage = cv2.imread(late_full_img_path) 128 | 129 | 130 | #### for the gray image: 131 | gray_prev_inputimage = cv2.cvtColor(prev_inputimage, cv2.COLOR_BGR2GRAY) 132 | gray_prev_inputimage = to_tensor(gray_prev_inputimage) 133 | gray_prev_inputimage = torch.stack([gray_prev_inputimage, gray_prev_inputimage, gray_prev_inputimage], 1)[0] 134 | batch_grayClip[batchidx, 0] = gray_prev_inputimage 135 | 136 | gray_inputimage = cv2.cvtColor(inputimage, cv2.COLOR_BGR2GRAY) 137 | gray_inputimage = to_tensor(gray_inputimage) 138 | gray_inputimage = torch.stack([gray_inputimage, gray_inputimage, gray_inputimage], 1)[0] 139 | batch_grayClip[batchidx, 1] = gray_inputimage 140 | 141 | gray_late_inputimage = cv2.cvtColor(prev_inputimage, cv2.COLOR_BGR2GRAY) 142 | gray_late_inputimage = to_tensor(gray_late_inputimage) 143 | gray_late_inputimage = torch.stack([gray_late_inputimage, gray_late_inputimage, gray_late_inputimage], 1)[0] 144 | batch_grayClip[batchidx, 2] = gray_late_inputimage 145 | 146 | 147 | # pdb.set_trace() 148 | batch_img[batchidx] = to_tensor(inputimage) 149 | batch_imgClip[batchidx, 0] = to_tensor(prev_inputimage) 150 | batch_imgClip[batchidx, 1] = to_tensor(inputimage) 151 | batch_imgClip[batchidx, 2] = to_tensor(late_inputimage) 152 | 153 | targetObjectimage = cv2.imread(targetObject_img_path) 154 | targetObject_img[batchidx] = to_tensor(targetObjectimage) 155 | 156 | gray_targetObjectimage = cv2.cvtColor(targetObjectimage, cv2.COLOR_BGR2GRAY) 157 | gray_targetObjectimage = to_tensor(gray_targetObjectimage) 158 | gray_targetObjectimage = torch.stack([gray_targetObjectimage, gray_targetObjectimage, gray_targetObjectimage], 1)[0] 159 | targetObject_gray[batchidx] = gray_targetObjectimage 160 | 161 | 162 | saliencyimage = cv2.imread(full_map_path, 0) 163 | saliencyimage = np.expand_dims(saliencyimage, axis=2) 164 | batch_map[batchidx] = to_tensor(saliencyimage) 165 | 166 | 167 | 168 | batch_img = to_variable(batch_img, requires_grad=True) 169 | batch_map = to_variable(batch_map, requires_grad=False) 170 | targetObject_img = to_variable(targetObject_img, requires_grad=True) 171 | targetObject_gray = to_variable(targetObject_gray, requires_grad=True) 172 | batch_imgClip = to_variable(batch_imgClip, requires_grad=True) 173 | batch_grayClip = to_variable(batch_grayClip, requires_grad=True) 174 | 175 | val_batchImg = batch_img 176 | val_targetObjectImg = targetObject_img 177 | val_gray_targetObjectimage = targetObject_gray 178 | val_imgClip = batch_imgClip 179 | val_batch_grayClip = batch_grayClip 180 | 181 | count = count + 1 182 | 183 | g_optim.zero_grad() 184 | attention_map = generator(targetObject_img, targetObject_gray, batch_imgClip, batch_grayClip) 185 | 186 | batch_map = nn.functional.interpolate(batch_map, size=[attention_map.shape[2], attention_map.shape[3]]) 187 | 188 | 189 | # pdb.set_trace() 190 | g_gen_loss = criterion(attention_map, batch_map) 191 | g_loss = torch.sum(g_gen_loss) 192 | g_cost_avg += g_loss.item() 193 | g_loss.backward() 194 | g_optim.step() 195 | 196 | 197 | print("==>> Epoch [%d/%d], g_gen_loss: %.4f, vidIndex [%d/%d], LR: %.6f, time: %4.4f" % \ 198 | (current_epoch, num_epoch, g_loss.item(), videoidx, len(video_files), lr, time.time()-start_time)) 199 | 200 | 201 | # validation 202 | out = generator(val_targetObjectImg, val_gray_targetObjectimage, val_imgClip, val_batch_grayClip) 203 | map_out = out.cpu().data.squeeze(0) 204 | for iiidex in range(batch_size): 205 | new_path = DIR_TO_SAVE + str(current_epoch) + str(iiidex) + ".jpg" 206 | pilTrans = transforms.ToPILImage() 207 | pilImg = pilTrans(map_out[iiidex]) 208 | # print('==>> Image saved to ', new_path) 209 | pilImg.save(new_path) 210 | 211 | 212 | g_cost_avg /= numBatches 213 | 214 | # pdb.set_trace() 215 | # Save weights 216 | if current_epoch % 1 == 0: 217 | print("==>> save checkpoints ... ", ' ==>> Train_loss->', (g_cost_avg)) 218 | torch.save(generator.state_dict(), '20200525_directionAware_TANet_rgbt_model.pkl') 219 | 220 | 221 | 222 | 223 | 224 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/daTANet_module/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import torch 6 | from torch.autograd import Variable 7 | import torch.nn as nn 8 | import pdb 9 | 10 | from PIL import Image 11 | 12 | def to_variable(x,requires_grad=True): 13 | if torch.cuda.is_available(): 14 | x = x.cuda() 15 | return Variable(x,requires_grad) 16 | 17 | def show(img): 18 | #print(img.shape) 19 | pilTrans = transforms.ToPILImage() 20 | pilImg = pilTrans(img) 21 | s = np.array(pilImg) 22 | plt.figure() 23 | plt.imshow(s) 24 | 25 | def show_gray(img): 26 | print(img.shape) 27 | pilTrans = transforms.ToPILImage() 28 | pilImg = pilTrans(img) 29 | s = np.array(pilImg) 30 | plt.figure() 31 | plt.imshow(s) 32 | 33 | def save_gray(img, path): 34 | pilTrans = transforms.ToPILImage() 35 | pilImg = pilTrans(img) 36 | print('Image saved to ', path) 37 | pilImg.save(path) 38 | 39 | 40 | 41 | 42 | def predict(model, img, validation_targetObject): 43 | to_tensor = transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0. 44 | im = to_tensor(img) 45 | val_targetObject = to_tensor(validation_targetObject) 46 | #show(im) 47 | inp = to_variable(im.unsqueeze(0), False) 48 | inp = nn.functional.interpolate(inp, size=[300, 300]) 49 | 50 | val_targetObject_ = to_variable(val_targetObject.unsqueeze(0), False) 51 | val_targetObject_ = nn.functional.interpolate(val_targetObject_, size=[100, 100]) 52 | 53 | #print(inp.size()) 54 | 55 | out = model(inp, val_targetObject_) 56 | out = nn.functional.interpolate(out, size=[im.shape[1], im.shape[2]]) 57 | 58 | map_out = out.cpu().data.squeeze(0) 59 | pilTrans = transforms.ToPILImage() 60 | pilImg = pilTrans(map_out) 61 | dynamic_atttentonMAP = np.asarray(pilImg) 62 | 63 | return dynamic_atttentonMAP 64 | 65 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/models/readme.txt: -------------------------------------------------------------------------------- 1 | Download our pre-trained model from Google drive, or train this network yourself. -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/bbreg.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from sklearn.linear_model import Ridge 3 | import numpy as np 4 | 5 | from utils import * 6 | 7 | class BBRegressor(): 8 | def __init__(self, img_size, alpha=1000, overlap=[0.6, 1], scale=[1, 2]): 9 | self.img_size = img_size 10 | self.alpha = alpha 11 | self.overlap_range = overlap 12 | self.scale_range = scale 13 | self.model = Ridge(alpha=self.alpha) 14 | 15 | def train(self, X, bbox, gt): 16 | X = X.cpu().numpy() 17 | bbox = np.copy(bbox) 18 | gt = np.copy(gt) 19 | 20 | if gt.ndim==1: 21 | gt = gt[None,:] 22 | 23 | r = overlap_ratio(bbox, gt) 24 | s = np.prod(bbox[:,2:], axis=1) / np.prod(gt[0,2:]) 25 | idx = (r >= self.overlap_range[0]) * (r <= self.overlap_range[1]) * \ 26 | (s >= self.scale_range[0]) * (s <= self.scale_range[1]) 27 | 28 | X = X[idx] 29 | bbox = bbox[idx] 30 | 31 | Y = self.get_examples(bbox, gt) 32 | 33 | self.model.fit(X, Y) 34 | 35 | def predict(self, X, bbox): 36 | X = X.cpu().numpy() 37 | bbox_ = np.copy(bbox) 38 | 39 | Y = self.model.predict(X) 40 | 41 | bbox_[:,:2] = bbox_[:,:2] + bbox_[:,2:]/2 42 | bbox_[:,:2] = Y[:,:2] * bbox_[:,2:] + bbox_[:,:2] 43 | bbox_[:,2:] = np.exp(Y[:,2:]) * bbox_[:,2:] 44 | bbox_[:,:2] = bbox_[:,:2] - bbox_[:,2:]/2 45 | 46 | r = overlap_ratio(bbox, bbox_) 47 | s = np.prod(bbox[:,2:], axis=1) / np.prod(bbox_[:,2:], axis=1) 48 | idx = (r >= self.overlap_range[0]) * (r <= self.overlap_range[1]) * \ 49 | (s >= self.scale_range[0]) * (s <= self.scale_range[1]) 50 | idx = np.logical_not(idx) 51 | bbox_[idx] = bbox[idx] 52 | 53 | bbox_[:,:2] = np.maximum(bbox_[:,:2], 0) 54 | bbox_[:,2:] = np.minimum(bbox_[:,2:], self.img_size - bbox[:,:2]) 55 | 56 | return bbox_ 57 | 58 | def get_examples(self, bbox, gt): 59 | bbox[:,:2] = bbox[:,:2] + bbox[:,2:]/2 60 | gt[:,:2] = gt[:,:2] + gt[:,2:]/2 61 | 62 | dst_xy = (gt[:,:2] - bbox[:,:2]) / bbox[:,2:] 63 | dst_wh = np.log(gt[:,2:] / bbox[:,2:]) 64 | 65 | Y = np.concatenate((dst_xy, dst_wh), axis=1) 66 | return Y 67 | 68 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/bbreg.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/bbreg.pyc -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/data_prov.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | from PIL import Image 4 | 5 | import torch 6 | import torch.utils.data as data 7 | import matplotlib.pyplot as plt 8 | from utils import * 9 | 10 | import matplotlib.patches as patches 11 | 12 | import os 13 | from sample_generator import * 14 | 15 | import sys 16 | from pretrain_options import * 17 | 18 | from img_cropper import * 19 | import pdb 20 | 21 | 22 | 23 | class RegionDataset(data.Dataset): 24 | def __init__(self, img_dir, img_list_v, img_list_i, videoPath_v, videoPath_i, gt, receptive_field, opts): 25 | 26 | # self.img_list_v = np.array([os.path.join(img_dir, img) for img in img_list_v]) 27 | # self.img_list_i = np.array([os.path.join(img_dir, img) for img in img_list_i]) 28 | 29 | self.img_list_v = np.array([img_dir+ '/v/' +img for img in img_list_v]) 30 | self.img_list_i = np.array([img_dir+ '/i/' +img for img in img_list_i]) 31 | 32 | self.videoPath_v = videoPath_v 33 | self.videoPath_i = videoPath_i 34 | 35 | self.gt = gt 36 | 37 | self.batch_frames = pretrain_opts['batch_frames'] 38 | self.batch_pos = pretrain_opts['batch_pos'] 39 | self.batch_neg = pretrain_opts['batch_neg'] 40 | 41 | self.overlap_pos = pretrain_opts['overlap_pos'] 42 | self.overlap_neg = pretrain_opts['overlap_neg'] 43 | 44 | 45 | self.crop_size = pretrain_opts['img_size'] 46 | self.padding = pretrain_opts['padding'] 47 | 48 | self.index = np.random.permutation(len(self.img_list_v)) 49 | self.pointer = 0 50 | 51 | image_v = Image.open(self.img_list_v[0]).convert('RGB') 52 | self.scene_generator = SampleGenerator('gaussian', image_v.size,trans_f=1.5, scale_f=1.2,valid=True) 53 | self.pos_generator = SampleGenerator('gaussian', image_v.size, 0.1, 1.2, 1.1, True) 54 | self.neg_generator = SampleGenerator('uniform', image_v.size, 1, 1.2, 1.1, True) 55 | 56 | self.receptive_field = receptive_field 57 | 58 | self.interval = pretrain_opts['frame_interval'] 59 | self.img_crop_model = imgCropper(pretrain_opts['padded_img_size']) 60 | self.img_crop_model.eval() 61 | if pretrain_opts['use_gpu']: 62 | self.img_crop_model.gpuEnable() 63 | 64 | def __iter__(self): 65 | return self 66 | 67 | def __next__(self): 68 | 69 | next_pointer = min(self.pointer + self.batch_frames, len(self.img_list_v)) 70 | idx = self.index[self.pointer:next_pointer] 71 | if len(idx) < self.batch_frames: 72 | self.index = np.random.permutation(len(self.img_list_v)) 73 | next_pointer = self.batch_frames - len(idx) 74 | idx = np.concatenate((idx, self.index[:next_pointer])) 75 | self.pointer = next_pointer 76 | 77 | 78 | n_pos = self.batch_pos 79 | n_neg = self.batch_neg 80 | 81 | scenes_i = [] 82 | scenes_v = [] 83 | for i, (img_path_v, img_path_i, bbox) in enumerate(zip(self.img_list_v[idx], self.img_list_i[idx], self.gt[idx])): 84 | image_v = Image.open(img_path_v).convert('RGB') 85 | image_v = np.asarray(image_v) 86 | 87 | image_i = Image.open(img_path_i).convert('RGB') 88 | image_i = np.asarray(image_i) 89 | 90 | bbox[2] = bbox[2] - bbox[0] 91 | bbox[3] = bbox[3] - bbox[1] 92 | 93 | ishape = image_v.shape 94 | pos_examples = gen_samples(SampleGenerator('gaussian', (ishape[1],ishape[0]), 0.1, 1.2, 1.1, False), bbox, n_pos, overlap_range=self.overlap_pos) 95 | neg_examples = gen_samples(SampleGenerator('uniform', (ishape[1],ishape[0]), 1, 1.2, 1.1, False), bbox, n_neg, overlap_range=self.overlap_neg) 96 | 97 | # compute padded sample 98 | padded_x1 = (neg_examples[:, 0]-neg_examples[:,2]*(pretrain_opts['padding']-1.)/2.).min() 99 | padded_y1 = (neg_examples[:, 1]-neg_examples[:,3]*(pretrain_opts['padding']-1.)/2.).min() 100 | padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2]*(pretrain_opts['padding']+1.)/2.).max() 101 | padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3]*(pretrain_opts['padding']+1.)/2.).max() 102 | padded_scene_box = np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1)) 103 | 104 | jitter_scale = 1.1 ** np.clip(3.*np.random.randn(1,1),-2,2) 105 | crop_img_size = (padded_scene_box[2:4] * ((pretrain_opts['img_size'], pretrain_opts['img_size']) / bbox[2:4])).astype('int64') * jitter_scale[0][0] 106 | cropped_image_v, cur_image_var_v = self.img_crop_model.crop_image(image_v, np.reshape(padded_scene_box, (1, 4)), crop_img_size) 107 | cropped_image_v = cropped_image_v - 128. 108 | 109 | cropped_image_i, cur_image_var_i = self.img_crop_model.crop_image(image_i, np.reshape(padded_scene_box, (1, 4)), crop_img_size) 110 | cropped_image_i = cropped_image_i - 128. 111 | 112 | if pretrain_opts['use_gpu']: 113 | cropped_image_i = cropped_image_i.data.cpu() 114 | cur_image_var_i = cur_image_var_i.cpu() 115 | 116 | cropped_image_v = cropped_image_v.data.cpu() 117 | cur_image_var_v = cur_image_var_v.cpu() 118 | 119 | scenes_v.append(cropped_image_v) 120 | scenes_i.append(cropped_image_i) 121 | 122 | ## get current frame and heatmap 123 | rel_bbox = np.copy(bbox) 124 | rel_bbox[0:2] -= padded_scene_box[0:2] 125 | 126 | jittered_obj_size = jitter_scale[0][0]*float(pretrain_opts['img_size']) 127 | 128 | batch_num = np.zeros((pos_examples.shape[0], 1)) 129 | pos_rois = np.copy(pos_examples) 130 | pos_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)), pos_rois.shape[0], axis=0) 131 | pos_rois = samples2maskroi(pos_rois, self.receptive_field, (jittered_obj_size, jittered_obj_size),bbox[2:4], pretrain_opts['padding']) 132 | pos_rois = np.concatenate((batch_num, pos_rois), axis=1) 133 | 134 | batch_num = np.zeros((neg_examples.shape[0], 1)) 135 | neg_rois = np.copy(neg_examples) 136 | neg_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)), neg_rois.shape[0], axis=0) 137 | neg_rois = samples2maskroi(neg_rois, self.receptive_field, (jittered_obj_size, jittered_obj_size),bbox[2:4], pretrain_opts['padding']) 138 | neg_rois = np.concatenate((batch_num, neg_rois), axis=1) 139 | 140 | if i==0: 141 | total_pos_rois = [torch.from_numpy(np.copy(pos_rois).astype('float32'))] 142 | total_neg_rois = [torch.from_numpy(np.copy(neg_rois).astype('float32'))] 143 | else: 144 | total_pos_rois.append(torch.from_numpy(np.copy(pos_rois).astype('float32'))) 145 | total_neg_rois.append(torch.from_numpy(np.copy(neg_rois).astype('float32'))) 146 | 147 | return scenes_v, scenes_i, total_pos_rois, total_neg_rois 148 | 149 | 150 | 151 | next = __next__ 152 | 153 | def extract_regions(self, image, samples): 154 | regions = np.zeros((len(samples), self.crop_size, self.crop_size, 3), dtype='uint8') 155 | for i, sample in enumerate(samples): 156 | regions[i] = crop_image(image, sample, self.crop_size, self.padding, True) 157 | 158 | regions = regions.transpose(0, 3, 1, 2) 159 | regions = regions.astype('float32') - 128. 160 | return regions 161 | 162 | 163 | 164 | 165 | 166 | class RegionExtractor(): 167 | def __init__(self, image, samples, crop_size, padding, batch_size, shuffle=False): 168 | 169 | self.image = np.asarray(image) 170 | self.samples = samples 171 | self.crop_size = crop_size 172 | self.padding = padding 173 | self.batch_size = batch_size 174 | self.shuffle = shuffle 175 | 176 | self.index = np.arange(len(samples)) 177 | self.pointer = 0 178 | 179 | self.mean = self.image.mean(0).mean(0).astype('float32') 180 | 181 | def __iter__(self): 182 | return self 183 | 184 | def __next__(self): 185 | if self.pointer == len(self.samples): 186 | self.pointer = 0 187 | raise StopIteration 188 | else: 189 | next_pointer = min(self.pointer + self.batch_size, len(self.samples)) 190 | index = self.index[self.pointer:next_pointer] 191 | self.pointer = next_pointer 192 | 193 | regions = self.extract_regions(index) 194 | regions = torch.from_numpy(regions) 195 | return regions 196 | next = __next__ 197 | 198 | def extract_regions(self, index): 199 | regions = np.zeros((len(index),self.crop_size,self.crop_size,3),dtype='uint8') 200 | for i, sample in enumerate(self.samples[index]): 201 | regions[i] = crop_image(self.image, sample, self.crop_size, self.padding) 202 | 203 | regions = regions.transpose(0,3,1,2).astype('float32') 204 | regions = regions - 128. 205 | return regions 206 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/data_prov.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/data_prov.pyc -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/img_cropper.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0,'./modules') 3 | from roi_align import RoIAlign 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from torch.autograd import Variable 7 | import torch 8 | import numpy as np 9 | 10 | import time 11 | 12 | import matplotlib.pyplot as plt 13 | import matplotlib.patches as patches 14 | 15 | class imgCropper(nn.Module): 16 | def __init__(self, img_size): 17 | super(imgCropper, self).__init__() 18 | self.isCuda = False 19 | self.img_size = img_size 20 | self.roi_align_model = RoIAlign(img_size,img_size, 1. ) 21 | 22 | def gpuEnable(self): 23 | self.roi_align_model = self.roi_align_model.cuda() 24 | self.isCuda = True 25 | 26 | def forward(self, image, roi): 27 | aligned_image_var = self.roi_align_model(image, roi) 28 | return aligned_image_var 29 | 30 | def crop_image(self,image, box, result_size): 31 | ## constraint = several box from common 1 image 32 | ishape = image.shape 33 | cur_image_var = np.reshape(image, (1, ishape[0], ishape[1], ishape[2])) 34 | cur_image_var = cur_image_var.transpose(0, 3, 1, 2) 35 | cur_image_var = cur_image_var.astype('float32') 36 | cur_image_var = Variable(torch.from_numpy(cur_image_var).float()) 37 | 38 | 39 | roi = np.copy(box) 40 | roi[:,2:4] += roi[:,0:2] 41 | roi = np.concatenate((np.zeros((roi.shape[0], 1)), roi), axis=1) 42 | roi = Variable(torch.from_numpy(roi).float()) 43 | 44 | if self.isCuda: 45 | cur_image_var = cur_image_var.cuda() 46 | roi = roi.cuda() 47 | 48 | self.roi_align_model.aligned_width = result_size[0] 49 | self.roi_align_model.aligned_height = result_size[1] 50 | cropped_image = self.forward(cur_image_var, roi) 51 | 52 | return cropped_image, cur_image_var 53 | 54 | def crop_several_image(self,img_list,target_list): 55 | ## constraint = one to one matching between image and target 56 | ## exception handling 57 | assert(len(target_list) == len(img_list)) 58 | 59 | ## image crop 60 | torch.cuda.synchronize() 61 | start_time = time.time() 62 | cur_images = torch.squeeze(torch.stack(img_list, 0)) 63 | torch.cuda.synchronize() 64 | print('10 image stacking time:{}'.format(time.time() - start_time)) 65 | 66 | ishape = cur_images.size() 67 | 68 | # Extract sample features and get target location 69 | sample_rois = np.array(target_list) 70 | sample_rois[:,2:4] += sample_rois[:,0:2] 71 | batch_num = np.reshape(np.arange(0,len(sample_rois)),(len(sample_rois),1)) 72 | sample_rois = np.concatenate( (batch_num, sample_rois), axis=1) 73 | sample_rois = Variable(torch.from_numpy(sample_rois.astype('float32'))) 74 | if self.isCuda: 75 | sample_rois = sample_rois.cuda() 76 | cur_images = cur_images.cuda() 77 | 78 | cropped_images = self.forward(cur_images, sample_rois) 79 | 80 | 81 | return cropped_images 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/img_cropper.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/img_cropper.pyc -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import scipy.io 3 | import numpy as np 4 | from collections import OrderedDict 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | import torch 9 | import time 10 | import sys 11 | sys.path.insert(0,'./roi_align') 12 | from roi_align import RoIAlignAvg,RoIAlignMax 13 | import pdb 14 | import math 15 | import torch 16 | from torch.nn.parameter import Parameter 17 | from torch.nn.modules.utils import _single, _pair, _triple 18 | 19 | 20 | 21 | class _ConvNd(nn.Module): 22 | 23 | def __init__(self, in_channels, out_channels, kernel_size, stride, 24 | padding, dilation, transposed, output_padding, groups, bias): 25 | super(_ConvNd, self).__init__() 26 | if in_channels % groups != 0: 27 | raise ValueError('in_channels must be divisible by groups') 28 | if out_channels % groups != 0: 29 | raise ValueError('out_channels must be divisible by groups') 30 | self.in_channels = in_channels 31 | self.out_channels = out_channels 32 | self.kernel_size = kernel_size 33 | self.stride = stride 34 | self.padding = padding 35 | self.dilation = dilation 36 | self.transposed = transposed 37 | self.output_padding = output_padding 38 | self.groups = groups 39 | 40 | if bias: 41 | self.bias = Parameter(torch.Tensor(out_channels)) 42 | else: 43 | self.register_parameter('bias', None) 44 | self.reset_parameters() 45 | 46 | 47 | def reset_parameters(self): 48 | n = self.in_channels 49 | for k in self.kernel_size: 50 | n *= k 51 | stdv = 1. / math.sqrt(n) 52 | if self.bias is not None: 53 | self.bias.data.uniform_(-stdv, stdv) 54 | 55 | def __repr__(self): 56 | s = ('{name}({in_channels}, {out_channels}, kernel_size={kernel_size}' 57 | ', stride={stride}') 58 | if self.padding != (0,) * len(self.padding): 59 | s += ', padding={padding}' 60 | if self.dilation != (1,) * len(self.dilation): 61 | s += ', dilation={dilation}' 62 | if self.output_padding != (0,) * len(self.output_padding): 63 | s += ', output_padding={output_padding}' 64 | if self.groups != 1: 65 | s += ', groups={groups}' 66 | if self.bias is None: 67 | s += ', bias=False' 68 | s += ')' 69 | return s.format(name=self.__class__.__name__, **self.__dict__) 70 | 71 | 72 | 73 | 74 | 75 | class AdaptiveConv2d(_ConvNd): 76 | 77 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True): 78 | kernel_size = _pair(kernel_size) 79 | stride = _pair(stride) 80 | padding = _pair(padding) 81 | dilation = _pair(dilation) 82 | super(AdaptiveConv2d, self).__init__( 83 | in_channels, out_channels, kernel_size, stride, padding, dilation, 84 | False, _pair(0), groups, bias) 85 | 86 | def forward(self, input, dynamic_weight): 87 | # Get batch num 88 | batch_num = input.size(0) 89 | 90 | # Reshape input tensor from size (N, C, H, W) to (1, N*C, H, W) 91 | input = input.view(1, -1, input.size(2), input.size(3)) 92 | 93 | # Reshape dynamic_weight tensor from size (N, C, H, W) to (1, N*C, H, W) 94 | dynamic_weight = dynamic_weight.view(-1, 1, dynamic_weight.size(2), dynamic_weight.size(3)) 95 | 96 | # Do convolution 97 | conv_rlt = F.conv2d(input, dynamic_weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 98 | 99 | # Reshape conv_rlt tensor from (1, N*C, H, W) to (N, C, H, W) 100 | conv_rlt = conv_rlt.view(batch_num, -1, conv_rlt.size(2), conv_rlt.size(3)) 101 | 102 | return conv_rlt 103 | 104 | 105 | def append_params(params, module, prefix): 106 | for child in module.children(): 107 | for k,p in child._parameters.items(): 108 | if p is None: continue 109 | 110 | if isinstance(child, nn.BatchNorm2d): 111 | name = prefix + '_bn_' + k 112 | else: 113 | name = prefix + '_' + k 114 | 115 | if name not in params: 116 | params[name] = p 117 | else: 118 | raise RuntimeError("Duplicated param name: %s" % (name)) 119 | 120 | class LRN(nn.Module): 121 | def __init__(self, local_size=1, alpha=0.0001, beta=0.75, ACROSS_CHANNELS=False): 122 | super(LRN, self).__init__() 123 | self.ACROSS_CHANNELS = ACROSS_CHANNELS 124 | if self.ACROSS_CHANNELS: 125 | self.average = nn.AvgPool3d(kernel_size=(local_size, 1, 1), 126 | stride=1, 127 | padding=(int((local_size - 1.0) / 2), 0, 0)) 128 | else: 129 | self.average = nn.AvgPool2d(kernel_size=local_size, 130 | stride=1, 131 | padding=int((local_size - 1.0) / 2)) 132 | self.alpha = alpha 133 | self.beta = beta 134 | 135 | def forward(self, x): 136 | if self.ACROSS_CHANNELS: 137 | div = x.pow(2).unsqueeze(1) 138 | div = self.average(div).squeeze(1) 139 | div = div.mul(self.alpha).add(2.0).pow(self.beta) 140 | else: 141 | div = x.pow(2) 142 | div = self.average(div) 143 | div = div.mul(self.alpha).add(2.0).pow(self.beta) 144 | x = x.div(div) 145 | return x 146 | 147 | 148 | class ChannelAttention(nn.Module): 149 | def __init__(self, in_planes, ratio=16): 150 | super(ChannelAttention, self).__init__() 151 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 152 | self.max_pool = nn.AdaptiveMaxPool2d(1) 153 | 154 | self.fc1 = nn.Conv2d(in_planes, in_planes // 16, 1, bias=False) 155 | self.relu1 = nn.ReLU() 156 | self.fc2 = nn.Conv2d(in_planes // 16, in_planes, 1, bias=False) 157 | 158 | self.sigmoid = nn.Sigmoid() 159 | 160 | def forward(self, x): 161 | avg_out = self.fc2(self.relu1(self.fc1(self.avg_pool(x)))) 162 | max_out = self.fc2(self.relu1(self.fc1(self.max_pool(x)))) 163 | out = avg_out + max_out 164 | return self.sigmoid(out) 165 | 166 | 167 | 168 | 169 | class SpatialAttention(nn.Module): 170 | def __init__(self, kernel_size=7): 171 | super(SpatialAttention, self).__init__() 172 | 173 | assert kernel_size in (3, 7), 'kernel size must be 3 or 7' 174 | padding = 3 if kernel_size == 7 else 1 175 | 176 | self.conv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False) 177 | self.sigmoid = nn.Sigmoid() 178 | 179 | def forward(self, x): 180 | avg_out = torch.mean(x, dim=1, keepdim=True) 181 | max_out, _ = torch.max(x, dim=1, keepdim=True) 182 | x = torch.cat([avg_out, max_out], dim=1) 183 | x = self.conv1(x) 184 | return self.sigmoid(x) 185 | 186 | 187 | 188 | 189 | class MDNet(nn.Module): 190 | def __init__(self, model_path=None,K=1): 191 | super(MDNet, self).__init__() 192 | self.K = K 193 | self.layers = nn.Sequential(OrderedDict([ 194 | ('conv1', nn.Sequential(nn.Conv2d(3, 96, kernel_size=7, stride=2), 195 | nn.ReLU(), 196 | LRN(), 197 | nn.MaxPool2d(kernel_size=3, stride=2) 198 | )), 199 | ('conv2', nn.Sequential(nn.Conv2d(96, 256, kernel_size=5, stride=2,dilation=1), 200 | nn.ReLU(), 201 | LRN(), 202 | )), 203 | 204 | ('conv3', nn.Sequential(nn.Conv2d(256, 512, kernel_size=3, stride=1,dilation=3), 205 | nn.ReLU(), 206 | )), 207 | ('fc4', nn.Sequential(nn.Linear(512 * 3 * 3 * 2, 512), 208 | nn.ReLU())), 209 | ('fc5', nn.Sequential(nn.Dropout(0.5), 210 | nn.Linear(512, 512), 211 | nn.ReLU()))])) 212 | 213 | self.branches = nn.ModuleList([nn.Sequential(nn.Dropout(0.5), nn.Linear(512, 2)) for _ in range(K)]) 214 | 215 | self.sigmoid = nn.Sigmoid() 216 | 217 | self.roi_align_model = RoIAlignMax(3, 3, 1. / 8) 218 | 219 | self.conv1x1_Tk = nn.Conv2d(1024, 512, 1, 1) 220 | self.conv1x1_Tq = nn.Conv2d(1024, 9, 1, 1) 221 | self.conv1x1_Vk = nn.Conv2d(1024, 512, 1, 1) 222 | self.conv1x1_Vq = nn.Conv2d(1024, 9, 1, 1) 223 | 224 | self.conv1x1_Tk = self.conv1x1_Tk.cuda() 225 | self.conv1x1_Tq = self.conv1x1_Tq.cuda() 226 | self.conv1x1_Vk = self.conv1x1_Vk.cuda() 227 | self.conv1x1_Vq = self.conv1x1_Vq.cuda() 228 | 229 | self.channel_attention = ChannelAttention(1024) 230 | self.spatial_attention = SpatialAttention() 231 | 232 | 233 | # self.BatchNorm2D = nn.BatchNorm2d(100) 234 | 235 | self.receptive_field = 75. # it is receptive fieald that a element of feat_map covers. feat_map is bottom layer of ROI_align_layer 236 | 237 | if model_path is not None: 238 | if os.path.splitext(model_path)[1] == '.pth': 239 | self.load_model(model_path) 240 | elif os.path.splitext(model_path)[1] == '.mat': 241 | self.load_mat_model(model_path) 242 | else: 243 | raise RuntimeError("Unkown model format: %s" % (model_path)) 244 | self.build_param_dict() 245 | 246 | def build_param_dict(self): 247 | self.params = OrderedDict() 248 | for name, module in self.layers.named_children(): 249 | append_params(self.params, module, name) 250 | for k, module in enumerate(self.branches): 251 | append_params(self.params, module, 'fc6_%d'%(k)) 252 | for name, module in self.conv1x1_Tk.named_children(): 253 | append_params(self.params, module, name) 254 | for name, module in self.conv1x1_Tq.named_children(): 255 | append_params(self.params, module, name) 256 | for name, module in self.conv1x1_Vk.named_children(): 257 | append_params(self.params, module, name) 258 | for name, module in self.conv1x1_Vq.named_children(): 259 | append_params(self.params, module, name) 260 | for name, module in self.channel_attention.named_children(): 261 | append_params(self.params, module, name) 262 | for name, module in self.spatial_attention.named_children(): 263 | append_params(self.params, module, name) 264 | 265 | 266 | 267 | def set_learnable_params(self, layers): 268 | for k, p in self.params.items(): 269 | if any([k.startswith(l) for l in layers]): 270 | p.requires_grad = True 271 | else: 272 | p.requires_grad = False 273 | 274 | 275 | def get_learnable_params(self): 276 | params = OrderedDict() 277 | for k, p in self.params.items(): 278 | if p.requires_grad: 279 | params[k] = p 280 | return params 281 | 282 | 283 | 284 | 285 | ########################################################################################### 286 | #### the forward function 287 | ########################################################################################### 288 | 289 | def forward(self, x_v, x_i, k=0, in_layer='conv1', out_layer='fc6'): 290 | 291 | run = False 292 | for name, module in self.layers.named_children(): 293 | if name == in_layer: 294 | run = True 295 | if run: 296 | x_v = module(x_v) 297 | x_i = module(x_i) 298 | 299 | 300 | if name == "conv3": 301 | 302 | rgbt_feats = torch.cat((x_v, x_i), dim=1) ## torch.Size([1, 192, 62, 91]) 303 | 304 | # pdb.set_trace() 305 | 306 | rgbt_feats = self.channel_attention(rgbt_feats) * rgbt_feats 307 | rgbt_feats = self.spatial_attention(rgbt_feats) * rgbt_feats 308 | 309 | Tk_feats = self.conv1x1_Tk(rgbt_feats) ## torch.Size([1, 96, 117, 71]) 310 | Tq_feats = self.conv1x1_Tq(rgbt_feats) ## torch.Size([1, 9, 117, 71]) 311 | Vk_feats = self.conv1x1_Vk(rgbt_feats) 312 | Vq_feats = self.conv1x1_Vq(rgbt_feats) 313 | 314 | # pdb.set_trace() 315 | 316 | Tk_feats = torch.squeeze(Tk_feats, dim=0) 317 | Tk_feats = Tk_feats.view(-1, Tk_feats.shape[1]*Tk_feats.shape[2]) ## torch.Size([96, 4150]) 318 | 319 | Tq_feats = torch.squeeze(Tq_feats, dim=0) 320 | Tq_feats = Tq_feats.view(-1, Tq_feats.shape[1]*Tq_feats.shape[2]) 321 | 322 | Vk_feats = torch.squeeze(Vk_feats, dim=0) 323 | Vk_feats = Vk_feats.view(-1, Vk_feats.shape[1]*Vk_feats.shape[2]) 324 | 325 | Vq_feats = torch.squeeze(Vq_feats, dim=0) 326 | Vq_feats = Vq_feats.view(-1, Vq_feats.shape[1]*Vq_feats.shape[2]) 327 | 328 | #### T_output.shape: torch.Size([96, 9]) 329 | T_output = torch.matmul(Tk_feats, torch.transpose(Tq_feats, 1, 0)) 330 | V_output = torch.matmul(Vk_feats, torch.transpose(Vq_feats, 1, 0)) 331 | 332 | # pdb.set_trace() 333 | T_filters = torch.reshape(T_output, (1, T_output.shape[0], 3, 3)) ## (96, 3, 3) 334 | V_filters = torch.reshape(V_output, (1, V_output.shape[0], 3, 3)) ## (96, 3, 3) 335 | 336 | 337 | # pdb.set_trace() 338 | 339 | adaptive_conv_T = AdaptiveConv2d(x_i.size(1), x_i.size(1), 3, padding=1, groups=x_i.size(1), bias=False) 340 | adaptive_conv_V = AdaptiveConv2d(x_v.size(1), x_v.size(1), 3, padding=1, groups=x_v.size(1), bias=False) 341 | 342 | dynamic_T_feats = adaptive_conv_T(x_v, T_filters) 343 | dynamic_V_feats = adaptive_conv_V(x_i, V_filters) 344 | 345 | dynamic_T_feats = self.sigmoid(dynamic_T_feats) 346 | dynamic_V_feats = self.sigmoid(dynamic_V_feats) 347 | 348 | x_v = x_v + dynamic_V_feats 349 | x_i = x_i + dynamic_T_feats 350 | 351 | fuse_x_v_i = torch.cat((x_v, x_i), dim=1) 352 | 353 | # pdb.set_trace() 354 | 355 | # augmented_feats, p1 = self.attn1(fuse_x_v_i) 356 | 357 | if name == out_layer: 358 | return x_v, x_i, fuse_x_v_i 359 | 360 | 361 | # pdb.set_trace() 362 | 363 | x_v = self.branches[k](x_v) 364 | 365 | 366 | if out_layer=='fc6': 367 | return x_v 368 | elif out_layer=='fc6_softmax': 369 | return F.softmax(x_v) 370 | 371 | 372 | 373 | def load_model(self, model_path): 374 | states = torch.load(model_path) 375 | shared_layers = states['shared_layers'] 376 | self.layers.load_state_dict(shared_layers) 377 | 378 | def load_mat_model(self, matfile): 379 | mat = scipy.io.loadmat(matfile) 380 | mat_layers = list(mat['layers'])[0] 381 | 382 | # copy conv weights 383 | for i in range(3): 384 | weight, bias = mat_layers[i*4]['weights'].item()[0] 385 | self.layers[i][0].weight.data = torch.from_numpy(np.transpose(weight, (3,2,0,1))) 386 | self.layers[i][0].bias.data = torch.from_numpy(bias[:,0]) 387 | 388 | def trainSpatialTransform(self, image, bb): 389 | 390 | return 391 | 392 | 393 | class BinaryLoss(nn.Module): 394 | def __init__(self): 395 | super(BinaryLoss, self).__init__() 396 | 397 | def forward(self, pos_score, neg_score): 398 | pos_loss = -F.log_softmax(pos_score)[:,1] 399 | neg_loss = -F.log_softmax(neg_score)[:,0] 400 | 401 | loss = (pos_loss.sum() + neg_loss.sum())/(pos_loss.size(0) + neg_loss.size(0)) 402 | return loss 403 | 404 | 405 | class Accuracy(): 406 | def __call__(self, pos_score, neg_score): 407 | 408 | pos_correct = (pos_score[:,1] > pos_score[:,0]).sum().float() 409 | neg_correct = (neg_score[:,1] < neg_score[:,0]).sum().float() 410 | 411 | pos_acc = pos_correct / (pos_score.size(0) + 1e-8) 412 | neg_acc = neg_correct / (neg_score.size(0) + 1e-8) 413 | 414 | return pos_acc.item(), neg_acc.item() 415 | 416 | 417 | class Precision(): 418 | def __call__(self, pos_score, neg_score): 419 | 420 | scores = torch.cat((pos_score[:,1], neg_score[:,1]), 0) 421 | topk = torch.topk(scores, pos_score.size(0))[1] 422 | prec = (topk < pos_score.size(0)).float().sum() / (pos_score.size(0)+1e-8) 423 | 424 | return prec.item() 425 | 426 | 427 | 428 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/model.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/model.pyc -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/prepro_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pickle 4 | from collections import OrderedDict 5 | 6 | 7 | 8 | # seq_home = '../dataset/' 9 | seqlist_path = '../vot-otb.txt' 10 | output_path = 'data/vot-otb.pkl' 11 | set_type = 'VOT' 12 | seq_home = '/home/ilchae/dataset/tracking/'+set_type +'/' 13 | 14 | if set_type=='OTB': 15 | seqlist_path = '../otb-vot15.txt' 16 | output_path = '../otb-vot15.pkl' 17 | 18 | if set_type == 'VOT': 19 | seqlist_path = '../vot-otb.txt' 20 | output_path = '../vot-otb.pkl' 21 | 22 | with open(seqlist_path,'r') as fp: 23 | seq_list = fp.read().splitlines() 24 | 25 | data = {} 26 | for i,seqname in enumerate(seq_list): 27 | print(seqname) 28 | if set_type=='OTB': 29 | seq_path = seq_home+seqname 30 | img_list = sorted([p for p in os.listdir(seq_path+'/img') if os.path.splitext(p)[1] == '.jpg']) 31 | 32 | if (seqname == 'Jogging') or (seqname == 'Skating2'): 33 | gt = np.loadtxt(seq_path + '/groundtruth_rect.1.txt') 34 | elif seqname == 'Human4' : 35 | gt = np.loadtxt(seq_path + '/groundtruth_rect.2.txt', delimiter=',') 36 | elif (seqname == 'BlurBody') or (seqname == 'BlurCar1') or (seqname == 'BlurCar2') or (seqname == 'BlurCar3') \ 37 | or (seqname == 'BlurCar4') or (seqname == 'BlurFace') or (seqname == 'BlurOwl') or (seqname == 'Board') \ 38 | or (seqname == 'Box') or (seqname == 'Car4') or (seqname == 'CarScale') or (seqname == 'ClifBar') \ 39 | or (seqname == 'Couple') or (seqname == 'Crossing') or (seqname == 'Dog') or (seqname == 'FaceOcc1') \ 40 | or (seqname == 'Girl') or (seqname == 'Rubik') or (seqname == 'Singer1') or (seqname == 'Subway') \ 41 | or (seqname == 'Surfer') or (seqname == 'Sylvester') or (seqname == 'Toy') or (seqname == 'Twinnings') \ 42 | or (seqname == 'Vase') or (seqname == 'Walking') or (seqname == 'Walking2') or (seqname == 'Woman') : 43 | gt = np.loadtxt(seq_path + '/groundtruth_rect.txt') 44 | elif (seqname == 'Diving'): 45 | gt = np.loadtxt(seq_path + '/groundtruth_rect_ilchae.txt', delimiter=',') 46 | else: 47 | gt = np.loadtxt(seq_path + '/groundtruth_rect.txt', delimiter=',') 48 | 49 | if (seqname == 'David') or (seqname == 'Football1') or (seqname == 'Freeman3') or (seqname == 'Freeman4'): 50 | continue 51 | 52 | if set_type =='VOT': 53 | img_list = sorted([p for p in os.listdir(seq_home + seqname) if os.path.splitext(p)[1] == '.jpg']) 54 | gt = np.loadtxt(seq_home + seqname + '/groundtruth.txt', delimiter=',') 55 | 56 | if set_type == 'IMAGENET': 57 | img_list = [] 58 | gt = [] 59 | 60 | assert len(img_list) == len(gt), "Lengths do not match!!" 61 | 62 | if gt.shape[1]==8: 63 | x_min = np.min(gt[:,[0,2,4,6]],axis=1)[:,None] 64 | y_min = np.min(gt[:,[1,3,5,7]],axis=1)[:,None] 65 | x_max = np.max(gt[:,[0,2,4,6]],axis=1)[:,None] 66 | y_max = np.max(gt[:,[1,3,5,7]],axis=1)[:,None] 67 | gt = np.concatenate((x_min, y_min, x_max-x_min, y_max-y_min),axis=1) 68 | 69 | data[seqname] = {'images':img_list, 'gt':gt} 70 | 71 | with open(output_path, 'wb') as fp: 72 | pickle.dump(data, fp, -1) 73 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/prepro_data_imagenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pickle 4 | from collections import OrderedDict 5 | 6 | import xml.etree.ElementTree 7 | import xmltodict 8 | import numpy as np 9 | 10 | import matplotlib.pyplot as plt 11 | import matplotlib.patches as patches 12 | from PIL import Image 13 | import time 14 | 15 | output_path = './imagenet_refine.pkl' 16 | 17 | 18 | 19 | seq_home = '/home/ilchae/dataset/ILSVRC/' 20 | train_list = [p for p in os.listdir(seq_home + 'Data/VID/train')] 21 | seq_list = [] 22 | for num, cur_dir in enumerate(train_list): 23 | seq_list += [cur_dir + '/' + p for p in os.listdir(seq_home + 'Data/VID/train/' + cur_dir)] 24 | 25 | fig = plt.figure() 26 | ax = fig.add_subplot(1,1,1) 27 | 28 | data = {} 29 | completeNum = 0 30 | for i,seqname in enumerate(seq_list): 31 | print(seqname) 32 | seq_path = seq_home + 'Data/VID/train/' + seqname 33 | gt_path = seq_home +'Annotations/VID/train/' + seqname 34 | img_list = sorted([p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.JPEG']) 35 | 36 | # gt = np.zeros((len(img_list),4)) 37 | enable_gt = [] 38 | enable_img_list = [] 39 | gt_list = sorted([gt_path + '/' + p for p in os.listdir(gt_path) if os.path.splitext(p)[1] == '.xml']) 40 | save_enable = True 41 | for gidx in range(0,len(img_list)): 42 | with open(gt_list[gidx]) as fd: 43 | doc = xmltodict.parse(fd.read()) 44 | try: 45 | try: 46 | object =doc['annotation']['object'][0] 47 | except: 48 | object = doc['annotation']['object'] 49 | except: 50 | ## no object, occlusion and hidden etc. 51 | continue 52 | 53 | if (int(object['trackid']) is not 0): 54 | continue 55 | 56 | xmin = float(object['bndbox']['xmin']) 57 | xmax = float(object['bndbox']['xmax']) 58 | ymin = float(object['bndbox']['ymin']) 59 | ymax = float(object['bndbox']['ymax']) 60 | 61 | ## discard too big object 62 | if ((float(doc['annotation']['size']['width'])/2.) < (xmax-xmin) ) and ((float(doc['annotation']['size']['height'])/2.) < (ymax-ymin) ): 63 | continue 64 | 65 | # gt[gidx,0] = xmin 66 | # gt[gidx,1] = ymin 67 | # gt[gidx,2] = xmax - xmin 68 | # gt[gidx,3] = ymax - ymin 69 | 70 | cur_gt = np.zeros((4)) 71 | cur_gt[0] = xmin 72 | cur_gt[1] = ymin 73 | cur_gt[2] = xmax - xmin 74 | cur_gt[3] = ymax - ymin 75 | enable_gt.append(cur_gt) 76 | 77 | enable_img_list.append(img_list[gidx]) 78 | 79 | if len(enable_img_list) == 0: 80 | save_enable = False 81 | if save_enable: 82 | assert len(enable_img_list) == len(enable_gt), "Lengths do not match!!" 83 | data[seqname] = {'images':enable_img_list, 'gt':np.asarray(enable_gt)} 84 | completeNum += 1 85 | print('Complete!') 86 | 87 | with open(output_path, 'wb') as fp: 88 | pickle.dump(data, fp, -1) 89 | 90 | print('complete {} videos'.format(completeNum)) 91 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/pretrain_options.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | pretrain_opts = OrderedDict() 4 | pretrain_opts['use_gpu'] = True 5 | 6 | pretrain_opts['init_model_path'] = './models/imagenet-vgg-m.mat' 7 | pretrain_opts['model_path'] = './models/CBAM_dfg_rtmdnet_trained_on_50.pth' 8 | 9 | pretrain_opts['batch_frames'] = 8 10 | pretrain_opts['batch_pos'] = 64 11 | pretrain_opts['batch_neg'] = 196 12 | 13 | pretrain_opts['overlap_pos'] = [0.7, 1] 14 | pretrain_opts['overlap_neg'] = [0, 0.5] 15 | 16 | pretrain_opts['img_size'] = 107 17 | 18 | 19 | pretrain_opts['lr'] = 0.0001 20 | pretrain_opts['w_decay'] = 0.0005 21 | pretrain_opts['momentum'] = 0.9 22 | pretrain_opts['grad_clip'] = 10 23 | pretrain_opts['ft_layers'] = ['conv','fc'] 24 | pretrain_opts['lr_mult'] = {'fc':1} 25 | pretrain_opts['n_cycles'] = 1000 26 | 27 | 28 | ##################################### from RCNN ############################################# 29 | pretrain_opts['padding'] = 1.2 30 | pretrain_opts['padding_ratio']=5. 31 | pretrain_opts['padded_img_size'] = pretrain_opts['img_size']*int(pretrain_opts['padding_ratio']) 32 | pretrain_opts['frame_interval'] = 2 33 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/pretrain_options.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/pretrain_options.pyc -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/roi_align/build/lib.linux-x86_64-3.7/roi_align_cuda.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/roi_align/build/lib.linux-x86_64-3.7/roi_align_cuda.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/roi_align/build/temp.linux-x86_64-3.7/src/roi_align_cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/roi_align/build/temp.linux-x86_64-3.7/src/roi_align_cuda.o -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/roi_align/build/temp.linux-x86_64-3.7/src/roi_align_kernel_c.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/roi_align/build/temp.linux-x86_64-3.7/src/roi_align_kernel_c.o -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/roi_align/functions/roi_align.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function 2 | 3 | from .. import roi_align_cuda 4 | 5 | 6 | class RoIAlignFunction(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 10 | if isinstance(out_size, int): 11 | out_h = out_size 12 | out_w = out_size 13 | elif isinstance(out_size, tuple): 14 | 15 | assert len(out_size) == 2 16 | assert isinstance(out_size[0], int) 17 | assert isinstance(out_size[1], int) 18 | out_h, out_w = out_size 19 | else: 20 | raise TypeError( 21 | '"out_size" must be an integer or tuple of integers') 22 | ctx.spatial_scale = spatial_scale 23 | ctx.sample_num = sample_num 24 | ctx.save_for_backward(rois) 25 | ctx.feature_size = features.size() 26 | 27 | batch_size, num_channels, data_height, data_width = features.size() 28 | num_rois = rois.size(0) 29 | 30 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 31 | if features.is_cuda: 32 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 33 | sample_num, output) 34 | else: 35 | raise NotImplementedError 36 | 37 | return output 38 | 39 | @staticmethod 40 | def backward(ctx, grad_output): 41 | feature_size = ctx.feature_size 42 | spatial_scale = ctx.spatial_scale 43 | sample_num = ctx.sample_num 44 | rois = ctx.saved_tensors[0] 45 | assert (feature_size is not None and grad_output.is_cuda) 46 | 47 | batch_size, num_channels, data_height, data_width = feature_size 48 | out_w = grad_output.size(3) 49 | out_h = grad_output.size(2) 50 | 51 | grad_input = grad_rois = None 52 | if ctx.needs_input_grad[0]: 53 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 54 | data_width) 55 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 56 | out_w, spatial_scale, sample_num, 57 | grad_input) 58 | 59 | return grad_input, grad_rois, None, None, None 60 | class RoIAlignAdaFunction(Function): 61 | 62 | @staticmethod 63 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 64 | if isinstance(out_size, int): 65 | out_h = out_size 66 | out_w = out_size 67 | elif isinstance(out_size, tuple): 68 | assert len(out_size) == 2 69 | assert isinstance(out_size[0], int) 70 | assert isinstance(out_size[1], int) 71 | out_h, out_w = out_size 72 | else: 73 | raise TypeError( 74 | '"out_size" must be an integer or tuple of integers') 75 | ctx.spatial_scale = spatial_scale 76 | ctx.sample_num = sample_num 77 | ctx.save_for_backward(rois) 78 | ctx.feature_size = features.size() 79 | 80 | batch_size, num_channels, data_height, data_width = features.size() 81 | num_rois = rois.size(0) 82 | 83 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 84 | if features.is_cuda: 85 | roi_align_cuda.ada_forward(features, rois, out_h, out_w, spatial_scale, 86 | sample_num, output) 87 | else: 88 | raise NotImplementedError 89 | 90 | return output 91 | 92 | @staticmethod 93 | def backward(ctx, grad_output): 94 | feature_size = ctx.feature_size 95 | spatial_scale = ctx.spatial_scale 96 | sample_num = ctx.sample_num 97 | rois = ctx.saved_tensors[0] 98 | assert (feature_size is not None and grad_output.is_cuda) 99 | 100 | batch_size, num_channels, data_height, data_width = feature_size 101 | out_w = grad_output.size(3) 102 | out_h = grad_output.size(2) 103 | 104 | grad_input = grad_rois = None 105 | if ctx.needs_input_grad[0]: 106 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 107 | data_width) 108 | roi_align_cuda.ada_backward(grad_output.contiguous(), rois, out_h, 109 | out_w, spatial_scale, sample_num, 110 | grad_input) 111 | 112 | return grad_input, grad_rois, None, None, None 113 | 114 | roi_align = RoIAlignFunction.apply 115 | roi_align_ada = RoIAlignAdaFunction.apply 116 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/modules/roi_align/functions/roi_align.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/MFGNet-rgbt-tracking-master/modules/roi_align/functions/roi_align.pyc -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/test_234_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 3 | from os.path import join, isdir 4 | from tracker import * 5 | import numpy as np 6 | import argparse 7 | import pickle 8 | import math 9 | import pdb 10 | import torchvision.transforms as transforms 11 | import random 12 | import warnings 13 | warnings.filterwarnings("ignore") 14 | 15 | 16 | 17 | def genConfig(seq_path, set_type): 18 | 19 | path, seqname = os.path.split(seq_path) 20 | 21 | if set_type == 'OTB100': 22 | img_list = sorted([seq_path + '/img/' + p for p in os.listdir(seq_path + '/img') if os.path.splitext(p)[1] == '.png']) 23 | gt = np.loadtxt(seq_path + '/groundtruth_rect.txt', delimiter=',') 24 | 25 | ##################################################################### 26 | ##### For the RGBT dataset 27 | ##################################################################### 28 | elif set_type == 'dataset234': 29 | img_list_v = sorted([seq_path + '/visible/' + p for p in os.listdir(seq_path + '/visible') if os.path.splitext(p)[1] == '.jpg']) 30 | img_list_i = sorted([seq_path + '/infrared/' + p for p in os.listdir(seq_path + '/infrared') if os.path.splitext(p)[1] == '.jpg']) 31 | gt = np.loadtxt(seq_path + '/visible.txt', delimiter=',') 32 | 33 | elif set_type == 'dataset210': 34 | img_list_v = sorted([seq_path + '/visible/' + p for p in os.listdir(seq_path + '/visible') if os.path.splitext(p)[1] == '.jpg']) 35 | img_list_i = sorted([seq_path + '/infrared/' + p for p in os.listdir(seq_path + '/infrared') if os.path.splitext(p)[1] == '.jpg']) 36 | gt = np.loadtxt(seq_path + '/init.txt', delimiter=',') 37 | 38 | 39 | return img_list_v, img_list_i, gt 40 | 41 | 42 | 43 | 44 | if __name__ == "__main__": 45 | 46 | parser = argparse.ArgumentParser() 47 | parser.add_argument("-set_type", default = 'dataset234') 48 | parser.add_argument("-model_path", default = './models/test_CBAM_dfg_rtmdnet_trained_on_50.pth') 49 | parser.add_argument("-result_path", default = './result.npy') 50 | parser.add_argument("-visual_log",default=False, action= 'store_true') 51 | parser.add_argument("-visualize",default=False, action='store_true') 52 | parser.add_argument("-adaptive_align",default=True, action='store_false') 53 | parser.add_argument("-padding",default=1.2, type = float) 54 | parser.add_argument("-jitter",default=True, action='store_false') 55 | 56 | args = parser.parse_args() 57 | 58 | ################################################################################## 59 | #########################Just modify opts in this script.######################### 60 | ######################Becuase of synchronization of options####################### 61 | ################################################################################## 62 | ## option setting 63 | opts['model_path']=args.model_path 64 | opts['result_path']=args.result_path 65 | opts['visual_log']=args.visual_log 66 | opts['set_type']=args.set_type 67 | opts['visualize'] = args.visualize 68 | opts['adaptive_align'] = args.adaptive_align 69 | opts['padding'] = args.padding 70 | opts['jitter'] = args.jitter 71 | ################################################################################## 72 | ############################Do not modify opts anymore.########################### 73 | ######################Becuase of synchronization of options####################### 74 | ################################################################################## 75 | print(opts) 76 | 77 | 78 | ## path initialization 79 | dataset_path = '/wangxiao/experiments/' 80 | result_home = '/wangxiao/experiments/trackingResults/' 81 | 82 | seq_home = dataset_path + opts['set_type'] 83 | seq_list = [f for f in os.listdir(seq_home) if isdir(join(seq_home,f))] 84 | seq_list = np.sort(seq_list) 85 | 86 | iou_list=[] 87 | fps_list=dict() 88 | bb_result = dict() 89 | result = dict() 90 | 91 | iou_list_nobb=[] 92 | bb_result_nobb = dict() 93 | for num, seq in enumerate(seq_list): 94 | 95 | if num<-1: 96 | continue 97 | 98 | already_done = os.listdir(result_home) 99 | 100 | if seq+"_rgbt234.txt" in already_done: 101 | print("==>> Skip this video: ", seq) 102 | else: 103 | txtName = seq + '_rgbt234.txt' 104 | fid = open(result_home + txtName, 'w') 105 | 106 | seq_path = seq_home + '/' + seq 107 | img_list_v, img_list_i, gt = genConfig(seq_path, opts['set_type']) 108 | 109 | iou_result, result_bb, fps, result_nobb = run_mdnet(img_list_v, img_list_i, gt[0], gt, seq = seq, display=opts['visualize']) 110 | 111 | enable_frameNum = 0. 112 | for iidx in range(len(iou_result)): 113 | if (math.isnan(iou_result[iidx])==False): 114 | enable_frameNum += 1. 115 | else: 116 | ## gt is not alowed 117 | iou_result[iidx] = 0. 118 | 119 | iou_list.append(iou_result.sum()/enable_frameNum) 120 | bb_result[seq] = result_bb 121 | fps_list[seq]=fps 122 | 123 | bb_result_nobb[seq] = result_nobb 124 | print('{} {} : {} , total mIoU:{}, fps:{}'.format(num,seq,iou_result.mean(), sum(iou_list)/len(iou_list),sum(fps_list.values())/len(fps_list))) 125 | 126 | 127 | for iidex in range(len(result_bb)): 128 | line = result_bb[iidex] 129 | 130 | # pdb.set_trace() 131 | fid.write(str(line[0])) 132 | fid.write(',') 133 | fid.write(str(line[1])) 134 | fid.write(',') 135 | fid.write(str(line[2])) 136 | fid.write(',') 137 | fid.write(str(line[3])) 138 | fid.write('\n') 139 | fid.close() 140 | 141 | 142 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/tracker.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import time 4 | 5 | ## for drawing package 6 | import matplotlib.pyplot as plt 7 | import matplotlib.patches as patches 8 | 9 | import torch.optim as optim 10 | from torch.autograd import Variable 11 | from random import randint 12 | 13 | sys.path.insert(0,'./modules') 14 | from sample_generator import * 15 | from data_prov import * 16 | from model import * 17 | from bbreg import * 18 | from options import * 19 | from img_cropper import * 20 | from roi_align import RoIAlignAvg,RoIAlignMax,RoIAlignAdaMax 21 | 22 | 23 | 24 | # sys.path.insert(0,'./naive_rgbt_TANet_module/') 25 | # from generator import naive_Generator 26 | # from utils import * 27 | import pdb 28 | import warnings 29 | warnings.filterwarnings("ignore") 30 | import torchvision.transforms as transforms 31 | import random 32 | import cv2 33 | from skimage import measure, draw 34 | 35 | 36 | # generator_path = './naive_rgbt_TANet_module/naive_TANet_rgbt_model.pkl' 37 | 38 | # Generator = naive_Generator() 39 | # Generator.load_state_dict(torch.load(generator_path)) 40 | # Generator.cuda() 41 | 42 | 43 | np.random.seed(123) 44 | torch.manual_seed(456) 45 | torch.cuda.manual_seed(789) 46 | 47 | # torch.set_default_tensor_type(torch.cuda.FloatTensor) 48 | 49 | ################################################################################## 50 | ############################Do not modify opts anymore.########################### 51 | ######################Becuase of synchronization of options####################### 52 | ################################################################################## 53 | 54 | def set_optimizer(model, lr_base, lr_mult=opts['lr_mult'], momentum=opts['momentum'], w_decay=opts['w_decay']): 55 | params = model.get_learnable_params() 56 | param_list = [] 57 | for k, p in params.items(): 58 | lr = lr_base 59 | for l, m in lr_mult.items(): 60 | if k.startswith(l): 61 | lr = lr_base * m 62 | param_list.append({'params': [p], 'lr':lr}) 63 | optimizer = optim.SGD(param_list, lr = lr, momentum=momentum, weight_decay=w_decay) 64 | return optimizer 65 | 66 | 67 | def train(model, criterion, optimizer, pos_feats, neg_feats, maxiter, in_layer='fc4'): 68 | model.train() 69 | 70 | batch_pos = opts['batch_pos'] 71 | batch_neg = opts['batch_neg'] 72 | batch_test = opts['batch_test'] 73 | batch_neg_cand = max(opts['batch_neg_cand'], batch_neg) 74 | 75 | pos_idx = np.random.permutation(pos_feats.size(0)) 76 | neg_idx = np.random.permutation(neg_feats.size(0)) 77 | while(len(pos_idx) < batch_pos*maxiter): 78 | pos_idx = np.concatenate([pos_idx, np.random.permutation(pos_feats.size(0))]) 79 | while(len(neg_idx) < batch_neg_cand*maxiter): 80 | neg_idx = np.concatenate([neg_idx, np.random.permutation(neg_feats.size(0))]) 81 | pos_pointer = 0 82 | neg_pointer = 0 83 | 84 | 85 | 86 | for iter in range(maxiter): 87 | 88 | # select pos idx 89 | pos_next = pos_pointer + batch_pos 90 | pos_cur_idx = pos_idx[pos_pointer:pos_next] 91 | pos_cur_idx = pos_feats.new(pos_cur_idx).long() 92 | pos_pointer = pos_next 93 | 94 | # select neg idx 95 | neg_next = neg_pointer + batch_neg_cand 96 | neg_cur_idx = neg_idx[neg_pointer:neg_next] 97 | neg_cur_idx = neg_feats.new(neg_cur_idx).long() 98 | neg_pointer = neg_next 99 | 100 | # create batch 101 | batch_pos_feats = Variable(pos_feats.index_select(0, pos_cur_idx)) 102 | batch_neg_feats = Variable(neg_feats.index_select(0, neg_cur_idx)) 103 | 104 | # hard negative mining 105 | if batch_neg_cand > batch_neg: 106 | model.eval() ## model transfer into evaluation mode 107 | for start in range(0,batch_neg_cand,batch_test): 108 | end = min(start+batch_test,batch_neg_cand) 109 | 110 | if batch_neg_feats[start:end].shape[1] == 9216: 111 | temp_neg_feats = batch_neg_feats[start:end] 112 | else: 113 | temp_neg_feats = torch.cat((batch_neg_feats[start:end], batch_neg_feats[start:end]), dim=1) 114 | 115 | score = model(temp_neg_feats, temp_neg_feats, in_layer=in_layer) 116 | if start==0: 117 | neg_cand_score = score.data[:,1].clone() 118 | else: 119 | neg_cand_score = torch.cat((neg_cand_score, score.data[:,1].clone()),0) 120 | 121 | _, top_idx = neg_cand_score.topk(batch_neg) 122 | batch_neg_feats = batch_neg_feats.index_select(0, Variable(top_idx)) 123 | model.train() ## model transfer into train mode 124 | 125 | # forward 126 | if batch_pos_feats.shape[1] == 9216: 127 | temp_pos_feats = batch_pos_feats 128 | else: 129 | temp_pos_feats = torch.cat((batch_pos_feats, batch_pos_feats), dim=1) 130 | 131 | if batch_neg_feats.shape[1] == 9216: 132 | temp_neg_feats = batch_neg_feats 133 | else: 134 | temp_neg_feats = torch.cat((batch_neg_feats, batch_neg_feats), dim=1) 135 | 136 | # pdb.set_trace() 137 | pos_score = model(temp_pos_feats, temp_pos_feats, in_layer=in_layer) 138 | neg_score = model(temp_neg_feats, temp_neg_feats, in_layer=in_layer) 139 | 140 | # optimize 141 | loss = criterion(pos_score, neg_score) 142 | model.zero_grad() 143 | loss.backward() 144 | torch.nn.utils.clip_grad_norm(model.parameters(), opts['grad_clip']) 145 | optimizer.step() 146 | 147 | if opts['visual_log']: 148 | print("Iter %d, Loss %.4f" % (iter, loss.data[0])) 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | def run_mdnet(img_list_v, img_list_i, init_bbox, gt=None, seq='seq_name ex)Basketball', savefig_dir='', display=False): 161 | 162 | # Init bbox 163 | target_bbox = np.array(init_bbox) 164 | result = np.zeros((len(img_list_v),4)) 165 | result_bb = np.zeros((len(img_list_v),4)) 166 | result[0] = np.copy(target_bbox) 167 | result_bb[0] = np.copy(target_bbox) 168 | 169 | iou_result = np.zeros((len(img_list_v),1)) 170 | 171 | # execution time array 172 | exec_time_result = np.zeros((len(img_list_v),1)) 173 | 174 | # Init model 175 | model = MDNet(opts['model_path']) 176 | if opts['adaptive_align']: 177 | align_h = model.roi_align_model.aligned_height 178 | align_w = model.roi_align_model.aligned_width 179 | spatial_s = model.roi_align_model.spatial_scale 180 | model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s) 181 | if opts['use_gpu']: 182 | model = model.cuda() 183 | 184 | model.set_learnable_params(opts['ft_layers']) 185 | 186 | # Init image crop model 187 | img_crop_model = imgCropper(1.) 188 | if opts['use_gpu']: 189 | img_crop_model.gpuEnable() 190 | 191 | # Init criterion and optimizer 192 | criterion = BinaryLoss() 193 | init_optimizer = set_optimizer(model, opts['lr_init']) 194 | update_optimizer = set_optimizer(model, opts['lr_update']) 195 | 196 | tic = time.time() 197 | # Load first image 198 | cur_image_v = Image.open(img_list_v[0]).convert('RGB') 199 | cur_image_v = np.asarray(cur_image_v) 200 | 201 | cur_image_i = Image.open(img_list_i[0]).convert('RGB') 202 | cur_image_i = np.asarray(cur_image_i) 203 | 204 | 205 | init_targetObject_v = cur_image_v[int(init_bbox[0]):int(init_bbox[0]+init_bbox[2]), int(init_bbox[1]):int(init_bbox[1]+init_bbox[3]), :] 206 | init_targetObject_i = cur_image_i[int(init_bbox[0]):int(init_bbox[0]+init_bbox[2]), int(init_bbox[1]):int(init_bbox[1]+init_bbox[3]), :] 207 | 208 | 209 | # Draw pos/neg samples 210 | ishape = cur_image_v.shape 211 | pos_examples = gen_samples(SampleGenerator('gaussian', (ishape[1],ishape[0]), 0.1, 1.2), target_bbox, opts['n_pos_init'], opts['overlap_pos_init']) 212 | neg_examples = gen_samples(SampleGenerator('uniform', (ishape[1],ishape[0]), 1, 2, 1.1), target_bbox, opts['n_neg_init'], opts['overlap_neg_init']) 213 | neg_examples = np.random.permutation(neg_examples) 214 | 215 | cur_bbreg_examples = gen_samples(SampleGenerator('uniform', (ishape[1],ishape[0]), 0.3, 1.5, 1.1), target_bbox, opts['n_bbreg'], opts['overlap_bbreg'], opts['scale_bbreg']) 216 | 217 | # compute padded sample 218 | padded_x1 = (neg_examples[:,0]-neg_examples[:,2]*(opts['padding']-1.)/2.).min() 219 | padded_y1 = (neg_examples[:,1]-neg_examples[:,3]*(opts['padding']-1.)/2.).min() 220 | padded_x2 = (neg_examples[:,0]+neg_examples[:,2]*(opts['padding']+1.)/2.).max() 221 | padded_y2 = (neg_examples[:,1]+neg_examples[:,3]*(opts['padding']+1.)/2.).max() 222 | padded_scene_box = np.reshape(np.asarray((padded_x1,padded_y1,padded_x2-padded_x1,padded_y2-padded_y1)),(1,4)) 223 | 224 | scene_boxes = np.reshape(np.copy(padded_scene_box), (1,4)) 225 | if opts['jitter']: 226 | ## horizontal shift 227 | jittered_scene_box_horizon = np.copy(padded_scene_box) 228 | jittered_scene_box_horizon[0,0] -= 4. 229 | jitter_scale_horizon = 1. 230 | 231 | ## vertical shift 232 | jittered_scene_box_vertical = np.copy(padded_scene_box) 233 | jittered_scene_box_vertical[0,1] -= 4. 234 | jitter_scale_vertical = 1. 235 | 236 | jittered_scene_box_reduce1 = np.copy(padded_scene_box) 237 | jitter_scale_reduce1 = 1.1 ** (-1) 238 | 239 | ## vertical shift 240 | jittered_scene_box_enlarge1 = np.copy(padded_scene_box) 241 | jitter_scale_enlarge1 = 1.1 ** (1) 242 | 243 | ## scale reduction 244 | jittered_scene_box_reduce2 = np.copy(padded_scene_box) 245 | jitter_scale_reduce2 = 1.1**(-2) 246 | ## scale enlarge 247 | jittered_scene_box_enlarge2 = np.copy(padded_scene_box) 248 | jitter_scale_enlarge2 = 1.1 ** (2) 249 | 250 | scene_boxes = np.concatenate([scene_boxes, jittered_scene_box_horizon, jittered_scene_box_vertical,jittered_scene_box_reduce1,jittered_scene_box_enlarge1,jittered_scene_box_reduce2,jittered_scene_box_enlarge2],axis=0) 251 | jitter_scale = [1.,jitter_scale_horizon,jitter_scale_vertical,jitter_scale_reduce1,jitter_scale_enlarge1,jitter_scale_reduce2,jitter_scale_enlarge2] 252 | else: 253 | jitter_scale = [1.] 254 | 255 | model.eval() 256 | for bidx in range(0,scene_boxes.shape[0]): 257 | crop_img_size = (scene_boxes[bidx, 2:4] * ((opts['img_size'],opts['img_size'])/target_bbox[2:4])).astype('int64')*jitter_scale[bidx] 258 | cropped_image_v, cur_image_var_v = img_crop_model.crop_image(cur_image_v, np.reshape(scene_boxes[bidx],(1,4)), crop_img_size) 259 | cropped_image_v = cropped_image_v - 128. 260 | 261 | cropped_image_i, cur_image_var_i = img_crop_model.crop_image(cur_image_i, np.reshape(scene_boxes[bidx],(1,4)), crop_img_size) 262 | cropped_image_i = cropped_image_i - 128. 263 | 264 | 265 | feat_map_v, feat_map_i, fused_feats = model(cropped_image_v, cropped_image_i, out_layer='conv3') 266 | 267 | rel_target_bbox = np.copy(target_bbox) 268 | rel_target_bbox[0:2] -= scene_boxes[bidx,0:2] 269 | 270 | batch_num = np.zeros((pos_examples.shape[0], 1)) 271 | cur_pos_rois = np.copy(pos_examples) 272 | cur_pos_rois[:,0:2] -= np.repeat(np.reshape(scene_boxes[bidx,0:2],(1,2)),cur_pos_rois.shape[0],axis=0) 273 | scaled_obj_size = float(opts['img_size'])*jitter_scale[bidx] 274 | cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), target_bbox[2:4], opts['padding']) 275 | cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1) 276 | cur_pos_rois = Variable(torch.from_numpy(cur_pos_rois.astype('float32'))).cuda() 277 | 278 | # pdb.set_trace() 279 | cur_pos_feats = model.roi_align_model(fused_feats, cur_pos_rois) 280 | cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1).data.clone() 281 | 282 | # cur_pos_feats_i = model.roi_align_model(feat_map_i, cur_pos_rois) 283 | # cur_pos_feats_i = cur_pos_feats_i.view(cur_pos_feats_i.size(0), -1).data.clone() 284 | 285 | 286 | batch_num = np.zeros((neg_examples.shape[0], 1)) 287 | cur_neg_rois = np.copy(neg_examples) 288 | cur_neg_rois[:,0:2] -= np.repeat(np.reshape(scene_boxes[bidx,0:2],(1,2)),cur_neg_rois.shape[0],axis=0) 289 | cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field, (scaled_obj_size,scaled_obj_size), target_bbox[2:4], opts['padding']) 290 | cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1) 291 | cur_neg_rois = Variable(torch.from_numpy(cur_neg_rois.astype('float32'))).cuda() 292 | 293 | cur_neg_feats = model.roi_align_model(fused_feats, cur_neg_rois) 294 | cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1).data.clone() 295 | 296 | # cur_neg_feats_i = model.roi_align_model(feat_map_i, cur_neg_rois) 297 | # cur_neg_feats_i = cur_neg_feats_i.view(cur_neg_feats_i.size(0), -1).data.clone() 298 | 299 | 300 | ## bbreg rois 301 | batch_num = np.zeros((cur_bbreg_examples.shape[0], 1)) 302 | cur_bbreg_rois = np.copy(cur_bbreg_examples) 303 | cur_bbreg_rois[:,0:2] -= np.repeat(np.reshape(scene_boxes[bidx,0:2],(1,2)), cur_bbreg_rois.shape[0],axis=0) 304 | scaled_obj_size = float(opts['img_size'])*jitter_scale[bidx] 305 | cur_bbreg_rois = samples2maskroi(cur_bbreg_rois, model.receptive_field,(scaled_obj_size,scaled_obj_size), target_bbox[2:4], opts['padding']) 306 | cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois), axis=1) 307 | cur_bbreg_rois = Variable(torch.from_numpy(cur_bbreg_rois.astype('float32'))).cuda() 308 | 309 | cur_bbreg_feats = model.roi_align_model(fused_feats, cur_bbreg_rois) 310 | cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0), -1).data.clone() 311 | 312 | # cur_bbreg_feats_i = model.roi_align_model(feat_map_i, cur_bbreg_rois) 313 | # cur_bbreg_feats_i = cur_bbreg_feats_i.view(cur_bbreg_feats_i.size(0), -1).data.clone() 314 | 315 | 316 | feat_dim = cur_pos_feats.size(-1) 317 | 318 | if bidx==0: 319 | pos_feats = cur_pos_feats 320 | neg_feats = cur_neg_feats 321 | ##bbreg feature 322 | bbreg_feats = cur_bbreg_feats 323 | bbreg_examples = cur_bbreg_examples 324 | else: 325 | pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0) 326 | neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0) 327 | ##bbreg feature 328 | bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0) 329 | bbreg_examples = np.concatenate((bbreg_examples, cur_bbreg_examples), axis=0) 330 | 331 | if pos_feats.size(0) > opts['n_pos_init']: 332 | pos_idx = np.asarray(range(pos_feats.size(0))) 333 | np.random.shuffle(pos_idx) 334 | pos_feats = pos_feats[pos_idx[0:opts['n_pos_init']],:] 335 | if neg_feats.size(0) > opts['n_neg_init']: 336 | neg_idx = np.asarray(range(neg_feats.size(0))) 337 | np.random.shuffle(neg_idx) 338 | neg_feats = neg_feats[neg_idx[0:opts['n_neg_init']], :] 339 | 340 | ##bbreg 341 | if bbreg_feats.size(0) > opts['n_bbreg']: 342 | bbreg_idx = np.asarray(range(bbreg_feats.size(0))) 343 | np.random.shuffle(bbreg_idx) 344 | bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :] 345 | bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']],:] 346 | #print bbreg_examples.shape 347 | 348 | 349 | # init_target_feats = pos_feats[:400] 350 | 351 | 352 | ## open images and crop patch from obj 353 | extra_obj_size = np.array((opts['img_size'],opts['img_size'])) 354 | extra_crop_img_size = extra_obj_size * (opts['padding']+0.6) 355 | replicateNum = 100 356 | for iidx in range(replicateNum): 357 | extra_target_bbox = np.copy(target_bbox) 358 | 359 | extra_scene_box = np.copy(extra_target_bbox) 360 | extra_scene_box_center = extra_scene_box[0:2] + extra_scene_box[2:4] / 2. 361 | extra_scene_box_size = extra_scene_box[2:4] * (opts['padding'] + 0.6) 362 | extra_scene_box[0:2] = extra_scene_box_center - extra_scene_box_size / 2. 363 | extra_scene_box[2:4] = extra_scene_box_size 364 | 365 | extra_shift_offset = np.clip(2. * np.random.randn(2), -4, 4) 366 | cur_extra_scale = 1.1 ** np.clip(np.random.randn(1), -2, 2) 367 | 368 | 369 | extra_scene_box[0] += extra_shift_offset[0] 370 | extra_scene_box[1] += extra_shift_offset[1] 371 | extra_scene_box[2:4] *= cur_extra_scale[0] 372 | 373 | scaled_obj_size = float(opts['img_size']) / cur_extra_scale[0] 374 | 375 | cur_extra_cropped_image_v, _ = img_crop_model.crop_image(cur_image_v, np.reshape(extra_scene_box,(1,4)), extra_crop_img_size) 376 | cur_extra_cropped_image_v = cur_extra_cropped_image_v.detach() 377 | 378 | cur_extra_cropped_image_i, _ = img_crop_model.crop_image(cur_image_i, np.reshape(extra_scene_box,(1,4)), extra_crop_img_size) 379 | cur_extra_cropped_image_i = cur_extra_cropped_image_i.detach() 380 | 381 | # extra_target_bbox = np.array(list(map(int, extra_target_bbox))) 382 | cur_extra_pos_examples = gen_samples(SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),extra_target_bbox, opts['n_pos_init']//replicateNum, opts['overlap_pos_init']) 383 | cur_extra_neg_examples = gen_samples(SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 2, 1.1),extra_target_bbox, opts['n_neg_init']/replicateNum//4, opts['overlap_neg_init']) 384 | 385 | ##bbreg sample 386 | cur_extra_bbreg_examples = gen_samples(SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5, 1.1),extra_target_bbox, opts['n_bbreg']/replicateNum//4, opts['overlap_bbreg'], opts['scale_bbreg']) 387 | 388 | batch_num = iidx*np.ones((cur_extra_pos_examples.shape[0], 1)) 389 | cur_extra_pos_rois = np.copy(cur_extra_pos_examples) 390 | cur_extra_pos_rois[:, 0:2] -= np.repeat(np.reshape(extra_scene_box[0:2], (1, 2)), 391 | cur_extra_pos_rois.shape[0], axis=0) 392 | cur_extra_pos_rois = samples2maskroi(cur_extra_pos_rois, model.receptive_field,(scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4], opts['padding']) 393 | cur_extra_pos_rois = np.concatenate((batch_num, cur_extra_pos_rois), axis=1) 394 | 395 | batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1)) 396 | cur_extra_neg_rois = np.copy(cur_extra_neg_examples) 397 | cur_extra_neg_rois[:, 0:2] -= np.repeat(np.reshape(extra_scene_box[0:2], (1, 2)),cur_extra_neg_rois.shape[0], axis=0) 398 | cur_extra_neg_rois = samples2maskroi(cur_extra_neg_rois, model.receptive_field,(scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4], opts['padding']) 399 | cur_extra_neg_rois = np.concatenate((batch_num, cur_extra_neg_rois), axis=1) 400 | 401 | ## bbreg rois 402 | batch_num = iidx * np.ones((cur_extra_bbreg_examples.shape[0], 1)) 403 | cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples) 404 | cur_extra_bbreg_rois[:,0:2] -= np.repeat(np.reshape(extra_scene_box[0:2],(1,2)),cur_extra_bbreg_rois.shape[0],axis=0) 405 | cur_extra_bbreg_rois = samples2maskroi(cur_extra_bbreg_rois, model.receptive_field,(scaled_obj_size,scaled_obj_size), extra_target_bbox[2:4], opts['padding']) 406 | cur_extra_bbreg_rois = np.concatenate((batch_num, cur_extra_bbreg_rois), axis=1) 407 | 408 | 409 | 410 | if iidx==0: 411 | extra_cropped_image_v = cur_extra_cropped_image_v 412 | extra_cropped_image_i = cur_extra_cropped_image_i 413 | 414 | extra_pos_rois = np.copy(cur_extra_pos_rois) 415 | extra_neg_rois = np.copy(cur_extra_neg_rois) 416 | ##bbreg rois 417 | extra_bbreg_rois = np.copy(cur_extra_bbreg_rois) 418 | extra_bbreg_examples = np.copy(cur_extra_bbreg_examples) 419 | else: 420 | extra_cropped_image_v = torch.cat((extra_cropped_image_v, cur_extra_cropped_image_v),dim=0) 421 | extra_cropped_image_i = torch.cat((extra_cropped_image_i, cur_extra_cropped_image_i),dim=0) 422 | 423 | extra_pos_rois = np.concatenate( (extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0) 424 | extra_neg_rois = np.concatenate( (extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0) 425 | ##bbreg rois 426 | extra_bbreg_rois = np.concatenate( (extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0 ) 427 | extra_bbreg_examples = np.concatenate( (extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)), axis=0 ) 428 | 429 | 430 | extra_pos_rois = Variable(torch.from_numpy(extra_pos_rois.astype('float32'))).cuda() 431 | extra_neg_rois = Variable(torch.from_numpy(extra_neg_rois.astype('float32'))).cuda() 432 | ##bbreg rois 433 | extra_bbreg_rois = Variable(torch.from_numpy(extra_bbreg_rois.astype('float32'))).cuda() 434 | 435 | extra_cropped_image_v -= 128. 436 | extra_cropped_image_i -= 128. 437 | 438 | # pdb.set_trace() 439 | 440 | for iidxxx in range(replicateNum): 441 | temp_extra_cropped_image_v = torch.unsqueeze(extra_cropped_image_v[iidxxx], dim=0) 442 | temp_extra_cropped_image_i = torch.unsqueeze(extra_cropped_image_i[iidxxx], dim=0) 443 | temp_extra_feat_maps_v, temp_extra_feat_maps_i, temp_extra_feat_maps = model(temp_extra_cropped_image_v, temp_extra_cropped_image_i, out_layer='conv3') 444 | temp_extra_feat_maps = torch.squeeze(temp_extra_feat_maps, dim=0) 445 | # temp_extra_feat_maps_i = torch.squeeze(temp_extra_feat_maps_i, dim=0) 446 | 447 | if iidxxx == 0: 448 | extra_feat_maps = torch.zeros(replicateNum, temp_extra_feat_maps.shape[0], temp_extra_feat_maps.shape[1], temp_extra_feat_maps.shape[2]) 449 | # extra_feat_maps_i = torch.zeros(replicateNum, temp_extra_feat_maps_i.shape[0], temp_extra_feat_maps_i.shape[1], temp_extra_feat_maps_i.shape[2]) 450 | 451 | extra_feat_maps[iidxxx] = temp_extra_feat_maps 452 | # extra_feat_maps_i[iidxxx] = temp_extra_feat_maps_i 453 | 454 | extra_feat_maps = extra_feat_maps.cuda() 455 | 456 | 457 | # Draw pos/neg samples 458 | ishape = cur_image_v.shape 459 | 460 | # pdb.set_trace() 461 | extra_pos_feats = model.roi_align_model(extra_feat_maps, extra_pos_rois) 462 | extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0), -1).data.clone() 463 | 464 | 465 | extra_neg_feats = model.roi_align_model(extra_feat_maps, extra_neg_rois) 466 | extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0), -1).data.clone() 467 | 468 | ##bbreg feat 469 | extra_bbreg_feats = model.roi_align_model(extra_feat_maps, extra_bbreg_rois) 470 | extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0), -1).data.clone() 471 | 472 | ## concatenate extra features to original_features 473 | pos_feats = torch.cat((pos_feats, extra_pos_feats),dim=0) 474 | neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0) 475 | ## concatenate extra bbreg feats to original_bbreg_feats 476 | bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0) 477 | bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples), axis=0) 478 | 479 | torch.cuda.empty_cache() 480 | model.zero_grad() 481 | 482 | # Initial training 483 | train(model, criterion, init_optimizer, pos_feats, neg_feats, opts['maxiter_init']) 484 | 485 | ##bbreg train 486 | if bbreg_feats.size(0) > opts['n_bbreg']: 487 | bbreg_idx = np.asarray(range(bbreg_feats.size(0))) 488 | np.random.shuffle(bbreg_idx) 489 | bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :] 490 | bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :] 491 | 492 | bbreg = BBRegressor((ishape[1], ishape[0])) 493 | bbreg.train(bbreg_feats, bbreg_examples, target_bbox) 494 | 495 | 496 | if pos_feats.size(0) > opts['n_pos_update']: 497 | pos_idx = np.asarray(range(pos_feats.size(0))) 498 | np.random.shuffle(pos_idx) 499 | pos_feats_all = [pos_feats.index_select(0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())] 500 | if neg_feats.size(0) > opts['n_neg_update']: 501 | neg_idx = np.asarray(range(neg_feats.size(0))) 502 | np.random.shuffle(neg_idx) 503 | neg_feats_all = [neg_feats.index_select(0, torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())] 504 | 505 | 506 | spf_total = time.time()-tic 507 | 508 | # Display 509 | savefig = savefig_dir != '' 510 | if display or savefig: 511 | dpi = 80.0 512 | figsize = (cur_image_v.shape[1]/dpi, cur_image_v.shape[0]/dpi) 513 | 514 | fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi) 515 | ax = plt.Axes(fig, [0., 0., 1., 1.]) 516 | ax.set_axis_off() 517 | fig.add_axes(ax) 518 | im = ax.imshow(cur_image_v) 519 | 520 | if gt is not None: 521 | gt_rect = plt.Rectangle(tuple(gt[0,:2]),gt[0,2],gt[0,3], linewidth=3, edgecolor="#00ff00", zorder=1, fill=False) 522 | ax.add_patch(gt_rect) 523 | 524 | rect = plt.Rectangle(tuple(result_bb[0,:2]),result_bb[0,2],result_bb[0,3], linewidth=3, edgecolor="#ff0000", zorder=1, fill=False) 525 | ax.add_patch(rect) 526 | 527 | if display: 528 | plt.pause(.01) 529 | plt.draw() 530 | if savefig: 531 | fig.savefig(os.path.join(savefig_dir,'0000.jpg'),dpi=dpi) 532 | 533 | 534 | ##################################################################### 535 | #### Main loop 536 | ##################################################################### 537 | failure_count = 0 538 | trans_f = opts['trans_f'] 539 | 540 | for i in range(1, len(img_list_v)): 541 | 542 | tic = time.time() 543 | # Load image 544 | cur_image_v = Image.open(img_list_v[i]).convert('RGB') 545 | cur_image_v = np.asarray(cur_image_v) 546 | cur_image_i = Image.open(img_list_i[i]).convert('RGB') 547 | cur_image_i = np.asarray(cur_image_i) 548 | 549 | # Estimate target bbox 550 | ishape = cur_image_v.shape 551 | samples = gen_samples(SampleGenerator('gaussian', (ishape[1], ishape[0]), trans_f, opts['scale_f'], valid=True), target_bbox, opts['n_samples']) 552 | 553 | 554 | 555 | ######################################################################### 556 | #### Target-Aware Attention Prediction 557 | ######################################################################### 558 | 559 | attention_path = "/daTANet_rgbt_234_Attention/" + seq + "/" 560 | attentionImage_name = str(i+1) + "_attentionMap.jpg" 561 | 562 | # pdb.set_trace() 563 | attentionFlag = os.path.exists(attention_path + attentionImage_name) 564 | # print("==>> attentionFlag ", attentionFlag) 565 | 566 | if failure_count >= 6 and attentionFlag: 567 | 568 | attentionMap = Image.open(attention_path+attentionImage_name).convert('RGB') 569 | attentionMap = np.asarray(attentionMap) 570 | # pdb.set_trace() 571 | 572 | dynamic_atttentonMAP = cv2.resize(attentionMap, (cur_image_v.shape[1], cur_image_v.shape[0]), interpolation=cv2.INTER_LINEAR) 573 | ret, static_atttentonMAP = cv2.threshold(dynamic_atttentonMAP, 100, 255, cv2.THRESH_BINARY) 574 | # cv2.imwrite('static_atttentonMAP.png', static_atttentonMAP) 575 | 576 | # pdb.set_trace() 577 | 578 | label_image = measure.label(static_atttentonMAP) 579 | props = measure.regionprops(label_image) 580 | 581 | atttenton_BBox = [] 582 | attention_centerLoc = [] 583 | similarity_glob_target_max = 0 584 | global_samples = [] 585 | 586 | #### for each candidate search region 587 | # for iii in range(len(props)): 588 | 589 | if len(props) > 1: 590 | attNum = 1 591 | else: 592 | attNum = len(props) 593 | 594 | for iii in range(attNum): 595 | center_position = props[iii].centroid 596 | center_position = [int(center_position[1]), int(center_position[0])] 597 | 598 | centerPos_prev_x = target_bbox[0] + target_bbox[2] / 2 599 | centerPos_prev_y = target_bbox[1] + target_bbox[3] / 2 600 | 601 | if math.fabs(center_position[0] - centerPos_prev_x) < 30 and math.fabs(center_position[1] - centerPos_prev_y) < 30: 602 | 603 | bbox = props[iii].bbox 604 | 605 | new_bbox2 = np.zeros((4)) 606 | new_bbox2[0] = center_position[0] - target_bbox[2]/2 607 | new_bbox2[1] = center_position[1] - target_bbox[3]/2 608 | new_bbox2[2] = target_bbox[2] 609 | new_bbox2[3] = target_bbox[3] 610 | 611 | 612 | # if new_bbox[2] > 10 and new_bbox[3] > 10: 613 | # switch_candidate_samples2 = sample_generator(new_bbox2, 100) 614 | switch_samples2 = gen_samples(SampleGenerator('gaussian', (ishape[1], ishape[0]), trans_f, opts['scale_f'], valid=True), new_bbox2, 256) 615 | # global_samples.append(switch_samples2) 616 | # pdb.set_trace() 617 | # samples = np.concatenate((switch_samples2, samples)) 618 | samples = switch_samples2 619 | 620 | # print("==>> Using Global Proposals and samples: ", samples.shape[0]) 621 | # samples = np.concatenate((switch_samples2, samples)) 622 | 623 | 624 | padded_x1 = (samples[:, 0] - samples[:, 2]*(opts['padding']-1.)/2.).min() 625 | padded_y1 = (samples[:, 1] - samples[:, 3]*(opts['padding']-1.)/2.).min() 626 | padded_x2 = (samples[:, 0] + samples[:, 2]*(opts['padding']+1.)/2.).max() 627 | padded_y2 = (samples[:, 1] + samples[:, 3]*(opts['padding']+1.)/2.).max() 628 | padded_scene_box = np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1)) 629 | 630 | if padded_scene_box[0] > cur_image_v.shape[1]: 631 | padded_scene_box[0] = cur_image_v.shape[1]-1 632 | if padded_scene_box[1] > cur_image_v.shape[0]: 633 | padded_scene_box[1] = cur_image_v.shape[0]-1 634 | if padded_scene_box[0] + padded_scene_box[2] < 0: 635 | padded_scene_box[2] = -padded_scene_box[0]+1 636 | if padded_scene_box[1] + padded_scene_box[3] < 0: 637 | padded_scene_box[3] = -padded_scene_box[1]+1 638 | 639 | 640 | crop_img_size = (padded_scene_box[2:4] * ((opts['img_size'], opts['img_size']) / target_bbox[2:4])).astype('int64') 641 | cropped_image_v, cur_image_var_v = img_crop_model.crop_image(cur_image_v, np.reshape(padded_scene_box,(1,4)), crop_img_size) 642 | cropped_image_v = cropped_image_v - 128. 643 | cropped_image_i, cur_image_var_i = img_crop_model.crop_image(cur_image_i, np.reshape(padded_scene_box,(1,4)), crop_img_size) 644 | cropped_image_i = cropped_image_i - 128. 645 | 646 | model.eval() 647 | feat_map_v, feat_map_i, feat_map = model(cropped_image_v, cropped_image_i, out_layer='conv3') 648 | 649 | # relative target bbox with padded_scene_box 650 | rel_target_bbox = np.copy(target_bbox) 651 | rel_target_bbox[0:2] -= padded_scene_box[0:2] 652 | 653 | 654 | # Extract sample features and get target location 655 | batch_num = np.zeros((samples.shape[0], 1)) 656 | sample_rois = np.copy(samples) 657 | sample_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)), sample_rois.shape[0], axis=0) 658 | sample_rois = samples2maskroi(sample_rois, model.receptive_field, (opts['img_size'],opts['img_size']), target_bbox[2:4],opts['padding']) 659 | sample_rois = np.concatenate((batch_num, sample_rois), axis=1) 660 | sample_rois = Variable(torch.from_numpy(sample_rois.astype('float32'))).cuda() 661 | 662 | sample_feats = model.roi_align_model(feat_map, sample_rois) 663 | sample_feats = sample_feats.view(sample_feats.size(0), -1).clone() 664 | 665 | sample_scores = model(sample_feats, sample_feats, in_layer='fc4') 666 | top_scores, top_idx = sample_scores[:, 1].topk(5) 667 | top_idx = top_idx.data.cpu().numpy() 668 | target_score = top_scores.data.mean() 669 | target_bbox = samples[top_idx].mean(axis=0) 670 | 671 | success = target_score > opts['success_thr'] 672 | 673 | # # Expand search area at failure 674 | if success: 675 | trans_f = opts['trans_f'] 676 | else: 677 | trans_f = opts['trans_f_expand'] 678 | 679 | ## Bbox regression 680 | if success: 681 | bbreg_feats = sample_feats[top_idx,:] 682 | bbreg_samples = samples[top_idx] 683 | bbreg_samples = bbreg.predict(bbreg_feats.data, bbreg_samples) 684 | bbreg_bbox = bbreg_samples.mean(axis=0) 685 | 686 | if failure_count >= 3: 687 | failure_count = failure_count - 3 688 | else: 689 | failure_count = 0 690 | else: 691 | bbreg_bbox = target_bbox 692 | failure_count = failure_count + 1 693 | 694 | # Save result 695 | result[i] = target_bbox 696 | result_bb[i] = bbreg_bbox 697 | iou_result[i] = 1. 698 | 699 | # Data collect 700 | if success: 701 | 702 | # Draw pos/neg samples 703 | pos_examples = gen_samples( 704 | SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2), target_bbox, 705 | opts['n_pos_update'], 706 | opts['overlap_pos_update']) 707 | neg_examples = gen_samples( 708 | SampleGenerator('uniform', (ishape[1], ishape[0]), 1.5, 1.2), target_bbox, 709 | opts['n_neg_update'], 710 | opts['overlap_neg_update']) 711 | 712 | padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] * (opts['padding'] - 1.) / 2.).min() 713 | padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] * (opts['padding'] - 1.) / 2.).min() 714 | padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] * (opts['padding'] + 1.) / 2.).max() 715 | padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] * (opts['padding'] + 1.) / 2.).max() 716 | padded_scene_box = np.reshape(np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1)),(1,4)) 717 | 718 | scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4)) 719 | jitter_scale = [1.] 720 | 721 | for bidx in range(0, scene_boxes.shape[0]): 722 | crop_img_size = (scene_boxes[bidx, 2:4] * ((opts['img_size'], opts['img_size']) / target_bbox[2:4])).astype('int64') * jitter_scale[bidx] 723 | cropped_image_v, cur_image_var_v = img_crop_model.crop_image(cur_image_v, np.reshape(scene_boxes[bidx], (1, 4)),crop_img_size) 724 | cropped_image_v = cropped_image_v - 128. 725 | cropped_image_i, cur_image_var_i = img_crop_model.crop_image(cur_image_i, np.reshape(scene_boxes[bidx], (1, 4)),crop_img_size) 726 | cropped_image_i = cropped_image_i - 128. 727 | 728 | feat_map_v, feat_map_i, feat_map = model(cropped_image_v, cropped_image_i, out_layer='conv3') 729 | 730 | rel_target_bbox = np.copy(target_bbox) 731 | rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2] 732 | 733 | batch_num = np.zeros((pos_examples.shape[0], 1)) 734 | cur_pos_rois = np.copy(pos_examples) 735 | cur_pos_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)), cur_pos_rois.shape[0],axis=0) 736 | scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx] 737 | cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size),target_bbox[2:4], opts['padding']) 738 | cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1) 739 | cur_pos_rois = Variable(torch.from_numpy(cur_pos_rois.astype('float32'))).cuda() 740 | 741 | cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois) 742 | cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1).data.clone() 743 | 744 | batch_num = np.zeros((neg_examples.shape[0], 1)) 745 | cur_neg_rois = np.copy(neg_examples) 746 | cur_neg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)), cur_neg_rois.shape[0], axis=0) 747 | cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), target_bbox[2:4], opts['padding']) 748 | cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1) 749 | cur_neg_rois = Variable(torch.from_numpy(cur_neg_rois.astype('float32'))).cuda() 750 | 751 | cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois) 752 | cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1).data.clone() 753 | 754 | 755 | feat_dim = cur_pos_feats.size(-1) 756 | 757 | if bidx == 0: 758 | pos_feats = cur_pos_feats ##index select 759 | neg_feats = cur_neg_feats 760 | else: 761 | pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0) 762 | neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0) 763 | 764 | if pos_feats.size(0) > opts['n_pos_update']: 765 | pos_idx = np.asarray(range(pos_feats.size(0))) 766 | np.random.shuffle(pos_idx) 767 | pos_feats = pos_feats.index_select(0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda()) 768 | if neg_feats.size(0) > opts['n_neg_update']: 769 | neg_idx = np.asarray(range(neg_feats.size(0))) 770 | np.random.shuffle(neg_idx) 771 | neg_feats = neg_feats.index_select(0,torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda()) 772 | 773 | pos_feats_all.append(pos_feats) 774 | neg_feats_all.append(neg_feats) 775 | 776 | if len(pos_feats_all) > opts['n_frames_long']: 777 | del pos_feats_all[0] 778 | if len(neg_feats_all) > opts['n_frames_short']: 779 | del neg_feats_all[0] 780 | 781 | # Short term update 782 | if not success: 783 | nframes = min(opts['n_frames_short'],len(pos_feats_all)) 784 | pos_data = torch.stack(pos_feats_all[-nframes:],0).view(-1,feat_dim) 785 | neg_data = torch.stack(neg_feats_all,0).view(-1,feat_dim) 786 | train(model, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update']) 787 | 788 | # Long term update 789 | elif i % opts['long_interval'] == 0: 790 | pos_data = torch.stack(pos_feats_all,0).view(-1,feat_dim) 791 | neg_data = torch.stack(neg_feats_all,0).view(-1,feat_dim) 792 | train(model, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update']) 793 | 794 | spf = time.time()-tic 795 | spf_total += spf 796 | 797 | # Display 798 | if display or savefig: 799 | im.set_data(cur_image_v) 800 | 801 | if gt is not None: 802 | gt_rect.set_xy(gt[i,:2]) 803 | gt_rect.set_width(gt[i,2]) 804 | gt_rect.set_height(gt[i,3]) 805 | 806 | rect.set_xy(result_bb[i,:2]) 807 | rect.set_width(result_bb[i,2]) 808 | rect.set_height(result_bb[i,3]) 809 | 810 | if display: 811 | plt.pause(.01) 812 | plt.draw() 813 | if savefig: 814 | fig.savefig(os.path.join(savefig_dir,'%04d.jpg'%(i)),dpi=dpi) 815 | 816 | if opts['visual_log']: 817 | if gt is None: 818 | print("Frame %d/%d, Score %.3f, Time %.3f" % \ 819 | (i, len(img_list), target_score, spf)) 820 | else: 821 | print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \ 822 | (i, len(img_list), overlap_ratio(gt[i],result_bb[i])[0], target_score, spf)) 823 | 824 | print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \ 825 | (i, len(img_list_v), overlap_ratio(gt[i], result_bb[i])[0], target_score, spf)) 826 | 827 | iou_result[i]= overlap_ratio(gt[i],result_bb[i])[0] 828 | 829 | 830 | fps = len(img_list_v) / spf_total 831 | 832 | # pdb.set_trace() 833 | # print("==>> epochID %d, L1-Loss %.4f, Time %.3f" % (epochID, total_l1_Loss/len(img_list_v), spf_total)) 834 | 835 | 836 | return iou_result, result_bb, fps, result 837 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/tracker_backup.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import time 4 | 5 | ## for drawing package 6 | import matplotlib.pyplot as plt 7 | import matplotlib.patches as patches 8 | 9 | import torch.optim as optim 10 | from torch.autograd import Variable 11 | from random import randint 12 | 13 | sys.path.insert(0,'./modules') 14 | from sample_generator import * 15 | from data_prov import * 16 | from model import * 17 | from bbreg import * 18 | from options import * 19 | from img_cropper import * 20 | from roi_align import RoIAlignAvg,RoIAlignMax,RoIAlignAdaMax 21 | 22 | 23 | 24 | # sys.path.insert(0,'./naive_rgbt_TANet_module/') 25 | # from generator import naive_Generator 26 | # from utils import * 27 | import pdb 28 | import warnings 29 | warnings.filterwarnings("ignore") 30 | import torchvision.transforms as transforms 31 | import random 32 | import cv2 33 | from skimage import measure, draw 34 | 35 | 36 | # generator_path = './naive_rgbt_TANet_module/naive_TANet_rgbt_model.pkl' 37 | 38 | # Generator = naive_Generator() 39 | # Generator.load_state_dict(torch.load(generator_path)) 40 | # Generator.cuda() 41 | 42 | 43 | np.random.seed(123) 44 | torch.manual_seed(456) 45 | torch.cuda.manual_seed(789) 46 | 47 | # torch.set_default_tensor_type(torch.cuda.FloatTensor) 48 | 49 | ################################################################################## 50 | ############################Do not modify opts anymore.########################### 51 | ######################Becuase of synchronization of options####################### 52 | ################################################################################## 53 | 54 | def set_optimizer(model, lr_base, lr_mult=opts['lr_mult'], momentum=opts['momentum'], w_decay=opts['w_decay']): 55 | params = model.get_learnable_params() 56 | param_list = [] 57 | for k, p in params.items(): 58 | lr = lr_base 59 | for l, m in lr_mult.items(): 60 | if k.startswith(l): 61 | lr = lr_base * m 62 | param_list.append({'params': [p], 'lr':lr}) 63 | optimizer = optim.SGD(param_list, lr = lr, momentum=momentum, weight_decay=w_decay) 64 | return optimizer 65 | 66 | 67 | def train(model, criterion, optimizer, pos_feats, neg_feats, maxiter, in_layer='fc4'): 68 | model.train() 69 | 70 | batch_pos = opts['batch_pos'] 71 | batch_neg = opts['batch_neg'] 72 | batch_test = opts['batch_test'] 73 | batch_neg_cand = max(opts['batch_neg_cand'], batch_neg) 74 | 75 | pos_idx = np.random.permutation(pos_feats.size(0)) 76 | neg_idx = np.random.permutation(neg_feats.size(0)) 77 | while(len(pos_idx) < batch_pos*maxiter): 78 | pos_idx = np.concatenate([pos_idx, np.random.permutation(pos_feats.size(0))]) 79 | while(len(neg_idx) < batch_neg_cand*maxiter): 80 | neg_idx = np.concatenate([neg_idx, np.random.permutation(neg_feats.size(0))]) 81 | pos_pointer = 0 82 | neg_pointer = 0 83 | 84 | 85 | 86 | for iter in range(maxiter): 87 | 88 | # select pos idx 89 | pos_next = pos_pointer + batch_pos 90 | pos_cur_idx = pos_idx[pos_pointer:pos_next] 91 | pos_cur_idx = pos_feats.new(pos_cur_idx).long() 92 | pos_pointer = pos_next 93 | 94 | # select neg idx 95 | neg_next = neg_pointer + batch_neg_cand 96 | neg_cur_idx = neg_idx[neg_pointer:neg_next] 97 | neg_cur_idx = neg_feats.new(neg_cur_idx).long() 98 | neg_pointer = neg_next 99 | 100 | # create batch 101 | batch_pos_feats = Variable(pos_feats.index_select(0, pos_cur_idx)) 102 | batch_neg_feats = Variable(neg_feats.index_select(0, neg_cur_idx)) 103 | 104 | # hard negative mining 105 | if batch_neg_cand > batch_neg: 106 | model.eval() ## model transfer into evaluation mode 107 | for start in range(0,batch_neg_cand,batch_test): 108 | end = min(start+batch_test,batch_neg_cand) 109 | 110 | if batch_neg_feats[start:end].shape[1] == 9216: 111 | temp_neg_feats = batch_neg_feats[start:end] 112 | else: 113 | temp_neg_feats = torch.cat((batch_neg_feats[start:end], batch_neg_feats[start:end]), dim=1) 114 | 115 | score = model(temp_neg_feats, temp_neg_feats, in_layer=in_layer) 116 | if start==0: 117 | neg_cand_score = score.data[:,1].clone() 118 | else: 119 | neg_cand_score = torch.cat((neg_cand_score, score.data[:,1].clone()),0) 120 | 121 | _, top_idx = neg_cand_score.topk(batch_neg) 122 | batch_neg_feats = batch_neg_feats.index_select(0, Variable(top_idx)) 123 | model.train() ## model transfer into train mode 124 | 125 | # forward 126 | if batch_pos_feats.shape[1] == 9216: 127 | temp_pos_feats = batch_pos_feats 128 | else: 129 | temp_pos_feats = torch.cat((batch_pos_feats, batch_pos_feats), dim=1) 130 | 131 | if batch_neg_feats.shape[1] == 9216: 132 | temp_neg_feats = batch_neg_feats 133 | else: 134 | temp_neg_feats = torch.cat((batch_neg_feats, batch_neg_feats), dim=1) 135 | 136 | # pdb.set_trace() 137 | pos_score = model(temp_pos_feats, temp_pos_feats, in_layer=in_layer) 138 | neg_score = model(temp_neg_feats, temp_neg_feats, in_layer=in_layer) 139 | 140 | # optimize 141 | loss = criterion(pos_score, neg_score) 142 | model.zero_grad() 143 | loss.backward() 144 | torch.nn.utils.clip_grad_norm(model.parameters(), opts['grad_clip']) 145 | optimizer.step() 146 | 147 | if opts['visual_log']: 148 | print("Iter %d, Loss %.4f" % (iter, loss.data[0])) 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | def run_mdnet(img_list_v, img_list_i, init_bbox, gt=None, seq='seq_name ex)Basketball', savefig_dir='', display=False): 161 | 162 | # Init bbox 163 | target_bbox = np.array(init_bbox) 164 | result = np.zeros((len(img_list_v),4)) 165 | result_bb = np.zeros((len(img_list_v),4)) 166 | result[0] = np.copy(target_bbox) 167 | result_bb[0] = np.copy(target_bbox) 168 | 169 | iou_result = np.zeros((len(img_list_v),1)) 170 | 171 | # execution time array 172 | exec_time_result = np.zeros((len(img_list_v),1)) 173 | 174 | # Init model 175 | model = MDNet(opts['model_path']) 176 | if opts['adaptive_align']: 177 | align_h = model.roi_align_model.aligned_height 178 | align_w = model.roi_align_model.aligned_width 179 | spatial_s = model.roi_align_model.spatial_scale 180 | model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s) 181 | if opts['use_gpu']: 182 | model = model.cuda() 183 | 184 | model.set_learnable_params(opts['ft_layers']) 185 | 186 | # Init image crop model 187 | img_crop_model = imgCropper(1.) 188 | if opts['use_gpu']: 189 | img_crop_model.gpuEnable() 190 | 191 | # Init criterion and optimizer 192 | criterion = BinaryLoss() 193 | init_optimizer = set_optimizer(model, opts['lr_init']) 194 | update_optimizer = set_optimizer(model, opts['lr_update']) 195 | 196 | tic = time.time() 197 | # Load first image 198 | cur_image_v = Image.open(img_list_v[0]).convert('RGB') 199 | cur_image_v = np.asarray(cur_image_v) 200 | 201 | cur_image_i = Image.open(img_list_i[0]).convert('RGB') 202 | cur_image_i = np.asarray(cur_image_i) 203 | 204 | 205 | init_targetObject_v = cur_image_v[int(init_bbox[0]):int(init_bbox[0]+init_bbox[2]), int(init_bbox[1]):int(init_bbox[1]+init_bbox[3]), :] 206 | init_targetObject_i = cur_image_i[int(init_bbox[0]):int(init_bbox[0]+init_bbox[2]), int(init_bbox[1]):int(init_bbox[1]+init_bbox[3]), :] 207 | 208 | 209 | # Draw pos/neg samples 210 | ishape = cur_image_v.shape 211 | pos_examples = gen_samples(SampleGenerator('gaussian', (ishape[1],ishape[0]), 0.1, 1.2), target_bbox, opts['n_pos_init'], opts['overlap_pos_init']) 212 | neg_examples = gen_samples(SampleGenerator('uniform', (ishape[1],ishape[0]), 1, 2, 1.1), target_bbox, opts['n_neg_init'], opts['overlap_neg_init']) 213 | neg_examples = np.random.permutation(neg_examples) 214 | 215 | cur_bbreg_examples = gen_samples(SampleGenerator('uniform', (ishape[1],ishape[0]), 0.3, 1.5, 1.1), target_bbox, opts['n_bbreg'], opts['overlap_bbreg'], opts['scale_bbreg']) 216 | 217 | # compute padded sample 218 | padded_x1 = (neg_examples[:,0]-neg_examples[:,2]*(opts['padding']-1.)/2.).min() 219 | padded_y1 = (neg_examples[:,1]-neg_examples[:,3]*(opts['padding']-1.)/2.).min() 220 | padded_x2 = (neg_examples[:,0]+neg_examples[:,2]*(opts['padding']+1.)/2.).max() 221 | padded_y2 = (neg_examples[:,1]+neg_examples[:,3]*(opts['padding']+1.)/2.).max() 222 | padded_scene_box = np.reshape(np.asarray((padded_x1,padded_y1,padded_x2-padded_x1,padded_y2-padded_y1)),(1,4)) 223 | 224 | scene_boxes = np.reshape(np.copy(padded_scene_box), (1,4)) 225 | if opts['jitter']: 226 | ## horizontal shift 227 | jittered_scene_box_horizon = np.copy(padded_scene_box) 228 | jittered_scene_box_horizon[0,0] -= 4. 229 | jitter_scale_horizon = 1. 230 | 231 | ## vertical shift 232 | jittered_scene_box_vertical = np.copy(padded_scene_box) 233 | jittered_scene_box_vertical[0,1] -= 4. 234 | jitter_scale_vertical = 1. 235 | 236 | jittered_scene_box_reduce1 = np.copy(padded_scene_box) 237 | jitter_scale_reduce1 = 1.1 ** (-1) 238 | 239 | ## vertical shift 240 | jittered_scene_box_enlarge1 = np.copy(padded_scene_box) 241 | jitter_scale_enlarge1 = 1.1 ** (1) 242 | 243 | ## scale reduction 244 | jittered_scene_box_reduce2 = np.copy(padded_scene_box) 245 | jitter_scale_reduce2 = 1.1**(-2) 246 | ## scale enlarge 247 | jittered_scene_box_enlarge2 = np.copy(padded_scene_box) 248 | jitter_scale_enlarge2 = 1.1 ** (2) 249 | 250 | scene_boxes = np.concatenate([scene_boxes, jittered_scene_box_horizon, jittered_scene_box_vertical,jittered_scene_box_reduce1,jittered_scene_box_enlarge1,jittered_scene_box_reduce2,jittered_scene_box_enlarge2],axis=0) 251 | jitter_scale = [1.,jitter_scale_horizon,jitter_scale_vertical,jitter_scale_reduce1,jitter_scale_enlarge1,jitter_scale_reduce2,jitter_scale_enlarge2] 252 | else: 253 | jitter_scale = [1.] 254 | 255 | model.eval() 256 | for bidx in range(0,scene_boxes.shape[0]): 257 | crop_img_size = (scene_boxes[bidx, 2:4] * ((opts['img_size'],opts['img_size'])/target_bbox[2:4])).astype('int64')*jitter_scale[bidx] 258 | cropped_image_v, cur_image_var_v = img_crop_model.crop_image(cur_image_v, np.reshape(scene_boxes[bidx],(1,4)), crop_img_size) 259 | cropped_image_v = cropped_image_v - 128. 260 | 261 | cropped_image_i, cur_image_var_i = img_crop_model.crop_image(cur_image_i, np.reshape(scene_boxes[bidx],(1,4)), crop_img_size) 262 | cropped_image_i = cropped_image_i - 128. 263 | 264 | 265 | feat_map_v, feat_map_i, fused_feats = model(cropped_image_v, cropped_image_i, out_layer='conv3') 266 | 267 | rel_target_bbox = np.copy(target_bbox) 268 | rel_target_bbox[0:2] -= scene_boxes[bidx,0:2] 269 | 270 | batch_num = np.zeros((pos_examples.shape[0], 1)) 271 | cur_pos_rois = np.copy(pos_examples) 272 | cur_pos_rois[:,0:2] -= np.repeat(np.reshape(scene_boxes[bidx,0:2],(1,2)),cur_pos_rois.shape[0],axis=0) 273 | scaled_obj_size = float(opts['img_size'])*jitter_scale[bidx] 274 | cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), target_bbox[2:4], opts['padding']) 275 | cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1) 276 | cur_pos_rois = Variable(torch.from_numpy(cur_pos_rois.astype('float32'))).cuda() 277 | 278 | # pdb.set_trace() 279 | cur_pos_feats = model.roi_align_model(fused_feats, cur_pos_rois) 280 | cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1).data.clone() 281 | 282 | # cur_pos_feats_i = model.roi_align_model(feat_map_i, cur_pos_rois) 283 | # cur_pos_feats_i = cur_pos_feats_i.view(cur_pos_feats_i.size(0), -1).data.clone() 284 | 285 | 286 | batch_num = np.zeros((neg_examples.shape[0], 1)) 287 | cur_neg_rois = np.copy(neg_examples) 288 | cur_neg_rois[:,0:2] -= np.repeat(np.reshape(scene_boxes[bidx,0:2],(1,2)),cur_neg_rois.shape[0],axis=0) 289 | cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field, (scaled_obj_size,scaled_obj_size), target_bbox[2:4], opts['padding']) 290 | cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1) 291 | cur_neg_rois = Variable(torch.from_numpy(cur_neg_rois.astype('float32'))).cuda() 292 | 293 | cur_neg_feats = model.roi_align_model(fused_feats, cur_neg_rois) 294 | cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1).data.clone() 295 | 296 | # cur_neg_feats_i = model.roi_align_model(feat_map_i, cur_neg_rois) 297 | # cur_neg_feats_i = cur_neg_feats_i.view(cur_neg_feats_i.size(0), -1).data.clone() 298 | 299 | 300 | ## bbreg rois 301 | batch_num = np.zeros((cur_bbreg_examples.shape[0], 1)) 302 | cur_bbreg_rois = np.copy(cur_bbreg_examples) 303 | cur_bbreg_rois[:,0:2] -= np.repeat(np.reshape(scene_boxes[bidx,0:2],(1,2)), cur_bbreg_rois.shape[0],axis=0) 304 | scaled_obj_size = float(opts['img_size'])*jitter_scale[bidx] 305 | cur_bbreg_rois = samples2maskroi(cur_bbreg_rois, model.receptive_field,(scaled_obj_size,scaled_obj_size), target_bbox[2:4], opts['padding']) 306 | cur_bbreg_rois = np.concatenate((batch_num, cur_bbreg_rois), axis=1) 307 | cur_bbreg_rois = Variable(torch.from_numpy(cur_bbreg_rois.astype('float32'))).cuda() 308 | 309 | cur_bbreg_feats = model.roi_align_model(fused_feats, cur_bbreg_rois) 310 | cur_bbreg_feats = cur_bbreg_feats.view(cur_bbreg_feats.size(0), -1).data.clone() 311 | 312 | # cur_bbreg_feats_i = model.roi_align_model(feat_map_i, cur_bbreg_rois) 313 | # cur_bbreg_feats_i = cur_bbreg_feats_i.view(cur_bbreg_feats_i.size(0), -1).data.clone() 314 | 315 | 316 | feat_dim = cur_pos_feats.size(-1) 317 | 318 | if bidx==0: 319 | pos_feats = cur_pos_feats 320 | neg_feats = cur_neg_feats 321 | ##bbreg feature 322 | bbreg_feats = cur_bbreg_feats 323 | bbreg_examples = cur_bbreg_examples 324 | else: 325 | pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0) 326 | neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0) 327 | ##bbreg feature 328 | bbreg_feats = torch.cat((bbreg_feats, cur_bbreg_feats), dim=0) 329 | bbreg_examples = np.concatenate((bbreg_examples, cur_bbreg_examples), axis=0) 330 | 331 | if pos_feats.size(0) > opts['n_pos_init']: 332 | pos_idx = np.asarray(range(pos_feats.size(0))) 333 | np.random.shuffle(pos_idx) 334 | pos_feats = pos_feats[pos_idx[0:opts['n_pos_init']],:] 335 | if neg_feats.size(0) > opts['n_neg_init']: 336 | neg_idx = np.asarray(range(neg_feats.size(0))) 337 | np.random.shuffle(neg_idx) 338 | neg_feats = neg_feats[neg_idx[0:opts['n_neg_init']], :] 339 | 340 | ##bbreg 341 | if bbreg_feats.size(0) > opts['n_bbreg']: 342 | bbreg_idx = np.asarray(range(bbreg_feats.size(0))) 343 | np.random.shuffle(bbreg_idx) 344 | bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :] 345 | bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']],:] 346 | #print bbreg_examples.shape 347 | 348 | 349 | # init_target_feats = pos_feats[:400] 350 | 351 | 352 | ## open images and crop patch from obj 353 | extra_obj_size = np.array((opts['img_size'],opts['img_size'])) 354 | extra_crop_img_size = extra_obj_size * (opts['padding']+0.6) 355 | replicateNum = 100 356 | for iidx in range(replicateNum): 357 | extra_target_bbox = np.copy(target_bbox) 358 | 359 | extra_scene_box = np.copy(extra_target_bbox) 360 | extra_scene_box_center = extra_scene_box[0:2] + extra_scene_box[2:4] / 2. 361 | extra_scene_box_size = extra_scene_box[2:4] * (opts['padding'] + 0.6) 362 | extra_scene_box[0:2] = extra_scene_box_center - extra_scene_box_size / 2. 363 | extra_scene_box[2:4] = extra_scene_box_size 364 | 365 | extra_shift_offset = np.clip(2. * np.random.randn(2), -4, 4) 366 | cur_extra_scale = 1.1 ** np.clip(np.random.randn(1), -2, 2) 367 | 368 | 369 | extra_scene_box[0] += extra_shift_offset[0] 370 | extra_scene_box[1] += extra_shift_offset[1] 371 | extra_scene_box[2:4] *= cur_extra_scale[0] 372 | 373 | scaled_obj_size = float(opts['img_size']) / cur_extra_scale[0] 374 | 375 | cur_extra_cropped_image_v, _ = img_crop_model.crop_image(cur_image_v, np.reshape(extra_scene_box,(1,4)), extra_crop_img_size) 376 | cur_extra_cropped_image_v = cur_extra_cropped_image_v.detach() 377 | 378 | cur_extra_cropped_image_i, _ = img_crop_model.crop_image(cur_image_i, np.reshape(extra_scene_box,(1,4)), extra_crop_img_size) 379 | cur_extra_cropped_image_i = cur_extra_cropped_image_i.detach() 380 | 381 | # extra_target_bbox = np.array(list(map(int, extra_target_bbox))) 382 | cur_extra_pos_examples = gen_samples(SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2),extra_target_bbox, opts['n_pos_init']//replicateNum, opts['overlap_pos_init']) 383 | cur_extra_neg_examples = gen_samples(SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 2, 1.1),extra_target_bbox, opts['n_neg_init']/replicateNum//4, opts['overlap_neg_init']) 384 | 385 | ##bbreg sample 386 | cur_extra_bbreg_examples = gen_samples(SampleGenerator('uniform', (ishape[1], ishape[0]), 0.3, 1.5, 1.1),extra_target_bbox, opts['n_bbreg']/replicateNum//4, opts['overlap_bbreg'], opts['scale_bbreg']) 387 | 388 | batch_num = iidx*np.ones((cur_extra_pos_examples.shape[0], 1)) 389 | cur_extra_pos_rois = np.copy(cur_extra_pos_examples) 390 | cur_extra_pos_rois[:, 0:2] -= np.repeat(np.reshape(extra_scene_box[0:2], (1, 2)), 391 | cur_extra_pos_rois.shape[0], axis=0) 392 | cur_extra_pos_rois = samples2maskroi(cur_extra_pos_rois, model.receptive_field,(scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4], opts['padding']) 393 | cur_extra_pos_rois = np.concatenate((batch_num, cur_extra_pos_rois), axis=1) 394 | 395 | batch_num = iidx * np.ones((cur_extra_neg_examples.shape[0], 1)) 396 | cur_extra_neg_rois = np.copy(cur_extra_neg_examples) 397 | cur_extra_neg_rois[:, 0:2] -= np.repeat(np.reshape(extra_scene_box[0:2], (1, 2)),cur_extra_neg_rois.shape[0], axis=0) 398 | cur_extra_neg_rois = samples2maskroi(cur_extra_neg_rois, model.receptive_field,(scaled_obj_size, scaled_obj_size), extra_target_bbox[2:4], opts['padding']) 399 | cur_extra_neg_rois = np.concatenate((batch_num, cur_extra_neg_rois), axis=1) 400 | 401 | ## bbreg rois 402 | batch_num = iidx * np.ones((cur_extra_bbreg_examples.shape[0], 1)) 403 | cur_extra_bbreg_rois = np.copy(cur_extra_bbreg_examples) 404 | cur_extra_bbreg_rois[:,0:2] -= np.repeat(np.reshape(extra_scene_box[0:2],(1,2)),cur_extra_bbreg_rois.shape[0],axis=0) 405 | cur_extra_bbreg_rois = samples2maskroi(cur_extra_bbreg_rois, model.receptive_field,(scaled_obj_size,scaled_obj_size), extra_target_bbox[2:4], opts['padding']) 406 | cur_extra_bbreg_rois = np.concatenate((batch_num, cur_extra_bbreg_rois), axis=1) 407 | 408 | 409 | 410 | if iidx==0: 411 | extra_cropped_image_v = cur_extra_cropped_image_v 412 | extra_cropped_image_i = cur_extra_cropped_image_i 413 | 414 | extra_pos_rois = np.copy(cur_extra_pos_rois) 415 | extra_neg_rois = np.copy(cur_extra_neg_rois) 416 | ##bbreg rois 417 | extra_bbreg_rois = np.copy(cur_extra_bbreg_rois) 418 | extra_bbreg_examples = np.copy(cur_extra_bbreg_examples) 419 | else: 420 | extra_cropped_image_v = torch.cat((extra_cropped_image_v, cur_extra_cropped_image_v),dim=0) 421 | extra_cropped_image_i = torch.cat((extra_cropped_image_i, cur_extra_cropped_image_i),dim=0) 422 | 423 | extra_pos_rois = np.concatenate( (extra_pos_rois, np.copy(cur_extra_pos_rois)), axis=0) 424 | extra_neg_rois = np.concatenate( (extra_neg_rois, np.copy(cur_extra_neg_rois)), axis=0) 425 | ##bbreg rois 426 | extra_bbreg_rois = np.concatenate( (extra_bbreg_rois, np.copy(cur_extra_bbreg_rois)), axis=0 ) 427 | extra_bbreg_examples = np.concatenate( (extra_bbreg_examples, np.copy(cur_extra_bbreg_examples)), axis=0 ) 428 | 429 | 430 | extra_pos_rois = Variable(torch.from_numpy(extra_pos_rois.astype('float32'))).cuda() 431 | extra_neg_rois = Variable(torch.from_numpy(extra_neg_rois.astype('float32'))).cuda() 432 | ##bbreg rois 433 | extra_bbreg_rois = Variable(torch.from_numpy(extra_bbreg_rois.astype('float32'))).cuda() 434 | 435 | extra_cropped_image_v -= 128. 436 | extra_cropped_image_i -= 128. 437 | 438 | # pdb.set_trace() 439 | 440 | for iidxxx in range(replicateNum): 441 | temp_extra_cropped_image_v = torch.unsqueeze(extra_cropped_image_v[iidxxx], dim=0) 442 | temp_extra_cropped_image_i = torch.unsqueeze(extra_cropped_image_i[iidxxx], dim=0) 443 | temp_extra_feat_maps_v, temp_extra_feat_maps_i, temp_extra_feat_maps = model(temp_extra_cropped_image_v, temp_extra_cropped_image_i, out_layer='conv3') 444 | temp_extra_feat_maps = torch.squeeze(temp_extra_feat_maps, dim=0) 445 | # temp_extra_feat_maps_i = torch.squeeze(temp_extra_feat_maps_i, dim=0) 446 | 447 | if iidxxx == 0: 448 | extra_feat_maps = torch.zeros(replicateNum, temp_extra_feat_maps.shape[0], temp_extra_feat_maps.shape[1], temp_extra_feat_maps.shape[2]) 449 | # extra_feat_maps_i = torch.zeros(replicateNum, temp_extra_feat_maps_i.shape[0], temp_extra_feat_maps_i.shape[1], temp_extra_feat_maps_i.shape[2]) 450 | 451 | extra_feat_maps[iidxxx] = temp_extra_feat_maps 452 | # extra_feat_maps_i[iidxxx] = temp_extra_feat_maps_i 453 | 454 | extra_feat_maps = extra_feat_maps.cuda() 455 | 456 | 457 | # Draw pos/neg samples 458 | ishape = cur_image_v.shape 459 | 460 | # pdb.set_trace() 461 | extra_pos_feats = model.roi_align_model(extra_feat_maps, extra_pos_rois) 462 | extra_pos_feats = extra_pos_feats.view(extra_pos_feats.size(0), -1).data.clone() 463 | 464 | 465 | extra_neg_feats = model.roi_align_model(extra_feat_maps, extra_neg_rois) 466 | extra_neg_feats = extra_neg_feats.view(extra_neg_feats.size(0), -1).data.clone() 467 | 468 | ##bbreg feat 469 | extra_bbreg_feats = model.roi_align_model(extra_feat_maps, extra_bbreg_rois) 470 | extra_bbreg_feats = extra_bbreg_feats.view(extra_bbreg_feats.size(0), -1).data.clone() 471 | 472 | ## concatenate extra features to original_features 473 | pos_feats = torch.cat((pos_feats, extra_pos_feats),dim=0) 474 | neg_feats = torch.cat((neg_feats, extra_neg_feats), dim=0) 475 | ## concatenate extra bbreg feats to original_bbreg_feats 476 | bbreg_feats = torch.cat((bbreg_feats, extra_bbreg_feats), dim=0) 477 | bbreg_examples = np.concatenate((bbreg_examples, extra_bbreg_examples), axis=0) 478 | 479 | torch.cuda.empty_cache() 480 | model.zero_grad() 481 | 482 | # Initial training 483 | train(model, criterion, init_optimizer, pos_feats, neg_feats, opts['maxiter_init']) 484 | 485 | ##bbreg train 486 | if bbreg_feats.size(0) > opts['n_bbreg']: 487 | bbreg_idx = np.asarray(range(bbreg_feats.size(0))) 488 | np.random.shuffle(bbreg_idx) 489 | bbreg_feats = bbreg_feats[bbreg_idx[0:opts['n_bbreg']], :] 490 | bbreg_examples = bbreg_examples[bbreg_idx[0:opts['n_bbreg']], :] 491 | 492 | bbreg = BBRegressor((ishape[1], ishape[0])) 493 | bbreg.train(bbreg_feats, bbreg_examples, target_bbox) 494 | 495 | 496 | if pos_feats.size(0) > opts['n_pos_update']: 497 | pos_idx = np.asarray(range(pos_feats.size(0))) 498 | np.random.shuffle(pos_idx) 499 | pos_feats_all = [pos_feats.index_select(0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda())] 500 | if neg_feats.size(0) > opts['n_neg_update']: 501 | neg_idx = np.asarray(range(neg_feats.size(0))) 502 | np.random.shuffle(neg_idx) 503 | neg_feats_all = [neg_feats.index_select(0, torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda())] 504 | 505 | 506 | spf_total = time.time()-tic 507 | 508 | # Display 509 | savefig = savefig_dir != '' 510 | if display or savefig: 511 | dpi = 80.0 512 | figsize = (cur_image_v.shape[1]/dpi, cur_image_v.shape[0]/dpi) 513 | 514 | fig = plt.figure(frameon=False, figsize=figsize, dpi=dpi) 515 | ax = plt.Axes(fig, [0., 0., 1., 1.]) 516 | ax.set_axis_off() 517 | fig.add_axes(ax) 518 | im = ax.imshow(cur_image_v) 519 | 520 | if gt is not None: 521 | gt_rect = plt.Rectangle(tuple(gt[0,:2]),gt[0,2],gt[0,3], linewidth=3, edgecolor="#00ff00", zorder=1, fill=False) 522 | ax.add_patch(gt_rect) 523 | 524 | rect = plt.Rectangle(tuple(result_bb[0,:2]),result_bb[0,2],result_bb[0,3], linewidth=3, edgecolor="#ff0000", zorder=1, fill=False) 525 | ax.add_patch(rect) 526 | 527 | if display: 528 | plt.pause(.01) 529 | plt.draw() 530 | if savefig: 531 | fig.savefig(os.path.join(savefig_dir,'0000.jpg'),dpi=dpi) 532 | 533 | 534 | ##################################################################### 535 | #### Main loop 536 | ##################################################################### 537 | failure_count = 0 538 | trans_f = opts['trans_f'] 539 | 540 | for i in range(1, len(img_list_v)): 541 | 542 | tic = time.time() 543 | # Load image 544 | cur_image_v = Image.open(img_list_v[i]).convert('RGB') 545 | cur_image_v = np.asarray(cur_image_v) 546 | cur_image_i = Image.open(img_list_i[i]).convert('RGB') 547 | cur_image_i = np.asarray(cur_image_i) 548 | 549 | # Estimate target bbox 550 | ishape = cur_image_v.shape 551 | samples = gen_samples(SampleGenerator('gaussian', (ishape[1], ishape[0]), trans_f, opts['scale_f'], valid=True), target_bbox, opts['n_samples']) 552 | 553 | 554 | 555 | ######################################################################### 556 | #### Target-Aware Attention Prediction 557 | ######################################################################### 558 | 559 | attention_path = "daTANet_rgbt_234_Attention/" + seq + "/" 560 | attentionImage_name = str(i+1) + "_attentionMap.jpg" 561 | 562 | # pdb.set_trace() 563 | attentionFlag = os.path.exists(attention_path + attentionImage_name) 564 | # print("==>> attentionFlag ", attentionFlag) 565 | 566 | if failure_count >= 8 and attentionFlag: 567 | 568 | attentionMap = Image.open(attention_path+attentionImage_name).convert('RGB') 569 | attentionMap = np.asarray(attentionMap) 570 | # pdb.set_trace() 571 | 572 | dynamic_atttentonMAP = cv2.resize(attentionMap, (cur_image_v.shape[1], cur_image_v.shape[0]), interpolation=cv2.INTER_LINEAR) 573 | ret, static_atttentonMAP = cv2.threshold(dynamic_atttentonMAP, 100, 255, cv2.THRESH_BINARY) 574 | # cv2.imwrite('static_atttentonMAP.png', static_atttentonMAP) 575 | 576 | # pdb.set_trace() 577 | 578 | label_image = measure.label(static_atttentonMAP) 579 | props = measure.regionprops(label_image) 580 | 581 | atttenton_BBox = [] 582 | attention_centerLoc = [] 583 | similarity_glob_target_max = 0 584 | global_samples = [] 585 | 586 | #### for each candidate search region 587 | # for iii in range(len(props)): 588 | 589 | if len(props) > 1: 590 | attNum = 1 591 | else: 592 | attNum = len(props) 593 | 594 | for iii in range(attNum): 595 | center_position = props[iii].centroid 596 | center_position = [int(center_position[1]), int(center_position[0])] 597 | 598 | centerPos_prev_x = target_bbox[0] + target_bbox[2] / 2 599 | centerPos_prev_y = target_bbox[1] + target_bbox[3] / 2 600 | 601 | if math.fabs(center_position[0] - centerPos_prev_x) < 30 and math.fabs(center_position[1] - centerPos_prev_y) < 30: 602 | 603 | bbox = props[iii].bbox 604 | 605 | new_bbox2 = np.zeros((4)) 606 | new_bbox2[0] = center_position[0] - target_bbox[2]/2 607 | new_bbox2[1] = center_position[1] - target_bbox[3]/2 608 | new_bbox2[2] = target_bbox[2] 609 | new_bbox2[3] = target_bbox[3] 610 | 611 | 612 | # if new_bbox[2] > 10 and new_bbox[3] > 10: 613 | # switch_candidate_samples2 = sample_generator(new_bbox2, 100) 614 | switch_samples2 = gen_samples(SampleGenerator('gaussian', (ishape[1], ishape[0]), trans_f, opts['scale_f'], valid=True), new_bbox2, 256) 615 | # global_samples.append(switch_samples2) 616 | # pdb.set_trace() 617 | # samples = np.concatenate((switch_samples2, samples)) 618 | samples = switch_samples2 619 | 620 | # print("==>> Using Global Proposals and samples: ", samples.shape[0]) 621 | # samples = np.concatenate((switch_samples2, samples)) 622 | 623 | 624 | padded_x1 = (samples[:, 0] - samples[:, 2]*(opts['padding']-1.)/2.).min() 625 | padded_y1 = (samples[:, 1] - samples[:, 3]*(opts['padding']-1.)/2.).min() 626 | padded_x2 = (samples[:, 0] + samples[:, 2]*(opts['padding']+1.)/2.).max() 627 | padded_y2 = (samples[:, 1] + samples[:, 3]*(opts['padding']+1.)/2.).max() 628 | padded_scene_box = np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1)) 629 | 630 | if padded_scene_box[0] > cur_image_v.shape[1]: 631 | padded_scene_box[0] = cur_image_v.shape[1]-1 632 | if padded_scene_box[1] > cur_image_v.shape[0]: 633 | padded_scene_box[1] = cur_image_v.shape[0]-1 634 | if padded_scene_box[0] + padded_scene_box[2] < 0: 635 | padded_scene_box[2] = -padded_scene_box[0]+1 636 | if padded_scene_box[1] + padded_scene_box[3] < 0: 637 | padded_scene_box[3] = -padded_scene_box[1]+1 638 | 639 | 640 | crop_img_size = (padded_scene_box[2:4] * ((opts['img_size'], opts['img_size']) / target_bbox[2:4])).astype('int64') 641 | cropped_image_v, cur_image_var_v = img_crop_model.crop_image(cur_image_v, np.reshape(padded_scene_box,(1,4)), crop_img_size) 642 | cropped_image_v = cropped_image_v - 128. 643 | cropped_image_i, cur_image_var_i = img_crop_model.crop_image(cur_image_i, np.reshape(padded_scene_box,(1,4)), crop_img_size) 644 | cropped_image_i = cropped_image_i - 128. 645 | 646 | model.eval() 647 | feat_map_v, feat_map_i, feat_map = model(cropped_image_v, cropped_image_i, out_layer='conv3') 648 | 649 | # relative target bbox with padded_scene_box 650 | rel_target_bbox = np.copy(target_bbox) 651 | rel_target_bbox[0:2] -= padded_scene_box[0:2] 652 | 653 | 654 | # Extract sample features and get target location 655 | batch_num = np.zeros((samples.shape[0], 1)) 656 | sample_rois = np.copy(samples) 657 | sample_rois[:, 0:2] -= np.repeat(np.reshape(padded_scene_box[0:2], (1, 2)), sample_rois.shape[0], axis=0) 658 | sample_rois = samples2maskroi(sample_rois, model.receptive_field, (opts['img_size'],opts['img_size']), target_bbox[2:4],opts['padding']) 659 | sample_rois = np.concatenate((batch_num, sample_rois), axis=1) 660 | sample_rois = Variable(torch.from_numpy(sample_rois.astype('float32'))).cuda() 661 | 662 | sample_feats = model.roi_align_model(feat_map, sample_rois) 663 | sample_feats = sample_feats.view(sample_feats.size(0), -1).clone() 664 | 665 | sample_scores = model(sample_feats, sample_feats, in_layer='fc4') 666 | top_scores, top_idx = sample_scores[:, 1].topk(5) 667 | top_idx = top_idx.data.cpu().numpy() 668 | target_score = top_scores.data.mean() 669 | target_bbox = samples[top_idx].mean(axis=0) 670 | 671 | success = target_score > opts['success_thr'] 672 | 673 | # # Expand search area at failure 674 | if success: 675 | trans_f = opts['trans_f'] 676 | else: 677 | trans_f = opts['trans_f_expand'] 678 | 679 | ## Bbox regression 680 | if success: 681 | bbreg_feats = sample_feats[top_idx,:] 682 | bbreg_samples = samples[top_idx] 683 | bbreg_samples = bbreg.predict(bbreg_feats.data, bbreg_samples) 684 | bbreg_bbox = bbreg_samples.mean(axis=0) 685 | 686 | if failure_count >= 3: 687 | failure_count = failure_count - 3 688 | else: 689 | failure_count = 0 690 | else: 691 | bbreg_bbox = target_bbox 692 | failure_count = failure_count + 1 693 | 694 | # Save result 695 | result[i] = target_bbox 696 | result_bb[i] = bbreg_bbox 697 | iou_result[i] = 1. 698 | 699 | # Data collect 700 | if success: 701 | 702 | # Draw pos/neg samples 703 | pos_examples = gen_samples( 704 | SampleGenerator('gaussian', (ishape[1], ishape[0]), 0.1, 1.2), target_bbox, 705 | opts['n_pos_update'], 706 | opts['overlap_pos_update']) 707 | neg_examples = gen_samples( 708 | SampleGenerator('uniform', (ishape[1], ishape[0]), 1.5, 1.2), target_bbox, 709 | opts['n_neg_update'], 710 | opts['overlap_neg_update']) 711 | 712 | padded_x1 = (neg_examples[:, 0] - neg_examples[:, 2] * (opts['padding'] - 1.) / 2.).min() 713 | padded_y1 = (neg_examples[:, 1] - neg_examples[:, 3] * (opts['padding'] - 1.) / 2.).min() 714 | padded_x2 = (neg_examples[:, 0] + neg_examples[:, 2] * (opts['padding'] + 1.) / 2.).max() 715 | padded_y2 = (neg_examples[:, 1] + neg_examples[:, 3] * (opts['padding'] + 1.) / 2.).max() 716 | padded_scene_box = np.reshape(np.asarray((padded_x1, padded_y1, padded_x2 - padded_x1, padded_y2 - padded_y1)),(1,4)) 717 | 718 | scene_boxes = np.reshape(np.copy(padded_scene_box), (1, 4)) 719 | jitter_scale = [1.] 720 | 721 | for bidx in range(0, scene_boxes.shape[0]): 722 | crop_img_size = (scene_boxes[bidx, 2:4] * ((opts['img_size'], opts['img_size']) / target_bbox[2:4])).astype('int64') * jitter_scale[bidx] 723 | cropped_image_v, cur_image_var_v = img_crop_model.crop_image(cur_image_v, np.reshape(scene_boxes[bidx], (1, 4)),crop_img_size) 724 | cropped_image_v = cropped_image_v - 128. 725 | cropped_image_i, cur_image_var_i = img_crop_model.crop_image(cur_image_i, np.reshape(scene_boxes[bidx], (1, 4)),crop_img_size) 726 | cropped_image_i = cropped_image_i - 128. 727 | 728 | feat_map_v, feat_map_i, feat_map = model(cropped_image_v, cropped_image_i, out_layer='conv3') 729 | 730 | rel_target_bbox = np.copy(target_bbox) 731 | rel_target_bbox[0:2] -= scene_boxes[bidx, 0:2] 732 | 733 | batch_num = np.zeros((pos_examples.shape[0], 1)) 734 | cur_pos_rois = np.copy(pos_examples) 735 | cur_pos_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)), cur_pos_rois.shape[0],axis=0) 736 | scaled_obj_size = float(opts['img_size']) * jitter_scale[bidx] 737 | cur_pos_rois = samples2maskroi(cur_pos_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size),target_bbox[2:4], opts['padding']) 738 | cur_pos_rois = np.concatenate((batch_num, cur_pos_rois), axis=1) 739 | cur_pos_rois = Variable(torch.from_numpy(cur_pos_rois.astype('float32'))).cuda() 740 | 741 | cur_pos_feats = model.roi_align_model(feat_map, cur_pos_rois) 742 | cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1).data.clone() 743 | 744 | batch_num = np.zeros((neg_examples.shape[0], 1)) 745 | cur_neg_rois = np.copy(neg_examples) 746 | cur_neg_rois[:, 0:2] -= np.repeat(np.reshape(scene_boxes[bidx, 0:2], (1, 2)), cur_neg_rois.shape[0], axis=0) 747 | cur_neg_rois = samples2maskroi(cur_neg_rois, model.receptive_field, (scaled_obj_size, scaled_obj_size), target_bbox[2:4], opts['padding']) 748 | cur_neg_rois = np.concatenate((batch_num, cur_neg_rois), axis=1) 749 | cur_neg_rois = Variable(torch.from_numpy(cur_neg_rois.astype('float32'))).cuda() 750 | 751 | cur_neg_feats = model.roi_align_model(feat_map, cur_neg_rois) 752 | cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1).data.clone() 753 | 754 | 755 | feat_dim = cur_pos_feats.size(-1) 756 | 757 | if bidx == 0: 758 | pos_feats = cur_pos_feats ##index select 759 | neg_feats = cur_neg_feats 760 | else: 761 | pos_feats = torch.cat((pos_feats, cur_pos_feats), dim=0) 762 | neg_feats = torch.cat((neg_feats, cur_neg_feats), dim=0) 763 | 764 | if pos_feats.size(0) > opts['n_pos_update']: 765 | pos_idx = np.asarray(range(pos_feats.size(0))) 766 | np.random.shuffle(pos_idx) 767 | pos_feats = pos_feats.index_select(0, torch.from_numpy(pos_idx[0:opts['n_pos_update']]).cuda()) 768 | if neg_feats.size(0) > opts['n_neg_update']: 769 | neg_idx = np.asarray(range(neg_feats.size(0))) 770 | np.random.shuffle(neg_idx) 771 | neg_feats = neg_feats.index_select(0,torch.from_numpy(neg_idx[0:opts['n_neg_update']]).cuda()) 772 | 773 | pos_feats_all.append(pos_feats) 774 | neg_feats_all.append(neg_feats) 775 | 776 | if len(pos_feats_all) > opts['n_frames_long']: 777 | del pos_feats_all[0] 778 | if len(neg_feats_all) > opts['n_frames_short']: 779 | del neg_feats_all[0] 780 | 781 | # Short term update 782 | if not success: 783 | nframes = min(opts['n_frames_short'],len(pos_feats_all)) 784 | pos_data = torch.stack(pos_feats_all[-nframes:],0).view(-1,feat_dim) 785 | neg_data = torch.stack(neg_feats_all,0).view(-1,feat_dim) 786 | train(model, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update']) 787 | 788 | # Long term update 789 | elif i % opts['long_interval'] == 0: 790 | pos_data = torch.stack(pos_feats_all,0).view(-1,feat_dim) 791 | neg_data = torch.stack(neg_feats_all,0).view(-1,feat_dim) 792 | train(model, criterion, update_optimizer, pos_data, neg_data, opts['maxiter_update']) 793 | 794 | spf = time.time()-tic 795 | spf_total += spf 796 | 797 | # Display 798 | if display or savefig: 799 | im.set_data(cur_image_v) 800 | 801 | if gt is not None: 802 | gt_rect.set_xy(gt[i,:2]) 803 | gt_rect.set_width(gt[i,2]) 804 | gt_rect.set_height(gt[i,3]) 805 | 806 | rect.set_xy(result_bb[i,:2]) 807 | rect.set_width(result_bb[i,2]) 808 | rect.set_height(result_bb[i,3]) 809 | 810 | if display: 811 | plt.pause(.01) 812 | plt.draw() 813 | if savefig: 814 | fig.savefig(os.path.join(savefig_dir,'%04d.jpg'%(i)),dpi=dpi) 815 | 816 | if opts['visual_log']: 817 | if gt is None: 818 | print("Frame %d/%d, Score %.3f, Time %.3f" % \ 819 | (i, len(img_list), target_score, spf)) 820 | else: 821 | print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \ 822 | (i, len(img_list), overlap_ratio(gt[i],result_bb[i])[0], target_score, spf)) 823 | 824 | print("Frame %d/%d, Overlap %.3f, Score %.3f, Time %.3f" % \ 825 | (i, len(img_list_v), overlap_ratio(gt[i], result_bb[i])[0], target_score, spf)) 826 | 827 | iou_result[i]= overlap_ratio(gt[i],result_bb[i])[0] 828 | 829 | 830 | fps = len(img_list_v) / spf_total 831 | 832 | # pdb.set_trace() 833 | # print("==>> epochID %d, L1-Loss %.4f, Time %.3f" % (epochID, total_l1_Loss/len(img_list_v), spf_total)) 834 | 835 | 836 | return iou_result, result_bb, fps, result 837 | -------------------------------------------------------------------------------- /MFGNet-rgbt-tracking-master/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ["CUDA_VISIBLE_DEVICES"]="1" 3 | import sys 4 | import pickle 5 | import time 6 | 7 | import torch 8 | import torch.optim as optim 9 | from torch.autograd import Variable 10 | import warnings 11 | warnings.filterwarnings("ignore") 12 | sys.path.insert(0,'./modules') 13 | from data_prov import * 14 | from model import * 15 | from pretrain_options import * 16 | from tracker import * 17 | import numpy as np 18 | 19 | import argparse 20 | import pdb 21 | 22 | 23 | def set_optimizer(model, lr_base, lr_mult=pretrain_opts['lr_mult'], momentum=pretrain_opts['momentum'], w_decay=pretrain_opts['w_decay']): 24 | params = model.get_learnable_params() 25 | param_list = [] 26 | for k, p in params.items(): 27 | lr = lr_base 28 | for l, m in lr_mult.items(): 29 | if k.startswith(l): 30 | lr = lr_base * m 31 | param_list.append({'params': [p], 'lr': lr}) 32 | optimizer = optim.SGD(param_list, lr=lr, momentum=momentum, weight_decay=w_decay) 33 | return optimizer 34 | 35 | def genConfig(seq_path, set_type): 36 | 37 | path, seqname = os.path.split(seq_path) 38 | 39 | if set_type == 'OTB': 40 | img_list = sorted([seq_path + '/img/' + p for p in os.listdir(seq_path + '/img') if os.path.splitext(p)[1] == '.jpg']) 41 | 42 | if (seqname == 'Jogging') or (seqname == 'Skating2'): 43 | gt = np.loadtxt(seq_path + '/groundtruth_rect.1.txt') 44 | elif seqname =='Human4': 45 | gt = np.loadtxt(seq_path + '/groundtruth_rect.2.txt', delimiter=',') 46 | elif (seqname == 'BlurBody') or (seqname == 'BlurCar1') or (seqname == 'BlurCar2') or (seqname == 'BlurCar3') \ 47 | or (seqname == 'BlurCar4') or (seqname == 'BlurFace') or (seqname == 'BlurOwl') or (seqname == 'Board') \ 48 | or (seqname == 'Box') or (seqname == 'Car4') or (seqname == 'CarScale') or (seqname == 'ClifBar') \ 49 | or (seqname == 'Couple') or (seqname == 'Crossing') or (seqname == 'Dog') or (seqname == 'FaceOcc1') \ 50 | or (seqname == 'Girl') or (seqname == 'Rubik') or (seqname == 'Singer1') or (seqname == 'Subway') \ 51 | or (seqname == 'Surfer') or (seqname == 'Sylvester') or (seqname == 'Toy') or (seqname == 'Twinnings') \ 52 | or (seqname == 'Vase') or (seqname == 'Walking') or (seqname == 'Walking2') or (seqname == 'Woman') : 53 | gt = np.loadtxt(seq_path + '/groundtruth_rect.txt') 54 | elif (seqname == 'Freeman4') or (seqname == 'Diving') or (seqname =='Freeman3') or (seqname =='Football1'): 55 | gt = np.loadtxt(seq_path + '/groundtruth_rect_revise.txt', delimiter=',') 56 | else: 57 | gt = np.loadtxt(seq_path + '/groundtruth_rect.txt', delimiter=',') 58 | 59 | if seqname == 'David': 60 | img_list = img_list[300:] 61 | # gt = gt[300:,:] 62 | if seqname == 'Football1': 63 | img_list = img_list[0:73] 64 | if seqname == 'Freeman3': 65 | img_list = img_list[0:459] 66 | if seqname == 'Freeman4': 67 | img_list = img_list[0:282] 68 | 69 | elif set_type=='VOT/2016': 70 | img_list = sorted([seq_path + '/'+p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.jpg']) 71 | gt = np.loadtxt(seq_path + '/groundtruth.txt', delimiter=',') 72 | 73 | elif set_type=='RGBT234': 74 | img_list = sorted([seq_path + '/'+p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.jpg']) 75 | gt = np.loadtxt(seq_path + '/groundtruth.txt', delimiter=',') 76 | 77 | elif set_type=='GTOT50': 78 | img_list = sorted([seq_path + '/'+p for p in os.listdir(seq_path) if os.path.splitext(p)[1] == '.jpg']) 79 | gt = np.loadtxt(seq_path + '/groundtruth.txt', delimiter=',') 80 | 81 | ##polygon to rect 82 | if gt.shape[1] == 8: 83 | x_min = np.min(gt[:, [0, 2, 4, 6]], axis=1)[:, None] 84 | y_min = np.min(gt[:, [1, 3, 5, 7]], axis=1)[:, None] 85 | x_max = np.max(gt[:, [0, 2, 4, 6]], axis=1)[:, None] 86 | y_max = np.max(gt[:, [1, 3, 5, 7]], axis=1)[:, None] 87 | gt = np.concatenate((x_min, y_min, x_max - x_min, y_max - y_min), axis=1) 88 | 89 | return img_list, gt 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | def train_mdnet(): 98 | 99 | ## set image directory 100 | if pretrain_opts['set_type'] == 'GTOT50': 101 | img_home = '/DATA/wangxiao/Multi_Modal_RGBT_dataset_CSR/' 102 | data_path = '/DATA/wangxiao/dfg-rgbt-RTMDNet-master-train-on-50/50.pkl' 103 | 104 | ## Init dataset ## 105 | with open(data_path, 'rb') as fp: 106 | data = pickle.load(fp) 107 | 108 | 109 | K = len(data) 110 | 111 | ## Init model ## 112 | model = MDNet(pretrain_opts['init_model_path'], K) 113 | if pretrain_opts['adaptive_align']: 114 | align_h = model.roi_align_model.aligned_height 115 | align_w = model.roi_align_model.aligned_width 116 | spatial_s = model.roi_align_model.spatial_scale 117 | model.roi_align_model = RoIAlignAdaMax(align_h, align_w, spatial_s) 118 | 119 | if pretrain_opts['use_gpu']: 120 | model = model.cuda() 121 | model.set_learnable_params(pretrain_opts['ft_layers']) 122 | model.train() 123 | 124 | dataset = [None] * K 125 | for k, (seqname, seq) in enumerate(data.items()): 126 | img_list_v = seq['images_v'] 127 | img_list_i = seq['images_i'] 128 | videoPath_v = seq['v_videoPath'] 129 | videoPath_i = seq['i_videoPath'] 130 | # seqName = seq['seqName'] 131 | gt = seq['gt_i'] 132 | 133 | if pretrain_opts['set_type'] == 'GTOT50': 134 | img_dir = img_home + seqname 135 | 136 | dataset[k] = RegionDataset(img_dir, img_list_v, img_list_i, videoPath_v, videoPath_i, gt, model.receptive_field, pretrain_opts) 137 | 138 | 139 | ## Init criterion and optimizer ## 140 | binaryCriterion = BinaryLoss() 141 | interDomainCriterion = nn.CrossEntropyLoss() 142 | evaluator = Precision() 143 | optimizer = set_optimizer(model, pretrain_opts['lr']) 144 | 145 | best_score = 0. 146 | batch_cur_idx = 0 147 | for i in range(pretrain_opts['n_cycles']): 148 | print("==== Start Cycle %d ====" % (i)) 149 | k_list = np.random.permutation(K) 150 | prec = np.zeros(K) 151 | totalTripleLoss = np.zeros(K) 152 | totalInterClassLoss = np.zeros(K) 153 | 154 | # pdb.set_trace() 155 | # k_list = k_list[:3] 156 | for j, k in enumerate(k_list): 157 | tic = time.time() 158 | 159 | 160 | cropped_scenes_v, cropped_scenes_i, pos_rois, neg_rois= dataset[k].next() 161 | 162 | 163 | for sidx in range(0, len(cropped_scenes_v)): 164 | cur_scene_v = cropped_scenes_v[sidx] 165 | cur_scene_i = cropped_scenes_i[sidx] 166 | cur_pos_rois = pos_rois[sidx] 167 | cur_neg_rois = neg_rois[sidx] 168 | 169 | cur_scene_v = Variable(cur_scene_v) 170 | cur_scene_i = Variable(cur_scene_i) 171 | cur_pos_rois = Variable(cur_pos_rois) 172 | cur_neg_rois = Variable(cur_neg_rois) 173 | if pretrain_opts['use_gpu']: 174 | cur_scene_v = cur_scene_v.cuda() 175 | cur_scene_i = cur_scene_i.cuda() 176 | cur_pos_rois = cur_pos_rois.cuda() 177 | cur_neg_rois = cur_neg_rois.cuda() 178 | 179 | # pdb.set_trace() 180 | cur_feat_map_v, cur_feat_map_i, augmented_feats = model(cur_scene_v, cur_scene_i, k, out_layer='conv3') 181 | 182 | cur_pos_feats = model.roi_align_model(augmented_feats, cur_pos_rois) 183 | cur_pos_feats = cur_pos_feats.view(cur_pos_feats.size(0), -1) 184 | cur_neg_feats = model.roi_align_model(augmented_feats, cur_neg_rois) 185 | cur_neg_feats = cur_neg_feats.view(cur_neg_feats.size(0), -1) 186 | 187 | # pdb.set_trace() 188 | 189 | if sidx == 0: 190 | pos_feats = [cur_pos_feats] 191 | neg_feats = [cur_neg_feats] 192 | else: 193 | pos_feats.append(cur_pos_feats) 194 | neg_feats.append(cur_neg_feats) 195 | 196 | feat_dim = cur_neg_feats.size(1) 197 | pos_feats = torch.stack(pos_feats,dim=0).view(-1,feat_dim) 198 | neg_feats = torch.stack(neg_feats,dim=0).view(-1,feat_dim) 199 | 200 | 201 | pos_score = model(pos_feats, pos_feats, k, in_layer='fc4') 202 | neg_score = model(neg_feats, neg_feats, k, in_layer='fc4') 203 | 204 | cls_loss = binaryCriterion(pos_score, neg_score) 205 | 206 | ## inter frame classification 207 | 208 | interclass_label = Variable(torch.zeros((pos_score.size(0))).long()) 209 | if opts['use_gpu']: 210 | interclass_label = interclass_label.cuda() 211 | total_interclass_score = pos_score[:,1].contiguous() 212 | total_interclass_score = total_interclass_score.view((pos_score.size(0),1)) 213 | 214 | K_perm = np.random.permutation(K) 215 | K_perm = K_perm[0:100] 216 | for cidx in K_perm: 217 | if k == cidx: 218 | continue 219 | else: 220 | interclass_score = model(pos_feats, pos_feats, cidx, in_layer='fc4') 221 | total_interclass_score = torch.cat((total_interclass_score,interclass_score[:,1].contiguous().view((interclass_score.size(0),1))),dim=1) 222 | 223 | interclass_loss = interDomainCriterion(total_interclass_score, interclass_label) 224 | totalInterClassLoss[k] = interclass_loss.item() 225 | 226 | (cls_loss+0.1*interclass_loss).backward() 227 | 228 | batch_cur_idx+=1 229 | if (batch_cur_idx%pretrain_opts['seqbatch_size'])==0: 230 | torch.nn.utils.clip_grad_norm(model.parameters(), pretrain_opts['grad_clip']) 231 | optimizer.step() 232 | model.zero_grad() 233 | batch_cur_idx = 0 234 | 235 | ## evaulator 236 | prec[k] = evaluator(pos_score, neg_score) 237 | ## computation latency 238 | toc = time.time() - tic 239 | 240 | print("Cycle %2d, K %2d (%2d), BinLoss %.3f, Prec %.3f, interLoss %.3f, Time %.3f" % \ 241 | (i, j, k, cls_loss.item(), prec[k], totalInterClassLoss[k], toc)) 242 | 243 | cur_score = prec.mean() 244 | try: 245 | total_miou = sum(total_iou)/len(total_iou) 246 | except: 247 | total_miou = 0. 248 | 249 | print("Mean Precision: %.3f Inter Loss: %.3f IoU: %.3f" % (prec.mean(), totalInterClassLoss.mean(),total_miou)) 250 | 251 | if cur_score > best_score: 252 | best_score = cur_score 253 | if pretrain_opts['use_gpu']: 254 | model = model.cpu() 255 | states = {'shared_layers': model.layers.state_dict()} 256 | print("Save model to %s" % pretrain_opts['model_path']) 257 | torch.save(states, pretrain_opts['model_path']) 258 | # torch.save(states, '/home/wangxiao/Downloads/ACM-MM-GML_RGBT_tracking/rgbt-RTMDNet-master/models/rgbt_rtmdnet.pth') 259 | if pretrain_opts['use_gpu']: 260 | model = model.cuda() 261 | 262 | 263 | if __name__ == "__main__": 264 | 265 | parser = argparse.ArgumentParser() 266 | parser.add_argument("-set_type", default = 'GTOT50' ) 267 | parser.add_argument("-padding_ratio", default = 5., type =float) 268 | parser.add_argument("-model_path", default ="./models/CBAM_dfg_rtmdnet_trained_on_50.pth", help = "model path") 269 | parser.add_argument("-frame_interval", default = 1, type=int, help="frame interval in batch. ex) interval=1 -> [1 2 3 4 5], interval=2 ->[1 3 5]") 270 | parser.add_argument("-init_model_path", default="./models/imagenet-vgg-m.mat") 271 | parser.add_argument("-batch_frames", default = 8, type = int) 272 | parser.add_argument("-lr", default=0.0001, type = float) 273 | parser.add_argument("-batch_pos",default = 64, type = int) 274 | parser.add_argument("-batch_neg", default = 196, type = int) 275 | parser.add_argument("-n_cycles", default = 1000, type = int ) 276 | parser.add_argument("-adaptive_align", default = True, action = 'store_false') 277 | parser.add_argument("-seqbatch_size", default=50, type=int) 278 | 279 | args = parser.parse_args() 280 | 281 | ################################################################################## 282 | #########################Just modify opts in this script.######################### 283 | ######################Becuase of synchronization of options####################### 284 | ################################################################################## 285 | ##option setting 286 | pretrain_opts['set_type'] = args.set_type 287 | pretrain_opts['padding_ratio']=args.padding_ratio 288 | pretrain_opts['padded_img_size']=pretrain_opts['img_size']*int(pretrain_opts['padding_ratio']) 289 | pretrain_opts['model_path']=args.model_path 290 | pretrain_opts['frame_interval'] = args.frame_interval 291 | pretrain_opts['init_model_path'] = args.init_model_path 292 | pretrain_opts['batch_frames'] = args.batch_frames 293 | pretrain_opts['lr'] = args.lr 294 | pretrain_opts['batch_pos'] = args.batch_pos # original = 64 295 | pretrain_opts['batch_neg'] = args.batch_neg # original = 192 296 | pretrain_opts['n_cycles'] = args.n_cycles 297 | pretrain_opts['adaptive_align']=args.adaptive_align 298 | pretrain_opts['seqbatch_size'] = args.seqbatch_size 299 | ################################################################################## 300 | ############################Do not modify opts anymore.########################### 301 | ######################Becuase of synchronization of options####################### 302 | ################################################################################## 303 | 304 | print(pretrain_opts) 305 | train_mdnet() 306 | 307 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MFGNet_RGBT_Tracking_PyTorch 2 | Official Implementation of MFGNet-RGBT-Tracker ("Dynamic Modality-Aware Filter Generation for RGB-T Tracking") with PyTorch [[Project](https://sites.google.com/view/mfgrgbttrack/)] [[Paper]()] 3 | 4 | 5 | 6 | Many RGB-T trackers attempt to attain robust feature representation by utilizing an adaptive weighting scheme (or attention mechanism). Different from these works, we propose a new dynamic modality-aware filter generation module (named MFGNet) to boost the message communication between visible and thermal data by adaptively adjusting the convolutional kernels for various input images in practical tracking. Our experimental results demonstrate the advantages of our proposed MFGNet for RGB-T tracking. 7 | 8 | 9 | 10 | 11 | ![rgbt_car10](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/pipelinev5.png) 12 | 13 | 14 | 15 | ## Demo: 16 | (Red: Ours, Blue: Ground Truth, Green: RT-MDNet) 17 | 18 | ![rgbt_car10](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/rgbt_car10.gif) 19 | 20 | ![rgbt_balancebike](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/rgbt_balancebike.gif) 21 | 22 | ![rgbt_flower1](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/rgbt_flower1.gif) 23 | 24 | ![rgbt_kite4](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/rgbt_kite4.gif) 25 | 26 | 27 | ## Install: 28 | This code is developed based on Python 3.7, PyTorch 1.0, CUDA 10.1, Ubuntu 16.04, Tesla P100 * 4. Install anything it warnings. 29 | 30 | RoI align module needs to compile first: 31 | 32 | CUDA_HOME=/usr/local/cuda-10.1 python setup.py build_ext --inplace 33 | 34 | 35 | 36 | ## Train and Test: 37 | 1. generate the "50.pkl" with prepro_rgbt.py as the training data; 38 | 39 | 2. train the tracker with train.py; 40 | 41 | 3. train the rgbt_TANet with train_rgbtTANet.py; 42 | 43 | 4. Obtain the attention maps and run the test.py for rgbt-tracking. 44 | 45 | 46 | 47 | ## Results: 48 | 49 | ![rgbt_kite4](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/results_on_rgbt210_234.png) 50 | 51 | ![rgbt_kite4](https://github.com/wangxiao5791509/DFG_RGBT_Tracking_PyTorch/blob/master/ComponentAnalysis.png) 52 | 53 | you can also download our pre-trained models and raw results for comprison: [[Pretrained Models]()] [[Raw Results]()] 54 | 55 | 56 | 57 | ## Acknowledgement: 58 | * https://github.com/BossBobxuan/RT-MDNet 59 | * https://github.com/NieXC/pytorch-mula 60 | * https://github.com/luuuyi/CBAM.PyTorch 61 | 62 | 63 | 64 | 65 | ## Citation: 66 | If you use this code for your research, please cite the following paper: 67 | ~~~ 68 | @article{wang2020dfgrgbttrack, 69 | title={Dynamic Modality-Aware Filter Generation for RGB-T Tracking}, 70 | author={Xiao Wang, Xiujun Shu, Shiliang Zhang, Bo Jiang, Yaowei Wang, Yonghong Tian, Feng Wu}, 71 | journal={arXiv preprint}, 72 | year={2020} 73 | } 74 | ~~~ 75 | 76 | If you have any questions, feel free to contact me via email: wangx03@pcl.ac.cn 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /environments.txt: -------------------------------------------------------------------------------- 1 | Package Version 2 | --------------- ------------------- 3 | certifi 2019.9.11 4 | cffi 1.13.1 5 | cycler 0.10.0 6 | decorator 4.4.1 7 | imageio 2.6.1 8 | joblib 0.14.0 9 | kiwisolver 1.1.0 10 | matplotlib 3.1.1 11 | mkl-fft 1.0.14 12 | mkl-random 1.1.0 13 | mkl-service 2.3.0 14 | networkx 2.4 15 | numpy 1.17.3 16 | olefile 0.46 17 | opencv-python 4.1.1.26 18 | Pillow 6.2.0 19 | pip 19.3.1 20 | pycparser 2.19 21 | pyparsing 2.4.2 22 | python-dateutil 2.8.0 23 | PyWavelets 1.1.1 24 | scikit-image 0.16.2 25 | scikit-learn 0.21.3 26 | scipy 1.1.0 27 | setuptools 41.6.0.post20191030 28 | six 1.12.0 29 | sklearn 0.0 30 | torch 1.0.1 31 | torchvision 0.4.1a0+d94043a 32 | tqdm 4.46.0 33 | wheel 0.33.6 34 | -------------------------------------------------------------------------------- /pipelinev5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/pipelinev5.png -------------------------------------------------------------------------------- /results_on_rgbt210_234.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/results_on_rgbt210_234.png -------------------------------------------------------------------------------- /rgbt_balancebike.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/rgbt_balancebike.gif -------------------------------------------------------------------------------- /rgbt_car10.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/rgbt_car10.gif -------------------------------------------------------------------------------- /rgbt_flower1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/rgbt_flower1.gif -------------------------------------------------------------------------------- /rgbt_kite4.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hyzcn/MFG_RGBT_Tracking_PyTorch/d389658f64cdbb19316e46e903ad73325850aa55/rgbt_kite4.gif --------------------------------------------------------------------------------