├── DeepMTA_TCSVT_project.tar.gz ├── DeepMTA_code ├── README.md ├── benchmark │ └── bench_utils │ │ ├── bbox_helper.py │ │ ├── benchmark_helper.py │ │ └── pysot │ │ ├── datasets │ │ ├── __init__.py │ │ ├── dataset.py │ │ ├── video.py │ │ └── vot.py │ │ ├── evaluation │ │ ├── __init__.py │ │ ├── ar_benchmark.py │ │ ├── eao_benchmark.py │ │ └── f1_benchmark.py │ │ └── utils │ │ ├── __init__.py │ │ ├── build │ │ └── temp.linux-x86_64-3.7 │ │ │ ├── region.o │ │ │ └── src │ │ │ └── region.o │ │ ├── c_region.pxd │ │ ├── misc.py │ │ ├── region.c │ │ ├── region.cpython-37m-x86_64-linux-gnu.so │ │ ├── region.pyx │ │ ├── setup.py │ │ ├── src │ │ ├── buffer.h │ │ ├── region.c │ │ └── region.h │ │ └── statistics.py ├── configs │ ├── SiamFC │ │ ├── OTB2015_THOR_dynamic.json │ │ ├── OTB2015_THOR_ensemble.json │ │ ├── OTB2015_vanilla.json │ │ ├── VOT2018_THOR_dynamic.json │ │ ├── VOT2018_THOR_ensemble.json │ │ └── VOT2018_vanilla.json │ ├── SiamMask │ │ ├── OTB2015_THOR_dynamic.json │ │ ├── OTB2015_THOR_ensemble.json │ │ ├── OTB2015_vanilla.json │ │ ├── VOT2018_THOR_dynamic.json │ │ ├── VOT2018_THOR_ensemble.json │ │ └── VOT2018_vanilla.json │ └── SiamRPN │ │ ├── GOT10k_THOR_ensemble.json │ │ ├── LaSOT_THOR_ensemble.json │ │ ├── OTB2015_THOR_dynamic.json │ │ ├── OTB2015_THOR_ensemble.json │ │ ├── OTB2015_vanilla.json │ │ ├── OXUVA_THOR_ensemble.json │ │ ├── UAV123_THOR_ensemble.json │ │ ├── UAV20L_THOR_ensemble.json │ │ ├── VOT2018_THOR_dynamic.json │ │ ├── VOT2018_THOR_ensemble.json │ │ └── VOT2018_vanilla.json ├── data │ ├── download_links_for_tracking_datasets.txt │ └── get_test_otb2015_data.sh ├── environment.yml ├── network.py ├── scripts │ └── transform_oxuva_results_txt_to_csv.py ├── temp_DIR_TO_SAVE_static_Global_attentionMap │ └── mkdir_your_self.txt ├── testing.py ├── trackers │ ├── SiamFC │ │ ├── config.py │ │ ├── model.pth │ │ ├── net.py │ │ ├── siamfc.py │ │ └── utils.py │ ├── SiamMask │ │ ├── net.py │ │ ├── resnet.py │ │ ├── siammask.py │ │ └── utils │ │ │ ├── anchors.py │ │ │ ├── bbox_helper.py │ │ │ ├── config_helper.py │ │ │ ├── load_helper.py │ │ │ ├── log_helper.py │ │ │ ├── tracker_config.py │ │ │ └── tracking_utils.py │ └── dcynet_modules_adaptis │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── data_loader.py │ │ ├── generator.py │ │ ├── logger.py │ │ ├── ops.py │ │ ├── resnet.py │ │ ├── train.py │ │ └── utils.py ├── train_traj_measure_net.py └── webcam_demo.py ├── GOT10K_dataset_video_list ├── 01_mask_prepreocessing.m ├── GOT10K_dataset_test_video_list.txt ├── GOT10K_dataset_train_video_list.txt └── GOT10K_dataset_val_video_list.txt ├── README.md ├── deepmta_arts.png ├── download_links_for_tracking_datasets.txt └── figures ├── attention_supplement.jpg ├── benchmarkresults.png ├── lasot_result.png ├── lasot_results.jpg ├── motivation.jpg ├── pipeline.png └── trackingresults_vis.jpg /DeepMTA_TCSVT_project.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/DeepMTA_TCSVT_project.tar.gz -------------------------------------------------------------------------------- /DeepMTA_code/README.md: -------------------------------------------------------------------------------- 1 | ################################################################################################### 2 | ################################# GOT10K ########################################### 3 | 
################################################################################################### 4 | python testing.py -d GOT10k -t SiamRPN --lb_type ensemble 5 | 6 | 7 | ################################################################################################### 8 | ################################# LaSOT ########################################### 9 | ################################################################################################### 10 | python testing.py -d LaSOT -t SiamRPN --lb_type ensemble 11 | 12 | python lasot_thor_testing.py -d LaSOT -t SiamRPN --lb_type ensemble 13 | 14 | 15 | ################################################################################################### 16 | ################################# UAV20L ########################################## 17 | ################################################################################################### 18 | python testing.py -d UAV20L -t SiamRPN --lb_type ensemble 19 | 20 | ################################################################################################### 21 | ################################# UAV123 ########################################## 22 | ################################################################################################### 23 | python testing.py -d UAV123 -t SiamRPN --lb_type ensemble 24 | 25 | ################################################################################################### 26 | ################################# OXUVA ########################################## 27 | ################################################################################################### 28 | python testing.py -d OXUVA -t SiamRPN --lb_type ensemble 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/bbox_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | import numpy as np 7 | from collections import namedtuple 8 | 9 | Corner = namedtuple('Corner', 'x1 y1 x2 y2') 10 | BBox = Corner 11 | Center = namedtuple('Center', 'x y w h') 12 | 13 | 14 | def corner2center(corner): 15 | """ 16 | :param corner: Corner or np.array 4*N 17 | :return: Center or 4 np.array N 18 | """ 19 | if isinstance(corner, Corner): 20 | x1, y1, x2, y2 = corner 21 | return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, (x2 - x1), (y2 - y1)) 22 | else: 23 | x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3] 24 | x = (x1 + x2) * 0.5 25 | y = (y1 + y2) * 0.5 26 | w = x2 - x1 27 | h = y2 - y1 28 | return x, y, w, h 29 | 30 | 31 | def center2corner(center): 32 | """ 33 | :param center: Center or np.array 4*N 34 | :return: Corner or np.array 4*N 35 | """ 36 | if isinstance(center, Center): 37 | x, y, w, h = center 38 | return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5) 39 | else: 40 | x, y, w, h = center[0], center[1], center[2], center[3] 41 | x1 = x - w * 0.5 42 | y1 = y - h * 0.5 43 | x2 = x + w * 0.5 44 | y2 = y + h * 0.5 45 | return x1, y1, x2, y2 46 | 47 | 48 | def cxy_wh_2_rect(pos, sz): 49 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) # 0-index 50 | 51 | 52 | def rect_2_cxy_wh(rect): 53 | return np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2]), np.array([rect[2], rect[3]]) # 0-index 54 | 55 | 
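# --- illustrative round-trip (editor's sketch, not part of bbox_helper.py) ---
# corner2center and center2corner above are inverses of each other; assuming the
# Corner/Center namedtuples defined at the top of this file, a round-trip looks like:
#   c = Corner(10, 20, 50, 80)              # x1, y1, x2, y2
#   corner2center(c)                        # -> Center(x=30.0, y=50.0, w=40, h=60)
#   center2corner(Center(30, 50, 40, 60))   # -> Corner(x1=10.0, y1=20.0, x2=50.0, y2=80.0)
#   cxy_wh_2_rect((30, 50), (40, 60))       # -> array([10., 20., 40., 60.])  (x, y, w, h, 0-indexed)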
56 | def get_axis_aligned_bbox(region): 57 | nv = region.size 58 | if nv == 8: 59 | cx = np.mean(region[0::2]) 60 | cy = np.mean(region[1::2]) 61 | x1 = min(region[0::2]) 62 | x2 = max(region[0::2]) 63 | y1 = min(region[1::2]) 64 | y2 = max(region[1::2]) 65 | A1 = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6]) 66 | A2 = (x2 - x1) * (y2 - y1) 67 | s = np.sqrt(A1 / A2) 68 | w = s * (x2 - x1) + 1 69 | h = s * (y2 - y1) + 1 70 | else: 71 | x = region[0] 72 | y = region[1] 73 | w = region[2] 74 | h = region[3] 75 | cx = x+w/2 76 | cy = y+h/2 77 | 78 | return cx, cy, w, h 79 | 80 | 81 | LIMIT = 99999999 82 | def xyxy_to_xywh(bboxes, clipMin=-LIMIT, clipWidth=LIMIT, clipHeight=LIMIT, 83 | round=False): 84 | addedAxis = False 85 | if isinstance(bboxes, list): 86 | bboxes = np.array(bboxes).astype(np.float32) 87 | if len(bboxes.shape) == 1: 88 | addedAxis = True 89 | bboxes = bboxes[:,np.newaxis] 90 | bboxesOut = np.zeros(bboxes.shape) 91 | x1 = bboxes[0,...] 92 | y1 = bboxes[1,...] 93 | x2 = bboxes[2,...] 94 | y2 = bboxes[3,...] 95 | bboxesOut[0,...] = (x1 + x2) / 2.0 96 | bboxesOut[1,...] = (y1 + y2) / 2.0 97 | bboxesOut[2,...] = x2 - x1 98 | bboxesOut[3,...] = y2 - y1 99 | if clipMin != -LIMIT or clipWidth != LIMIT or clipHeight != LIMIT: 100 | bboxesOut = clip_bbox(bboxesOut, clipMin, clipWidth, clipHeight) 101 | if bboxesOut.shape[0] > 4: 102 | bboxesOut[4:,...] = bboxes[4:,...] 103 | if addedAxis: 104 | bboxesOut = bboxesOut[:,0] 105 | if round: 106 | bboxesOut = np.round(bboxesOut).astype(int) 107 | return bboxesOut 108 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/benchmark_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from os.path import join, realpath, dirname, exists, isdir 7 | from os import listdir 8 | import logging 9 | import glob 10 | import numpy as np 11 | import json 12 | from collections import OrderedDict 13 | import functools 14 | 15 | import pdb 16 | 17 | 18 | 19 | 20 | 21 | def get_dataset_zoo(): 22 | root = realpath(join(dirname(__file__), '../../data')) 23 | zoos = listdir(root) 24 | 25 | def valid(x): 26 | y = join(root, x) 27 | if not isdir(y): return False 28 | 29 | return exists(join(y, 'list.txt')) \ 30 | or exists(join(y, 'train', 'meta.json'))\ 31 | or exists(join(y, 'ImageSets', '2016', 'val.txt')) 32 | 33 | zoos = list(filter(valid, zoos)) 34 | return zoos 35 | 36 | 37 | dataset_zoo = get_dataset_zoo() 38 | 39 | def load_tasks_with_annotations(fname): 40 | with open(fname, 'r') as fp: 41 | if fname.endswith('.csv'): 42 | tracks = oxuva.load_dataset_annotations_csv(fp) 43 | else: 44 | raise ValueError(f"unknown extension: {fname}") 45 | return oxuva.map_dict(oxuva.make_task_from_track, tracks) 46 | 47 | 48 | 49 | 50 | 51 | 52 | def load_dataset(dataset): 53 | 54 | ################################################################## 55 | #### VOT2018, VOT2018-LT, OTB2015, GOT10k, LaSOT, OxUVA 56 | ################################################################## 57 | 58 | info = OrderedDict() 59 | if 'VOT' in dataset: 60 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 61 | if not exists(base_path): 62 | logging.error("Please download test dataset!!!") 63 | 
exit() 64 | list_path = join(base_path, 'list.txt') 65 | with open(list_path) as f: 66 | videos = [v.strip() for v in f.readlines()] 67 | for video in videos: 68 | video_path = join(base_path, video) 69 | image_path = join(video_path, '*.jpg') 70 | image_files = sorted(glob.glob(image_path)) 71 | if len(image_files) == 0: # VOT2018 72 | image_path = join(video_path, 'color', '*.jpg') 73 | image_files = sorted(glob.glob(image_path)) 74 | gt_path = join(video_path, 'groundtruth.txt') 75 | gt = np.loadtxt(gt_path, delimiter=',').astype(np.float64) 76 | if gt.shape[1] == 4: 77 | gt = np.column_stack((gt[:, 0], gt[:, 1], gt[:, 0], gt[:, 1] + gt[:, 3]-1, 78 | gt[:, 0] + gt[:, 2]-1, gt[:, 1] + gt[:, 3]-1, gt[:, 0] + gt[:, 2]-1, gt[:, 1])) 79 | info[video] = {'image_files': image_files, 'gt': gt, 'name': video} 80 | 81 | 82 | elif 'VOT2018-LT' in dataset: 83 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 84 | if not exists(base_path): 85 | logging.error("Please download test dataset!!!") 86 | exit() 87 | list_path = join(base_path, 'list.txt') 88 | with open(list_path) as f: 89 | videos = [v.strip() for v in f.readlines()] 90 | for video in videos: 91 | video_path = join(base_path, video) 92 | image_path = join(video_path, '*.jpg') 93 | image_files = sorted(glob.glob(image_path)) 94 | if len(image_files) == 0: # VOT2018 95 | image_path = join(video_path, 'color', '*.jpg') 96 | image_files = sorted(glob.glob(image_path)) 97 | gt_path = join(video_path, 'groundtruth.txt') 98 | gt = np.loadtxt(gt_path, delimiter=',').astype(np.float64) 99 | if gt.shape[1] == 4: 100 | gt = np.column_stack((gt[:, 0], gt[:, 1], gt[:, 0], gt[:, 1] + gt[:, 3]-1, 101 | gt[:, 0] + gt[:, 2]-1, gt[:, 1] + gt[:, 3]-1, gt[:, 0] + gt[:, 2]-1, gt[:, 1])) 102 | info[video] = {'image_files': image_files, 'gt': gt, 'name': video} 103 | 104 | 105 | elif 'OTB' in dataset: 106 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 107 | if not exists(base_path): 108 | print("Please download OTB dataset into data folder") 109 | json_path = base_path + '.json' 110 | info = json.load(open(json_path, 'r')) 111 | 112 | # load the video frames 113 | for v in info.keys(): 114 | path_name = info[v]['name'] 115 | info[v]['image_files'] = [join(base_path, path_name, 'img', im_f) for im_f in info[v]['image_files']] 116 | info[v]['gt'] = np.array(info[v]['gt_rect'])-[1,1,0,0] # our tracker is 0-index 117 | info[v]['name'] = v 118 | 119 | 120 | elif 'GOT' in dataset: 121 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 122 | if not exists(base_path): 123 | print("Please download GOT10K dataset into data folder") 124 | 125 | json_path = base_path + '.json' 126 | info = json.load(open(json_path, 'r')) 127 | 128 | 129 | elif 'GOT10k_train_val' in dataset: 130 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 131 | if not exists(base_path): 132 | print("Please download GOT10k_train_val dataset into data folder") 133 | 134 | json_path = base_path + '.json' 135 | info = json.load(open(json_path, 'r')) 136 | 137 | 138 | 139 | elif 'LaSOT' in dataset: 140 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 141 | if not exists(base_path): 142 | print("Please download LaSOT dataset into data folder") 143 | json_path = base_path + '.json' 144 | info = json.load(open(json_path, 'r')) 145 | 146 | elif 'UAV20L' in dataset: 147 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 148 | if not exists(base_path): 149 | print("Please download 
UAV20L dataset into data folder") 150 | json_path = base_path + '.json' 151 | info = json.load(open(json_path, 'r')) 152 | 153 | elif 'OXUVA' in dataset: 154 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 155 | if not exists(base_path): 156 | print("Please download OXUVA dataset into data folder") 157 | json_path = base_path + '.json' 158 | info = json.load(open(json_path, 'r')) 159 | 160 | elif 'TC128' in dataset: 161 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 162 | if not exists(base_path): 163 | print("Please download TC128 dataset into data folder") 164 | json_path = base_path + '.json' 165 | info = json.load(open(json_path, 'r')) 166 | 167 | elif 'UAV123' in dataset: 168 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 169 | if not exists(base_path): 170 | print("Please download UAV123 dataset into data folder") 171 | json_path = base_path + '.json' 172 | info = json.load(open(json_path, 'r')) 173 | 174 | 175 | else: 176 | logging.error(f'{dataset} not supported') 177 | exit() 178 | return info 179 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | from .vot import VOTDataset, VOTLTDataset 10 | 11 | 12 | class DatasetFactory(object): 13 | @staticmethod 14 | def create_dataset(**kwargs): 15 | """ 16 | Args: 17 | name: dataset name 'VOT2018', 'VOT2016' 18 | dataset_root: dataset root 19 | Return: 20 | dataset 21 | """ 22 | assert 'name' in kwargs, "should provide dataset name" 23 | name = kwargs['name'] 24 | if 'VOT2018' == name or 'VOT2016' == name: 25 | dataset = VOTDataset(**kwargs) 26 | elif 'VOT-LT' == name: 27 | dataset = VOTLTDataset(**kwargs) 28 | else: 29 | raise Exception("unknow dataset {}".format(kwargs['name'])) 30 | return dataset 31 | 32 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | class Dataset(object): 10 | def __init__(self, name, dataset_root): 11 | self.name = name 12 | self.dataset_root = dataset_root 13 | self.videos = None 14 | 15 | def __getitem__(self, idx): 16 | if isinstance(idx, str): 17 | return self.videos[idx] 18 | elif isinstance(idx, int): 19 | return self.videos[sorted(list(self.videos.keys()))[idx]] 20 | 21 | def __len__(self): 22 | return len(self.videos) 23 | 24 | def __iter__(self): 25 | keys = sorted(list(self.videos.keys())) 26 | for key in keys: 27 | yield self.videos[key] 28 | 29 | def set_tracker(self, 
path, tracker_names): 30 | """ 31 | Args: 32 | path: path to tracker results, 33 | tracker_names: list of tracker name 34 | """ 35 | self.tracker_path = path 36 | self.tracker_names = tracker_names 37 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/datasets/video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from glob import glob 4 | 5 | class Video(object): 6 | def __init__(self, name, root, video_dir, init_rect, img_names, 7 | gt_rect, attr): 8 | self.name = name 9 | self.video_dir = video_dir 10 | self.init_rect = init_rect 11 | self.gt_traj = gt_rect 12 | self.attr = attr 13 | self.pred_trajs = {} 14 | self.img_names = [os.path.join(root, x) for x in img_names] 15 | self.imgs = None 16 | 17 | def load_tracker(self, path, tracker_names=None, store=True): 18 | """ 19 | Args: 20 | path(str): path to result 21 | tracker_name(list): name of tracker 22 | """ 23 | if not tracker_names: 24 | tracker_names = [x.split('/')[-1] for x in glob(path) 25 | if os.path.isdir(x)] 26 | if isinstance(tracker_names, str): 27 | tracker_names = [tracker_names] 28 | for name in tracker_names: 29 | traj_file = os.path.join(path, name, self.name+'.txt') 30 | if os.path.exists(traj_file): 31 | with open(traj_file, 'r') as f : 32 | pred_traj = [list(map(float, x.strip().split(','))) 33 | for x in f.readlines()] 34 | if len(pred_traj) != len(self.gt_traj): 35 | print(name, len(pred_traj), len(self.gt_traj), self.name) 36 | if store: 37 | self.pred_trajs[name] = pred_traj 38 | else: 39 | return pred_traj 40 | else: 41 | print(traj_file) 42 | self.tracker_names = list(self.pred_trajs.keys()) 43 | 44 | def load_img(self): 45 | if self.imgs is None: 46 | self.imgs = [cv2.imread(x) for x in self.img_names] 47 | self.width = self.imgs[0].shape[1] 48 | self.height = self.imgs[0].shape[0] 49 | 50 | def free_img(self): 51 | self.imgs = None 52 | 53 | def __len__(self): 54 | return len(self.img_names) 55 | 56 | def __getitem__(self, idx): 57 | if self.imgs is None: 58 | return cv2.imread(self.img_names[idx]), self.gt_traj[idx] 59 | else: 60 | return self.imgs[idx], self.gt_traj[idx] 61 | 62 | def __iter__(self): 63 | for i in range(len(self.img_names)): 64 | if self.imgs is not None: 65 | yield self.imgs[i], self.gt_traj[i] 66 | else: 67 | yield cv2.imread(self.img_names[i]), self.gt_traj[i] 68 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/datasets/vot.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | import os 10 | import json 11 | import numpy as np 12 | 13 | from glob import glob 14 | from tqdm import tqdm 15 | from PIL import Image 16 | from ipdb import set_trace 17 | 18 | from .dataset import Dataset 19 | from .video import Video 20 | 21 | 22 | class VOTVideo(Video): 23 | """ 24 | Args: 25 | name: video name 26 | root: dataset root 27 | video_dir: video directory 28 | init_rect: init rectangle 29 | img_names: image names 30 | gt_rect: groundtruth 
rectangle 31 | camera_motion: camera motion tag 32 | illum_change: illum change tag 33 | motion_change: motion change tag 34 | size_change: size change 35 | occlusion: occlusion 36 | """ 37 | def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, 38 | camera_motion, illum_change, motion_change, size_change, occlusion, width, height): 39 | super(VOTVideo, self).__init__(name, root, video_dir, init_rect, img_names, gt_rect, None) 40 | self.tags= {'all': [1] * len(gt_rect)} 41 | self.tags['camera_motion'] = camera_motion 42 | self.tags['illum_change'] = illum_change 43 | self.tags['motion_change'] = motion_change 44 | self.tags['size_change'] = size_change 45 | self.tags['occlusion'] = occlusion 46 | 47 | self.width = width 48 | self.height = height 49 | 50 | # empty tag 51 | all_tag = [v for k, v in self.tags.items() if len(v) > 0 ] 52 | self.tags['empty'] = np.all(1 - np.array(all_tag), axis=1).astype(np.int32).tolist() 53 | 54 | self.tag_names = list(self.tags.keys()) 55 | 56 | def select_tag(self, tag, start=0, end=0): 57 | if tag == 'empty': 58 | return self.tags[tag] 59 | return self.tags[tag][start:end] 60 | 61 | def load_tracker(self, path, tracker_names=None, store=True): 62 | """ 63 | Args: 64 | path(str): path to result 65 | tracker_name(list): name of tracker 66 | """ 67 | if not tracker_names: 68 | tracker_names = [x.split('/')[-1] for x in glob(path) 69 | if os.path.isdir(x)] 70 | if isinstance(tracker_names, str): 71 | tracker_names = [tracker_names] 72 | for name in tracker_names: 73 | traj_files = glob(os.path.join(path, name, 'baseline', self.name, '*0*.txt')) 74 | if len(traj_files) == 15: 75 | traj_files = traj_files 76 | else: 77 | traj_files = traj_files[0:1] 78 | pred_traj = [] 79 | for traj_file in traj_files: 80 | with open(traj_file, 'r') as f: 81 | traj = [list(map(float, x.strip().split(','))) 82 | for x in f.readlines()] 83 | pred_traj.append(traj) 84 | if store: 85 | self.pred_trajs[name] = pred_traj 86 | else: 87 | return pred_traj 88 | 89 | 90 | class VOTDataset(Dataset): 91 | """ 92 | Args: 93 | name: dataset name, should be 'VOT2018', 'VOT2016' 94 | dataset_root: dataset root 95 | load_img: wether to load all imgs 96 | """ 97 | def __init__(self, name, dataset_root): 98 | super(VOTDataset, self).__init__(name, dataset_root) 99 | try: 100 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 101 | meta_data = json.load(f) 102 | except: 103 | download_str = '# download json file for eval toolkit\n'+\ 104 | 'cd $SiamMask/data\n'+\ 105 | 'wget http://www.robots.ox.ac.uk/~qwang/VOT2016.json\n'+\ 106 | 'wget http://www.robots.ox.ac.uk/~qwang/VOT2018.json' 107 | print(download_str) 108 | exit() 109 | 110 | # load videos 111 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 112 | self.videos = {} 113 | for video in pbar: 114 | pbar.set_postfix_str(video) 115 | self.videos[video] = VOTVideo(video, 116 | dataset_root, 117 | meta_data[video]['video_dir'], 118 | meta_data[video]['init_rect'], 119 | meta_data[video]['img_names'], 120 | meta_data[video]['gt_rect'], 121 | meta_data[video]['camera_motion'], 122 | meta_data[video]['illum_change'], 123 | meta_data[video]['motion_change'], 124 | meta_data[video]['size_change'], 125 | meta_data[video]['occlusion'], 126 | meta_data[video]['width'], 127 | meta_data[video]['height']) 128 | 129 | self.tags = ['all', 'camera_motion', 'illum_change', 'motion_change', 130 | 'size_change', 'occlusion', 'empty'] 131 | 132 | class VOTLTVideo(Video): 133 | """ 134 | Args: 135 | name: video name 
136 | root: dataset root 137 | video_dir: video directory 138 | init_rect: init rectangle 139 | img_names: image names 140 | gt_rect: groundtruth rectangle 141 | """ 142 | def __init__(self, name, root, video_dir, init_rect, img_names, 143 | gt_rect, load_img=False): 144 | super(VOTLTVideo, self).__init__(name, root, video_dir, 145 | init_rect, img_names, gt_rect, None) 146 | self.gt_traj = [[0] if np.isnan(bbox[0]) else bbox 147 | for bbox in self.gt_traj] 148 | if not load_img: 149 | img_name = os.path.join(root, self.img_names[0]) 150 | # adjustments 151 | img_name = img_name.replace('color/', '').replace('data', 'data/VOT-LT') 152 | img = np.array(Image.open(img_name), np.uint8) 153 | self.width = img.shape[1] 154 | self.height = img.shape[0] 155 | self.confidence = {} 156 | 157 | def load_tracker(self, path, tracker_names=None, store=True): 158 | """ 159 | Args: 160 | path(str): path to result 161 | tracker_name(list): name of tracker 162 | """ 163 | if not tracker_names: 164 | tracker_names = [x.split('/')[-1] for x in glob(path) 165 | if os.path.isdir(x)] 166 | if isinstance(tracker_names, str): 167 | tracker_names = [tracker_names] 168 | for name in tracker_names: 169 | traj_files = glob(os.path.join(path, name, 'baseline', self.name, '*0*.txt'))[0:1] 170 | 171 | pred_traj = [] 172 | for traj_file in traj_files: 173 | with open(traj_file, 'r') as f: 174 | traj = [list(map(float, x.strip().split(','))) 175 | for x in f.readlines()] 176 | pred_traj.append(traj) 177 | if store: 178 | self.pred_trajs[name] = pred_traj 179 | 180 | confidence_file = glob(os.path.join(path, name, 'baseline', self.name, '*0*.value'))[0] 181 | with open(confidence_file, 'r') as f: 182 | score = [float(x.strip()[1:]) for x in f.readlines()[1:]] 183 | score.insert(0, float('nan')) 184 | if store: 185 | self.confidence[name] = score 186 | return traj, score 187 | 188 | class VOTLTDataset(Dataset): 189 | """ 190 | Args: 191 | name: dataset name, 'VOT2018-LT' 192 | dataset_root: dataset root 193 | load_img: wether to load all imgs 194 | """ 195 | def __init__(self, name, dataset_root, load_img=False): 196 | super(VOTLTDataset, self).__init__(name, dataset_root) 197 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 198 | meta_data = json.load(f) 199 | 200 | # load videos 201 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 202 | self.videos = {} 203 | for video in pbar: 204 | pbar.set_postfix_str(video) 205 | self.videos[video] = VOTLTVideo(video, 206 | dataset_root, 207 | meta_data[video]['video_dir'], 208 | meta_data[video]['init_rect'], 209 | meta_data[video]['img_names'], 210 | meta_data[video]['gt_rect']) 211 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | from .ar_benchmark import AccuracyRobustnessBenchmark 10 | from .eao_benchmark import EAOBenchmark 11 | from .f1_benchmark import F1Benchmark 12 | -------------------------------------------------------------------------------- 
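The benchmark classes re-exported by this __init__.py are typically combined with the DatasetFactory from datasets/__init__.py. The snippet below is a minimal usage sketch based only on the signatures in this package; the dataset root, result path, and tracker name are placeholders, and the import prefix may need adjusting to your PYTHONPATH.

from bench_utils.pysot.datasets import DatasetFactory
from bench_utils.pysot.evaluation import AccuracyRobustnessBenchmark, EAOBenchmark, F1Benchmark

# VOTDataset expects <dataset_root>/VOT2018.json to exist (see datasets/vot.py)
dataset = DatasetFactory.create_dataset(name='VOT2018', dataset_root='data/VOT2018')
dataset.set_tracker('results/VOT2018', ['SiamRPN_THOR_ensemble'])  # result dir + tracker names (placeholders)

ar_result = AccuracyRobustnessBenchmark(dataset).eval()
eao_result = EAOBenchmark(dataset).eval()
AccuracyRobustnessBenchmark(dataset).show_result(ar_result, eao_result=eao_result)
# For VOT2018-LT results, create the dataset with name='VOT-LT' and use F1Benchmark instead.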
/DeepMTA_code/benchmark/bench_utils/pysot/evaluation/ar_benchmark.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | 10 | import warnings 11 | import itertools 12 | import numpy as np 13 | 14 | from colorama import Style, Fore 15 | from ..utils import calculate_failures, calculate_accuracy 16 | 17 | 18 | class AccuracyRobustnessBenchmark: 19 | """ 20 | Args: 21 | dataset: 22 | burnin: 23 | """ 24 | def __init__(self, dataset, burnin=10): 25 | self.dataset = dataset 26 | self.burnin = burnin 27 | 28 | def eval(self, eval_trackers=None): 29 | """ 30 | Args: 31 | eval_tags: list of tag 32 | eval_trackers: list of tracker name 33 | Returns: 34 | ret: dict of results 35 | """ 36 | if eval_trackers is None: 37 | eval_trackers = self.dataset.tracker_names 38 | if isinstance(eval_trackers, str): 39 | eval_trackers = [eval_trackers] 40 | 41 | result = {} 42 | for tracker_name in eval_trackers: 43 | accuracy, failures = self._calculate_accuracy_robustness(tracker_name) 44 | result[tracker_name] = {'overlaps': accuracy, 45 | 'failures': failures} 46 | return result 47 | 48 | def show_result(self, result, eao_result=None, show_video_level=False, helight_threshold=0.5): 49 | """pretty print result 50 | Args: 51 | result: returned dict from function eval 52 | """ 53 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12) 54 | if eao_result is not None: 55 | header = "|{:^"+str(tracker_name_len)+"}|{:^10}|{:^12}|{:^13}|{:^7}|" 56 | header = header.format('Tracker Name', 57 | 'Accuracy', 'Robustness', 'Lost Number', 'EAO') 58 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^10.3f}|{:^12.3f}|{:^13.1f}|{:^7.3f}|" 59 | else: 60 | header = "|{:^"+str(tracker_name_len)+"}|{:^10}|{:^12}|{:^13}|" 61 | header = header.format('Tracker Name', 62 | 'Accuracy', 'Robustness', 'Lost Number') 63 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^10.3f}|{:^12.3f}|{:^13.1f}|" 64 | bar = '-'*len(header) 65 | print(bar) 66 | print(header) 67 | print(bar) 68 | if eao_result is not None: 69 | tracker_eao = sorted(eao_result.items(), 70 | key=lambda x:x[1]['all'], 71 | reverse=True)[:20] 72 | tracker_names = [x[0] for x in tracker_eao] 73 | else: 74 | tracker_names = list(result.keys()) 75 | for tracker_name in tracker_names: 76 | ret = result[tracker_name] 77 | overlaps = list(itertools.chain(*ret['overlaps'].values())) 78 | accuracy = np.nanmean(overlaps) 79 | length = sum([len(x) for x in ret['overlaps'].values()]) 80 | failures = list(ret['failures'].values()) 81 | lost_number = np.mean(np.sum(failures, axis=0)) 82 | robustness = np.mean(np.sum(np.array(failures), axis=0) / length) * 100 83 | if eao_result is None: 84 | print(formatter.format(tracker_name, accuracy, robustness, lost_number)) 85 | else: 86 | print(formatter.format(tracker_name, accuracy, robustness, lost_number, eao_result[tracker_name]['all'])) 87 | print(bar) 88 | 89 | if show_video_level and len(result) < 10: 90 | print('\n\n') 91 | header1 = "|{:^14}|".format("Tracker name") 92 | header2 = "|{:^14}|".format("Video name") 93 | for tracker_name in result.keys(): 94 | header1 += 
("{:^17}|").format(tracker_name) 95 | header2 += "{:^8}|{:^8}|".format("Acc", "LN") 96 | print('-'*len(header1)) 97 | print(header1) 98 | print('-'*len(header1)) 99 | print(header2) 100 | print('-'*len(header1)) 101 | videos = list(result[tracker_name]['overlaps'].keys()) 102 | for video in videos: 103 | row = "|{:^14}|".format(video) 104 | for tracker_name in result.keys(): 105 | overlaps = result[tracker_name]['overlaps'][video] 106 | accuracy = np.nanmean(overlaps) 107 | failures = result[tracker_name]['failures'][video] 108 | lost_number = np.mean(failures) 109 | 110 | accuracy_str = "{:^8.3f}".format(accuracy) 111 | if accuracy < helight_threshold: 112 | row += f'{Fore.RED}{accuracy_str}{Style.RESET_ALL}|' 113 | else: 114 | row += accuracy_str+'|' 115 | lost_num_str = "{:^8.3f}".format(lost_number) 116 | if lost_number > 0: 117 | row += f'{Fore.RED}{lost_num_str}{Style.RESET_ALL}|' 118 | else: 119 | row += lost_num_str+'|' 120 | print(row) 121 | print('-'*len(header1)) 122 | 123 | def _calculate_accuracy_robustness(self, tracker_name): 124 | overlaps = {} 125 | failures = {} 126 | all_length = {} 127 | for i in range(len(self.dataset)): 128 | video = self.dataset[i] 129 | gt_traj = video.gt_traj 130 | if tracker_name not in video.pred_trajs: 131 | tracker_trajs = video.load_tracker(self.dataset.tracker_path, tracker_name, False) 132 | else: 133 | tracker_trajs = video.pred_trajs[tracker_name] 134 | overlaps_group = [] 135 | num_failures_group = [] 136 | for tracker_traj in tracker_trajs: 137 | num_failures = calculate_failures(tracker_traj)[0] 138 | overlaps_ = calculate_accuracy(tracker_traj, gt_traj, 139 | burnin=10, bound=(video.width, video.height))[1] 140 | overlaps_group.append(overlaps_) 141 | num_failures_group.append(num_failures) 142 | with warnings.catch_warnings(): 143 | warnings.simplefilter("ignore", category=RuntimeWarning) 144 | overlaps[video.name] = np.nanmean(overlaps_group, axis=0).tolist() 145 | failures[video.name] = num_failures_group 146 | return overlaps, failures 147 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/evaluation/eao_benchmark.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | import numpy as np 10 | 11 | from ..utils import calculate_failures, calculate_accuracy, calculate_expected_overlap 12 | 13 | 14 | class EAOBenchmark: 15 | """ 16 | Args: 17 | dataset: 18 | """ 19 | def __init__(self, dataset, skipping=5, tags=['all']): 20 | self.dataset = dataset 21 | self.skipping = skipping 22 | self.tags = tags 23 | # NOTE we not use gmm to generate low, high, peak value 24 | if dataset.name == 'VOT2018' or dataset.name == 'VOT2017': 25 | self.low = 100 26 | self.high = 356 27 | self.peak = 160 28 | elif dataset.name == 'VOT2016': 29 | self.low = 100 # TODO 30 | self.high = 356 31 | self.peak = 160 32 | 33 | def eval(self, eval_trackers=None): 34 | """ 35 | Args: 36 | eval_tags: list of tag 37 | eval_trackers: list of tracker name 38 | Returns: 39 | eao: dict of results 40 | """ 41 | if eval_trackers is None: 42 | eval_trackers = 
self.dataset.tracker_names 43 | if isinstance(eval_trackers, str): 44 | eval_trackers = [eval_trackers] 45 | 46 | ret = {} 47 | for tracker_name in eval_trackers: 48 | eao = self._calculate_eao(tracker_name, self.tags) 49 | ret[tracker_name] = eao 50 | return ret 51 | 52 | def show_result(self, result, topk=10): 53 | """pretty print result 54 | Args: 55 | result: returned dict from function eval 56 | """ 57 | if len(self.tags) == 1: 58 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12) 59 | header = ("|{:^"+str(tracker_name_len)+"}|{:^10}|").format('Tracker Name', 'EAO') 60 | bar = '-'*len(header) 61 | formatter = "|{:^20}|{:^10.3f}|" 62 | print(bar) 63 | print(header) 64 | print(bar) 65 | tracker_eao = sorted(result.items(), 66 | key=lambda x: x[1]['all'], 67 | reverse=True)[:topk] 68 | for tracker_name, eao in tracker_eao: 69 | print(formatter.format(tracker_name, eao)) 70 | print(bar) 71 | else: 72 | header = "|{:^20}|".format('Tracker Name') 73 | header += "{:^7}|{:^15}|{:^14}|{:^15}|{:^13}|{:^11}|{:^7}|".format(*self.tags) 74 | bar = '-'*len(header) 75 | formatter = "{:^7.3f}|{:^15.3f}|{:^14.3f}|{:^15.3f}|{:^13.3f}|{:^11.3f}|{:^7.3f}|" 76 | print(bar) 77 | print(header) 78 | print(bar) 79 | sorted_tacker = sorted(result.items(), 80 | key=lambda x: x[1]['all'], 81 | reverse=True)[:topk] 82 | sorted_tacker = [x[0] for x in sorted_tacker] 83 | for tracker_name in sorted_tacker: 84 | print("|{:^20}|".format(tracker_name)+formatter.format( 85 | *[result[tracker_name][x] for x in self.tags])) 86 | print(bar) 87 | 88 | def _calculate_eao(self, tracker_name, tags): 89 | all_overlaps = [] 90 | all_failures = [] 91 | video_names = [] 92 | gt_traj_length = [] 93 | for video in self.dataset: 94 | gt_traj = video.gt_traj 95 | if tracker_name not in video.pred_trajs: 96 | tracker_trajs = video.load_tracker(self.dataset.tracker_path, tracker_name, False) 97 | else: 98 | tracker_trajs = video.pred_trajs[tracker_name] 99 | for tracker_traj in tracker_trajs: 100 | gt_traj_length.append(len(gt_traj)) 101 | video_names.append(video.name) 102 | overlaps = calculate_accuracy(tracker_traj, gt_traj, bound=(video.width-1, video.height-1))[1] 103 | failures = calculate_failures(tracker_traj)[1] 104 | all_overlaps.append(overlaps) 105 | all_failures.append(failures) 106 | fragment_num = sum([len(x)+1 for x in all_failures]) 107 | max_len = max([len(x) for x in all_overlaps]) 108 | seq_weight = 1 / len(tracker_trajs) 109 | 110 | eao = {} 111 | for tag in tags: 112 | # prepare segments 113 | fweights = np.ones((fragment_num)) * np.nan 114 | fragments = np.ones((fragment_num, max_len)) * np.nan 115 | seg_counter = 0 116 | for name, traj_len, failures, overlaps in zip(video_names, gt_traj_length, 117 | all_failures, all_overlaps): 118 | if len(failures) > 0: 119 | points = [x+self.skipping for x in failures if 120 | x+self.skipping <= len(overlaps)] 121 | points.insert(0, 0) 122 | for i in range(len(points)): 123 | if i != len(points) - 1: 124 | fragment = np.array(overlaps[points[i]:points[i+1]+1]) 125 | fragments[seg_counter, :] = 0 126 | else: 127 | fragment = np.array(overlaps[points[i]:]) 128 | fragment[np.isnan(fragment)] = 0 129 | fragments[seg_counter, :len(fragment)] = fragment 130 | if i != len(points) - 1: 131 | tag_value = self.dataset[name].select_tag(tag, points[i], points[i+1]+1) 132 | w = sum(tag_value) / (points[i+1] - points[i]+1) 133 | fweights[seg_counter] = seq_weight * w 134 | else: 135 | tag_value = self.dataset[name].select_tag(tag, points[i], len(overlaps)) 136 | w = 
sum(tag_value) / (traj_len - points[i]+1e-16) 137 | fweights[seg_counter] = seq_weight * w 138 | seg_counter += 1 139 | else: 140 | # no failure 141 | max_idx = min(len(overlaps), max_len) 142 | fragments[seg_counter, :max_idx] = overlaps[:max_idx] 143 | tag_value = self.dataset[name].select_tag(tag, 0, max_idx) 144 | w = sum(tag_value) / max_idx 145 | fweights[seg_counter] = seq_weight * w 146 | seg_counter += 1 147 | 148 | expected_overlaps = calculate_expected_overlap(fragments, fweights) 149 | # caculate eao 150 | weight = np.zeros((len(expected_overlaps))) 151 | weight[self.low-1:self.high-1+1] = 1 152 | is_valid = np.logical_not(np.isnan(expected_overlaps)) 153 | eao_ = np.sum(expected_overlaps[is_valid] * weight[is_valid]) / np.sum(weight[is_valid]) 154 | eao[tag] = eao_ 155 | return eao 156 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/evaluation/f1_benchmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from glob import glob 5 | from tqdm import tqdm 6 | from colorama import Style, Fore 7 | 8 | from ..utils import determine_thresholds, calculate_accuracy, calculate_f1 9 | 10 | class F1Benchmark: 11 | def __init__(self, dataset): 12 | """ 13 | Args: 14 | result_path: 15 | """ 16 | self.dataset = dataset 17 | 18 | def eval(self, eval_trackers=None): 19 | """ 20 | Args: 21 | eval_tags: list of tag 22 | eval_trackers: list of tracker name 23 | Returns: 24 | eao: dict of results 25 | """ 26 | if eval_trackers is None: 27 | eval_trackers = self.dataset.tracker_names 28 | if isinstance(eval_trackers, str): 29 | eval_trackers = [eval_trackers] 30 | 31 | ret = {} 32 | for tracker_name in eval_trackers: 33 | precision, recall, f1 = self._cal_precision_reall(tracker_name) 34 | ret[tracker_name] = {"precision": precision, 35 | "recall": recall, 36 | "f1": f1 37 | } 38 | return ret 39 | 40 | def _cal_precision_reall(self, tracker_name): 41 | score = [] 42 | # for i in range(len(self.dataset)): 43 | # video = self.dataset[i] 44 | for video in self.dataset: 45 | if tracker_name not in video.confidence: 46 | score += video.load_tracker(self.dataset.tracker_path, tracker_name, False)[1] 47 | else: 48 | score += video.confidence[tracker_name] 49 | score = np.array(score) 50 | thresholds = determine_thresholds(score)[::-1] 51 | 52 | precision = {} 53 | recall = {} 54 | f1 = {} 55 | for i in range(len(self.dataset)): 56 | video = self.dataset[i] 57 | gt_traj = video.gt_traj 58 | N = sum([1 for x in gt_traj if len(x) > 1]) 59 | if tracker_name not in video.pred_trajs: 60 | tracker_traj, score = video.load_tracker(self.dataset.tracker_path, tracker_name, False) 61 | else: 62 | tracker_traj = video.pred_trajs[tracker_name] 63 | score = video.confidence[tracker_name] 64 | overlaps = calculate_accuracy(tracker_traj, gt_traj, \ 65 | bound=(video.width,video.height))[1] 66 | f1[video.name], precision[video.name], recall[video.name] = \ 67 | calculate_f1(overlaps, score, (video.width,video.height),thresholds, N) 68 | return precision, recall, f1 69 | 70 | def show_result(self, result, show_video_level=False, helight_threshold=0.5): 71 | """pretty print result 72 | Args: 73 | result: returned dict from function eval 74 | """ 75 | # sort tracker according to f1 76 | sorted_tracker = {} 77 | for tracker_name, ret in result.items(): 78 | precision = np.mean(list(ret['precision'].values()), axis=0) 79 | recall = np.mean(list(ret['recall'].values()), 
axis=0) 80 | f1 = 2 * precision * recall / (precision + recall) 81 | max_idx = np.argmax(f1) 82 | sorted_tracker[tracker_name] = (precision[max_idx], recall[max_idx], 83 | f1[max_idx]) 84 | sorted_tracker_ = sorted(sorted_tracker.items(), 85 | key=lambda x:x[1][2], 86 | reverse=True)[:20] 87 | tracker_names = [x[0] for x in sorted_tracker_] 88 | 89 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12) 90 | header = "|{:^"+str(tracker_name_len)+"}|{:^11}|{:^8}|{:^7}|" 91 | header = header.format('Tracker Name', 92 | 'Precision', 'Recall', 'F1') 93 | bar = '-' * len(header) 94 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^11.3f}|{:^8.3f}|{:^7.3f}|" 95 | print(bar) 96 | print(header) 97 | print(bar) 98 | # for tracker_name, ret in result.items(): 99 | # precision = np.mean(list(ret['precision'].values()), axis=0) 100 | # recall = np.mean(list(ret['recall'].values()), axis=0) 101 | # f1 = 2 * precision * recall / (precision + recall) 102 | # max_idx = np.argmax(f1) 103 | for tracker_name in tracker_names: 104 | precision = sorted_tracker[tracker_name][0] 105 | recall = sorted_tracker[tracker_name][1] 106 | f1 = sorted_tracker[tracker_name][2] 107 | print(formatter.format(tracker_name, precision, recall, f1)) 108 | print(bar) 109 | 110 | if show_video_level and len(result) < 10: 111 | print('\n\n') 112 | header1 = "|{:^14}|".format("Tracker name") 113 | header2 = "|{:^14}|".format("Video name") 114 | for tracker_name in result.keys(): 115 | # col_len = max(20, len(tracker_name)) 116 | header1 += ("{:^28}|").format(tracker_name) 117 | header2 += "{:^11}|{:^8}|{:^7}|".format("Precision", "Recall", "F1") 118 | print('-'*len(header1)) 119 | print(header1) 120 | print('-'*len(header1)) 121 | print(header2) 122 | print('-'*len(header1)) 123 | videos = list(result[tracker_name]['precision'].keys()) 124 | for video in videos: 125 | row = "|{:^14}|".format(video) 126 | for tracker_name in result.keys(): 127 | precision = result[tracker_name]['precision'][video] 128 | recall = result[tracker_name]['recall'][video] 129 | f1 = result[tracker_name]['f1'][video] 130 | max_idx = np.argmax(f1) 131 | precision_str = "{:^11.3f}".format(precision[max_idx]) 132 | if precision[max_idx] < helight_threshold: 133 | row += f'{Fore.RED}{precision_str}{Style.RESET_ALL}|' 134 | else: 135 | row += precision_str+'|' 136 | recall_str = "{:^8.3f}".format(recall[max_idx]) 137 | if recall[max_idx] < helight_threshold: 138 | row += f'{Fore.RED}{recall_str}{Style.RESET_ALL}|' 139 | else: 140 | row += recall_str+'|' 141 | f1_str = "{:^7.3f}".format(f1[max_idx]) 142 | if f1[max_idx] < helight_threshold: 143 | row += f'{Fore.RED}{f1_str}{Style.RESET_ALL}|' 144 | else: 145 | row += f1_str+'|' 146 | print(row) 147 | print('-'*len(header1)) 148 | return {'f1': f1, 'precision': precision, 'recall': recall} 149 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | from . 
import region 10 | from .statistics import * 11 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/build/temp.linux-x86_64-3.7/region.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/DeepMTA_code/benchmark/bench_utils/pysot/utils/build/temp.linux-x86_64-3.7/region.o -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/build/temp.linux-x86_64-3.7/src/region.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/DeepMTA_code/benchmark/bench_utils/pysot/utils/build/temp.linux-x86_64-3.7/src/region.o -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/c_region.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "src/region.h": 2 | ctypedef enum region_type "RegionType": 3 | EMTPY 4 | SPECIAL 5 | RECTANGEL 6 | POLYGON 7 | MASK 8 | 9 | ctypedef struct region_bounds: 10 | float top 11 | float bottom 12 | float left 13 | float right 14 | 15 | ctypedef struct region_rectangle: 16 | float x 17 | float y 18 | float width 19 | float height 20 | 21 | # ctypedef struct region_mask: 22 | # int x 23 | # int y 24 | # int width 25 | # int height 26 | # char *data 27 | 28 | ctypedef struct region_polygon: 29 | int count 30 | float *x 31 | float *y 32 | 33 | ctypedef union region_container_data: 34 | region_rectangle rectangle 35 | region_polygon polygon 36 | # region_mask mask 37 | int special 38 | 39 | ctypedef struct region_container: 40 | region_type type 41 | region_container_data data 42 | 43 | # ctypedef struct region_overlap: 44 | # float overlap 45 | # float only1 46 | # float only2 47 | 48 | # region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds) 49 | 50 | float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds) 51 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/misc.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | import numpy as np 10 | 11 | def determine_thresholds(confidence, resolution=100): 12 | """choose threshold according to confidence 13 | 14 | Args: 15 | confidence: list or numpy array or numpy array 16 | reolution: number of threshold to choose 17 | 18 | Restures: 19 | threshold: numpy array 20 | """ 21 | if isinstance(confidence, list): 22 | confidence = np.array(confidence) 23 | confidence = confidence.flatten() 24 | confidence = confidence[~np.isnan(confidence)] 25 | confidence.sort() 26 | 27 | assert len(confidence) > resolution and resolution > 2 28 | 29 | thresholds 
= np.ones((resolution)) 30 | thresholds[0] = - np.inf 31 | thresholds[-1] = np.inf 32 | delta = np.floor(len(confidence) / (resolution - 2)) 33 | idxs = np.linspace(delta, len(confidence)-delta, resolution-2, dtype=np.int32) 34 | thresholds[1:-1] = confidence[idxs] 35 | return thresholds 36 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/region.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/DeepMTA_code/benchmark/bench_utils/pysot/utils/region.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/region.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | 10 | # distutils: sources = src/region.c 11 | # distutils: include_dirs = src/ 12 | 13 | from libc.stdlib cimport malloc, free 14 | from libc.stdio cimport sprintf 15 | from libc.string cimport strlen 16 | 17 | cimport c_region 18 | 19 | cpdef enum RegionType: 20 | EMTPY 21 | SPECIAL 22 | RECTANGEL 23 | POLYGON 24 | MASK 25 | 26 | cdef class RegionBounds: 27 | cdef c_region.region_bounds* _c_region_bounds 28 | 29 | def __cinit__(self): 30 | self._c_region_bounds = malloc( 31 | sizeof(c_region.region_bounds)) 32 | if not self._c_region_bounds: 33 | self._c_region_bounds = NULL 34 | raise MemoryError() 35 | 36 | def __init__(self, top, bottom, left, right): 37 | self.set(top, bottom, left, right) 38 | 39 | def __dealloc__(self): 40 | if self._c_region_bounds is not NULL: 41 | free(self._c_region_bounds) 42 | self._c_region_bounds = NULL 43 | 44 | def __str__(self): 45 | return "top: {:.3f} bottom: {:.3f} left: {:.3f} reight: {:.3f}".format( 46 | self._c_region_bounds.top, 47 | self._c_region_bounds.bottom, 48 | self._c_region_bounds.left, 49 | self._c_region_bounds.right) 50 | 51 | def get(self): 52 | return (self._c_region_bounds.top, 53 | self._c_region_bounds.bottom, 54 | self._c_region_bounds.left, 55 | self._c_region_bounds.right) 56 | 57 | def set(self, top, bottom, left, right): 58 | self._c_region_bounds.top = top 59 | self._c_region_bounds.bottom = bottom 60 | self._c_region_bounds.left = left 61 | self._c_region_bounds.right = right 62 | 63 | cdef class Rectangle: 64 | cdef c_region.region_rectangle* _c_region_rectangle 65 | 66 | def __cinit__(self): 67 | self._c_region_rectangle = malloc( 68 | sizeof(c_region.region_rectangle)) 69 | if not self._c_region_rectangle: 70 | self._c_region_rectangle = NULL 71 | raise MemoryError() 72 | 73 | def __init__(self, x, y, width, height): 74 | self.set(x, y, width, height) 75 | 76 | def __dealloc__(self): 77 | if self._c_region_rectangle is not NULL: 78 | free(self._c_region_rectangle) 79 | self._c_region_rectangle = NULL 80 | 81 | def __str__(self): 82 | return "x: {:.3f} y: {:.3f} width: {:.3f} height: {:.3f}".format( 83 | self._c_region_rectangle.x, 84 | self._c_region_rectangle.y, 85 | 
self._c_region_rectangle.width, 86 | self._c_region_rectangle.height) 87 | 88 | def set(self, x, y, width, height): 89 | self._c_region_rectangle.x = x 90 | self._c_region_rectangle.y = y 91 | self._c_region_rectangle.width = width 92 | self._c_region_rectangle.height = height 93 | 94 | def get(self): 95 | """ 96 | return: 97 | (x, y, width, height) 98 | """ 99 | return (self._c_region_rectangle.x, 100 | self._c_region_rectangle.y, 101 | self._c_region_rectangle.width, 102 | self._c_region_rectangle.height) 103 | 104 | cdef class Polygon: 105 | cdef c_region.region_polygon* _c_region_polygon 106 | 107 | def __cinit__(self, points): 108 | """ 109 | args: 110 | points: tuple of point 111 | points = ((1, 1), (10, 10)) 112 | """ 113 | num = len(points) // 2 114 | self._c_region_polygon = malloc( 115 | sizeof(c_region.region_polygon)) 116 | if not self._c_region_polygon: 117 | self._c_region_polygon = NULL 118 | raise MemoryError() 119 | self._c_region_polygon.count = num 120 | self._c_region_polygon.x = malloc(sizeof(float) * num) 121 | if not self._c_region_polygon.x: 122 | raise MemoryError() 123 | self._c_region_polygon.y = malloc(sizeof(float) * num) 124 | if not self._c_region_polygon.y: 125 | raise MemoryError() 126 | 127 | for i in range(num): 128 | self._c_region_polygon.x[i] = points[i*2] 129 | self._c_region_polygon.y[i] = points[i*2+1] 130 | 131 | def __dealloc__(self): 132 | if self._c_region_polygon is not NULL: 133 | if self._c_region_polygon.x is not NULL: 134 | free(self._c_region_polygon.x) 135 | self._c_region_polygon.x = NULL 136 | if self._c_region_polygon.y is not NULL: 137 | free(self._c_region_polygon.y) 138 | self._c_region_polygon.y = NULL 139 | free(self._c_region_polygon) 140 | self._c_region_polygon = NULL 141 | 142 | def __str__(self): 143 | ret = "" 144 | for i in range(self._c_region_polygon.count-1): 145 | ret += "({:.3f} {:.3f}) ".format(self._c_region_polygon.x[i], 146 | self._c_region_polygon.y[i]) 147 | ret += "({:.3f} {:.3f})".format(self._c_region_polygon.x[i], 148 | self._c_region_polygon.y[i]) 149 | return ret 150 | 151 | def vot_overlap(polygon1, polygon2, bounds=None): 152 | """ computing overlap between two polygon 153 | Args: 154 | polygon1: polygon tuple of points 155 | polygon2: polygon tuple of points 156 | bounds: tuple of (left, top, right, bottom) or tuple of (width height) 157 | Return: 158 | overlap: overlap between two polygons 159 | """ 160 | if len(polygon1) == 1 or len(polygon2) == 1: 161 | return float("nan") 162 | 163 | if len(polygon1) == 4: 164 | polygon1_ = Polygon([polygon1[0], polygon1[1], 165 | polygon1[0]+polygon1[2], polygon1[1], 166 | polygon1[0]+polygon1[2], polygon1[1]+polygon1[3], 167 | polygon1[0], polygon1[1]+polygon1[3]]) 168 | else: 169 | polygon1_ = Polygon(polygon1) 170 | 171 | if len(polygon2) == 4: 172 | polygon2_ = Polygon([polygon2[0], polygon2[1], 173 | polygon2[0]+polygon2[2], polygon2[1], 174 | polygon2[0]+polygon2[2], polygon2[1]+polygon2[3], 175 | polygon2[0], polygon2[1]+polygon2[3]]) 176 | else: 177 | polygon2_ = Polygon(polygon2) 178 | 179 | if bounds is not None and len(bounds) == 4: 180 | pno_bounds = RegionBounds(bounds[0], bounds[1], bounds[2], bounds[3]) 181 | elif bounds is not None and len(bounds) == 2: 182 | pno_bounds = RegionBounds(0, bounds[1], 0, bounds[0]) 183 | else: 184 | pno_bounds = RegionBounds(-float("inf"), float("inf"), 185 | -float("inf"), float("inf")) 186 | cdef float only1 = 0 187 | cdef float only2 = 0 188 | cdef c_region.region_polygon* c_polygon1 = polygon1_._c_region_polygon 189 
| cdef c_region.region_polygon* c_polygon2 = polygon2_._c_region_polygon 190 | cdef c_region.region_bounds no_bounds = pno_bounds._c_region_bounds[0] # deference 191 | return c_region.compute_polygon_overlap(c_polygon1, 192 | c_polygon2, 193 | &only1, 194 | &only2, 195 | no_bounds) 196 | 197 | def vot_overlap_traj(polygons1, polygons2, bounds=None): 198 | """ computing overlap between two trajectory 199 | Args: 200 | polygons1: list of polygon 201 | polygons2: list of polygon 202 | bounds: tuple of (left, top, right, bottom) or tuple of (width height) 203 | Return: 204 | overlaps: overlaps between all pair of polygons 205 | """ 206 | assert len(polygons1) == len(polygons2) 207 | overlaps = [] 208 | for i in range(len(polygons1)): 209 | overlap = vot_overlap(polygons1[i], polygons2[i], bounds=bounds) 210 | overlaps.append(overlap) 211 | return overlaps 212 | 213 | 214 | def vot_float2str(template, float value): 215 | """ 216 | Args: 217 | tempate: like "%.3f" in C syntax 218 | value: float value 219 | """ 220 | cdef bytes ptemplate = template.encode() 221 | cdef const char* ctemplate = ptemplate 222 | cdef char* output = malloc(sizeof(char) * 100) 223 | if not output: 224 | raise MemoryError() 225 | sprintf(output, ctemplate, value) 226 | try: 227 | ret = output[:strlen(output)].decode() 228 | finally: 229 | free(output) 230 | return ret 231 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | from distutils.core import setup 10 | from distutils.extension import Extension 11 | from Cython.Build import cythonize 12 | 13 | setup( 14 | ext_modules = cythonize([Extension("region", ["region.pyx", "src/region.c"])]), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/src/buffer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef __STRING_BUFFER_H 3 | #define __STRING_BUFFER_H 4 | 5 | // Enable MinGW secure API for _snprintf_s 6 | #define MINGW_HAS_SECURE_API 1 7 | 8 | #ifdef _MSC_VER 9 | #define __INLINE __inline 10 | #else 11 | #define __INLINE inline 12 | #endif 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | typedef struct string_buffer { 19 | char* buffer; 20 | int position; 21 | int size; 22 | } string_buffer; 23 | 24 | typedef struct string_list { 25 | char** buffer; 26 | int position; 27 | int size; 28 | } string_list; 29 | 30 | #define BUFFER_INCREMENT_STEP 4096 31 | 32 | static __INLINE string_buffer* buffer_create(int L) { 33 | string_buffer* B = (string_buffer*) malloc(sizeof(string_buffer)); 34 | B->size = L; 35 | B->buffer = (char*) malloc(sizeof(char) * B->size); 36 | B->position = 0; 37 | return B; 38 | } 39 | 40 | static __INLINE void buffer_reset(string_buffer* B) { 41 | B->position = 0; 42 | } 43 | 44 | static __INLINE void buffer_destroy(string_buffer** B) { 45 | if (!(*B)) return; 46 | if ((*B)->buffer) { 47 | free((*B)->buffer); 48 | (*B)->buffer = 
NULL; 49 | } 50 | free((*B)); 51 | (*B) = NULL; 52 | } 53 | 54 | static __INLINE char* buffer_extract(const string_buffer* B) { 55 | char *S = (char*) malloc(sizeof(char) * (B->position + 1)); 56 | memcpy(S, B->buffer, B->position); 57 | S[B->position] = '\0'; 58 | return S; 59 | } 60 | 61 | static __INLINE int buffer_size(const string_buffer* B) { 62 | return B->position; 63 | } 64 | 65 | static __INLINE void buffer_push(string_buffer* B, char C) { 66 | int required = 1; 67 | if (required > B->size - B->position) { 68 | B->size = B->position + BUFFER_INCREMENT_STEP; 69 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 70 | } 71 | B->buffer[B->position] = C; 72 | B->position += required; 73 | } 74 | 75 | static __INLINE void buffer_append(string_buffer* B, const char *format, ...) { 76 | 77 | int required; 78 | va_list args; 79 | 80 | #if defined(__OS2__) || defined(__WINDOWS__) || defined(WIN32) || defined(_MSC_VER) 81 | 82 | va_start(args, format); 83 | required = _vscprintf(format, args) + 1; 84 | va_end(args); 85 | if (required >= B->size - B->position) { 86 | B->size = B->position + required + 1; 87 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 88 | } 89 | va_start(args, format); 90 | required = _vsnprintf_s(&(B->buffer[B->position]), B->size - B->position, _TRUNCATE, format, args); 91 | va_end(args); 92 | B->position += required; 93 | 94 | #else 95 | va_start(args, format); 96 | required = vsnprintf(&(B->buffer[B->position]), B->size - B->position, format, args); 97 | va_end(args); 98 | if (required >= B->size - B->position) { 99 | B->size = B->position + required + 1; 100 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 101 | va_start(args, format); 102 | required = vsnprintf(&(B->buffer[B->position]), B->size - B->position, format, args); 103 | va_end(args); 104 | } 105 | B->position += required; 106 | #endif 107 | 108 | } 109 | 110 | static __INLINE string_list* list_create(int L) { 111 | string_list* B = (string_list*) malloc(sizeof(string_list)); 112 | B->size = L; 113 | B->buffer = (char**) malloc(sizeof(char*) * B->size); 114 | memset(B->buffer, 0, sizeof(char*) * B->size); 115 | B->position = 0; 116 | return B; 117 | } 118 | 119 | static __INLINE void list_reset(string_list* B) { 120 | int i; 121 | for (i = 0; i < B->position; i++) { 122 | if (B->buffer[i]) free(B->buffer[i]); 123 | B->buffer[i] = NULL; 124 | } 125 | B->position = 0; 126 | } 127 | 128 | static __INLINE void list_destroy(string_list **B) { 129 | int i; 130 | 131 | if (!(*B)) return; 132 | 133 | for (i = 0; i < (*B)->position; i++) { 134 | if ((*B)->buffer[i]) free((*B)->buffer[i]); (*B)->buffer[i] = NULL; 135 | } 136 | 137 | if ((*B)->buffer) { 138 | free((*B)->buffer); (*B)->buffer = NULL; 139 | } 140 | 141 | free((*B)); 142 | (*B) = NULL; 143 | } 144 | 145 | static __INLINE char* list_get(const string_list *B, int I) { 146 | if (I < 0 || I >= B->position) { 147 | return NULL; 148 | } else { 149 | if (!B->buffer[I]) { 150 | return NULL; 151 | } else { 152 | char *S; 153 | int length = strlen(B->buffer[I]); 154 | S = (char*) malloc(sizeof(char) * (length + 1)); 155 | memcpy(S, B->buffer[I], length + 1); 156 | return S; 157 | } 158 | } 159 | } 160 | 161 | static __INLINE int list_size(const string_list *B) { 162 | return B->position; 163 | } 164 | 165 | static __INLINE void list_append(string_list *B, char* S) { 166 | int required = 1; 167 | int length = strlen(S); 168 | if (required > B->size - B->position) { 169 | B->size = B->position + 16; 170 | B->buffer 
= (char**) realloc(B->buffer, sizeof(char*) * B->size); 171 | } 172 | B->buffer[B->position] = (char*) malloc(sizeof(char) * (length + 1)); 173 | memcpy(B->buffer[B->position], S, length + 1); 174 | B->position += required; 175 | } 176 | 177 | // This version of the append does not copy the string but simply takes the control of its allocation 178 | static __INLINE void list_append_direct(string_list *B, char* S) { 179 | int required = 1; 180 | // int length = strlen(S); 181 | if (required > B->size - B->position) { 182 | B->size = B->position + 16; 183 | B->buffer = (char**) realloc(B->buffer, sizeof(char*) * B->size); 184 | } 185 | B->buffer[B->position] = S; 186 | B->position += required; 187 | } 188 | 189 | 190 | #endif 191 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/src/region.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 4; tab-width: 4 -*- */ 2 | 3 | #ifndef _REGION_H_ 4 | #define _REGION_H_ 5 | 6 | #ifdef TRAX_STATIC_DEFINE 7 | # define __TRAX_EXPORT 8 | #else 9 | # ifndef __TRAX_EXPORT 10 | # if defined(_MSC_VER) 11 | # ifdef trax_EXPORTS 12 | /* We are building this library */ 13 | # define __TRAX_EXPORT __declspec(dllexport) 14 | # else 15 | /* We are using this library */ 16 | # define __TRAX_EXPORT __declspec(dllimport) 17 | # endif 18 | # elif defined(__GNUC__) 19 | # ifdef trax_EXPORTS 20 | /* We are building this library */ 21 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 22 | # else 23 | /* We are using this library */ 24 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 25 | # endif 26 | # endif 27 | # endif 28 | #endif 29 | 30 | #ifndef MAX 31 | #define MAX(a,b) (((a) > (b)) ? (a) : (b)) 32 | #endif 33 | 34 | #ifndef MIN 35 | #define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) 36 | #endif 37 | 38 | #define TRAX_DEFAULT_CODE 0 39 | 40 | #define REGION_LEGACY_RASTERIZATION 1 41 | 42 | #ifdef __cplusplus 43 | extern "C" { 44 | #endif 45 | 46 | typedef enum region_type {EMPTY, SPECIAL, RECTANGLE, POLYGON, MASK} region_type; 47 | 48 | typedef struct region_bounds { 49 | 50 | float top; 51 | float bottom; 52 | float left; 53 | float right; 54 | 55 | } region_bounds; 56 | 57 | typedef struct region_polygon { 58 | 59 | int count; 60 | 61 | float* x; 62 | float* y; 63 | 64 | } region_polygon; 65 | 66 | typedef struct region_mask { 67 | 68 | int x; 69 | int y; 70 | 71 | int width; 72 | int height; 73 | 74 | char* data; 75 | 76 | } region_mask; 77 | 78 | typedef struct region_rectangle { 79 | 80 | float x; 81 | float y; 82 | float width; 83 | float height; 84 | 85 | } region_rectangle; 86 | 87 | typedef struct region_container { 88 | enum region_type type; 89 | union { 90 | region_rectangle rectangle; 91 | region_polygon polygon; 92 | region_mask mask; 93 | int special; 94 | } data; 95 | } region_container; 96 | 97 | typedef struct region_overlap { 98 | 99 | float overlap; 100 | float only1; 101 | float only2; 102 | 103 | } region_overlap; 104 | 105 | extern const region_bounds region_no_bounds; 106 | 107 | __TRAX_EXPORT int region_set_flags(int mask); 108 | 109 | __TRAX_EXPORT int region_clear_flags(int mask); 110 | 111 | __TRAX_EXPORT region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds); 112 | 113 | __TRAX_EXPORT float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds); 114 | 115 | __TRAX_EXPORT region_bounds region_create_bounds(float left, float top, float right, float bottom); 116 | 117 | __TRAX_EXPORT region_bounds region_compute_bounds(const region_container* region); 118 | 119 | __TRAX_EXPORT int region_parse(const char* buffer, region_container** region); 120 | 121 | __TRAX_EXPORT char* region_string(region_container* region); 122 | 123 | __TRAX_EXPORT void region_print(FILE* out, region_container* region); 124 | 125 | __TRAX_EXPORT region_container* region_convert(const region_container* region, region_type type); 126 | 127 | __TRAX_EXPORT void region_release(region_container** region); 128 | 129 | __TRAX_EXPORT region_container* region_create_special(int code); 130 | 131 | __TRAX_EXPORT region_container* region_create_rectangle(float x, float y, float width, float height); 132 | 133 | __TRAX_EXPORT region_container* region_create_polygon(int count); 134 | 135 | __TRAX_EXPORT int region_contains_point(region_container* r, float x, float y); 136 | 137 | __TRAX_EXPORT void region_get_mask(region_container* r, char* mask, int width, int height); 138 | 139 | __TRAX_EXPORT void region_get_mask_offset(region_container* r, char* mask, int x, int y, int width, int height); 140 | 141 | #ifdef __cplusplus 142 | } 143 | #endif 144 | 145 | #endif 146 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/statistics.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # 
-------------------------------------------------------- 9 | 10 | import numpy as np 11 | from numba import jit 12 | from . import region 13 | 14 | def calculate_failures(trajectory): 15 | """ Calculate number of failures 16 | Args: 17 | trajectory: list of bbox 18 | Returns: 19 | num_failures: number of failures 20 | failures: failures point in trajectory, start with 0 21 | """ 22 | failures = [i for i, x in zip(range(len(trajectory)), trajectory) 23 | if len(x) == 1 and x[0] == 2] 24 | num_failures = len(failures) 25 | return num_failures, failures 26 | 27 | def calculate_accuracy(pred_trajectory, gt_trajectory, 28 | burnin=0, ignore_unknown=True, bound=None): 29 | """Caculate accuracy socre as average overlap over the entire sequence 30 | Args: 31 | trajectory: list of bbox 32 | gt_trajectory: list of bbox 33 | burnin: number of frames that have to be ignored after the failure 34 | ignore_unknown: ignore frames where the overlap is unknown 35 | bound: bounding region 36 | Return: 37 | acc: average overlap 38 | overlaps: per frame overlaps 39 | """ 40 | pred_trajectory_ = pred_trajectory 41 | if not ignore_unknown: 42 | unkown = [len(x)==1 and x[0] == 0 for x in pred_trajectory] 43 | 44 | if burnin > 0: 45 | pred_trajectory_ = pred_trajectory[:] 46 | mask = [len(x)==1 and x[0] == 1 for x in pred_trajectory] 47 | for i in range(len(mask)): 48 | if mask[i]: 49 | for j in range(burnin): 50 | if i + j < len(mask): 51 | pred_trajectory_[i+j] = [0] 52 | min_len = min(len(pred_trajectory_), len(gt_trajectory)) 53 | overlaps = region.vot_overlap_traj(pred_trajectory_[:min_len], 54 | gt_trajectory[:min_len], bound) 55 | 56 | if not ignore_unknown: 57 | overlaps = [u if u else 0 for u in unkown] 58 | 59 | acc = 0 60 | if len(overlaps) > 0: 61 | acc = np.nanmean(overlaps) 62 | return acc, overlaps 63 | 64 | @jit(nopython=True) 65 | def overlap_ratio(rect1, rect2): 66 | '''Compute overlap ratio between two rects 67 | Args 68 | rect:2d array of N x [x,y,w,h] 69 | Return: 70 | iou 71 | ''' 72 | # if rect1.ndim==1: 73 | # rect1 = rect1[np.newaxis, :] 74 | # if rect2.ndim==1: 75 | # rect2 = rect2[np.newaxis, :] 76 | left = np.maximum(rect1[:,0], rect2[:,0]) 77 | right = np.minimum(rect1[:,0]+rect1[:,2], rect2[:,0]+rect2[:,2]) 78 | top = np.maximum(rect1[:,1], rect2[:,1]) 79 | bottom = np.minimum(rect1[:,1]+rect1[:,3], rect2[:,1]+rect2[:,3]) 80 | 81 | intersect = np.maximum(0,right - left) * np.maximum(0,bottom - top) 82 | union = rect1[:,2]*rect1[:,3] + rect2[:,2]*rect2[:,3] - intersect 83 | iou = intersect / union 84 | iou = np.maximum(np.minimum(1, iou), 0) 85 | return iou 86 | 87 | @jit(nopython=True) 88 | def success_overlap(gt_bb, result_bb, n_frame): 89 | thresholds_overlap = np.arange(0, 1.05, 0.05) 90 | success = np.zeros(len(thresholds_overlap)) 91 | iou = np.ones(len(gt_bb)) * (-1) 92 | mask = np.sum(gt_bb > 0, axis=1) == 4 93 | iou[mask] = overlap_ratio(gt_bb[mask], result_bb[mask]) 94 | for i in range(len(thresholds_overlap)): 95 | success[i] = np.sum(iou > thresholds_overlap[i]) / float(n_frame) 96 | return success 97 | 98 | @jit(nopython=True) 99 | def success_error(gt_center, result_center, thresholds, n_frame): 100 | # n_frame = len(gt_center) 101 | success = np.zeros(len(thresholds)) 102 | dist = np.ones(len(gt_center)) * (-1) 103 | mask = np.sum(gt_center > 0, axis=1) == 2 104 | dist[mask] = np.sqrt(np.sum( 105 | np.power(gt_center[mask] - result_center[mask], 2), axis=1)) 106 | for i in range(len(thresholds)): 107 | success[i] = np.sum(dist <= thresholds[i]) / float(n_frame) 108 | 
return success 109 | 110 | @jit(nopython=True) 111 | def determine_thresholds(scores, resolution=100): 112 | """ 113 | Args: 114 | scores: 1d array of score 115 | """ 116 | scores = np.sort(scores[np.logical_not(np.isnan(scores))]) 117 | delta = np.floor(len(scores) / (resolution - 2)) 118 | idxs = np.floor(np.linspace(delta-1, len(scores)-delta, resolution-2)+0.5).astype(np.int32) 119 | thresholds = np.zeros((resolution)) 120 | thresholds[0] = - np.inf 121 | thresholds[-1] = np.inf 122 | thresholds[1:-1] = scores[idxs] 123 | return thresholds 124 | 125 | @jit(nopython=True) 126 | def calculate_f1(overlaps, score, bound, thresholds, N): 127 | overlaps = np.array(overlaps) 128 | overlaps[np.isnan(overlaps)] = 0 129 | score = np.array(score) 130 | score[np.isnan(score)] = 0 131 | precision = np.zeros(len(thresholds)) 132 | recall = np.zeros(len(thresholds)) 133 | for i, th in enumerate(thresholds): 134 | if th == - np.inf: 135 | idx = score > 0 136 | else: 137 | idx = score >= th 138 | if np.sum(idx) == 0: 139 | precision[i] = 1 140 | recall[i] = 0 141 | else: 142 | precision[i] = np.mean(overlaps[idx]) 143 | recall[i] = np.sum(overlaps[idx]) / N 144 | f1 = 2 * precision * recall / (precision + recall) 145 | return f1, precision, recall 146 | 147 | @jit(nopython=True) 148 | def calculate_expected_overlap(fragments, fweights): 149 | max_len = fragments.shape[1] 150 | expected_overlaps = np.zeros((max_len), np.float32) 151 | expected_overlaps[0] = 1 152 | 153 | # TODO Speed Up 154 | for i in range(1, max_len): 155 | mask = np.logical_not(np.isnan(fragments[:, i])) 156 | if np.any(mask): 157 | fragment = fragments[mask, 1:i+1] 158 | seq_mean = np.sum(fragment, 1) / fragment.shape[1] 159 | expected_overlaps[i] = np.sum(seq_mean * 160 | fweights[mask]) / np.sum(fweights[mask]) 161 | return expected_overlaps 162 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamFC/OTB2015_THOR_dynamic.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "lr": 0.641662, 4 | "penalty_k": 0.982769, 5 | "window_influence": 0.199673 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 5, 10 | "iou_tresh": 0.938244, 11 | "lb": 0.790933, 12 | "tukey_alpha": 0.232146, 13 | "lb_type": "dynamic", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamFC/OTB2015_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "lr": 0.641662, 4 | "penalty_k": 0.982769, 5 | "window_influence": 0.199673 6 | }, 7 | "THOR": { 8 | "K_st": 2, 9 | "K_lt": 9, 10 | "iou_tresh": 0.959911, 11 | "lb": 0.362411, 12 | "tukey_alpha": 0.859503, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamFC/OTB2015_vanilla.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "lr": 0.641662, 4 | "penalty_k": 0.982769, 5 | "window_influence": 0.199673 6 | }, 7 | "THOR": { 8 | "K_st": 0, 9 | "K_lt": 1, 10 | "iou_tresh": 1.0, 11 | "lb": 1.0, 12 | "tukey_alpha": 0, 13 | 
"dilation": 1000, 14 | "lb_type": "static", 15 | "modulate": false, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": true 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamFC/VOT2018_THOR_dynamic.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "lr": 0.641662, 4 | "penalty_k": 0.982769, 5 | "window_influence": 0.199673 6 | }, 7 | "THOR": { 8 | "K_st": 7, 9 | "K_lt": 10, 10 | "iou_tresh": 0.731403, 11 | "lb": 0.814394, 12 | "tukey_alpha": 0.919536, 13 | "lb_type": "dynamic", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamFC/VOT2018_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "lr": 0.641662, 4 | "penalty_k": 0.982769, 5 | "window_influence": 0.199673 6 | }, 7 | "THOR": { 8 | "K_st": 7, 9 | "K_lt": 6, 10 | "iou_tresh": 0.753538, 11 | "lb": 0.247764, 12 | "tukey_alpha": 0.462796, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamFC/VOT2018_vanilla.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "lr": 0.641662, 4 | "penalty_k": 0.982769, 5 | "window_influence": 0.199673 6 | }, 7 | "THOR": { 8 | "K_st": 0, 9 | "K_lt": 1, 10 | "iou_tresh": 1.0, 11 | "lb": 1.0, 12 | "tukey_alpha": 0, 13 | "dilation": 1000, 14 | "lb_type": "static", 15 | "modulate": false, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": true 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamMask/OTB2015_THOR_dynamic.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "window_influence": 0.42, 4 | "instance_size": 255, 5 | "base_size": 8, 6 | "out_size": 127, 7 | "seg_thr": 0.3, 8 | "penalty_k": 0.908068, 9 | "lr": 0.272863 10 | }, 11 | "anchors":{ 12 | "stride": 8, 13 | "ratios": [0.33, 0.5, 1, 2, 3], 14 | "scales": [8], 15 | "round_dight": 0 16 | }, 17 | "THOR": { 18 | "K_st": 3, 19 | "K_lt": 3, 20 | "iou_tresh": 0.992637, 21 | "lb": 0.832829, 22 | "tukey_alpha": 0.310958, 23 | "lb_type": "dynamic", 24 | "modulate": true, 25 | "dilation": 10, 26 | "context_temp": 0.5, 27 | "viz": false, 28 | "verbose": false, 29 | "vanilla": false 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamMask/OTB2015_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "window_influence": 0.42, 4 | "instance_size": 255, 5 | "base_size": 8, 6 | "out_size": 127, 7 | "seg_thr": 0.3, 8 | "penalty_k": 0.53364, 9 | "lr": 0.176627 10 | }, 11 | "anchors":{ 12 | "stride": 8, 13 | "ratios": [0.33, 0.5, 1, 2, 3], 14 | "scales": [8], 15 | "round_dight": 0 16 | }, 17 | "THOR": { 18 | "K_st": 4, 19 | "K_lt": 7, 20 | "iou_tresh": 0.985634, 21 | "lb": 0.397495, 22 | 
"tukey_alpha": 0.450157, 23 | "lb_type": "ensemble", 24 | "modulate": true, 25 | "dilation": 10, 26 | "context_temp": 0.5, 27 | "viz": false, 28 | "verbose": false, 29 | "vanilla": false 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamMask/OTB2015_vanilla.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "window_influence": 0.42, 4 | "instance_size": 255, 5 | "base_size": 8, 6 | "out_size": 127, 7 | "seg_thr": 0.3, 8 | "penalty_k": 0.04, 9 | "lr": 0.25 10 | }, 11 | "anchors":{ 12 | "stride": 8, 13 | "ratios": [0.33, 0.5, 1, 2, 3], 14 | "scales": [8], 15 | "round_dight": 0 16 | }, 17 | "THOR": { 18 | "K_st": 0, 19 | "K_lt": 1, 20 | "iou_tresh": 1.0, 21 | "lb": 1.0, 22 | "tukey_alpha": 0, 23 | "dilation": 1000, 24 | "lb_type": "static", 25 | "modulate": false, 26 | "context_temp": 0.5, 27 | "viz": false, 28 | "verbose": false, 29 | "vanilla": true 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamMask/VOT2018_THOR_dynamic.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "window_influence": 0.42, 4 | "instance_size": 255, 5 | "base_size": 8, 6 | "out_size": 127, 7 | "seg_thr": 0.3, 8 | "penalty_k": 0.087875, 9 | "lr": 0.651924 10 | }, 11 | "anchors":{ 12 | "stride": 8, 13 | "ratios": [0.33, 0.5, 1, 2, 3], 14 | "scales": [8], 15 | "round_dight": 0 16 | }, 17 | "THOR": { 18 | "K_st": 5, 19 | "K_lt": 5, 20 | "iou_tresh": 0.772009, 21 | "lb": 0.789802, 22 | "tukey_alpha": 0.981623, 23 | "lb_type": "dynamic", 24 | "modulate": true, 25 | "dilation": 10, 26 | "context_temp": 0.5, 27 | "viz": false, 28 | "verbose": false, 29 | "vanilla": false 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamMask/VOT2018_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "window_influence": 0.42, 4 | "instance_size": 255, 5 | "base_size": 8, 6 | "out_size": 127, 7 | "seg_thr": 0.3, 8 | "penalty_k": 0.04, 9 | "lr": 0.25 10 | }, 11 | "anchors":{ 12 | "stride": 8, 13 | "ratios": [0.33, 0.5, 1, 2, 3], 14 | "scales": [8], 15 | "round_dight": 0 16 | }, 17 | "THOR": { 18 | "K_st": 6, 19 | "K_lt": 3, 20 | "iou_tresh": 0.742568, 21 | "lb": 0.27996, 22 | "tukey_alpha": 0.697998, 23 | "lb_type": "ensemble", 24 | "modulate": true, 25 | "dilation": 10, 26 | "context_temp": 0.5, 27 | "viz": false, 28 | "verbose": false, 29 | "vanilla": false 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamMask/VOT2018_vanilla.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "window_influence": 0.42, 4 | "instance_size": 255, 5 | "base_size": 8, 6 | "out_size": 127, 7 | "seg_thr": 0.3, 8 | "penalty_k": 0.04, 9 | "lr": 0.25 10 | }, 11 | "anchors":{ 12 | "stride": 8, 13 | "ratios": [0.33, 0.5, 1, 2, 3], 14 | "scales": [8], 15 | "round_dight": 0 16 | }, 17 | "THOR": { 18 | "K_st": 0, 19 | "K_lt": 1, 20 | "iou_tresh": 1.0, 21 | "lb": 1.0, 22 | "tukey_alpha": 0, 23 | "dilation": 1000, 24 | "lb_type": "static", 25 | "modulate": false, 26 | "context_temp": 0.5, 27 | "viz": false, 28 | "verbose": false, 29 | "vanilla": true 30 | } 31 | } 32 | 33 | 
-------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/GOT10k_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.692764, 4 | "window_influence": 0.357794, 5 | "lr": 0.73434 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 7, 10 | "iou_tresh": 1.0, 11 | "lb": 0.281661, 12 | "tukey_alpha": 0.567957, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/LaSOT_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.692764, 4 | "window_influence": 0.357794, 5 | "lr": 0.73434 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 7, 10 | "iou_tresh": 1.0, 11 | "lb": 0.281661, 12 | "tukey_alpha": 0.567957, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/OTB2015_THOR_dynamic.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.385994, 4 | "window_influence": 0.357794, 5 | "lr": 0.38457 6 | }, 7 | "THOR": { 8 | "K_st": 7, 9 | "K_lt": 7, 10 | "iou_tresh": 0.980916, 11 | "lb": 0.883665, 12 | "tukey_alpha": 0.0, 13 | "lb_type": "dynamic", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/OTB2015_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.692764, 4 | "window_influence": 0.357794, 5 | "lr": 0.73434 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 7, 10 | "iou_tresh": 1.0, 11 | "lb": 0.281661, 12 | "tukey_alpha": 0.567957, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/OTB2015_vanilla.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.055, 4 | "window_influence": 0.42, 5 | "lr": 0.295 6 | }, 7 | "THOR": { 8 | "K_st": 0, 9 | "K_lt": 1, 10 | "iou_tresh": 1.0, 11 | "lb": 1.0, 12 | "tukey_alpha": 0, 13 | "dilation": 1000, 14 | "lb_type": "static", 15 | "modulate": false, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": true 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/OXUVA_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.692764, 4 | "window_influence": 0.357794, 5 | "lr": 0.73434 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 7, 10 | "iou_tresh": 1.0, 11 | "lb": 0.281661, 12 | 
"tukey_alpha": 0.567957, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/UAV123_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.692764, 4 | "window_influence": 0.357794, 5 | "lr": 0.73434 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 7, 10 | "iou_tresh": 1.0, 11 | "lb": 0.281661, 12 | "tukey_alpha": 0.567957, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/UAV20L_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.692764, 4 | "window_influence": 0.357794, 5 | "lr": 0.73434 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 7, 10 | "iou_tresh": 1.0, 11 | "lb": 0.281661, 12 | "tukey_alpha": 0.567957, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/VOT2018_THOR_dynamic.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.0595839, 4 | "window_influence": 0.357794, 5 | "lr": 0.597262 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 4, 10 | "iou_tresh": 0.943046, 11 | "lb": 0.759742, 12 | "tukey_alpha": 0.451395, 13 | "lb_type": "dynamic", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/VOT2018_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.0584502, 4 | "window_influence": 0.357794, 5 | "lr": 0.353687 6 | }, 7 | "THOR": { 8 | "K_st": 9, 9 | "K_lt": 11, 10 | "iou_tresh": 0.875719, 11 | "lb": 0.394676, 12 | "tukey_alpha": 0.374259, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/VOT2018_vanilla.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.055, 4 | "window_influence": 0.42, 5 | "lr": 0.295 6 | }, 7 | "THOR": { 8 | "K_st": 0, 9 | "K_lt": 1, 10 | "iou_tresh": 1.0, 11 | "lb": 1.0, 12 | "tukey_alpha": 0, 13 | "dilation": 1000, 14 | "lb_type": "static", 15 | "modulate": false, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": true 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/data/download_links_for_tracking_datasets.txt: 
-------------------------------------------------------------------------------- 1 | Popular Tracking datasets: 2 | UAV123: https://pan.baidu.com/s/1AhNnfjF4fZe14sUFefU3iA password: 2iq4 3 | 4 | VOT2018: https://pan.baidu.com/s/1MOWZ5lcxfF0wsgSuj5g4Yw password: e5eh 5 | 6 | VisDrone2019: https://pan.baidu.com/s/1Y6ubKHuYX65mK_iDVSfKPQ password: yxb6 7 | 8 | OTB2015: https://pan.baidu.com/s/1ZjKgRMYSHfR_w3Z7iQEkYA password: t5i1 9 | 10 | DTB70: https://pan.baidu.com/s/1kfHrArw0aVhGPSM91WHomw password: e7qm 11 | 12 | TLP50 (Long-Term): https://amoudgl.github.io/tlp/ 13 | 14 | ILSVRC2015 VID: https://pan.baidu.com/s/1CXWgpAG4CYpk-WnaUY5mAQ password: uqzj 15 | 16 | NFS: https://pan.baidu.com/s/1ei54oKNA05iBkoUwXPOB7g password: vng1 17 | 18 | GOT10k: https://pan.baidu.com/s/172oiQPA_Ky2iujcW5Irlow password: uxds 19 | 20 | UAVDT: https://pan.baidu.com/s/1K8oo53mPYCxUFVMXIGLhVA password: keva 21 | 22 | YTB-VOS: https://pan.baidu.com/s/1WMB0q9GJson75QBFVfeH5A password: sf1m 23 | 24 | YTB-Crop511 (used in siamrpn++ and siammask): https://pan.baidu.com/s/112zLS_02-Z2ouKGbnPlTjw password: ebq1 25 | 26 | TColor128: https://pan.baidu.com/s/1v4J6zWqZwj8fHi5eo5EJvQ password: 26d4 27 | 28 | DAVIS2017: https://pan.baidu.com/s/1JTsumpnkWotEJQE7KQmh6A password: c9qp 29 | 30 | YTB&VID (used in siamrpn): https://pan.baidu.com/s/1gF8PSZDzw-7EAVrdYHQwsA password: 6vkz 31 | 32 | TrackingNet: https://pan.baidu.com/s/1PXSRAqcw-KMfBIJYUtI4Aw code: nkb9 (Note that this link is provided by SiamFC++ author) 33 | 34 | TAO: A Large-Scale Benchmark for Tracking Any Object: https://github.com/TAO-Dataset/tao 35 | 36 | vot 2018 and vot 2019:   链接: https://pan.baidu.com/s/1q6lv3cUhezBb5pmdj3BRGw 提取码: d7r3 37 | 38 | vot 2018 LT:       链接: https://pan.baidu.com/s/16Q4_sxhBjmddIHU8b7XK3w 提取码: 67xf 39 | 40 | vot 2019 LT:       链接:https://pan.baidu.com/s/1z9HBPNprbt2gb2RGzRJkwA 提取码:7yq5 41 | 42 | vot 2019 rgb-thermal:   链接: https://pan.baidu.com/s/1oT8qFmKBpYa3VlXP1ZwfCA 提取码: mn1b 43 | 44 | -------------------------------------------------------------------------------- /DeepMTA_code/data/get_test_otb2015_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # OTB2015 4 | mkdir OTB2015 && cd OTB2015 5 | baseurl="http://cvlab.hanyang.ac.kr/tracker_benchmark" 6 | wget "$baseurl/datasets.html" 7 | cat datasets.html | grep '\.zip' | sed -e 's/\.zip".*/.zip/' | sed -e s'/.*"//' >files.txt 8 | cat files.txt | xargs -n 1 -P 8 -I {} wget -c "$baseurl/{}" 9 | ls *.zip | xargs -n 1 unzip 10 | rm -r __MACOSX/ 11 | cd .. 
12 | -------------------------------------------------------------------------------- /DeepMTA_code/environment.yml: -------------------------------------------------------------------------------- 1 | name: deepmta 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - blas=1.0=mkl 7 | - ca-certificates=2019.1.23=0 8 | - certifi=2019.3.9=py37_0 9 | - cffi=1.12.3=py37h2e261b9_0 10 | - cudatoolkit=10.0.130=0 11 | - freetype=2.9.1=h8a8886c_1 12 | - intel-openmp=2019.4=243 13 | - jpeg=9b=h024ee3a_2 14 | - libedit=3.1.20181209=hc058e9b_0 15 | - libffi=3.2.1=hd88cf55_4 16 | - libgcc-ng=8.2.0=hdf63c60_1 17 | - libgfortran-ng=7.3.0=hdf63c60_0 18 | - libpng=1.6.37=hbc83047_0 19 | - libstdcxx-ng=8.2.0=hdf63c60_1 20 | - libtiff=4.0.10=h2733197_2 21 | - mkl=2019.4=243 22 | - mkl_fft=1.0.12=py37ha843d7b_0 23 | - mkl_random=1.0.2=py37hd81dba3_0 24 | - ncurses=6.1=he6710b0_1 25 | - ninja=1.9.0=py37hfd86e86_0 26 | - numpy=1.16.4=py37h7e9f1db_0 27 | - numpy-base=1.16.4=py37hde5b4d6_0 28 | - olefile=0.46=py37_0 29 | - openssl=1.1.1c=h7b6447c_1 30 | - pillow=6.0.0=py37h34e0f95_0 31 | - pip=19.1.1=py37_0 32 | - pycparser=2.19=py37_0 33 | - python=3.7.3=h0371630_0 34 | - pytorch=1.1.0=py3.7_cuda10.0.130_cudnn7.5.1_0 35 | - readline=7.0=h7b6447c_5 36 | - setuptools=41.0.1=py37_0 37 | - six=1.12.0=py37_0 38 | - sqlite=3.28.0=h7b6447c_0 39 | - tk=8.6.8=hbc83047_0 40 | - torchvision=0.3.0=py37_cu10.0.130_1 41 | - wheel=0.33.4=py37_0 42 | - xz=5.2.4=h14c3975_4 43 | - zlib=1.2.11=h7b6447c_3 44 | - zstd=1.3.7=h0b5b093_0 45 | - pip: 46 | - backcall==0.1.0 47 | - colorama==0.4.1 48 | - cycler==0.10.0 49 | - cython==0.29.10 50 | - decorator==4.4.0 51 | - fire==0.1.3 52 | - got10k==0.1.3 53 | - imutils==0.5.3 54 | - ipdb==0.12 55 | - ipython==7.5.0 56 | - ipython-genutils==0.2.0 57 | - jedi==0.13.3 58 | - kiwisolver==1.1.0 59 | - llvmlite==0.29.0 60 | - matplotlib==3.1.0 61 | - numba==0.44.0 62 | - opencv-python==4.1.0.25 63 | - pandas==0.24.2 64 | - parso==0.4.0 65 | - pexpect==4.7.0 66 | - pickleshare==0.7.5 67 | - prompt-toolkit==2.0.9 68 | - ptyprocess==0.6.0 69 | - pygments==2.4.2 70 | - pyparsing==2.4.0 71 | - python-dateutil==2.8.0 72 | - pytz==2019.1 73 | - scipy==1.3.0 74 | - shapely==1.6.4.post2 75 | - tqdm==4.32.1 76 | - traitlets==4.3.2 77 | - wcwidth==0.1.7 78 | - wget==3.2 79 | - yacs==0.1.6 80 | - scikit-image 81 | prefix: ~/anaconda3/envs/deepmta 82 | -------------------------------------------------------------------------------- /DeepMTA_code/network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | from torchvision import models 5 | 6 | import torchvision.ops as torchops 7 | 8 | import math 9 | from torch.autograd import Variable 10 | import pdb 11 | 12 | from torch.nn.parameter import Parameter 13 | import torch.nn.functional as F 14 | from torch.nn.modules.utils import _single, _pair, _triple 15 | 16 | import numpy as np 17 | import cv2 18 | import pdb 19 | 20 | 21 | 22 | class traj_critic(nn.Module): 23 | def __init__(self): 24 | super(traj_critic, self).__init__() 25 | #### ResNet model 26 | caffenet = models.resnet18(pretrained=True) 27 | self.encoder = nn.Sequential(*list(caffenet.children())[:-1]) 28 | 29 | self.trajBBox_linear = nn.Linear(4, 32) 30 | self.trajScore_linear = nn.Linear(10, 32) 31 | self.imgReducDIM_linear = nn.Linear(2560, 512) 32 | 33 | self.regressor = nn.Sequential( 34 | nn.Linear(5472, 512), 35 | nn.ReLU(inplace=True), 36 | nn.Dropout(), 
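            # (note added for clarity) the fused 5472-D feature is reduced to 512-D
            # above; the next layer maps it to the single scalar returned as
            # pred_traj_score in forward() below.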
37 | nn.Linear(512, 1), 38 | ) 39 | self.relu = nn.ReLU(inplace=True) 40 | 41 | 42 | 43 | 44 | def forward(self, img, attMap, targetImg, targetAtten, initTarget, trajBBox, trajScore): 45 | img_feat = self.encoder(img.cuda()) ## torch.Size([10, 512, 1, 1]) 46 | img_feat = self.relu(img_feat) 47 | 48 | attMap_feat = self.encoder(attMap.cuda()) 49 | attMap_feat = self.relu(attMap_feat) 50 | 51 | targetImg_feat = self.encoder(targetImg.cuda()) 52 | targetImg_feat = self.relu(targetImg_feat) 53 | 54 | targetAtt_feat = self.encoder(targetAtten.cuda()) 55 | targetAtt_feat = self.relu(targetAtt_feat) 56 | 57 | initTarget_feat = self.encoder(initTarget.cuda()) 58 | initTarget_feat = self.relu(initTarget_feat) 59 | 60 | 61 | 62 | fused1 = torch.cat((img_feat, attMap_feat), 1) ## torch.Size([10, 1024, 1, 1]) 63 | fused2 = torch.cat((targetImg_feat, targetAtt_feat), 1) ## torch.Size([10, 1024, 1, 1]) 64 | fused2 = torch.cat((fused2, initTarget_feat), 1) 65 | fused3 = torch.cat((fused1, fused2), 1) ## torch.Size([10, 2560, 1, 1]) 66 | 67 | fused3 = torch.squeeze(fused3, dim=2) 68 | fused3 = torch.squeeze(fused3, dim=2) 69 | fused3 = self.imgReducDIM_linear(fused3) 70 | fused3 = fused3.view(-1) 71 | 72 | trajBBox_feat = self.trajBBox_linear(trajBBox.cuda()) 73 | trajBBox_feat = trajBBox_feat.view(-1) 74 | trajBBox_feat = self.relu(trajBBox_feat) 75 | 76 | trajScore = torch.transpose(trajScore, 0, 1) 77 | trajScore_feat = self.trajScore_linear(trajScore.cuda()) 78 | trajScore_feat = trajScore_feat.view(-1) 79 | trajScore_feat = self.relu(trajScore_feat) 80 | 81 | fused4 = torch.cat((trajBBox_feat, trajScore_feat)) ## 352-D 82 | final_feat = torch.cat((fused3, fused4)) ## 5472-D 83 | final_feat = self.relu(final_feat) 84 | pred_traj_score = self.regressor(final_feat) 85 | 86 | return pred_traj_score 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | def axis_aligned_iou(boxA, boxB): 123 | # make sure that x1,y1,x2,y2 of a box are valid 124 | assert(boxA[0] <= boxA[2]) 125 | assert(boxA[1] <= boxA[3]) 126 | assert(boxB[0] <= boxB[2]) 127 | assert(boxB[1] <= boxB[3]) 128 | 129 | # determine the (x, y)-coordinates of the intersection rectangle 130 | xA = max(boxA[0], boxB[0]) 131 | yA = max(boxA[1], boxB[1]) 132 | xB = min(boxA[2], boxB[2]) 133 | yB = min(boxA[3], boxB[3]) 134 | 135 | # compute the area of intersection rectangle 136 | interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1) 137 | 138 | # compute the area of both the prediction and ground-truth 139 | # rectangles 140 | boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1) 141 | boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1) 142 | 143 | # compute the intersection over union by taking the intersection 144 | # area and dividing it by the sum of prediction + ground-truth 145 | # areas - the interesection area 146 | iou = interArea / float(boxAArea + boxBArea - interArea) 147 | 148 | # return the intersection over union value 149 | return iou 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 
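
# ---------------------------------------------------------------------------
# Illustrative usage sketch (added note, not part of the original network.py):
# a quick sanity check of axis_aligned_iou() defined above, assuming boxes are
# given as [x1, y1, x2, y2] with x1 <= x2 and y1 <= y2.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    box_a = [10, 10, 50, 50]
    box_b = [30, 30, 70, 70]
    # intersection is 21 x 21 = 441 px (with the inclusive "+1" convention used
    # above), union is 1681 + 1681 - 441 = 2921 px, so the printed IoU is ~0.151
    print(axis_aligned_iou(box_a, box_b))
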
224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | -------------------------------------------------------------------------------- /DeepMTA_code/scripts/transform_oxuva_results_txt_to_csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import cv2 4 | import time 5 | import oxuva 6 | import pdb 7 | from skimage import measure 8 | import json 9 | import pdb 10 | import cv2 11 | import os 12 | import pandas as pd 13 | resultpath= '/home/wangxiao/Documents/deepMTA_project/DeepMTA_TCSVT_project/benchmark/results/oxuva_txt_files/' 14 | videopath="/home/wangxiao/dataset/OxUvA/images/test/" 15 | videos=os.listdir(videopath) 16 | txtFiles = os.listdir(resultpath) 17 | 18 | attMap_path = "/home/wangxiao/Documents/deepMTA_project/DeepMTA_TCSVT_project/temp_DIR_TO_SAVE_static_Global_attentionMap/" 19 | 20 | # export PYTHONPATH="/home/wangxiao/Documents/deepMTA_project/DeepMTA_TCSVT_project/long-term-tracking-benchmark-master/python:$PYTHONPATH" 21 | 22 | for i in range(len(videos)): 23 | txtName = videos[i] + ".txt" 24 | preds = np.loadtxt(resultpath + txtName, delimiter=',') 25 | 26 | print("==>> txtName: ", txtName) 27 | xmin=[] 28 | xmax=[] 29 | ymin=[] 30 | ymax=[] 31 | video_ids=[] 32 | obj_ids=[] 33 | frame_nums=[] 34 | presents=[] 35 | scores=[] 36 | video_id=videos[i][0:7] 37 | if(len(videos[i])==7): 38 | obj_id='obj0000' 39 | elif(videos[i][-1]=='2'): 40 | obj_id='obj0001' 41 | else: 42 | obj_id='obj0002' 43 | 44 | score = 0.5 45 | # l=result['res'] 46 | 47 | imgs = os.listdir(videopath+videos[i]+'/') 48 | imgs = np.sort(imgs) 49 | # pdb.set_trace() 50 | 51 | image = cv2.imread(videopath+videos[i]+'/'+imgs[0]) 52 | imgh = image.shape[0] 53 | imgw = image.shape[1] 54 | 55 | attvideo_attPath = attMap_path + videos[i] + "/" 56 | attFiles = os.listdir(attvideo_attPath) 57 | 58 | 59 | occurFlag_list = [] 60 | 61 | if len(attFiles)+1 == len(imgs): 62 | ############################################################################### 63 | #### Scan the Attention Map 64 | ############################################################################### 65 | occurFlag_list.append(1) 66 | for j in range(len(imgs)-1): 67 | attMap = cv2.imread(attvideo_attPath + attFiles[j]) 68 | ret, static_atttentonMAP = cv2.threshold(attMap, 5, 255, cv2.THRESH_BINARY) 69 | label_image = measure.label(static_atttentonMAP) 70 | props = measure.regionprops(label_image) 71 | 72 | if len(props) > 0: 73 | occurFlag_list.append(1) 74 | else: 75 | occurFlag_list.append(0) 76 | else: 77 | for j in range(len(imgs)): 78 | occurFlag_list.append(1) 79 | 80 | # pdb.set_trace() 81 | 82 | for j in range(len(imgs)): 83 | 84 | x=preds[j][0] 85 | y=preds[j][1] 86 | w=preds[j][2] 87 | h=preds[j][3] 88 | 89 | ## results relative to original image size. 
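        # (note added for clarity) the tracker's x/y/w/h box is converted to corner
        # coordinates and divided by the image width/height, so the csv written below
        # stores relative coordinates in [0, 1], rounded to 4 decimal places.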
90 | x1=x/imgw 91 | x2=(x+w)/imgw 92 | y1=y/imgh 93 | y2=(y+h)/imgh 94 | 95 | 96 | if j >= 5 and np.sum(occurFlag_list[j-5:j]) == 0: 97 | present = 'False' 98 | print("==>> got one missing ......") 99 | else: 100 | present = 'True' 101 | 102 | x1=round(x1,4) 103 | x2=round(x2,4) 104 | y1=round(y1,4) 105 | y2=round(y2,4) 106 | 107 | frame=imgs[j][0:6] 108 | 109 | if(frame=='000000'): 110 | frame_num=0 111 | else: 112 | frame_num=frame.lstrip('0') 113 | 114 | xmin.append(x1) 115 | xmax.append(x2) 116 | ymin.append(y1) 117 | ymax.append(y2) 118 | video_ids.append(video_id) 119 | obj_ids.append(obj_id) 120 | frame_nums.append(frame_num) 121 | presents.append(present) 122 | scores.append(score) 123 | 124 | # pdb.set_trace() 125 | 126 | dataframe=pd.DataFrame({'video_id':video_ids,'object_id':obj_ids,'frame_num':frame_nums,'present':presents,\ 127 | 'score':scores,'xmin':xmin,'xmax':xmax,'ymin':ymin,'ymax':ymax}) 128 | savepath='./oxuva_csv_results_missFlag/' +videos[i][0:7]+'_'+obj_id+'.csv' 129 | columns=['video_id','object_id','frame_num','present','score','xmin','xmax','ymin','ymax'] 130 | 131 | dataframe.to_csv(savepath,index=False,columns=columns,header=None) 132 | 133 | 134 | # pdb.set_trace() 135 | -------------------------------------------------------------------------------- /DeepMTA_code/temp_DIR_TO_SAVE_static_Global_attentionMap/mkdir_your_self.txt: -------------------------------------------------------------------------------- 1 | sss -------------------------------------------------------------------------------- /DeepMTA_code/testing.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | from types import SimpleNamespace 4 | import json 5 | 6 | from benchmark.vot import test_vot, eval_vot 7 | from benchmark.otb import test_otb, eval_otb 8 | from benchmark.got10k import test_got 9 | # from benchmark.got10ktrainval import test_gottrainval 10 | from benchmark.lasot import test_lasot, eval_lasot 11 | # from benchmark.uav20l import test_uav20l, eval_uav20l 12 | from benchmark.uav123 import test_uav123, eval_uav123 13 | # from benchmark.oxuva import test_oxuva 14 | # from benchmark.tc128 import test_tc128, eval_tc128 15 | 16 | from trackers.tracker import SiamFC_Tracker, SiamRPN_Tracker, SiamMask_Tracker 17 | from benchmark.bench_utils.benchmark_helper import load_dataset 18 | import warnings 19 | warnings.filterwarnings("ignore") 20 | import ast 21 | import pdb 22 | 23 | parser = argparse.ArgumentParser(description='Test Trackers on Benchmarks.') 24 | parser.add_argument('-d', '--dataset', dest='dataset', default='OTB2015', 25 | help='Dataset on which the benchmark is run [VOT2018, OTB2015, GOT10k, LaSOT, UAV20L]') 26 | parser.add_argument('-t', '--tracker', dest='tracker', default='SiamRPN', 27 | help='Name of the tracker [SiamFC, SiamRPN, SiamMask]') 28 | parser.add_argument('--vanilla', action='store_true', 29 | help='Run the tracker without THOR') 30 | parser.add_argument('-v', '--viz', action='store_true', default=False, 31 | help='Show the tracked scene, the stored templated and the modulated view') 32 | parser.add_argument('--verbose', action='store_true', 33 | help='Print additional info about THOR') 34 | parser.add_argument('--lb_type', type=str, default='dynamic', 35 | help='Specify the type of lower bound [dynamic, ensemble]') 36 | parser.add_argument('--spec_video', type=str, default='', 37 | help='Pick a specific video by name, e.g. 
"lemming" on OTB2015') 38 | parser.add_argument('--save_path', dest='save_path', default='Tracker', 39 | help='Name where the tracked trajectory is stored') 40 | 41 | def load_cfg(args): 42 | json_path = f"configs/{args.tracker}/" 43 | json_path += f"{args.dataset}_" 44 | if args.vanilla: 45 | json_path += "vanilla.json" 46 | else: 47 | json_path += f"THOR_{args.lb_type}.json" 48 | 49 | # pdb.set_trace() 50 | 51 | cfg = json.load(open(json_path)) 52 | return cfg 53 | 54 | 55 | def run_bench(delete_after=False): 56 | args = parser.parse_args() 57 | 58 | cfg = load_cfg(args) 59 | cfg['THOR']['viz'] = args.viz 60 | cfg['THOR']['verbose'] = args.verbose 61 | 62 | # setup tracker and dataset 63 | if args.tracker == 'SiamFC': 64 | tracker = SiamFC_Tracker(cfg) 65 | elif args.tracker == 'SiamRPN': 66 | tracker = SiamRPN_Tracker(cfg) 67 | elif args.tracker == 'SiamMask': 68 | tracker = SiamMask_Tracker(cfg) 69 | else: 70 | raise ValueError(f"Tracker {args.tracker} does not exist.") 71 | 72 | 73 | 74 | dataset = load_dataset(args.dataset) 75 | # optionally filter for a specific videos 76 | if args.spec_video: 77 | 78 | # pdb.set_trace() 79 | dataset = {args.spec_video: dataset[args.spec_video]} 80 | 81 | if args.dataset=="VOT2018": 82 | test_bench, eval_bench = test_vot, eval_vot 83 | elif args.dataset=="OTB2015": 84 | test_bench, eval_bench = test_otb, eval_otb 85 | elif args.dataset=="GOT10k": 86 | test_bench = test_got 87 | elif args.dataset=="GOT10k_train_val": 88 | test_bench = test_gottrainval 89 | elif args.dataset=="LaSOT": 90 | test_bench, eval_bench = test_lasot, eval_lasot 91 | elif args.dataset=="UAV20L": 92 | test_bench, eval_bench = test_uav20l, eval_uav20l 93 | elif args.dataset=="UAV123": 94 | test_bench, eval_bench = test_uav123, eval_uav123 95 | elif args.dataset=="OXUVA": 96 | test_bench = test_oxuva 97 | elif args.dataset=="TC128": 98 | test_bench, eval_bench = test_tc128, eval_tc128 99 | else: 100 | raise NotImplementedError(f"Procedure for {args.dataset} does not exist.") 101 | 102 | # testing 103 | total_lost = 0 104 | speed_list = [] 105 | 106 | if args.dataset=="OTB2015": 107 | print("==>> No processing for the json file ... ") 108 | else: 109 | dataset = ast.literal_eval(dataset) 110 | # pdb.set_trace() 111 | 112 | for v_id, video in enumerate(dataset.keys(), start=1): 113 | tracker.temp_mem.do_full_init = True 114 | speed = test_bench(v_id, tracker, dataset[video], args) 115 | speed_list.append(speed) 116 | 117 | 118 | if args.dataset=="GOT10k": 119 | print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$") 120 | print("==>> Please evaluate online for GOT10k dataset ... ") 121 | print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$") 122 | elif args.dataset=="OxUvA": 123 | print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$") 124 | print("==>> Please evaluate online for OxUvA dataset ... 
") 125 | print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$") 126 | else: 127 | # evaluation 128 | # pdb.set_trace() 129 | bench_res = eval_bench(args.save_path, delete_after) 130 | print(bench_res) 131 | mean_fps = np.mean(np.array(speed_list)) 132 | bench_res['mean_fps'] = mean_fps 133 | print(bench_res) 134 | 135 | return bench_res 136 | 137 | 138 | 139 | if __name__ == '__main__': 140 | run_bench() 141 | 142 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamFC/config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # -------------------------------------------------------- 6 | 7 | class TrackerConfig(object): 8 | exemplar_sz = 127 9 | instance_sz = 255 10 | context = 0.5 11 | scale_num = 3 12 | scale_step = 1.0375 13 | lr = 0.641662 14 | penalty_k = 0.982769 15 | window_influence = 0.199673 16 | response_sz = 17 17 | response_up = 16 18 | upscale_sz = response_up*response_up 19 | total_stride = 8 20 | adjust_scale = 0.001 21 | 22 | def update(self, cfg): 23 | for k, v in cfg.items(): 24 | if hasattr(self, k): 25 | setattr(self, k, v) 26 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamFC/model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/DeepMTA_code/trackers/SiamFC/model.pth -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamFC/net.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # -------------------------------------------------------- 6 | 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | class SiamFC(nn.Module): 11 | 12 | def __init__(self): 13 | super(SiamFC, self).__init__() 14 | self.feature = nn.Sequential( 15 | # conv1 16 | nn.Conv2d(3, 96, 11, 2), 17 | nn.BatchNorm2d(96, eps=1e-6, momentum=0.05), 18 | nn.ReLU(inplace=True), 19 | nn.MaxPool2d(3, 2), 20 | # conv2 21 | nn.Conv2d(96, 256, 5, 1, groups=2), 22 | nn.BatchNorm2d(256, eps=1e-6, momentum=0.05), 23 | nn.ReLU(inplace=True), 24 | nn.MaxPool2d(3, 2), 25 | # conv3 26 | nn.Conv2d(256, 384, 3, 1), 27 | nn.BatchNorm2d(384, eps=1e-6, momentum=0.05), 28 | nn.ReLU(inplace=True), 29 | # conv4 30 | nn.Conv2d(384, 384, 3, 1, groups=2), 31 | nn.BatchNorm2d(384, eps=1e-6, momentum=0.05), 32 | nn.ReLU(inplace=True), 33 | # conv5 34 | nn.Conv2d(384, 256, 3, 1, groups=2)) 35 | 36 | def forward(self, z, x): 37 | z = self.feature(z) 38 | x = self.feature(x) 39 | 40 | # fast cross correlation 41 | n, c, h, w = x.size() 42 | x = x.view(1, n * c, h, w) 43 | out = F.conv2d(x, z, groups=n) 44 | out = out.view(n, 1, out.size(-2), out.size(-1)) 45 | 46 | # adjust the scale of responses 47 | out = 0.001 * out + 0.0 48 | 49 | return out 50 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamFC/siamfc.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # 
THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # -------------------------------------------------------- 6 | 7 | import torch 8 | import numpy as np 9 | from .utils import to_one_indexed, to_zero_indexed, crop_and_resize 10 | from .config import TrackerConfig 11 | 12 | def SiamFC_init(im, target_pos, target_sz, cfg): 13 | state = {} 14 | state['im_h'] = im.shape[0] 15 | state['im_w'] = im.shape[1] 16 | target_pos, target_sz = to_zero_indexed(target_pos, target_sz) 17 | 18 | # set the tracker_config 19 | p = TrackerConfig() 20 | p.update(cfg) 21 | 22 | # create hanning window 23 | p.hann_window = np.outer(np.hanning(p.upscale_sz), np.hanning(p.upscale_sz)) 24 | p.hann_window /= p.hann_window.sum() 25 | 26 | # search scale factors 27 | p.scale_factors = p.scale_step ** np.linspace( 28 | -(p.scale_num // 2), 29 | p.scale_num // 2, p.scale_num) 30 | p.scale_factors_glob = np.array([0.5, 0.75, 1.0, 1.25, 1.75]) 31 | 32 | # exemplar image 33 | avg_chans = np.mean(im, axis=(0, 1)) 34 | 35 | # important params for later use 36 | context = p.context * np.sum(target_sz) 37 | p.z_sz = np.sqrt(np.prod(target_sz + context)) 38 | p.x_sz = p.z_sz * p.instance_sz / p.exemplar_sz 39 | 40 | target_pos, target_sz = to_one_indexed(target_pos, target_sz) 41 | 42 | # fill the state dict 43 | state['device'] = torch.device("cuda" if torch.cuda.is_available() else "cpu") 44 | state['target_pos'] = target_pos 45 | state['target_sz'] = target_sz 46 | state['score'] = 1.0 47 | state['p'] = p 48 | state['avg_chans'] = avg_chans 49 | return state 50 | 51 | def SiamFC_track(state, im, temp_mem): 52 | p = state['p'] 53 | avg_chans = state['avg_chans'] 54 | inst_sz = p.instance_sz 55 | scale_factors = p.scale_factors 56 | old_pos, old_sz = to_zero_indexed(state['target_pos'], state['target_sz']) 57 | dev = state['device'] 58 | 59 | # get instance images 60 | ims = [crop_and_resize( 61 | image=im, center=old_pos, size=p.x_sz * f, 62 | out_size=inst_sz, 63 | pad_color=avg_chans) for f in scale_factors] 64 | ims = np.stack(ims, axis=0) 65 | ims = torch.from_numpy(ims).to(dev).permute([0, 3, 1, 2]).float() 66 | 67 | # track 68 | target_pos, target_sz, score, scale = temp_mem.batch_evaluate(ims, old_pos, old_sz, p) 69 | 70 | p.x_sz *= float(scale) 71 | p.z_sz *= float(scale) 72 | 73 | # return 1-indexed and left-top based bounding box 74 | target_pos, target_sz = to_one_indexed(target_pos, target_sz) 75 | 76 | target_pos[0] = max(0, min(state['im_w'], target_pos[0])) 77 | target_pos[1] = max(0, min(state['im_h'], target_pos[1])) 78 | target_sz[0] = max(10, min(state['im_w'], target_sz[0])) 79 | target_sz[1] = max(10, min(state['im_h'], target_sz[1])) 80 | state['target_pos'] = target_pos 81 | state['target_sz'] = target_sz 82 | state['score'] = score 83 | state['p'] = p 84 | state['crop'] = ims[1] # get non scaled image 85 | return state 86 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamFC/utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # -------------------------------------------------------- 6 | 7 | import numpy as np 8 | import cv2 9 | 10 | def cxy_wh_2_rect(pos, sz): 11 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) # 0-index 12 | 13 | def rect_2_cxy_wh(rect): 14 | return 
np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2]), np.array([rect[2], rect[3]]) # 0-index 15 | 16 | def crop_and_resize(image, center, size, out_size, pad_color): 17 | # convert box to corners (0-indexed) 18 | size = round(size) 19 | corners = np.concatenate(( 20 | np.round(center - (size - 1) / 2), 21 | np.round(center - (size - 1) / 2) + size)) 22 | corners = np.round(corners).astype(int) 23 | 24 | # pad image if necessary 25 | pads = np.concatenate(( 26 | -corners[:2], corners[2:] - image.shape[:2])) 27 | npad = max(0, int(pads.max())) 28 | if npad > 0: 29 | image = cv2.copyMakeBorder( 30 | image, npad, npad, npad, npad, 31 | cv2.BORDER_CONSTANT, value=pad_color) 32 | 33 | # crop image patch 34 | corners = (corners + npad).astype(int) 35 | patch = image[corners[0]:corners[2], corners[1]:corners[3]] 36 | 37 | # resize to out_size 38 | patch = cv2.resize(patch, (out_size, out_size)) 39 | 40 | return patch 41 | 42 | def to_zero_indexed(pos, sz): 43 | # convert box to 0-indexed and center based [y, x, h, w] 44 | box = cxy_wh_2_rect(pos, sz) 45 | box = np.array([ 46 | box[1] - 1 + (box[3] - 1) / 2, 47 | box[0] - 1 + (box[2] - 1) / 2, 48 | box[3], box[2]], dtype=np.float32) 49 | return box[:2], box[2:] 50 | 51 | def to_one_indexed(pos, sz): 52 | box = np.array([ 53 | pos[1] + 1 - (sz[1] - 1) / 2, 54 | pos[0] + 1 - (sz[0] - 1) / 2, 55 | sz[1], sz[0]]) 56 | return rect_2_cxy_wh(box) 57 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .utils.load_helper import load_pretrain 6 | from .utils.anchors import Anchors 7 | from .resnet import resnet50 8 | 9 | # basic model 10 | 11 | class SiamMask(nn.Module): 12 | def __init__(self, anchors=None, o_sz=127, g_sz=127): 13 | super(SiamMask, self).__init__() 14 | self.anchors = anchors # anchor_cfg 15 | self.anchor_num = len(self.anchors["ratios"]) * len(self.anchors["scales"]) 16 | self.anchor = Anchors(anchors) 17 | self.features = None 18 | self.rpn_model = None 19 | self.mask_model = None 20 | self.o_sz = o_sz 21 | self.g_sz = g_sz 22 | self.all_anchors = None 23 | 24 | def feature_extractor(self, x): 25 | return self.features(x) 26 | 27 | def rpn(self, template, search): 28 | pred_cls, pred_loc = self.rpn_model(template, search) 29 | return pred_cls, pred_loc 30 | 31 | def mask(self, template, search): 32 | pred_mask = self.mask_model(template, search) 33 | return pred_mask 34 | 35 | def template(self, z): 36 | self.zf = self.feature_extractor(z) 37 | cls_kernel, loc_kernel = self.rpn_model.template(self.zf) 38 | return cls_kernel, loc_kernel 39 | 40 | def track(self, x, cls_kernel=None, loc_kernel=None, softmax=False): 41 | xf = self.feature_extractor(x) 42 | rpn_pred_cls, rpn_pred_loc = self.rpn_model.track(xf, cls_kernel, loc_kernel) 43 | if softmax: 44 | rpn_pred_cls = self.softmax(rpn_pred_cls) 45 | return rpn_pred_cls, rpn_pred_loc 46 | 47 | # rpn 48 | 49 | class RPN(nn.Module): 50 | def __init__(self): 51 | super(RPN, self).__init__() 52 | 53 | def forward(self, z_f, x_f): 54 | raise NotImplementedError 55 | 56 | def template(self, template): 57 | raise NotImplementedError 58 | 59 | def track(self, search): 60 | raise NotImplementedError 61 | 62 | def conv2d_dw_group(x, kernel): 63 | batch, channel = kernel.shape[:2] 64 | ## WRAPPER: changed, otherwise it does not work with batches 65 | # x = x.view(1, 
batch*channel, x.size(2), x.size(3)) # 1 * (b*c) * k * k 66 | x = x.expand(batch, *x.shape[1:]) 67 | x = x.contiguous().view(1, batch*channel, x.size(2), x.size(3)) # 1 * (b*c) * k * k 68 | kernel = kernel.view(batch*channel, 1, kernel.size(2), kernel.size(3)) # (b*c) * 1 * H * W 69 | out = F.conv2d(x, kernel, groups=batch*channel) 70 | out = out.view(batch, channel, out.size(2), out.size(3)) 71 | 72 | return out 73 | 74 | class DepthCorr(nn.Module): 75 | def __init__(self, in_channels, hidden, out_channels, kernel_size=3): 76 | super(DepthCorr, self).__init__() 77 | # adjust layer for asymmetrical features 78 | self.conv_kernel = nn.Sequential( 79 | nn.Conv2d(in_channels, hidden, kernel_size=kernel_size, bias=False), 80 | nn.BatchNorm2d(hidden), 81 | nn.ReLU(inplace=True), 82 | ) 83 | self.conv_search = nn.Sequential( 84 | nn.Conv2d(in_channels, hidden, kernel_size=kernel_size, bias=False), 85 | nn.BatchNorm2d(hidden), 86 | nn.ReLU(inplace=True), 87 | ) 88 | 89 | self.head = nn.Sequential( 90 | nn.Conv2d(hidden, hidden, kernel_size=1, bias=False), 91 | nn.BatchNorm2d(hidden), 92 | nn.ReLU(inplace=True), 93 | nn.Conv2d(hidden, out_channels, kernel_size=1) 94 | ) 95 | 96 | def forward_corr(self, kernel, input): 97 | kernel = self.conv_kernel(kernel) 98 | input = self.conv_search(input) 99 | feature = conv2d_dw_group(input, kernel) 100 | return feature 101 | 102 | def forward(self, kernel, search): 103 | feature = self.forward_corr(kernel, search) 104 | out = self.head(feature) 105 | return out 106 | 107 | # mask 108 | 109 | class Mask(nn.Module): 110 | def __init__(self): 111 | super(Mask, self).__init__() 112 | 113 | def forward(self, z_f, x_f): 114 | raise NotImplementedError 115 | 116 | def template(self, template): 117 | raise NotImplementedError 118 | 119 | def track(self, search): 120 | raise NotImplementedError 121 | 122 | # additional modules 123 | 124 | class Features(nn.Module): 125 | def __init__(self): 126 | super(Features, self).__init__() 127 | self.feature_size = -1 128 | 129 | def forward(self, x): 130 | raise NotImplementedError 131 | 132 | class ResDownS(nn.Module): 133 | def __init__(self, inplane, outplane): 134 | super(ResDownS, self).__init__() 135 | self.downsample = nn.Sequential( 136 | nn.Conv2d(inplane, outplane, kernel_size=1, bias=False), 137 | nn.BatchNorm2d(outplane)) 138 | 139 | def forward(self, x): 140 | x = self.downsample(x) 141 | if x.size(3) < 20: 142 | l, r = 4, -4 143 | x = x[:, :, l:r, l:r] 144 | return x 145 | 146 | class ResDown(Features): 147 | def __init__(self, pretrain=False): 148 | super(ResDown, self).__init__() 149 | self.features = resnet50(layer3=True, layer4=False) 150 | if pretrain: 151 | load_pretrain(self.features, 'resnet.model') 152 | 153 | self.downsample = ResDownS(1024, 256) 154 | 155 | def forward(self, x): 156 | output = self.features(x) 157 | p3 = self.downsample(output[-1]) 158 | return p3 159 | 160 | def forward_all(self, x): 161 | output = self.features(x) 162 | p3 = self.downsample(output[-1]) 163 | return output, p3 164 | 165 | class UP(RPN): 166 | def __init__(self, anchor_num=5, feature_in=256, feature_out=256): 167 | super(UP, self).__init__() 168 | 169 | self.anchor_num = anchor_num 170 | self.feature_in = feature_in 171 | self.feature_out = feature_out 172 | 173 | self.cls_output = 2 * self.anchor_num 174 | self.loc_output = 4 * self.anchor_num 175 | 176 | self.cls = DepthCorr(feature_in, feature_out, self.cls_output) 177 | self.loc = DepthCorr(feature_in, feature_out, self.loc_output) 178 | 179 | def forward(self, 
z_f, x_f): 180 | cls = self.cls(z_f, x_f) 181 | loc = self.loc(z_f, x_f) 182 | return cls, loc 183 | 184 | class MaskCorr(Mask): 185 | def __init__(self, oSz=63): 186 | super(MaskCorr, self).__init__() 187 | self.oSz = oSz 188 | self.mask = DepthCorr(256, 256, self.oSz**2) 189 | 190 | def forward(self, z, x): 191 | return self.mask(z, x) 192 | 193 | class Refine(nn.Module): 194 | def __init__(self): 195 | """ 196 | Mask refinement module 197 | Please refer SiamMask (Appendix A) 198 | https://arxiv.org/abs/1812.05050 199 | """ 200 | super(Refine, self).__init__() 201 | self.v0 = nn.Sequential(nn.Conv2d(64, 16, 3, padding=1), nn.ReLU(), 202 | nn.Conv2d(16, 4, 3, padding=1), nn.ReLU()) 203 | 204 | self.v1 = nn.Sequential(nn.Conv2d(256, 64, 3, padding=1), nn.ReLU(), 205 | nn.Conv2d(64, 16, 3, padding=1), nn.ReLU()) 206 | 207 | self.v2 = nn.Sequential(nn.Conv2d(512, 128, 3, padding=1), nn.ReLU(), 208 | nn.Conv2d(128, 32, 3, padding=1), nn.ReLU()) 209 | 210 | self.h2 = nn.Sequential(nn.Conv2d(32, 32, 3, padding=1), nn.ReLU(), 211 | nn.Conv2d(32, 32, 3, padding=1), nn.ReLU()) 212 | 213 | self.h1 = nn.Sequential(nn.Conv2d(16, 16, 3, padding=1), nn.ReLU(), 214 | nn.Conv2d(16, 16, 3, padding=1), nn.ReLU()) 215 | 216 | self.h0 = nn.Sequential(nn.Conv2d(4, 4, 3, padding=1), nn.ReLU(), 217 | nn.Conv2d(4, 4, 3, padding=1), nn.ReLU()) 218 | 219 | self.deconv = nn.ConvTranspose2d(256, 32, 15, 15) 220 | 221 | self.post0 = nn.Conv2d(32, 16, 3, padding=1) 222 | self.post1 = nn.Conv2d(16, 4, 3, padding=1) 223 | self.post2 = nn.Conv2d(4, 1, 3, padding=1) 224 | 225 | def forward(self, f, corr_feature, pos=None): 226 | pos = [int(i) for i in pos] 227 | p0 = torch.nn.functional.pad(f[0], [16,16,16,16])[:, :, 4*pos[0]:4*pos[0]+61, 4*pos[1]:4*pos[1]+61] 228 | p1 = torch.nn.functional.pad(f[1], [8,8,8,8])[:, :, 2*pos[0]:2*pos[0]+31, 2*pos[1]:2*pos[1]+31] 229 | p2 = torch.nn.functional.pad(f[2], [4,4,4,4])[:, :, pos[0]:pos[0]+15, pos[1]:pos[1]+15] 230 | 231 | p3 = corr_feature[:, :, pos[0], pos[1]].view(-1, 256, 1, 1) 232 | 233 | out = self.deconv(p3) 234 | out = self.post0(F.upsample(self.h2(out) + self.v2(p2), size=(31, 31))) 235 | out = self.post1(F.upsample(self.h1(out) + self.v1(p1), size=(61, 61))) 236 | out = self.post2(F.upsample(self.h0(out) + self.v0(p0), size=(127, 127))) 237 | out = out.view(-1, 127*127) 238 | return out 239 | 240 | # final siammask model 241 | 242 | class SiamMaskCustom(SiamMask): 243 | def __init__(self, pretrain=False, **kwargs): 244 | super(SiamMaskCustom, self).__init__(**kwargs) 245 | self.features = ResDown(pretrain=pretrain) 246 | self.rpn_model = UP(anchor_num=self.anchor_num, feature_in=256, feature_out=256) 247 | self.mask_model = MaskCorr() 248 | self.refine_model = Refine() 249 | self.best_temp = 0 250 | 251 | def refine(self, f, pos=None): 252 | return self.refine_model(f, pos) 253 | 254 | def template(self, template): 255 | self.zf = self.features(template) 256 | return self.zf 257 | 258 | def track(self, search): 259 | search = self.features(search) 260 | rpn_pred_cls, rpn_pred_loc = self.rpn(self.zf, search) 261 | return rpn_pred_cls, rpn_pred_loc 262 | 263 | def track_mask(self, search): 264 | self.feature, self.search = self.features.forward_all(search) 265 | rpn_pred_cls, rpn_pred_loc = self.rpn(self.zf, self.search) 266 | self.corr_feature = self.mask_model.mask.forward_corr(self.zf, self.search) 267 | pred_mask = self.mask_model.mask.head(self.corr_feature) 268 | return rpn_pred_cls, rpn_pred_loc, pred_mask 269 | 270 | def track_refine(self, pos): 271 | ### WRAPPER 
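# Assumption based on this file alone: track_mask() produces one correlation
# feature per template held in the THOR memory (stacked along the batch
# dimension), and best_temp (0 by default, updated externally once the best
# template is known) selects that single template's feature before refinement.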
272 | self.corr_feature = self.corr_feature[self.best_temp].unsqueeze(0) 273 | ### 274 | pred_mask = self.refine_model(self.feature, self.corr_feature, pos=pos) 275 | return pred_mask 276 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/siammask.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # Revised for THOR by Axel Sauer (axel.sauer@tum.de) 6 | # -------------------------------------------------------- 7 | 8 | from __future__ import division 9 | import argparse 10 | import logging 11 | import numpy as np 12 | import cv2 13 | from PIL import Image 14 | from os import makedirs 15 | from os.path import join, isdir, isfile 16 | 17 | import torch 18 | from torch.autograd import Variable 19 | import torch.nn.functional as F 20 | 21 | # relative imports 22 | from .utils.log_helper import init_log, add_file_handler 23 | from .utils.bbox_helper import get_axis_aligned_bbox, cxy_wh_2_rect 24 | from .utils.anchors import Anchors, generate_anchor 25 | from .utils.tracker_config import TrackerConfig 26 | from .utils.tracking_utils import get_subwindow_tracking 27 | 28 | def SiamMask_init(im, target_pos, target_sz, model, hp=None): 29 | state = dict() 30 | state['im_h'] = im.shape[0] 31 | state['im_w'] = im.shape[1] 32 | 33 | p = TrackerConfig() 34 | p.update(hp, model.anchors) 35 | p.renew() 36 | 37 | p.scales = model.anchors['scales'] 38 | p.ratios = model.anchors['ratios'] 39 | p.anchor_num = len(p.ratios) * len(p.scales) 40 | p.anchor = generate_anchor(model.anchors, p.score_size) 41 | 42 | avg_chans = np.mean(im, axis=(0, 1)) 43 | 44 | if p.windowing == 'cosine': 45 | window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size)) 46 | elif p.windowing == 'uniform': 47 | window = np.ones((p.score_size, p.score_size)) 48 | window = np.tile(window.flatten(), p.anchor_num) 49 | 50 | use_cuda = torch.cuda.is_available() 51 | state['device'] = torch.device("cuda" if use_cuda else "cpu") 52 | state['p'] = p 53 | state['model'] = model 54 | state['avg_chans'] = avg_chans 55 | state['window'] = window 56 | state['score'] = 1.0 57 | state['target_pos'] = target_pos 58 | state['target_sz'] = target_sz 59 | return state 60 | 61 | def SiamMask_track(state, im, temp_mem): 62 | p = state['p'] 63 | avg_chans = state['avg_chans'] 64 | window = state['window'] 65 | old_pos = state['target_pos'] 66 | old_sz = state['target_sz'] 67 | dev = state['device'] 68 | 69 | # get search area 70 | wc_x = old_sz[1] + p.context_amount * sum(old_sz) 71 | hc_x = old_sz[0] + p.context_amount * sum(old_sz) 72 | s_z = np.sqrt(wc_x * hc_x) 73 | 74 | scale_x = p.exemplar_size / s_z 75 | d_search = (p.instance_size - p.exemplar_size) / 2 76 | pad = d_search / scale_x 77 | s_x = s_z + 2 * pad 78 | crop_box = [old_pos[0] - round(s_x) / 2, old_pos[1] - round(s_x) / 2, round(s_x), round(s_x)] 79 | 80 | # extract scaled crops for search region x at previous target position 81 | x_crop = Variable(get_subwindow_tracking(im, old_pos, p.instance_size, round(s_x), avg_chans).unsqueeze(0)) 82 | 83 | # track 84 | target_pos, target_sz, score, best_id = temp_mem.batch_evaluate(x_crop.to(dev), old_pos, 85 | old_sz, window, 86 | scale_x, p) 87 | 88 | # mask refinement 89 | best_pscore_id_mask = np.unravel_index(best_id, (5, p.score_size, p.score_size)) 90 | delta_x, 
delta_y = best_pscore_id_mask[2], best_pscore_id_mask[1] 91 | mask = state['model'].track_refine((delta_y, delta_x)).to(dev).sigmoid().squeeze().view( 92 | p.out_size, p.out_size).cpu().data.numpy() 93 | 94 | def crop_back(image, bbox, out_sz, padding=-1): 95 | a = (out_sz[0] - 1) / bbox[2] 96 | b = (out_sz[1] - 1) / bbox[3] 97 | c = -a * bbox[0] 98 | d = -b * bbox[1] 99 | mapping = np.array([[a, 0, c], 100 | [0, b, d]]).astype(np.float) 101 | crop = cv2.warpAffine(image, mapping, (out_sz[0], out_sz[1]), 102 | flags=cv2.INTER_LINEAR, 103 | borderMode=cv2.BORDER_CONSTANT, 104 | borderValue=padding) 105 | return crop 106 | 107 | s = crop_box[2] / p.instance_size 108 | sub_box = [crop_box[0] + (delta_x - p.base_size / 2) * p.total_stride * s, 109 | crop_box[1] + (delta_y - p.base_size / 2) * p.total_stride * s, 110 | s * p.exemplar_size, s * p.exemplar_size] 111 | s = p.out_size / sub_box[2] 112 | back_box = [-sub_box[0] * s, -sub_box[1] * s, state['im_w'] * s, state['im_h'] * s] 113 | mask_in_img = crop_back(mask, back_box, (state['im_w'], state['im_h'])) 114 | 115 | target_mask = (mask_in_img > p.seg_thr).astype(np.uint8) 116 | if cv2.__version__[-5] == '4': 117 | contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 118 | else: 119 | _, contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 120 | cnt_area = [cv2.contourArea(cnt) for cnt in contours] 121 | if len(contours) != 0 and np.max(cnt_area) > 100: 122 | contour = contours[np.argmax(cnt_area)] # use max area polygon 123 | polygon = contour.reshape(-1, 2) 124 | prbox = cv2.boxPoints(cv2.minAreaRect(polygon)) # Rotated Rectangle 125 | rbox_in_img = prbox 126 | else: # empty mask 127 | location = cxy_wh_2_rect(target_pos, target_sz) 128 | rbox_in_img = np.array([[location[0], location[1]], 129 | [location[0] + location[2], location[1]], 130 | [location[0] + location[2], location[1] + location[3]], 131 | [location[0], location[1] + location[3]]]) 132 | 133 | state['mask'] = mask_in_img 134 | state['polygon'] = rbox_in_img 135 | 136 | # clip in min and max of the bb 137 | target_pos[0] = max(0, min(state['im_w'], target_pos[0])) 138 | target_pos[1] = max(0, min(state['im_h'], target_pos[1])) 139 | target_sz[0] = max(10, min(state['im_w'], target_sz[0])) 140 | target_sz[1] = max(10, min(state['im_h'], target_sz[1])) 141 | 142 | state['target_pos'] = target_pos 143 | state['target_sz'] = target_sz 144 | state['score'] = score 145 | state['crop'] = x_crop 146 | 147 | return state 148 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # Revised for THOR by Axel Sauer (axel.sauer@tum.de) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import math 10 | from .bbox_helper import center2corner, corner2center 11 | 12 | def generate_anchor(cfg, score_size): 13 | anchors = Anchors(cfg) 14 | anchor = anchors.anchors 15 | x1, y1, x2, y2 = anchor[:, 0], anchor[:, 1], anchor[:, 2], anchor[:, 3] 16 | anchor = np.stack([(x1+x2)*0.5, (y1+y2)*0.5, x2-x1, y2-y1], 1) 17 | 18 | total_stride = anchors.stride 19 | anchor_num = anchor.shape[0] 20 | 21 | anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4)) 22 | 
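# The per-position anchors are tiled over every cell of the score map; the
# meshgrid offsets below then shift each copy so its (cx, cy) sits on that
# cell's center, with the grid symmetric around the search-region center.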
ori = - (score_size // 2) * total_stride 23 | xx, yy = np.meshgrid([ori + total_stride * dx for dx in range(score_size)], 24 | [ori + total_stride * dy for dy in range(score_size)]) 25 | xx, yy = np.tile(xx.flatten(), (anchor_num, 1)).flatten(), \ 26 | np.tile(yy.flatten(), (anchor_num, 1)).flatten() 27 | anchor[:, 0], anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32) 28 | return anchor 29 | 30 | class Anchors: 31 | def __init__(self, cfg): 32 | self.stride = 8 33 | self.ratios = [0.33, 0.5, 1, 2, 3] 34 | self.scales = [8] 35 | self.round_dight = 0 36 | self.image_center = 0 37 | self.size = 0 38 | 39 | self.__dict__.update(cfg) 40 | 41 | self.anchor_num = len(self.scales) * len(self.ratios) 42 | self.anchors = None # in single position (anchor_num*4) 43 | self.all_anchors = None # in all position 2*(4*anchor_num*h*w) 44 | self.generate_anchors() 45 | 46 | def generate_anchors(self): 47 | self.anchors = np.zeros((self.anchor_num, 4), dtype=np.float32) 48 | 49 | size = self.stride * self.stride 50 | count = 0 51 | for r in self.ratios: 52 | if self.round_dight > 0: 53 | ws = round(math.sqrt(size*1. / r), self.round_dight) 54 | hs = round(ws * r, self.round_dight) 55 | else: 56 | ws = int(math.sqrt(size*1. / r)) 57 | hs = int(ws * r) 58 | 59 | for s in self.scales: 60 | w = ws * s 61 | h = hs * s 62 | self.anchors[count][:] = [-w*0.5, -h*0.5, w*0.5, h*0.5][:] 63 | count += 1 64 | 65 | def generate_all_anchors(self, im_c, size): 66 | if self.image_center == im_c and self.size == size: 67 | return False 68 | self.image_center = im_c 69 | self.size = size 70 | 71 | a0x = im_c - size // 2 * self.stride 72 | ori = np.array([a0x] * 4, dtype=np.float32) 73 | zero_anchors = self.anchors + ori 74 | 75 | x1 = zero_anchors[:, 0] 76 | y1 = zero_anchors[:, 1] 77 | x2 = zero_anchors[:, 2] 78 | y2 = zero_anchors[:, 3] 79 | 80 | x1, y1, x2, y2 = map(lambda x: x.reshape(self.anchor_num, 1, 1), [x1, y1, x2, y2]) 81 | cx, cy, w, h = corner2center([x1, y1, x2, y2]) 82 | 83 | disp_x = np.arange(0, size).reshape(1, 1, -1) * self.stride 84 | disp_y = np.arange(0, size).reshape(1, -1, 1) * self.stride 85 | 86 | cx = cx + disp_x 87 | cy = cy + disp_y 88 | 89 | # broadcast 90 | zero = np.zeros((self.anchor_num, size, size), dtype=np.float32) 91 | cx, cy, w, h = map(lambda x: x + zero, [cx, cy, w, h]) 92 | x1, y1, x2, y2 = center2corner([cx, cy, w, h]) 93 | 94 | self.all_anchors = np.stack([x1, y1, x2, y2]), np.stack([cx, cy, w, h]) 95 | return True 96 | 97 | 98 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/bbox_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # Revised for THOR by Axel Sauer (axel.sauer@tum.de) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | from collections import namedtuple 10 | 11 | Corner = namedtuple('Corner', 'x1 y1 x2 y2') 12 | BBox = Corner 13 | Center = namedtuple('Center', 'x y w h') 14 | 15 | 16 | def corner2center(corner): 17 | """ 18 | :param corner: Corner or np.array 4*N 19 | :return: Center or 4 np.array N 20 | """ 21 | if isinstance(corner, Corner): 22 | x1, y1, x2, y2 = corner 23 | return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, (x2 - x1), (y2 - y1)) 24 | else: 25 | x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3] 26 | x = (x1 + 
x2) * 0.5 27 | y = (y1 + y2) * 0.5 28 | w = x2 - x1 29 | h = y2 - y1 30 | return x, y, w, h 31 | 32 | 33 | def center2corner(center): 34 | """ 35 | :param center: Center or np.array 4*N 36 | :return: Corner or np.array 4*N 37 | """ 38 | if isinstance(center, Center): 39 | x, y, w, h = center 40 | return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5) 41 | else: 42 | x, y, w, h = center[0], center[1], center[2], center[3] 43 | x1 = x - w * 0.5 44 | y1 = y - h * 0.5 45 | x2 = x + w * 0.5 46 | y2 = y + h * 0.5 47 | return x1, y1, x2, y2 48 | 49 | 50 | def cxy_wh_2_rect(pos, sz): 51 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) # 0-index 52 | 53 | 54 | def get_axis_aligned_bbox(region): 55 | nv = region.size 56 | if nv == 8: 57 | cx = np.mean(region[0::2]) 58 | cy = np.mean(region[1::2]) 59 | x1 = min(region[0::2]) 60 | x2 = max(region[0::2]) 61 | y1 = min(region[1::2]) 62 | y2 = max(region[1::2]) 63 | A1 = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6]) 64 | A2 = (x2 - x1) * (y2 - y1) 65 | s = np.sqrt(A1 / A2) 66 | w = s * (x2 - x1) + 1 67 | h = s * (y2 - y1) + 1 68 | else: 69 | x = region[0] 70 | y = region[1] 71 | w = region[2] 72 | h = region[3] 73 | cx = x+w/2 74 | cy = y+h/2 75 | 76 | return cx, cy, w, h 77 | 78 | 79 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/config_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # Revised for THOR by Axel Sauer (axel.sauer@tum.de) 6 | # -------------------------------------------------------- 7 | 8 | import json 9 | from os.path import exists 10 | 11 | 12 | def load_config(config, arch): 13 | assert exists(config), '"{}" not exists'.format(config) 14 | config = json.load(open(config)) 15 | 16 | # deal with network 17 | if 'network' not in config: 18 | print('Warning: network lost in config. 
This will be error in next version') 19 | 20 | config['network'] = {} 21 | 22 | if not arch: 23 | raise Exception('no arch provided') 24 | 25 | arch = config['network']['arch'] 26 | 27 | return config 28 | 29 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/load_helper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import logging 3 | logger = logging.getLogger('global') 4 | 5 | 6 | def check_keys(model, pretrained_state_dict): 7 | ckpt_keys = set(pretrained_state_dict.keys()) 8 | model_keys = set(model.state_dict().keys()) 9 | used_pretrained_keys = model_keys & ckpt_keys 10 | unused_pretrained_keys = ckpt_keys - model_keys 11 | missing_keys = model_keys - ckpt_keys 12 | if len(missing_keys) > 0: 13 | logger.info('[Warning] missing keys: {}'.format(missing_keys)) 14 | logger.info('missing keys:{}'.format(len(missing_keys))) 15 | if len(unused_pretrained_keys) > 0: 16 | logger.info('[Warning] unused_pretrained_keys: {}'.format(unused_pretrained_keys)) 17 | logger.info('unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) 18 | logger.info('used keys:{}'.format(len(used_pretrained_keys))) 19 | assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' 20 | return True 21 | 22 | 23 | def remove_prefix(state_dict, prefix): 24 | ''' Old style model is stored with all names of parameters share common prefix 'module.' ''' 25 | logger.info('remove prefix \'{}\''.format(prefix)) 26 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 27 | return {f(key): value for key, value in state_dict.items()} 28 | 29 | 30 | def load_pretrain(model, pretrained_path): 31 | logger.info('load pretrained model from {}'.format(pretrained_path)) 32 | 33 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 34 | pretrained_dict = torch.load(pretrained_path, map_location=device) 35 | if "state_dict" in pretrained_dict.keys(): 36 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.') 37 | else: 38 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 39 | 40 | try: 41 | check_keys(model, pretrained_dict) 42 | except: 43 | logger.info('[Warning]: using pretrain as features. Adding "features." as prefix') 44 | new_dict = {} 45 | for k, v in pretrained_dict.items(): 46 | k = 'features.' 
+ k 47 | new_dict[k] = v 48 | pretrained_dict = new_dict 49 | check_keys(model, pretrained_dict) 50 | model.load_state_dict(pretrained_dict, strict=False) 51 | return model 52 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/log_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from __future__ import division 7 | 8 | import os 9 | import logging 10 | import sys 11 | 12 | if hasattr(sys, 'frozen'): # support for py2exe 13 | _srcfile = "logging%s__init__%s" % (os.sep, __file__[-4:]) 14 | elif __file__[-4:].lower() in ['.pyc', '.pyo']: 15 | _srcfile = __file__[:-4] + '.py' 16 | else: 17 | _srcfile = __file__ 18 | _srcfile = os.path.normcase(_srcfile) 19 | 20 | 21 | logs = set() 22 | 23 | 24 | class Filter: 25 | def __init__(self, flag): 26 | self.flag = flag 27 | 28 | def filter(self, x): return self.flag 29 | 30 | 31 | class Dummy: 32 | def __init__(self, *arg, **kwargs): 33 | pass 34 | 35 | def __getattr__(self, arg): 36 | def dummy(*args, **kwargs): pass 37 | return dummy 38 | 39 | 40 | def get_format(logger, level): 41 | if 'SLURM_PROCID' in os.environ: 42 | rank = int(os.environ['SLURM_PROCID']) 43 | 44 | if level == logging.INFO: 45 | logger.addFilter(Filter(rank == 0)) 46 | else: 47 | rank = 0 48 | format_str = '[%(asctime)s-rk{}-%(filename)s#%(lineno)3d] %(message)s'.format(rank) 49 | formatter = logging.Formatter(format_str) 50 | return formatter 51 | 52 | 53 | def init_log(name, level = logging.INFO, format_func=get_format): 54 | if (name, level) in logs: return 55 | logs.add((name, level)) 56 | logger = logging.getLogger(name) 57 | logger.setLevel(level) 58 | ch = logging.StreamHandler() 59 | ch.setLevel(level) 60 | formatter = format_func(logger, level) 61 | ch.setFormatter(formatter) 62 | logger.addHandler(ch) 63 | return logger 64 | 65 | 66 | def add_file_handler(name, log_file, level = logging.INFO): 67 | logger = logging.getLogger(name) 68 | fh = logging.FileHandler(log_file) 69 | fh.setFormatter(get_format(logger, level)) 70 | logger.addHandler(fh) 71 | 72 | 73 | init_log('global') 74 | 75 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/tracker_config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from __future__ import division 7 | from .anchors import Anchors 8 | 9 | 10 | class TrackerConfig(object): 11 | # These are the default hyper-params for SiamMask 12 | penalty_k = 0.04 13 | window_influence = 0.42 14 | lr = 0.25 15 | seg_thr = 0.3 # for mask 16 | windowing = 'cosine' # to penalize large displacements [cosine/uniform] 17 | # Params from the network architecture, have to be consistent with the training 18 | exemplar_size = 127 # input z size 19 | instance_size = 255 # input x size (search region) 20 | instance_size_glob = 550 # input x size (search region) 21 | total_stride = 8 22 | out_size = 63 # for mask 23 | base_size = 8 24 | score_size = 
(instance_size-exemplar_size)//total_stride+1+base_size 25 | score_size_glob = (instance_size_glob-exemplar_size)//total_stride+1+base_size 26 | context_amount = 0.5 # context amount for the exemplar 27 | ratios = [0.33, 0.5, 1, 2, 3] 28 | scales = [8, ] 29 | anchor_num = len(ratios) * len(scales) 30 | round_dight = 0 31 | anchor = [] 32 | 33 | def update(self, newparam=None, anchors=None): 34 | if newparam: 35 | for key, value in newparam.items(): 36 | setattr(self, key, value) 37 | if anchors is not None: 38 | if isinstance(anchors, dict): 39 | anchors = Anchors(anchors) 40 | if isinstance(anchors, Anchors): 41 | self.total_stride = anchors.stride 42 | self.ratios = anchors.ratios 43 | self.scales = anchors.scales 44 | self.round_dight = anchors.round_dight 45 | self.renew() 46 | 47 | def renew(self): 48 | self.score_size = (self.instance_size - self.exemplar_size) // self.total_stride + 1 + self.base_size 49 | self.anchor_num = len(self.ratios) * len(self.scales) 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/tracking_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import cv2 4 | 5 | def to_torch(ndarray): 6 | if type(ndarray).__module__ == 'numpy': 7 | return torch.from_numpy(ndarray) 8 | elif not torch.is_tensor(ndarray): 9 | raise ValueError("Cannot convert {} to torch tensor" 10 | .format(type(ndarray))) 11 | return ndarray 12 | 13 | 14 | def im_to_torch(img): 15 | img = np.transpose(img, (2, 0, 1)) # C*H*W 16 | img = to_torch(img).float() 17 | return img 18 | 19 | 20 | 21 | def get_subwindow_tracking(im, pos, model_sz, original_sz, avg_chans, out_mode='torch'): 22 | if isinstance(pos, float): 23 | pos = [pos, pos] 24 | sz = original_sz 25 | im_sz = im.shape 26 | c = (original_sz + 1) / 2 27 | context_xmin = round(pos[0] - c) 28 | context_xmax = context_xmin + sz - 1 29 | context_ymin = round(pos[1] - c) 30 | context_ymax = context_ymin + sz - 1 31 | left_pad = int(max(0., -context_xmin)) 32 | top_pad = int(max(0., -context_ymin)) 33 | right_pad = int(max(0., context_xmax - im_sz[1] + 1)) 34 | bottom_pad = int(max(0., context_ymax - im_sz[0] + 1)) 35 | 36 | context_xmin = context_xmin + left_pad 37 | context_xmax = context_xmax + left_pad 38 | context_ymin = context_ymin + top_pad 39 | context_ymax = context_ymax + top_pad 40 | 41 | r, c, k = im.shape 42 | if any([top_pad, bottom_pad, left_pad, right_pad]): 43 | te_im = np.zeros((r + top_pad + bottom_pad, c + left_pad + right_pad, k), np.uint8) 44 | te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im 45 | if top_pad: 46 | te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans 47 | if bottom_pad: 48 | te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans 49 | if left_pad: 50 | te_im[:, 0:left_pad, :] = avg_chans 51 | if right_pad: 52 | te_im[:, c + left_pad:, :] = avg_chans 53 | im_patch_original = te_im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :] 54 | else: 55 | im_patch_original = im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :] 56 | 57 | if not np.array_equal(model_sz, original_sz): 58 | im_patch = cv2.resize(im_patch_original, (model_sz, model_sz)) 59 | else: 60 | im_patch = im_patch_original 61 | 62 | return im_to_torch(im_patch) 63 | -------------------------------------------------------------------------------- 
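A minimal usage sketch for get_subwindow_tracking above, assuming only what the function itself requires; the frame path, box values, and the import path are illustrative and not taken from the repository:

import numpy as np
import cv2
from trackers.SiamMask.utils.tracking_utils import get_subwindow_tracking  # path assumed

frame = cv2.imread('frame.jpg')           # H x W x 3 BGR image (placeholder path)
target_pos = np.array([320.0, 240.0])     # (cx, cy) of the tracked object
target_sz = np.array([80.0, 60.0])        # (w, h) of the tracked object
avg_chans = np.mean(frame, axis=(0, 1))   # per-channel padding color, as in SiamMask_init
context = 0.5 * np.sum(target_sz)         # context_amount = 0.5, as in TrackerConfig
s_z = round(np.sqrt((target_sz[0] + context) * (target_sz[1] + context)))
patch = get_subwindow_tracking(frame, target_pos, model_sz=127,
                               original_sz=s_z, avg_chans=avg_chans)
print(patch.shape)                        # torch.Size([3, 127, 127]), float CHW tensor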
/DeepMTA_code/trackers/dcynet_modules_adaptis/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .resnet18_vggm import * 3 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/constants.py: -------------------------------------------------------------------------------- 1 | # Path to SALICON raw data 2 | pathToImages = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/image' 3 | pathToMapsTrain = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/attention_map' 4 | pathToMapsVal = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/attention_map' 5 | 6 | # Path to processed data. Created using preprocess.py 7 | pathToResizedImagesTrain = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/image' 8 | pathToResizedMapsTrain = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/attention_map' 9 | pathToResizedTargetObjectTrain = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/target_objects/first_frame' 10 | 11 | 12 | pathToResizedImagesVal = pathToResizedImagesTrain 13 | pathToResizedMapsVal = pathToResizedMapsTrain 14 | 15 | 16 | pathToResizedImagesTest = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/image' 17 | 18 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/data_loader.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import numpy as np 4 | import cv2 5 | import torch 6 | import torchvision.transforms as transforms 7 | import matplotlib.pyplot as plt 8 | from constants import * 9 | import pdb 10 | import random 11 | 12 | class DataLoader(object): 13 | 14 | def __init__(self, batch_size = 5): 15 | #reading data list 16 | # self.list_img = [k.split('/')[-1].split('.')[0] for k in glob.glob(os.path.join(pathToResizedImagesTrain, '*train*'))] 17 | self.list_img = [k.split('/')[-1].split('.')[0] for k in glob.glob(os.path.join(pathToResizedImagesTrain, '*image*'))] 18 | self.batch_size = batch_size 19 | self.size = len(self.list_img) 20 | self.cursor = 0 21 | self.num_batches = self.size / batch_size 22 | 23 | def get_batch(self): # Returns 24 | if self.cursor + self.batch_size > self.size: 25 | self.cursor = 0 26 | np.random.shuffle(self.list_img) 27 | 28 | mask_size = 216 29 | img = torch.zeros(self.batch_size, 3, 300, 300) 30 | sal_map = torch.zeros(self.batch_size, 1, mask_size, mask_size) 31 | targetObject = torch.zeros(self.batch_size, 3, 100, 100) 32 | coords = torch.zeros(self.batch_size, 2) 33 | 34 | #to_tensor = transform.Compose(transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0. 35 | to_tensor = transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0. 
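# Filename convention inferred from the slicing below (not documented in the
# repo): entries of list_img look like '<video>_image-<idx>', so the text
# before the first '_' is the video name and the 7 characters of '_image-'
# are skipped to recover <idx>; the mask and the first-frame target crop are
# then loaded as '<video>_mask-<idx>.jpg' and '<video>_targetObject.jpg'.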
36 | 37 | for idx in range(self.batch_size): 38 | curr_file = self.list_img[self.cursor] 39 | 40 | # pdb.set_trace() 41 | temp_index2 = curr_file.find('_') 42 | videoName = curr_file[:temp_index2] 43 | 44 | imgIndex = curr_file[temp_index2+7:] 45 | 46 | # pdb.set_trace() 47 | 48 | targetObject_img_path = os.path.join(pathToResizedTargetObjectTrain, videoName + '_targetObject.jpg') 49 | full_img_path = os.path.join(pathToResizedImagesTrain, videoName + "_image-" + imgIndex + '.jpg') 50 | full_map_path = os.path.join(pathToResizedMapsTrain, videoName + "_mask-" + imgIndex + '.jpg') 51 | self.cursor += 1 52 | inputimage = cv2.imread(full_img_path) # (192,256,3) 53 | 54 | 55 | # pdb.set_trace() 56 | img[idx] = to_tensor(inputimage) 57 | 58 | targetObjectimage = cv2.imread(targetObject_img_path) 59 | targetObject[idx] = to_tensor(targetObjectimage) 60 | 61 | saliencyimage = cv2.imread(full_map_path, 0) 62 | saliencyimage = cv2.resize(saliencyimage, (mask_size, mask_size), interpolation=cv2.INTER_CUBIC) 63 | 64 | num_points = 1 65 | possible_points = np.where(saliencyimage) 66 | num_possible_points = possible_points[0].shape[0] 67 | rindx = random.sample(list(range(num_possible_points)), k=min(num_points, num_possible_points)) 68 | points = [] 69 | for j in rindx: 70 | points.append((possible_points[0][j], possible_points[1][j])) 71 | points = np.array(points) 72 | coords[idx] = torch.from_numpy(points) 73 | 74 | 75 | saliencyimage = np.expand_dims(saliencyimage, axis=2) 76 | sal_map[idx] = to_tensor(saliencyimage) 77 | 78 | 79 | 80 | return (img, sal_map, targetObject, coords) 81 | 82 | 83 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/logger.py: -------------------------------------------------------------------------------- 1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 2 | # Tensorboard logger for PyTorch 3 | import tensorflow as tf 4 | import numpy as np 5 | import scipy.misc 6 | try: 7 | from StringIO import StringIO # Python 2.7 8 | except ImportError: 9 | from io import BytesIO # Python 3.x 10 | 11 | 12 | class Logger(object): 13 | 14 | def __init__(self, log_dir): 15 | """Create a summary writer logging to log_dir.""" 16 | self.writer = tf.summary.FileWriter(log_dir) 17 | 18 | def scalar_summary(self, tag, value, step): 19 | """Log a scalar variable.""" 20 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 21 | self.writer.add_summary(summary, step) 22 | 23 | def image_summary(self, tag, images, step): 24 | """Log a list of images.""" 25 | 26 | img_summaries = [] 27 | for i, img in enumerate(images): 28 | # Write the image to a string 29 | try: 30 | s = StringIO() 31 | except: 32 | s = BytesIO() 33 | scipy.misc.toimage(img).save(s, format="png") 34 | 35 | # Create an Image object 36 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 37 | height=img.shape[0], 38 | width=img.shape[1]) 39 | # Create a Summary value 40 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 41 | 42 | # Create and write Summary 43 | summary = tf.Summary(value=img_summaries) 44 | self.writer.add_summary(summary, step) 45 | 46 | def histo_summary(self, tag, values, step, bins=1000): 47 | """Log a histogram of the tensor of values.""" 48 | 49 | # Create a histogram using numpy 50 | counts, bin_edges = np.histogram(values, bins=bins) 51 | 52 | # Fill the fields of the histogram proto 53 | hist = tf.HistogramProto() 54 | 
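# Note: tf.Summary / tf.HistogramProto here and tf.summary.FileWriter above
# are TensorFlow 1.x APIs (only reachable via tf.compat.v1 in TF 2.x), and
# scipy.misc.toimage has been removed from recent SciPy releases, so this
# logger assumes the older library versions the repo was written against.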
hist.min = float(np.min(values)) 55 | hist.max = float(np.max(values)) 56 | hist.num = int(np.prod(values.shape)) 57 | hist.sum = float(np.sum(values)) 58 | hist.sum_squares = float(np.sum(values**2)) 59 | 60 | # Drop the start of the first bin 61 | bin_edges = bin_edges[1:] 62 | 63 | # Add bin edges and counts 64 | for edge in bin_edges: 65 | hist.bucket_limit.append(edge) 66 | for c in counts: 67 | hist.bucket.append(c) 68 | 69 | # Create and write Summary 70 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 71 | self.writer.add_summary(summary, step) 72 | self.writer.flush() 73 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/ops.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | def conv2d(in_channels, out_channels, kernel_size = 3, padding = 1): 4 | return nn.Conv2d(in_channels, out_channels, kernel_size = kernel_size, padding = padding) 5 | 6 | def deconv2d(in_channels, out_channels, kernel_size = 3, padding = 1): 7 | return nn.ConvTranspose2d(in_channels, out_channels, kernel_size = kernel_size, padding = padding) 8 | 9 | def relu(inplace = True): # Change to True? 10 | return nn.ReLU(inplace) 11 | 12 | def maxpool2d(): 13 | return nn.MaxPool2d(2) 14 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/resnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch.nn as nn 3 | from collections import OrderedDict 4 | import torch.utils.model_zoo as model_zoo 5 | from torchvision.models.resnet import BasicBlock, Bottleneck, model_urls 6 | 7 | 8 | class ResNet(nn.Module): 9 | """ ResNet network module. Allows extracting specific feature blocks.""" 10 | def __init__(self, block, layers, output_layers, num_classes=1000, inplanes=64): 11 | self.inplanes = inplanes 12 | super(ResNet, self).__init__() 13 | self.output_layers = output_layers 14 | self.conv1 = nn.Conv2d(3, inplanes, kernel_size=7, stride=2, padding=3, bias=False) 15 | self.bn1 = nn.BatchNorm2d(inplanes) 16 | self.relu = nn.ReLU(inplace=True) 17 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 18 | self.layer1 = self._make_layer(block, inplanes, layers[0]) 19 | self.layer2 = self._make_layer(block, inplanes*2, layers[1], stride=2) 20 | self.layer3 = self._make_layer(block, inplanes*4, layers[2], stride=2) 21 | self.layer4 = self._make_layer(block, inplanes*8, layers[3], stride=2) 22 | # self.avgpool = nn.AvgPool2d(7, stride=1) 23 | self.avgpool = nn.AdaptiveAvgPool2d((1,1)) 24 | self.fc = nn.Linear(inplanes*8 * block.expansion, num_classes) 25 | 26 | for m in self.modules(): 27 | if isinstance(m, nn.Conv2d): 28 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 29 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 30 | elif isinstance(m, nn.BatchNorm2d): 31 | m.weight.data.fill_(1) 32 | m.bias.data.zero_() 33 | 34 | def _make_layer(self, block, planes, blocks, stride=1): 35 | downsample = None 36 | if stride != 1 or self.inplanes != planes * block.expansion: 37 | downsample = nn.Sequential( 38 | nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), 39 | nn.BatchNorm2d(planes * block.expansion), 40 | ) 41 | 42 | layers = [] 43 | layers.append(block(self.inplanes, planes, stride, downsample)) 44 | self.inplanes = planes * block.expansion 45 | for i in range(1, blocks): 46 | layers.append(block(self.inplanes, planes)) 47 | 48 | return nn.Sequential(*layers) 49 | 50 | def _add_output_and_check(self, name, x, outputs, output_layers): 51 | if name in output_layers: 52 | outputs[name] = x 53 | return len(output_layers) == len(outputs) 54 | 55 | def forward(self, x, output_layers=None): 56 | """ Forward pass with input x. The output_layers specify the feature blocks which must be returned """ 57 | # outputs = OrderedDict() 58 | 59 | # if output_layers is None: 60 | # output_layers = self.output_layers 61 | 62 | x = self.conv1(x) 63 | x = self.bn1(x) 64 | x = self.relu(x) 65 | 66 | # if self._add_output_and_check('conv1', x, outputs, output_layers): 67 | # return outputs 68 | 69 | x = self.maxpool(x) 70 | 71 | x = self.layer1(x) 72 | 73 | # if self._add_output_and_check('layer1', x, outputs, output_layers): 74 | # return outputs 75 | 76 | x2_feat = self.layer2(x) 77 | 78 | # if self._add_output_and_check('layer2', x, outputs, output_layers): 79 | # return outputs 80 | 81 | x3_feat = self.layer3(x2_feat) 82 | 83 | # if self._add_output_and_check('layer3', x, outputs, output_layers): 84 | # return outputs 85 | 86 | x4_feat = self.layer4(x3_feat) 87 | 88 | # if self._add_output_and_check('layer4', x, outputs, output_layers): 89 | # return outputs 90 | 91 | # x = self.avgpool(x) 92 | # x = x.view(x.size(0), -1) 93 | # x = self.fc(x) 94 | 95 | # if self._add_output_and_check('fc', x, outputs, output_layers): 96 | # return outputs 97 | 98 | # if len(output_layers) == 1 and output_layers[0] == 'default': 99 | # return x 100 | 101 | # raise ValueError('output_layer is wrong.') 102 | 103 | return x2_feat, x3_feat, x4_feat 104 | 105 | 106 | 107 | 108 | 109 | 110 | def resnet18(output_layers=None, pretrained=True): 111 | """Constructs a ResNet-18 model. 112 | """ 113 | 114 | if output_layers is None: 115 | output_layers = ['default'] 116 | else: 117 | for l in output_layers: 118 | if l not in ['conv1', 'layer1', 'layer2', 'layer3', 'layer4', 'fc']: 119 | raise ValueError('Unknown layer: {}'.format(l)) 120 | 121 | model = ResNet(BasicBlock, [2, 2, 2, 2], output_layers) 122 | 123 | if pretrained: 124 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 125 | return model 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | def resnet50(output_layers=None, pretrained=False): 134 | """Constructs a ResNet-50 model. 
135 | """ 136 | 137 | if output_layers is None: 138 | output_layers = ['default'] 139 | else: 140 | for l in output_layers: 141 | if l not in ['conv1', 'layer1', 'layer2', 'layer3', 'layer4', 'fc']: 142 | raise ValueError('Unknown layer: {}'.format(l)) 143 | 144 | model = ResNet(Bottleneck, [3, 4, 6, 3], output_layers) 145 | if pretrained: 146 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 147 | return model -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | import time 6 | import os 7 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 8 | 9 | from torch.autograd import Variable 10 | 11 | from data_loader import DataLoader 12 | from generator import DC_adaIS_Generator 13 | from utils import * 14 | import pdb 15 | import warnings 16 | warnings.filterwarnings("ignore") 17 | 18 | 19 | batch_size = 10 20 | lr = 1e-4 21 | 22 | 23 | generator = DC_adaIS_Generator() 24 | 25 | #### load pre-trained model 26 | # print("==>> Loading pre-trained model ... ") 27 | # generator.load_state_dict(torch.load('./dynamic_global_search_region_generator.pkl')) 28 | # # generator = torch.load('./dynamic_global_search_region_generator.pkl') 29 | # print("==>> Done !!!") 30 | 31 | if torch.cuda.is_available(): 32 | generator.cuda() 33 | 34 | criterion = nn.BCELoss() 35 | 36 | 37 | print("===================================================================================") 38 | print("===================================================================================") 39 | print(generator) 40 | print("===================================================================================") 41 | print("===================================================================================") 42 | 43 | g_optim = torch.optim.Adagrad(generator.parameters(), lr=lr) 44 | 45 | num_epoch = 50 46 | dataloader = DataLoader(batch_size) 47 | num_batch = 500 48 | print("==>> num_batch: ", num_batch) 49 | 50 | 51 | def to_variable(x, requires_grad=True): 52 | if torch.cuda.is_available(): 53 | x = x.cuda() 54 | return Variable(x,requires_grad) 55 | 56 | counter = 0 57 | start_time = time.time() 58 | DIR_TO_SAVE = "./generator_output/" 59 | if not os.path.exists(DIR_TO_SAVE): 60 | os.makedirs(DIR_TO_SAVE) 61 | 62 | 63 | 64 | print("###################################################################################") 65 | print(" The Main Training Loop ") 66 | print("###################################################################################") 67 | 68 | generator.train() 69 | 70 | for current_epoch in range(num_epoch): 71 | n_updates = 1 72 | d_cost_avg = 0 73 | g_cost_avg = 0 74 | 75 | for idx in range(int(num_batch)): 76 | (batch_img, batch_map, targetObject_img, coords) = dataloader.get_batch() 77 | batch_img = to_variable(batch_img, requires_grad=True) 78 | batch_map = to_variable(batch_map, requires_grad=False) 79 | targetObject_img = to_variable(targetObject_img, requires_grad=True) 80 | # batch_map = nn.functional.interpolate(batch_map, size=[216, 216]) 81 | 82 | val_batchImg = batch_img 83 | val_targetObjectImg = targetObject_img 84 | val_coords = coords 85 | 86 | g_optim.zero_grad() 87 | attention_map = generator(batch_img, targetObject_img, coords) 88 | 89 | # pdb.set_trace() 90 | g_gen_loss = criterion(attention_map, batch_map) 91 | 
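# nn.BCELoss expects inputs already in [0, 1], so this assumes the generator
# ends in a sigmoid; with the default reduction='mean' the loss is a scalar,
# which makes the torch.sum() on the next line effectively a no-op.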
g_loss = torch.sum(g_gen_loss) 92 | g_cost_avg += g_loss.item() 93 | g_loss.backward() 94 | g_optim.step() 95 | 96 | n_updates += 1 97 | 98 | if (idx+1)%100 == 0: 99 | print("==>> Epoch [%d/%d], Step[%d/%d], g_gen_loss: %.4f, LR: %.6f, time: %4.4f" % \ 100 | (current_epoch, num_epoch, idx+1, num_batch, g_loss.item(), lr, time.time()-start_time)) 101 | counter += 1 102 | 103 | # pdb.set_trace() 104 | g_cost_avg /= num_batch 105 | 106 | # Save weights every 3 epoch 107 | if current_epoch % 3 == 0: 108 | print('==>> Epoch:', current_epoch, ' ==>> Train_loss->', (g_cost_avg)) 109 | torch.save(generator.state_dict(), 'generator_dcyNet_adaIS_1e4.pkl') 110 | 111 | # validation 112 | out = generator(val_batchImg, val_targetObjectImg, val_coords) 113 | map_out = out.cpu().data.squeeze(0) 114 | for iiidex in range(5): 115 | new_path = DIR_TO_SAVE + str(current_epoch) + str(iiidex) + ".jpg" 116 | pilTrans = transforms.ToPILImage() 117 | pilImg = pilTrans(map_out[iiidex]) 118 | print('==>> Image saved to ', new_path) 119 | pilImg.save(new_path) 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import torch 6 | from torch.autograd import Variable 7 | import torch.nn as nn 8 | import pdb 9 | 10 | from PIL import Image 11 | 12 | def to_variable(x,requires_grad=True): 13 | if torch.cuda.is_available(): 14 | x = x.cuda() 15 | return Variable(x,requires_grad) 16 | 17 | def show(img): 18 | #print(img.shape) 19 | pilTrans = transforms.ToPILImage() 20 | pilImg = pilTrans(img) 21 | s = np.array(pilImg) 22 | plt.figure() 23 | plt.imshow(s) 24 | 25 | def show_gray(img): 26 | print(img.shape) 27 | pilTrans = transforms.ToPILImage() 28 | pilImg = pilTrans(img) 29 | s = np.array(pilImg) 30 | plt.figure() 31 | plt.imshow(s) 32 | 33 | def save_gray(img, path): 34 | pilTrans = transforms.ToPILImage() 35 | pilImg = pilTrans(img) 36 | print('Image saved to ', path) 37 | pilImg.save(path) 38 | 39 | 40 | 41 | 42 | def predict(model, img, validation_targetObject): 43 | to_tensor = transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0. 44 | im = to_tensor(img) 45 | val_targetObject = to_tensor(validation_targetObject) 46 | #show(im) 47 | inp = to_variable(im.unsqueeze(0), False) 48 | inp = nn.functional.interpolate(inp, size=[300, 300]) 49 | 50 | val_targetObject_ = to_variable(val_targetObject.unsqueeze(0), False) 51 | val_targetObject_ = nn.functional.interpolate(val_targetObject_, size=[100, 100]) 52 | 53 | #print(inp.size()) 54 | 55 | out = model(inp, val_targetObject_) 56 | out = nn.functional.interpolate(out, size=[im.shape[1], im.shape[2]]) 57 | 58 | map_out = out.cpu().data.squeeze(0) 59 | pilTrans = transforms.ToPILImage() 60 | pilImg = pilTrans(map_out) 61 | dynamic_atttentonMAP = np.asarray(pilImg) 62 | 63 | return dynamic_atttentonMAP 64 | 65 | 66 | # def predict(model, img, validation_targetObject, epoch, path): 67 | # to_tensor = transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0. 
68 | # im = to_tensor(img) 69 | # val_targetObject = to_tensor(validation_targetObject) 70 | # #show(im) 71 | # inp = to_variable(im.unsqueeze(0), False) 72 | # val_targetObject_ = to_variable(val_targetObject.unsqueeze(0), False) 73 | # #print(inp.size()) 74 | # out = model(inp, val_targetObject_) 75 | # map_out = out.cpu().data.squeeze(0) 76 | # #show_gray(map_out) 77 | 78 | # new_path = path + str(epoch) + ".jpg" 79 | # save_gray(map_out, new_path) 80 | 81 | # #s = np.array(Image.open(new_path)) 82 | # #plt.figure() 83 | # #plt.imshow(s) 84 | 85 | 86 | 87 | 88 | # out = generator(val_batchImg, val_targetObjectImg, val_coords) 89 | # map_out = out.cpu().data.squeeze(0) 90 | # for iiidex in range(5): 91 | # new_path = DIR_TO_SAVE + str(current_epoch) + str(iiidex) + ".jpg" 92 | # pilTrans = transforms.ToPILImage() 93 | # pilImg = pilTrans(map_out[iiidex]) 94 | # print('==>> Image saved to ', new_path) 95 | # pilImg.save(new_path) 96 | 97 | 98 | # def predict(model, img, validation_targetObject, val_coords): 99 | # to_tensor = transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0. 100 | # im = to_tensor(img) 101 | # val_targetObject = to_tensor(validation_targetObject) 102 | # inp = to_variable(im.unsqueeze(0), False) 103 | # inp = nn.functional.interpolate(inp, size=[300, 300]) 104 | 105 | # val_targetObject_ = to_variable(val_targetObject.unsqueeze(0), False) 106 | # val_targetObject_ = nn.functional.interpolate(val_targetObject_, size=[100, 100]) 107 | 108 | # #print(inp.size()) 109 | # out = model(inp, val_targetObject_) 110 | # out = nn.functional.interpolate(out, size=[im.shape[1], im.shape[2]]) 111 | 112 | # map_out = out.cpu().data.squeeze(0) 113 | # pilTrans = transforms.ToPILImage() 114 | # pilImg = pilTrans(map_out) 115 | # dynamic_atttentonMAP = np.asarray(pilImg) 116 | # return dynamic_atttentonMAP 117 | 118 | 119 | -------------------------------------------------------------------------------- /DeepMTA_code/train_traj_measure_net.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | import time 6 | import os 7 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 8 | 9 | from network import traj_critic, axis_aligned_iou 10 | import torchvision 11 | import cv2 12 | import pdb 13 | import warnings 14 | warnings.filterwarnings("ignore") 15 | import torchvision.transforms as transforms 16 | to_tensor = transforms.ToTensor() 17 | 18 | 19 | got10k_dataset_path = "./data/GOT10k_train_val/" 20 | result_path = "./benchmark/results/GOT10k_train_val/Tracker/" 21 | attentionMap_path = "./temp_DIR_TO_SAVE_static_Global_attentionMap/" 22 | 23 | 24 | # batchSize = 20 25 | lr = 1e-3 26 | num_epoch = 5000 27 | clip_len = 10 28 | img_size = 300 29 | 30 | traj_critic_net = traj_critic() 31 | traj_critic_net = traj_critic_net.cuda() 32 | 33 | optimizer = torch.optim.Adagrad(traj_critic_net.parameters(), lr=lr) 34 | loss_fn = torch.nn.L1Loss().cuda() 35 | videoFiles = os.listdir(attentionMap_path) 36 | 37 | traj_critic_net.train() 38 | 39 | 40 | ######################################################################################################### 41 | #### The Main Loop 42 | ######################################################################################################### 43 | 44 | for epochID in range(num_epoch): 45 | 46 | epoch_totalLoss = 0 47 | 48 | for videoIndex in range(len(videoFiles)): 49 | videoName = videoFiles[videoIndex] 50 | 51 | 
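# Per-video files assumed on disk below: two trajectory result files
# (<video>_001.txt, <video>_002.txt), a ground-truth box file, and two
# per-frame score files. Note that the variable local_score is loaded from
# '*_scoreGlobal.txt' and global_score from '*_scoreLocal.txt'; whether this
# cross-naming is intentional cannot be determined from this script alone.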
result1_path = result_path + videoName + "/" + videoName + "_001.txt" 52 | result2_path = result_path + videoName + "/" + videoName + "_002.txt" 53 | gt_path = result_path + videoName + "/" + videoName + "_groundtruth.txt" 54 | local_score_path = result_path + videoName + "/" + videoName + "_scoreGlobal.txt" 55 | global_score_path = result_path + videoName + "/" + videoName + "_scoreLocal.txt" 56 | 57 | #### tracking results and score. 58 | tracking_result1 = np.loadtxt(result1_path, delimiter=',') ## (90, 4) 59 | tracking_result2 = np.loadtxt(result2_path, delimiter=',') ## (90, 4) 60 | gt = np.loadtxt(gt_path, delimiter=',') ## (90, 4) 61 | local_score = torch.from_numpy(np.loadtxt(local_score_path)) ## (90,) 62 | global_score = torch.from_numpy(np.loadtxt(global_score_path)) ## (90,) 63 | 64 | local_score = torch.unsqueeze(local_score, dim=1) 65 | global_score = torch.unsqueeze(global_score, dim=1) 66 | 67 | #### image and attention maps. 68 | imgFiles = os.listdir(got10k_dataset_path + videoName + "/") 69 | imgFiles = np.sort(imgFiles) 70 | 71 | attentionFiles = os.listdir(attentionMap_path + videoName + "/") 72 | attentionFiles = np.sort(attentionFiles) 73 | 74 | init_imgPath = got10k_dataset_path + videoName + "/" + imgFiles[0] 75 | init_image = cv2.imread(init_imgPath) 76 | init_bbox = gt[0] 77 | 78 | # pdb.set_trace() 79 | init_target = init_image[int(init_bbox[1]):int(init_bbox[1]+init_bbox[3]), int(init_bbox[0]):int(init_bbox[0]+init_bbox[2]), :] 80 | init_target = cv2.resize(init_target, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 81 | # cv2.imwrite('./init_target.png', init_target) 82 | # cv2.imwrite('./init_image.png', init_image) 83 | 84 | startIndex = np.random.random_integers(len(attentionFiles) - clip_len) 85 | 86 | image_list = torch.zeros(clip_len, 3, img_size, img_size) 87 | initTarget_list = torch.zeros(clip_len, 3, img_size, img_size) 88 | attMap_list = torch.zeros(clip_len, 3, img_size, img_size) 89 | targetImg1_list = torch.zeros(clip_len, 3, img_size, img_size) 90 | targetImg2_list = torch.zeros(clip_len, 3, img_size, img_size) 91 | targetMap1_list = torch.zeros(clip_len, 3, img_size, img_size) 92 | targetMap2_list = torch.zeros(clip_len, 3, img_size, img_size) 93 | 94 | trajScore_list1 = torch.zeros(clip_len, 1) 95 | trajScore_list2 = torch.zeros(clip_len, 1) 96 | 97 | trajBBox_list1 = torch.zeros(clip_len, 4) 98 | trajBBox_list2 = torch.zeros(clip_len, 4) 99 | 100 | IoU_score_1 = 0 101 | IoU_score_2 = 0 102 | 103 | ######################################################################################################### 104 | #### Load Batch data 105 | ######################################################################################################### 106 | count = 0 107 | startIndex = 0 108 | for INdex in range(startIndex, startIndex+clip_len): 109 | imgPath = got10k_dataset_path + videoName + "/" + imgFiles[INdex] 110 | image = cv2.imread(imgPath) 111 | 112 | imgIndex = int(imgFiles[INdex][:-4]) 113 | attMap_path = attentionMap_path + videoName + "/" + str(imgIndex)+"_dynamic_atttentonMAP_adaptIS.png" 114 | attMap = cv2.imread(attMap_path) ## (720, 1280, 3) 115 | 116 | gt_curr = gt[INdex] 117 | result_curr1 = tracking_result1[INdex] 118 | result_curr2 = tracking_result2[INdex] 119 | 120 | #### BBox normalization 121 | result_curr1[0] = max(0, min(image.shape[1], result_curr1[0])) 122 | result_curr1[1] = max(0, min(image.shape[0], result_curr1[1])) 123 | result_curr1[2] = max(10, min(image.shape[1], result_curr1[2])) 124 | result_curr1[3] = max(10, 
min(image.shape[0], result_curr1[3])) 125 | 126 | result_curr2[0] = max(0, min(image.shape[1], result_curr2[0])) 127 | result_curr2[1] = max(0, min(image.shape[0], result_curr2[1])) 128 | result_curr2[2] = max(10, min(image.shape[1], result_curr2[2])) 129 | result_curr2[3] = max(10, min(image.shape[0], result_curr2[3])) 130 | 131 | 132 | targetImg1 = image[int(result_curr1[1]):int(result_curr1[1]+result_curr1[3]), int(result_curr1[0]):int(result_curr1[0]+result_curr1[2]), :] 133 | targetImg2 = image[int(result_curr2[1]):int(result_curr2[1]+result_curr2[3]), int(result_curr2[0]):int(result_curr2[0]+result_curr2[2]), :] 134 | tagetattMap1 = attMap[int(result_curr1[1]):int(result_curr1[1]+result_curr1[3]), int(result_curr1[0]):int(result_curr1[0]+result_curr1[2]), :] 135 | tagetattMap2 = attMap[int(result_curr2[1]):int(result_curr2[1]+result_curr2[3]), int(result_curr2[0]):int(result_curr2[0]+result_curr2[2]), :] 136 | 137 | trajScore1 = local_score[INdex] 138 | trajScore2 = global_score[INdex] 139 | 140 | #### Normalization 141 | image = cv2.resize(image, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 142 | attMap = cv2.resize(attMap, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 143 | targetImg1 = cv2.resize(targetImg1, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 144 | targetImg2 = cv2.resize(targetImg2, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 145 | tagetattMap1 = cv2.resize(tagetattMap1, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 146 | tagetattMap2 = cv2.resize(tagetattMap2, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 147 | 148 | # cv2.imwrite('./image.png', image) 149 | # cv2.imwrite('./attMap.png', attMap) 150 | # cv2.imwrite('./targetImg1.png', targetImg1) 151 | # cv2.imwrite('./targetImg2.png', targetImg2) 152 | # cv2.imwrite('./tagetattMap1.png', tagetattMap1) 153 | # cv2.imwrite('./tagetattMap2.png', tagetattMap2) 154 | 155 | # pdb.set_trace() 156 | 157 | image_list[count] = to_tensor(image) 158 | attMap_list[count] = to_tensor(attMap) 159 | targetImg1_list[count] = to_tensor(targetImg1) 160 | targetImg2_list[count] = to_tensor(targetImg2) 161 | targetMap1_list[count] = to_tensor(tagetattMap1) 162 | targetMap2_list[count] = to_tensor(tagetattMap2) 163 | initTarget_list[count] = to_tensor(init_target) 164 | 165 | trajBBox_list1[count] = torch.from_numpy(result_curr1) 166 | trajBBox_list2[count] = torch.from_numpy(result_curr2) 167 | 168 | # pdb.set_trace() 169 | 170 | trajScore_list1[count] = trajScore1 171 | trajScore_list2[count] = trajScore2 172 | 173 | count = count + 1 174 | 175 | 176 | #### Calculate the GIoU score 177 | gt_curr[2] = gt_curr[0]+gt_curr[2]; gt_curr[3] = gt_curr[1]+gt_curr[3] 178 | result_curr1[2] = result_curr1[0]+result_curr1[2]; result_curr1[3] = result_curr1[1]+result_curr1[3] 179 | result_curr2[2] = result_curr2[0]+result_curr2[2]; result_curr2[3] = result_curr2[1]+result_curr2[3] 180 | 181 | IoU_score_1 = IoU_score_1 + axis_aligned_iou(gt_curr, result_curr1) 182 | IoU_score_2 = IoU_score_2 + axis_aligned_iou(gt_curr, result_curr2) 183 | 184 | # pdb.set_trace() 185 | 186 | optimizer.zero_grad() 187 | pred_traj_score1 = traj_critic_net(image_list, attMap_list, targetImg1_list, targetMap1_list, initTarget_list, trajBBox_list1, trajScore_list1) 188 | pred_traj_score2 = traj_critic_net(image_list, attMap_list, targetImg2_list, targetMap2_list, initTarget_list, trajBBox_list2, trajScore_list2) 189 | 190 | IoU_score_1 = torch.from_numpy(np.array(IoU_score_1)).float().cuda() 191 | IoU_score_2 = 
torch.from_numpy(np.array(IoU_score_2)).float().cuda() 192 | traj_loss1 = loss_fn(pred_traj_score1, IoU_score_1) 193 | traj_loss2 = loss_fn(pred_traj_score2, IoU_score_2) 194 | total_loss = traj_loss1 + traj_loss2 195 | 196 | 197 | # print('Epoch:', epochID, " video: ", videoName, " loss:", total_loss.item()) 198 | 199 | # backward + optimize 200 | total_loss.backward() 201 | optimizer.step() 202 | 203 | 204 | epoch_totalLoss = epoch_totalLoss + total_loss.item() 205 | # Save weights 206 | if epochID % 50 == 0: 207 | print('==>> Epoch:', epochID, ' ==>> Train_loss->', epoch_totalLoss) 208 | checkpointName = str(epochID) + "_traj_critic_net.pkl" 209 | torch.save(traj_critic_net.state_dict(), './traj_measure_model_checkoints/'+checkpointName) 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | -------------------------------------------------------------------------------- /DeepMTA_code/webcam_demo.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # -------------------------------------------------------- 6 | 7 | import pdb 8 | import argparse, cv2, os 9 | import numpy as np 10 | import sys 11 | from imutils.video import FPS 12 | import json 13 | 14 | from trackers.tracker import SiamFC_Tracker, SiamRPN_Tracker, SiamMask_Tracker 15 | from benchmark.bench_utils.bbox_helper import cxy_wh_2_rect, xyxy_to_xywh 16 | 17 | # constants 18 | BRIGHTGREEN = [102, 255, 0] 19 | RED = [0, 0, 255] 20 | YELLOW = [0, 255, 255] 21 | np.set_printoptions(precision=6, suppress=True) 22 | 23 | OUTPUT_WIDTH = 740 24 | OUTPUT_HEIGHT = 555 25 | PADDING = 2 26 | 27 | parser = argparse.ArgumentParser(description='Webcam Test') 28 | parser.add_argument('-t', '--tracker', dest='tracker', required=True, 29 | help='Name of the tracker [SiamFC, SiamRPN, SiamMask]') 30 | parser.add_argument('--vanilla', action='store_true', 31 | help='run the tracker without memory') 32 | parser.add_argument('-v', '--viz', action='store_true', 33 | help='whether visualize result') 34 | parser.add_argument('--verbose', action='store_true', 35 | help='print info about temp mem') 36 | parser.add_argument('--lb_type', type=str, default='ensemble', 37 | help='Specify the type of lower bound [dynamic, ensemble]') 38 | 39 | drawnBox = np.zeros(4) 40 | boxToDraw = np.zeros(4) 41 | mousedown = False 42 | mouseupdown = False 43 | initialize = False 44 | 45 | def on_mouse(event, x, y, flags, params): 46 | global mousedown, mouseupdown, drawnBox, boxToDraw, initialize, boxToDraw_xywh 47 | if event == cv2.EVENT_LBUTTONDOWN: 48 | drawnBox[[0,2]] = x 49 | drawnBox[[1,3]] = y 
50 | mousedown = True 51 | mouseupdown = False 52 | elif mousedown and event == cv2.EVENT_MOUSEMOVE: 53 | drawnBox[2] = x 54 | drawnBox[3] = y 55 | elif event == cv2.EVENT_LBUTTONUP: 56 | drawnBox[2] = x 57 | drawnBox[3] = y 58 | mousedown = False 59 | mouseupdown = True 60 | initialize = True 61 | boxToDraw = drawnBox.copy() 62 | boxToDraw[[0, 2]] = np.sort(boxToDraw[[0, 2]]) 63 | boxToDraw[[1, 3]] = np.sort(boxToDraw[[1, 3]]) 64 | boxToDraw_xywh = xyxy_to_xywh(boxToDraw) 65 | 66 | def bb_on_im(im, location, mask): 67 | location = [int(l) for l in location] # 68 | 69 | if len(mask): 70 | im[:, :, 2] = mask * 255 + (1 - mask) * im[:, :, 2] 71 | 72 | # prediction 73 | cv2.rectangle(im, (location[0], location[1]), 74 | (location[0] + location[2], location[1] + location[3]), 75 | (0, 255, 255), 3) 76 | 77 | return im 78 | 79 | def show_webcam(tracker, mirror=False, viz=False): 80 | global initialize 81 | 82 | vs = cv2.VideoCapture(0) 83 | cv2.namedWindow('Webcam', cv2.WINDOW_NORMAL) 84 | cv2.resizeWindow('Webcam', OUTPUT_WIDTH, OUTPUT_HEIGHT) 85 | cv2.setMouseCallback('Webcam', on_mouse, 0) 86 | 87 | outputBoxToDraw = None 88 | bbox = None 89 | fps = None 90 | state = None 91 | mask = [] 92 | 93 | # loop over video stream ims 94 | while True: 95 | _, im = vs.read() 96 | 97 | if mirror: 98 | im = cv2.flip(im, 1) 99 | 100 | if mousedown: 101 | (x1, y1, x2, y2) = [int(l) for l in boxToDraw] 102 | cv2.rectangle(im, (x1, y1), (x2, y2), 103 | BRIGHTGREEN, PADDING) 104 | 105 | elif mouseupdown: 106 | if initialize: 107 | init_pos = boxToDraw_xywh[[0, 1]] 108 | init_sz = boxToDraw_xywh[[2, 3]] 109 | state = tracker.setup(im, init_pos, init_sz) 110 | initialize = False 111 | fps = FPS().start() 112 | else: 113 | state = tracker.track(im, state) 114 | location = cxy_wh_2_rect(state['target_pos'], state['target_sz']) 115 | (cx, cy, w, h) = [int(l) for l in location] 116 | 117 | fps.update() 118 | fps.stop() 119 | 120 | # Display the image 121 | info = [ 122 | ("Score:", f"{state['score']:.4f}"), 123 | ("FPS:", f"{fps.fps():.2f}"), 124 | ] 125 | 126 | if not state['score'] > 0.8: 127 | info.insert(0, ("Object lost since", "")) 128 | else: 129 | if 'mask' in state.keys(): 130 | mask = state['mask'] > state['p'].seg_thr 131 | im = bb_on_im(im, location, mask) 132 | 133 | for (i, (k, v)) in enumerate(info): 134 | text = "{}: {}".format(k, v) 135 | cv2.putText(im, text, (10, OUTPUT_HEIGHT - ((i * 20) + 20)), 136 | cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) 137 | 138 | cv2.imshow("Webcam", im) 139 | 140 | # check for escape key 141 | key = cv2.waitKey(1) 142 | if key==27 or key==1048603: 143 | break 144 | 145 | # release the pointer 146 | cv2.destroyAllWindows() 147 | 148 | def load_cfg(args): 149 | json_path = f"configs/{args.tracker}/VOT2018_" 150 | if args.vanilla: 151 | json_path += "vanilla.json" 152 | else: 153 | json_path += f"THOR_{args.lb_type}.json" 154 | cfg = json.load(open(json_path)) 155 | return cfg 156 | 157 | if __name__ == '__main__': 158 | args = parser.parse_args() 159 | 160 | cfg = load_cfg(args) 161 | cfg['THOR']['viz'] = args.viz 162 | cfg['THOR']['verbose'] = args.verbose 163 | 164 | print("[INFO] Initializing the tracker.") 165 | if args.tracker == 'SiamFC': 166 | tracker = SiamFC_Tracker(cfg) 167 | elif args.tracker == 'SiamRPN': 168 | tracker = SiamRPN_Tracker(cfg) 169 | elif args.tracker == 'SiamMask': 170 | tracker = SiamMask_Tracker(cfg) 171 | elif args.tracker == 'SiamRPN_PP': 172 | tracker = SiamRPN_PP_Tracker(cfg) 173 | else: 174 | raise ValueError(f"Tracker {args.tracker} 
does not exist.") 175 | 176 | print("[INFO] Starting video stream.") 177 | show_webcam(tracker, mirror=True, viz=args.viz) 178 | -------------------------------------------------------------------------------- /GOT10K_dataset_video_list/01_mask_prepreocessing.m: -------------------------------------------------------------------------------- 1 | %% 2 | clc; clear all; close all; warning off; 3 | 4 | path = '/media/wangxiao/4T_wangxiao/GOT-10K_dataset/train/'; 5 | files = dir(path); 6 | files = files(3:end); 7 | 8 | for i = 1:size(files, 1) 9 | 10 | disp(['==>> deal with ', num2str(i), '/', num2str(size(files, 1))]); 11 | 12 | videoName = files(i).name; 13 | imgPath = [path videoName '/']; 14 | firstFrame = imread([imgPath '00000001.jpg']); 15 | 16 | gt_name = ['groundtruth.txt']; 17 | gt_file = importdata([path videoName '/' gt_name]); 18 | initial_BBox = gt_file(1, :); 19 | target_Object = imcrop(firstFrame, initial_BBox); 20 | target_Object = imresize(target_Object, [320 640]); 21 | savePath = [path videoName '/']; 22 | imwrite(target_Object, [savePath, 'init_targetObject.png']); 23 | 24 | maskSavePath = [path videoName '/resizedImage/']; 25 | mkdir(maskSavePath); 26 | imgfiles = dir([imgPath, '*.jpg']); 27 | for j=1:size(imgfiles, 1) 28 | image = imread([imgPath imgfiles(j).name]); 29 | image = imresize(image, [320 640]); 30 | imwrite(image, fullfile(maskSavePath, imgfiles(j).name),'jpg'); 31 | end 32 | 33 | 34 | maskSavePath = [path videoName '/mask_imgs/']; 35 | mkdir(maskSavePath); 36 | imgfiles = dir([imgPath, '*.jpg']); 37 | for j=1:size(imgfiles, 1) 38 | image = imread([imgPath imgfiles(j).name]); 39 | 40 | BBox = gt_file(j, :); 41 | 42 | if BBox(1) <= 0 BBox(1)=1; end 43 | if BBox(2) <= 0 BBox(2)=1; end 44 | if BBox(3) <= 0 BBox(3)=1; end 45 | if BBox(4) <= 0 BBox(4)=1; end 46 | 47 | BinaryMap = zeros(size(image, 1), size(image, 2)); 48 | for iidex = floor(BBox(1)):floor(BBox(1)+BBox(3)) 49 | for jidex = floor(BBox(2)):floor(BBox(2)+BBox(4)) 50 | BinaryMap(jidex, iidex) = 255; 51 | end 52 | end 53 | BinaryMap = imresize(BinaryMap, [320, 640]); 54 | imwrite(BinaryMap, fullfile(maskSavePath, imgfiles(j).name),'jpg'); 55 | 56 | 57 | 58 | end 59 | 60 | 61 | end 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /GOT10K_dataset_video_list/GOT10K_dataset_test_video_list.txt: -------------------------------------------------------------------------------- 1 | GOT-10k_Test_000001 2 | GOT-10k_Test_000002 3 | GOT-10k_Test_000003 4 | GOT-10k_Test_000004 5 | GOT-10k_Test_000005 6 | GOT-10k_Test_000006 7 | GOT-10k_Test_000007 8 | GOT-10k_Test_000008 9 | GOT-10k_Test_000009 10 | GOT-10k_Test_000010 11 | GOT-10k_Test_000011 12 | GOT-10k_Test_000012 13 | GOT-10k_Test_000013 14 | GOT-10k_Test_000014 15 | GOT-10k_Test_000015 16 | GOT-10k_Test_000016 17 | GOT-10k_Test_000017 18 | GOT-10k_Test_000018 19 | GOT-10k_Test_000019 20 | GOT-10k_Test_000020 21 | GOT-10k_Test_000021 22 | GOT-10k_Test_000022 23 | GOT-10k_Test_000023 24 | GOT-10k_Test_000024 25 | GOT-10k_Test_000025 26 | GOT-10k_Test_000026 27 | GOT-10k_Test_000027 28 | GOT-10k_Test_000028 29 | GOT-10k_Test_000029 30 | GOT-10k_Test_000030 31 | GOT-10k_Test_000031 32 | GOT-10k_Test_000032 33 | GOT-10k_Test_000033 34 | GOT-10k_Test_000034 35 | GOT-10k_Test_000035 36 | GOT-10k_Test_000036 37 | GOT-10k_Test_000037 38 | GOT-10k_Test_000038 39 | GOT-10k_Test_000039 40 | GOT-10k_Test_000040 41 | GOT-10k_Test_000041 42 | GOT-10k_Test_000042 43 | GOT-10k_Test_000043 44 | 
GOT-10k_Test_000044 45 | GOT-10k_Test_000045 46 | GOT-10k_Test_000046 47 | GOT-10k_Test_000047 48 | GOT-10k_Test_000048 49 | GOT-10k_Test_000049 50 | GOT-10k_Test_000050 51 | GOT-10k_Test_000051 52 | GOT-10k_Test_000052 53 | GOT-10k_Test_000053 54 | GOT-10k_Test_000054 55 | GOT-10k_Test_000055 56 | GOT-10k_Test_000056 57 | GOT-10k_Test_000057 58 | GOT-10k_Test_000058 59 | GOT-10k_Test_000059 60 | GOT-10k_Test_000060 61 | GOT-10k_Test_000061 62 | GOT-10k_Test_000062 63 | GOT-10k_Test_000063 64 | GOT-10k_Test_000064 65 | GOT-10k_Test_000065 66 | GOT-10k_Test_000066 67 | GOT-10k_Test_000067 68 | GOT-10k_Test_000068 69 | GOT-10k_Test_000069 70 | GOT-10k_Test_000070 71 | GOT-10k_Test_000071 72 | GOT-10k_Test_000072 73 | GOT-10k_Test_000073 74 | GOT-10k_Test_000074 75 | GOT-10k_Test_000075 76 | GOT-10k_Test_000076 77 | GOT-10k_Test_000077 78 | GOT-10k_Test_000078 79 | GOT-10k_Test_000079 80 | GOT-10k_Test_000080 81 | GOT-10k_Test_000081 82 | GOT-10k_Test_000082 83 | GOT-10k_Test_000083 84 | GOT-10k_Test_000084 85 | GOT-10k_Test_000085 86 | GOT-10k_Test_000086 87 | GOT-10k_Test_000087 88 | GOT-10k_Test_000088 89 | GOT-10k_Test_000089 90 | GOT-10k_Test_000090 91 | GOT-10k_Test_000091 92 | GOT-10k_Test_000092 93 | GOT-10k_Test_000093 94 | GOT-10k_Test_000094 95 | GOT-10k_Test_000095 96 | GOT-10k_Test_000096 97 | GOT-10k_Test_000097 98 | GOT-10k_Test_000098 99 | GOT-10k_Test_000099 100 | GOT-10k_Test_000100 101 | GOT-10k_Test_000101 102 | GOT-10k_Test_000102 103 | GOT-10k_Test_000103 104 | GOT-10k_Test_000104 105 | GOT-10k_Test_000105 106 | GOT-10k_Test_000106 107 | GOT-10k_Test_000107 108 | GOT-10k_Test_000108 109 | GOT-10k_Test_000109 110 | GOT-10k_Test_000110 111 | GOT-10k_Test_000111 112 | GOT-10k_Test_000112 113 | GOT-10k_Test_000113 114 | GOT-10k_Test_000114 115 | GOT-10k_Test_000115 116 | GOT-10k_Test_000116 117 | GOT-10k_Test_000117 118 | GOT-10k_Test_000118 119 | GOT-10k_Test_000119 120 | GOT-10k_Test_000120 121 | GOT-10k_Test_000121 122 | GOT-10k_Test_000122 123 | GOT-10k_Test_000123 124 | GOT-10k_Test_000124 125 | GOT-10k_Test_000125 126 | GOT-10k_Test_000126 127 | GOT-10k_Test_000127 128 | GOT-10k_Test_000128 129 | GOT-10k_Test_000129 130 | GOT-10k_Test_000130 131 | GOT-10k_Test_000131 132 | GOT-10k_Test_000132 133 | GOT-10k_Test_000133 134 | GOT-10k_Test_000134 135 | GOT-10k_Test_000135 136 | GOT-10k_Test_000136 137 | GOT-10k_Test_000137 138 | GOT-10k_Test_000138 139 | GOT-10k_Test_000139 140 | GOT-10k_Test_000140 141 | GOT-10k_Test_000141 142 | GOT-10k_Test_000142 143 | GOT-10k_Test_000143 144 | GOT-10k_Test_000144 145 | GOT-10k_Test_000145 146 | GOT-10k_Test_000146 147 | GOT-10k_Test_000147 148 | GOT-10k_Test_000148 149 | GOT-10k_Test_000149 150 | GOT-10k_Test_000150 151 | GOT-10k_Test_000151 152 | GOT-10k_Test_000152 153 | GOT-10k_Test_000153 154 | GOT-10k_Test_000154 155 | GOT-10k_Test_000155 156 | GOT-10k_Test_000156 157 | GOT-10k_Test_000157 158 | GOT-10k_Test_000158 159 | GOT-10k_Test_000159 160 | GOT-10k_Test_000160 161 | GOT-10k_Test_000161 162 | GOT-10k_Test_000162 163 | GOT-10k_Test_000163 164 | GOT-10k_Test_000164 165 | GOT-10k_Test_000165 166 | GOT-10k_Test_000166 167 | GOT-10k_Test_000167 168 | GOT-10k_Test_000168 169 | GOT-10k_Test_000169 170 | GOT-10k_Test_000170 171 | GOT-10k_Test_000171 172 | GOT-10k_Test_000172 173 | GOT-10k_Test_000173 174 | GOT-10k_Test_000174 175 | GOT-10k_Test_000175 176 | GOT-10k_Test_000176 177 | GOT-10k_Test_000177 178 | GOT-10k_Test_000178 179 | GOT-10k_Test_000179 180 | GOT-10k_Test_000180 
-------------------------------------------------------------------------------- /GOT10K_dataset_video_list/GOT10K_dataset_val_video_list.txt: -------------------------------------------------------------------------------- 1 | GOT-10k_Val_000001 2 | GOT-10k_Val_000002 3 | GOT-10k_Val_000003 4 | GOT-10k_Val_000004 5 | GOT-10k_Val_000005 6 | GOT-10k_Val_000006 7 | GOT-10k_Val_000007 8 | GOT-10k_Val_000008 9 | GOT-10k_Val_000009 10 | GOT-10k_Val_000010 11 | GOT-10k_Val_000011 12 | GOT-10k_Val_000012 13 | GOT-10k_Val_000013 14 | GOT-10k_Val_000014 15 | GOT-10k_Val_000015 16 | GOT-10k_Val_000016 17 | GOT-10k_Val_000017 18 | GOT-10k_Val_000018 19 | GOT-10k_Val_000019 20 | GOT-10k_Val_000020 21 | GOT-10k_Val_000021 22 | GOT-10k_Val_000022 23 | GOT-10k_Val_000023 24 | GOT-10k_Val_000024 25 | GOT-10k_Val_000025 26 | GOT-10k_Val_000026 27 | GOT-10k_Val_000027 28 | GOT-10k_Val_000028 29 | GOT-10k_Val_000029 30 | GOT-10k_Val_000030 31 | GOT-10k_Val_000031 32 | GOT-10k_Val_000032 33 | GOT-10k_Val_000033 34 | GOT-10k_Val_000034 35 | GOT-10k_Val_000035 36 | GOT-10k_Val_000036 37 | GOT-10k_Val_000037 38 | GOT-10k_Val_000038 39 | GOT-10k_Val_000039 40 | GOT-10k_Val_000040 41 | GOT-10k_Val_000041 42 | GOT-10k_Val_000042 43 | GOT-10k_Val_000043 44 | GOT-10k_Val_000044 45 | GOT-10k_Val_000045 46 | GOT-10k_Val_000046 47 | GOT-10k_Val_000047 48 | GOT-10k_Val_000048 49 | GOT-10k_Val_000049 50 | GOT-10k_Val_000050 51 | GOT-10k_Val_000051 52 | GOT-10k_Val_000052 53 | GOT-10k_Val_000053 54 | GOT-10k_Val_000054 55 | GOT-10k_Val_000055 56 | GOT-10k_Val_000056 57 | GOT-10k_Val_000057 58 | GOT-10k_Val_000058 59 | GOT-10k_Val_000059 60 | GOT-10k_Val_000060 61 | GOT-10k_Val_000061 62 | GOT-10k_Val_000062 63 | GOT-10k_Val_000063 64 | GOT-10k_Val_000064 65 | GOT-10k_Val_000065 66 | GOT-10k_Val_000066 67 | GOT-10k_Val_000067 68 | GOT-10k_Val_000068 69 | GOT-10k_Val_000069 70 | GOT-10k_Val_000070 71 | GOT-10k_Val_000071 72 | GOT-10k_Val_000072 73 | GOT-10k_Val_000073 74 | GOT-10k_Val_000074 75 | GOT-10k_Val_000075 76 | GOT-10k_Val_000076 77 | GOT-10k_Val_000077 78 | GOT-10k_Val_000078 79 | GOT-10k_Val_000079 80 | GOT-10k_Val_000080 81 | GOT-10k_Val_000081 82 | GOT-10k_Val_000082 83 | GOT-10k_Val_000083 84 | GOT-10k_Val_000084 85 | GOT-10k_Val_000085 86 | GOT-10k_Val_000086 87 | GOT-10k_Val_000087 88 | GOT-10k_Val_000088 89 | GOT-10k_Val_000089 90 | GOT-10k_Val_000090 91 | GOT-10k_Val_000091 92 | GOT-10k_Val_000092 93 | GOT-10k_Val_000093 94 | GOT-10k_Val_000094 95 | GOT-10k_Val_000095 96 | GOT-10k_Val_000096 97 | GOT-10k_Val_000097 98 | GOT-10k_Val_000098 99 | GOT-10k_Val_000099 100 | GOT-10k_Val_000100 101 | GOT-10k_Val_000101 102 | GOT-10k_Val_000102 103 | GOT-10k_Val_000103 104 | GOT-10k_Val_000104 105 | GOT-10k_Val_000105 106 | GOT-10k_Val_000106 107 | GOT-10k_Val_000107 108 | GOT-10k_Val_000108 109 | GOT-10k_Val_000109 110 | GOT-10k_Val_000110 111 | GOT-10k_Val_000111 112 | GOT-10k_Val_000112 113 | GOT-10k_Val_000113 114 | GOT-10k_Val_000114 115 | GOT-10k_Val_000115 116 | GOT-10k_Val_000116 117 | GOT-10k_Val_000117 118 | GOT-10k_Val_000118 119 | GOT-10k_Val_000119 120 | GOT-10k_Val_000120 121 | GOT-10k_Val_000121 122 | GOT-10k_Val_000122 123 | GOT-10k_Val_000123 124 | GOT-10k_Val_000124 125 | GOT-10k_Val_000125 126 | GOT-10k_Val_000126 127 | GOT-10k_Val_000127 128 | GOT-10k_Val_000128 129 | GOT-10k_Val_000129 130 | GOT-10k_Val_000130 131 | GOT-10k_Val_000131 132 | GOT-10k_Val_000132 133 | GOT-10k_Val_000133 134 | GOT-10k_Val_000134 135 | GOT-10k_Val_000135 136 | GOT-10k_Val_000136 137 | GOT-10k_Val_000137 138 | 
GOT-10k_Val_000138 139 | GOT-10k_Val_000139 140 | GOT-10k_Val_000140 141 | GOT-10k_Val_000141 142 | GOT-10k_Val_000142 143 | GOT-10k_Val_000143 144 | GOT-10k_Val_000144 145 | GOT-10k_Val_000145 146 | GOT-10k_Val_000146 147 | GOT-10k_Val_000147 148 | GOT-10k_Val_000148 149 | GOT-10k_Val_000149 150 | GOT-10k_Val_000150 151 | GOT-10k_Val_000151 152 | GOT-10k_Val_000152 153 | GOT-10k_Val_000153 154 | GOT-10k_Val_000154 155 | GOT-10k_Val_000155 156 | GOT-10k_Val_000156 157 | GOT-10k_Val_000157 158 | GOT-10k_Val_000158 159 | GOT-10k_Val_000159 160 | GOT-10k_Val_000160 161 | GOT-10k_Val_000161 162 | GOT-10k_Val_000162 163 | GOT-10k_Val_000163 164 | GOT-10k_Val_000164 165 | GOT-10k_Val_000165 166 | GOT-10k_Val_000166 167 | GOT-10k_Val_000167 168 | GOT-10k_Val_000168 169 | GOT-10k_Val_000169 170 | GOT-10k_Val_000170 171 | GOT-10k_Val_000171 172 | GOT-10k_Val_000172 173 | GOT-10k_Val_000173 174 | GOT-10k_Val_000174 175 | GOT-10k_Val_000175 176 | GOT-10k_Val_000176 177 | GOT-10k_Val_000177 178 | GOT-10k_Val_000178 179 | GOT-10k_Val_000179 180 | GOT-10k_Val_000180 181 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepMTA_PyTorch 2 | 3 | ### Official PyTorch Implementation of "Dynamic Attention-guided Multi-Trajectory Analysis for Single Object Tracking", Xiao Wang, Zhe Chen, Jin Tang, Bin Luo, Yaowei Wang, Yonghong Tian, Feng Wu, IEEE Transactions on Circuits and Systems for Video Technology (T-CSVT 2021) [[Paper](https://ieeexplore.ieee.org/document/9345930)] [[Project](https://sites.google.com/view/mt-track/home)] 4 | 5 | 6 | ## Abstract: 7 | Most existing single object trackers track the target within a single local search window, making them particularly vulnerable to challenging factors such as heavy occlusions and out-of-view movements. Despite attempts to further incorporate global search, prevailing mechanisms for combining local and global search are relatively static and thus still sub-optimal for improving tracking performance. By further studying the local and global search results, we raise a question: can we allow more dynamics in combining the two? In this paper, we propose to introduce more dynamics by devising a dynamic attention-guided multi-trajectory tracking strategy. In particular, we construct a dynamic appearance model that contains multiple target templates, each of which provides its own attention for locating the target in the new frame. Guided by the different attention maps, we maintain diversified tracking results for the target to build a multi-trajectory tracking history, allowing more candidates to represent the true target trajectory. After the whole sequence has been processed, we introduce a multi-trajectory selection network to find the best trajectory, which delivers improved tracking performance. Extensive experimental results show that our proposed tracking strategy achieves compelling performance on various large-scale tracking benchmarks.
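To make the trajectory-selection step above concrete, below is a minimal, self-contained PyTorch sketch of the idea: each candidate trajectory is scored by a small critic network and the highest-scoring trajectory is kept as the final result. The names (`TrajCritic`, `select_trajectory`) and the toy per-frame features (box plus tracker score) are illustrative assumptions only; the actual trajectory evaluation network in this repository additionally consumes frame crops, attention maps, and the initial target template (see `train_traj_measure_net.py`).

~~~
import torch
import torch.nn as nn


class TrajCritic(nn.Module):
    """Toy trajectory critic: maps the per-frame [x, y, w, h, tracker_score]
    features of one candidate trajectory to a single quality estimate.
    Illustrative stand-in only, not the repository's actual model."""

    def __init__(self, in_dim: int = 5, hidden: int = 64):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(in_dim, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
        )
        self.head = nn.Linear(hidden, 1)

    def forward(self, traj: torch.Tensor) -> torch.Tensor:
        # traj: (clip_len, 5) -> per-frame features -> temporal average pooling -> scalar score
        feats = self.encoder(traj)
        pooled = feats.mean(dim=0)
        return self.head(pooled).squeeze(-1)


def select_trajectory(critic: nn.Module, trajectories):
    """Score every candidate trajectory and return the index of the best one."""
    with torch.no_grad():
        scores = torch.stack([critic(t) for t in trajectories])
    return int(scores.argmax().item()), scores


if __name__ == "__main__":
    critic = TrajCritic()
    # two hypothetical candidates, e.g. the local-search and global-search trajectories
    traj_local = torch.rand(30, 5)
    traj_global = torch.rand(30, 5)
    best, scores = select_trajectory(critic, [traj_local, traj_global])
    print("selected trajectory:", best, "scores:", scores.tolist())
~~~

In the released training script the two candidate trajectories come from the two tracking result files of each video, and the supervision signal for the critic is the accumulated IoU between each trajectory and the ground truth over the sampled clip.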
8 | 9 | 10 | ## Our Proposed Approach: 11 | ![fig-1](https://github.com/wangxiao5791509/DeepMTA_PyTorch/blob/master/figures/pipeline.png) 12 | 13 | 14 | 15 | 16 | ## Install: 17 | ~~~ 18 | git clone https://github.com/wangxiao5791509/DeepMTA_PyTorch 19 | cd DeepMTA_TCSVT_project 20 | 21 | # create the conda environment 22 | conda env create -f environment.yml 23 | conda activate deepmta 24 | 25 | # build the vot toolkits 26 | bash benchmark/make_toolkits.sh 27 | ~~~ 28 | 29 | ## Download Dataset and Model: 30 | Download the pre-trained **Traj-Evaluation-Network** and **Dynamic-TANet-Model** from [[Onedrive](https://ahueducn-my.sharepoint.com/:f:/g/personal/e16101002_stu_ahu_edu_cn/EpMTPeqEVOFHoCvTLMI8WTUBNHt65WtgB31-cB8WqlaIfQ?e=HiDhLQ)] 31 | 32 | 33 | Get the datasets OTB2015, GOT-10k, LaSOT, UAV123, UAV20L, and OxUvA from [[List](https://github.com/wangxiao5791509/DeepMTA_PyTorch/blob/master/download_links_for_tracking_datasets.txt)]. 34 | 35 | Download the TNL2K dataset (published at CVPR 2021, with 1300/700 videos for the train and test subsets) from: https://sites.google.com/view/langtrackbenchmark/ 36 | 37 | 38 | ## Train: 39 | 1. You can directly use the pre-trained tracking model of THOR [[github](https://github.com/xl-sr/THOR)]; 40 | 41 | 2. Train the Dynamic Target-aware Attention network: 42 | ~~~ 43 | cd ~/DeepMTA_TCSVT_project/trackers/dcynet_modules_adaptis/ 44 | python train.py 45 | ~~~ 46 | 47 | 3. Train the Trajectory Evaluation Network: 48 | ~~~ 49 | python train_traj_measure_net.py 50 | ~~~ 51 | 52 | 53 | 54 | 55 | ## Tracking: 56 | 57 | Take the GOT-10k and LaSOT datasets as examples: 58 | ~~~ 59 | python testing.py -d GOT10k -t SiamRPN --lb_type ensemble 60 | 61 | python testing.py -d LaSOT -t SiamRPN --lb_type ensemble 62 | ~~~ 63 | 64 | 65 | 66 | 67 | 68 | 69 | ### Benchmark Results: 70 | Experimental results on the compared tracking benchmarks: 71 | 72 | [[OTB2015]()] 73 | [[LaSOT](https://stuahueducn-my.sharepoint.com/:u:/g/personal/e16101002_stu_ahu_edu_cn/Ec99MGQJXlJEjJFtpn7tJzoBTl77yVKt4wBOd9amXWR5lQ?e=u0eShJ)] 74 | [[OxUvA](https://stuahueducn-my.sharepoint.com/:u:/g/personal/e16101002_stu_ahu_edu_cn/Efqz3Y2KSVdCnEl0ephudGQBNELXW7dgESWfvGmmdVVFyQ?e=D049Wf)] 75 | [[GOT-10k](https://stuahueducn-my.sharepoint.com/:u:/g/personal/e16101002_stu_ahu_edu_cn/EbUB51geqFJEupM70SY6lfYBRkMAgKjfpH9MB6dlPKWzMg?e=kkuB6f)] 76 | [[UAV123](https://stuahueducn-my.sharepoint.com/:u:/g/personal/e16101002_stu_ahu_edu_cn/EbhtNj6ZHRpJp34c07Qk9a4Bd522CYx4zcjOFKB6AWTUpA?e=4qEBdP)] 77 | [[TNL2K](https://stuahueducn-my.sharepoint.com/:u:/g/personal/e16101002_stu_ahu_edu_cn/EaiGld9vweVNv6HiR3gfnlQBLlFiC29Se-MOFLJV_ooJIA?e=cXliLz)] 78 | 79 | 80 | 81 | 82 | 83 | ### Tracking Results: 84 | 85 | #### Tracking results on LaSOT dataset. 86 | ![fig-1](https://github.com/wangxiao5791509/DeepMTA_PyTorch/blob/master/figures/lasot_result.png) 87 | 88 | #### Tracking results on TNL2K dataset. 89 | ![fig-1](https://github.com/wangxiao5791509/DeepMTA_PyTorch/blob/master/figures/benchmarkresults.png) 90 | 91 | #### Attention prediction and Tracking Results.
92 | ![fig-1](https://github.com/wangxiao5791509/DeepMTA_PyTorch/blob/master/figures/attention_supplement.jpg) 93 | ![fig-1](https://github.com/wangxiao5791509/DeepMTA_PyTorch/blob/master/figures/trackingresults_vis.jpg) 94 | 95 | 96 | 97 | 98 | 99 | 100 | ### Acknowledgement: 101 | Our tracker is developed based on **THOR**, which was published at BMVC 2019 [[Paper](https://arxiv.org/pdf/1907.12920.pdf)] [[Code](https://github.com/xl-sr/THOR)] 102 | 103 | 104 | ### Other related works: 105 | * MTP: Multi-hypothesis Tracking and Prediction for Reduced Error Propagation, Xinshuo Weng, Boris Ivanovic, and Marco Pavone [[Paper](https://arxiv.org/pdf/2110.09481.pdf)] [[Code](https://www.xinshuoweng.com/projects/MTP/)] 106 | * D.-Y. Lee, J.-Y. Sim, and C.-S. Kim, “Multihypothesis trajectory analysis for robust visual tracking,” in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2015, pp. 5088–5096. [[Paper](http://openaccess.thecvf.com/content_cvpr_2015/papers/Lee_Multihypothesis_Trajectory_Analysis_2015_CVPR_paper.pdf)] 107 | * C. Kim, F. Li, A. Ciptadi, and J. M. Rehg, “Multiple hypothesis tracking revisited,” in Proceedings of the IEEE International Conference on Computer Vision, 2015, pp. 4696–4704. [[Paper](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Kim_Multiple_Hypothesis_Tracking_ICCV_2015_paper.pdf)] 108 | 109 | 110 | 111 | 112 | 113 | ### Citation: 114 | If you find this paper useful for your research, please consider citing our paper: 115 | ~~~ 116 | @article{wang2021deepmta, 117 | title={Dynamic Attention guided Multi-Trajectory Analysis for Single Object Tracking}, 118 | author={Wang, Xiao and Chen, Zhe and Tang, Jin and Luo, Bin and Wang, Yaowei and Tian, Yonghong and Wu, Feng}, 119 | journal={IEEE Transactions on Circuits and Systems for Video Technology}, 120 | doi={10.1109/TCSVT.2021.3056684}, 121 | year={2021} 122 | } 123 | ~~~ 124 | 125 | If you have any questions about this work, please contact me via wangxiaocvpr@foxmail.com 126 | 127 | 128 | -------------------------------------------------------------------------------- /deepmta_arts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/deepmta_arts.png -------------------------------------------------------------------------------- /download_links_for_tracking_datasets.txt: -------------------------------------------------------------------------------- 1 | Popular Tracking datasets: 2 | UAV123: https://pan.baidu.com/s/1AhNnfjF4fZe14sUFefU3iA password: 2iq4 3 | 4 | VOT2018: https://pan.baidu.com/s/1MOWZ5lcxfF0wsgSuj5g4Yw password: e5eh 5 | 6 | VisDrone2019: https://pan.baidu.com/s/1Y6ubKHuYX65mK_iDVSfKPQ password: yxb6 7 | 8 | OTB2015: https://pan.baidu.com/s/1ZjKgRMYSHfR_w3Z7iQEkYA password: t5i1 9 | 10 | DTB70: https://pan.baidu.com/s/1kfHrArw0aVhGPSM91WHomw password: e7qm 11 | 12 | TLP50 (Long-Term): https://amoudgl.github.io/tlp/ 13 | 14 | ILSVRC2015 VID: https://pan.baidu.com/s/1CXWgpAG4CYpk-WnaUY5mAQ password: uqzj 15 | 16 | NFS: https://pan.baidu.com/s/1ei54oKNA05iBkoUwXPOB7g password: vng1 17 | 18 | GOT10k: https://pan.baidu.com/s/172oiQPA_Ky2iujcW5Irlow password: uxds 19 | 20 | UAVDT: https://pan.baidu.com/s/1K8oo53mPYCxUFVMXIGLhVA password: keva 21 | 22 | YTB-VOS: https://pan.baidu.com/s/1WMB0q9GJson75QBFVfeH5A password: sf1m 23 | 24 | YTB-Crop511 (used in siamrpn++ and siammask):
https://pan.baidu.com/s/112zLS_02-Z2ouKGbnPlTjw password: ebq1 25 | 26 | TColor128: https://pan.baidu.com/s/1v4J6zWqZwj8fHi5eo5EJvQ password: 26d4 27 | 28 | DAVIS2017: https://pan.baidu.com/s/1JTsumpnkWotEJQE7KQmh6A password: c9qp 29 | 30 | YTB&VID (used in siamrpn): https://pan.baidu.com/s/1gF8PSZDzw-7EAVrdYHQwsA password: 6vkz 31 | 32 | TrackingNet: https://pan.baidu.com/s/1PXSRAqcw-KMfBIJYUtI4Aw password: nkb9 (Note that this link is provided by the SiamFC++ author) 33 | 34 | TAO: A Large-Scale Benchmark for Tracking Any Object: https://github.com/TAO-Dataset/tao 35 | 36 | VOT2018 and VOT2019: https://pan.baidu.com/s/1q6lv3cUhezBb5pmdj3BRGw password: d7r3 37 | 38 | VOT2018 LT: https://pan.baidu.com/s/16Q4_sxhBjmddIHU8b7XK3w password: 67xf 39 | 40 | VOT2019 LT: https://pan.baidu.com/s/1z9HBPNprbt2gb2RGzRJkwA password: 7yq5 41 | 42 | VOT2019 RGB-Thermal: https://pan.baidu.com/s/1oT8qFmKBpYa3VlXP1ZwfCA password: mn1b 43 | 44 | -------------------------------------------------------------------------------- /figures/attention_supplement.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/attention_supplement.jpg -------------------------------------------------------------------------------- /figures/benchmarkresults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/benchmarkresults.png -------------------------------------------------------------------------------- /figures/lasot_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/lasot_result.png -------------------------------------------------------------------------------- /figures/lasot_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/lasot_results.jpg -------------------------------------------------------------------------------- /figures/motivation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/motivation.jpg -------------------------------------------------------------------------------- /figures/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/pipeline.png -------------------------------------------------------------------------------- /figures/trackingresults_vis.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/trackingresults_vis.jpg --------------------------------------------------------------------------------