├── DeepMTA_TCSVT_project.tar.gz ├── DeepMTA_code ├── README.md ├── benchmark │ └── bench_utils │ │ ├── bbox_helper.py │ │ ├── benchmark_helper.py │ │ └── pysot │ │ ├── datasets │ │ ├── __init__.py │ │ ├── dataset.py │ │ ├── video.py │ │ └── vot.py │ │ ├── evaluation │ │ ├── __init__.py │ │ ├── ar_benchmark.py │ │ ├── eao_benchmark.py │ │ └── f1_benchmark.py │ │ └── utils │ │ ├── __init__.py │ │ ├── build │ │ └── temp.linux-x86_64-3.7 │ │ │ ├── region.o │ │ │ └── src │ │ │ └── region.o │ │ ├── c_region.pxd │ │ ├── misc.py │ │ ├── region.c │ │ ├── region.cpython-37m-x86_64-linux-gnu.so │ │ ├── region.pyx │ │ ├── setup.py │ │ ├── src │ │ ├── buffer.h │ │ ├── region.c │ │ └── region.h │ │ └── statistics.py ├── configs │ ├── SiamFC │ │ ├── OTB2015_THOR_dynamic.json │ │ ├── OTB2015_THOR_ensemble.json │ │ ├── OTB2015_vanilla.json │ │ ├── VOT2018_THOR_dynamic.json │ │ ├── VOT2018_THOR_ensemble.json │ │ └── VOT2018_vanilla.json │ ├── SiamMask │ │ ├── OTB2015_THOR_dynamic.json │ │ ├── OTB2015_THOR_ensemble.json │ │ ├── OTB2015_vanilla.json │ │ ├── VOT2018_THOR_dynamic.json │ │ ├── VOT2018_THOR_ensemble.json │ │ └── VOT2018_vanilla.json │ └── SiamRPN │ │ ├── GOT10k_THOR_ensemble.json │ │ ├── LaSOT_THOR_ensemble.json │ │ ├── OTB2015_THOR_dynamic.json │ │ ├── OTB2015_THOR_ensemble.json │ │ ├── OTB2015_vanilla.json │ │ ├── OXUVA_THOR_ensemble.json │ │ ├── UAV123_THOR_ensemble.json │ │ ├── UAV20L_THOR_ensemble.json │ │ ├── VOT2018_THOR_dynamic.json │ │ ├── VOT2018_THOR_ensemble.json │ │ └── VOT2018_vanilla.json ├── data │ ├── download_links_for_tracking_datasets.txt │ └── get_test_otb2015_data.sh ├── environment.yml ├── network.py ├── scripts │ └── transform_oxuva_results_txt_to_csv.py ├── temp_DIR_TO_SAVE_static_Global_attentionMap │ └── mkdir_your_self.txt ├── testing.py ├── trackers │ ├── SiamFC │ │ ├── config.py │ │ ├── model.pth │ │ ├── net.py │ │ ├── siamfc.py │ │ └── utils.py │ ├── SiamMask │ │ ├── net.py │ │ ├── resnet.py │ │ ├── siammask.py │ │ └── utils │ │ │ ├── anchors.py │ │ │ ├── bbox_helper.py │ │ │ ├── config_helper.py │ │ │ ├── load_helper.py │ │ │ ├── log_helper.py │ │ │ ├── tracker_config.py │ │ │ └── tracking_utils.py │ └── dcynet_modules_adaptis │ │ ├── __init__.py │ │ ├── constants.py │ │ ├── data_loader.py │ │ ├── generator.py │ │ ├── logger.py │ │ ├── ops.py │ │ ├── resnet.py │ │ ├── train.py │ │ └── utils.py ├── train_traj_measure_net.py └── webcam_demo.py ├── GOT10K_dataset_video_list ├── 01_mask_prepreocessing.m ├── GOT10K_dataset_test_video_list.txt ├── GOT10K_dataset_train_video_list.txt └── GOT10K_dataset_val_video_list.txt ├── README.md ├── deepmta_arts.png ├── download_links_for_tracking_datasets.txt └── figures ├── attention_supplement.jpg ├── benchmarkresults.png ├── lasot_result.png ├── lasot_results.jpg ├── motivation.jpg ├── pipeline.png └── trackingresults_vis.jpg /DeepMTA_TCSVT_project.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/DeepMTA_TCSVT_project.tar.gz -------------------------------------------------------------------------------- /DeepMTA_code/README.md: -------------------------------------------------------------------------------- 1 | ################################################################################################### 2 | ################################# GOT10K ########################################### 3 | 
################################################################################################### 4 | python testing.py -d GOT10k -t SiamRPN --lb_type ensemble 5 | 6 | 7 | ################################################################################################### 8 | ################################# LaSOT ########################################### 9 | ################################################################################################### 10 | python testing.py -d LaSOT -t SiamRPN --lb_type ensemble 11 | 12 | python lasot_thor_testing.py -d LaSOT -t SiamRPN --lb_type ensemble 13 | 14 | 15 | ################################################################################################### 16 | ################################# UAV20L ########################################## 17 | ################################################################################################### 18 | python testing.py -d UAV20L -t SiamRPN --lb_type ensemble 19 | 20 | ################################################################################################### 21 | ################################# UAV123 ########################################## 22 | ################################################################################################### 23 | python testing.py -d UAV123 -t SiamRPN --lb_type ensemble 24 | 25 | ################################################################################################### 26 | ################################# OXUVA ########################################## 27 | ################################################################################################### 28 | python testing.py -d OXUVA -t SiamRPN --lb_type ensemble 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/bbox_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | import numpy as np 7 | from collections import namedtuple 8 | 9 | Corner = namedtuple('Corner', 'x1 y1 x2 y2') 10 | BBox = Corner 11 | Center = namedtuple('Center', 'x y w h') 12 | 13 | 14 | def corner2center(corner): 15 | """ 16 | :param corner: Corner or np.array 4*N 17 | :return: Center or 4 np.array N 18 | """ 19 | if isinstance(corner, Corner): 20 | x1, y1, x2, y2 = corner 21 | return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, (x2 - x1), (y2 - y1)) 22 | else: 23 | x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3] 24 | x = (x1 + x2) * 0.5 25 | y = (y1 + y2) * 0.5 26 | w = x2 - x1 27 | h = y2 - y1 28 | return x, y, w, h 29 | 30 | 31 | def center2corner(center): 32 | """ 33 | :param center: Center or np.array 4*N 34 | :return: Corner or np.array 4*N 35 | """ 36 | if isinstance(center, Center): 37 | x, y, w, h = center 38 | return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5) 39 | else: 40 | x, y, w, h = center[0], center[1], center[2], center[3] 41 | x1 = x - w * 0.5 42 | y1 = y - h * 0.5 43 | x2 = x + w * 0.5 44 | y2 = y + h * 0.5 45 | return x1, y1, x2, y2 46 | 47 | 48 | def cxy_wh_2_rect(pos, sz): 49 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) # 0-index 50 | 51 | 52 | def rect_2_cxy_wh(rect): 53 | return np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2]), np.array([rect[2], rect[3]]) # 0-index 54 | 55 | 
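# --- illustrative round-trip (editor's sketch, not part of bbox_helper.py) ---
# corner2center and center2corner above are inverses of each other; assuming the
# Corner/Center namedtuples defined at the top of this file, a round-trip looks like:
#   c = Corner(10, 20, 50, 80)              # x1, y1, x2, y2
#   corner2center(c)                        # -> Center(x=30.0, y=50.0, w=40, h=60)
#   center2corner(Center(30, 50, 40, 60))   # -> Corner(x1=10.0, y1=20.0, x2=50.0, y2=80.0)
#   cxy_wh_2_rect((30, 50), (40, 60))       # -> array([10., 20., 40., 60.])  (x, y, w, h, 0-indexed)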
56 | def get_axis_aligned_bbox(region): 57 | nv = region.size 58 | if nv == 8: 59 | cx = np.mean(region[0::2]) 60 | cy = np.mean(region[1::2]) 61 | x1 = min(region[0::2]) 62 | x2 = max(region[0::2]) 63 | y1 = min(region[1::2]) 64 | y2 = max(region[1::2]) 65 | A1 = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6]) 66 | A2 = (x2 - x1) * (y2 - y1) 67 | s = np.sqrt(A1 / A2) 68 | w = s * (x2 - x1) + 1 69 | h = s * (y2 - y1) + 1 70 | else: 71 | x = region[0] 72 | y = region[1] 73 | w = region[2] 74 | h = region[3] 75 | cx = x+w/2 76 | cy = y+h/2 77 | 78 | return cx, cy, w, h 79 | 80 | 81 | LIMIT = 99999999 82 | def xyxy_to_xywh(bboxes, clipMin=-LIMIT, clipWidth=LIMIT, clipHeight=LIMIT, 83 | round=False): 84 | addedAxis = False 85 | if isinstance(bboxes, list): 86 | bboxes = np.array(bboxes).astype(np.float32) 87 | if len(bboxes.shape) == 1: 88 | addedAxis = True 89 | bboxes = bboxes[:,np.newaxis] 90 | bboxesOut = np.zeros(bboxes.shape) 91 | x1 = bboxes[0,...] 92 | y1 = bboxes[1,...] 93 | x2 = bboxes[2,...] 94 | y2 = bboxes[3,...] 95 | bboxesOut[0,...] = (x1 + x2) / 2.0 96 | bboxesOut[1,...] = (y1 + y2) / 2.0 97 | bboxesOut[2,...] = x2 - x1 98 | bboxesOut[3,...] = y2 - y1 99 | if clipMin != -LIMIT or clipWidth != LIMIT or clipHeight != LIMIT: 100 | bboxesOut = clip_bbox(bboxesOut, clipMin, clipWidth, clipHeight) 101 | if bboxesOut.shape[0] > 4: 102 | bboxesOut[4:,...] = bboxes[4:,...] 103 | if addedAxis: 104 | bboxesOut = bboxesOut[:,0] 105 | if round: 106 | bboxesOut = np.round(bboxesOut).astype(int) 107 | return bboxesOut 108 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/benchmark_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from os.path import join, realpath, dirname, exists, isdir 7 | from os import listdir 8 | import logging 9 | import glob 10 | import numpy as np 11 | import json 12 | from collections import OrderedDict 13 | import functools 14 | 15 | import pdb 16 | 17 | 18 | 19 | 20 | 21 | def get_dataset_zoo(): 22 | root = realpath(join(dirname(__file__), '../../data')) 23 | zoos = listdir(root) 24 | 25 | def valid(x): 26 | y = join(root, x) 27 | if not isdir(y): return False 28 | 29 | return exists(join(y, 'list.txt')) \ 30 | or exists(join(y, 'train', 'meta.json'))\ 31 | or exists(join(y, 'ImageSets', '2016', 'val.txt')) 32 | 33 | zoos = list(filter(valid, zoos)) 34 | return zoos 35 | 36 | 37 | dataset_zoo = get_dataset_zoo() 38 | 39 | def load_tasks_with_annotations(fname): 40 | with open(fname, 'r') as fp: 41 | if fname.endswith('.csv'): 42 | tracks = oxuva.load_dataset_annotations_csv(fp) 43 | else: 44 | raise ValueError(f"unknown extension: {fname}") 45 | return oxuva.map_dict(oxuva.make_task_from_track, tracks) 46 | 47 | 48 | 49 | 50 | 51 | 52 | def load_dataset(dataset): 53 | 54 | ################################################################## 55 | #### VOT2018, VOT2018-LT, OTB2015, GOT10k, LaSOT, OxUVA 56 | ################################################################## 57 | 58 | info = OrderedDict() 59 | if 'VOT' in dataset: 60 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 61 | if not exists(base_path): 62 | logging.error("Please download test dataset!!!") 63 | 
exit() 64 | list_path = join(base_path, 'list.txt') 65 | with open(list_path) as f: 66 | videos = [v.strip() for v in f.readlines()] 67 | for video in videos: 68 | video_path = join(base_path, video) 69 | image_path = join(video_path, '*.jpg') 70 | image_files = sorted(glob.glob(image_path)) 71 | if len(image_files) == 0: # VOT2018 72 | image_path = join(video_path, 'color', '*.jpg') 73 | image_files = sorted(glob.glob(image_path)) 74 | gt_path = join(video_path, 'groundtruth.txt') 75 | gt = np.loadtxt(gt_path, delimiter=',').astype(np.float64) 76 | if gt.shape[1] == 4: 77 | gt = np.column_stack((gt[:, 0], gt[:, 1], gt[:, 0], gt[:, 1] + gt[:, 3]-1, 78 | gt[:, 0] + gt[:, 2]-1, gt[:, 1] + gt[:, 3]-1, gt[:, 0] + gt[:, 2]-1, gt[:, 1])) 79 | info[video] = {'image_files': image_files, 'gt': gt, 'name': video} 80 | 81 | 82 | elif 'VOT2018-LT' in dataset: 83 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 84 | if not exists(base_path): 85 | logging.error("Please download test dataset!!!") 86 | exit() 87 | list_path = join(base_path, 'list.txt') 88 | with open(list_path) as f: 89 | videos = [v.strip() for v in f.readlines()] 90 | for video in videos: 91 | video_path = join(base_path, video) 92 | image_path = join(video_path, '*.jpg') 93 | image_files = sorted(glob.glob(image_path)) 94 | if len(image_files) == 0: # VOT2018 95 | image_path = join(video_path, 'color', '*.jpg') 96 | image_files = sorted(glob.glob(image_path)) 97 | gt_path = join(video_path, 'groundtruth.txt') 98 | gt = np.loadtxt(gt_path, delimiter=',').astype(np.float64) 99 | if gt.shape[1] == 4: 100 | gt = np.column_stack((gt[:, 0], gt[:, 1], gt[:, 0], gt[:, 1] + gt[:, 3]-1, 101 | gt[:, 0] + gt[:, 2]-1, gt[:, 1] + gt[:, 3]-1, gt[:, 0] + gt[:, 2]-1, gt[:, 1])) 102 | info[video] = {'image_files': image_files, 'gt': gt, 'name': video} 103 | 104 | 105 | elif 'OTB' in dataset: 106 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 107 | if not exists(base_path): 108 | print("Please download OTB dataset into data folder") 109 | json_path = base_path + '.json' 110 | info = json.load(open(json_path, 'r')) 111 | 112 | # load the video frames 113 | for v in info.keys(): 114 | path_name = info[v]['name'] 115 | info[v]['image_files'] = [join(base_path, path_name, 'img', im_f) for im_f in info[v]['image_files']] 116 | info[v]['gt'] = np.array(info[v]['gt_rect'])-[1,1,0,0] # our tracker is 0-index 117 | info[v]['name'] = v 118 | 119 | 120 | elif 'GOT' in dataset: 121 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 122 | if not exists(base_path): 123 | print("Please download GOT10K dataset into data folder") 124 | 125 | json_path = base_path + '.json' 126 | info = json.load(open(json_path, 'r')) 127 | 128 | 129 | elif 'GOT10k_train_val' in dataset: 130 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 131 | if not exists(base_path): 132 | print("Please download GOT10k_train_val dataset into data folder") 133 | 134 | json_path = base_path + '.json' 135 | info = json.load(open(json_path, 'r')) 136 | 137 | 138 | 139 | elif 'LaSOT' in dataset: 140 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 141 | if not exists(base_path): 142 | print("Please download LaSOT dataset into data folder") 143 | json_path = base_path + '.json' 144 | info = json.load(open(json_path, 'r')) 145 | 146 | elif 'UAV20L' in dataset: 147 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 148 | if not exists(base_path): 149 | print("Please download 
UAV20L dataset into data folder") 150 | json_path = base_path + '.json' 151 | info = json.load(open(json_path, 'r')) 152 | 153 | elif 'OXUVA' in dataset: 154 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 155 | if not exists(base_path): 156 | print("Please download OXUVA dataset into data folder") 157 | json_path = base_path + '.json' 158 | info = json.load(open(json_path, 'r')) 159 | 160 | elif 'TC128' in dataset: 161 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 162 | if not exists(base_path): 163 | print("Please download TC128 dataset into data folder") 164 | json_path = base_path + '.json' 165 | info = json.load(open(json_path, 'r')) 166 | 167 | elif 'UAV123' in dataset: 168 | base_path = join(realpath(dirname(__file__)), '../../data', dataset) 169 | if not exists(base_path): 170 | print("Please download UAV123 dataset into data folder") 171 | json_path = base_path + '.json' 172 | info = json.load(open(json_path, 'r')) 173 | 174 | 175 | else: 176 | logging.error(f'{dataset} not supported') 177 | exit() 178 | return info 179 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | from .vot import VOTDataset, VOTLTDataset 10 | 11 | 12 | class DatasetFactory(object): 13 | @staticmethod 14 | def create_dataset(**kwargs): 15 | """ 16 | Args: 17 | name: dataset name 'VOT2018', 'VOT2016' 18 | dataset_root: dataset root 19 | Return: 20 | dataset 21 | """ 22 | assert 'name' in kwargs, "should provide dataset name" 23 | name = kwargs['name'] 24 | if 'VOT2018' == name or 'VOT2016' == name: 25 | dataset = VOTDataset(**kwargs) 26 | elif 'VOT-LT' == name: 27 | dataset = VOTLTDataset(**kwargs) 28 | else: 29 | raise Exception("unknow dataset {}".format(kwargs['name'])) 30 | return dataset 31 | 32 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | class Dataset(object): 10 | def __init__(self, name, dataset_root): 11 | self.name = name 12 | self.dataset_root = dataset_root 13 | self.videos = None 14 | 15 | def __getitem__(self, idx): 16 | if isinstance(idx, str): 17 | return self.videos[idx] 18 | elif isinstance(idx, int): 19 | return self.videos[sorted(list(self.videos.keys()))[idx]] 20 | 21 | def __len__(self): 22 | return len(self.videos) 23 | 24 | def __iter__(self): 25 | keys = sorted(list(self.videos.keys())) 26 | for key in keys: 27 | yield self.videos[key] 28 | 29 | def set_tracker(self, 
path, tracker_names): 30 | """ 31 | Args: 32 | path: path to tracker results, 33 | tracker_names: list of tracker name 34 | """ 35 | self.tracker_path = path 36 | self.tracker_names = tracker_names 37 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/datasets/video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | from glob import glob 4 | 5 | class Video(object): 6 | def __init__(self, name, root, video_dir, init_rect, img_names, 7 | gt_rect, attr): 8 | self.name = name 9 | self.video_dir = video_dir 10 | self.init_rect = init_rect 11 | self.gt_traj = gt_rect 12 | self.attr = attr 13 | self.pred_trajs = {} 14 | self.img_names = [os.path.join(root, x) for x in img_names] 15 | self.imgs = None 16 | 17 | def load_tracker(self, path, tracker_names=None, store=True): 18 | """ 19 | Args: 20 | path(str): path to result 21 | tracker_name(list): name of tracker 22 | """ 23 | if not tracker_names: 24 | tracker_names = [x.split('/')[-1] for x in glob(path) 25 | if os.path.isdir(x)] 26 | if isinstance(tracker_names, str): 27 | tracker_names = [tracker_names] 28 | for name in tracker_names: 29 | traj_file = os.path.join(path, name, self.name+'.txt') 30 | if os.path.exists(traj_file): 31 | with open(traj_file, 'r') as f : 32 | pred_traj = [list(map(float, x.strip().split(','))) 33 | for x in f.readlines()] 34 | if len(pred_traj) != len(self.gt_traj): 35 | print(name, len(pred_traj), len(self.gt_traj), self.name) 36 | if store: 37 | self.pred_trajs[name] = pred_traj 38 | else: 39 | return pred_traj 40 | else: 41 | print(traj_file) 42 | self.tracker_names = list(self.pred_trajs.keys()) 43 | 44 | def load_img(self): 45 | if self.imgs is None: 46 | self.imgs = [cv2.imread(x) for x in self.img_names] 47 | self.width = self.imgs[0].shape[1] 48 | self.height = self.imgs[0].shape[0] 49 | 50 | def free_img(self): 51 | self.imgs = None 52 | 53 | def __len__(self): 54 | return len(self.img_names) 55 | 56 | def __getitem__(self, idx): 57 | if self.imgs is None: 58 | return cv2.imread(self.img_names[idx]), self.gt_traj[idx] 59 | else: 60 | return self.imgs[idx], self.gt_traj[idx] 61 | 62 | def __iter__(self): 63 | for i in range(len(self.img_names)): 64 | if self.imgs is not None: 65 | yield self.imgs[i], self.gt_traj[i] 66 | else: 67 | yield cv2.imread(self.img_names[i]), self.gt_traj[i] 68 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/datasets/vot.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | import os 10 | import json 11 | import numpy as np 12 | 13 | from glob import glob 14 | from tqdm import tqdm 15 | from PIL import Image 16 | from ipdb import set_trace 17 | 18 | from .dataset import Dataset 19 | from .video import Video 20 | 21 | 22 | class VOTVideo(Video): 23 | """ 24 | Args: 25 | name: video name 26 | root: dataset root 27 | video_dir: video directory 28 | init_rect: init rectangle 29 | img_names: image names 30 | gt_rect: groundtruth 
rectangle 31 | camera_motion: camera motion tag 32 | illum_change: illum change tag 33 | motion_change: motion change tag 34 | size_change: size change 35 | occlusion: occlusion 36 | """ 37 | def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, 38 | camera_motion, illum_change, motion_change, size_change, occlusion, width, height): 39 | super(VOTVideo, self).__init__(name, root, video_dir, init_rect, img_names, gt_rect, None) 40 | self.tags= {'all': [1] * len(gt_rect)} 41 | self.tags['camera_motion'] = camera_motion 42 | self.tags['illum_change'] = illum_change 43 | self.tags['motion_change'] = motion_change 44 | self.tags['size_change'] = size_change 45 | self.tags['occlusion'] = occlusion 46 | 47 | self.width = width 48 | self.height = height 49 | 50 | # empty tag 51 | all_tag = [v for k, v in self.tags.items() if len(v) > 0 ] 52 | self.tags['empty'] = np.all(1 - np.array(all_tag), axis=1).astype(np.int32).tolist() 53 | 54 | self.tag_names = list(self.tags.keys()) 55 | 56 | def select_tag(self, tag, start=0, end=0): 57 | if tag == 'empty': 58 | return self.tags[tag] 59 | return self.tags[tag][start:end] 60 | 61 | def load_tracker(self, path, tracker_names=None, store=True): 62 | """ 63 | Args: 64 | path(str): path to result 65 | tracker_name(list): name of tracker 66 | """ 67 | if not tracker_names: 68 | tracker_names = [x.split('/')[-1] for x in glob(path) 69 | if os.path.isdir(x)] 70 | if isinstance(tracker_names, str): 71 | tracker_names = [tracker_names] 72 | for name in tracker_names: 73 | traj_files = glob(os.path.join(path, name, 'baseline', self.name, '*0*.txt')) 74 | if len(traj_files) == 15: 75 | traj_files = traj_files 76 | else: 77 | traj_files = traj_files[0:1] 78 | pred_traj = [] 79 | for traj_file in traj_files: 80 | with open(traj_file, 'r') as f: 81 | traj = [list(map(float, x.strip().split(','))) 82 | for x in f.readlines()] 83 | pred_traj.append(traj) 84 | if store: 85 | self.pred_trajs[name] = pred_traj 86 | else: 87 | return pred_traj 88 | 89 | 90 | class VOTDataset(Dataset): 91 | """ 92 | Args: 93 | name: dataset name, should be 'VOT2018', 'VOT2016' 94 | dataset_root: dataset root 95 | load_img: wether to load all imgs 96 | """ 97 | def __init__(self, name, dataset_root): 98 | super(VOTDataset, self).__init__(name, dataset_root) 99 | try: 100 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 101 | meta_data = json.load(f) 102 | except: 103 | download_str = '# download json file for eval toolkit\n'+\ 104 | 'cd $SiamMask/data\n'+\ 105 | 'wget http://www.robots.ox.ac.uk/~qwang/VOT2016.json\n'+\ 106 | 'wget http://www.robots.ox.ac.uk/~qwang/VOT2018.json' 107 | print(download_str) 108 | exit() 109 | 110 | # load videos 111 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 112 | self.videos = {} 113 | for video in pbar: 114 | pbar.set_postfix_str(video) 115 | self.videos[video] = VOTVideo(video, 116 | dataset_root, 117 | meta_data[video]['video_dir'], 118 | meta_data[video]['init_rect'], 119 | meta_data[video]['img_names'], 120 | meta_data[video]['gt_rect'], 121 | meta_data[video]['camera_motion'], 122 | meta_data[video]['illum_change'], 123 | meta_data[video]['motion_change'], 124 | meta_data[video]['size_change'], 125 | meta_data[video]['occlusion'], 126 | meta_data[video]['width'], 127 | meta_data[video]['height']) 128 | 129 | self.tags = ['all', 'camera_motion', 'illum_change', 'motion_change', 130 | 'size_change', 'occlusion', 'empty'] 131 | 132 | class VOTLTVideo(Video): 133 | """ 134 | Args: 135 | name: video name 
136 | root: dataset root 137 | video_dir: video directory 138 | init_rect: init rectangle 139 | img_names: image names 140 | gt_rect: groundtruth rectangle 141 | """ 142 | def __init__(self, name, root, video_dir, init_rect, img_names, 143 | gt_rect, load_img=False): 144 | super(VOTLTVideo, self).__init__(name, root, video_dir, 145 | init_rect, img_names, gt_rect, None) 146 | self.gt_traj = [[0] if np.isnan(bbox[0]) else bbox 147 | for bbox in self.gt_traj] 148 | if not load_img: 149 | img_name = os.path.join(root, self.img_names[0]) 150 | # adjustments 151 | img_name = img_name.replace('color/', '').replace('data', 'data/VOT-LT') 152 | img = np.array(Image.open(img_name), np.uint8) 153 | self.width = img.shape[1] 154 | self.height = img.shape[0] 155 | self.confidence = {} 156 | 157 | def load_tracker(self, path, tracker_names=None, store=True): 158 | """ 159 | Args: 160 | path(str): path to result 161 | tracker_name(list): name of tracker 162 | """ 163 | if not tracker_names: 164 | tracker_names = [x.split('/')[-1] for x in glob(path) 165 | if os.path.isdir(x)] 166 | if isinstance(tracker_names, str): 167 | tracker_names = [tracker_names] 168 | for name in tracker_names: 169 | traj_files = glob(os.path.join(path, name, 'baseline', self.name, '*0*.txt'))[0:1] 170 | 171 | pred_traj = [] 172 | for traj_file in traj_files: 173 | with open(traj_file, 'r') as f: 174 | traj = [list(map(float, x.strip().split(','))) 175 | for x in f.readlines()] 176 | pred_traj.append(traj) 177 | if store: 178 | self.pred_trajs[name] = pred_traj 179 | 180 | confidence_file = glob(os.path.join(path, name, 'baseline', self.name, '*0*.value'))[0] 181 | with open(confidence_file, 'r') as f: 182 | score = [float(x.strip()[1:]) for x in f.readlines()[1:]] 183 | score.insert(0, float('nan')) 184 | if store: 185 | self.confidence[name] = score 186 | return traj, score 187 | 188 | class VOTLTDataset(Dataset): 189 | """ 190 | Args: 191 | name: dataset name, 'VOT2018-LT' 192 | dataset_root: dataset root 193 | load_img: wether to load all imgs 194 | """ 195 | def __init__(self, name, dataset_root, load_img=False): 196 | super(VOTLTDataset, self).__init__(name, dataset_root) 197 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 198 | meta_data = json.load(f) 199 | 200 | # load videos 201 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 202 | self.videos = {} 203 | for video in pbar: 204 | pbar.set_postfix_str(video) 205 | self.videos[video] = VOTLTVideo(video, 206 | dataset_root, 207 | meta_data[video]['video_dir'], 208 | meta_data[video]['init_rect'], 209 | meta_data[video]['img_names'], 210 | meta_data[video]['gt_rect']) 211 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | from .ar_benchmark import AccuracyRobustnessBenchmark 10 | from .eao_benchmark import EAOBenchmark 11 | from .f1_benchmark import F1Benchmark 12 | -------------------------------------------------------------------------------- 
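The benchmark classes re-exported by this __init__.py are typically combined with the DatasetFactory from datasets/__init__.py. The snippet below is a minimal usage sketch based only on the signatures in this package; the dataset root, result path, and tracker name are placeholders, and the import prefix may need adjusting to your PYTHONPATH.

from bench_utils.pysot.datasets import DatasetFactory
from bench_utils.pysot.evaluation import AccuracyRobustnessBenchmark, EAOBenchmark, F1Benchmark

# VOTDataset expects <dataset_root>/VOT2018.json to exist (see datasets/vot.py)
dataset = DatasetFactory.create_dataset(name='VOT2018', dataset_root='data/VOT2018')
dataset.set_tracker('results/VOT2018', ['SiamRPN_THOR_ensemble'])  # result dir + tracker names (placeholders)

ar_result = AccuracyRobustnessBenchmark(dataset).eval()
eao_result = EAOBenchmark(dataset).eval()
AccuracyRobustnessBenchmark(dataset).show_result(ar_result, eao_result=eao_result)
# For VOT2018-LT results, create the dataset with name='VOT-LT' and use F1Benchmark instead.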
/DeepMTA_code/benchmark/bench_utils/pysot/evaluation/ar_benchmark.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | 10 | import warnings 11 | import itertools 12 | import numpy as np 13 | 14 | from colorama import Style, Fore 15 | from ..utils import calculate_failures, calculate_accuracy 16 | 17 | 18 | class AccuracyRobustnessBenchmark: 19 | """ 20 | Args: 21 | dataset: 22 | burnin: 23 | """ 24 | def __init__(self, dataset, burnin=10): 25 | self.dataset = dataset 26 | self.burnin = burnin 27 | 28 | def eval(self, eval_trackers=None): 29 | """ 30 | Args: 31 | eval_tags: list of tag 32 | eval_trackers: list of tracker name 33 | Returns: 34 | ret: dict of results 35 | """ 36 | if eval_trackers is None: 37 | eval_trackers = self.dataset.tracker_names 38 | if isinstance(eval_trackers, str): 39 | eval_trackers = [eval_trackers] 40 | 41 | result = {} 42 | for tracker_name in eval_trackers: 43 | accuracy, failures = self._calculate_accuracy_robustness(tracker_name) 44 | result[tracker_name] = {'overlaps': accuracy, 45 | 'failures': failures} 46 | return result 47 | 48 | def show_result(self, result, eao_result=None, show_video_level=False, helight_threshold=0.5): 49 | """pretty print result 50 | Args: 51 | result: returned dict from function eval 52 | """ 53 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12) 54 | if eao_result is not None: 55 | header = "|{:^"+str(tracker_name_len)+"}|{:^10}|{:^12}|{:^13}|{:^7}|" 56 | header = header.format('Tracker Name', 57 | 'Accuracy', 'Robustness', 'Lost Number', 'EAO') 58 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^10.3f}|{:^12.3f}|{:^13.1f}|{:^7.3f}|" 59 | else: 60 | header = "|{:^"+str(tracker_name_len)+"}|{:^10}|{:^12}|{:^13}|" 61 | header = header.format('Tracker Name', 62 | 'Accuracy', 'Robustness', 'Lost Number') 63 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^10.3f}|{:^12.3f}|{:^13.1f}|" 64 | bar = '-'*len(header) 65 | print(bar) 66 | print(header) 67 | print(bar) 68 | if eao_result is not None: 69 | tracker_eao = sorted(eao_result.items(), 70 | key=lambda x:x[1]['all'], 71 | reverse=True)[:20] 72 | tracker_names = [x[0] for x in tracker_eao] 73 | else: 74 | tracker_names = list(result.keys()) 75 | for tracker_name in tracker_names: 76 | ret = result[tracker_name] 77 | overlaps = list(itertools.chain(*ret['overlaps'].values())) 78 | accuracy = np.nanmean(overlaps) 79 | length = sum([len(x) for x in ret['overlaps'].values()]) 80 | failures = list(ret['failures'].values()) 81 | lost_number = np.mean(np.sum(failures, axis=0)) 82 | robustness = np.mean(np.sum(np.array(failures), axis=0) / length) * 100 83 | if eao_result is None: 84 | print(formatter.format(tracker_name, accuracy, robustness, lost_number)) 85 | else: 86 | print(formatter.format(tracker_name, accuracy, robustness, lost_number, eao_result[tracker_name]['all'])) 87 | print(bar) 88 | 89 | if show_video_level and len(result) < 10: 90 | print('\n\n') 91 | header1 = "|{:^14}|".format("Tracker name") 92 | header2 = "|{:^14}|".format("Video name") 93 | for tracker_name in result.keys(): 94 | header1 += 
("{:^17}|").format(tracker_name) 95 | header2 += "{:^8}|{:^8}|".format("Acc", "LN") 96 | print('-'*len(header1)) 97 | print(header1) 98 | print('-'*len(header1)) 99 | print(header2) 100 | print('-'*len(header1)) 101 | videos = list(result[tracker_name]['overlaps'].keys()) 102 | for video in videos: 103 | row = "|{:^14}|".format(video) 104 | for tracker_name in result.keys(): 105 | overlaps = result[tracker_name]['overlaps'][video] 106 | accuracy = np.nanmean(overlaps) 107 | failures = result[tracker_name]['failures'][video] 108 | lost_number = np.mean(failures) 109 | 110 | accuracy_str = "{:^8.3f}".format(accuracy) 111 | if accuracy < helight_threshold: 112 | row += f'{Fore.RED}{accuracy_str}{Style.RESET_ALL}|' 113 | else: 114 | row += accuracy_str+'|' 115 | lost_num_str = "{:^8.3f}".format(lost_number) 116 | if lost_number > 0: 117 | row += f'{Fore.RED}{lost_num_str}{Style.RESET_ALL}|' 118 | else: 119 | row += lost_num_str+'|' 120 | print(row) 121 | print('-'*len(header1)) 122 | 123 | def _calculate_accuracy_robustness(self, tracker_name): 124 | overlaps = {} 125 | failures = {} 126 | all_length = {} 127 | for i in range(len(self.dataset)): 128 | video = self.dataset[i] 129 | gt_traj = video.gt_traj 130 | if tracker_name not in video.pred_trajs: 131 | tracker_trajs = video.load_tracker(self.dataset.tracker_path, tracker_name, False) 132 | else: 133 | tracker_trajs = video.pred_trajs[tracker_name] 134 | overlaps_group = [] 135 | num_failures_group = [] 136 | for tracker_traj in tracker_trajs: 137 | num_failures = calculate_failures(tracker_traj)[0] 138 | overlaps_ = calculate_accuracy(tracker_traj, gt_traj, 139 | burnin=10, bound=(video.width, video.height))[1] 140 | overlaps_group.append(overlaps_) 141 | num_failures_group.append(num_failures) 142 | with warnings.catch_warnings(): 143 | warnings.simplefilter("ignore", category=RuntimeWarning) 144 | overlaps[video.name] = np.nanmean(overlaps_group, axis=0).tolist() 145 | failures[video.name] = num_failures_group 146 | return overlaps, failures 147 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/evaluation/eao_benchmark.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | import numpy as np 10 | 11 | from ..utils import calculate_failures, calculate_accuracy, calculate_expected_overlap 12 | 13 | 14 | class EAOBenchmark: 15 | """ 16 | Args: 17 | dataset: 18 | """ 19 | def __init__(self, dataset, skipping=5, tags=['all']): 20 | self.dataset = dataset 21 | self.skipping = skipping 22 | self.tags = tags 23 | # NOTE we not use gmm to generate low, high, peak value 24 | if dataset.name == 'VOT2018' or dataset.name == 'VOT2017': 25 | self.low = 100 26 | self.high = 356 27 | self.peak = 160 28 | elif dataset.name == 'VOT2016': 29 | self.low = 100 # TODO 30 | self.high = 356 31 | self.peak = 160 32 | 33 | def eval(self, eval_trackers=None): 34 | """ 35 | Args: 36 | eval_tags: list of tag 37 | eval_trackers: list of tracker name 38 | Returns: 39 | eao: dict of results 40 | """ 41 | if eval_trackers is None: 42 | eval_trackers = 
self.dataset.tracker_names 43 | if isinstance(eval_trackers, str): 44 | eval_trackers = [eval_trackers] 45 | 46 | ret = {} 47 | for tracker_name in eval_trackers: 48 | eao = self._calculate_eao(tracker_name, self.tags) 49 | ret[tracker_name] = eao 50 | return ret 51 | 52 | def show_result(self, result, topk=10): 53 | """pretty print result 54 | Args: 55 | result: returned dict from function eval 56 | """ 57 | if len(self.tags) == 1: 58 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12) 59 | header = ("|{:^"+str(tracker_name_len)+"}|{:^10}|").format('Tracker Name', 'EAO') 60 | bar = '-'*len(header) 61 | formatter = "|{:^20}|{:^10.3f}|" 62 | print(bar) 63 | print(header) 64 | print(bar) 65 | tracker_eao = sorted(result.items(), 66 | key=lambda x: x[1]['all'], 67 | reverse=True)[:topk] 68 | for tracker_name, eao in tracker_eao: 69 | print(formatter.format(tracker_name, eao)) 70 | print(bar) 71 | else: 72 | header = "|{:^20}|".format('Tracker Name') 73 | header += "{:^7}|{:^15}|{:^14}|{:^15}|{:^13}|{:^11}|{:^7}|".format(*self.tags) 74 | bar = '-'*len(header) 75 | formatter = "{:^7.3f}|{:^15.3f}|{:^14.3f}|{:^15.3f}|{:^13.3f}|{:^11.3f}|{:^7.3f}|" 76 | print(bar) 77 | print(header) 78 | print(bar) 79 | sorted_tacker = sorted(result.items(), 80 | key=lambda x: x[1]['all'], 81 | reverse=True)[:topk] 82 | sorted_tacker = [x[0] for x in sorted_tacker] 83 | for tracker_name in sorted_tacker: 84 | print("|{:^20}|".format(tracker_name)+formatter.format( 85 | *[result[tracker_name][x] for x in self.tags])) 86 | print(bar) 87 | 88 | def _calculate_eao(self, tracker_name, tags): 89 | all_overlaps = [] 90 | all_failures = [] 91 | video_names = [] 92 | gt_traj_length = [] 93 | for video in self.dataset: 94 | gt_traj = video.gt_traj 95 | if tracker_name not in video.pred_trajs: 96 | tracker_trajs = video.load_tracker(self.dataset.tracker_path, tracker_name, False) 97 | else: 98 | tracker_trajs = video.pred_trajs[tracker_name] 99 | for tracker_traj in tracker_trajs: 100 | gt_traj_length.append(len(gt_traj)) 101 | video_names.append(video.name) 102 | overlaps = calculate_accuracy(tracker_traj, gt_traj, bound=(video.width-1, video.height-1))[1] 103 | failures = calculate_failures(tracker_traj)[1] 104 | all_overlaps.append(overlaps) 105 | all_failures.append(failures) 106 | fragment_num = sum([len(x)+1 for x in all_failures]) 107 | max_len = max([len(x) for x in all_overlaps]) 108 | seq_weight = 1 / len(tracker_trajs) 109 | 110 | eao = {} 111 | for tag in tags: 112 | # prepare segments 113 | fweights = np.ones((fragment_num)) * np.nan 114 | fragments = np.ones((fragment_num, max_len)) * np.nan 115 | seg_counter = 0 116 | for name, traj_len, failures, overlaps in zip(video_names, gt_traj_length, 117 | all_failures, all_overlaps): 118 | if len(failures) > 0: 119 | points = [x+self.skipping for x in failures if 120 | x+self.skipping <= len(overlaps)] 121 | points.insert(0, 0) 122 | for i in range(len(points)): 123 | if i != len(points) - 1: 124 | fragment = np.array(overlaps[points[i]:points[i+1]+1]) 125 | fragments[seg_counter, :] = 0 126 | else: 127 | fragment = np.array(overlaps[points[i]:]) 128 | fragment[np.isnan(fragment)] = 0 129 | fragments[seg_counter, :len(fragment)] = fragment 130 | if i != len(points) - 1: 131 | tag_value = self.dataset[name].select_tag(tag, points[i], points[i+1]+1) 132 | w = sum(tag_value) / (points[i+1] - points[i]+1) 133 | fweights[seg_counter] = seq_weight * w 134 | else: 135 | tag_value = self.dataset[name].select_tag(tag, points[i], len(overlaps)) 136 | w = 
sum(tag_value) / (traj_len - points[i]+1e-16) 137 | fweights[seg_counter] = seq_weight * w 138 | seg_counter += 1 139 | else: 140 | # no failure 141 | max_idx = min(len(overlaps), max_len) 142 | fragments[seg_counter, :max_idx] = overlaps[:max_idx] 143 | tag_value = self.dataset[name].select_tag(tag, 0, max_idx) 144 | w = sum(tag_value) / max_idx 145 | fweights[seg_counter] = seq_weight * w 146 | seg_counter += 1 147 | 148 | expected_overlaps = calculate_expected_overlap(fragments, fweights) 149 | # caculate eao 150 | weight = np.zeros((len(expected_overlaps))) 151 | weight[self.low-1:self.high-1+1] = 1 152 | is_valid = np.logical_not(np.isnan(expected_overlaps)) 153 | eao_ = np.sum(expected_overlaps[is_valid] * weight[is_valid]) / np.sum(weight[is_valid]) 154 | eao[tag] = eao_ 155 | return eao 156 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/evaluation/f1_benchmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from glob import glob 5 | from tqdm import tqdm 6 | from colorama import Style, Fore 7 | 8 | from ..utils import determine_thresholds, calculate_accuracy, calculate_f1 9 | 10 | class F1Benchmark: 11 | def __init__(self, dataset): 12 | """ 13 | Args: 14 | result_path: 15 | """ 16 | self.dataset = dataset 17 | 18 | def eval(self, eval_trackers=None): 19 | """ 20 | Args: 21 | eval_tags: list of tag 22 | eval_trackers: list of tracker name 23 | Returns: 24 | eao: dict of results 25 | """ 26 | if eval_trackers is None: 27 | eval_trackers = self.dataset.tracker_names 28 | if isinstance(eval_trackers, str): 29 | eval_trackers = [eval_trackers] 30 | 31 | ret = {} 32 | for tracker_name in eval_trackers: 33 | precision, recall, f1 = self._cal_precision_reall(tracker_name) 34 | ret[tracker_name] = {"precision": precision, 35 | "recall": recall, 36 | "f1": f1 37 | } 38 | return ret 39 | 40 | def _cal_precision_reall(self, tracker_name): 41 | score = [] 42 | # for i in range(len(self.dataset)): 43 | # video = self.dataset[i] 44 | for video in self.dataset: 45 | if tracker_name not in video.confidence: 46 | score += video.load_tracker(self.dataset.tracker_path, tracker_name, False)[1] 47 | else: 48 | score += video.confidence[tracker_name] 49 | score = np.array(score) 50 | thresholds = determine_thresholds(score)[::-1] 51 | 52 | precision = {} 53 | recall = {} 54 | f1 = {} 55 | for i in range(len(self.dataset)): 56 | video = self.dataset[i] 57 | gt_traj = video.gt_traj 58 | N = sum([1 for x in gt_traj if len(x) > 1]) 59 | if tracker_name not in video.pred_trajs: 60 | tracker_traj, score = video.load_tracker(self.dataset.tracker_path, tracker_name, False) 61 | else: 62 | tracker_traj = video.pred_trajs[tracker_name] 63 | score = video.confidence[tracker_name] 64 | overlaps = calculate_accuracy(tracker_traj, gt_traj, \ 65 | bound=(video.width,video.height))[1] 66 | f1[video.name], precision[video.name], recall[video.name] = \ 67 | calculate_f1(overlaps, score, (video.width,video.height),thresholds, N) 68 | return precision, recall, f1 69 | 70 | def show_result(self, result, show_video_level=False, helight_threshold=0.5): 71 | """pretty print result 72 | Args: 73 | result: returned dict from function eval 74 | """ 75 | # sort tracker according to f1 76 | sorted_tracker = {} 77 | for tracker_name, ret in result.items(): 78 | precision = np.mean(list(ret['precision'].values()), axis=0) 79 | recall = np.mean(list(ret['recall'].values()), 
axis=0) 80 | f1 = 2 * precision * recall / (precision + recall) 81 | max_idx = np.argmax(f1) 82 | sorted_tracker[tracker_name] = (precision[max_idx], recall[max_idx], 83 | f1[max_idx]) 84 | sorted_tracker_ = sorted(sorted_tracker.items(), 85 | key=lambda x:x[1][2], 86 | reverse=True)[:20] 87 | tracker_names = [x[0] for x in sorted_tracker_] 88 | 89 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12) 90 | header = "|{:^"+str(tracker_name_len)+"}|{:^11}|{:^8}|{:^7}|" 91 | header = header.format('Tracker Name', 92 | 'Precision', 'Recall', 'F1') 93 | bar = '-' * len(header) 94 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^11.3f}|{:^8.3f}|{:^7.3f}|" 95 | print(bar) 96 | print(header) 97 | print(bar) 98 | # for tracker_name, ret in result.items(): 99 | # precision = np.mean(list(ret['precision'].values()), axis=0) 100 | # recall = np.mean(list(ret['recall'].values()), axis=0) 101 | # f1 = 2 * precision * recall / (precision + recall) 102 | # max_idx = np.argmax(f1) 103 | for tracker_name in tracker_names: 104 | precision = sorted_tracker[tracker_name][0] 105 | recall = sorted_tracker[tracker_name][1] 106 | f1 = sorted_tracker[tracker_name][2] 107 | print(formatter.format(tracker_name, precision, recall, f1)) 108 | print(bar) 109 | 110 | if show_video_level and len(result) < 10: 111 | print('\n\n') 112 | header1 = "|{:^14}|".format("Tracker name") 113 | header2 = "|{:^14}|".format("Video name") 114 | for tracker_name in result.keys(): 115 | # col_len = max(20, len(tracker_name)) 116 | header1 += ("{:^28}|").format(tracker_name) 117 | header2 += "{:^11}|{:^8}|{:^7}|".format("Precision", "Recall", "F1") 118 | print('-'*len(header1)) 119 | print(header1) 120 | print('-'*len(header1)) 121 | print(header2) 122 | print('-'*len(header1)) 123 | videos = list(result[tracker_name]['precision'].keys()) 124 | for video in videos: 125 | row = "|{:^14}|".format(video) 126 | for tracker_name in result.keys(): 127 | precision = result[tracker_name]['precision'][video] 128 | recall = result[tracker_name]['recall'][video] 129 | f1 = result[tracker_name]['f1'][video] 130 | max_idx = np.argmax(f1) 131 | precision_str = "{:^11.3f}".format(precision[max_idx]) 132 | if precision[max_idx] < helight_threshold: 133 | row += f'{Fore.RED}{precision_str}{Style.RESET_ALL}|' 134 | else: 135 | row += precision_str+'|' 136 | recall_str = "{:^8.3f}".format(recall[max_idx]) 137 | if recall[max_idx] < helight_threshold: 138 | row += f'{Fore.RED}{recall_str}{Style.RESET_ALL}|' 139 | else: 140 | row += recall_str+'|' 141 | f1_str = "{:^7.3f}".format(f1[max_idx]) 142 | if f1[max_idx] < helight_threshold: 143 | row += f'{Fore.RED}{f1_str}{Style.RESET_ALL}|' 144 | else: 145 | row += f1_str+'|' 146 | print(row) 147 | print('-'*len(header1)) 148 | return {'f1': f1, 'precision': precision, 'recall': recall} 149 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | from . 
import region 10 | from .statistics import * 11 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/build/temp.linux-x86_64-3.7/region.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/DeepMTA_code/benchmark/bench_utils/pysot/utils/build/temp.linux-x86_64-3.7/region.o -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/build/temp.linux-x86_64-3.7/src/region.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/DeepMTA_code/benchmark/bench_utils/pysot/utils/build/temp.linux-x86_64-3.7/src/region.o -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/c_region.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "src/region.h": 2 | ctypedef enum region_type "RegionType": 3 | EMTPY 4 | SPECIAL 5 | RECTANGEL 6 | POLYGON 7 | MASK 8 | 9 | ctypedef struct region_bounds: 10 | float top 11 | float bottom 12 | float left 13 | float right 14 | 15 | ctypedef struct region_rectangle: 16 | float x 17 | float y 18 | float width 19 | float height 20 | 21 | # ctypedef struct region_mask: 22 | # int x 23 | # int y 24 | # int width 25 | # int height 26 | # char *data 27 | 28 | ctypedef struct region_polygon: 29 | int count 30 | float *x 31 | float *y 32 | 33 | ctypedef union region_container_data: 34 | region_rectangle rectangle 35 | region_polygon polygon 36 | # region_mask mask 37 | int special 38 | 39 | ctypedef struct region_container: 40 | region_type type 41 | region_container_data data 42 | 43 | # ctypedef struct region_overlap: 44 | # float overlap 45 | # float only1 46 | # float only2 47 | 48 | # region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds) 49 | 50 | float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds) 51 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/misc.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | import numpy as np 10 | 11 | def determine_thresholds(confidence, resolution=100): 12 | """choose threshold according to confidence 13 | 14 | Args: 15 | confidence: list or numpy array or numpy array 16 | reolution: number of threshold to choose 17 | 18 | Restures: 19 | threshold: numpy array 20 | """ 21 | if isinstance(confidence, list): 22 | confidence = np.array(confidence) 23 | confidence = confidence.flatten() 24 | confidence = confidence[~np.isnan(confidence)] 25 | confidence.sort() 26 | 27 | assert len(confidence) > resolution and resolution > 2 28 | 29 | thresholds 
= np.ones((resolution)) 30 | thresholds[0] = - np.inf 31 | thresholds[-1] = np.inf 32 | delta = np.floor(len(confidence) / (resolution - 2)) 33 | idxs = np.linspace(delta, len(confidence)-delta, resolution-2, dtype=np.int32) 34 | thresholds[1:-1] = confidence[idxs] 35 | return thresholds 36 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/region.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/DeepMTA_code/benchmark/bench_utils/pysot/utils/region.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/region.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | 10 | # distutils: sources = src/region.c 11 | # distutils: include_dirs = src/ 12 | 13 | from libc.stdlib cimport malloc, free 14 | from libc.stdio cimport sprintf 15 | from libc.string cimport strlen 16 | 17 | cimport c_region 18 | 19 | cpdef enum RegionType: 20 | EMTPY 21 | SPECIAL 22 | RECTANGEL 23 | POLYGON 24 | MASK 25 | 26 | cdef class RegionBounds: 27 | cdef c_region.region_bounds* _c_region_bounds 28 | 29 | def __cinit__(self): 30 | self._c_region_bounds = malloc( 31 | sizeof(c_region.region_bounds)) 32 | if not self._c_region_bounds: 33 | self._c_region_bounds = NULL 34 | raise MemoryError() 35 | 36 | def __init__(self, top, bottom, left, right): 37 | self.set(top, bottom, left, right) 38 | 39 | def __dealloc__(self): 40 | if self._c_region_bounds is not NULL: 41 | free(self._c_region_bounds) 42 | self._c_region_bounds = NULL 43 | 44 | def __str__(self): 45 | return "top: {:.3f} bottom: {:.3f} left: {:.3f} reight: {:.3f}".format( 46 | self._c_region_bounds.top, 47 | self._c_region_bounds.bottom, 48 | self._c_region_bounds.left, 49 | self._c_region_bounds.right) 50 | 51 | def get(self): 52 | return (self._c_region_bounds.top, 53 | self._c_region_bounds.bottom, 54 | self._c_region_bounds.left, 55 | self._c_region_bounds.right) 56 | 57 | def set(self, top, bottom, left, right): 58 | self._c_region_bounds.top = top 59 | self._c_region_bounds.bottom = bottom 60 | self._c_region_bounds.left = left 61 | self._c_region_bounds.right = right 62 | 63 | cdef class Rectangle: 64 | cdef c_region.region_rectangle* _c_region_rectangle 65 | 66 | def __cinit__(self): 67 | self._c_region_rectangle = malloc( 68 | sizeof(c_region.region_rectangle)) 69 | if not self._c_region_rectangle: 70 | self._c_region_rectangle = NULL 71 | raise MemoryError() 72 | 73 | def __init__(self, x, y, width, height): 74 | self.set(x, y, width, height) 75 | 76 | def __dealloc__(self): 77 | if self._c_region_rectangle is not NULL: 78 | free(self._c_region_rectangle) 79 | self._c_region_rectangle = NULL 80 | 81 | def __str__(self): 82 | return "x: {:.3f} y: {:.3f} width: {:.3f} height: {:.3f}".format( 83 | self._c_region_rectangle.x, 84 | self._c_region_rectangle.y, 85 | 
self._c_region_rectangle.width, 86 | self._c_region_rectangle.height) 87 | 88 | def set(self, x, y, width, height): 89 | self._c_region_rectangle.x = x 90 | self._c_region_rectangle.y = y 91 | self._c_region_rectangle.width = width 92 | self._c_region_rectangle.height = height 93 | 94 | def get(self): 95 | """ 96 | return: 97 | (x, y, width, height) 98 | """ 99 | return (self._c_region_rectangle.x, 100 | self._c_region_rectangle.y, 101 | self._c_region_rectangle.width, 102 | self._c_region_rectangle.height) 103 | 104 | cdef class Polygon: 105 | cdef c_region.region_polygon* _c_region_polygon 106 | 107 | def __cinit__(self, points): 108 | """ 109 | args: 110 | points: tuple of point 111 | points = ((1, 1), (10, 10)) 112 | """ 113 | num = len(points) // 2 114 | self._c_region_polygon = malloc( 115 | sizeof(c_region.region_polygon)) 116 | if not self._c_region_polygon: 117 | self._c_region_polygon = NULL 118 | raise MemoryError() 119 | self._c_region_polygon.count = num 120 | self._c_region_polygon.x = malloc(sizeof(float) * num) 121 | if not self._c_region_polygon.x: 122 | raise MemoryError() 123 | self._c_region_polygon.y = malloc(sizeof(float) * num) 124 | if not self._c_region_polygon.y: 125 | raise MemoryError() 126 | 127 | for i in range(num): 128 | self._c_region_polygon.x[i] = points[i*2] 129 | self._c_region_polygon.y[i] = points[i*2+1] 130 | 131 | def __dealloc__(self): 132 | if self._c_region_polygon is not NULL: 133 | if self._c_region_polygon.x is not NULL: 134 | free(self._c_region_polygon.x) 135 | self._c_region_polygon.x = NULL 136 | if self._c_region_polygon.y is not NULL: 137 | free(self._c_region_polygon.y) 138 | self._c_region_polygon.y = NULL 139 | free(self._c_region_polygon) 140 | self._c_region_polygon = NULL 141 | 142 | def __str__(self): 143 | ret = "" 144 | for i in range(self._c_region_polygon.count-1): 145 | ret += "({:.3f} {:.3f}) ".format(self._c_region_polygon.x[i], 146 | self._c_region_polygon.y[i]) 147 | ret += "({:.3f} {:.3f})".format(self._c_region_polygon.x[i], 148 | self._c_region_polygon.y[i]) 149 | return ret 150 | 151 | def vot_overlap(polygon1, polygon2, bounds=None): 152 | """ computing overlap between two polygon 153 | Args: 154 | polygon1: polygon tuple of points 155 | polygon2: polygon tuple of points 156 | bounds: tuple of (left, top, right, bottom) or tuple of (width height) 157 | Return: 158 | overlap: overlap between two polygons 159 | """ 160 | if len(polygon1) == 1 or len(polygon2) == 1: 161 | return float("nan") 162 | 163 | if len(polygon1) == 4: 164 | polygon1_ = Polygon([polygon1[0], polygon1[1], 165 | polygon1[0]+polygon1[2], polygon1[1], 166 | polygon1[0]+polygon1[2], polygon1[1]+polygon1[3], 167 | polygon1[0], polygon1[1]+polygon1[3]]) 168 | else: 169 | polygon1_ = Polygon(polygon1) 170 | 171 | if len(polygon2) == 4: 172 | polygon2_ = Polygon([polygon2[0], polygon2[1], 173 | polygon2[0]+polygon2[2], polygon2[1], 174 | polygon2[0]+polygon2[2], polygon2[1]+polygon2[3], 175 | polygon2[0], polygon2[1]+polygon2[3]]) 176 | else: 177 | polygon2_ = Polygon(polygon2) 178 | 179 | if bounds is not None and len(bounds) == 4: 180 | pno_bounds = RegionBounds(bounds[0], bounds[1], bounds[2], bounds[3]) 181 | elif bounds is not None and len(bounds) == 2: 182 | pno_bounds = RegionBounds(0, bounds[1], 0, bounds[0]) 183 | else: 184 | pno_bounds = RegionBounds(-float("inf"), float("inf"), 185 | -float("inf"), float("inf")) 186 | cdef float only1 = 0 187 | cdef float only2 = 0 188 | cdef c_region.region_polygon* c_polygon1 = polygon1_._c_region_polygon 189 
| cdef c_region.region_polygon* c_polygon2 = polygon2_._c_region_polygon 190 | cdef c_region.region_bounds no_bounds = pno_bounds._c_region_bounds[0] # deference 191 | return c_region.compute_polygon_overlap(c_polygon1, 192 | c_polygon2, 193 | &only1, 194 | &only2, 195 | no_bounds) 196 | 197 | def vot_overlap_traj(polygons1, polygons2, bounds=None): 198 | """ computing overlap between two trajectory 199 | Args: 200 | polygons1: list of polygon 201 | polygons2: list of polygon 202 | bounds: tuple of (left, top, right, bottom) or tuple of (width height) 203 | Return: 204 | overlaps: overlaps between all pair of polygons 205 | """ 206 | assert len(polygons1) == len(polygons2) 207 | overlaps = [] 208 | for i in range(len(polygons1)): 209 | overlap = vot_overlap(polygons1[i], polygons2[i], bounds=bounds) 210 | overlaps.append(overlap) 211 | return overlaps 212 | 213 | 214 | def vot_float2str(template, float value): 215 | """ 216 | Args: 217 | tempate: like "%.3f" in C syntax 218 | value: float value 219 | """ 220 | cdef bytes ptemplate = template.encode() 221 | cdef const char* ctemplate = ptemplate 222 | cdef char* output = malloc(sizeof(char) * 100) 223 | if not output: 224 | raise MemoryError() 225 | sprintf(output, ctemplate, value) 226 | try: 227 | ret = output[:strlen(output)].decode() 228 | finally: 229 | free(output) 230 | return ret 231 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # -------------------------------------------------------- 9 | from distutils.core import setup 10 | from distutils.extension import Extension 11 | from Cython.Build import cythonize 12 | 13 | setup( 14 | ext_modules = cythonize([Extension("region", ["region.pyx", "src/region.c"])]), 15 | ) 16 | 17 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/src/buffer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef __STRING_BUFFER_H 3 | #define __STRING_BUFFER_H 4 | 5 | // Enable MinGW secure API for _snprintf_s 6 | #define MINGW_HAS_SECURE_API 1 7 | 8 | #ifdef _MSC_VER 9 | #define __INLINE __inline 10 | #else 11 | #define __INLINE inline 12 | #endif 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | typedef struct string_buffer { 19 | char* buffer; 20 | int position; 21 | int size; 22 | } string_buffer; 23 | 24 | typedef struct string_list { 25 | char** buffer; 26 | int position; 27 | int size; 28 | } string_list; 29 | 30 | #define BUFFER_INCREMENT_STEP 4096 31 | 32 | static __INLINE string_buffer* buffer_create(int L) { 33 | string_buffer* B = (string_buffer*) malloc(sizeof(string_buffer)); 34 | B->size = L; 35 | B->buffer = (char*) malloc(sizeof(char) * B->size); 36 | B->position = 0; 37 | return B; 38 | } 39 | 40 | static __INLINE void buffer_reset(string_buffer* B) { 41 | B->position = 0; 42 | } 43 | 44 | static __INLINE void buffer_destroy(string_buffer** B) { 45 | if (!(*B)) return; 46 | if ((*B)->buffer) { 47 | free((*B)->buffer); 48 | (*B)->buffer = 
NULL; 49 | } 50 | free((*B)); 51 | (*B) = NULL; 52 | } 53 | 54 | static __INLINE char* buffer_extract(const string_buffer* B) { 55 | char *S = (char*) malloc(sizeof(char) * (B->position + 1)); 56 | memcpy(S, B->buffer, B->position); 57 | S[B->position] = '\0'; 58 | return S; 59 | } 60 | 61 | static __INLINE int buffer_size(const string_buffer* B) { 62 | return B->position; 63 | } 64 | 65 | static __INLINE void buffer_push(string_buffer* B, char C) { 66 | int required = 1; 67 | if (required > B->size - B->position) { 68 | B->size = B->position + BUFFER_INCREMENT_STEP; 69 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 70 | } 71 | B->buffer[B->position] = C; 72 | B->position += required; 73 | } 74 | 75 | static __INLINE void buffer_append(string_buffer* B, const char *format, ...) { 76 | 77 | int required; 78 | va_list args; 79 | 80 | #if defined(__OS2__) || defined(__WINDOWS__) || defined(WIN32) || defined(_MSC_VER) 81 | 82 | va_start(args, format); 83 | required = _vscprintf(format, args) + 1; 84 | va_end(args); 85 | if (required >= B->size - B->position) { 86 | B->size = B->position + required + 1; 87 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 88 | } 89 | va_start(args, format); 90 | required = _vsnprintf_s(&(B->buffer[B->position]), B->size - B->position, _TRUNCATE, format, args); 91 | va_end(args); 92 | B->position += required; 93 | 94 | #else 95 | va_start(args, format); 96 | required = vsnprintf(&(B->buffer[B->position]), B->size - B->position, format, args); 97 | va_end(args); 98 | if (required >= B->size - B->position) { 99 | B->size = B->position + required + 1; 100 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 101 | va_start(args, format); 102 | required = vsnprintf(&(B->buffer[B->position]), B->size - B->position, format, args); 103 | va_end(args); 104 | } 105 | B->position += required; 106 | #endif 107 | 108 | } 109 | 110 | static __INLINE string_list* list_create(int L) { 111 | string_list* B = (string_list*) malloc(sizeof(string_list)); 112 | B->size = L; 113 | B->buffer = (char**) malloc(sizeof(char*) * B->size); 114 | memset(B->buffer, 0, sizeof(char*) * B->size); 115 | B->position = 0; 116 | return B; 117 | } 118 | 119 | static __INLINE void list_reset(string_list* B) { 120 | int i; 121 | for (i = 0; i < B->position; i++) { 122 | if (B->buffer[i]) free(B->buffer[i]); 123 | B->buffer[i] = NULL; 124 | } 125 | B->position = 0; 126 | } 127 | 128 | static __INLINE void list_destroy(string_list **B) { 129 | int i; 130 | 131 | if (!(*B)) return; 132 | 133 | for (i = 0; i < (*B)->position; i++) { 134 | if ((*B)->buffer[i]) free((*B)->buffer[i]); (*B)->buffer[i] = NULL; 135 | } 136 | 137 | if ((*B)->buffer) { 138 | free((*B)->buffer); (*B)->buffer = NULL; 139 | } 140 | 141 | free((*B)); 142 | (*B) = NULL; 143 | } 144 | 145 | static __INLINE char* list_get(const string_list *B, int I) { 146 | if (I < 0 || I >= B->position) { 147 | return NULL; 148 | } else { 149 | if (!B->buffer[I]) { 150 | return NULL; 151 | } else { 152 | char *S; 153 | int length = strlen(B->buffer[I]); 154 | S = (char*) malloc(sizeof(char) * (length + 1)); 155 | memcpy(S, B->buffer[I], length + 1); 156 | return S; 157 | } 158 | } 159 | } 160 | 161 | static __INLINE int list_size(const string_list *B) { 162 | return B->position; 163 | } 164 | 165 | static __INLINE void list_append(string_list *B, char* S) { 166 | int required = 1; 167 | int length = strlen(S); 168 | if (required > B->size - B->position) { 169 | B->size = B->position + 16; 170 | B->buffer 
= (char**) realloc(B->buffer, sizeof(char*) * B->size); 171 | } 172 | B->buffer[B->position] = (char*) malloc(sizeof(char) * (length + 1)); 173 | memcpy(B->buffer[B->position], S, length + 1); 174 | B->position += required; 175 | } 176 | 177 | // This version of the append does not copy the string but simply takes the control of its allocation 178 | static __INLINE void list_append_direct(string_list *B, char* S) { 179 | int required = 1; 180 | // int length = strlen(S); 181 | if (required > B->size - B->position) { 182 | B->size = B->position + 16; 183 | B->buffer = (char**) realloc(B->buffer, sizeof(char*) * B->size); 184 | } 185 | B->buffer[B->position] = S; 186 | B->position += required; 187 | } 188 | 189 | 190 | #endif 191 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/src/region.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 4; tab-width: 4 -*- */ 2 | 3 | #ifndef _REGION_H_ 4 | #define _REGION_H_ 5 | 6 | #ifdef TRAX_STATIC_DEFINE 7 | # define __TRAX_EXPORT 8 | #else 9 | # ifndef __TRAX_EXPORT 10 | # if defined(_MSC_VER) 11 | # ifdef trax_EXPORTS 12 | /* We are building this library */ 13 | # define __TRAX_EXPORT __declspec(dllexport) 14 | # else 15 | /* We are using this library */ 16 | # define __TRAX_EXPORT __declspec(dllimport) 17 | # endif 18 | # elif defined(__GNUC__) 19 | # ifdef trax_EXPORTS 20 | /* We are building this library */ 21 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 22 | # else 23 | /* We are using this library */ 24 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 25 | # endif 26 | # endif 27 | # endif 28 | #endif 29 | 30 | #ifndef MAX 31 | #define MAX(a,b) (((a) > (b)) ? (a) : (b)) 32 | #endif 33 | 34 | #ifndef MIN 35 | #define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) 36 | #endif 37 | 38 | #define TRAX_DEFAULT_CODE 0 39 | 40 | #define REGION_LEGACY_RASTERIZATION 1 41 | 42 | #ifdef __cplusplus 43 | extern "C" { 44 | #endif 45 | 46 | typedef enum region_type {EMPTY, SPECIAL, RECTANGLE, POLYGON, MASK} region_type; 47 | 48 | typedef struct region_bounds { 49 | 50 | float top; 51 | float bottom; 52 | float left; 53 | float right; 54 | 55 | } region_bounds; 56 | 57 | typedef struct region_polygon { 58 | 59 | int count; 60 | 61 | float* x; 62 | float* y; 63 | 64 | } region_polygon; 65 | 66 | typedef struct region_mask { 67 | 68 | int x; 69 | int y; 70 | 71 | int width; 72 | int height; 73 | 74 | char* data; 75 | 76 | } region_mask; 77 | 78 | typedef struct region_rectangle { 79 | 80 | float x; 81 | float y; 82 | float width; 83 | float height; 84 | 85 | } region_rectangle; 86 | 87 | typedef struct region_container { 88 | enum region_type type; 89 | union { 90 | region_rectangle rectangle; 91 | region_polygon polygon; 92 | region_mask mask; 93 | int special; 94 | } data; 95 | } region_container; 96 | 97 | typedef struct region_overlap { 98 | 99 | float overlap; 100 | float only1; 101 | float only2; 102 | 103 | } region_overlap; 104 | 105 | extern const region_bounds region_no_bounds; 106 | 107 | __TRAX_EXPORT int region_set_flags(int mask); 108 | 109 | __TRAX_EXPORT int region_clear_flags(int mask); 110 | 111 | __TRAX_EXPORT region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds); 112 | 113 | __TRAX_EXPORT float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds); 114 | 115 | __TRAX_EXPORT region_bounds region_create_bounds(float left, float top, float right, float bottom); 116 | 117 | __TRAX_EXPORT region_bounds region_compute_bounds(const region_container* region); 118 | 119 | __TRAX_EXPORT int region_parse(const char* buffer, region_container** region); 120 | 121 | __TRAX_EXPORT char* region_string(region_container* region); 122 | 123 | __TRAX_EXPORT void region_print(FILE* out, region_container* region); 124 | 125 | __TRAX_EXPORT region_container* region_convert(const region_container* region, region_type type); 126 | 127 | __TRAX_EXPORT void region_release(region_container** region); 128 | 129 | __TRAX_EXPORT region_container* region_create_special(int code); 130 | 131 | __TRAX_EXPORT region_container* region_create_rectangle(float x, float y, float width, float height); 132 | 133 | __TRAX_EXPORT region_container* region_create_polygon(int count); 134 | 135 | __TRAX_EXPORT int region_contains_point(region_container* r, float x, float y); 136 | 137 | __TRAX_EXPORT void region_get_mask(region_container* r, char* mask, int width, int height); 138 | 139 | __TRAX_EXPORT void region_get_mask_offset(region_container* r, char* mask, int x, int y, int width, int height); 140 | 141 | #ifdef __cplusplus 142 | } 143 | #endif 144 | 145 | #endif 146 | -------------------------------------------------------------------------------- /DeepMTA_code/benchmark/bench_utils/pysot/utils/statistics.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Python Single Object Tracking Evaluation 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Fangyi Zhang 5 | # @author fangyi.zhang@vipl.ict.ac.cn 6 | # @project https://github.com/StrangerZhang/pysot-toolkit.git 7 | # Revised for SiamMask by foolwood 8 | # 
-------------------------------------------------------- 9 | 10 | import numpy as np 11 | from numba import jit 12 | from . import region 13 | 14 | def calculate_failures(trajectory): 15 | """ Calculate number of failures 16 | Args: 17 | trajectory: list of bbox 18 | Returns: 19 | num_failures: number of failures 20 | failures: failures point in trajectory, start with 0 21 | """ 22 | failures = [i for i, x in zip(range(len(trajectory)), trajectory) 23 | if len(x) == 1 and x[0] == 2] 24 | num_failures = len(failures) 25 | return num_failures, failures 26 | 27 | def calculate_accuracy(pred_trajectory, gt_trajectory, 28 | burnin=0, ignore_unknown=True, bound=None): 29 | """Caculate accuracy socre as average overlap over the entire sequence 30 | Args: 31 | trajectory: list of bbox 32 | gt_trajectory: list of bbox 33 | burnin: number of frames that have to be ignored after the failure 34 | ignore_unknown: ignore frames where the overlap is unknown 35 | bound: bounding region 36 | Return: 37 | acc: average overlap 38 | overlaps: per frame overlaps 39 | """ 40 | pred_trajectory_ = pred_trajectory 41 | if not ignore_unknown: 42 | unkown = [len(x)==1 and x[0] == 0 for x in pred_trajectory] 43 | 44 | if burnin > 0: 45 | pred_trajectory_ = pred_trajectory[:] 46 | mask = [len(x)==1 and x[0] == 1 for x in pred_trajectory] 47 | for i in range(len(mask)): 48 | if mask[i]: 49 | for j in range(burnin): 50 | if i + j < len(mask): 51 | pred_trajectory_[i+j] = [0] 52 | min_len = min(len(pred_trajectory_), len(gt_trajectory)) 53 | overlaps = region.vot_overlap_traj(pred_trajectory_[:min_len], 54 | gt_trajectory[:min_len], bound) 55 | 56 | if not ignore_unknown: 57 | overlaps = [u if u else 0 for u in unkown] 58 | 59 | acc = 0 60 | if len(overlaps) > 0: 61 | acc = np.nanmean(overlaps) 62 | return acc, overlaps 63 | 64 | @jit(nopython=True) 65 | def overlap_ratio(rect1, rect2): 66 | '''Compute overlap ratio between two rects 67 | Args 68 | rect:2d array of N x [x,y,w,h] 69 | Return: 70 | iou 71 | ''' 72 | # if rect1.ndim==1: 73 | # rect1 = rect1[np.newaxis, :] 74 | # if rect2.ndim==1: 75 | # rect2 = rect2[np.newaxis, :] 76 | left = np.maximum(rect1[:,0], rect2[:,0]) 77 | right = np.minimum(rect1[:,0]+rect1[:,2], rect2[:,0]+rect2[:,2]) 78 | top = np.maximum(rect1[:,1], rect2[:,1]) 79 | bottom = np.minimum(rect1[:,1]+rect1[:,3], rect2[:,1]+rect2[:,3]) 80 | 81 | intersect = np.maximum(0,right - left) * np.maximum(0,bottom - top) 82 | union = rect1[:,2]*rect1[:,3] + rect2[:,2]*rect2[:,3] - intersect 83 | iou = intersect / union 84 | iou = np.maximum(np.minimum(1, iou), 0) 85 | return iou 86 | 87 | @jit(nopython=True) 88 | def success_overlap(gt_bb, result_bb, n_frame): 89 | thresholds_overlap = np.arange(0, 1.05, 0.05) 90 | success = np.zeros(len(thresholds_overlap)) 91 | iou = np.ones(len(gt_bb)) * (-1) 92 | mask = np.sum(gt_bb > 0, axis=1) == 4 93 | iou[mask] = overlap_ratio(gt_bb[mask], result_bb[mask]) 94 | for i in range(len(thresholds_overlap)): 95 | success[i] = np.sum(iou > thresholds_overlap[i]) / float(n_frame) 96 | return success 97 | 98 | @jit(nopython=True) 99 | def success_error(gt_center, result_center, thresholds, n_frame): 100 | # n_frame = len(gt_center) 101 | success = np.zeros(len(thresholds)) 102 | dist = np.ones(len(gt_center)) * (-1) 103 | mask = np.sum(gt_center > 0, axis=1) == 2 104 | dist[mask] = np.sqrt(np.sum( 105 | np.power(gt_center[mask] - result_center[mask], 2), axis=1)) 106 | for i in range(len(thresholds)): 107 | success[i] = np.sum(dist <= thresholds[i]) / float(n_frame) 108 | 
return success 109 | 110 | @jit(nopython=True) 111 | def determine_thresholds(scores, resolution=100): 112 | """ 113 | Args: 114 | scores: 1d array of score 115 | """ 116 | scores = np.sort(scores[np.logical_not(np.isnan(scores))]) 117 | delta = np.floor(len(scores) / (resolution - 2)) 118 | idxs = np.floor(np.linspace(delta-1, len(scores)-delta, resolution-2)+0.5).astype(np.int32) 119 | thresholds = np.zeros((resolution)) 120 | thresholds[0] = - np.inf 121 | thresholds[-1] = np.inf 122 | thresholds[1:-1] = scores[idxs] 123 | return thresholds 124 | 125 | @jit(nopython=True) 126 | def calculate_f1(overlaps, score, bound, thresholds, N): 127 | overlaps = np.array(overlaps) 128 | overlaps[np.isnan(overlaps)] = 0 129 | score = np.array(score) 130 | score[np.isnan(score)] = 0 131 | precision = np.zeros(len(thresholds)) 132 | recall = np.zeros(len(thresholds)) 133 | for i, th in enumerate(thresholds): 134 | if th == - np.inf: 135 | idx = score > 0 136 | else: 137 | idx = score >= th 138 | if np.sum(idx) == 0: 139 | precision[i] = 1 140 | recall[i] = 0 141 | else: 142 | precision[i] = np.mean(overlaps[idx]) 143 | recall[i] = np.sum(overlaps[idx]) / N 144 | f1 = 2 * precision * recall / (precision + recall) 145 | return f1, precision, recall 146 | 147 | @jit(nopython=True) 148 | def calculate_expected_overlap(fragments, fweights): 149 | max_len = fragments.shape[1] 150 | expected_overlaps = np.zeros((max_len), np.float32) 151 | expected_overlaps[0] = 1 152 | 153 | # TODO Speed Up 154 | for i in range(1, max_len): 155 | mask = np.logical_not(np.isnan(fragments[:, i])) 156 | if np.any(mask): 157 | fragment = fragments[mask, 1:i+1] 158 | seq_mean = np.sum(fragment, 1) / fragment.shape[1] 159 | expected_overlaps[i] = np.sum(seq_mean * 160 | fweights[mask]) / np.sum(fweights[mask]) 161 | return expected_overlaps 162 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamFC/OTB2015_THOR_dynamic.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "lr": 0.641662, 4 | "penalty_k": 0.982769, 5 | "window_influence": 0.199673 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 5, 10 | "iou_tresh": 0.938244, 11 | "lb": 0.790933, 12 | "tukey_alpha": 0.232146, 13 | "lb_type": "dynamic", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamFC/OTB2015_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "lr": 0.641662, 4 | "penalty_k": 0.982769, 5 | "window_influence": 0.199673 6 | }, 7 | "THOR": { 8 | "K_st": 2, 9 | "K_lt": 9, 10 | "iou_tresh": 0.959911, 11 | "lb": 0.362411, 12 | "tukey_alpha": 0.859503, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamFC/OTB2015_vanilla.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "lr": 0.641662, 4 | "penalty_k": 0.982769, 5 | "window_influence": 0.199673 6 | }, 7 | "THOR": { 8 | "K_st": 0, 9 | "K_lt": 1, 10 | "iou_tresh": 1.0, 11 | "lb": 1.0, 12 | "tukey_alpha": 0, 13 | 
"dilation": 1000, 14 | "lb_type": "static", 15 | "modulate": false, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": true 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamFC/VOT2018_THOR_dynamic.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "lr": 0.641662, 4 | "penalty_k": 0.982769, 5 | "window_influence": 0.199673 6 | }, 7 | "THOR": { 8 | "K_st": 7, 9 | "K_lt": 10, 10 | "iou_tresh": 0.731403, 11 | "lb": 0.814394, 12 | "tukey_alpha": 0.919536, 13 | "lb_type": "dynamic", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamFC/VOT2018_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "lr": 0.641662, 4 | "penalty_k": 0.982769, 5 | "window_influence": 0.199673 6 | }, 7 | "THOR": { 8 | "K_st": 7, 9 | "K_lt": 6, 10 | "iou_tresh": 0.753538, 11 | "lb": 0.247764, 12 | "tukey_alpha": 0.462796, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamFC/VOT2018_vanilla.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "lr": 0.641662, 4 | "penalty_k": 0.982769, 5 | "window_influence": 0.199673 6 | }, 7 | "THOR": { 8 | "K_st": 0, 9 | "K_lt": 1, 10 | "iou_tresh": 1.0, 11 | "lb": 1.0, 12 | "tukey_alpha": 0, 13 | "dilation": 1000, 14 | "lb_type": "static", 15 | "modulate": false, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": true 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamMask/OTB2015_THOR_dynamic.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "window_influence": 0.42, 4 | "instance_size": 255, 5 | "base_size": 8, 6 | "out_size": 127, 7 | "seg_thr": 0.3, 8 | "penalty_k": 0.908068, 9 | "lr": 0.272863 10 | }, 11 | "anchors":{ 12 | "stride": 8, 13 | "ratios": [0.33, 0.5, 1, 2, 3], 14 | "scales": [8], 15 | "round_dight": 0 16 | }, 17 | "THOR": { 18 | "K_st": 3, 19 | "K_lt": 3, 20 | "iou_tresh": 0.992637, 21 | "lb": 0.832829, 22 | "tukey_alpha": 0.310958, 23 | "lb_type": "dynamic", 24 | "modulate": true, 25 | "dilation": 10, 26 | "context_temp": 0.5, 27 | "viz": false, 28 | "verbose": false, 29 | "vanilla": false 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamMask/OTB2015_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "window_influence": 0.42, 4 | "instance_size": 255, 5 | "base_size": 8, 6 | "out_size": 127, 7 | "seg_thr": 0.3, 8 | "penalty_k": 0.53364, 9 | "lr": 0.176627 10 | }, 11 | "anchors":{ 12 | "stride": 8, 13 | "ratios": [0.33, 0.5, 1, 2, 3], 14 | "scales": [8], 15 | "round_dight": 0 16 | }, 17 | "THOR": { 18 | "K_st": 4, 19 | "K_lt": 7, 20 | "iou_tresh": 0.985634, 21 | "lb": 0.397495, 22 | 
"tukey_alpha": 0.450157, 23 | "lb_type": "ensemble", 24 | "modulate": true, 25 | "dilation": 10, 26 | "context_temp": 0.5, 27 | "viz": false, 28 | "verbose": false, 29 | "vanilla": false 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamMask/OTB2015_vanilla.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "window_influence": 0.42, 4 | "instance_size": 255, 5 | "base_size": 8, 6 | "out_size": 127, 7 | "seg_thr": 0.3, 8 | "penalty_k": 0.04, 9 | "lr": 0.25 10 | }, 11 | "anchors":{ 12 | "stride": 8, 13 | "ratios": [0.33, 0.5, 1, 2, 3], 14 | "scales": [8], 15 | "round_dight": 0 16 | }, 17 | "THOR": { 18 | "K_st": 0, 19 | "K_lt": 1, 20 | "iou_tresh": 1.0, 21 | "lb": 1.0, 22 | "tukey_alpha": 0, 23 | "dilation": 1000, 24 | "lb_type": "static", 25 | "modulate": false, 26 | "context_temp": 0.5, 27 | "viz": false, 28 | "verbose": false, 29 | "vanilla": true 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamMask/VOT2018_THOR_dynamic.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "window_influence": 0.42, 4 | "instance_size": 255, 5 | "base_size": 8, 6 | "out_size": 127, 7 | "seg_thr": 0.3, 8 | "penalty_k": 0.087875, 9 | "lr": 0.651924 10 | }, 11 | "anchors":{ 12 | "stride": 8, 13 | "ratios": [0.33, 0.5, 1, 2, 3], 14 | "scales": [8], 15 | "round_dight": 0 16 | }, 17 | "THOR": { 18 | "K_st": 5, 19 | "K_lt": 5, 20 | "iou_tresh": 0.772009, 21 | "lb": 0.789802, 22 | "tukey_alpha": 0.981623, 23 | "lb_type": "dynamic", 24 | "modulate": true, 25 | "dilation": 10, 26 | "context_temp": 0.5, 27 | "viz": false, 28 | "verbose": false, 29 | "vanilla": false 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamMask/VOT2018_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "window_influence": 0.42, 4 | "instance_size": 255, 5 | "base_size": 8, 6 | "out_size": 127, 7 | "seg_thr": 0.3, 8 | "penalty_k": 0.04, 9 | "lr": 0.25 10 | }, 11 | "anchors":{ 12 | "stride": 8, 13 | "ratios": [0.33, 0.5, 1, 2, 3], 14 | "scales": [8], 15 | "round_dight": 0 16 | }, 17 | "THOR": { 18 | "K_st": 6, 19 | "K_lt": 3, 20 | "iou_tresh": 0.742568, 21 | "lb": 0.27996, 22 | "tukey_alpha": 0.697998, 23 | "lb_type": "ensemble", 24 | "modulate": true, 25 | "dilation": 10, 26 | "context_temp": 0.5, 27 | "viz": false, 28 | "verbose": false, 29 | "vanilla": false 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamMask/VOT2018_vanilla.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "window_influence": 0.42, 4 | "instance_size": 255, 5 | "base_size": 8, 6 | "out_size": 127, 7 | "seg_thr": 0.3, 8 | "penalty_k": 0.04, 9 | "lr": 0.25 10 | }, 11 | "anchors":{ 12 | "stride": 8, 13 | "ratios": [0.33, 0.5, 1, 2, 3], 14 | "scales": [8], 15 | "round_dight": 0 16 | }, 17 | "THOR": { 18 | "K_st": 0, 19 | "K_lt": 1, 20 | "iou_tresh": 1.0, 21 | "lb": 1.0, 22 | "tukey_alpha": 0, 23 | "dilation": 1000, 24 | "lb_type": "static", 25 | "modulate": false, 26 | "context_temp": 0.5, 27 | "viz": false, 28 | "verbose": false, 29 | "vanilla": true 30 | } 31 | } 32 | 33 | 
-------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/GOT10k_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.692764, 4 | "window_influence": 0.357794, 5 | "lr": 0.73434 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 7, 10 | "iou_tresh": 1.0, 11 | "lb": 0.281661, 12 | "tukey_alpha": 0.567957, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/LaSOT_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.692764, 4 | "window_influence": 0.357794, 5 | "lr": 0.73434 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 7, 10 | "iou_tresh": 1.0, 11 | "lb": 0.281661, 12 | "tukey_alpha": 0.567957, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/OTB2015_THOR_dynamic.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.385994, 4 | "window_influence": 0.357794, 5 | "lr": 0.38457 6 | }, 7 | "THOR": { 8 | "K_st": 7, 9 | "K_lt": 7, 10 | "iou_tresh": 0.980916, 11 | "lb": 0.883665, 12 | "tukey_alpha": 0.0, 13 | "lb_type": "dynamic", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/OTB2015_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.692764, 4 | "window_influence": 0.357794, 5 | "lr": 0.73434 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 7, 10 | "iou_tresh": 1.0, 11 | "lb": 0.281661, 12 | "tukey_alpha": 0.567957, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/OTB2015_vanilla.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.055, 4 | "window_influence": 0.42, 5 | "lr": 0.295 6 | }, 7 | "THOR": { 8 | "K_st": 0, 9 | "K_lt": 1, 10 | "iou_tresh": 1.0, 11 | "lb": 1.0, 12 | "tukey_alpha": 0, 13 | "dilation": 1000, 14 | "lb_type": "static", 15 | "modulate": false, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": true 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/OXUVA_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.692764, 4 | "window_influence": 0.357794, 5 | "lr": 0.73434 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 7, 10 | "iou_tresh": 1.0, 11 | "lb": 0.281661, 12 | 
"tukey_alpha": 0.567957, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/UAV123_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.692764, 4 | "window_influence": 0.357794, 5 | "lr": 0.73434 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 7, 10 | "iou_tresh": 1.0, 11 | "lb": 0.281661, 12 | "tukey_alpha": 0.567957, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/UAV20L_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.692764, 4 | "window_influence": 0.357794, 5 | "lr": 0.73434 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 7, 10 | "iou_tresh": 1.0, 11 | "lb": 0.281661, 12 | "tukey_alpha": 0.567957, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/VOT2018_THOR_dynamic.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.0595839, 4 | "window_influence": 0.357794, 5 | "lr": 0.597262 6 | }, 7 | "THOR": { 8 | "K_st": 3, 9 | "K_lt": 4, 10 | "iou_tresh": 0.943046, 11 | "lb": 0.759742, 12 | "tukey_alpha": 0.451395, 13 | "lb_type": "dynamic", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/VOT2018_THOR_ensemble.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.0584502, 4 | "window_influence": 0.357794, 5 | "lr": 0.353687 6 | }, 7 | "THOR": { 8 | "K_st": 9, 9 | "K_lt": 11, 10 | "iou_tresh": 0.875719, 11 | "lb": 0.394676, 12 | "tukey_alpha": 0.374259, 13 | "lb_type": "ensemble", 14 | "modulate": true, 15 | "dilation": 10, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": false 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/configs/SiamRPN/VOT2018_vanilla.json: -------------------------------------------------------------------------------- 1 | { 2 | "tracker": { 3 | "penalty_k": 0.055, 4 | "window_influence": 0.42, 5 | "lr": 0.295 6 | }, 7 | "THOR": { 8 | "K_st": 0, 9 | "K_lt": 1, 10 | "iou_tresh": 1.0, 11 | "lb": 1.0, 12 | "tukey_alpha": 0, 13 | "dilation": 1000, 14 | "lb_type": "static", 15 | "modulate": false, 16 | "context_temp": 0.5, 17 | "viz": false, 18 | "verbose": false, 19 | "vanilla": true 20 | } 21 | } 22 | 23 | -------------------------------------------------------------------------------- /DeepMTA_code/data/download_links_for_tracking_datasets.txt: 
-------------------------------------------------------------------------------- 1 | Popular Tracking datasets: 2 | UAV123: https://pan.baidu.com/s/1AhNnfjF4fZe14sUFefU3iA password: 2iq4 3 | 4 | VOT2018: https://pan.baidu.com/s/1MOWZ5lcxfF0wsgSuj5g4Yw password: e5eh 5 | 6 | VisDrone2019: https://pan.baidu.com/s/1Y6ubKHuYX65mK_iDVSfKPQ password: yxb6 7 | 8 | OTB2015: https://pan.baidu.com/s/1ZjKgRMYSHfR_w3Z7iQEkYA password: t5i1 9 | 10 | DTB70: https://pan.baidu.com/s/1kfHrArw0aVhGPSM91WHomw password: e7qm 11 | 12 | TLP50 (Long-Term): https://amoudgl.github.io/tlp/ 13 | 14 | ILSVRC2015 VID: https://pan.baidu.com/s/1CXWgpAG4CYpk-WnaUY5mAQ password: uqzj 15 | 16 | NFS: https://pan.baidu.com/s/1ei54oKNA05iBkoUwXPOB7g password: vng1 17 | 18 | GOT10k: https://pan.baidu.com/s/172oiQPA_Ky2iujcW5Irlow password: uxds 19 | 20 | UAVDT: https://pan.baidu.com/s/1K8oo53mPYCxUFVMXIGLhVA password: keva 21 | 22 | YTB-VOS: https://pan.baidu.com/s/1WMB0q9GJson75QBFVfeH5A password: sf1m 23 | 24 | YTB-Crop511 (used in siamrpn++ and siammask): https://pan.baidu.com/s/112zLS_02-Z2ouKGbnPlTjw password: ebq1 25 | 26 | TColor128: https://pan.baidu.com/s/1v4J6zWqZwj8fHi5eo5EJvQ password: 26d4 27 | 28 | DAVIS2017: https://pan.baidu.com/s/1JTsumpnkWotEJQE7KQmh6A password: c9qp 29 | 30 | YTB&VID (used in siamrpn): https://pan.baidu.com/s/1gF8PSZDzw-7EAVrdYHQwsA password: 6vkz 31 | 32 | TrackingNet: https://pan.baidu.com/s/1PXSRAqcw-KMfBIJYUtI4Aw code: nkb9 (Note that this link is provided by SiamFC++ author) 33 | 34 | TAO: A Large-Scale Benchmark for Tracking Any Object: https://github.com/TAO-Dataset/tao 35 | 36 | vot 2018 and vot 2019:   链接: https://pan.baidu.com/s/1q6lv3cUhezBb5pmdj3BRGw 提取码: d7r3 37 | 38 | vot 2018 LT:       链接: https://pan.baidu.com/s/16Q4_sxhBjmddIHU8b7XK3w 提取码: 67xf 39 | 40 | vot 2019 LT:       链接:https://pan.baidu.com/s/1z9HBPNprbt2gb2RGzRJkwA 提取码:7yq5 41 | 42 | vot 2019 rgb-thermal:   链接: https://pan.baidu.com/s/1oT8qFmKBpYa3VlXP1ZwfCA 提取码: mn1b 43 | 44 | -------------------------------------------------------------------------------- /DeepMTA_code/data/get_test_otb2015_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # OTB2015 4 | mkdir OTB2015 && cd OTB2015 5 | baseurl="http://cvlab.hanyang.ac.kr/tracker_benchmark" 6 | wget "$baseurl/datasets.html" 7 | cat datasets.html | grep '\.zip' | sed -e 's/\.zip".*/.zip/' | sed -e s'/.*"//' >files.txt 8 | cat files.txt | xargs -n 1 -P 8 -I {} wget -c "$baseurl/{}" 9 | ls *.zip | xargs -n 1 unzip 10 | rm -r __MACOSX/ 11 | cd .. 
12 | -------------------------------------------------------------------------------- /DeepMTA_code/environment.yml: -------------------------------------------------------------------------------- 1 | name: deepmta 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - blas=1.0=mkl 7 | - ca-certificates=2019.1.23=0 8 | - certifi=2019.3.9=py37_0 9 | - cffi=1.12.3=py37h2e261b9_0 10 | - cudatoolkit=10.0.130=0 11 | - freetype=2.9.1=h8a8886c_1 12 | - intel-openmp=2019.4=243 13 | - jpeg=9b=h024ee3a_2 14 | - libedit=3.1.20181209=hc058e9b_0 15 | - libffi=3.2.1=hd88cf55_4 16 | - libgcc-ng=8.2.0=hdf63c60_1 17 | - libgfortran-ng=7.3.0=hdf63c60_0 18 | - libpng=1.6.37=hbc83047_0 19 | - libstdcxx-ng=8.2.0=hdf63c60_1 20 | - libtiff=4.0.10=h2733197_2 21 | - mkl=2019.4=243 22 | - mkl_fft=1.0.12=py37ha843d7b_0 23 | - mkl_random=1.0.2=py37hd81dba3_0 24 | - ncurses=6.1=he6710b0_1 25 | - ninja=1.9.0=py37hfd86e86_0 26 | - numpy=1.16.4=py37h7e9f1db_0 27 | - numpy-base=1.16.4=py37hde5b4d6_0 28 | - olefile=0.46=py37_0 29 | - openssl=1.1.1c=h7b6447c_1 30 | - pillow=6.0.0=py37h34e0f95_0 31 | - pip=19.1.1=py37_0 32 | - pycparser=2.19=py37_0 33 | - python=3.7.3=h0371630_0 34 | - pytorch=1.1.0=py3.7_cuda10.0.130_cudnn7.5.1_0 35 | - readline=7.0=h7b6447c_5 36 | - setuptools=41.0.1=py37_0 37 | - six=1.12.0=py37_0 38 | - sqlite=3.28.0=h7b6447c_0 39 | - tk=8.6.8=hbc83047_0 40 | - torchvision=0.3.0=py37_cu10.0.130_1 41 | - wheel=0.33.4=py37_0 42 | - xz=5.2.4=h14c3975_4 43 | - zlib=1.2.11=h7b6447c_3 44 | - zstd=1.3.7=h0b5b093_0 45 | - pip: 46 | - backcall==0.1.0 47 | - colorama==0.4.1 48 | - cycler==0.10.0 49 | - cython==0.29.10 50 | - decorator==4.4.0 51 | - fire==0.1.3 52 | - got10k==0.1.3 53 | - imutils==0.5.3 54 | - ipdb==0.12 55 | - ipython==7.5.0 56 | - ipython-genutils==0.2.0 57 | - jedi==0.13.3 58 | - kiwisolver==1.1.0 59 | - llvmlite==0.29.0 60 | - matplotlib==3.1.0 61 | - numba==0.44.0 62 | - opencv-python==4.1.0.25 63 | - pandas==0.24.2 64 | - parso==0.4.0 65 | - pexpect==4.7.0 66 | - pickleshare==0.7.5 67 | - prompt-toolkit==2.0.9 68 | - ptyprocess==0.6.0 69 | - pygments==2.4.2 70 | - pyparsing==2.4.0 71 | - python-dateutil==2.8.0 72 | - pytz==2019.1 73 | - scipy==1.3.0 74 | - shapely==1.6.4.post2 75 | - tqdm==4.32.1 76 | - traitlets==4.3.2 77 | - wcwidth==0.1.7 78 | - wget==3.2 79 | - yacs==0.1.6 80 | - scikit-image 81 | prefix: ~/anaconda3/envs/deepmta 82 | -------------------------------------------------------------------------------- /DeepMTA_code/network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.utils.model_zoo as model_zoo 4 | from torchvision import models 5 | 6 | import torchvision.ops as torchops 7 | 8 | import math 9 | from torch.autograd import Variable 10 | import pdb 11 | 12 | from torch.nn.parameter import Parameter 13 | import torch.nn.functional as F 14 | from torch.nn.modules.utils import _single, _pair, _triple 15 | 16 | import numpy as np 17 | import cv2 18 | import pdb 19 | 20 | 21 | 22 | class traj_critic(nn.Module): 23 | def __init__(self): 24 | super(traj_critic, self).__init__() 25 | #### ResNet model 26 | caffenet = models.resnet18(pretrained=True) 27 | self.encoder = nn.Sequential(*list(caffenet.children())[:-1]) 28 | 29 | self.trajBBox_linear = nn.Linear(4, 32) 30 | self.trajScore_linear = nn.Linear(10, 32) 31 | self.imgReducDIM_linear = nn.Linear(2560, 512) 32 | 33 | self.regressor = nn.Sequential( 34 | nn.Linear(5472, 512), 35 | nn.ReLU(inplace=True), 36 | nn.Dropout(), 
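            # (note added for clarity) the fused 5472-D feature is reduced to 512-D
            # above; the next layer maps it to the single scalar returned as
            # pred_traj_score in forward() below.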
37 | nn.Linear(512, 1), 38 | ) 39 | self.relu = nn.ReLU(inplace=True) 40 | 41 | 42 | 43 | 44 | def forward(self, img, attMap, targetImg, targetAtten, initTarget, trajBBox, trajScore): 45 | img_feat = self.encoder(img.cuda()) ## torch.Size([10, 512, 1, 1]) 46 | img_feat = self.relu(img_feat) 47 | 48 | attMap_feat = self.encoder(attMap.cuda()) 49 | attMap_feat = self.relu(attMap_feat) 50 | 51 | targetImg_feat = self.encoder(targetImg.cuda()) 52 | targetImg_feat = self.relu(targetImg_feat) 53 | 54 | targetAtt_feat = self.encoder(targetAtten.cuda()) 55 | targetAtt_feat = self.relu(targetAtt_feat) 56 | 57 | initTarget_feat = self.encoder(initTarget.cuda()) 58 | initTarget_feat = self.relu(initTarget_feat) 59 | 60 | 61 | 62 | fused1 = torch.cat((img_feat, attMap_feat), 1) ## torch.Size([10, 1024, 1, 1]) 63 | fused2 = torch.cat((targetImg_feat, targetAtt_feat), 1) ## torch.Size([10, 1024, 1, 1]) 64 | fused2 = torch.cat((fused2, initTarget_feat), 1) 65 | fused3 = torch.cat((fused1, fused2), 1) ## torch.Size([10, 2560, 1, 1]) 66 | 67 | fused3 = torch.squeeze(fused3, dim=2) 68 | fused3 = torch.squeeze(fused3, dim=2) 69 | fused3 = self.imgReducDIM_linear(fused3) 70 | fused3 = fused3.view(-1) 71 | 72 | trajBBox_feat = self.trajBBox_linear(trajBBox.cuda()) 73 | trajBBox_feat = trajBBox_feat.view(-1) 74 | trajBBox_feat = self.relu(trajBBox_feat) 75 | 76 | trajScore = torch.transpose(trajScore, 0, 1) 77 | trajScore_feat = self.trajScore_linear(trajScore.cuda()) 78 | trajScore_feat = trajScore_feat.view(-1) 79 | trajScore_feat = self.relu(trajScore_feat) 80 | 81 | fused4 = torch.cat((trajBBox_feat, trajScore_feat)) ## 352-D 82 | final_feat = torch.cat((fused3, fused4)) ## 5472-D 83 | final_feat = self.relu(final_feat) 84 | pred_traj_score = self.regressor(final_feat) 85 | 86 | return pred_traj_score 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | def axis_aligned_iou(boxA, boxB): 123 | # make sure that x1,y1,x2,y2 of a box are valid 124 | assert(boxA[0] <= boxA[2]) 125 | assert(boxA[1] <= boxA[3]) 126 | assert(boxB[0] <= boxB[2]) 127 | assert(boxB[1] <= boxB[3]) 128 | 129 | # determine the (x, y)-coordinates of the intersection rectangle 130 | xA = max(boxA[0], boxB[0]) 131 | yA = max(boxA[1], boxB[1]) 132 | xB = min(boxA[2], boxB[2]) 133 | yB = min(boxA[3], boxB[3]) 134 | 135 | # compute the area of intersection rectangle 136 | interArea = max(0, xB - xA + 1) * max(0, yB - yA + 1) 137 | 138 | # compute the area of both the prediction and ground-truth 139 | # rectangles 140 | boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1) 141 | boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1) 142 | 143 | # compute the intersection over union by taking the intersection 144 | # area and dividing it by the sum of prediction + ground-truth 145 | # areas - the interesection area 146 | iou = interArea / float(boxAArea + boxBArea - interArea) 147 | 148 | # return the intersection over union value 149 | return iou 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 
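
# ---------------------------------------------------------------------------
# Illustrative usage sketch (added note, not part of the original network.py):
# a quick sanity check of axis_aligned_iou() defined above, assuming boxes are
# given as [x1, y1, x2, y2] with x1 <= x2 and y1 <= y2.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    box_a = [10, 10, 50, 50]
    box_b = [30, 30, 70, 70]
    # intersection is 21 x 21 = 441 px (with the inclusive "+1" convention used
    # above), union is 1681 + 1681 - 441 = 2921 px, so the printed IoU is ~0.151
    print(axis_aligned_iou(box_a, box_b))
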
224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | -------------------------------------------------------------------------------- /DeepMTA_code/scripts/transform_oxuva_results_txt_to_csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import cv2 4 | import time 5 | import oxuva 6 | import pdb 7 | from skimage import measure 8 | import json 9 | import pdb 10 | import cv2 11 | import os 12 | import pandas as pd 13 | resultpath= '/home/wangxiao/Documents/deepMTA_project/DeepMTA_TCSVT_project/benchmark/results/oxuva_txt_files/' 14 | videopath="/home/wangxiao/dataset/OxUvA/images/test/" 15 | videos=os.listdir(videopath) 16 | txtFiles = os.listdir(resultpath) 17 | 18 | attMap_path = "/home/wangxiao/Documents/deepMTA_project/DeepMTA_TCSVT_project/temp_DIR_TO_SAVE_static_Global_attentionMap/" 19 | 20 | # export PYTHONPATH="/home/wangxiao/Documents/deepMTA_project/DeepMTA_TCSVT_project/long-term-tracking-benchmark-master/python:$PYTHONPATH" 21 | 22 | for i in range(len(videos)): 23 | txtName = videos[i] + ".txt" 24 | preds = np.loadtxt(resultpath + txtName, delimiter=',') 25 | 26 | print("==>> txtName: ", txtName) 27 | xmin=[] 28 | xmax=[] 29 | ymin=[] 30 | ymax=[] 31 | video_ids=[] 32 | obj_ids=[] 33 | frame_nums=[] 34 | presents=[] 35 | scores=[] 36 | video_id=videos[i][0:7] 37 | if(len(videos[i])==7): 38 | obj_id='obj0000' 39 | elif(videos[i][-1]=='2'): 40 | obj_id='obj0001' 41 | else: 42 | obj_id='obj0002' 43 | 44 | score = 0.5 45 | # l=result['res'] 46 | 47 | imgs = os.listdir(videopath+videos[i]+'/') 48 | imgs = np.sort(imgs) 49 | # pdb.set_trace() 50 | 51 | image = cv2.imread(videopath+videos[i]+'/'+imgs[0]) 52 | imgh = image.shape[0] 53 | imgw = image.shape[1] 54 | 55 | attvideo_attPath = attMap_path + videos[i] + "/" 56 | attFiles = os.listdir(attvideo_attPath) 57 | 58 | 59 | occurFlag_list = [] 60 | 61 | if len(attFiles)+1 == len(imgs): 62 | ############################################################################### 63 | #### Scan the Attention Map 64 | ############################################################################### 65 | occurFlag_list.append(1) 66 | for j in range(len(imgs)-1): 67 | attMap = cv2.imread(attvideo_attPath + attFiles[j]) 68 | ret, static_atttentonMAP = cv2.threshold(attMap, 5, 255, cv2.THRESH_BINARY) 69 | label_image = measure.label(static_atttentonMAP) 70 | props = measure.regionprops(label_image) 71 | 72 | if len(props) > 0: 73 | occurFlag_list.append(1) 74 | else: 75 | occurFlag_list.append(0) 76 | else: 77 | for j in range(len(imgs)): 78 | occurFlag_list.append(1) 79 | 80 | # pdb.set_trace() 81 | 82 | for j in range(len(imgs)): 83 | 84 | x=preds[j][0] 85 | y=preds[j][1] 86 | w=preds[j][2] 87 | h=preds[j][3] 88 | 89 | ## results relative to original image size. 
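        # (note added for clarity) the tracker's x/y/w/h box is converted to corner
        # coordinates and divided by the image width/height, so the csv written below
        # stores relative coordinates in [0, 1], rounded to 4 decimal places.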
90 | x1=x/imgw 91 | x2=(x+w)/imgw 92 | y1=y/imgh 93 | y2=(y+h)/imgh 94 | 95 | 96 | if j >= 5 and np.sum(occurFlag_list[j-5:j]) == 0: 97 | present = 'False' 98 | print("==>> got one missing ......") 99 | else: 100 | present = 'True' 101 | 102 | x1=round(x1,4) 103 | x2=round(x2,4) 104 | y1=round(y1,4) 105 | y2=round(y2,4) 106 | 107 | frame=imgs[j][0:6] 108 | 109 | if(frame=='000000'): 110 | frame_num=0 111 | else: 112 | frame_num=frame.lstrip('0') 113 | 114 | xmin.append(x1) 115 | xmax.append(x2) 116 | ymin.append(y1) 117 | ymax.append(y2) 118 | video_ids.append(video_id) 119 | obj_ids.append(obj_id) 120 | frame_nums.append(frame_num) 121 | presents.append(present) 122 | scores.append(score) 123 | 124 | # pdb.set_trace() 125 | 126 | dataframe=pd.DataFrame({'video_id':video_ids,'object_id':obj_ids,'frame_num':frame_nums,'present':presents,\ 127 | 'score':scores,'xmin':xmin,'xmax':xmax,'ymin':ymin,'ymax':ymax}) 128 | savepath='./oxuva_csv_results_missFlag/' +videos[i][0:7]+'_'+obj_id+'.csv' 129 | columns=['video_id','object_id','frame_num','present','score','xmin','xmax','ymin','ymax'] 130 | 131 | dataframe.to_csv(savepath,index=False,columns=columns,header=None) 132 | 133 | 134 | # pdb.set_trace() 135 | -------------------------------------------------------------------------------- /DeepMTA_code/temp_DIR_TO_SAVE_static_Global_attentionMap/mkdir_your_self.txt: -------------------------------------------------------------------------------- 1 | sss -------------------------------------------------------------------------------- /DeepMTA_code/testing.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import numpy as np 3 | from types import SimpleNamespace 4 | import json 5 | 6 | from benchmark.vot import test_vot, eval_vot 7 | from benchmark.otb import test_otb, eval_otb 8 | from benchmark.got10k import test_got 9 | # from benchmark.got10ktrainval import test_gottrainval 10 | from benchmark.lasot import test_lasot, eval_lasot 11 | # from benchmark.uav20l import test_uav20l, eval_uav20l 12 | from benchmark.uav123 import test_uav123, eval_uav123 13 | # from benchmark.oxuva import test_oxuva 14 | # from benchmark.tc128 import test_tc128, eval_tc128 15 | 16 | from trackers.tracker import SiamFC_Tracker, SiamRPN_Tracker, SiamMask_Tracker 17 | from benchmark.bench_utils.benchmark_helper import load_dataset 18 | import warnings 19 | warnings.filterwarnings("ignore") 20 | import ast 21 | import pdb 22 | 23 | parser = argparse.ArgumentParser(description='Test Trackers on Benchmarks.') 24 | parser.add_argument('-d', '--dataset', dest='dataset', default='OTB2015', 25 | help='Dataset on which the benchmark is run [VOT2018, OTB2015, GOT10k, LaSOT, UAV20L]') 26 | parser.add_argument('-t', '--tracker', dest='tracker', default='SiamRPN', 27 | help='Name of the tracker [SiamFC, SiamRPN, SiamMask]') 28 | parser.add_argument('--vanilla', action='store_true', 29 | help='Run the tracker without THOR') 30 | parser.add_argument('-v', '--viz', action='store_true', default=False, 31 | help='Show the tracked scene, the stored templated and the modulated view') 32 | parser.add_argument('--verbose', action='store_true', 33 | help='Print additional info about THOR') 34 | parser.add_argument('--lb_type', type=str, default='dynamic', 35 | help='Specify the type of lower bound [dynamic, ensemble]') 36 | parser.add_argument('--spec_video', type=str, default='', 37 | help='Pick a specific video by name, e.g. 
"lemming" on OTB2015') 38 | parser.add_argument('--save_path', dest='save_path', default='Tracker', 39 | help='Name where the tracked trajectory is stored') 40 | 41 | def load_cfg(args): 42 | json_path = f"configs/{args.tracker}/" 43 | json_path += f"{args.dataset}_" 44 | if args.vanilla: 45 | json_path += "vanilla.json" 46 | else: 47 | json_path += f"THOR_{args.lb_type}.json" 48 | 49 | # pdb.set_trace() 50 | 51 | cfg = json.load(open(json_path)) 52 | return cfg 53 | 54 | 55 | def run_bench(delete_after=False): 56 | args = parser.parse_args() 57 | 58 | cfg = load_cfg(args) 59 | cfg['THOR']['viz'] = args.viz 60 | cfg['THOR']['verbose'] = args.verbose 61 | 62 | # setup tracker and dataset 63 | if args.tracker == 'SiamFC': 64 | tracker = SiamFC_Tracker(cfg) 65 | elif args.tracker == 'SiamRPN': 66 | tracker = SiamRPN_Tracker(cfg) 67 | elif args.tracker == 'SiamMask': 68 | tracker = SiamMask_Tracker(cfg) 69 | else: 70 | raise ValueError(f"Tracker {args.tracker} does not exist.") 71 | 72 | 73 | 74 | dataset = load_dataset(args.dataset) 75 | # optionally filter for a specific videos 76 | if args.spec_video: 77 | 78 | # pdb.set_trace() 79 | dataset = {args.spec_video: dataset[args.spec_video]} 80 | 81 | if args.dataset=="VOT2018": 82 | test_bench, eval_bench = test_vot, eval_vot 83 | elif args.dataset=="OTB2015": 84 | test_bench, eval_bench = test_otb, eval_otb 85 | elif args.dataset=="GOT10k": 86 | test_bench = test_got 87 | elif args.dataset=="GOT10k_train_val": 88 | test_bench = test_gottrainval 89 | elif args.dataset=="LaSOT": 90 | test_bench, eval_bench = test_lasot, eval_lasot 91 | elif args.dataset=="UAV20L": 92 | test_bench, eval_bench = test_uav20l, eval_uav20l 93 | elif args.dataset=="UAV123": 94 | test_bench, eval_bench = test_uav123, eval_uav123 95 | elif args.dataset=="OXUVA": 96 | test_bench = test_oxuva 97 | elif args.dataset=="TC128": 98 | test_bench, eval_bench = test_tc128, eval_tc128 99 | else: 100 | raise NotImplementedError(f"Procedure for {args.dataset} does not exist.") 101 | 102 | # testing 103 | total_lost = 0 104 | speed_list = [] 105 | 106 | if args.dataset=="OTB2015": 107 | print("==>> No processing for the json file ... ") 108 | else: 109 | dataset = ast.literal_eval(dataset) 110 | # pdb.set_trace() 111 | 112 | for v_id, video in enumerate(dataset.keys(), start=1): 113 | tracker.temp_mem.do_full_init = True 114 | speed = test_bench(v_id, tracker, dataset[video], args) 115 | speed_list.append(speed) 116 | 117 | 118 | if args.dataset=="GOT10k": 119 | print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$") 120 | print("==>> Please evaluate online for GOT10k dataset ... ") 121 | print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$") 122 | elif args.dataset=="OxUvA": 123 | print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$") 124 | print("==>> Please evaluate online for OxUvA dataset ... 
") 125 | print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$") 126 | else: 127 | # evaluation 128 | # pdb.set_trace() 129 | bench_res = eval_bench(args.save_path, delete_after) 130 | print(bench_res) 131 | mean_fps = np.mean(np.array(speed_list)) 132 | bench_res['mean_fps'] = mean_fps 133 | print(bench_res) 134 | 135 | return bench_res 136 | 137 | 138 | 139 | if __name__ == '__main__': 140 | run_bench() 141 | 142 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamFC/config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # -------------------------------------------------------- 6 | 7 | class TrackerConfig(object): 8 | exemplar_sz = 127 9 | instance_sz = 255 10 | context = 0.5 11 | scale_num = 3 12 | scale_step = 1.0375 13 | lr = 0.641662 14 | penalty_k = 0.982769 15 | window_influence = 0.199673 16 | response_sz = 17 17 | response_up = 16 18 | upscale_sz = response_up*response_up 19 | total_stride = 8 20 | adjust_scale = 0.001 21 | 22 | def update(self, cfg): 23 | for k, v in cfg.items(): 24 | if hasattr(self, k): 25 | setattr(self, k, v) 26 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamFC/model.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/DeepMTA_code/trackers/SiamFC/model.pth -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamFC/net.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # -------------------------------------------------------- 6 | 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | class SiamFC(nn.Module): 11 | 12 | def __init__(self): 13 | super(SiamFC, self).__init__() 14 | self.feature = nn.Sequential( 15 | # conv1 16 | nn.Conv2d(3, 96, 11, 2), 17 | nn.BatchNorm2d(96, eps=1e-6, momentum=0.05), 18 | nn.ReLU(inplace=True), 19 | nn.MaxPool2d(3, 2), 20 | # conv2 21 | nn.Conv2d(96, 256, 5, 1, groups=2), 22 | nn.BatchNorm2d(256, eps=1e-6, momentum=0.05), 23 | nn.ReLU(inplace=True), 24 | nn.MaxPool2d(3, 2), 25 | # conv3 26 | nn.Conv2d(256, 384, 3, 1), 27 | nn.BatchNorm2d(384, eps=1e-6, momentum=0.05), 28 | nn.ReLU(inplace=True), 29 | # conv4 30 | nn.Conv2d(384, 384, 3, 1, groups=2), 31 | nn.BatchNorm2d(384, eps=1e-6, momentum=0.05), 32 | nn.ReLU(inplace=True), 33 | # conv5 34 | nn.Conv2d(384, 256, 3, 1, groups=2)) 35 | 36 | def forward(self, z, x): 37 | z = self.feature(z) 38 | x = self.feature(x) 39 | 40 | # fast cross correlation 41 | n, c, h, w = x.size() 42 | x = x.view(1, n * c, h, w) 43 | out = F.conv2d(x, z, groups=n) 44 | out = out.view(n, 1, out.size(-2), out.size(-1)) 45 | 46 | # adjust the scale of responses 47 | out = 0.001 * out + 0.0 48 | 49 | return out 50 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamFC/siamfc.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # 
THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # -------------------------------------------------------- 6 | 7 | import torch 8 | import numpy as np 9 | from .utils import to_one_indexed, to_zero_indexed, crop_and_resize 10 | from .config import TrackerConfig 11 | 12 | def SiamFC_init(im, target_pos, target_sz, cfg): 13 | state = {} 14 | state['im_h'] = im.shape[0] 15 | state['im_w'] = im.shape[1] 16 | target_pos, target_sz = to_zero_indexed(target_pos, target_sz) 17 | 18 | # set the tracker_config 19 | p = TrackerConfig() 20 | p.update(cfg) 21 | 22 | # create hanning window 23 | p.hann_window = np.outer(np.hanning(p.upscale_sz), np.hanning(p.upscale_sz)) 24 | p.hann_window /= p.hann_window.sum() 25 | 26 | # search scale factors 27 | p.scale_factors = p.scale_step ** np.linspace( 28 | -(p.scale_num // 2), 29 | p.scale_num // 2, p.scale_num) 30 | p.scale_factors_glob = np.array([0.5, 0.75, 1.0, 1.25, 1.75]) 31 | 32 | # exemplar image 33 | avg_chans = np.mean(im, axis=(0, 1)) 34 | 35 | # important params for later use 36 | context = p.context * np.sum(target_sz) 37 | p.z_sz = np.sqrt(np.prod(target_sz + context)) 38 | p.x_sz = p.z_sz * p.instance_sz / p.exemplar_sz 39 | 40 | target_pos, target_sz = to_one_indexed(target_pos, target_sz) 41 | 42 | # fill the state dict 43 | state['device'] = torch.device("cuda" if torch.cuda.is_available() else "cpu") 44 | state['target_pos'] = target_pos 45 | state['target_sz'] = target_sz 46 | state['score'] = 1.0 47 | state['p'] = p 48 | state['avg_chans'] = avg_chans 49 | return state 50 | 51 | def SiamFC_track(state, im, temp_mem): 52 | p = state['p'] 53 | avg_chans = state['avg_chans'] 54 | inst_sz = p.instance_sz 55 | scale_factors = p.scale_factors 56 | old_pos, old_sz = to_zero_indexed(state['target_pos'], state['target_sz']) 57 | dev = state['device'] 58 | 59 | # get instance images 60 | ims = [crop_and_resize( 61 | image=im, center=old_pos, size=p.x_sz * f, 62 | out_size=inst_sz, 63 | pad_color=avg_chans) for f in scale_factors] 64 | ims = np.stack(ims, axis=0) 65 | ims = torch.from_numpy(ims).to(dev).permute([0, 3, 1, 2]).float() 66 | 67 | # track 68 | target_pos, target_sz, score, scale = temp_mem.batch_evaluate(ims, old_pos, old_sz, p) 69 | 70 | p.x_sz *= float(scale) 71 | p.z_sz *= float(scale) 72 | 73 | # return 1-indexed and left-top based bounding box 74 | target_pos, target_sz = to_one_indexed(target_pos, target_sz) 75 | 76 | target_pos[0] = max(0, min(state['im_w'], target_pos[0])) 77 | target_pos[1] = max(0, min(state['im_h'], target_pos[1])) 78 | target_sz[0] = max(10, min(state['im_w'], target_sz[0])) 79 | target_sz[1] = max(10, min(state['im_h'], target_sz[1])) 80 | state['target_pos'] = target_pos 81 | state['target_sz'] = target_sz 82 | state['score'] = score 83 | state['p'] = p 84 | state['crop'] = ims[1] # get non scaled image 85 | return state 86 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamFC/utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # -------------------------------------------------------- 6 | 7 | import numpy as np 8 | import cv2 9 | 10 | def cxy_wh_2_rect(pos, sz): 11 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) # 0-index 12 | 13 | def rect_2_cxy_wh(rect): 14 | return 
np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2]), np.array([rect[2], rect[3]]) # 0-index 15 | 16 | def crop_and_resize(image, center, size, out_size, pad_color): 17 | # convert box to corners (0-indexed) 18 | size = round(size) 19 | corners = np.concatenate(( 20 | np.round(center - (size - 1) / 2), 21 | np.round(center - (size - 1) / 2) + size)) 22 | corners = np.round(corners).astype(int) 23 | 24 | # pad image if necessary 25 | pads = np.concatenate(( 26 | -corners[:2], corners[2:] - image.shape[:2])) 27 | npad = max(0, int(pads.max())) 28 | if npad > 0: 29 | image = cv2.copyMakeBorder( 30 | image, npad, npad, npad, npad, 31 | cv2.BORDER_CONSTANT, value=pad_color) 32 | 33 | # crop image patch 34 | corners = (corners + npad).astype(int) 35 | patch = image[corners[0]:corners[2], corners[1]:corners[3]] 36 | 37 | # resize to out_size 38 | patch = cv2.resize(patch, (out_size, out_size)) 39 | 40 | return patch 41 | 42 | def to_zero_indexed(pos, sz): 43 | # convert box to 0-indexed and center based [y, x, h, w] 44 | box = cxy_wh_2_rect(pos, sz) 45 | box = np.array([ 46 | box[1] - 1 + (box[3] - 1) / 2, 47 | box[0] - 1 + (box[2] - 1) / 2, 48 | box[3], box[2]], dtype=np.float32) 49 | return box[:2], box[2:] 50 | 51 | def to_one_indexed(pos, sz): 52 | box = np.array([ 53 | pos[1] + 1 - (sz[1] - 1) / 2, 54 | pos[0] + 1 - (sz[0] - 1) / 2, 55 | sz[1], sz[0]]) 56 | return rect_2_cxy_wh(box) 57 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/net.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .utils.load_helper import load_pretrain 6 | from .utils.anchors import Anchors 7 | from .resnet import resnet50 8 | 9 | # basic model 10 | 11 | class SiamMask(nn.Module): 12 | def __init__(self, anchors=None, o_sz=127, g_sz=127): 13 | super(SiamMask, self).__init__() 14 | self.anchors = anchors # anchor_cfg 15 | self.anchor_num = len(self.anchors["ratios"]) * len(self.anchors["scales"]) 16 | self.anchor = Anchors(anchors) 17 | self.features = None 18 | self.rpn_model = None 19 | self.mask_model = None 20 | self.o_sz = o_sz 21 | self.g_sz = g_sz 22 | self.all_anchors = None 23 | 24 | def feature_extractor(self, x): 25 | return self.features(x) 26 | 27 | def rpn(self, template, search): 28 | pred_cls, pred_loc = self.rpn_model(template, search) 29 | return pred_cls, pred_loc 30 | 31 | def mask(self, template, search): 32 | pred_mask = self.mask_model(template, search) 33 | return pred_mask 34 | 35 | def template(self, z): 36 | self.zf = self.feature_extractor(z) 37 | cls_kernel, loc_kernel = self.rpn_model.template(self.zf) 38 | return cls_kernel, loc_kernel 39 | 40 | def track(self, x, cls_kernel=None, loc_kernel=None, softmax=False): 41 | xf = self.feature_extractor(x) 42 | rpn_pred_cls, rpn_pred_loc = self.rpn_model.track(xf, cls_kernel, loc_kernel) 43 | if softmax: 44 | rpn_pred_cls = self.softmax(rpn_pred_cls) 45 | return rpn_pred_cls, rpn_pred_loc 46 | 47 | # rpn 48 | 49 | class RPN(nn.Module): 50 | def __init__(self): 51 | super(RPN, self).__init__() 52 | 53 | def forward(self, z_f, x_f): 54 | raise NotImplementedError 55 | 56 | def template(self, template): 57 | raise NotImplementedError 58 | 59 | def track(self, search): 60 | raise NotImplementedError 61 | 62 | def conv2d_dw_group(x, kernel): 63 | batch, channel = kernel.shape[:2] 64 | ## WRAPPER: changed, otherwise it does not work with batches 65 | # x = x.view(1, 
batch*channel, x.size(2), x.size(3)) # 1 * (b*c) * k * k 66 | x = x.expand(batch, *x.shape[1:]) 67 | x = x.contiguous().view(1, batch*channel, x.size(2), x.size(3)) # 1 * (b*c) * k * k 68 | kernel = kernel.view(batch*channel, 1, kernel.size(2), kernel.size(3)) # (b*c) * 1 * H * W 69 | out = F.conv2d(x, kernel, groups=batch*channel) 70 | out = out.view(batch, channel, out.size(2), out.size(3)) 71 | 72 | return out 73 | 74 | class DepthCorr(nn.Module): 75 | def __init__(self, in_channels, hidden, out_channels, kernel_size=3): 76 | super(DepthCorr, self).__init__() 77 | # adjust layer for asymmetrical features 78 | self.conv_kernel = nn.Sequential( 79 | nn.Conv2d(in_channels, hidden, kernel_size=kernel_size, bias=False), 80 | nn.BatchNorm2d(hidden), 81 | nn.ReLU(inplace=True), 82 | ) 83 | self.conv_search = nn.Sequential( 84 | nn.Conv2d(in_channels, hidden, kernel_size=kernel_size, bias=False), 85 | nn.BatchNorm2d(hidden), 86 | nn.ReLU(inplace=True), 87 | ) 88 | 89 | self.head = nn.Sequential( 90 | nn.Conv2d(hidden, hidden, kernel_size=1, bias=False), 91 | nn.BatchNorm2d(hidden), 92 | nn.ReLU(inplace=True), 93 | nn.Conv2d(hidden, out_channels, kernel_size=1) 94 | ) 95 | 96 | def forward_corr(self, kernel, input): 97 | kernel = self.conv_kernel(kernel) 98 | input = self.conv_search(input) 99 | feature = conv2d_dw_group(input, kernel) 100 | return feature 101 | 102 | def forward(self, kernel, search): 103 | feature = self.forward_corr(kernel, search) 104 | out = self.head(feature) 105 | return out 106 | 107 | # mask 108 | 109 | class Mask(nn.Module): 110 | def __init__(self): 111 | super(Mask, self).__init__() 112 | 113 | def forward(self, z_f, x_f): 114 | raise NotImplementedError 115 | 116 | def template(self, template): 117 | raise NotImplementedError 118 | 119 | def track(self, search): 120 | raise NotImplementedError 121 | 122 | # additional modules 123 | 124 | class Features(nn.Module): 125 | def __init__(self): 126 | super(Features, self).__init__() 127 | self.feature_size = -1 128 | 129 | def forward(self, x): 130 | raise NotImplementedError 131 | 132 | class ResDownS(nn.Module): 133 | def __init__(self, inplane, outplane): 134 | super(ResDownS, self).__init__() 135 | self.downsample = nn.Sequential( 136 | nn.Conv2d(inplane, outplane, kernel_size=1, bias=False), 137 | nn.BatchNorm2d(outplane)) 138 | 139 | def forward(self, x): 140 | x = self.downsample(x) 141 | if x.size(3) < 20: 142 | l, r = 4, -4 143 | x = x[:, :, l:r, l:r] 144 | return x 145 | 146 | class ResDown(Features): 147 | def __init__(self, pretrain=False): 148 | super(ResDown, self).__init__() 149 | self.features = resnet50(layer3=True, layer4=False) 150 | if pretrain: 151 | load_pretrain(self.features, 'resnet.model') 152 | 153 | self.downsample = ResDownS(1024, 256) 154 | 155 | def forward(self, x): 156 | output = self.features(x) 157 | p3 = self.downsample(output[-1]) 158 | return p3 159 | 160 | def forward_all(self, x): 161 | output = self.features(x) 162 | p3 = self.downsample(output[-1]) 163 | return output, p3 164 | 165 | class UP(RPN): 166 | def __init__(self, anchor_num=5, feature_in=256, feature_out=256): 167 | super(UP, self).__init__() 168 | 169 | self.anchor_num = anchor_num 170 | self.feature_in = feature_in 171 | self.feature_out = feature_out 172 | 173 | self.cls_output = 2 * self.anchor_num 174 | self.loc_output = 4 * self.anchor_num 175 | 176 | self.cls = DepthCorr(feature_in, feature_out, self.cls_output) 177 | self.loc = DepthCorr(feature_in, feature_out, self.loc_output) 178 | 179 | def forward(self, 
z_f, x_f): 180 | cls = self.cls(z_f, x_f) 181 | loc = self.loc(z_f, x_f) 182 | return cls, loc 183 | 184 | class MaskCorr(Mask): 185 | def __init__(self, oSz=63): 186 | super(MaskCorr, self).__init__() 187 | self.oSz = oSz 188 | self.mask = DepthCorr(256, 256, self.oSz**2) 189 | 190 | def forward(self, z, x): 191 | return self.mask(z, x) 192 | 193 | class Refine(nn.Module): 194 | def __init__(self): 195 | """ 196 | Mask refinement module 197 | Please refer SiamMask (Appendix A) 198 | https://arxiv.org/abs/1812.05050 199 | """ 200 | super(Refine, self).__init__() 201 | self.v0 = nn.Sequential(nn.Conv2d(64, 16, 3, padding=1), nn.ReLU(), 202 | nn.Conv2d(16, 4, 3, padding=1), nn.ReLU()) 203 | 204 | self.v1 = nn.Sequential(nn.Conv2d(256, 64, 3, padding=1), nn.ReLU(), 205 | nn.Conv2d(64, 16, 3, padding=1), nn.ReLU()) 206 | 207 | self.v2 = nn.Sequential(nn.Conv2d(512, 128, 3, padding=1), nn.ReLU(), 208 | nn.Conv2d(128, 32, 3, padding=1), nn.ReLU()) 209 | 210 | self.h2 = nn.Sequential(nn.Conv2d(32, 32, 3, padding=1), nn.ReLU(), 211 | nn.Conv2d(32, 32, 3, padding=1), nn.ReLU()) 212 | 213 | self.h1 = nn.Sequential(nn.Conv2d(16, 16, 3, padding=1), nn.ReLU(), 214 | nn.Conv2d(16, 16, 3, padding=1), nn.ReLU()) 215 | 216 | self.h0 = nn.Sequential(nn.Conv2d(4, 4, 3, padding=1), nn.ReLU(), 217 | nn.Conv2d(4, 4, 3, padding=1), nn.ReLU()) 218 | 219 | self.deconv = nn.ConvTranspose2d(256, 32, 15, 15) 220 | 221 | self.post0 = nn.Conv2d(32, 16, 3, padding=1) 222 | self.post1 = nn.Conv2d(16, 4, 3, padding=1) 223 | self.post2 = nn.Conv2d(4, 1, 3, padding=1) 224 | 225 | def forward(self, f, corr_feature, pos=None): 226 | pos = [int(i) for i in pos] 227 | p0 = torch.nn.functional.pad(f[0], [16,16,16,16])[:, :, 4*pos[0]:4*pos[0]+61, 4*pos[1]:4*pos[1]+61] 228 | p1 = torch.nn.functional.pad(f[1], [8,8,8,8])[:, :, 2*pos[0]:2*pos[0]+31, 2*pos[1]:2*pos[1]+31] 229 | p2 = torch.nn.functional.pad(f[2], [4,4,4,4])[:, :, pos[0]:pos[0]+15, pos[1]:pos[1]+15] 230 | 231 | p3 = corr_feature[:, :, pos[0], pos[1]].view(-1, 256, 1, 1) 232 | 233 | out = self.deconv(p3) 234 | out = self.post0(F.upsample(self.h2(out) + self.v2(p2), size=(31, 31))) 235 | out = self.post1(F.upsample(self.h1(out) + self.v1(p1), size=(61, 61))) 236 | out = self.post2(F.upsample(self.h0(out) + self.v0(p0), size=(127, 127))) 237 | out = out.view(-1, 127*127) 238 | return out 239 | 240 | # final siammask model 241 | 242 | class SiamMaskCustom(SiamMask): 243 | def __init__(self, pretrain=False, **kwargs): 244 | super(SiamMaskCustom, self).__init__(**kwargs) 245 | self.features = ResDown(pretrain=pretrain) 246 | self.rpn_model = UP(anchor_num=self.anchor_num, feature_in=256, feature_out=256) 247 | self.mask_model = MaskCorr() 248 | self.refine_model = Refine() 249 | self.best_temp = 0 250 | 251 | def refine(self, f, pos=None): 252 | return self.refine_model(f, pos) 253 | 254 | def template(self, template): 255 | self.zf = self.features(template) 256 | return self.zf 257 | 258 | def track(self, search): 259 | search = self.features(search) 260 | rpn_pred_cls, rpn_pred_loc = self.rpn(self.zf, search) 261 | return rpn_pred_cls, rpn_pred_loc 262 | 263 | def track_mask(self, search): 264 | self.feature, self.search = self.features.forward_all(search) 265 | rpn_pred_cls, rpn_pred_loc = self.rpn(self.zf, self.search) 266 | self.corr_feature = self.mask_model.mask.forward_corr(self.zf, self.search) 267 | pred_mask = self.mask_model.mask.head(self.corr_feature) 268 | return rpn_pred_cls, rpn_pred_loc, pred_mask 269 | 270 | def track_refine(self, pos): 271 | ### WRAPPER 
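# Assumption based on this file alone: track_mask() produces one correlation
# feature per template held in the THOR memory (stacked along the batch
# dimension), and best_temp (0 by default, updated externally once the best
# template is known) selects that single template's feature before refinement.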
272 | self.corr_feature = self.corr_feature[self.best_temp].unsqueeze(0) 273 | ### 274 | pred_mask = self.refine_model(self.feature, self.corr_feature, pos=pos) 275 | return pred_mask 276 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/siammask.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # Revised for THOR by Axel Sauer (axel.sauer@tum.de) 6 | # -------------------------------------------------------- 7 | 8 | from __future__ import division 9 | import argparse 10 | import logging 11 | import numpy as np 12 | import cv2 13 | from PIL import Image 14 | from os import makedirs 15 | from os.path import join, isdir, isfile 16 | 17 | import torch 18 | from torch.autograd import Variable 19 | import torch.nn.functional as F 20 | 21 | # relative imports 22 | from .utils.log_helper import init_log, add_file_handler 23 | from .utils.bbox_helper import get_axis_aligned_bbox, cxy_wh_2_rect 24 | from .utils.anchors import Anchors, generate_anchor 25 | from .utils.tracker_config import TrackerConfig 26 | from .utils.tracking_utils import get_subwindow_tracking 27 | 28 | def SiamMask_init(im, target_pos, target_sz, model, hp=None): 29 | state = dict() 30 | state['im_h'] = im.shape[0] 31 | state['im_w'] = im.shape[1] 32 | 33 | p = TrackerConfig() 34 | p.update(hp, model.anchors) 35 | p.renew() 36 | 37 | p.scales = model.anchors['scales'] 38 | p.ratios = model.anchors['ratios'] 39 | p.anchor_num = len(p.ratios) * len(p.scales) 40 | p.anchor = generate_anchor(model.anchors, p.score_size) 41 | 42 | avg_chans = np.mean(im, axis=(0, 1)) 43 | 44 | if p.windowing == 'cosine': 45 | window = np.outer(np.hanning(p.score_size), np.hanning(p.score_size)) 46 | elif p.windowing == 'uniform': 47 | window = np.ones((p.score_size, p.score_size)) 48 | window = np.tile(window.flatten(), p.anchor_num) 49 | 50 | use_cuda = torch.cuda.is_available() 51 | state['device'] = torch.device("cuda" if use_cuda else "cpu") 52 | state['p'] = p 53 | state['model'] = model 54 | state['avg_chans'] = avg_chans 55 | state['window'] = window 56 | state['score'] = 1.0 57 | state['target_pos'] = target_pos 58 | state['target_sz'] = target_sz 59 | return state 60 | 61 | def SiamMask_track(state, im, temp_mem): 62 | p = state['p'] 63 | avg_chans = state['avg_chans'] 64 | window = state['window'] 65 | old_pos = state['target_pos'] 66 | old_sz = state['target_sz'] 67 | dev = state['device'] 68 | 69 | # get search area 70 | wc_x = old_sz[1] + p.context_amount * sum(old_sz) 71 | hc_x = old_sz[0] + p.context_amount * sum(old_sz) 72 | s_z = np.sqrt(wc_x * hc_x) 73 | 74 | scale_x = p.exemplar_size / s_z 75 | d_search = (p.instance_size - p.exemplar_size) / 2 76 | pad = d_search / scale_x 77 | s_x = s_z + 2 * pad 78 | crop_box = [old_pos[0] - round(s_x) / 2, old_pos[1] - round(s_x) / 2, round(s_x), round(s_x)] 79 | 80 | # extract scaled crops for search region x at previous target position 81 | x_crop = Variable(get_subwindow_tracking(im, old_pos, p.instance_size, round(s_x), avg_chans).unsqueeze(0)) 82 | 83 | # track 84 | target_pos, target_sz, score, best_id = temp_mem.batch_evaluate(x_crop.to(dev), old_pos, 85 | old_sz, window, 86 | scale_x, p) 87 | 88 | # mask refinement 89 | best_pscore_id_mask = np.unravel_index(best_id, (5, p.score_size, p.score_size)) 90 | delta_x, 
delta_y = best_pscore_id_mask[2], best_pscore_id_mask[1] 91 | mask = state['model'].track_refine((delta_y, delta_x)).to(dev).sigmoid().squeeze().view( 92 | p.out_size, p.out_size).cpu().data.numpy() 93 | 94 | def crop_back(image, bbox, out_sz, padding=-1): 95 | a = (out_sz[0] - 1) / bbox[2] 96 | b = (out_sz[1] - 1) / bbox[3] 97 | c = -a * bbox[0] 98 | d = -b * bbox[1] 99 | mapping = np.array([[a, 0, c], 100 | [0, b, d]]).astype(np.float) 101 | crop = cv2.warpAffine(image, mapping, (out_sz[0], out_sz[1]), 102 | flags=cv2.INTER_LINEAR, 103 | borderMode=cv2.BORDER_CONSTANT, 104 | borderValue=padding) 105 | return crop 106 | 107 | s = crop_box[2] / p.instance_size 108 | sub_box = [crop_box[0] + (delta_x - p.base_size / 2) * p.total_stride * s, 109 | crop_box[1] + (delta_y - p.base_size / 2) * p.total_stride * s, 110 | s * p.exemplar_size, s * p.exemplar_size] 111 | s = p.out_size / sub_box[2] 112 | back_box = [-sub_box[0] * s, -sub_box[1] * s, state['im_w'] * s, state['im_h'] * s] 113 | mask_in_img = crop_back(mask, back_box, (state['im_w'], state['im_h'])) 114 | 115 | target_mask = (mask_in_img > p.seg_thr).astype(np.uint8) 116 | if cv2.__version__[-5] == '4': 117 | contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 118 | else: 119 | _, contours, _ = cv2.findContours(target_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 120 | cnt_area = [cv2.contourArea(cnt) for cnt in contours] 121 | if len(contours) != 0 and np.max(cnt_area) > 100: 122 | contour = contours[np.argmax(cnt_area)] # use max area polygon 123 | polygon = contour.reshape(-1, 2) 124 | prbox = cv2.boxPoints(cv2.minAreaRect(polygon)) # Rotated Rectangle 125 | rbox_in_img = prbox 126 | else: # empty mask 127 | location = cxy_wh_2_rect(target_pos, target_sz) 128 | rbox_in_img = np.array([[location[0], location[1]], 129 | [location[0] + location[2], location[1]], 130 | [location[0] + location[2], location[1] + location[3]], 131 | [location[0], location[1] + location[3]]]) 132 | 133 | state['mask'] = mask_in_img 134 | state['polygon'] = rbox_in_img 135 | 136 | # clip in min and max of the bb 137 | target_pos[0] = max(0, min(state['im_w'], target_pos[0])) 138 | target_pos[1] = max(0, min(state['im_h'], target_pos[1])) 139 | target_sz[0] = max(10, min(state['im_w'], target_sz[0])) 140 | target_sz[1] = max(10, min(state['im_h'], target_sz[1])) 141 | 142 | state['target_pos'] = target_pos 143 | state['target_sz'] = target_sz 144 | state['score'] = score 145 | state['crop'] = x_crop 146 | 147 | return state 148 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/anchors.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # Revised for THOR by Axel Sauer (axel.sauer@tum.de) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import math 10 | from .bbox_helper import center2corner, corner2center 11 | 12 | def generate_anchor(cfg, score_size): 13 | anchors = Anchors(cfg) 14 | anchor = anchors.anchors 15 | x1, y1, x2, y2 = anchor[:, 0], anchor[:, 1], anchor[:, 2], anchor[:, 3] 16 | anchor = np.stack([(x1+x2)*0.5, (y1+y2)*0.5, x2-x1, y2-y1], 1) 17 | 18 | total_stride = anchors.stride 19 | anchor_num = anchor.shape[0] 20 | 21 | anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4)) 22 | 
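# The per-position anchors are tiled over every cell of the score map; the
# meshgrid offsets below then shift each copy so its (cx, cy) sits on that
# cell's center, with the grid symmetric around the search-region center.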
ori = - (score_size // 2) * total_stride 23 | xx, yy = np.meshgrid([ori + total_stride * dx for dx in range(score_size)], 24 | [ori + total_stride * dy for dy in range(score_size)]) 25 | xx, yy = np.tile(xx.flatten(), (anchor_num, 1)).flatten(), \ 26 | np.tile(yy.flatten(), (anchor_num, 1)).flatten() 27 | anchor[:, 0], anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32) 28 | return anchor 29 | 30 | class Anchors: 31 | def __init__(self, cfg): 32 | self.stride = 8 33 | self.ratios = [0.33, 0.5, 1, 2, 3] 34 | self.scales = [8] 35 | self.round_dight = 0 36 | self.image_center = 0 37 | self.size = 0 38 | 39 | self.__dict__.update(cfg) 40 | 41 | self.anchor_num = len(self.scales) * len(self.ratios) 42 | self.anchors = None # in single position (anchor_num*4) 43 | self.all_anchors = None # in all position 2*(4*anchor_num*h*w) 44 | self.generate_anchors() 45 | 46 | def generate_anchors(self): 47 | self.anchors = np.zeros((self.anchor_num, 4), dtype=np.float32) 48 | 49 | size = self.stride * self.stride 50 | count = 0 51 | for r in self.ratios: 52 | if self.round_dight > 0: 53 | ws = round(math.sqrt(size*1. / r), self.round_dight) 54 | hs = round(ws * r, self.round_dight) 55 | else: 56 | ws = int(math.sqrt(size*1. / r)) 57 | hs = int(ws * r) 58 | 59 | for s in self.scales: 60 | w = ws * s 61 | h = hs * s 62 | self.anchors[count][:] = [-w*0.5, -h*0.5, w*0.5, h*0.5][:] 63 | count += 1 64 | 65 | def generate_all_anchors(self, im_c, size): 66 | if self.image_center == im_c and self.size == size: 67 | return False 68 | self.image_center = im_c 69 | self.size = size 70 | 71 | a0x = im_c - size // 2 * self.stride 72 | ori = np.array([a0x] * 4, dtype=np.float32) 73 | zero_anchors = self.anchors + ori 74 | 75 | x1 = zero_anchors[:, 0] 76 | y1 = zero_anchors[:, 1] 77 | x2 = zero_anchors[:, 2] 78 | y2 = zero_anchors[:, 3] 79 | 80 | x1, y1, x2, y2 = map(lambda x: x.reshape(self.anchor_num, 1, 1), [x1, y1, x2, y2]) 81 | cx, cy, w, h = corner2center([x1, y1, x2, y2]) 82 | 83 | disp_x = np.arange(0, size).reshape(1, 1, -1) * self.stride 84 | disp_y = np.arange(0, size).reshape(1, -1, 1) * self.stride 85 | 86 | cx = cx + disp_x 87 | cy = cy + disp_y 88 | 89 | # broadcast 90 | zero = np.zeros((self.anchor_num, size, size), dtype=np.float32) 91 | cx, cy, w, h = map(lambda x: x + zero, [cx, cy, w, h]) 92 | x1, y1, x2, y2 = center2corner([cx, cy, w, h]) 93 | 94 | self.all_anchors = np.stack([x1, y1, x2, y2]), np.stack([cx, cy, w, h]) 95 | return True 96 | 97 | 98 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/bbox_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # Revised for THOR by Axel Sauer (axel.sauer@tum.de) 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | from collections import namedtuple 10 | 11 | Corner = namedtuple('Corner', 'x1 y1 x2 y2') 12 | BBox = Corner 13 | Center = namedtuple('Center', 'x y w h') 14 | 15 | 16 | def corner2center(corner): 17 | """ 18 | :param corner: Corner or np.array 4*N 19 | :return: Center or 4 np.array N 20 | """ 21 | if isinstance(corner, Corner): 22 | x1, y1, x2, y2 = corner 23 | return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, (x2 - x1), (y2 - y1)) 24 | else: 25 | x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3] 26 | x = (x1 + 
x2) * 0.5 27 | y = (y1 + y2) * 0.5 28 | w = x2 - x1 29 | h = y2 - y1 30 | return x, y, w, h 31 | 32 | 33 | def center2corner(center): 34 | """ 35 | :param center: Center or np.array 4*N 36 | :return: Corner or np.array 4*N 37 | """ 38 | if isinstance(center, Center): 39 | x, y, w, h = center 40 | return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5) 41 | else: 42 | x, y, w, h = center[0], center[1], center[2], center[3] 43 | x1 = x - w * 0.5 44 | y1 = y - h * 0.5 45 | x2 = x + w * 0.5 46 | y2 = y + h * 0.5 47 | return x1, y1, x2, y2 48 | 49 | 50 | def cxy_wh_2_rect(pos, sz): 51 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) # 0-index 52 | 53 | 54 | def get_axis_aligned_bbox(region): 55 | nv = region.size 56 | if nv == 8: 57 | cx = np.mean(region[0::2]) 58 | cy = np.mean(region[1::2]) 59 | x1 = min(region[0::2]) 60 | x2 = max(region[0::2]) 61 | y1 = min(region[1::2]) 62 | y2 = max(region[1::2]) 63 | A1 = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6]) 64 | A2 = (x2 - x1) * (y2 - y1) 65 | s = np.sqrt(A1 / A2) 66 | w = s * (x2 - x1) + 1 67 | h = s * (y2 - y1) + 1 68 | else: 69 | x = region[0] 70 | y = region[1] 71 | w = region[2] 72 | h = region[3] 73 | cx = x+w/2 74 | cy = y+h/2 75 | 76 | return cx, cy, w, h 77 | 78 | 79 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/config_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # Revised for THOR by Axel Sauer (axel.sauer@tum.de) 6 | # -------------------------------------------------------- 7 | 8 | import json 9 | from os.path import exists 10 | 11 | 12 | def load_config(config, arch): 13 | assert exists(config), '"{}" not exists'.format(config) 14 | config = json.load(open(config)) 15 | 16 | # deal with network 17 | if 'network' not in config: 18 | print('Warning: network lost in config. 
This will be error in next version') 19 | 20 | config['network'] = {} 21 | 22 | if not arch: 23 | raise Exception('no arch provided') 24 | 25 | arch = config['network']['arch'] 26 | 27 | return config 28 | 29 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/load_helper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import logging 3 | logger = logging.getLogger('global') 4 | 5 | 6 | def check_keys(model, pretrained_state_dict): 7 | ckpt_keys = set(pretrained_state_dict.keys()) 8 | model_keys = set(model.state_dict().keys()) 9 | used_pretrained_keys = model_keys & ckpt_keys 10 | unused_pretrained_keys = ckpt_keys - model_keys 11 | missing_keys = model_keys - ckpt_keys 12 | if len(missing_keys) > 0: 13 | logger.info('[Warning] missing keys: {}'.format(missing_keys)) 14 | logger.info('missing keys:{}'.format(len(missing_keys))) 15 | if len(unused_pretrained_keys) > 0: 16 | logger.info('[Warning] unused_pretrained_keys: {}'.format(unused_pretrained_keys)) 17 | logger.info('unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) 18 | logger.info('used keys:{}'.format(len(used_pretrained_keys))) 19 | assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' 20 | return True 21 | 22 | 23 | def remove_prefix(state_dict, prefix): 24 | ''' Old style model is stored with all names of parameters share common prefix 'module.' ''' 25 | logger.info('remove prefix \'{}\''.format(prefix)) 26 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 27 | return {f(key): value for key, value in state_dict.items()} 28 | 29 | 30 | def load_pretrain(model, pretrained_path): 31 | logger.info('load pretrained model from {}'.format(pretrained_path)) 32 | 33 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 34 | pretrained_dict = torch.load(pretrained_path, map_location=device) 35 | if "state_dict" in pretrained_dict.keys(): 36 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 'module.') 37 | else: 38 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 39 | 40 | try: 41 | check_keys(model, pretrained_dict) 42 | except: 43 | logger.info('[Warning]: using pretrain as features. Adding "features." as prefix') 44 | new_dict = {} 45 | for k, v in pretrained_dict.items(): 46 | k = 'features.' 
+ k 47 | new_dict[k] = v 48 | pretrained_dict = new_dict 49 | check_keys(model, pretrained_dict) 50 | model.load_state_dict(pretrained_dict, strict=False) 51 | return model 52 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/log_helper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from __future__ import division 7 | 8 | import os 9 | import logging 10 | import sys 11 | 12 | if hasattr(sys, 'frozen'): # support for py2exe 13 | _srcfile = "logging%s__init__%s" % (os.sep, __file__[-4:]) 14 | elif __file__[-4:].lower() in ['.pyc', '.pyo']: 15 | _srcfile = __file__[:-4] + '.py' 16 | else: 17 | _srcfile = __file__ 18 | _srcfile = os.path.normcase(_srcfile) 19 | 20 | 21 | logs = set() 22 | 23 | 24 | class Filter: 25 | def __init__(self, flag): 26 | self.flag = flag 27 | 28 | def filter(self, x): return self.flag 29 | 30 | 31 | class Dummy: 32 | def __init__(self, *arg, **kwargs): 33 | pass 34 | 35 | def __getattr__(self, arg): 36 | def dummy(*args, **kwargs): pass 37 | return dummy 38 | 39 | 40 | def get_format(logger, level): 41 | if 'SLURM_PROCID' in os.environ: 42 | rank = int(os.environ['SLURM_PROCID']) 43 | 44 | if level == logging.INFO: 45 | logger.addFilter(Filter(rank == 0)) 46 | else: 47 | rank = 0 48 | format_str = '[%(asctime)s-rk{}-%(filename)s#%(lineno)3d] %(message)s'.format(rank) 49 | formatter = logging.Formatter(format_str) 50 | return formatter 51 | 52 | 53 | def init_log(name, level = logging.INFO, format_func=get_format): 54 | if (name, level) in logs: return 55 | logs.add((name, level)) 56 | logger = logging.getLogger(name) 57 | logger.setLevel(level) 58 | ch = logging.StreamHandler() 59 | ch.setLevel(level) 60 | formatter = format_func(logger, level) 61 | ch.setFormatter(formatter) 62 | logger.addHandler(ch) 63 | return logger 64 | 65 | 66 | def add_file_handler(name, log_file, level = logging.INFO): 67 | logger = logging.getLogger(name) 68 | fh = logging.FileHandler(log_file) 69 | fh.setFormatter(get_format(logger, level)) 70 | logger.addHandler(fh) 71 | 72 | 73 | init_log('global') 74 | 75 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/tracker_config.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from __future__ import division 7 | from .anchors import Anchors 8 | 9 | 10 | class TrackerConfig(object): 11 | # These are the default hyper-params for SiamMask 12 | penalty_k = 0.04 13 | window_influence = 0.42 14 | lr = 0.25 15 | seg_thr = 0.3 # for mask 16 | windowing = 'cosine' # to penalize large displacements [cosine/uniform] 17 | # Params from the network architecture, have to be consistent with the training 18 | exemplar_size = 127 # input z size 19 | instance_size = 255 # input x size (search region) 20 | instance_size_glob = 550 # input x size (search region) 21 | total_stride = 8 22 | out_size = 63 # for mask 23 | base_size = 8 24 | score_size = 
(instance_size-exemplar_size)//total_stride+1+base_size 25 | score_size_glob = (instance_size_glob-exemplar_size)//total_stride+1+base_size 26 | context_amount = 0.5 # context amount for the exemplar 27 | ratios = [0.33, 0.5, 1, 2, 3] 28 | scales = [8, ] 29 | anchor_num = len(ratios) * len(scales) 30 | round_dight = 0 31 | anchor = [] 32 | 33 | def update(self, newparam=None, anchors=None): 34 | if newparam: 35 | for key, value in newparam.items(): 36 | setattr(self, key, value) 37 | if anchors is not None: 38 | if isinstance(anchors, dict): 39 | anchors = Anchors(anchors) 40 | if isinstance(anchors, Anchors): 41 | self.total_stride = anchors.stride 42 | self.ratios = anchors.ratios 43 | self.scales = anchors.scales 44 | self.round_dight = anchors.round_dight 45 | self.renew() 46 | 47 | def renew(self): 48 | self.score_size = (self.instance_size - self.exemplar_size) // self.total_stride + 1 + self.base_size 49 | self.anchor_num = len(self.ratios) * len(self.scales) 50 | 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/SiamMask/utils/tracking_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import cv2 4 | 5 | def to_torch(ndarray): 6 | if type(ndarray).__module__ == 'numpy': 7 | return torch.from_numpy(ndarray) 8 | elif not torch.is_tensor(ndarray): 9 | raise ValueError("Cannot convert {} to torch tensor" 10 | .format(type(ndarray))) 11 | return ndarray 12 | 13 | 14 | def im_to_torch(img): 15 | img = np.transpose(img, (2, 0, 1)) # C*H*W 16 | img = to_torch(img).float() 17 | return img 18 | 19 | 20 | 21 | def get_subwindow_tracking(im, pos, model_sz, original_sz, avg_chans, out_mode='torch'): 22 | if isinstance(pos, float): 23 | pos = [pos, pos] 24 | sz = original_sz 25 | im_sz = im.shape 26 | c = (original_sz + 1) / 2 27 | context_xmin = round(pos[0] - c) 28 | context_xmax = context_xmin + sz - 1 29 | context_ymin = round(pos[1] - c) 30 | context_ymax = context_ymin + sz - 1 31 | left_pad = int(max(0., -context_xmin)) 32 | top_pad = int(max(0., -context_ymin)) 33 | right_pad = int(max(0., context_xmax - im_sz[1] + 1)) 34 | bottom_pad = int(max(0., context_ymax - im_sz[0] + 1)) 35 | 36 | context_xmin = context_xmin + left_pad 37 | context_xmax = context_xmax + left_pad 38 | context_ymin = context_ymin + top_pad 39 | context_ymax = context_ymax + top_pad 40 | 41 | r, c, k = im.shape 42 | if any([top_pad, bottom_pad, left_pad, right_pad]): 43 | te_im = np.zeros((r + top_pad + bottom_pad, c + left_pad + right_pad, k), np.uint8) 44 | te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im 45 | if top_pad: 46 | te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans 47 | if bottom_pad: 48 | te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans 49 | if left_pad: 50 | te_im[:, 0:left_pad, :] = avg_chans 51 | if right_pad: 52 | te_im[:, c + left_pad:, :] = avg_chans 53 | im_patch_original = te_im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :] 54 | else: 55 | im_patch_original = im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :] 56 | 57 | if not np.array_equal(model_sz, original_sz): 58 | im_patch = cv2.resize(im_patch_original, (model_sz, model_sz)) 59 | else: 60 | im_patch = im_patch_original 61 | 62 | return im_to_torch(im_patch) 63 | -------------------------------------------------------------------------------- 
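A minimal usage sketch for get_subwindow_tracking above, assuming only what the function itself requires; the frame path, box values, and the import path are illustrative and not taken from the repository:

import numpy as np
import cv2
from trackers.SiamMask.utils.tracking_utils import get_subwindow_tracking  # path assumed

frame = cv2.imread('frame.jpg')           # H x W x 3 BGR image (placeholder path)
target_pos = np.array([320.0, 240.0])     # (cx, cy) of the tracked object
target_sz = np.array([80.0, 60.0])        # (w, h) of the tracked object
avg_chans = np.mean(frame, axis=(0, 1))   # per-channel padding color, as in SiamMask_init
context = 0.5 * np.sum(target_sz)         # context_amount = 0.5, as in TrackerConfig
s_z = round(np.sqrt((target_sz[0] + context) * (target_sz[1] + context)))
patch = get_subwindow_tracking(frame, target_pos, model_sz=127,
                               original_sz=s_z, avg_chans=avg_chans)
print(patch.shape)                        # torch.Size([3, 127, 127]), float CHW tensor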
/DeepMTA_code/trackers/dcynet_modules_adaptis/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .resnet18_vggm import * 3 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/constants.py: -------------------------------------------------------------------------------- 1 | # Path to SALICON raw data 2 | pathToImages = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/image' 3 | pathToMapsTrain = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/attention_map' 4 | pathToMapsVal = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/attention_map' 5 | 6 | # Path to processed data. Created using preprocess.py 7 | pathToResizedImagesTrain = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/image' 8 | pathToResizedMapsTrain = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/attention_map' 9 | pathToResizedTargetObjectTrain = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/target_objects/first_frame' 10 | 11 | 12 | pathToResizedImagesVal = pathToResizedImagesTrain 13 | pathToResizedMapsVal = pathToResizedMapsTrain 14 | 15 | 16 | pathToResizedImagesTest = '/home/wangxiao/Downloads/test_dataset/global_Attention_generation_train_dataset/image' 17 | 18 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/data_loader.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import numpy as np 4 | import cv2 5 | import torch 6 | import torchvision.transforms as transforms 7 | import matplotlib.pyplot as plt 8 | from constants import * 9 | import pdb 10 | import random 11 | 12 | class DataLoader(object): 13 | 14 | def __init__(self, batch_size = 5): 15 | #reading data list 16 | # self.list_img = [k.split('/')[-1].split('.')[0] for k in glob.glob(os.path.join(pathToResizedImagesTrain, '*train*'))] 17 | self.list_img = [k.split('/')[-1].split('.')[0] for k in glob.glob(os.path.join(pathToResizedImagesTrain, '*image*'))] 18 | self.batch_size = batch_size 19 | self.size = len(self.list_img) 20 | self.cursor = 0 21 | self.num_batches = self.size / batch_size 22 | 23 | def get_batch(self): # Returns 24 | if self.cursor + self.batch_size > self.size: 25 | self.cursor = 0 26 | np.random.shuffle(self.list_img) 27 | 28 | mask_size = 216 29 | img = torch.zeros(self.batch_size, 3, 300, 300) 30 | sal_map = torch.zeros(self.batch_size, 1, mask_size, mask_size) 31 | targetObject = torch.zeros(self.batch_size, 3, 100, 100) 32 | coords = torch.zeros(self.batch_size, 2) 33 | 34 | #to_tensor = transform.Compose(transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0. 35 | to_tensor = transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0. 
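# Filename convention inferred from the slicing below (not documented in the
# repo): entries of list_img look like '<video>_image-<idx>', so the text
# before the first '_' is the video name and the 7 characters of '_image-'
# are skipped to recover <idx>; the mask and the first-frame target crop are
# then loaded as '<video>_mask-<idx>.jpg' and '<video>_targetObject.jpg'.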
36 | 37 | for idx in range(self.batch_size): 38 | curr_file = self.list_img[self.cursor] 39 | 40 | # pdb.set_trace() 41 | temp_index2 = curr_file.find('_') 42 | videoName = curr_file[:temp_index2] 43 | 44 | imgIndex = curr_file[temp_index2+7:] 45 | 46 | # pdb.set_trace() 47 | 48 | targetObject_img_path = os.path.join(pathToResizedTargetObjectTrain, videoName + '_targetObject.jpg') 49 | full_img_path = os.path.join(pathToResizedImagesTrain, videoName + "_image-" + imgIndex + '.jpg') 50 | full_map_path = os.path.join(pathToResizedMapsTrain, videoName + "_mask-" + imgIndex + '.jpg') 51 | self.cursor += 1 52 | inputimage = cv2.imread(full_img_path) # (192,256,3) 53 | 54 | 55 | # pdb.set_trace() 56 | img[idx] = to_tensor(inputimage) 57 | 58 | targetObjectimage = cv2.imread(targetObject_img_path) 59 | targetObject[idx] = to_tensor(targetObjectimage) 60 | 61 | saliencyimage = cv2.imread(full_map_path, 0) 62 | saliencyimage = cv2.resize(saliencyimage, (mask_size, mask_size), interpolation=cv2.INTER_CUBIC) 63 | 64 | num_points = 1 65 | possible_points = np.where(saliencyimage) 66 | num_possible_points = possible_points[0].shape[0] 67 | rindx = random.sample(list(range(num_possible_points)), k=min(num_points, num_possible_points)) 68 | points = [] 69 | for j in rindx: 70 | points.append((possible_points[0][j], possible_points[1][j])) 71 | points = np.array(points) 72 | coords[idx] = torch.from_numpy(points) 73 | 74 | 75 | saliencyimage = np.expand_dims(saliencyimage, axis=2) 76 | sal_map[idx] = to_tensor(saliencyimage) 77 | 78 | 79 | 80 | return (img, sal_map, targetObject, coords) 81 | 82 | 83 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/logger.py: -------------------------------------------------------------------------------- 1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 2 | # Tensorboard logger for PyTorch 3 | import tensorflow as tf 4 | import numpy as np 5 | import scipy.misc 6 | try: 7 | from StringIO import StringIO # Python 2.7 8 | except ImportError: 9 | from io import BytesIO # Python 3.x 10 | 11 | 12 | class Logger(object): 13 | 14 | def __init__(self, log_dir): 15 | """Create a summary writer logging to log_dir.""" 16 | self.writer = tf.summary.FileWriter(log_dir) 17 | 18 | def scalar_summary(self, tag, value, step): 19 | """Log a scalar variable.""" 20 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)]) 21 | self.writer.add_summary(summary, step) 22 | 23 | def image_summary(self, tag, images, step): 24 | """Log a list of images.""" 25 | 26 | img_summaries = [] 27 | for i, img in enumerate(images): 28 | # Write the image to a string 29 | try: 30 | s = StringIO() 31 | except: 32 | s = BytesIO() 33 | scipy.misc.toimage(img).save(s, format="png") 34 | 35 | # Create an Image object 36 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 37 | height=img.shape[0], 38 | width=img.shape[1]) 39 | # Create a Summary value 40 | img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 41 | 42 | # Create and write Summary 43 | summary = tf.Summary(value=img_summaries) 44 | self.writer.add_summary(summary, step) 45 | 46 | def histo_summary(self, tag, values, step, bins=1000): 47 | """Log a histogram of the tensor of values.""" 48 | 49 | # Create a histogram using numpy 50 | counts, bin_edges = np.histogram(values, bins=bins) 51 | 52 | # Fill the fields of the histogram proto 53 | hist = tf.HistogramProto() 54 | 
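# Note: tf.Summary / tf.HistogramProto here and tf.summary.FileWriter above
# are TensorFlow 1.x APIs (only reachable via tf.compat.v1 in TF 2.x), and
# scipy.misc.toimage has been removed from recent SciPy releases, so this
# logger assumes the older library versions the repo was written against.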
hist.min = float(np.min(values)) 55 | hist.max = float(np.max(values)) 56 | hist.num = int(np.prod(values.shape)) 57 | hist.sum = float(np.sum(values)) 58 | hist.sum_squares = float(np.sum(values**2)) 59 | 60 | # Drop the start of the first bin 61 | bin_edges = bin_edges[1:] 62 | 63 | # Add bin edges and counts 64 | for edge in bin_edges: 65 | hist.bucket_limit.append(edge) 66 | for c in counts: 67 | hist.bucket.append(c) 68 | 69 | # Create and write Summary 70 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 71 | self.writer.add_summary(summary, step) 72 | self.writer.flush() 73 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/ops.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | def conv2d(in_channels, out_channels, kernel_size = 3, padding = 1): 4 | return nn.Conv2d(in_channels, out_channels, kernel_size = kernel_size, padding = padding) 5 | 6 | def deconv2d(in_channels, out_channels, kernel_size = 3, padding = 1): 7 | return nn.ConvTranspose2d(in_channels, out_channels, kernel_size = kernel_size, padding = padding) 8 | 9 | def relu(inplace = True): # Change to True? 10 | return nn.ReLU(inplace) 11 | 12 | def maxpool2d(): 13 | return nn.MaxPool2d(2) 14 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/resnet.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch.nn as nn 3 | from collections import OrderedDict 4 | import torch.utils.model_zoo as model_zoo 5 | from torchvision.models.resnet import BasicBlock, Bottleneck, model_urls 6 | 7 | 8 | class ResNet(nn.Module): 9 | """ ResNet network module. Allows extracting specific feature blocks.""" 10 | def __init__(self, block, layers, output_layers, num_classes=1000, inplanes=64): 11 | self.inplanes = inplanes 12 | super(ResNet, self).__init__() 13 | self.output_layers = output_layers 14 | self.conv1 = nn.Conv2d(3, inplanes, kernel_size=7, stride=2, padding=3, bias=False) 15 | self.bn1 = nn.BatchNorm2d(inplanes) 16 | self.relu = nn.ReLU(inplace=True) 17 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 18 | self.layer1 = self._make_layer(block, inplanes, layers[0]) 19 | self.layer2 = self._make_layer(block, inplanes*2, layers[1], stride=2) 20 | self.layer3 = self._make_layer(block, inplanes*4, layers[2], stride=2) 21 | self.layer4 = self._make_layer(block, inplanes*8, layers[3], stride=2) 22 | # self.avgpool = nn.AvgPool2d(7, stride=1) 23 | self.avgpool = nn.AdaptiveAvgPool2d((1,1)) 24 | self.fc = nn.Linear(inplanes*8 * block.expansion, num_classes) 25 | 26 | for m in self.modules(): 27 | if isinstance(m, nn.Conv2d): 28 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 29 | m.weight.data.normal_(0, math.sqrt(2. 
/ n)) 30 | elif isinstance(m, nn.BatchNorm2d): 31 | m.weight.data.fill_(1) 32 | m.bias.data.zero_() 33 | 34 | def _make_layer(self, block, planes, blocks, stride=1): 35 | downsample = None 36 | if stride != 1 or self.inplanes != planes * block.expansion: 37 | downsample = nn.Sequential( 38 | nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), 39 | nn.BatchNorm2d(planes * block.expansion), 40 | ) 41 | 42 | layers = [] 43 | layers.append(block(self.inplanes, planes, stride, downsample)) 44 | self.inplanes = planes * block.expansion 45 | for i in range(1, blocks): 46 | layers.append(block(self.inplanes, planes)) 47 | 48 | return nn.Sequential(*layers) 49 | 50 | def _add_output_and_check(self, name, x, outputs, output_layers): 51 | if name in output_layers: 52 | outputs[name] = x 53 | return len(output_layers) == len(outputs) 54 | 55 | def forward(self, x, output_layers=None): 56 | """ Forward pass with input x. The output_layers specify the feature blocks which must be returned """ 57 | # outputs = OrderedDict() 58 | 59 | # if output_layers is None: 60 | # output_layers = self.output_layers 61 | 62 | x = self.conv1(x) 63 | x = self.bn1(x) 64 | x = self.relu(x) 65 | 66 | # if self._add_output_and_check('conv1', x, outputs, output_layers): 67 | # return outputs 68 | 69 | x = self.maxpool(x) 70 | 71 | x = self.layer1(x) 72 | 73 | # if self._add_output_and_check('layer1', x, outputs, output_layers): 74 | # return outputs 75 | 76 | x2_feat = self.layer2(x) 77 | 78 | # if self._add_output_and_check('layer2', x, outputs, output_layers): 79 | # return outputs 80 | 81 | x3_feat = self.layer3(x2_feat) 82 | 83 | # if self._add_output_and_check('layer3', x, outputs, output_layers): 84 | # return outputs 85 | 86 | x4_feat = self.layer4(x3_feat) 87 | 88 | # if self._add_output_and_check('layer4', x, outputs, output_layers): 89 | # return outputs 90 | 91 | # x = self.avgpool(x) 92 | # x = x.view(x.size(0), -1) 93 | # x = self.fc(x) 94 | 95 | # if self._add_output_and_check('fc', x, outputs, output_layers): 96 | # return outputs 97 | 98 | # if len(output_layers) == 1 and output_layers[0] == 'default': 99 | # return x 100 | 101 | # raise ValueError('output_layer is wrong.') 102 | 103 | return x2_feat, x3_feat, x4_feat 104 | 105 | 106 | 107 | 108 | 109 | 110 | def resnet18(output_layers=None, pretrained=True): 111 | """Constructs a ResNet-18 model. 112 | """ 113 | 114 | if output_layers is None: 115 | output_layers = ['default'] 116 | else: 117 | for l in output_layers: 118 | if l not in ['conv1', 'layer1', 'layer2', 'layer3', 'layer4', 'fc']: 119 | raise ValueError('Unknown layer: {}'.format(l)) 120 | 121 | model = ResNet(BasicBlock, [2, 2, 2, 2], output_layers) 122 | 123 | if pretrained: 124 | model.load_state_dict(model_zoo.load_url(model_urls['resnet18'])) 125 | return model 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | def resnet50(output_layers=None, pretrained=False): 134 | """Constructs a ResNet-50 model. 
135 | """ 136 | 137 | if output_layers is None: 138 | output_layers = ['default'] 139 | else: 140 | for l in output_layers: 141 | if l not in ['conv1', 'layer1', 'layer2', 'layer3', 'layer4', 'fc']: 142 | raise ValueError('Unknown layer: {}'.format(l)) 143 | 144 | model = ResNet(Bottleneck, [3, 4, 6, 3], output_layers) 145 | if pretrained: 146 | model.load_state_dict(model_zoo.load_url(model_urls['resnet50'])) 147 | return model -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | import time 6 | import os 7 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 8 | 9 | from torch.autograd import Variable 10 | 11 | from data_loader import DataLoader 12 | from generator import DC_adaIS_Generator 13 | from utils import * 14 | import pdb 15 | import warnings 16 | warnings.filterwarnings("ignore") 17 | 18 | 19 | batch_size = 10 20 | lr = 1e-4 21 | 22 | 23 | generator = DC_adaIS_Generator() 24 | 25 | #### load pre-trained model 26 | # print("==>> Loading pre-trained model ... ") 27 | # generator.load_state_dict(torch.load('./dynamic_global_search_region_generator.pkl')) 28 | # # generator = torch.load('./dynamic_global_search_region_generator.pkl') 29 | # print("==>> Done !!!") 30 | 31 | if torch.cuda.is_available(): 32 | generator.cuda() 33 | 34 | criterion = nn.BCELoss() 35 | 36 | 37 | print("===================================================================================") 38 | print("===================================================================================") 39 | print(generator) 40 | print("===================================================================================") 41 | print("===================================================================================") 42 | 43 | g_optim = torch.optim.Adagrad(generator.parameters(), lr=lr) 44 | 45 | num_epoch = 50 46 | dataloader = DataLoader(batch_size) 47 | num_batch = 500 48 | print("==>> num_batch: ", num_batch) 49 | 50 | 51 | def to_variable(x, requires_grad=True): 52 | if torch.cuda.is_available(): 53 | x = x.cuda() 54 | return Variable(x,requires_grad) 55 | 56 | counter = 0 57 | start_time = time.time() 58 | DIR_TO_SAVE = "./generator_output/" 59 | if not os.path.exists(DIR_TO_SAVE): 60 | os.makedirs(DIR_TO_SAVE) 61 | 62 | 63 | 64 | print("###################################################################################") 65 | print(" The Main Training Loop ") 66 | print("###################################################################################") 67 | 68 | generator.train() 69 | 70 | for current_epoch in range(num_epoch): 71 | n_updates = 1 72 | d_cost_avg = 0 73 | g_cost_avg = 0 74 | 75 | for idx in range(int(num_batch)): 76 | (batch_img, batch_map, targetObject_img, coords) = dataloader.get_batch() 77 | batch_img = to_variable(batch_img, requires_grad=True) 78 | batch_map = to_variable(batch_map, requires_grad=False) 79 | targetObject_img = to_variable(targetObject_img, requires_grad=True) 80 | # batch_map = nn.functional.interpolate(batch_map, size=[216, 216]) 81 | 82 | val_batchImg = batch_img 83 | val_targetObjectImg = targetObject_img 84 | val_coords = coords 85 | 86 | g_optim.zero_grad() 87 | attention_map = generator(batch_img, targetObject_img, coords) 88 | 89 | # pdb.set_trace() 90 | g_gen_loss = criterion(attention_map, batch_map) 91 | 
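# nn.BCELoss expects inputs already in [0, 1], so this assumes the generator
# ends in a sigmoid; with the default reduction='mean' the loss is a scalar,
# which makes the torch.sum() on the next line effectively a no-op.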
g_loss = torch.sum(g_gen_loss) 92 | g_cost_avg += g_loss.item() 93 | g_loss.backward() 94 | g_optim.step() 95 | 96 | n_updates += 1 97 | 98 | if (idx+1)%100 == 0: 99 | print("==>> Epoch [%d/%d], Step[%d/%d], g_gen_loss: %.4f, LR: %.6f, time: %4.4f" % \ 100 | (current_epoch, num_epoch, idx+1, num_batch, g_loss.item(), lr, time.time()-start_time)) 101 | counter += 1 102 | 103 | # pdb.set_trace() 104 | g_cost_avg /= num_batch 105 | 106 | # Save weights every 3 epoch 107 | if current_epoch % 3 == 0: 108 | print('==>> Epoch:', current_epoch, ' ==>> Train_loss->', (g_cost_avg)) 109 | torch.save(generator.state_dict(), 'generator_dcyNet_adaIS_1e4.pkl') 110 | 111 | # validation 112 | out = generator(val_batchImg, val_targetObjectImg, val_coords) 113 | map_out = out.cpu().data.squeeze(0) 114 | for iiidex in range(5): 115 | new_path = DIR_TO_SAVE + str(current_epoch) + str(iiidex) + ".jpg" 116 | pilTrans = transforms.ToPILImage() 117 | pilImg = pilTrans(map_out[iiidex]) 118 | print('==>> Image saved to ', new_path) 119 | pilImg.save(new_path) 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /DeepMTA_code/trackers/dcynet_modules_adaptis/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torchvision.transforms as transforms 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | import torch 6 | from torch.autograd import Variable 7 | import torch.nn as nn 8 | import pdb 9 | 10 | from PIL import Image 11 | 12 | def to_variable(x,requires_grad=True): 13 | if torch.cuda.is_available(): 14 | x = x.cuda() 15 | return Variable(x,requires_grad) 16 | 17 | def show(img): 18 | #print(img.shape) 19 | pilTrans = transforms.ToPILImage() 20 | pilImg = pilTrans(img) 21 | s = np.array(pilImg) 22 | plt.figure() 23 | plt.imshow(s) 24 | 25 | def show_gray(img): 26 | print(img.shape) 27 | pilTrans = transforms.ToPILImage() 28 | pilImg = pilTrans(img) 29 | s = np.array(pilImg) 30 | plt.figure() 31 | plt.imshow(s) 32 | 33 | def save_gray(img, path): 34 | pilTrans = transforms.ToPILImage() 35 | pilImg = pilTrans(img) 36 | print('Image saved to ', path) 37 | pilImg.save(path) 38 | 39 | 40 | 41 | 42 | def predict(model, img, validation_targetObject): 43 | to_tensor = transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0. 44 | im = to_tensor(img) 45 | val_targetObject = to_tensor(validation_targetObject) 46 | #show(im) 47 | inp = to_variable(im.unsqueeze(0), False) 48 | inp = nn.functional.interpolate(inp, size=[300, 300]) 49 | 50 | val_targetObject_ = to_variable(val_targetObject.unsqueeze(0), False) 51 | val_targetObject_ = nn.functional.interpolate(val_targetObject_, size=[100, 100]) 52 | 53 | #print(inp.size()) 54 | 55 | out = model(inp, val_targetObject_) 56 | out = nn.functional.interpolate(out, size=[im.shape[1], im.shape[2]]) 57 | 58 | map_out = out.cpu().data.squeeze(0) 59 | pilTrans = transforms.ToPILImage() 60 | pilImg = pilTrans(map_out) 61 | dynamic_atttentonMAP = np.asarray(pilImg) 62 | 63 | return dynamic_atttentonMAP 64 | 65 | 66 | # def predict(model, img, validation_targetObject, epoch, path): 67 | # to_tensor = transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0. 
68 | # im = to_tensor(img) 69 | # val_targetObject = to_tensor(validation_targetObject) 70 | # #show(im) 71 | # inp = to_variable(im.unsqueeze(0), False) 72 | # val_targetObject_ = to_variable(val_targetObject.unsqueeze(0), False) 73 | # #print(inp.size()) 74 | # out = model(inp, val_targetObject_) 75 | # map_out = out.cpu().data.squeeze(0) 76 | # #show_gray(map_out) 77 | 78 | # new_path = path + str(epoch) + ".jpg" 79 | # save_gray(map_out, new_path) 80 | 81 | # #s = np.array(Image.open(new_path)) 82 | # #plt.figure() 83 | # #plt.imshow(s) 84 | 85 | 86 | 87 | 88 | # out = generator(val_batchImg, val_targetObjectImg, val_coords) 89 | # map_out = out.cpu().data.squeeze(0) 90 | # for iiidex in range(5): 91 | # new_path = DIR_TO_SAVE + str(current_epoch) + str(iiidex) + ".jpg" 92 | # pilTrans = transforms.ToPILImage() 93 | # pilImg = pilTrans(map_out[iiidex]) 94 | # print('==>> Image saved to ', new_path) 95 | # pilImg.save(new_path) 96 | 97 | 98 | # def predict(model, img, validation_targetObject, val_coords): 99 | # to_tensor = transforms.ToTensor() # Transforms 0-255 numbers to 0 - 1.0. 100 | # im = to_tensor(img) 101 | # val_targetObject = to_tensor(validation_targetObject) 102 | # inp = to_variable(im.unsqueeze(0), False) 103 | # inp = nn.functional.interpolate(inp, size=[300, 300]) 104 | 105 | # val_targetObject_ = to_variable(val_targetObject.unsqueeze(0), False) 106 | # val_targetObject_ = nn.functional.interpolate(val_targetObject_, size=[100, 100]) 107 | 108 | # #print(inp.size()) 109 | # out = model(inp, val_targetObject_) 110 | # out = nn.functional.interpolate(out, size=[im.shape[1], im.shape[2]]) 111 | 112 | # map_out = out.cpu().data.squeeze(0) 113 | # pilTrans = transforms.ToPILImage() 114 | # pilImg = pilTrans(map_out) 115 | # dynamic_atttentonMAP = np.asarray(pilImg) 116 | # return dynamic_atttentonMAP 117 | 118 | 119 | -------------------------------------------------------------------------------- /DeepMTA_code/train_traj_measure_net.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import numpy as np 5 | import time 6 | import os 7 | os.environ["CUDA_VISIBLE_DEVICES"]="0" 8 | 9 | from network import traj_critic, axis_aligned_iou 10 | import torchvision 11 | import cv2 12 | import pdb 13 | import warnings 14 | warnings.filterwarnings("ignore") 15 | import torchvision.transforms as transforms 16 | to_tensor = transforms.ToTensor() 17 | 18 | 19 | got10k_dataset_path = "./data/GOT10k_train_val/" 20 | result_path = "./benchmark/results/GOT10k_train_val/Tracker/" 21 | attentionMap_path = "./temp_DIR_TO_SAVE_static_Global_attentionMap/" 22 | 23 | 24 | # batchSize = 20 25 | lr = 1e-3 26 | num_epoch = 5000 27 | clip_len = 10 28 | img_size = 300 29 | 30 | traj_critic_net = traj_critic() 31 | traj_critic_net = traj_critic_net.cuda() 32 | 33 | optimizer = torch.optim.Adagrad(traj_critic_net.parameters(), lr=lr) 34 | loss_fn = torch.nn.L1Loss().cuda() 35 | videoFiles = os.listdir(attentionMap_path) 36 | 37 | traj_critic_net.train() 38 | 39 | 40 | ######################################################################################################### 41 | #### The Main Loop 42 | ######################################################################################################### 43 | 44 | for epochID in range(num_epoch): 45 | 46 | epoch_totalLoss = 0 47 | 48 | for videoIndex in range(len(videoFiles)): 49 | videoName = videoFiles[videoIndex] 50 | 51 | 
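# Per-video files assumed on disk below: two trajectory result files
# (<video>_001.txt, <video>_002.txt), a ground-truth box file, and two
# per-frame score files. Note that the variable local_score is loaded from
# '*_scoreGlobal.txt' and global_score from '*_scoreLocal.txt'; whether this
# cross-naming is intentional cannot be determined from this script alone.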
result1_path = result_path + videoName + "/" + videoName + "_001.txt" 52 | result2_path = result_path + videoName + "/" + videoName + "_002.txt" 53 | gt_path = result_path + videoName + "/" + videoName + "_groundtruth.txt" 54 | local_score_path = result_path + videoName + "/" + videoName + "_scoreGlobal.txt" 55 | global_score_path = result_path + videoName + "/" + videoName + "_scoreLocal.txt" 56 | 57 | #### tracking results and score. 58 | tracking_result1 = np.loadtxt(result1_path, delimiter=',') ## (90, 4) 59 | tracking_result2 = np.loadtxt(result2_path, delimiter=',') ## (90, 4) 60 | gt = np.loadtxt(gt_path, delimiter=',') ## (90, 4) 61 | local_score = torch.from_numpy(np.loadtxt(local_score_path)) ## (90,) 62 | global_score = torch.from_numpy(np.loadtxt(global_score_path)) ## (90,) 63 | 64 | local_score = torch.unsqueeze(local_score, dim=1) 65 | global_score = torch.unsqueeze(global_score, dim=1) 66 | 67 | #### image and attention maps. 68 | imgFiles = os.listdir(got10k_dataset_path + videoName + "/") 69 | imgFiles = np.sort(imgFiles) 70 | 71 | attentionFiles = os.listdir(attentionMap_path + videoName + "/") 72 | attentionFiles = np.sort(attentionFiles) 73 | 74 | init_imgPath = got10k_dataset_path + videoName + "/" + imgFiles[0] 75 | init_image = cv2.imread(init_imgPath) 76 | init_bbox = gt[0] 77 | 78 | # pdb.set_trace() 79 | init_target = init_image[int(init_bbox[1]):int(init_bbox[1]+init_bbox[3]), int(init_bbox[0]):int(init_bbox[0]+init_bbox[2]), :] 80 | init_target = cv2.resize(init_target, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 81 | # cv2.imwrite('./init_target.png', init_target) 82 | # cv2.imwrite('./init_image.png', init_image) 83 | 84 | startIndex = np.random.random_integers(len(attentionFiles) - clip_len) 85 | 86 | image_list = torch.zeros(clip_len, 3, img_size, img_size) 87 | initTarget_list = torch.zeros(clip_len, 3, img_size, img_size) 88 | attMap_list = torch.zeros(clip_len, 3, img_size, img_size) 89 | targetImg1_list = torch.zeros(clip_len, 3, img_size, img_size) 90 | targetImg2_list = torch.zeros(clip_len, 3, img_size, img_size) 91 | targetMap1_list = torch.zeros(clip_len, 3, img_size, img_size) 92 | targetMap2_list = torch.zeros(clip_len, 3, img_size, img_size) 93 | 94 | trajScore_list1 = torch.zeros(clip_len, 1) 95 | trajScore_list2 = torch.zeros(clip_len, 1) 96 | 97 | trajBBox_list1 = torch.zeros(clip_len, 4) 98 | trajBBox_list2 = torch.zeros(clip_len, 4) 99 | 100 | IoU_score_1 = 0 101 | IoU_score_2 = 0 102 | 103 | ######################################################################################################### 104 | #### Load Batch data 105 | ######################################################################################################### 106 | count = 0 107 | startIndex = 0 108 | for INdex in range(startIndex, startIndex+clip_len): 109 | imgPath = got10k_dataset_path + videoName + "/" + imgFiles[INdex] 110 | image = cv2.imread(imgPath) 111 | 112 | imgIndex = int(imgFiles[INdex][:-4]) 113 | attMap_path = attentionMap_path + videoName + "/" + str(imgIndex)+"_dynamic_atttentonMAP_adaptIS.png" 114 | attMap = cv2.imread(attMap_path) ## (720, 1280, 3) 115 | 116 | gt_curr = gt[INdex] 117 | result_curr1 = tracking_result1[INdex] 118 | result_curr2 = tracking_result2[INdex] 119 | 120 | #### BBox normalization 121 | result_curr1[0] = max(0, min(image.shape[1], result_curr1[0])) 122 | result_curr1[1] = max(0, min(image.shape[0], result_curr1[1])) 123 | result_curr1[2] = max(10, min(image.shape[1], result_curr1[2])) 124 | result_curr1[3] = max(10, 
min(image.shape[0], result_curr1[3])) 125 | 126 | result_curr2[0] = max(0, min(image.shape[1], result_curr2[0])) 127 | result_curr2[1] = max(0, min(image.shape[0], result_curr2[1])) 128 | result_curr2[2] = max(10, min(image.shape[1], result_curr2[2])) 129 | result_curr2[3] = max(10, min(image.shape[0], result_curr2[3])) 130 | 131 | 132 | targetImg1 = image[int(result_curr1[1]):int(result_curr1[1]+result_curr1[3]), int(result_curr1[0]):int(result_curr1[0]+result_curr1[2]), :] 133 | targetImg2 = image[int(result_curr2[1]):int(result_curr2[1]+result_curr2[3]), int(result_curr2[0]):int(result_curr2[0]+result_curr2[2]), :] 134 | tagetattMap1 = attMap[int(result_curr1[1]):int(result_curr1[1]+result_curr1[3]), int(result_curr1[0]):int(result_curr1[0]+result_curr1[2]), :] 135 | tagetattMap2 = attMap[int(result_curr2[1]):int(result_curr2[1]+result_curr2[3]), int(result_curr2[0]):int(result_curr2[0]+result_curr2[2]), :] 136 | 137 | trajScore1 = local_score[INdex] 138 | trajScore2 = global_score[INdex] 139 | 140 | #### Normalization 141 | image = cv2.resize(image, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 142 | attMap = cv2.resize(attMap, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 143 | targetImg1 = cv2.resize(targetImg1, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 144 | targetImg2 = cv2.resize(targetImg2, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 145 | tagetattMap1 = cv2.resize(tagetattMap1, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 146 | tagetattMap2 = cv2.resize(tagetattMap2, (img_size, img_size), interpolation=cv2.INTER_CUBIC) 147 | 148 | # cv2.imwrite('./image.png', image) 149 | # cv2.imwrite('./attMap.png', attMap) 150 | # cv2.imwrite('./targetImg1.png', targetImg1) 151 | # cv2.imwrite('./targetImg2.png', targetImg2) 152 | # cv2.imwrite('./tagetattMap1.png', tagetattMap1) 153 | # cv2.imwrite('./tagetattMap2.png', tagetattMap2) 154 | 155 | # pdb.set_trace() 156 | 157 | image_list[count] = to_tensor(image) 158 | attMap_list[count] = to_tensor(attMap) 159 | targetImg1_list[count] = to_tensor(targetImg1) 160 | targetImg2_list[count] = to_tensor(targetImg2) 161 | targetMap1_list[count] = to_tensor(tagetattMap1) 162 | targetMap2_list[count] = to_tensor(tagetattMap2) 163 | initTarget_list[count] = to_tensor(init_target) 164 | 165 | trajBBox_list1[count] = torch.from_numpy(result_curr1) 166 | trajBBox_list2[count] = torch.from_numpy(result_curr2) 167 | 168 | # pdb.set_trace() 169 | 170 | trajScore_list1[count] = trajScore1 171 | trajScore_list2[count] = trajScore2 172 | 173 | count = count + 1 174 | 175 | 176 | #### Calculate the GIoU score 177 | gt_curr[2] = gt_curr[0]+gt_curr[2]; gt_curr[3] = gt_curr[1]+gt_curr[3] 178 | result_curr1[2] = result_curr1[0]+result_curr1[2]; result_curr1[3] = result_curr1[1]+result_curr1[3] 179 | result_curr2[2] = result_curr2[0]+result_curr2[2]; result_curr2[3] = result_curr2[1]+result_curr2[3] 180 | 181 | IoU_score_1 = IoU_score_1 + axis_aligned_iou(gt_curr, result_curr1) 182 | IoU_score_2 = IoU_score_2 + axis_aligned_iou(gt_curr, result_curr2) 183 | 184 | # pdb.set_trace() 185 | 186 | optimizer.zero_grad() 187 | pred_traj_score1 = traj_critic_net(image_list, attMap_list, targetImg1_list, targetMap1_list, initTarget_list, trajBBox_list1, trajScore_list1) 188 | pred_traj_score2 = traj_critic_net(image_list, attMap_list, targetImg2_list, targetMap2_list, initTarget_list, trajBBox_list2, trajScore_list2) 189 | 190 | IoU_score_1 = torch.from_numpy(np.array(IoU_score_1)).float().cuda() 191 | IoU_score_2 = 
torch.from_numpy(np.array(IoU_score_2)).float().cuda() 192 | traj_loss1 = loss_fn(pred_traj_score1, IoU_score_1) 193 | traj_loss2 = loss_fn(pred_traj_score2, IoU_score_2) 194 | total_loss = traj_loss1 + traj_loss2 195 | 196 | 197 | # print('Epoch:', epochID, " video: ", videoName, " loss:", total_loss.item()) 198 | 199 | # backward + optimize 200 | total_loss.backward() 201 | optimizer.step() 202 | 203 | 204 | epoch_totalLoss = epoch_totalLoss + total_loss.item() 205 | # Save weights 206 | if epochID % 50 == 0: 207 | print('==>> Epoch:', epochID, ' ==>> Train_loss->', epoch_totalLoss) 208 | checkpointName = str(epochID) + "_traj_critic_net.pkl" 209 | torch.save(traj_critic_net.state_dict(), './traj_measure_model_checkoints/'+checkpointName) 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | -------------------------------------------------------------------------------- /DeepMTA_code/webcam_demo.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # THOR 3 | # Licensed under The MIT License 4 | # Written by Axel Sauer (axel.sauer@tum.de) 5 | # -------------------------------------------------------- 6 | 7 | import pdb 8 | import argparse, cv2, os 9 | import numpy as np 10 | import sys 11 | from imutils.video import FPS 12 | import json 13 | 14 | from trackers.tracker import SiamFC_Tracker, SiamRPN_Tracker, SiamMask_Tracker 15 | from benchmark.bench_utils.bbox_helper import cxy_wh_2_rect, xyxy_to_xywh 16 | 17 | # constants 18 | BRIGHTGREEN = [102, 255, 0] 19 | RED = [0, 0, 255] 20 | YELLOW = [0, 255, 255] 21 | np.set_printoptions(precision=6, suppress=True) 22 | 23 | OUTPUT_WIDTH = 740 24 | OUTPUT_HEIGHT = 555 25 | PADDING = 2 26 | 27 | parser = argparse.ArgumentParser(description='Webcam Test') 28 | parser.add_argument('-t', '--tracker', dest='tracker', required=True, 29 | help='Name of the tracker [SiamFC, SiamRPN, SiamMask]') 30 | parser.add_argument('--vanilla', action='store_true', 31 | help='run the tracker without memory') 32 | parser.add_argument('-v', '--viz', action='store_true', 33 | help='whether visualize result') 34 | parser.add_argument('--verbose', action='store_true', 35 | help='print info about temp mem') 36 | parser.add_argument('--lb_type', type=str, default='ensemble', 37 | help='Specify the type of lower bound [dynamic, ensemble]') 38 | 39 | drawnBox = np.zeros(4) 40 | boxToDraw = np.zeros(4) 41 | mousedown = False 42 | mouseupdown = False 43 | initialize = False 44 | 45 | def on_mouse(event, x, y, flags, params): 46 | global mousedown, mouseupdown, drawnBox, boxToDraw, initialize, boxToDraw_xywh 47 | if event == cv2.EVENT_LBUTTONDOWN: 48 | drawnBox[[0,2]] = x 49 | drawnBox[[1,3]] = y 
50 | mousedown = True 51 | mouseupdown = False 52 | elif mousedown and event == cv2.EVENT_MOUSEMOVE: 53 | drawnBox[2] = x 54 | drawnBox[3] = y 55 | elif event == cv2.EVENT_LBUTTONUP: 56 | drawnBox[2] = x 57 | drawnBox[3] = y 58 | mousedown = False 59 | mouseupdown = True 60 | initialize = True 61 | boxToDraw = drawnBox.copy() 62 | boxToDraw[[0, 2]] = np.sort(boxToDraw[[0, 2]]) 63 | boxToDraw[[1, 3]] = np.sort(boxToDraw[[1, 3]]) 64 | boxToDraw_xywh = xyxy_to_xywh(boxToDraw) 65 | 66 | def bb_on_im(im, location, mask): 67 | location = [int(l) for l in location] # 68 | 69 | if len(mask): 70 | im[:, :, 2] = mask * 255 + (1 - mask) * im[:, :, 2] 71 | 72 | # prediction 73 | cv2.rectangle(im, (location[0], location[1]), 74 | (location[0] + location[2], location[1] + location[3]), 75 | (0, 255, 255), 3) 76 | 77 | return im 78 | 79 | def show_webcam(tracker, mirror=False, viz=False): 80 | global initialize 81 | 82 | vs = cv2.VideoCapture(0) 83 | cv2.namedWindow('Webcam', cv2.WINDOW_NORMAL) 84 | cv2.resizeWindow('Webcam', OUTPUT_WIDTH, OUTPUT_HEIGHT) 85 | cv2.setMouseCallback('Webcam', on_mouse, 0) 86 | 87 | outputBoxToDraw = None 88 | bbox = None 89 | fps = None 90 | state = None 91 | mask = [] 92 | 93 | # loop over video stream ims 94 | while True: 95 | _, im = vs.read() 96 | 97 | if mirror: 98 | im = cv2.flip(im, 1) 99 | 100 | if mousedown: 101 | (x1, y1, x2, y2) = [int(l) for l in boxToDraw] 102 | cv2.rectangle(im, (x1, y1), (x2, y2), 103 | BRIGHTGREEN, PADDING) 104 | 105 | elif mouseupdown: 106 | if initialize: 107 | init_pos = boxToDraw_xywh[[0, 1]] 108 | init_sz = boxToDraw_xywh[[2, 3]] 109 | state = tracker.setup(im, init_pos, init_sz) 110 | initialize = False 111 | fps = FPS().start() 112 | else: 113 | state = tracker.track(im, state) 114 | location = cxy_wh_2_rect(state['target_pos'], state['target_sz']) 115 | (cx, cy, w, h) = [int(l) for l in location] 116 | 117 | fps.update() 118 | fps.stop() 119 | 120 | # Display the image 121 | info = [ 122 | ("Score:", f"{state['score']:.4f}"), 123 | ("FPS:", f"{fps.fps():.2f}"), 124 | ] 125 | 126 | if not state['score'] > 0.8: 127 | info.insert(0, ("Object lost since", "")) 128 | else: 129 | if 'mask' in state.keys(): 130 | mask = state['mask'] > state['p'].seg_thr 131 | im = bb_on_im(im, location, mask) 132 | 133 | for (i, (k, v)) in enumerate(info): 134 | text = "{}: {}".format(k, v) 135 | cv2.putText(im, text, (10, OUTPUT_HEIGHT - ((i * 20) + 20)), 136 | cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2) 137 | 138 | cv2.imshow("Webcam", im) 139 | 140 | # check for escape key 141 | key = cv2.waitKey(1) 142 | if key==27 or key==1048603: 143 | break 144 | 145 | # release the pointer 146 | cv2.destroyAllWindows() 147 | 148 | def load_cfg(args): 149 | json_path = f"configs/{args.tracker}/VOT2018_" 150 | if args.vanilla: 151 | json_path += "vanilla.json" 152 | else: 153 | json_path += f"THOR_{args.lb_type}.json" 154 | cfg = json.load(open(json_path)) 155 | return cfg 156 | 157 | if __name__ == '__main__': 158 | args = parser.parse_args() 159 | 160 | cfg = load_cfg(args) 161 | cfg['THOR']['viz'] = args.viz 162 | cfg['THOR']['verbose'] = args.verbose 163 | 164 | print("[INFO] Initializing the tracker.") 165 | if args.tracker == 'SiamFC': 166 | tracker = SiamFC_Tracker(cfg) 167 | elif args.tracker == 'SiamRPN': 168 | tracker = SiamRPN_Tracker(cfg) 169 | elif args.tracker == 'SiamMask': 170 | tracker = SiamMask_Tracker(cfg) 171 | elif args.tracker == 'SiamRPN_PP': 172 | tracker = SiamRPN_PP_Tracker(cfg) 173 | else: 174 | raise ValueError(f"Tracker {args.tracker} 
does not exist.") 175 | 176 | print("[INFO] Starting video stream.") 177 | show_webcam(tracker, mirror=True, viz=args.viz) 178 | -------------------------------------------------------------------------------- /GOT10K_dataset_video_list/01_mask_prepreocessing.m: -------------------------------------------------------------------------------- 1 | %% 2 | clc; clear all; close all; warning off; 3 | 4 | path = '/media/wangxiao/4T_wangxiao/GOT-10K_dataset/train/'; 5 | files = dir(path); 6 | files = files(3:end); 7 | 8 | for i = 1:size(files, 1) 9 | 10 | disp(['==>> deal with ', num2str(i), '/', num2str(size(files, 1))]); 11 | 12 | videoName = files(i).name; 13 | imgPath = [path videoName '/']; 14 | firstFrame = imread([imgPath '00000001.jpg']); 15 | 16 | gt_name = ['groundtruth.txt']; 17 | gt_file = importdata([path videoName '/' gt_name]); 18 | initial_BBox = gt_file(1, :); 19 | target_Object = imcrop(firstFrame, initial_BBox); 20 | target_Object = imresize(target_Object, [320 640]); 21 | savePath = [path videoName '/']; 22 | imwrite(target_Object, [savePath, 'init_targetObject.png']); 23 | 24 | maskSavePath = [path videoName '/resizedImage/']; 25 | mkdir(maskSavePath); 26 | imgfiles = dir([imgPath, '*.jpg']); 27 | for j=1:size(imgfiles, 1) 28 | image = imread([imgPath imgfiles(j).name]); 29 | image = imresize(image, [320 640]); 30 | imwrite(image, fullfile(maskSavePath, imgfiles(j).name),'jpg'); 31 | end 32 | 33 | 34 | maskSavePath = [path videoName '/mask_imgs/']; 35 | mkdir(maskSavePath); 36 | imgfiles = dir([imgPath, '*.jpg']); 37 | for j=1:size(imgfiles, 1) 38 | image = imread([imgPath imgfiles(j).name]); 39 | 40 | BBox = gt_file(j, :); 41 | 42 | if BBox(1) <= 0 BBox(1)=1; end 43 | if BBox(2) <= 0 BBox(2)=1; end 44 | if BBox(3) <= 0 BBox(3)=1; end 45 | if BBox(4) <= 0 BBox(4)=1; end 46 | 47 | BinaryMap = zeros(size(image, 1), size(image, 2)); 48 | for iidex = floor(BBox(1)):floor(BBox(1)+BBox(3)) 49 | for jidex = floor(BBox(2)):floor(BBox(2)+BBox(4)) 50 | BinaryMap(jidex, iidex) = 255; 51 | end 52 | end 53 | BinaryMap = imresize(BinaryMap, [320, 640]); 54 | imwrite(BinaryMap, fullfile(maskSavePath, imgfiles(j).name),'jpg'); 55 | 56 | 57 | 58 | end 59 | 60 | 61 | end 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /GOT10K_dataset_video_list/GOT10K_dataset_test_video_list.txt: -------------------------------------------------------------------------------- 1 | GOT-10k_Test_000001 2 | GOT-10k_Test_000002 3 | GOT-10k_Test_000003 4 | GOT-10k_Test_000004 5 | GOT-10k_Test_000005 6 | GOT-10k_Test_000006 7 | GOT-10k_Test_000007 8 | GOT-10k_Test_000008 9 | GOT-10k_Test_000009 10 | GOT-10k_Test_000010 11 | GOT-10k_Test_000011 12 | GOT-10k_Test_000012 13 | GOT-10k_Test_000013 14 | GOT-10k_Test_000014 15 | GOT-10k_Test_000015 16 | GOT-10k_Test_000016 17 | GOT-10k_Test_000017 18 | GOT-10k_Test_000018 19 | GOT-10k_Test_000019 20 | GOT-10k_Test_000020 21 | GOT-10k_Test_000021 22 | GOT-10k_Test_000022 23 | GOT-10k_Test_000023 24 | GOT-10k_Test_000024 25 | GOT-10k_Test_000025 26 | GOT-10k_Test_000026 27 | GOT-10k_Test_000027 28 | GOT-10k_Test_000028 29 | GOT-10k_Test_000029 30 | GOT-10k_Test_000030 31 | GOT-10k_Test_000031 32 | GOT-10k_Test_000032 33 | GOT-10k_Test_000033 34 | GOT-10k_Test_000034 35 | GOT-10k_Test_000035 36 | GOT-10k_Test_000036 37 | GOT-10k_Test_000037 38 | GOT-10k_Test_000038 39 | GOT-10k_Test_000039 40 | GOT-10k_Test_000040 41 | GOT-10k_Test_000041 42 | GOT-10k_Test_000042 43 | GOT-10k_Test_000043 44 | 
GOT-10k_Test_000044 45 | GOT-10k_Test_000045 46 | GOT-10k_Test_000046 47 | GOT-10k_Test_000047 48 | GOT-10k_Test_000048 49 | GOT-10k_Test_000049 50 | GOT-10k_Test_000050 51 | GOT-10k_Test_000051 52 | GOT-10k_Test_000052 53 | GOT-10k_Test_000053 54 | GOT-10k_Test_000054 55 | GOT-10k_Test_000055 56 | GOT-10k_Test_000056 57 | GOT-10k_Test_000057 58 | GOT-10k_Test_000058 59 | GOT-10k_Test_000059 60 | GOT-10k_Test_000060 61 | GOT-10k_Test_000061 62 | GOT-10k_Test_000062 63 | GOT-10k_Test_000063 64 | GOT-10k_Test_000064 65 | GOT-10k_Test_000065 66 | GOT-10k_Test_000066 67 | GOT-10k_Test_000067 68 | GOT-10k_Test_000068 69 | GOT-10k_Test_000069 70 | GOT-10k_Test_000070 71 | GOT-10k_Test_000071 72 | GOT-10k_Test_000072 73 | GOT-10k_Test_000073 74 | GOT-10k_Test_000074 75 | GOT-10k_Test_000075 76 | GOT-10k_Test_000076 77 | GOT-10k_Test_000077 78 | GOT-10k_Test_000078 79 | GOT-10k_Test_000079 80 | GOT-10k_Test_000080 81 | GOT-10k_Test_000081 82 | GOT-10k_Test_000082 83 | GOT-10k_Test_000083 84 | GOT-10k_Test_000084 85 | GOT-10k_Test_000085 86 | GOT-10k_Test_000086 87 | GOT-10k_Test_000087 88 | GOT-10k_Test_000088 89 | GOT-10k_Test_000089 90 | GOT-10k_Test_000090 91 | GOT-10k_Test_000091 92 | GOT-10k_Test_000092 93 | GOT-10k_Test_000093 94 | GOT-10k_Test_000094 95 | GOT-10k_Test_000095 96 | GOT-10k_Test_000096 97 | GOT-10k_Test_000097 98 | GOT-10k_Test_000098 99 | GOT-10k_Test_000099 100 | GOT-10k_Test_000100 101 | GOT-10k_Test_000101 102 | GOT-10k_Test_000102 103 | GOT-10k_Test_000103 104 | GOT-10k_Test_000104 105 | GOT-10k_Test_000105 106 | GOT-10k_Test_000106 107 | GOT-10k_Test_000107 108 | GOT-10k_Test_000108 109 | GOT-10k_Test_000109 110 | GOT-10k_Test_000110 111 | GOT-10k_Test_000111 112 | GOT-10k_Test_000112 113 | GOT-10k_Test_000113 114 | GOT-10k_Test_000114 115 | GOT-10k_Test_000115 116 | GOT-10k_Test_000116 117 | GOT-10k_Test_000117 118 | GOT-10k_Test_000118 119 | GOT-10k_Test_000119 120 | GOT-10k_Test_000120 121 | GOT-10k_Test_000121 122 | GOT-10k_Test_000122 123 | GOT-10k_Test_000123 124 | GOT-10k_Test_000124 125 | GOT-10k_Test_000125 126 | GOT-10k_Test_000126 127 | GOT-10k_Test_000127 128 | GOT-10k_Test_000128 129 | GOT-10k_Test_000129 130 | GOT-10k_Test_000130 131 | GOT-10k_Test_000131 132 | GOT-10k_Test_000132 133 | GOT-10k_Test_000133 134 | GOT-10k_Test_000134 135 | GOT-10k_Test_000135 136 | GOT-10k_Test_000136 137 | GOT-10k_Test_000137 138 | GOT-10k_Test_000138 139 | GOT-10k_Test_000139 140 | GOT-10k_Test_000140 141 | GOT-10k_Test_000141 142 | GOT-10k_Test_000142 143 | GOT-10k_Test_000143 144 | GOT-10k_Test_000144 145 | GOT-10k_Test_000145 146 | GOT-10k_Test_000146 147 | GOT-10k_Test_000147 148 | GOT-10k_Test_000148 149 | GOT-10k_Test_000149 150 | GOT-10k_Test_000150 151 | GOT-10k_Test_000151 152 | GOT-10k_Test_000152 153 | GOT-10k_Test_000153 154 | GOT-10k_Test_000154 155 | GOT-10k_Test_000155 156 | GOT-10k_Test_000156 157 | GOT-10k_Test_000157 158 | GOT-10k_Test_000158 159 | GOT-10k_Test_000159 160 | GOT-10k_Test_000160 161 | GOT-10k_Test_000161 162 | GOT-10k_Test_000162 163 | GOT-10k_Test_000163 164 | GOT-10k_Test_000164 165 | GOT-10k_Test_000165 166 | GOT-10k_Test_000166 167 | GOT-10k_Test_000167 168 | GOT-10k_Test_000168 169 | GOT-10k_Test_000169 170 | GOT-10k_Test_000170 171 | GOT-10k_Test_000171 172 | GOT-10k_Test_000172 173 | GOT-10k_Test_000173 174 | GOT-10k_Test_000174 175 | GOT-10k_Test_000175 176 | GOT-10k_Test_000176 177 | GOT-10k_Test_000177 178 | GOT-10k_Test_000178 179 | GOT-10k_Test_000179 180 | GOT-10k_Test_000180 
-------------------------------------------------------------------------------- /GOT10K_dataset_video_list/GOT10K_dataset_val_video_list.txt: -------------------------------------------------------------------------------- 1 | GOT-10k_Val_000001 2 | GOT-10k_Val_000002 3 | GOT-10k_Val_000003 4 | GOT-10k_Val_000004 5 | GOT-10k_Val_000005 6 | GOT-10k_Val_000006 7 | GOT-10k_Val_000007 8 | GOT-10k_Val_000008 9 | GOT-10k_Val_000009 10 | GOT-10k_Val_000010 11 | GOT-10k_Val_000011 12 | GOT-10k_Val_000012 13 | GOT-10k_Val_000013 14 | GOT-10k_Val_000014 15 | GOT-10k_Val_000015 16 | GOT-10k_Val_000016 17 | GOT-10k_Val_000017 18 | GOT-10k_Val_000018 19 | GOT-10k_Val_000019 20 | GOT-10k_Val_000020 21 | GOT-10k_Val_000021 22 | GOT-10k_Val_000022 23 | GOT-10k_Val_000023 24 | GOT-10k_Val_000024 25 | GOT-10k_Val_000025 26 | GOT-10k_Val_000026 27 | GOT-10k_Val_000027 28 | GOT-10k_Val_000028 29 | GOT-10k_Val_000029 30 | GOT-10k_Val_000030 31 | GOT-10k_Val_000031 32 | GOT-10k_Val_000032 33 | GOT-10k_Val_000033 34 | GOT-10k_Val_000034 35 | GOT-10k_Val_000035 36 | GOT-10k_Val_000036 37 | GOT-10k_Val_000037 38 | GOT-10k_Val_000038 39 | GOT-10k_Val_000039 40 | GOT-10k_Val_000040 41 | GOT-10k_Val_000041 42 | GOT-10k_Val_000042 43 | GOT-10k_Val_000043 44 | GOT-10k_Val_000044 45 | GOT-10k_Val_000045 46 | GOT-10k_Val_000046 47 | GOT-10k_Val_000047 48 | GOT-10k_Val_000048 49 | GOT-10k_Val_000049 50 | GOT-10k_Val_000050 51 | GOT-10k_Val_000051 52 | GOT-10k_Val_000052 53 | GOT-10k_Val_000053 54 | GOT-10k_Val_000054 55 | GOT-10k_Val_000055 56 | GOT-10k_Val_000056 57 | GOT-10k_Val_000057 58 | GOT-10k_Val_000058 59 | GOT-10k_Val_000059 60 | GOT-10k_Val_000060 61 | GOT-10k_Val_000061 62 | GOT-10k_Val_000062 63 | GOT-10k_Val_000063 64 | GOT-10k_Val_000064 65 | GOT-10k_Val_000065 66 | GOT-10k_Val_000066 67 | GOT-10k_Val_000067 68 | GOT-10k_Val_000068 69 | GOT-10k_Val_000069 70 | GOT-10k_Val_000070 71 | GOT-10k_Val_000071 72 | GOT-10k_Val_000072 73 | GOT-10k_Val_000073 74 | GOT-10k_Val_000074 75 | GOT-10k_Val_000075 76 | GOT-10k_Val_000076 77 | GOT-10k_Val_000077 78 | GOT-10k_Val_000078 79 | GOT-10k_Val_000079 80 | GOT-10k_Val_000080 81 | GOT-10k_Val_000081 82 | GOT-10k_Val_000082 83 | GOT-10k_Val_000083 84 | GOT-10k_Val_000084 85 | GOT-10k_Val_000085 86 | GOT-10k_Val_000086 87 | GOT-10k_Val_000087 88 | GOT-10k_Val_000088 89 | GOT-10k_Val_000089 90 | GOT-10k_Val_000090 91 | GOT-10k_Val_000091 92 | GOT-10k_Val_000092 93 | GOT-10k_Val_000093 94 | GOT-10k_Val_000094 95 | GOT-10k_Val_000095 96 | GOT-10k_Val_000096 97 | GOT-10k_Val_000097 98 | GOT-10k_Val_000098 99 | GOT-10k_Val_000099 100 | GOT-10k_Val_000100 101 | GOT-10k_Val_000101 102 | GOT-10k_Val_000102 103 | GOT-10k_Val_000103 104 | GOT-10k_Val_000104 105 | GOT-10k_Val_000105 106 | GOT-10k_Val_000106 107 | GOT-10k_Val_000107 108 | GOT-10k_Val_000108 109 | GOT-10k_Val_000109 110 | GOT-10k_Val_000110 111 | GOT-10k_Val_000111 112 | GOT-10k_Val_000112 113 | GOT-10k_Val_000113 114 | GOT-10k_Val_000114 115 | GOT-10k_Val_000115 116 | GOT-10k_Val_000116 117 | GOT-10k_Val_000117 118 | GOT-10k_Val_000118 119 | GOT-10k_Val_000119 120 | GOT-10k_Val_000120 121 | GOT-10k_Val_000121 122 | GOT-10k_Val_000122 123 | GOT-10k_Val_000123 124 | GOT-10k_Val_000124 125 | GOT-10k_Val_000125 126 | GOT-10k_Val_000126 127 | GOT-10k_Val_000127 128 | GOT-10k_Val_000128 129 | GOT-10k_Val_000129 130 | GOT-10k_Val_000130 131 | GOT-10k_Val_000131 132 | GOT-10k_Val_000132 133 | GOT-10k_Val_000133 134 | GOT-10k_Val_000134 135 | GOT-10k_Val_000135 136 | GOT-10k_Val_000136 137 | GOT-10k_Val_000137 138 | 
GOT-10k_Val_000138 139 | GOT-10k_Val_000139 140 | GOT-10k_Val_000140 141 | GOT-10k_Val_000141 142 | GOT-10k_Val_000142 143 | GOT-10k_Val_000143 144 | GOT-10k_Val_000144 145 | GOT-10k_Val_000145 146 | GOT-10k_Val_000146 147 | GOT-10k_Val_000147 148 | GOT-10k_Val_000148 149 | GOT-10k_Val_000149 150 | GOT-10k_Val_000150 151 | GOT-10k_Val_000151 152 | GOT-10k_Val_000152 153 | GOT-10k_Val_000153 154 | GOT-10k_Val_000154 155 | GOT-10k_Val_000155 156 | GOT-10k_Val_000156 157 | GOT-10k_Val_000157 158 | GOT-10k_Val_000158 159 | GOT-10k_Val_000159 160 | GOT-10k_Val_000160 161 | GOT-10k_Val_000161 162 | GOT-10k_Val_000162 163 | GOT-10k_Val_000163 164 | GOT-10k_Val_000164 165 | GOT-10k_Val_000165 166 | GOT-10k_Val_000166 167 | GOT-10k_Val_000167 168 | GOT-10k_Val_000168 169 | GOT-10k_Val_000169 170 | GOT-10k_Val_000170 171 | GOT-10k_Val_000171 172 | GOT-10k_Val_000172 173 | GOT-10k_Val_000173 174 | GOT-10k_Val_000174 175 | GOT-10k_Val_000175 176 | GOT-10k_Val_000176 177 | GOT-10k_Val_000177 178 | GOT-10k_Val_000178 179 | GOT-10k_Val_000179 180 | GOT-10k_Val_000180 181 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepMTA_PyTorch 2 | 3 | ### Official PyTorch Implementation of "Dynamic Attention-guided Multi-Trajectory Analysis for Single Object Tracking", Xiao Wang, Zhe Chen, Jin Tang, Bin Luo, Yaowei Wang, Yonghong Tian, Feng Wu, IEEE Transactions on Circuits and Systems for Video Technology (T-CSVT 2021) [[Paper](https://ieeexplore.ieee.org/document/9345930)] [[Project](https://sites.google.com/view/mt-track/home)] 4 | 5 | 6 | ## Abstract: 7 | Most existing single object trackers track the target within a single local search window, making them particularly vulnerable to challenging factors such as heavy occlusions and out-of-view movements. Despite attempts to further incorporate global search, prevailing mechanisms for combining local and global search are relatively static and thus still sub-optimal for improving tracking performance. By further studying the local and global search results, we raise a question: can we allow more dynamics in combining the two? In this paper, we propose to introduce more dynamics by devising a dynamic attention-guided multi-trajectory tracking strategy. In particular, we construct a dynamic appearance model that contains multiple target templates, each of which provides its own attention for locating the target in the new frame. Guided by the different attention maps, we maintain diversified tracking results for the target to build a multi-trajectory tracking history, allowing more candidates to represent the true target trajectory. After the whole sequence has been processed, we introduce a multi-trajectory selection network to find the best trajectory, which delivers improved tracking performance. Extensive experimental results show that our proposed tracking strategy achieves compelling performance on various large-scale tracking benchmarks.
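To make the trajectory-selection step above concrete, below is a minimal, self-contained PyTorch sketch of the idea: each candidate trajectory is scored by a small critic network and the highest-scoring trajectory is kept as the final result. The names (`TrajCritic`, `select_trajectory`) and the toy per-frame features (box plus tracker score) are illustrative assumptions only; the actual trajectory evaluation network in this repository additionally consumes frame crops, attention maps, and the initial target template (see `train_traj_measure_net.py`).

~~~
import torch
import torch.nn as nn


class TrajCritic(nn.Module):
    """Toy trajectory critic: maps the per-frame [x, y, w, h, tracker_score]
    features of one candidate trajectory to a single quality estimate.
    Illustrative stand-in only, not the repository's actual model."""

    def __init__(self, in_dim: int = 5, hidden: int = 64):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(in_dim, hidden), nn.ReLU(),
            nn.Linear(hidden, hidden), nn.ReLU(),
        )
        self.head = nn.Linear(hidden, 1)

    def forward(self, traj: torch.Tensor) -> torch.Tensor:
        # traj: (clip_len, 5) -> per-frame features -> temporal average pooling -> scalar score
        feats = self.encoder(traj)
        pooled = feats.mean(dim=0)
        return self.head(pooled).squeeze(-1)


def select_trajectory(critic: nn.Module, trajectories):
    """Score every candidate trajectory and return the index of the best one."""
    with torch.no_grad():
        scores = torch.stack([critic(t) for t in trajectories])
    return int(scores.argmax().item()), scores


if __name__ == "__main__":
    critic = TrajCritic()
    # two hypothetical candidates, e.g. the local-search and global-search trajectories
    traj_local = torch.rand(30, 5)
    traj_global = torch.rand(30, 5)
    best, scores = select_trajectory(critic, [traj_local, traj_global])
    print("selected trajectory:", best, "scores:", scores.tolist())
~~~

In the released training script the two candidate trajectories come from the two tracking result files of each video, and the supervision signal for the critic is the accumulated IoU between each trajectory and the ground truth over the sampled clip.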
8 | 9 | 10 | ## Our Proposed Approach: 11 | ![fig-1](https://github.com/wangxiao5791509/DeepMTA_PyTorch/blob/master/figures/pipeline.png) 12 | 13 | 14 | 15 | 16 | ## Install: 17 | ~~~ 18 | git clone https://github.com/wangxiao5791509/DeepMTA_PyTorch 19 | cd DeepMTA_TCSVT_project 20 | 21 | # create the conda environment 22 | conda env create -f environment.yml 23 | conda activate deepmta 24 | 25 | # build the vot toolkits 26 | bash benchmark/make_toolkits.sh 27 | ~~~ 28 | 29 | ## Download Dataset and Model: 30 | Download the pre-trained **Traj-Evaluation-Network** and **Dynamic-TANet-Model** from [[Onedrive](https://ahueducn-my.sharepoint.com/:f:/g/personal/e16101002_stu_ahu_edu_cn/EpMTPeqEVOFHoCvTLMI8WTUBNHt65WtgB31-cB8WqlaIfQ?e=HiDhLQ)] 31 | 32 | 33 | Get the datasets OTB2015, GOT-10k, LaSOT, UAV123, UAV20L, and OxUvA from [[List](https://github.com/wangxiao5791509/DeepMTA_PyTorch/blob/master/download_links_for_tracking_datasets.txt)]. 34 | 35 | Download the TNL2K dataset (published at CVPR 2021, with 1300/700 videos for the train and test subsets) from: https://sites.google.com/view/langtrackbenchmark/ 36 | 37 | 38 | ## Train: 39 | 1. You can directly use the pre-trained tracking model of THOR [[github](https://github.com/xl-sr/THOR)]; 40 | 41 | 2. Train the Dynamic Target-aware Attention network: 42 | ~~~ 43 | cd ~/DeepMTA_TCSVT_project/trackers/dcynet_modules_adaptis/ 44 | python train.py 45 | ~~~ 46 | 47 | 3. Train the Trajectory Evaluation Network: 48 | ~~~ 49 | python train_traj_measure_net.py 50 | ~~~ 51 | 52 | 53 | 54 | 55 | ## Tracking: 56 | 57 | Take the GOT-10k and LaSOT datasets as examples: 58 | ~~~ 59 | python testing.py -d GOT10k -t SiamRPN --lb_type ensemble 60 | 61 | python testing.py -d LaSOT -t SiamRPN --lb_type ensemble 62 | ~~~ 63 | 64 | 65 | 66 | 67 | 68 | 69 | ### Benchmark Results: 70 | Experimental results on the compared tracking benchmarks: 71 | 72 | [[OTB2015]()] 73 | [[LaSOT](https://stuahueducn-my.sharepoint.com/:u:/g/personal/e16101002_stu_ahu_edu_cn/Ec99MGQJXlJEjJFtpn7tJzoBTl77yVKt4wBOd9amXWR5lQ?e=u0eShJ)] 74 | [[OxUvA](https://stuahueducn-my.sharepoint.com/:u:/g/personal/e16101002_stu_ahu_edu_cn/Efqz3Y2KSVdCnEl0ephudGQBNELXW7dgESWfvGmmdVVFyQ?e=D049Wf)] 75 | [[GOT-10k](https://stuahueducn-my.sharepoint.com/:u:/g/personal/e16101002_stu_ahu_edu_cn/EbUB51geqFJEupM70SY6lfYBRkMAgKjfpH9MB6dlPKWzMg?e=kkuB6f)] 76 | [[UAV123](https://stuahueducn-my.sharepoint.com/:u:/g/personal/e16101002_stu_ahu_edu_cn/EbhtNj6ZHRpJp34c07Qk9a4Bd522CYx4zcjOFKB6AWTUpA?e=4qEBdP)] 77 | [[TNL2K](https://stuahueducn-my.sharepoint.com/:u:/g/personal/e16101002_stu_ahu_edu_cn/EaiGld9vweVNv6HiR3gfnlQBLlFiC29Se-MOFLJV_ooJIA?e=cXliLz)] 78 | 79 | 80 | 81 | 82 | 83 | ### Tracking Results: 84 | 85 | #### Tracking results on LaSOT dataset. 86 | ![fig-1](https://github.com/wangxiao5791509/DeepMTA_PyTorch/blob/master/figures/lasot_result.png) 87 | 88 | #### Tracking results on TNL2K dataset. 89 | ![fig-1](https://github.com/wangxiao5791509/DeepMTA_PyTorch/blob/master/figures/benchmarkresults.png) 90 | 91 | #### Attention prediction and Tracking Results.
92 | ![fig-1](https://github.com/wangxiao5791509/DeepMTA_PyTorch/blob/master/figures/attention_supplement.jpg) 93 | ![fig-1](https://github.com/wangxiao5791509/DeepMTA_PyTorch/blob/master/figures/trackingresults_vis.jpg) 94 | 95 | 96 | 97 | 98 | 99 | 100 | ### Acknowledgement: 101 | Our tracker is developed based on **THOR**, which was published at BMVC 2019 [[Paper](https://arxiv.org/pdf/1907.12920.pdf)] [[Code](https://github.com/xl-sr/THOR)] 102 | 103 | 104 | ### Other related works: 105 | * MTP: Multi-hypothesis Tracking and Prediction for Reduced Error Propagation, Xinshuo Weng, Boris Ivanovic, and Marco Pavone [[Paper](https://arxiv.org/pdf/2110.09481.pdf)] [[Code](https://www.xinshuoweng.com/projects/MTP/)] 106 | * D.-Y. Lee, J.-Y. Sim, and C.-S. Kim, “Multihypothesis trajectory analysis for robust visual tracking,” in Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition, 2015, pp. 5088–5096. [[Paper](http://openaccess.thecvf.com/content_cvpr_2015/papers/Lee_Multihypothesis_Trajectory_Analysis_2015_CVPR_paper.pdf)] 107 | * C. Kim, F. Li, A. Ciptadi, and J. M. Rehg, “Multiple hypothesis tracking revisited,” in Proceedings of the IEEE International Conference on Computer Vision, 2015, pp. 4696–4704. [[Paper](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Kim_Multiple_Hypothesis_Tracking_ICCV_2015_paper.pdf)] 108 | 109 | 110 | 111 | 112 | 113 | ### Citation: 114 | If you find this paper useful for your research, please consider citing our paper: 115 | ~~~ 116 | @article{wang2021deepmta, 117 | title={Dynamic Attention guided Multi-Trajectory Analysis for Single Object Tracking}, 118 | author={Wang, Xiao and Chen, Zhe and Tang, Jin and Luo, Bin and Wang, Yaowei and Tian, Yonghong and Wu, Feng}, 119 | journal={IEEE Transactions on Circuits and Systems for Video Technology}, 120 | doi={10.1109/TCSVT.2021.3056684}, 121 | year={2021} 122 | } 123 | ~~~ 124 | 125 | If you have any questions about this work, please contact me via wangxiaocvpr@foxmail.com 126 | 127 | 128 | -------------------------------------------------------------------------------- /deepmta_arts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/deepmta_arts.png -------------------------------------------------------------------------------- /download_links_for_tracking_datasets.txt: -------------------------------------------------------------------------------- 1 | Popular Tracking datasets: 2 | UAV123: https://pan.baidu.com/s/1AhNnfjF4fZe14sUFefU3iA password: 2iq4 3 | 4 | VOT2018: https://pan.baidu.com/s/1MOWZ5lcxfF0wsgSuj5g4Yw password: e5eh 5 | 6 | VisDrone2019: https://pan.baidu.com/s/1Y6ubKHuYX65mK_iDVSfKPQ password: yxb6 7 | 8 | OTB2015: https://pan.baidu.com/s/1ZjKgRMYSHfR_w3Z7iQEkYA password: t5i1 9 | 10 | DTB70: https://pan.baidu.com/s/1kfHrArw0aVhGPSM91WHomw password: e7qm 11 | 12 | TLP50 (Long-Term): https://amoudgl.github.io/tlp/ 13 | 14 | ILSVRC2015 VID: https://pan.baidu.com/s/1CXWgpAG4CYpk-WnaUY5mAQ password: uqzj 15 | 16 | NFS: https://pan.baidu.com/s/1ei54oKNA05iBkoUwXPOB7g password: vng1 17 | 18 | GOT10k: https://pan.baidu.com/s/172oiQPA_Ky2iujcW5Irlow password: uxds 19 | 20 | UAVDT: https://pan.baidu.com/s/1K8oo53mPYCxUFVMXIGLhVA password: keva 21 | 22 | YTB-VOS: https://pan.baidu.com/s/1WMB0q9GJson75QBFVfeH5A password: sf1m 23 | 24 | YTB-Crop511 (used in siamrpn++ and siammask):
https://pan.baidu.com/s/112zLS_02-Z2ouKGbnPlTjw password: ebq1 25 | 26 | TColor128: https://pan.baidu.com/s/1v4J6zWqZwj8fHi5eo5EJvQ password: 26d4 27 | 28 | DAVIS2017: https://pan.baidu.com/s/1JTsumpnkWotEJQE7KQmh6A password: c9qp 29 | 30 | YTB&VID (used in siamrpn): https://pan.baidu.com/s/1gF8PSZDzw-7EAVrdYHQwsA password: 6vkz 31 | 32 | TrackingNet: https://pan.baidu.com/s/1PXSRAqcw-KMfBIJYUtI4Aw password: nkb9 (Note that this link is provided by the SiamFC++ author) 33 | 34 | TAO: A Large-Scale Benchmark for Tracking Any Object: https://github.com/TAO-Dataset/tao 35 | 36 | VOT2018 and VOT2019: https://pan.baidu.com/s/1q6lv3cUhezBb5pmdj3BRGw password: d7r3 37 | 38 | VOT2018 LT: https://pan.baidu.com/s/16Q4_sxhBjmddIHU8b7XK3w password: 67xf 39 | 40 | VOT2019 LT: https://pan.baidu.com/s/1z9HBPNprbt2gb2RGzRJkwA password: 7yq5 41 | 42 | VOT2019 RGB-Thermal: https://pan.baidu.com/s/1oT8qFmKBpYa3VlXP1ZwfCA password: mn1b 43 | 44 | -------------------------------------------------------------------------------- /figures/attention_supplement.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/attention_supplement.jpg -------------------------------------------------------------------------------- /figures/benchmarkresults.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/benchmarkresults.png -------------------------------------------------------------------------------- /figures/lasot_result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/lasot_result.png -------------------------------------------------------------------------------- /figures/lasot_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/lasot_results.jpg -------------------------------------------------------------------------------- /figures/motivation.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/motivation.jpg -------------------------------------------------------------------------------- /figures/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/pipeline.png -------------------------------------------------------------------------------- /figures/trackingresults_vis.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wangxiao5791509/DeepMTA_PyTorch/430a997232e7b56c7867345e3d064a9718237e6c/figures/trackingresults_vis.jpg --------------------------------------------------------------------------------