├── ltr ├── admin │ ├── __init__.py │ ├── settings.py │ ├── multigpu.py │ ├── tensorboard.py │ ├── stats.py │ ├── model_constructor.py │ └── environment.py ├── models │ ├── __init__.py │ ├── kys │ │ ├── __init__.py │ │ ├── conv_gru.py │ │ ├── cost_volume.py │ │ └── utils.py │ ├── lwl │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── initializer.py │ │ └── loss_residual_modules.py │ ├── meta │ │ └── __init__.py │ ├── rts │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── initializer.py │ │ ├── learners_fusion.py │ │ └── loss_residual_modules.py │ ├── layers │ │ ├── __init__.py │ │ ├── normalization.py │ │ ├── transform.py │ │ ├── blocks.py │ │ ├── distance.py │ │ └── activation.py │ ├── tracking │ │ └── __init__.py │ ├── transformer │ │ ├── __init__.py │ │ └── position_encoding.py │ ├── target_candidate_matching │ │ └── __init__.py │ ├── bbreg │ │ ├── __init__.py │ │ └── atom.py │ ├── target_classifier │ │ ├── __init__.py │ │ └── features.py │ ├── loss │ │ ├── __init__.py │ │ ├── bbr_loss.py │ │ ├── target_candidate_matching_loss.py │ │ ├── segmentation.py │ │ └── kl_regression.py │ └── backbone │ │ ├── __init__.py │ │ └── base.py ├── train_settings │ ├── __init__.py │ ├── bbreg │ │ └── __init__.py │ ├── dimp │ │ └── __init__.py │ ├── kys │ │ └── __init__.py │ ├── lwl │ │ └── __init__.py │ ├── rts │ │ └── __init__.py │ ├── tamos │ │ └── __init__.py │ ├── tomp │ │ └── __init__.py │ └── keep_track │ │ └── __init__.py ├── data │ ├── __init__.py │ ├── bounding_box_utils.py │ └── image_loader.py ├── trainers │ └── __init__.py ├── actors │ ├── __init__.py │ ├── base_actor.py │ └── bbreg.py ├── __init__.py ├── dataset │ ├── __init__.py │ ├── got10kvos.py │ ├── base_image_dataset.py │ ├── ecssd.py │ ├── hku_is.py │ ├── msra10k.py │ ├── lasotvos.py │ ├── synthetic_video.py │ └── base_video_dataset.py ├── data_specs │ └── lasot_train_val_split.txt └── run_training.py ├── pytracking ├── analysis │ └── __init__.py ├── features │ ├── __init__.py │ ├── color.py │ ├── util.py │ └── net_wrappers.py ├── parameter │ ├── __init__.py │ ├── atom │ │ └── __init__.py │ ├── dimp │ │ ├── __init__.py │ │ ├── dimp18.py │ │ ├── dimp50.py │ │ ├── dimp50_vot19.py │ │ ├── dimp50_vot18.py │ │ ├── dimp18_vot18.py │ │ ├── prdimp18.py │ │ ├── super_dimp.py │ │ ├── prdimp50.py │ │ └── prdimp50_vot18.py │ ├── eco │ │ └── __init__.py │ ├── kys │ │ ├── __init__.py │ │ ├── default.py │ │ └── default_vot.py │ ├── lwl │ │ ├── __init__.py │ │ ├── lwl_ytvos.py │ │ └── lwl_boxinit.py │ ├── rts │ │ ├── __init__.py │ │ └── rts50.py │ ├── tamos │ │ ├── __init__.py │ │ ├── tamos_resnet50.py │ │ └── tamos_swin_base.py │ ├── tomp │ │ ├── __init__.py │ │ ├── tomp101.py │ │ └── tomp50.py │ ├── keep_track │ │ ├── __init__.py │ │ ├── default.py │ │ └── default_fast.py │ └── dimp_simple │ │ ├── __init__.py │ │ └── super_dimp_simple.py ├── tracker │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ └── basetracker.py │ ├── eco │ │ └── __init__.py │ ├── atom │ │ ├── __init__.py │ │ └── optim.py │ ├── dimp │ │ └── __init__.py │ ├── kys │ │ └── __init__.py │ ├── lwl │ │ └── __init__.py │ ├── rts │ │ └── __init__.py │ ├── tomp │ │ └── __init__.py │ ├── tamos │ │ └── __init__.py │ ├── keep_track │ │ └── __init__.py │ └── dimp_simple │ │ └── __init__.py ├── experiments │ ├── __init__.py │ └── myexperiments.py ├── util_scripts │ ├── __init__.py │ ├── pack_got10k_results.py │ └── pack_trackingnet_results.py ├── utils │ ├── __init__.py │ ├── convert_vot_anno_to_rect.py │ ├── loading.py │ ├── params.py │ └── load_text.py ├── .figs │ ├── NFS.png │ ├── LaSOT.png │ ├── 
OTB-100.png │ ├── UAV123.png │ ├── visdom.png │ ├── ToMP_teaser.png │ ├── atom_overview.png │ ├── dimp_overview.png │ ├── kys_overview.png │ ├── lwtl_overview.png │ ├── rts_overview.png │ ├── TaMOs_overview.png │ ├── ETTrack_overview.png │ └── KeepTrack_teaser.png ├── libs │ ├── __init__.py │ ├── tensordict.py │ └── operation.py ├── evaluation │ ├── __init__.py │ ├── environment.py │ ├── mobifacedataset.py │ ├── lagotdataset.py │ ├── got10kdataset.py │ └── trackingnetdataset.py ├── __init__.py ├── VOT │ ├── trackers.ini │ ├── tracker_DiMP.m │ └── vot.py ├── run_vot.py ├── run_experiment.py ├── run_video.py ├── run_webcam.py └── run_tracker.py ├── .gitmodules └── .gitignore /ltr/admin/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/kys/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/lwl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/meta/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/rts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/tracking/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/features/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/tracker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/bbreg/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/dimp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/kys/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/lwl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/rts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/tamos/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/tomp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/atom/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/eco/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/kys/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/lwl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/rts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/tamos/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/tomp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/util_scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/keep_track/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/keep_track/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/target_candidate_matching/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp_simple/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .loader import LTRLoader -------------------------------------------------------------------------------- /ltr/models/bbreg/__init__.py: -------------------------------------------------------------------------------- 1 | from .atom_iou_net import AtomIoUNet 2 | -------------------------------------------------------------------------------- /pytracking/tracker/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .basetracker import BaseTracker -------------------------------------------------------------------------------- /ltr/models/target_classifier/__init__.py: -------------------------------------------------------------------------------- 1 | from .linear_filter import LinearFilter 2 | -------------------------------------------------------------------------------- /pytracking/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .params import TrackerParams, FeatureParams, Choice -------------------------------------------------------------------------------- /ltr/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_trainer import BaseTrainer 2 | from .ltr_trainer import LTRTrainer -------------------------------------------------------------------------------- /pytracking/.figs/NFS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/NFS.png -------------------------------------------------------------------------------- /pytracking/libs/__init__.py: -------------------------------------------------------------------------------- 1 | from .tensorlist import TensorList 2 | from .tensordict import TensorDict -------------------------------------------------------------------------------- /pytracking/.figs/LaSOT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/LaSOT.png -------------------------------------------------------------------------------- /pytracking/tracker/eco/__init__.py: -------------------------------------------------------------------------------- 1 | from .eco import ECO 2 | 3 | def get_tracker_class(): 4 | return ECO -------------------------------------------------------------------------------- /pytracking/.figs/OTB-100.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/OTB-100.png -------------------------------------------------------------------------------- /pytracking/.figs/UAV123.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/UAV123.png -------------------------------------------------------------------------------- /pytracking/.figs/visdom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/visdom.png -------------------------------------------------------------------------------- /pytracking/tracker/atom/__init__.py: -------------------------------------------------------------------------------- 1 | from .atom import ATOM 2 | 3 | def get_tracker_class(): 4 | return ATOM -------------------------------------------------------------------------------- /pytracking/tracker/dimp/__init__.py: -------------------------------------------------------------------------------- 1 | from .dimp import DiMP 2 | 3 | def get_tracker_class(): 4 | return DiMP -------------------------------------------------------------------------------- /pytracking/tracker/kys/__init__.py: -------------------------------------------------------------------------------- 1 | from .kys import KYS 2 | 3 | 4 | def get_tracker_class(): 5 | return KYS -------------------------------------------------------------------------------- /pytracking/tracker/lwl/__init__.py: -------------------------------------------------------------------------------- 1 | from .lwl import LWL 2 | 3 | 4 | def get_tracker_class(): 5 | return LWL -------------------------------------------------------------------------------- /pytracking/tracker/rts/__init__.py: -------------------------------------------------------------------------------- 1 | from .rts import RTS 2 | 3 | 4 | def get_tracker_class(): 5 | return RTS -------------------------------------------------------------------------------- /pytracking/tracker/tomp/__init__.py: -------------------------------------------------------------------------------- 1 | from .tomp import ToMP 2 | 3 | def get_tracker_class(): 4 | return ToMP -------------------------------------------------------------------------------- /pytracking/tracker/tamos/__init__.py: -------------------------------------------------------------------------------- 1 | from .tamos import TaMOs 2 | 3 | def get_tracker_class(): 4 | return TaMOs -------------------------------------------------------------------------------- /pytracking/.figs/ToMP_teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/ToMP_teaser.png -------------------------------------------------------------------------------- /pytracking/.figs/atom_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/atom_overview.png -------------------------------------------------------------------------------- /pytracking/.figs/dimp_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/dimp_overview.png -------------------------------------------------------------------------------- 
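Each tracker package above (atom, dimp, kys, lwl, rts, tomp, tamos, ...) exposes a get_tracker_class() entry point, so a tracker implementation can be resolved from its name at runtime; the repository's run scripts reach these modules through pytracking.evaluation.Tracker. A minimal sketch of the pattern, assuming pytracking is importable on the Python path (the load_tracker_class helper below is illustrative and is not a file in the repository):

import importlib

def load_tracker_class(tracker_name):
    # e.g. tracker_name = 'dimp' imports pytracking.tracker.dimp and returns its tracker class
    tracker_module = importlib.import_module('pytracking.tracker.{}'.format(tracker_name))
    return tracker_module.get_tracker_class()

# Example usage: pair the class with a parameter module of the same tracker family.
# params = importlib.import_module('pytracking.parameter.dimp.dimp50').parameters()
# tracker = load_tracker_class('dimp')(params)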
/pytracking/.figs/kys_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/kys_overview.png -------------------------------------------------------------------------------- /pytracking/.figs/lwtl_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/lwtl_overview.png -------------------------------------------------------------------------------- /pytracking/.figs/rts_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/rts_overview.png -------------------------------------------------------------------------------- /pytracking/.figs/TaMOs_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/TaMOs_overview.png -------------------------------------------------------------------------------- /pytracking/.figs/ETTrack_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/ETTrack_overview.png -------------------------------------------------------------------------------- /pytracking/.figs/KeepTrack_teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/KeepTrack_teaser.png -------------------------------------------------------------------------------- /pytracking/tracker/keep_track/__init__.py: -------------------------------------------------------------------------------- 1 | from .keep_track import KeepTrack 2 | 3 | def get_tracker_class(): 4 | return KeepTrack -------------------------------------------------------------------------------- /ltr/actors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_actor import BaseActor 2 | from .bbreg import AtomActor 3 | from .tracking import DiMPActor, KYSActor -------------------------------------------------------------------------------- /pytracking/tracker/dimp_simple/__init__.py: -------------------------------------------------------------------------------- 1 | from .dimp_simple import DiMPSimple 2 | 3 | def get_tracker_class(): 4 | return DiMPSimple -------------------------------------------------------------------------------- /ltr/__init__.py: -------------------------------------------------------------------------------- 1 | from .admin.loading import load_network 2 | from .admin.model_constructor import model_constructor 3 | from .admin.multigpu import MultiGPU -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "ltr/external/PreciseRoIPooling"] 2 | path = ltr/external/PreciseRoIPooling 3 | url = https://github.com/vacancy/PreciseRoIPooling.git 4 | -------------------------------------------------------------------------------- /pytracking/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import Sequence 2 | from .tracker import Tracker, trackerlist 3 | from .datasets import 
get_dataset, get_dataset_attributes -------------------------------------------------------------------------------- /ltr/models/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .target_classification import LBHinge, LBHingev2, IsTargetCellLoss, TrackingClassificationAccuracy, FocalLoss 2 | from .segmentation import LovaszSegLoss -------------------------------------------------------------------------------- /ltr/models/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import resnet18, resnet50, resnet101, resnet_baby 2 | from .resnet18_vggm import resnet18_vggmconv1 3 | from .swin_transformer_flex import swin_base384_flex -------------------------------------------------------------------------------- /ltr/admin/settings.py: -------------------------------------------------------------------------------- 1 | from ltr.admin.environment import env_settings 2 | 3 | 4 | class Settings: 5 | """ Training settings, e.g. the paths to datasets and networks.""" 6 | def __init__(self): 7 | self.set_default() 8 | 9 | def set_default(self): 10 | self.env = env_settings() 11 | self.use_gpu = True 12 | 13 | 14 | -------------------------------------------------------------------------------- /pytracking/__init__.py: -------------------------------------------------------------------------------- 1 | from pytracking.libs import TensorList, TensorDict 2 | import pytracking.libs.complex as complex 3 | import pytracking.libs.operation as operation 4 | import pytracking.libs.fourier as fourier 5 | import pytracking.libs.dcf as dcf 6 | import pytracking.libs.optimization as optimization 7 | from pytracking.run_tracker import run_tracker 8 | from pytracking.run_webcam import run_webcam 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.idea 2 | *~ 3 | *__pycache__* 4 | *.pyc 5 | *.pytest_cache 6 | *.ipynb_checkpoints/ 7 | ltr/admin/local.py 8 | ltr/run_ltr_local.py 9 | ltr/train_settings/*/debug.py 10 | pytracking/parameter/*/debug.py 11 | pytracking/networks/ 12 | pytracking/tracking_results/ 13 | pytracking/segmentation_results/ 14 | pytracking/result_plots/ 15 | pytracking/evaluation/local.py 16 | pytracking/run_local.py 17 | -------------------------------------------------------------------------------- /ltr/admin/multigpu.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def is_multi_gpu(net): 5 | return isinstance(net, (MultiGPU, nn.DataParallel)) 6 | 7 | 8 | class MultiGPU(nn.DataParallel): 9 | """Wraps a network to allow simple multi-GPU training.""" 10 | def __getattr__(self, item): 11 | try: 12 | return super().__getattr__(item) 13 | except: 14 | pass 15 | return getattr(self.module, item) -------------------------------------------------------------------------------- /pytracking/VOT/trackers.ini: -------------------------------------------------------------------------------- 1 | [DiMP] # 2 | label = DiMP 3 | protocol = traxpython 4 | 5 | command = import pytracking.run_vot as run_vot; run_vot.run_vot2020('dimp', 'dimp50') # Set the tracker name and the parameter name 6 | 7 | # Specify a path to trax python wrapper if it is not visible (separate by ; if using multiple paths) 8 | paths = PATH_TO_PYTRACKING 9 | 10 | # Additional environment paths 11 | #env_PATH = 
;${PATH} 12 | 13 | -------------------------------------------------------------------------------- /ltr/models/lwl/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def adaptive_cat(seq, dim=0, ref_tensor=0, mode='bilinear'): 6 | sz = seq[ref_tensor].shape[-2:] 7 | t = torch.cat([interpolate(t, sz, mode=mode) for t in seq], dim=dim) 8 | return t 9 | 10 | 11 | def interpolate(t, sz, mode='bilinear'): 12 | sz = sz.tolist() if torch.is_tensor(sz) else sz 13 | align = {} if mode == 'nearest' else dict(align_corners=False) 14 | return F.interpolate(t, sz, mode=mode, **align) if t.shape[-2:] != sz else t 15 | 16 | -------------------------------------------------------------------------------- /ltr/models/rts/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def adaptive_cat(seq, dim=0, ref_tensor=0, mode='bilinear'): 6 | sz = seq[ref_tensor].shape[-2:] 7 | t = torch.cat([interpolate(t, sz, mode=mode) for t in seq], dim=dim) 8 | return t 9 | 10 | 11 | def interpolate(t, sz, mode='bilinear'): 12 | sz = sz.tolist() if torch.is_tensor(sz) else sz 13 | align = {} if mode == 'nearest' else dict(align_corners=False) 14 | return F.interpolate(t, sz, mode=mode, **align) if t.shape[-2:] != sz else t 15 | 16 | -------------------------------------------------------------------------------- /pytracking/experiments/myexperiments.py: -------------------------------------------------------------------------------- 1 | from pytracking.evaluation import Tracker, get_dataset, trackerlist 2 | 3 | 4 | def atom_nfs_uav(): 5 | # Run three runs of ATOM on NFS and UAV datasets 6 | trackers = trackerlist('atom', 'default', range(3)) 7 | 8 | dataset = get_dataset('nfs', 'uav') 9 | return trackers, dataset 10 | 11 | 12 | def uav_test(): 13 | # Run DiMP18, ATOM and ECO on the UAV dataset 14 | trackers = trackerlist('dimp', 'dimp18', range(1)) + \ 15 | trackerlist('atom', 'default', range(1)) + \ 16 | trackerlist('eco', 'default', range(1)) 17 | 18 | dataset = get_dataset('uav') 19 | return trackers, dataset 20 | -------------------------------------------------------------------------------- /pytracking/features/color.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from pytracking.features.featurebase import FeatureBase 3 | 4 | 5 | class RGB(FeatureBase): 6 | """RGB feature normalized to [-0.5, 0.5].""" 7 | def dim(self): 8 | return 3 9 | 10 | def stride(self): 11 | return self.pool_stride 12 | 13 | def extract(self, im: torch.Tensor): 14 | return im/255 - 0.5 15 | 16 | 17 | class Grayscale(FeatureBase): 18 | """Grayscale feature normalized to [-0.5, 0.5].""" 19 | def dim(self): 20 | return 1 21 | 22 | def stride(self): 23 | return self.pool_stride 24 | 25 | def extract(self, im: torch.Tensor): 26 | return torch.mean(im/255 - 0.5, 1, keepdim=True) 27 | -------------------------------------------------------------------------------- /ltr/models/lwl/initializer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class FilterInitializerZero(nn.Module): 5 | """Initializes a target model with zeros. 6 | args: 7 | filter_size: Size of the filter. 
8 | feature_dim: Input feature dimensionality.""" 9 | 10 | def __init__(self, filter_size=1, num_filters=1, feature_dim=256, filter_groups=1): 11 | super().__init__() 12 | 13 | self.filter_size = (num_filters, feature_dim//filter_groups, filter_size, filter_size) 14 | 15 | def forward(self, feat, mask=None): 16 | assert feat.dim() == 5 17 | # num_sequences = feat.shape[1] if feat.dim() == 5 else 1 18 | num_sequences = feat.shape[1] 19 | 20 | return feat.new_zeros(num_sequences, *self.filter_size) 21 | -------------------------------------------------------------------------------- /ltr/models/rts/initializer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class FilterInitializerZero(nn.Module): 5 | """Initializes a target model with zeros. 6 | args: 7 | filter_size: Size of the filter. 8 | feature_dim: Input feature dimensionality.""" 9 | 10 | def __init__(self, filter_size=1, num_filters=1, feature_dim=256, filter_groups=1): 11 | super().__init__() 12 | 13 | self.filter_size = (num_filters, feature_dim//filter_groups, filter_size, filter_size) 14 | 15 | def forward(self, feat, mask=None): 16 | assert feat.dim() == 5 17 | # num_sequences = feat.shape[1] if feat.dim() == 5 else 1 18 | num_sequences = feat.shape[1] 19 | 20 | return feat.new_zeros(num_sequences, *self.filter_size) 21 | -------------------------------------------------------------------------------- /ltr/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .lasot import Lasot 2 | from .lasotvos import LasotVOS 3 | from .got10k import Got10k 4 | from .got10kvos import Got10kVOS 5 | from .tracking_net import TrackingNet 6 | from .imagenetvid import ImagenetVID 7 | from .coco import MSCOCO 8 | from .coco_seq import MSCOCOSeq 9 | from .youtubevos import YouTubeVOS 10 | from .davis import Davis 11 | from .lvis import LVIS 12 | from .ecssd import ECSSD 13 | from .msra10k import MSRA10k 14 | from .hku_is import HKUIS 15 | from .sbd import SBD 16 | from .synthetic_video import SyntheticVideo 17 | from .synthetic_video_blend import SyntheticVideoBlend 18 | from .lasot_candidate_matching import LasotCandidateMatching 19 | from .coco_mot_seq import MSCOCOMOTSeq 20 | from .imagenetvid_mot import ImagenetVIDMOT 21 | from .tao_burst import TAOBURST 22 | -------------------------------------------------------------------------------- /ltr/models/layers/normalization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class InstanceL2Norm(nn.Module): 7 | """Instance L2 normalization.
8 | """ 9 | def __init__(self, size_average=True, eps=1e-5, scale=1.0): 10 | super().__init__() 11 | self.size_average = size_average 12 | self.eps = eps 13 | self.scale = scale 14 | 15 | def forward(self, input): 16 | if self.size_average: 17 | return input * (self.scale * ((input.shape[1] * input.shape[2] * input.shape[3]) / ( 18 | torch.sum((input * input).view(input.shape[0], 1, 1, -1), dim=3, keepdim=True) + self.eps)).sqrt()) 19 | else: 20 | return input * (self.scale / (torch.sum((input * input).view(input.shape[0], 1, 1, -1), dim=3, keepdim=True) + self.eps).sqrt()) 21 | 22 | -------------------------------------------------------------------------------- /ltr/models/layers/transform.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | 6 | 7 | def interpolate(x, sz): 8 | """Interpolate 4D tensor x to size sz.""" 9 | sz = sz.tolist() if torch.is_tensor(sz) else sz 10 | return F.interpolate(x, sz, mode='bilinear', align_corners=False) if x.shape[-2:] != sz else x 11 | 12 | 13 | class InterpCat(nn.Module): 14 | """Interpolate and concatenate features of different resolutions.""" 15 | 16 | def forward(self, input): 17 | if isinstance(input, (dict, OrderedDict)): 18 | input = list(input.values()) 19 | 20 | output_shape = None 21 | for x in input: 22 | if output_shape is None or output_shape[0] > x.shape[-2]: 23 | output_shape = x.shape[-2:] 24 | 25 | return torch.cat([interpolate(x, output_shape) for x in input], dim=-3) 26 | -------------------------------------------------------------------------------- /pytracking/run_vot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | env_path = os.path.join(os.path.dirname(__file__), '..') 6 | if env_path not in sys.path: 7 | sys.path.append(env_path) 8 | 9 | from pytracking.evaluation import Tracker 10 | 11 | 12 | def run_vot2020(tracker_name, tracker_param, run_id=None, debug=0, visdom_info=None): 13 | tracker = Tracker(tracker_name, tracker_param, run_id) 14 | tracker.run_vot2020(debug, visdom_info) 15 | 16 | 17 | def run_vot(tracker_name, tracker_param, run_id=None): 18 | tracker = Tracker(tracker_name, tracker_param, run_id) 19 | tracker.run_vot() 20 | 21 | 22 | def main(): 23 | parser = argparse.ArgumentParser(description='Run VOT.') 24 | parser.add_argument('tracker_name', type=str) 25 | parser.add_argument('tracker_param', type=str) 26 | parser.add_argument('--run_id', type=int, default=None) 27 | 28 | args = parser.parse_args() 29 | 30 | run_vot(args.tracker_name, args.tracker_param, args.run_id) 31 | 32 | 33 | if __name__ == '__main__': 34 | main() 35 | -------------------------------------------------------------------------------- /pytracking/features/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from pytracking.features.featurebase import FeatureBase 3 | 4 | 5 | class Concatenate(FeatureBase): 6 | """A feature that concatenates other features. 7 | args: 8 | features: List of features to concatenate. 
9 | """ 10 | def __init__(self, features, pool_stride = None, normalize_power = None, use_for_color = True, use_for_gray = True): 11 | super(Concatenate, self).__init__(pool_stride, normalize_power, use_for_color, use_for_gray) 12 | self.features = features 13 | 14 | self.input_stride = self.features[0].stride() 15 | 16 | for feat in self.features: 17 | if self.input_stride != feat.stride(): 18 | raise ValueError('Strides for the features must be the same for a bultiresolution feature.') 19 | 20 | def dim(self): 21 | return sum([f.dim() for f in self.features]) 22 | 23 | def stride(self): 24 | return self.pool_stride * self.input_stride 25 | 26 | def extract(self, im: torch.Tensor): 27 | return torch.cat([f.get_feature(im) for f in self.features], 1) -------------------------------------------------------------------------------- /pytracking/utils/convert_vot_anno_to_rect.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def convert_vot_anno_to_rect(vot_anno, type): 5 | if len(vot_anno) == 4: 6 | return vot_anno 7 | 8 | if type == 'union': 9 | x1 = min(vot_anno[0::2]) 10 | x2 = max(vot_anno[0::2]) 11 | y1 = min(vot_anno[1::2]) 12 | y2 = max(vot_anno[1::2]) 13 | return [x1, y1, x2 - x1, y2 - y1] 14 | elif type == 'preserve_area': 15 | if len(vot_anno) != 8: 16 | raise ValueError 17 | 18 | vot_anno = np.array(vot_anno) 19 | cx = np.mean(vot_anno[0::2]) 20 | cy = np.mean(vot_anno[1::2]) 21 | 22 | x1 = min(vot_anno[0::2]) 23 | x2 = max(vot_anno[0::2]) 24 | y1 = min(vot_anno[1::2]) 25 | y2 = max(vot_anno[1::2]) 26 | 27 | A1 = np.linalg.norm(vot_anno[0:2] - vot_anno[2: 4]) * np.linalg.norm(vot_anno[2: 4] - vot_anno[4:6]) 28 | A2 = (x2 - x1) * (y2 - y1) 29 | s = np.sqrt(A1 / A2) 30 | w = s * (x2 - x1) + 1 31 | h = s * (y2 - y1) + 1 32 | 33 | x = cx - 0.5*w 34 | y = cy - 0.5*h 35 | return [x, y, w, h] 36 | else: 37 | raise ValueError 38 | -------------------------------------------------------------------------------- /pytracking/tracker/base/basetracker.py: -------------------------------------------------------------------------------- 1 | from _collections import OrderedDict 2 | 3 | class BaseTracker: 4 | """Base class for all trackers.""" 5 | 6 | def __init__(self, params): 7 | self.params = params 8 | self.visdom = None 9 | 10 | 11 | def predicts_segmentation_mask(self): 12 | return False 13 | 14 | 15 | def initialize(self, image, info: dict) -> dict: 16 | """Overload this function in your tracker. This should initialize the model.""" 17 | raise NotImplementedError 18 | 19 | 20 | def track(self, image, info: dict = None) -> dict: 21 | """Overload this function in your tracker. 
This should track in the frame and update the model.""" 22 | raise NotImplementedError 23 | 24 | 25 | def visdom_draw_tracking(self, image, box, segmentation=None): 26 | if box is None: 27 | box = [] 28 | elif isinstance(box, OrderedDict): 29 | box = [v for k, v in box.items()] 30 | elif isinstance(box, list): 31 | box = box 32 | else: 33 | box = (box,) 34 | if segmentation is None: 35 | self.visdom.register((image, *box), 'Tracking', 1, 'Tracking') 36 | else: 37 | self.visdom.register((image, *box, segmentation), 'Tracking', 1, 'Tracking') -------------------------------------------------------------------------------- /pytracking/VOT/tracker_DiMP.m: -------------------------------------------------------------------------------- 1 | % Set path to the python in the pytracking conda environment 2 | python_path = 'PATH_TO_CONDA_INSTALLATION/envs/pytracking/bin/python'; 3 | 4 | % Set path to pytracking 5 | pytracking_path = 'PATH_TO_VISIONML/pytracking'; 6 | 7 | % Set path to trax installation. Check 8 | % https://trax.readthedocs.io/en/latest/tutorial_compiling.html for 9 | % compilation information 10 | trax_path = 'PATH_TO_VOT_TOOLKIT/native/trax'; 11 | 12 | tracker_name = 'dimp'; % Name of the tracker to evaluate 13 | runfile_name = 'dimp18_vot'; % Name of the parameter file to use 14 | debug = 0; 15 | 16 | %% 17 | tracker_label = [tracker_name, '_', runfile_name]; 18 | 19 | % Generate python command 20 | tracker_command = sprintf(['%s -c "import sys; sys.path.append(''%s'');', ... 21 | 'sys.path.append(''%s/support/python'');', ... 22 | 'import run_vot;', ... 23 | 'run_vot.run_vot(''%s'', ''%s'', debug=%d)"'],... 24 | python_path, pytracking_path, trax_path, ... 25 | tracker_name, runfile_name, debug); 26 | 27 | 28 | tracker_interpreter = python_path; 29 | 30 | tracker_linkpath = {[trax_path, '/build'],... 31 | [trax_path, '/build/support/client'],... 
32 | [trax_path, '/build/support/opencv']}; -------------------------------------------------------------------------------- /pytracking/parameter/lwl/lwl_ytvos.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | 5 | def parameters(): 6 | params = TrackerParams() 7 | 8 | params.debug = 0 9 | params.visualization = False 10 | 11 | params.seg_to_bb_mode = 'var' 12 | params.max_scale_change = (0.95, 1.1) 13 | params.min_mask_area = 100 14 | 15 | params.use_gpu = True 16 | 17 | params.image_sample_size = (30 * 16, 52 * 16) 18 | params.search_area_scale = 5.0 19 | params.border_mode = 'inside_major' 20 | params.patch_max_scale_change = None 21 | 22 | # Learning parameters 23 | params.sample_memory_size = 32 24 | params.learning_rate = 0.1 25 | params.init_samples_minimum_weight = 0.25 26 | params.train_skipping = 1 27 | 28 | # Net optimization params 29 | params.update_target_model = True 30 | params.net_opt_iter = 20 31 | params.net_opt_update_iter = 3 32 | 33 | params.net = NetWithBackbone(net_path='lwl_stage2.pth', 34 | use_gpu=params.use_gpu, 35 | image_format='bgr255', 36 | mean=[102.9801, 115.9465, 122.7717], 37 | std=[1.0, 1.0, 1.0] 38 | ) 39 | 40 | return params 41 | -------------------------------------------------------------------------------- /ltr/admin/tensorboard.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | try: 4 | from torch.utils.tensorboard import SummaryWriter 5 | except: 6 | print('WARNING: You are using tensorboardX instead, since your pytorch version is too old.') 7 | from tensorboardX import SummaryWriter 8 | 9 | 10 | class TensorboardWriter: 11 | def __init__(self, directory, loader_names): 12 | self.directory = directory 13 | self.writer = OrderedDict({name: SummaryWriter(os.path.join(self.directory, name)) for name in loader_names}) 14 | 15 | def write_info(self, module_name, script_name, description): 16 | tb_info_writer = SummaryWriter(os.path.join(self.directory, 'info')) 17 | tb_info_writer.add_text('Module_name', module_name) 18 | tb_info_writer.add_text('Script_name', script_name) 19 | tb_info_writer.add_text('Description', description) 20 | tb_info_writer.close() 21 | 22 | def write_epoch(self, stats: OrderedDict, epoch: int, ind=-1): 23 | for loader_name, loader_stats in stats.items(): 24 | if loader_stats is None: 25 | continue 26 | for var_name, val in loader_stats.items(): 27 | if hasattr(val, 'history') and getattr(val, 'has_new_data', True): 28 | self.writer[loader_name].add_scalar(var_name, val.history[ind], epoch) -------------------------------------------------------------------------------- /pytracking/libs/tensordict.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch 3 | import copy 4 | 5 | 6 | class TensorDict(OrderedDict): 7 | """Container mainly used for dicts of torch tensors.
Extends OrderedDict with pytorch functionality.""" 8 | 9 | def concat(self, other): 10 | """Concatenates two dicts without copying internal data.""" 11 | return TensorDict(self, **other) 12 | 13 | def copy(self): 14 | return TensorDict(super(TensorDict, self).copy()) 15 | 16 | def __deepcopy__(self, memodict={}): 17 | return TensorDict(copy.deepcopy(list(self), memodict)) 18 | 19 | def __getattr__(self, name): 20 | if not hasattr(torch.Tensor, name): 21 | raise AttributeError('\'TensorDict\' object has not attribute \'{}\''.format(name)) 22 | 23 | def apply_attr(*args, **kwargs): 24 | return TensorDict({n: getattr(e, name)(*args, **kwargs) if hasattr(e, name) else e for n, e in self.items()}) 25 | return apply_attr 26 | 27 | def attribute(self, attr: str, *args): 28 | return TensorDict({n: getattr(e, attr, *args) for n, e in self.items()}) 29 | 30 | def apply(self, fn, *args, **kwargs): 31 | return TensorDict({n: fn(e, *args, **kwargs) for n, e in self.items()}) 32 | 33 | @staticmethod 34 | def _iterable(a): 35 | return isinstance(a, (TensorDict, list)) 36 | 37 | -------------------------------------------------------------------------------- /pytracking/utils/loading.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ltr.admin.loading as ltr_loading 3 | from pytracking.evaluation.environment import env_settings 4 | 5 | 6 | def load_network(net_path, **kwargs): 7 | """Load network for tracking. 8 | args: 9 | net_path - Path to network. If it is not an absolute path, it is relative to the network_path in the local.py. 10 | See ltr.admin.loading.load_network for further details. 11 | **kwargs - Additional key-word arguments that are sent to ltr.admin.loading.load_network. 12 | """ 13 | kwargs['backbone_pretrained'] = False 14 | if os.path.isabs(net_path): 15 | path_full = net_path 16 | net, _ = ltr_loading.load_network(path_full, **kwargs) 17 | elif isinstance(env_settings().network_path, (list, tuple)): 18 | net = None 19 | for p in env_settings().network_path: 20 | path_full = os.path.join(p, net_path) 21 | try: 22 | net, _ = ltr_loading.load_network(path_full, **kwargs) 23 | break 24 | except Exception as e: 25 | print(e) 26 | pass 27 | 28 | assert net is not None, 'Failed to load network' 29 | else: 30 | path_full = os.path.join(env_settings().network_path, net_path) 31 | net, _ = ltr_loading.load_network(path_full, **kwargs) 32 | 33 | return net 34 | -------------------------------------------------------------------------------- /pytracking/parameter/lwl/lwl_boxinit.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | 5 | def parameters(): 6 | params = TrackerParams() 7 | 8 | params.debug = 0 9 | params.visualization = False 10 | 11 | params.seg_to_bb_mode = 'var' 12 | params.max_scale_change = (0.95, 1.1) 13 | params.min_mask_area = 100 14 | 15 | params.use_gpu = True 16 | 17 | params.image_sample_size = (30 * 16, 52 * 16) 18 | params.search_area_scale = 5.0 19 | params.border_mode = 'inside_major' 20 | params.patch_max_scale_change = None 21 | 22 | # Learning parameters 23 | params.sample_memory_size = 32 24 | params.learning_rate = 0.2 25 | params.init_samples_minimum_weight = 0 26 | params.train_skipping = 5 27 | 28 | # Net optimization params 29 | params.update_target_model = True 30 | params.net_opt_iter = 20 31 | params.net_opt_update_iter = 5 32 | 33 | 
params.init_with_box = True 34 | params.lower_init_weight = True 35 | 36 | params.net = NetWithBackbone(net_path='lwl_boxinit.pth', 37 | use_gpu=params.use_gpu, 38 | image_format='bgr255', 39 | mean=[102.9801, 115.9465, 122.7717], 40 | std=[1.0, 1.0, 1.0]) 41 | 42 | params.vot_anno_conversion_type = 'preserve_area' 43 | 44 | return params 45 | -------------------------------------------------------------------------------- /ltr/models/rts/learners_fusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | 5 | 6 | class LearnersFusion(nn.Module): 7 | """ """ 8 | def __init__(self, fusion_type): 9 | super().__init__() 10 | self.fusion_type = fusion_type 11 | 12 | if self.fusion_type == 'concat': 13 | self.fusion_conv1 = nn.Conv2d(32, 16, kernel_size=3, padding=1, stride=1) 14 | 15 | for m in self.modules(): 16 | if isinstance(m, nn.Conv2d): 17 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 18 | m.weight.data.normal_(0, math.sqrt(2. / n)) 19 | elif isinstance(m, nn.BatchNorm2d): 20 | m.weight.data.fill_(1) 21 | m.bias.data.zero_() 22 | 23 | 24 | def forward(self, seg_learner_out, clf_learner_out): 25 | 26 | assert seg_learner_out.shape == clf_learner_out.shape 27 | assert seg_learner_out.shape[0] == 1 28 | 29 | if self.fusion_type == 'add': 30 | return seg_learner_out + clf_learner_out 31 | 32 | if self.fusion_type == 'concat': 33 | concat_output = torch.cat([seg_learner_out, clf_learner_out], dim=2) 34 | concat_output = concat_output.squeeze(0) 35 | concat_output = self.fusion_conv1(concat_output) 36 | concat_output = concat_output.unsqueeze(0) 37 | 38 | return concat_output 39 | 40 | print("Type of fusion not recognized") 41 | assert False 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /ltr/actors/base_actor.py: -------------------------------------------------------------------------------- 1 | from pytracking import TensorDict 2 | 3 | 4 | class BaseActor: 5 | """ Base class for actor. The actor class handles the passing of the data through the network 6 | and calculation the loss""" 7 | def __init__(self, net, objective): 8 | """ 9 | args: 10 | net - The network to train 11 | objective - The loss function 12 | """ 13 | self.net = net 14 | self.objective = objective 15 | 16 | def __call__(self, data: TensorDict): 17 | """ Called in each training iteration. Should pass in input data through the network, calculate the loss, and 18 | return the training stats for the input data 19 | args: 20 | data - A TensorDict containing all the necessary data blocks. 21 | 22 | returns: 23 | loss - loss for the input data 24 | stats - a dict containing detailed losses 25 | """ 26 | raise NotImplementedError 27 | 28 | def to(self, device): 29 | """ Move the network to device 30 | args: 31 | device - device to use. 'cpu' or 'cuda' 32 | """ 33 | self.net.to(device) 34 | 35 | def train(self, mode=True): 36 | """ Set whether the network is in train mode. 37 | args: 38 | mode (True) - Bool specifying whether in training mode. 
39 | """ 40 | self.net.train(mode) 41 | 42 | def eval(self): 43 | """ Set network to eval mode""" 44 | self.train(False) -------------------------------------------------------------------------------- /pytracking/utils/params.py: -------------------------------------------------------------------------------- 1 | from pytracking import TensorList 2 | import random 3 | 4 | 5 | class TrackerParams: 6 | """Class for tracker parameters.""" 7 | def set_default_values(self, default_vals: dict): 8 | for name, val in default_vals.items(): 9 | if not hasattr(self, name): 10 | setattr(self, name, val) 11 | 12 | def get(self, name: str, *default): 13 | """Get a parameter value with the given name. If it does not exists, it return the default value given as a 14 | second argument or returns an error if no default value is given.""" 15 | if len(default) > 1: 16 | raise ValueError('Can only give one default value.') 17 | 18 | if not default: 19 | return getattr(self, name) 20 | 21 | return getattr(self, name, default[0]) 22 | 23 | def has(self, name: str): 24 | """Check if there exist a parameter with the given name.""" 25 | return hasattr(self, name) 26 | 27 | 28 | class FeatureParams: 29 | """Class for feature specific parameters""" 30 | def __init__(self, *args, **kwargs): 31 | if len(args) > 0: 32 | raise ValueError 33 | 34 | for name, val in kwargs.items(): 35 | if isinstance(val, list): 36 | setattr(self, name, TensorList(val)) 37 | else: 38 | setattr(self, name, val) 39 | 40 | 41 | def Choice(*args): 42 | """Can be used to sample random parameter values.""" 43 | return random.choice(args) 44 | -------------------------------------------------------------------------------- /ltr/models/layers/blocks.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | def conv_block(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1, bias=True, 5 | batch_norm=True, relu=True, padding_mode='zeros'): 6 | layers = [] 7 | assert padding_mode == 'zeros' or padding_mode == 'replicate' 8 | 9 | if padding_mode == 'replicate' and padding > 0: 10 | assert isinstance(padding, int) 11 | layers.append(nn.ReflectionPad2d(padding)) 12 | padding = 0 13 | 14 | layers.append(nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, 15 | padding=padding, dilation=dilation, bias=bias)) 16 | if batch_norm: 17 | layers.append(nn.BatchNorm2d(out_planes)) 18 | if relu: 19 | layers.append(nn.ReLU(inplace=True)) 20 | return nn.Sequential(*layers) 21 | 22 | 23 | class LinearBlock(nn.Module): 24 | def __init__(self, in_planes, out_planes, input_sz, bias=True, batch_norm=True, relu=True): 25 | super().__init__() 26 | self.linear = nn.Linear(in_planes*input_sz*input_sz, out_planes, bias=bias) 27 | self.bn = nn.BatchNorm2d(out_planes) if batch_norm else None 28 | self.relu = nn.ReLU(inplace=True) if relu else None 29 | 30 | def forward(self, x): 31 | x = self.linear(x.reshape(x.shape[0], -1)) 32 | if self.bn is not None: 33 | x = self.bn(x.reshape(x.shape[0], x.shape[1], 1, 1)) 34 | if self.relu is not None: 35 | x = self.relu(x) 36 | return x.reshape(x.shape[0], -1) -------------------------------------------------------------------------------- /ltr/models/layers/distance.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class DistanceMap(nn.Module): 7 | """Generate a distance map from a origin center location. 
8 | args: 9 | num_bins: Number of bins in the map. 10 | bin_displacement: Displacement of the bins. 11 | """ 12 | def __init__(self, num_bins, bin_displacement=1.0): 13 | super().__init__() 14 | self.num_bins = num_bins 15 | self.bin_displacement = bin_displacement 16 | 17 | def forward(self, center, output_sz): 18 | """Create the distance map. 19 | args: 20 | center: Torch tensor with (y,x) center position. Dims (batch, 2) 21 | output_sz: Size of output distance map. 2-dimensional tuple.""" 22 | 23 | center = center.view(-1,2) 24 | 25 | bin_centers = torch.arange(self.num_bins, dtype=torch.float32, device=center.device).view(1, -1, 1, 1) 26 | 27 | k0 = torch.arange(output_sz[0], dtype=torch.float32, device=center.device).view(1,1,-1,1) 28 | k1 = torch.arange(output_sz[1], dtype=torch.float32, device=center.device).view(1,1,1,-1) 29 | 30 | d0 = k0 - center[:,0].view(-1,1,1,1) 31 | d1 = k1 - center[:,1].view(-1,1,1,1) 32 | 33 | dist = torch.sqrt(d0*d0 + d1*d1) 34 | bin_diff = dist / self.bin_displacement - bin_centers 35 | 36 | bin_val = torch.cat((F.relu(1.0 - torch.abs(bin_diff[:,:-1,:,:]), inplace=True), 37 | (1.0 + bin_diff[:,-1:,:,:]).clamp(0, 1)), dim=1) 38 | 39 | return bin_val 40 | 41 | 42 | -------------------------------------------------------------------------------- /pytracking/utils/load_text.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def load_text_numpy(path, delimiter, dtype): 6 | if isinstance(delimiter, (tuple, list)): 7 | for d in delimiter: 8 | try: 9 | ground_truth_rect = np.loadtxt(path, delimiter=d, dtype=dtype) 10 | return ground_truth_rect 11 | except: 12 | pass 13 | 14 | raise Exception('Could not read file {}'.format(path)) 15 | else: 16 | ground_truth_rect = np.loadtxt(path, delimiter=delimiter, dtype=dtype) 17 | return ground_truth_rect 18 | 19 | 20 | def load_text_pandas(path, delimiter, dtype): 21 | if isinstance(delimiter, (tuple, list)): 22 | for d in delimiter: 23 | try: 24 | ground_truth_rect = pd.read_csv(path, delimiter=d, header=None, dtype=dtype, na_filter=False, 25 | low_memory=False).values 26 | return ground_truth_rect 27 | except Exception as e: 28 | pass 29 | 30 | raise Exception('Could not read file {}'.format(path)) 31 | else: 32 | ground_truth_rect = pd.read_csv(path, delimiter=delimiter, header=None, dtype=dtype, na_filter=False, 33 | low_memory=False).values 34 | return ground_truth_rect 35 | 36 | 37 | def load_text(path, delimiter=' ', dtype=np.float32, backend='numpy'): 38 | if backend == 'numpy': 39 | return load_text_numpy(path, delimiter, dtype) 40 | elif backend == 'pandas': 41 | return load_text_pandas(path, delimiter, dtype) 42 | -------------------------------------------------------------------------------- /pytracking/run_experiment.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import importlib 5 | 6 | env_path = os.path.join(os.path.dirname(__file__), '..') 7 | if env_path not in sys.path: 8 | sys.path.append(env_path) 9 | 10 | from pytracking.evaluation.running import run_dataset 11 | 12 | 13 | def run_experiment(experiment_module: str, experiment_name: str, debug=0, threads=0): 14 | """Run experiment. 15 | args: 16 | experiment_module: Name of experiment module in the experiments/ folder. 17 | experiment_name: Name of the experiment function. 18 | debug: Debug level. 19 | threads: Number of threads. 
20 | """ 21 | expr_module = importlib.import_module('pytracking.experiments.{}'.format(experiment_module)) 22 | expr_func = getattr(expr_module, experiment_name) 23 | trackers, dataset = expr_func() 24 | print('Running: {} {}'.format(experiment_module, experiment_name)) 25 | run_dataset(dataset, trackers, debug, threads) 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Run tracker.') 30 | parser.add_argument('experiment_module', type=str, help='Name of experiment module in the experiments/ folder.') 31 | parser.add_argument('experiment_name', type=str, help='Name of the experiment function.') 32 | parser.add_argument('--debug', type=int, default=0, help='Debug level.') 33 | parser.add_argument('--threads', type=int, default=0, help='Number of threads.') 34 | 35 | args = parser.parse_args() 36 | 37 | run_experiment(args.experiment_module, args.experiment_name, args.debug, args.threads) 38 | 39 | 40 | if __name__ == '__main__': 41 | main() 42 | -------------------------------------------------------------------------------- /pytracking/libs/operation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from pytracking.libs.tensorlist import tensor_operation, TensorList 4 | 5 | 6 | @tensor_operation 7 | def conv2d(input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor = None, stride=1, padding=0, dilation=1, groups=1, mode=None): 8 | """Standard conv2d. Returns the input if weight=None.""" 9 | 10 | if weight is None: 11 | return input 12 | 13 | ind = None 14 | if mode is not None: 15 | if padding != 0: 16 | raise ValueError('Cannot input both padding and mode.') 17 | if mode == 'same': 18 | padding = (weight.shape[2]//2, weight.shape[3]//2) 19 | if weight.shape[2] % 2 == 0 or weight.shape[3] % 2 == 0: 20 | ind = (slice(-1) if weight.shape[2] % 2 == 0 else slice(None), 21 | slice(-1) if weight.shape[3] % 2 == 0 else slice(None)) 22 | elif mode == 'valid': 23 | padding = (0, 0) 24 | elif mode == 'full': 25 | padding = (weight.shape[2]-1, weight.shape[3]-1) 26 | else: 27 | raise ValueError('Unknown mode for padding.') 28 | 29 | out = F.conv2d(input, weight, bias=bias, stride=stride, padding=padding, dilation=dilation, groups=groups) 30 | if ind is None: 31 | return out 32 | return out[:,:,ind[0],ind[1]] 33 | 34 | 35 | @tensor_operation 36 | def conv1x1(input: torch.Tensor, weight: torch.Tensor): 37 | """Do a convolution with a 1x1 kernel weights. Implemented with matmul, which can be faster than using conv.""" 38 | 39 | if weight is None: 40 | return input 41 | 42 | return torch.conv2d(input, weight) 43 | -------------------------------------------------------------------------------- /pytracking/run_video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | env_path = os.path.join(os.path.dirname(__file__), '..') 6 | if env_path not in sys.path: 7 | sys.path.append(env_path) 8 | 9 | from pytracking.evaluation import Tracker 10 | 11 | 12 | def run_video(tracker_name, tracker_param, videofile, optional_box=None, debug=None, save_results=False): 13 | """Run the tracker on your webcam. 14 | args: 15 | tracker_name: Name of tracking method. 16 | tracker_param: Name of parameter file. 17 | debug: Debug level. 
18 | """ 19 | tracker = Tracker(tracker_name, tracker_param) 20 | tracker.run_video_generic(videofilepath=videofile, optional_box=optional_box, debug=debug, save_results=save_results) 21 | 22 | def main(): 23 | parser = argparse.ArgumentParser(description='Run the tracker on your webcam.') 24 | parser.add_argument('tracker_name', type=str, help='Name of tracking method.') 25 | parser.add_argument('tracker_param', type=str, help='Name of parameter file.') 26 | parser.add_argument('videofile', type=str, help='path to a video file.') 27 | parser.add_argument('--optional_box', type=float, default=None, nargs="+", help='optional_box with format x y w h.') 28 | parser.add_argument('--debug', type=int, default=0, help='Debug level.') 29 | parser.add_argument('--save_results', dest='save_results', action='store_true', help='Save bounding boxes') 30 | parser.set_defaults(save_results=False) 31 | 32 | args = parser.parse_args() 33 | 34 | run_video(args.tracker_name, args.tracker_param,args.videofile, args.optional_box, args.debug, args.save_results) 35 | 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /pytracking/run_webcam.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | env_path = os.path.join(os.path.dirname(__file__), '..') 6 | if env_path not in sys.path: 7 | sys.path.append(env_path) 8 | 9 | from pytracking.evaluation import Tracker 10 | 11 | 12 | def run_webcam(tracker_name, tracker_param, debug=None, visdom_info=None): 13 | """Run the tracker on your webcam. 14 | args: 15 | tracker_name: Name of tracking method. 16 | tracker_param: Name of parameter file. 17 | debug: Debug level. 18 | visdom_info: Dict optionally containing 'use_visdom', 'server' and 'port' for Visdom visualization. 
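        example (illustrative; server and port match the defaults used by main() below):
            run_webcam('dimp', 'dimp50', debug=0, visdom_info={'use_visdom': True, 'server': '127.0.0.1', 'port': 8097})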
19 | """ 20 | visdom_info = {} if visdom_info is None else visdom_info 21 | tracker = Tracker(tracker_name, tracker_param) 22 | tracker.run_video_generic(debug=debug, visdom_info=visdom_info) 23 | 24 | 25 | def main(): 26 | parser = argparse.ArgumentParser(description='Run the tracker on your webcam.') 27 | parser.add_argument('tracker_name', type=str, help='Name of tracking method.') 28 | parser.add_argument('tracker_param', type=str, help='Name of parameter file.') 29 | parser.add_argument('--debug', type=int, default=0, help='Debug level.') 30 | parser.add_argument('--use_visdom', type=bool, default=True, help='Flag to enable visdom') 31 | parser.add_argument('--visdom_server', type=str, default='127.0.0.1', help='Server for visdom') 32 | parser.add_argument('--visdom_port', type=int, default=8097, help='Port for visdom') 33 | 34 | args = parser.parse_args() 35 | 36 | visdom_info = {'use_visdom': args.use_visdom, 'server': args.visdom_server, 'port': args.visdom_port} 37 | run_webcam(args.tracker_name, args.tracker_param, args.debug, visdom_info) 38 | 39 | 40 | if __name__ == '__main__': 41 | main() -------------------------------------------------------------------------------- /ltr/models/lwl/loss_residual_modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | import ltr.models.layers.filter as filter_layer 5 | from pytracking import TensorList 6 | 7 | 8 | class LWTLResidual(nn.Module): 9 | """ Computes the residuals W(y_t)*(T_tau(x_t) - E(y_t) and lambda*tau in the few-shot learner loss (3) in the 10 | paper """ 11 | def __init__(self, init_filter_reg=1e-2, filter_dilation_factors=None): 12 | super().__init__() 13 | self.filter_reg = nn.Parameter(init_filter_reg * torch.ones(1)) 14 | self.filter_dilation_factors = filter_dilation_factors 15 | 16 | def forward(self, meta_parameter: TensorList, feat, label, sample_weight=None): 17 | # Assumes multiple filters, i.e. (sequences, filters, feat_dim, fH, fW) 18 | filter = meta_parameter[0] 19 | 20 | num_images = feat.shape[0] 21 | num_sequences = feat.shape[1] if feat.dim() == 5 else 1 22 | 23 | # Compute scores 24 | scores = filter_layer.apply_filter(feat, filter, dilation_factors=self.filter_dilation_factors) 25 | 26 | if sample_weight is None: 27 | sample_weight = math.sqrt(1.0 / num_images) 28 | elif isinstance(sample_weight, torch.Tensor): 29 | if sample_weight.numel() == scores.numel(): 30 | sample_weight = sample_weight.view(scores.shape) 31 | elif sample_weight.dim() == 1: 32 | sample_weight = sample_weight.view(-1, 1, 1, 1, 1) 33 | 34 | label = label.view(scores.shape) 35 | 36 | data_residual = sample_weight * (scores - label) 37 | 38 | # Compute regularization residual. 
Put batch in second dimension 39 | reg_residual = self.filter_reg*filter.view(1, num_sequences, -1) 40 | 41 | return TensorList([data_residual, reg_residual]) 42 | -------------------------------------------------------------------------------- /ltr/models/rts/loss_residual_modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | import ltr.models.layers.filter as filter_layer 5 | from pytracking import TensorList 6 | 7 | 8 | class RTSResidual(nn.Module): 9 | """ Computes the residuals W(y_t)*(T_tau(x_t) - E(y_t) and lambda*tau in the few-shot learner loss (3) in the 10 | paper """ 11 | def __init__(self, init_filter_reg=1e-2, filter_dilation_factors=None): 12 | super().__init__() 13 | self.filter_reg = nn.Parameter(init_filter_reg * torch.ones(1)) 14 | self.filter_dilation_factors = filter_dilation_factors 15 | 16 | def forward(self, meta_parameter: TensorList, feat, label, sample_weight=None): 17 | # Assumes multiple filters, i.e. (sequences, filters, feat_dim, fH, fW) 18 | filter = meta_parameter[0] 19 | 20 | num_images = feat.shape[0] 21 | num_sequences = feat.shape[1] if feat.dim() == 5 else 1 22 | 23 | # Compute scores 24 | scores = filter_layer.apply_filter(feat, filter, dilation_factors=self.filter_dilation_factors) 25 | 26 | if sample_weight is None: 27 | sample_weight = math.sqrt(1.0 / num_images) 28 | elif isinstance(sample_weight, torch.Tensor): 29 | if sample_weight.numel() == scores.numel(): 30 | sample_weight = sample_weight.view(scores.shape) 31 | elif sample_weight.dim() == 1: 32 | sample_weight = sample_weight.view(-1, 1, 1, 1, 1) 33 | 34 | label = label.view(scores.shape) 35 | 36 | data_residual = sample_weight * (scores - label) 37 | 38 | # Compute regularization residual. 
Put batch in second dimension 39 | reg_residual = self.filter_reg*filter.view(1, num_sequences, -1) 40 | 41 | return TensorList([data_residual, reg_residual]) 42 | -------------------------------------------------------------------------------- /ltr/data_specs/lasot_train_val_split.txt: -------------------------------------------------------------------------------- 1 | airplane-10 2 | basketball-10 3 | basketball-12 4 | basketball-13 5 | basketball-8 6 | bicycle-1 7 | bird-9 8 | boat-14 9 | boat-19 10 | book-17 11 | book-4 12 | bottle-6 13 | bus-14 14 | car-15 15 | car-18 16 | car-20 17 | car-5 18 | cat-11 19 | cat-15 20 | cat-17 21 | cat-9 22 | cattle-15 23 | chameleon-10 24 | chameleon-12 25 | chameleon-18 26 | crab-14 27 | crab-17 28 | crocodile-18 29 | crocodile-9 30 | cup-2 31 | deer-19 32 | deer-5 33 | deer-6 34 | electricfan-11 35 | elephant-3 36 | flag-11 37 | flag-13 38 | flag-4 39 | flag-7 40 | fox-10 41 | fox-11 42 | fox-15 43 | fox-7 44 | frog-8 45 | gametarget-8 46 | gecko-13 47 | gecko-14 48 | gecko-17 49 | goldfish-1 50 | goldfish-9 51 | gorilla-15 52 | gorilla-20 53 | guitar-20 54 | hand-14 55 | hat-14 56 | hippo-3 57 | kangaroo-12 58 | kangaroo-15 59 | kangaroo-3 60 | kite-19 61 | kite-5 62 | licenseplate-17 63 | licenseplate-2 64 | licenseplate-9 65 | lion-13 66 | microphone-12 67 | microphone-15 68 | microphone-8 69 | monkey-5 70 | mouse-12 71 | person-20 72 | pig-1 73 | pig-19 74 | pig-20 75 | pool-11 76 | pool-5 77 | pool-6 78 | rabbit-12 79 | racing-3 80 | robot-12 81 | robot-4 82 | rubicCube-8 83 | sepia-20 84 | shark-11 85 | shark-8 86 | sheep-12 87 | sheep-13 88 | sheep-17 89 | sheep-20 90 | sheep-8 91 | skateboard-1 92 | spider-7 93 | spider-9 94 | squirrel-15 95 | squirrel-9 96 | surfboard-14 97 | surfboard-18 98 | surfboard-6 99 | swing-13 100 | tank-10 101 | tank-8 102 | tiger-16 103 | tiger-19 104 | train-6 105 | truck-11 106 | turtle-11 107 | turtle-18 108 | turtle-4 109 | turtle-7 110 | umbrella-10 111 | umbrella-12 112 | umbrella-18 113 | yoyo-11 114 | yoyo-14 115 | yoyo-20 116 | zebra-12 117 | zebra-13 118 | zebra-5 119 | zebra-6 120 | zebra-7 121 | -------------------------------------------------------------------------------- /ltr/models/backbone/base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Backbone(nn.Module): 6 | """Base class for backbone networks. Handles freezing layers etc. 7 | args: 8 | frozen_layers - Name of layers to freeze. Either list of strings, 'none' or 'all'. Default: 'none'. 9 | """ 10 | def __init__(self, frozen_layers=()): 11 | super().__init__() 12 | 13 | if isinstance(frozen_layers, str): 14 | if frozen_layers.lower() == 'none': 15 | frozen_layers = () 16 | elif frozen_layers.lower() != 'all': 17 | raise ValueError('Unknown option for frozen layers: \"{}\". 
Should be \"all\", \"none\" or list of layer names.'.format(frozen_layers)) 18 | 19 | self.frozen_layers = frozen_layers 20 | self._is_frozen_nograd = False 21 | 22 | 23 | def train(self, mode=True): 24 | super().train(mode) 25 | if mode == True: 26 | self._set_frozen_to_eval() 27 | if not self._is_frozen_nograd: 28 | self._set_frozen_to_nograd() 29 | self._is_frozen_nograd = True 30 | return self 31 | 32 | 33 | def _set_frozen_to_eval(self): 34 | if isinstance(self.frozen_layers, str) and self.frozen_layers.lower() == 'all': 35 | self.eval() 36 | else: 37 | for layer in self.frozen_layers: 38 | getattr(self, layer).eval() 39 | 40 | 41 | def _set_frozen_to_nograd(self): 42 | if isinstance(self.frozen_layers, str) and self.frozen_layers.lower() == 'all': 43 | for p in self.parameters(): 44 | p.requires_grad_(False) 45 | else: 46 | for layer in self.frozen_layers: 47 | for p in getattr(self, layer).parameters(): 48 | p.requires_grad_(False) -------------------------------------------------------------------------------- /pytracking/util_scripts/pack_got10k_results.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import shutil 4 | from pytracking.evaluation.environment import env_settings 5 | 6 | 7 | def pack_got10k_results(tracker_name, param_name, output_name): 8 | """ Packs got10k results into a zip folder which can be directly uploaded to the evaluation server. The packed 9 | file is saved in the folder env_settings().got_packed_results_path 10 | 11 | args: 12 | tracker_name - name of the tracker 13 | param_name - name of the parameter file 14 | output_name - name of the packed zip file 15 | """ 16 | output_path = os.path.join(env_settings().got_packed_results_path, output_name) 17 | 18 | if not os.path.exists(output_path): 19 | os.makedirs(output_path) 20 | 21 | results_path = env_settings().results_path 22 | for i in range(1,181): 23 | seq_name = 'GOT-10k_Test_{:06d}'.format(i) 24 | 25 | seq_output_path = '{}/{}'.format(output_path, seq_name) 26 | if not os.path.exists(seq_output_path): 27 | os.makedirs(seq_output_path) 28 | 29 | for run_id in range(3): 30 | res = np.loadtxt('{}/{}/{}_{:03d}/{}.txt'.format(results_path, tracker_name, param_name, run_id, seq_name), dtype=np.float64) 31 | times = np.loadtxt( 32 | '{}/{}/{}_{:03d}/{}_time.txt'.format(results_path, tracker_name, param_name, run_id, seq_name), 33 | dtype=np.float64) 34 | 35 | np.savetxt('{}/{}_{:03d}.txt'.format(seq_output_path, seq_name, run_id+1), res, delimiter=',', fmt='%f') 36 | np.savetxt('{}/{}_time.txt'.format(seq_output_path, seq_name), times, fmt='%f') 37 | 38 | # Generate ZIP file 39 | shutil.make_archive(output_path, 'zip', output_path) 40 | 41 | # Remove raw text files 42 | shutil.rmtree(output_path) 43 | -------------------------------------------------------------------------------- /pytracking/parameter/tamos/tamos_resnet50.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.train_feature_size = [24, 36] 13 | params.feature_stride = 16 14 | params.image_sample_size = [params.feature_stride*tfs for tfs in params.train_feature_size] 15 | params.search_area_scale = 5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 2 19 
| params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | 22 | # Net optimization params 23 | params.update_classifier = True 24 | 25 | # Detection parameters 26 | params.window_output = False 27 | 28 | # Init augmentation parameters 29 | params.use_augmentation = False 30 | params.augmentation = {} 31 | 32 | params.augmentation_expansion_factor = 2 33 | params.random_shift_factor = 1/3 34 | 35 | # Advanced localization parameters 36 | params.advanced_localization = True 37 | params.target_not_found_threshold = 0.25 38 | params.distractor_threshold = 0.8 39 | params.hard_negative_threshold = 0.5 40 | params.target_neighborhood_scale = 1.5 41 | params.dispalcement_scale = 0.8 42 | params.hard_negative_learning_rate = 0.02 43 | params.update_scale_when_uncertain = True 44 | params.conf_ths = 0.85 45 | params.search_area_rescaling_at_occlusion = False 46 | 47 | params.net = NetWithBackbone(net_path='tamos_resnet50.pth.tar', use_gpu=params.use_gpu) 48 | 49 | params.vot_anno_conversion_type = 'preserve_area' 50 | 51 | params.use_gt_box = True 52 | params.plot_iou = True 53 | params.normalize_scores = True 54 | 55 | return params 56 | -------------------------------------------------------------------------------- /ltr/admin/stats.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class StatValue: 4 | def __init__(self): 5 | self.clear() 6 | 7 | def reset(self): 8 | self.val = 0 9 | 10 | def clear(self): 11 | self.reset() 12 | self.history = [] 13 | 14 | def update(self, val): 15 | self.val = val 16 | self.history.append(self.val) 17 | 18 | 19 | class AverageMeter(object): 20 | """Computes and stores the average and current value""" 21 | def __init__(self): 22 | self.clear() 23 | self.has_new_data = False 24 | 25 | def reset(self): 26 | self.avg = 0 27 | self.val = 0 28 | self.sum = 0 29 | self.count = 0 30 | 31 | def clear(self): 32 | self.reset() 33 | self.history = [] 34 | 35 | def update(self, val, n=1): 36 | self.val = val 37 | self.sum += val * n 38 | self.count += n 39 | self.avg = self.sum / self.count 40 | 41 | def new_epoch(self): 42 | if self.count > 0: 43 | self.history.append(self.avg) 44 | self.reset() 45 | self.has_new_data = True 46 | else: 47 | self.has_new_data = False 48 | 49 | 50 | def topk_accuracy(output, target, topk=(1,)): 51 | """Computes the precision@k for the specified values of k""" 52 | single_input = not isinstance(topk, (tuple, list)) 53 | if single_input: 54 | topk = (topk,) 55 | 56 | maxk = max(topk) 57 | batch_size = target.size(0) 58 | 59 | _, pred = output.topk(maxk, 1, True, True) 60 | pred = pred.t() 61 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 62 | 63 | res = [] 64 | for k in topk: 65 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)[0] 66 | res.append(correct_k * 100.0 / batch_size) 67 | 68 | if single_input: 69 | return res[0] 70 | 71 | return res 72 | -------------------------------------------------------------------------------- /pytracking/parameter/tamos/tamos_swin_base.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.train_feature_size = [24, 36] 13 | params.feature_stride = 16 14 | params.image_sample_size = [params.feature_stride*tfs for tfs in 
params.train_feature_size] 15 | params.search_area_scale = 5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 2 19 | params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | # params.train_skipping = 20 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | 26 | # Detection parameters 27 | params.window_output = False 28 | 29 | # Init augmentation parameters 30 | params.use_augmentation = False 31 | params.augmentation = {} 32 | 33 | params.augmentation_expansion_factor = 2 34 | params.random_shift_factor = 1/3 35 | 36 | # Advanced localization parameters 37 | params.advanced_localization = True 38 | params.target_not_found_threshold = 0.25 39 | params.distractor_threshold = 0.8 40 | params.hard_negative_threshold = 0.5 41 | params.target_neighborhood_scale = 1.5 42 | params.dispalcement_scale = 0.8 43 | params.hard_negative_learning_rate = 0.02 44 | params.update_scale_when_uncertain = True 45 | params.conf_ths = 0.85 46 | params.search_area_rescaling_at_occlusion = False 47 | 48 | params.net = NetWithBackbone(net_path='tamos_swin_base.pth.tar', use_gpu=params.use_gpu) 49 | 50 | params.vot_anno_conversion_type = 'preserve_area' 51 | 52 | params.use_gt_box = True 53 | params.plot_iou = True 54 | params.normalize_scores = True 55 | 56 | return params 57 | -------------------------------------------------------------------------------- /pytracking/util_scripts/pack_trackingnet_results.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import shutil 4 | from pytracking.evaluation.environment import env_settings 5 | from pytracking.evaluation.datasets import get_dataset 6 | 7 | 8 | def pack_trackingnet_results(tracker_name, param_name, run_id=None, output_name=None): 9 | """ Packs trackingnet results into a zip folder which can be directly uploaded to the evaluation server. 
The packed 10 | file is saved in the folder env_settings().tn_packed_results_path 11 | 12 | args: 13 | tracker_name - name of the tracker 14 | param_name - name of the parameter file 15 | run_id - run id for the tracker 16 | output_name - name of the packed zip file 17 | """ 18 | 19 | if output_name is None: 20 | if run_id is None: 21 | output_name = '{}_{}'.format(tracker_name, param_name) 22 | else: 23 | output_name = '{}_{}_{:03d}'.format(tracker_name, param_name, run_id) 24 | 25 | output_path = os.path.join(env_settings().tn_packed_results_path, output_name) 26 | 27 | if not os.path.exists(output_path): 28 | os.makedirs(output_path) 29 | 30 | results_path = env_settings().results_path 31 | 32 | tn_dataset = get_dataset('trackingnet') 33 | 34 | for seq in tn_dataset: 35 | seq_name = seq.name 36 | 37 | if run_id is None: 38 | seq_results_path = '{}/{}/{}/{}.txt'.format(results_path, tracker_name, param_name, seq_name) 39 | else: 40 | seq_results_path = '{}/{}/{}_{:03d}/{}.txt'.format(results_path, tracker_name, param_name, run_id, seq_name) 41 | 42 | results = np.loadtxt(seq_results_path, dtype=np.float64) 43 | 44 | np.savetxt('{}/{}.txt'.format(output_path, seq_name), results, delimiter=',', fmt='%.2f') 45 | 46 | # Generate ZIP file 47 | shutil.make_archive(output_path, 'zip', output_path) 48 | 49 | # Remove raw text files 50 | shutil.rmtree(output_path) 51 | -------------------------------------------------------------------------------- /pytracking/parameter/tomp/tomp101.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.train_feature_size = 18 13 | params.feature_stride = 16 14 | params.image_sample_size = params.train_feature_size*params.feature_stride 15 | params.search_area_scale = 5 16 | params.border_mode = 'inside_major' 17 | params.patch_max_scale_change = 1.5 18 | 19 | # Learning parameters 20 | params.sample_memory_size = 2 21 | params.learning_rate = 0.01 22 | params.init_samples_minimum_weight = 0.25 23 | params.train_skipping = 20 24 | 25 | # Net optimization params 26 | params.update_classifier = True 27 | params.net_opt_iter = 10 28 | params.net_opt_update_iter = 2 29 | params.net_opt_hn_iter = 1 30 | 31 | # Detection parameters 32 | params.window_output = False 33 | 34 | # Init augmentation parameters 35 | params.use_augmentation = False 36 | params.augmentation = {} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.target_not_found_threshold = 0.25 44 | params.distractor_threshold = 0.8 45 | params.hard_negative_threshold = 0.5 46 | params.target_neighborhood_scale = 2.2 47 | params.dispalcement_scale = 0.8 48 | params.hard_negative_learning_rate = 0.02 49 | params.update_scale_when_uncertain = True 50 | params.conf_ths = 0.9 51 | params.search_area_rescaling_at_occlusion = True 52 | 53 | params.net = NetWithBackbone(net_path='tomp101.pth.tar', use_gpu=params.use_gpu) 54 | 55 | params.vot_anno_conversion_type = 'preserve_area' 56 | 57 | params.use_gt_box = True 58 | params.plot_iou = True 59 | 60 | return params 61 | -------------------------------------------------------------------------------- 
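A minimal, illustrative sketch (not part of the repository) of how a parameter module such as tomp101 above is typically consumed: the run scripts build a Tracker from the tracker name and the parameter file name, exactly as run_video.py earlier in this listing does. The video path below is a placeholder.

# Illustrative usage sketch only -- assumes the 'tomp' tracker and the tomp101 parameter file shipped with this repository.
from pytracking.evaluation import Tracker

tracker = Tracker('tomp', 'tomp101')  # tracker name, parameter file name
tracker.run_video_generic(videofilepath='/path/to/video.mp4', debug=0)

--------------------------------------------------------------------------------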
/pytracking/parameter/tomp/tomp50.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.train_feature_size = 18 13 | params.feature_stride = 16 14 | params.image_sample_size = params.train_feature_size*params.feature_stride 15 | params.search_area_scale = 5 16 | params.border_mode = 'inside_major' 17 | params.patch_max_scale_change = 1.5 18 | 19 | # Learning parameters 20 | params.sample_memory_size = 2 21 | params.learning_rate = 0.01 22 | params.init_samples_minimum_weight = 0.25 23 | params.train_skipping = 20 24 | 25 | # Net optimization params 26 | params.update_classifier = True 27 | params.net_opt_iter = 10 28 | params.net_opt_update_iter = 2 29 | params.net_opt_hn_iter = 1 30 | 31 | # Detection parameters 32 | params.window_output = False 33 | 34 | # Init augmentation parameters 35 | params.use_augmentation = False 36 | params.augmentation = {} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.target_not_found_threshold = 0.25 44 | params.distractor_threshold = 0.8 45 | params.hard_negative_threshold = 0.5 46 | params.target_neighborhood_scale = 2.2 47 | params.dispalcement_scale = 0.8 48 | params.hard_negative_learning_rate = 0.02 49 | params.update_scale_when_uncertain = True 50 | params.conf_ths = 0.9 51 | params.search_area_rescaling_at_occlusion = True 52 | 53 | params.net = NetWithBackbone(net_path='tomp50.pth.tar', use_gpu=params.use_gpu) 54 | 55 | params.vot_anno_conversion_type = 'preserve_area' 56 | 57 | params.use_gt_box = True 58 | params.plot_iou = True 59 | 60 | return params 61 | -------------------------------------------------------------------------------- /ltr/run_training.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import importlib 5 | import multiprocessing 6 | import cv2 as cv 7 | import torch.backends.cudnn 8 | 9 | env_path = os.path.join(os.path.dirname(__file__), '..') 10 | if env_path not in sys.path: 11 | sys.path.append(env_path) 12 | 13 | import ltr.admin.settings as ws_settings 14 | 15 | 16 | def run_training(train_module, train_name, cudnn_benchmark=True): 17 | """Run a train scripts in train_settings. 18 | args: 19 | train_module: Name of module in the "train_settings/" folder. 20 | train_name: Name of the train settings file. 21 | cudnn_benchmark: Use cudnn benchmark or not (default is True). 
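        example (illustrative; 'dimp' exists under train_settings/, the settings file name is a placeholder):
            run_training('dimp', 'dimp50', cudnn_benchmark=True)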
22 | """ 23 | 24 | # This is needed to avoid strange crashes related to opencv 25 | cv.setNumThreads(0) 26 | 27 | torch.backends.cudnn.benchmark = cudnn_benchmark 28 | 29 | print('Training: {} {}'.format(train_module, train_name)) 30 | 31 | settings = ws_settings.Settings() 32 | settings.module_name = train_module 33 | settings.script_name = train_name 34 | settings.project_path = 'ltr/{}/{}'.format(train_module, train_name) 35 | 36 | expr_module = importlib.import_module('ltr.train_settings.{}.{}'.format(train_module, train_name)) 37 | expr_func = getattr(expr_module, 'run') 38 | 39 | expr_func(settings) 40 | 41 | 42 | def main(): 43 | parser = argparse.ArgumentParser(description='Run a train scripts in train_settings.') 44 | parser.add_argument('train_module', type=str, help='Name of module in the "train_settings/" folder.') 45 | parser.add_argument('train_name', type=str, help='Name of the train settings file.') 46 | parser.add_argument('--cudnn_benchmark', type=bool, default=True, help='Set cudnn benchmark on (1) or off (0) (default is on).') 47 | 48 | args = parser.parse_args() 49 | 50 | run_training(args.train_module, args.train_name, args.cudnn_benchmark) 51 | 52 | 53 | if __name__ == '__main__': 54 | multiprocessing.set_start_method('spawn', force=True) 55 | main() 56 | -------------------------------------------------------------------------------- /ltr/models/loss/bbr_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class GIoULoss(nn.Module): 6 | def __init__(self): 7 | super().__init__() 8 | 9 | def forward(self, pred, target, weights=None): 10 | if pred.dim() == 4: 11 | pred = pred.unsqueeze(0) 12 | 13 | pred = pred.permute(0, 1, 3, 4, 2).reshape(-1, 4) # nf x ns x x 4 x h x w 14 | target = target.permute(0, 1, 3, 4, 2).reshape(-1, 4) #nf x ns x 4 x h x w 15 | 16 | pred_left = pred[:, 0] 17 | pred_top = pred[:, 1] 18 | pred_right = pred[:, 2] 19 | pred_bottom = pred[:, 3] 20 | 21 | target_left = target[:, 0] 22 | target_top = target[:, 1] 23 | target_right = target[:, 2] 24 | target_bottom = target[:, 3] 25 | 26 | target_area = (target_left + target_right) * \ 27 | (target_top + target_bottom) 28 | pred_area = (pred_left + pred_right) * \ 29 | (pred_top + pred_bottom) 30 | 31 | w_intersect = torch.min(pred_left, target_left) + torch.min(pred_right, target_right) 32 | g_w_intersect = torch.max(pred_left, target_left) + torch.max( 33 | pred_right, target_right) 34 | h_intersect = torch.min(pred_bottom, target_bottom) + torch.min(pred_top, target_top) 35 | g_h_intersect = torch.max(pred_bottom, target_bottom) + torch.max(pred_top, target_top) 36 | ac_union = g_w_intersect * g_h_intersect + 1e-7 37 | area_intersect = w_intersect * h_intersect 38 | area_union = target_area + pred_area - area_intersect + 1e-7 39 | ious = (area_intersect) / (area_union) 40 | gious = ious - (ac_union - area_union) / ac_union 41 | 42 | losses = 1 - gious 43 | 44 | if weights is not None and weights.sum() > 0: 45 | weights = weights.reshape(-1) # nf x ns x 1 x h x w 46 | loss_mean = losses[weights>0].mean() 47 | ious = ious[weights>0] 48 | else: 49 | loss_mean = losses.mean() 50 | 51 | return loss_mean, ious 52 | -------------------------------------------------------------------------------- /ltr/models/layers/activation.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | 
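# softmax_reg below is a softmax with an optional constant term 'reg' appended along 'dim'
# before normalization and removed afterwards, so it regularizes the denominator:
# each output becomes exp(x_i) / (sum_j exp(x_j) + exp(reg)).
# Illustrative check (values are made up): for x = torch.zeros(1, 3),
# softmax_reg(x, dim=1, reg=0.0) gives 1 / (3 + 1) = 0.25 per element instead of 1/3.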
def softmax_reg(x: torch.Tensor, dim, reg=None): 8 | """Softmax with optional denominator regularization.""" 9 | if reg is None: 10 | return torch.softmax(x, dim=dim) 11 | dim %= x.dim() 12 | if isinstance(reg, (float, int)): 13 | reg = x.new_tensor([reg]) 14 | reg = reg.expand([1 if d==dim else x.shape[d] for d in range(x.dim())]) 15 | x = torch.cat((x, reg), dim=dim) 16 | return torch.softmax(x, dim=dim)[[slice(-1) if d==dim else slice(None) for d in range(x.dim())]] 17 | 18 | 19 | 20 | class MLU(nn.Module): 21 | r"""MLU activation 22 | """ 23 | def __init__(self, min_val, inplace=False): 24 | super().__init__() 25 | self.min_val = min_val 26 | self.inplace = inplace 27 | 28 | def forward(self, input): 29 | return F.elu(F.leaky_relu(input, 1/self.min_val, inplace=self.inplace), self.min_val, inplace=self.inplace) 30 | 31 | 32 | class LeakyReluPar(nn.Module): 33 | r"""LeakyRelu parametric activation 34 | """ 35 | 36 | def forward(self, x, a): 37 | return (1.0 - a)/2.0 * torch.abs(x) + (1.0 + a)/2.0 * x 38 | 39 | class LeakyReluParDeriv(nn.Module): 40 | r"""Derivative of the LeakyRelu parametric activation, wrt x. 41 | """ 42 | 43 | def forward(self, x, a): 44 | return (1.0 - a)/2.0 * torch.sign(x.detach()) + (1.0 + a)/2.0 45 | 46 | 47 | class BentIdentPar(nn.Module): 48 | r"""BentIdent parametric activation 49 | """ 50 | def __init__(self, b=1.0): 51 | super().__init__() 52 | self.b = b 53 | 54 | def forward(self, x, a): 55 | return (1.0 - a)/2.0 * (torch.sqrt(x*x + 4.0*self.b*self.b) - 2.0*self.b) + (1.0 + a)/2.0 * x 56 | 57 | 58 | class BentIdentParDeriv(nn.Module): 59 | r"""BentIdent parametric activation deriv 60 | """ 61 | def __init__(self, b=1.0): 62 | super().__init__() 63 | self.b = b 64 | 65 | def forward(self, x, a): 66 | return (1.0 - a)/2.0 * (x / torch.sqrt(x*x + 4.0*self.b*self.b)) + (1.0 + a)/2.0 67 | 68 | -------------------------------------------------------------------------------- /ltr/admin/model_constructor.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | import importlib 3 | 4 | 5 | def model_constructor(f): 6 | """ Wraps the function 'f' which returns the network. An extra field 'constructor' is added to the network returned 7 | by 'f'. This field contains an instance of the 'NetConstructor' class, which contains the information needed to 8 | re-construct the network, such as the name of the function 'f', the function arguments etc. Thus, the network can 9 | be easily constructed from a saved checkpoint by calling NetConstructor.get() function. 10 | """ 11 | @wraps(f) 12 | def f_wrapper(*args, **kwds): 13 | net_constr = NetConstructor(f.__name__, f.__module__, args, kwds) 14 | output = f(*args, **kwds) 15 | if isinstance(output, (tuple, list)): 16 | # Assume first argument is the network 17 | output[0].constructor = net_constr 18 | else: 19 | output.constructor = net_constr 20 | return output 21 | return f_wrapper 22 | 23 | 24 | class NetConstructor: 25 | """ Class to construct networks. Takes as input the function name (e.g. atom_resnet18), the name of the module 26 | which contains the network function (e.g. ltr.models.bbreg.atom) and the arguments for the network 27 | function. 
The class object can then be stored along with the network weights to re-construct the network.""" 28 | def __init__(self, fun_name, fun_module, args, kwds): 29 | """ 30 | args: 31 | fun_name - The function which returns the network 32 | fun_module - the module which contains the network function 33 | args - arguments which are passed to the network function 34 | kwds - arguments which are passed to the network function 35 | """ 36 | self.fun_name = fun_name 37 | self.fun_module = fun_module 38 | self.args = args 39 | self.kwds = kwds 40 | 41 | def get(self): 42 | """ Rebuild the network by calling the network function with the correct arguments. """ 43 | net_module = importlib.import_module(self.fun_module) 44 | net_fun = getattr(net_module, self.fun_name) 45 | return net_fun(*self.args, **self.kwds) 46 | -------------------------------------------------------------------------------- /ltr/admin/environment.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | from collections import OrderedDict 4 | 5 | 6 | def create_default_local_file(): 7 | path = os.path.join(os.path.dirname(__file__), 'local.py') 8 | 9 | empty_str = '\'\'' 10 | default_settings = OrderedDict({ 11 | 'workspace_dir': empty_str, 12 | 'tensorboard_dir': 'self.workspace_dir + \'/tensorboard/\'', 13 | 'pretrained_networks': 'self.workspace_dir + \'/pretrained_networks/\'', 14 | 'pregenerated_masks': empty_str, 15 | 'lasot_dir': empty_str, 16 | 'got10k_dir': empty_str, 17 | 'trackingnet_dir': empty_str, 18 | 'coco_dir': empty_str, 19 | 'lvis_dir': empty_str, 20 | 'sbd_dir': empty_str, 21 | 'imagenet_dir': empty_str, 22 | 'imagenetdet_dir': empty_str, 23 | 'ecssd_dir': empty_str, 24 | 'hkuis_dir': empty_str, 25 | 'msra10k_dir': empty_str, 26 | 'davis_dir': empty_str, 27 | 'youtubevos_dir': empty_str, 28 | 'lasot_candidate_matching_dataset_path': empty_str}) 29 | 30 | comment = {'workspace_dir': 'Base directory for saving network checkpoints.', 31 | 'tensorboard_dir': 'Directory for tensorboard files.'} 32 | 33 | with open(path, 'w') as f: 34 | f.write('class EnvironmentSettings:\n') 35 | f.write(' def __init__(self):\n') 36 | 37 | for attr, attr_val in default_settings.items(): 38 | comment_str = None 39 | if attr in comment: 40 | comment_str = comment[attr] 41 | if comment_str is None: 42 | f.write(' self.{} = {}\n'.format(attr, attr_val)) 43 | else: 44 | f.write(' self.{} = {} # {}\n'.format(attr, attr_val, comment_str)) 45 | 46 | 47 | def env_settings(): 48 | env_module_name = 'ltr.admin.local' 49 | try: 50 | env_module = importlib.import_module(env_module_name) 51 | return env_module.EnvironmentSettings() 52 | except: 53 | env_file = os.path.join(os.path.dirname(__file__), 'local.py') 54 | 55 | create_default_local_file() 56 | raise RuntimeError('YOU HAVE NOT SETUP YOUR local.py!!!\n Go to "{}" and set all the paths you need. 
Then try to run again.'.format(env_file)) 57 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/dimp18.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 18*16 13 | params.search_area_scale = 5 14 | 15 | # Learning parameters 16 | params.sample_memory_size = 50 17 | params.learning_rate = 0.01 18 | params.init_samples_minimum_weight = 0.25 19 | params.train_skipping = 20 20 | 21 | # Net optimization params 22 | params.update_classifier = True 23 | params.net_opt_iter = 10 24 | params.net_opt_update_iter = 2 25 | params.net_opt_hn_iter = 1 26 | 27 | # Detection parameters 28 | params.window_output = False 29 | 30 | # Init augmentation parameters 31 | params.use_augmentation = True 32 | params.augmentation = {'fliplr': True, 33 | 'rotate': [10, -10, 45, -45], 34 | 'blur': [(3,1), (1, 3), (2, 2)], 35 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 36 | 'dropout': (2, 0.2)} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.target_not_found_threshold = 0.25 44 | params.distractor_threshold = 0.8 45 | params.hard_negative_threshold = 0.5 46 | params.target_neighborhood_scale = 2.2 47 | params.dispalcement_scale = 0.8 48 | params.hard_negative_learning_rate = 0.02 49 | params.update_scale_when_uncertain = True 50 | 51 | # IoUnet parameters 52 | params.iounet_augmentation = False 53 | params.iounet_use_log_scale = True 54 | params.iounet_k = 3 55 | params.num_init_random_boxes = 9 56 | params.box_jitter_pos = 0.1 57 | params.box_jitter_sz = 0.5 58 | params.maximal_aspect_ratio = 6 59 | params.box_refinement_iter = 5 60 | params.box_refinement_step_length = 1 61 | params.box_refinement_step_decay = 1 62 | 63 | params.net = NetWithBackbone(net_path='dimp18.pth', 64 | use_gpu=params.use_gpu) 65 | 66 | params.vot_anno_conversion_type = 'preserve_area' 67 | 68 | return params 69 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/dimp50.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 18*16 13 | params.search_area_scale = 5 14 | 15 | # Learning parameters 16 | params.sample_memory_size = 50 17 | params.learning_rate = 0.01 18 | params.init_samples_minimum_weight = 0.25 19 | params.train_skipping = 20 20 | 21 | # Net optimization params 22 | params.update_classifier = True 23 | params.net_opt_iter = 10 24 | params.net_opt_update_iter = 2 25 | params.net_opt_hn_iter = 1 26 | 27 | # Detection parameters 28 | params.window_output = False 29 | 30 | # Init augmentation parameters 31 | params.use_augmentation = True 32 | params.augmentation = {'fliplr': True, 33 | 'rotate': [10, -10, 45, -45], 34 | 'blur': [(3,1), (1, 3), (2, 2)], 35 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), 
(-0.6,-0.6)], 36 | 'dropout': (2, 0.2)} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.target_not_found_threshold = 0.25 44 | params.distractor_threshold = 0.8 45 | params.hard_negative_threshold = 0.5 46 | params.target_neighborhood_scale = 2.2 47 | params.dispalcement_scale = 0.8 48 | params.hard_negative_learning_rate = 0.02 49 | params.update_scale_when_uncertain = True 50 | 51 | # IoUnet parameters 52 | params.iounet_augmentation = False 53 | params.iounet_use_log_scale = True 54 | params.iounet_k = 3 55 | params.num_init_random_boxes = 9 56 | params.box_jitter_pos = 0.1 57 | params.box_jitter_sz = 0.5 58 | params.maximal_aspect_ratio = 6 59 | params.box_refinement_iter = 5 60 | params.box_refinement_step_length = 1 61 | params.box_refinement_step_decay = 1 62 | 63 | params.net = NetWithBackbone(net_path='dimp50.pth', 64 | use_gpu=params.use_gpu) 65 | 66 | params.vot_anno_conversion_type = 'preserve_area' 67 | 68 | return params 69 | -------------------------------------------------------------------------------- /ltr/actors/bbreg.py: -------------------------------------------------------------------------------- 1 | from . import BaseActor 2 | 3 | 4 | class AtomActor(BaseActor): 5 | """ Actor for training the IoU-Net in ATOM""" 6 | def __call__(self, data): 7 | """ 8 | args: 9 | data - The input data, should contain the fields 'train_images', 'test_images', 'train_anno', 10 | 'test_proposals' and 'proposal_iou'. 11 | 12 | returns: 13 | loss - the training loss 14 | states - dict containing detailed losses 15 | """ 16 | # Run network to obtain IoU prediction for each proposal in 'test_proposals' 17 | iou_pred = self.net(data['train_images'], data['test_images'], data['train_anno'], data['test_proposals']) 18 | 19 | iou_pred = iou_pred.view(-1, iou_pred.shape[2]) 20 | iou_gt = data['proposal_iou'].view(-1, data['proposal_iou'].shape[2]) 21 | 22 | # Compute loss 23 | loss = self.objective(iou_pred, iou_gt) 24 | 25 | # Return training stats 26 | stats = {'Loss/total': loss.item(), 27 | 'Loss/iou': loss.item()} 28 | 29 | return loss, stats 30 | 31 | 32 | class AtomBBKLActor(BaseActor): 33 | """ Actor for training the IoU-Net in ATOM with BBKL""" 34 | def __call__(self, data): 35 | """ 36 | args: 37 | data - The input data, should contain the fields 'train_images', 'test_images', 'train_anno', 38 | 'test_proposals', 'proposal_density', and 'gt_density'. 
39 | 40 | returns: 41 | loss - the training loss 42 | states - dict containing detailed losses 43 | """ 44 | # Run network to obtain IoU prediction for each proposal in 'test_proposals' 45 | bb_scores = self.net(data['train_images'], data['test_images'], data['train_anno'], data['test_proposals']) 46 | 47 | bb_scores = bb_scores.view(-1, bb_scores.shape[2]) 48 | proposal_density = data['proposal_density'].view(-1, data['proposal_density'].shape[2]) 49 | gt_density = data['gt_density'].view(-1, data['gt_density'].shape[2]) 50 | 51 | # Compute loss 52 | loss = self.objective(bb_scores, sample_density=proposal_density, gt_density=gt_density, mc_dim=1) 53 | 54 | # Return training stats 55 | stats = {'Loss/total': loss.item(), 56 | 'Loss/bb_ce': loss.item()} 57 | 58 | return loss, stats 59 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/dimp50_vot19.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 16 * 16 13 | params.search_area_scale = 4.5 14 | 15 | # Learning parameters 16 | params.sample_memory_size = 100 17 | params.learning_rate = 0.0075 18 | params.init_samples_minimum_weight = 0.0 19 | params.train_skipping = 10 20 | 21 | # Net optimization params 22 | params.update_classifier = True 23 | params.net_opt_iter = 15 24 | params.net_opt_update_iter = 2 25 | params.net_opt_hn_iter = 2 26 | 27 | # Detection parameters 28 | params.window_output = True 29 | 30 | # Init augmentation parameters 31 | params.use_augmentation = True 32 | params.augmentation = {'fliplr': True, 33 | 'rotate': [-5, 10, -30, 60], 34 | 'blur': [(2, 0.2), (1, 3)], 35 | 'relativeshift': [(0.6, 0.6), (-0.6, -0.6)], 36 | 'dropout': (3, 0.2)} 37 | 38 | params.augmentation_expansion_factor = 1.4 39 | params.random_shift_factor = 1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.target_not_found_threshold = 0.0 44 | params.distractor_threshold = 100 45 | params.hard_negative_threshold = 0.45 46 | params.target_neighborhood_scale = 2.2 47 | params.dispalcement_scale = 0.7 48 | 49 | params.perform_hn_without_windowing = True 50 | 51 | params.hard_negative_learning_rate = 0.02 52 | params.update_scale_when_uncertain = True 53 | 54 | # IoUnet parameters 55 | params.iounet_augmentation = False 56 | params.iounet_use_log_scale = True 57 | params.iounet_k = 3 58 | params.num_init_random_boxes = 9 59 | params.box_jitter_pos = 0.1 60 | params.box_jitter_sz = 0.5 61 | params.maximal_aspect_ratio = 6 62 | params.box_refinement_iter = 3 63 | params.box_refinement_step_length = 1 64 | params.box_refinement_step_decay = 1 65 | 66 | params.net = NetWithBackbone(net_path='dimp50.pth', 67 | use_gpu=params.use_gpu) 68 | 69 | params.vot_anno_conversion_type = 'preserve_area' 70 | 71 | return params 72 | -------------------------------------------------------------------------------- /ltr/dataset/got10kvos.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | import numpy as np 4 | import torch 5 | from PIL import Image 6 | from ltr.dataset.got10k import Got10k 7 | from ltr.data.image_loader import jpeg4py_loader, imread_indexed 8 | 9 | 
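# Got10kVOS below extends the Got10k box dataset with per-frame segmentation masks read from
# <anno_path>/<sequence_name>/<frame>.png (see _get_anno_sequence_path / _get_anno_frame_path further down).
# Illustrative construction (the mask directory is a placeholder):
#   dataset = Got10kVOS(anno_path='/path/to/got10k_mask_annotations', split='train')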
10 | class Got10kVOS(Got10k): 11 | """ Got10K video object segmentation dataset. 12 | """ 13 | 14 | def __init__(self, anno_path=None, split='train'): 15 | super().__init__(split=split) 16 | self.anno_path = anno_path 17 | 18 | # TODO this prevents a crash, because that particular sequence does not have masks. 19 | # Once the missing mask is added, the following code can be removed (handled in base) 20 | self.sequence_list = [i for i in self.sequence_list if i not in ['GOT-10k_Train_004419']] 21 | 22 | self.sequence_meta_info = self._load_meta_info() 23 | self.seq_per_class = self._build_seq_per_class() 24 | 25 | self.class_list = list(self.seq_per_class.keys()) 26 | self.class_list.sort() 27 | 28 | @staticmethod 29 | def _load_anno(path): 30 | if not path.exists(): 31 | print('path', path, flush=True) 32 | return None 33 | im = np.array(Image.open(path)) 34 | im = np.atleast_3d(im)[..., 0] 35 | return im 36 | 37 | def _get_anno_sequence_path(self, seq_id): 38 | return os.path.join(self.anno_path, self.sequence_list[seq_id]) 39 | 40 | def _get_anno_frame_path(self, seq_path, frame_id): 41 | return os.path.join(seq_path, '{:08}.png'.format(frame_id + 1)) # frames start from 1 42 | 43 | def get_frames(self, seq_id, frame_ids, anno=None): 44 | seq_path = self._get_sequence_path(seq_id) 45 | obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]] 46 | 47 | frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids] 48 | 49 | if anno is None: 50 | anno = self.get_sequence_info(seq_id) 51 | 52 | anno_frames = {} 53 | for key, value in anno.items(): 54 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 55 | 56 | anno_seq_path = self._get_anno_sequence_path(seq_id) 57 | 58 | labels = [self._load_anno(Path(self._get_anno_frame_path(anno_seq_path, f))) for f in frame_ids] 59 | labels = [torch.Tensor(lb) for lb in labels] 60 | anno_frames['mask'] = labels 61 | 62 | return frame_list, anno_frames, obj_meta 63 | -------------------------------------------------------------------------------- /ltr/models/kys/conv_gru.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from ltr.models.layers.blocks import conv_block 4 | 5 | 6 | class ConvGRUCell(nn.Module): 7 | def __init__(self, input_dim, hidden_dim, kernel_size, padding_mode='zeros'): 8 | " Referenced from https://github.com/happyjin/ConvGRU-pytorch" 9 | super(ConvGRUCell, self).__init__() 10 | self.hidden_dim = hidden_dim 11 | 12 | if padding_mode == 'zeros': 13 | if not isinstance(kernel_size, (list, tuple)): 14 | kernel_size = (kernel_size, kernel_size) 15 | 16 | padding = kernel_size[0] // 2, kernel_size[1] // 2 17 | self.conv_reset = nn.Conv2d(input_dim + hidden_dim, self.hidden_dim, kernel_size, padding=padding) 18 | self.conv_update = nn.Conv2d(input_dim + hidden_dim, self.hidden_dim, kernel_size, padding=padding) 19 | 20 | self.conv_state_new = nn.Conv2d(input_dim+hidden_dim, self.hidden_dim, kernel_size, padding=padding) 21 | else: 22 | self.conv_reset = conv_block(input_dim + hidden_dim, hidden_dim, kernel_size=kernel_size, stride=1, 23 | padding=int(kernel_size // 2), batch_norm=False, relu=False, 24 | padding_mode=padding_mode) 25 | 26 | self.conv_update = conv_block(input_dim + hidden_dim, hidden_dim, kernel_size=kernel_size, stride=1, 27 | padding=int(kernel_size // 2), batch_norm=False, relu=False, 28 | padding_mode=padding_mode) 29 | 30 | self.conv_state_new = conv_block(input_dim + hidden_dim, hidden_dim, 
kernel_size=kernel_size, stride=1, 31 | padding=int(kernel_size // 2), batch_norm=False, relu=False, 32 | padding_mode=padding_mode) 33 | 34 | def forward(self, input, state_cur): 35 | input_state_cur = torch.cat([input, state_cur], dim=1) 36 | 37 | reset_gate = torch.sigmoid(self.conv_reset(input_state_cur)) 38 | update_gate = torch.sigmoid(self.conv_update(input_state_cur)) 39 | 40 | input_state_cur_reset = torch.cat([input, reset_gate*state_cur], dim=1) 41 | state_new = torch.tanh(self.conv_state_new(input_state_cur_reset)) 42 | 43 | state_next = (1.0 - update_gate) * state_cur + update_gate * state_new 44 | return state_next 45 | -------------------------------------------------------------------------------- /ltr/models/loss/target_candidate_matching_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | def recall(m, gt_m): 6 | mask = (gt_m > -1).float() 7 | return ((m == gt_m) * mask).sum(1) / mask.sum(1) 8 | 9 | 10 | def precision(m, gt_m): 11 | mask = ((m > -1) & (gt_m >= -1)).float() 12 | prec = ((m == gt_m) * mask).sum(1) / torch.max(mask.sum(1), torch.ones_like(mask.sum(1))) 13 | no_match_mask = (gt_m > -1).sum(1) == 0 14 | prec[no_match_mask] = float('NaN') 15 | return prec 16 | 17 | 18 | class TargetCandidateMatchingLoss(nn.Module): 19 | def __init__(self, nll_balancing=0.5, nll_weight=1.): 20 | super().__init__() 21 | self.nll_balancing = nll_balancing 22 | self.nll_weight = nll_weight 23 | 24 | 25 | def metrics(self, matches1, gt_matches1, **kwargs): 26 | rec = recall(matches1, gt_matches1[0]) 27 | prec = precision(matches1, gt_matches1[0]) 28 | return {'match_recall': rec, 'match_precision': prec} 29 | 30 | def forward(self, gt_assignment, gt_matches0, gt_matches1, log_assignment, bin_score, **kwargs): 31 | gt_assignment = gt_assignment[0] 32 | gt_matches0 = gt_matches0[0] 33 | gt_matches1 = gt_matches1[0] 34 | 35 | losses = {'total': 0} 36 | 37 | positive = gt_assignment.float() 38 | neg0 = (gt_matches0 == -1).float() 39 | neg1 = (gt_matches1 == -1).float() 40 | 41 | num_pos = torch.max(positive.sum((1, 2)), positive.new_tensor(1)) 42 | num_neg = torch.max(neg0.sum(1) + neg1.sum(1), neg0.new_tensor(1)) 43 | 44 | nll_pos = -(log_assignment[:, :-1, :-1] * positive).sum((1, 2)) 45 | 46 | nll_pos /= num_pos 47 | nll_neg0 = -(log_assignment[:, :-1, -1] * neg0).sum(1) 48 | nll_neg1 = -(log_assignment[:, -1, :-1] * neg1).sum(1) 49 | nll_neg = (nll_neg0 + nll_neg1) / num_neg 50 | 51 | nll = (self.nll_balancing * nll_pos + (1 - self.nll_balancing) * nll_neg) 52 | 53 | losses['assignment_nll'] = nll 54 | 55 | if self.nll_weight > 0: 56 | losses['total'] = nll * self.nll_weight 57 | 58 | # Some statistics 59 | losses['nll_pos'] = nll_pos 60 | losses['nll_neg'] = nll_neg 61 | losses['num_matchable'] = num_pos 62 | losses['num_unmatchable'] = num_neg 63 | losses['sinkhorn_norm'] = log_assignment.exp()[:, :-1].sum(2).mean(1) 64 | losses['bin_score'] = bin_score[None] 65 | 66 | return losses 67 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/dimp50_vot18.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 14 * 16 13 | 
params.search_area_scale = 4 14 | 15 | # Learning parameters 16 | params.sample_memory_size = 250 17 | params.learning_rate = 0.0075 18 | params.init_samples_minimum_weight = 0.0 19 | params.train_skipping = 10 20 | 21 | # Net optimization params 22 | params.update_classifier = True 23 | params.net_opt_iter = 25 24 | params.net_opt_update_iter = 3 25 | params.net_opt_hn_iter = 3 26 | 27 | # Detection parameters 28 | params.window_output = True 29 | 30 | # Init augmentation parameters 31 | params.use_augmentation = True 32 | params.augmentation = {'fliplr': True, 33 | 'rotate': [5, -5, 10, -10, 20, -20, 30, -30, 45, -45, -60, 60], 34 | 'blur': [(2, 0.2), (0.2, 2), (3, 1), (1, 3), (2, 2)], 35 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6, -0.6)], 36 | 'dropout': (7, 0.2)} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.target_not_found_threshold = 0.0 44 | params.distractor_threshold = 100 45 | params.hard_negative_threshold = 0.45 46 | params.target_neighborhood_scale = 2.2 47 | params.dispalcement_scale = 0.7 48 | 49 | params.perform_hn_without_windowing = True 50 | 51 | params.hard_negative_learning_rate = 0.02 52 | params.update_scale_when_uncertain = True 53 | 54 | # IoUnet parameters 55 | params.iounet_augmentation = False 56 | params.iounet_use_log_scale = True 57 | params.iounet_k = 3 58 | params.num_init_random_boxes = 9 59 | params.box_jitter_pos = 0.1 60 | params.box_jitter_sz = 0.5 61 | params.maximal_aspect_ratio = 6 62 | params.box_refinement_iter = 5 63 | params.box_refinement_step_length = 1 64 | params.box_refinement_step_decay = 1 65 | 66 | params.net = NetWithBackbone(net_path='dimp50.pth', 67 | use_gpu=params.use_gpu) 68 | 69 | params.vot_anno_conversion_type = 'preserve_area' 70 | 71 | return params 72 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/dimp18_vot18.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 14 * 16 13 | params.search_area_scale = 4 14 | params.feature_size_odd = False 15 | 16 | # Learning parameters 17 | params.sample_memory_size = 250 18 | params.learning_rate = 0.0075 19 | params.init_samples_minimum_weight = 0.0 20 | params.train_skipping = 10 21 | 22 | # Net optimization params 23 | params.update_classifier = True 24 | params.net_opt_iter = 25 25 | params.net_opt_update_iter = 3 26 | params.net_opt_hn_iter = 3 27 | 28 | # Detection parameters 29 | params.window_output = True 30 | 31 | # Init augmentation parameters 32 | params.use_augmentation = True 33 | params.augmentation = {'fliplr': True, 34 | 'rotate': [5, -5, 10, -10, 20, -20, 30, -30, 45, -45, -60, 60], 35 | 'blur': [(2, 0.2), (0.2, 2), (3, 1), (1, 3), (2, 2)], 36 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6, -0.6)], 37 | 'dropout': (7, 0.2)} 38 | 39 | params.augmentation_expansion_factor = 2 40 | params.random_shift_factor = 1/3 41 | 42 | # Advanced localization parameters 43 | params.advanced_localization = True 44 | params.target_not_found_threshold = 0.0 45 | params.distractor_threshold = 100 46 | params.hard_negative_threshold = 
0.45 47 | params.target_neighborhood_scale = 2.2 48 | params.dispalcement_scale = 0.7 49 | 50 | params.perform_hn_without_windowing = True 51 | 52 | params.hard_negative_learning_rate = 0.02 53 | params.update_scale_when_uncertain = True 54 | 55 | # IoUnet parameters 56 | params.iounet_augmentation = False 57 | params.iounet_use_log_scale = True 58 | params.iounet_k = 3 59 | params.num_init_random_boxes = 9 60 | params.box_jitter_pos = 0.1 61 | params.box_jitter_sz = 0.5 62 | params.maximal_aspect_ratio = 6 63 | params.box_refinement_iter = 5 64 | params.box_refinement_step_length = 1 65 | params.box_refinement_step_decay = 1 66 | 67 | params.net = NetWithBackbone(net_path='dimp18.pth', 68 | use_gpu=params.use_gpu) 69 | 70 | params.vot_anno_conversion_type = 'preserve_area' 71 | 72 | return params 73 | -------------------------------------------------------------------------------- /pytracking/parameter/kys/default.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | 5 | def parameters(): 6 | params = TrackerParams() 7 | 8 | params.debug = 0 9 | params.visualization = False 10 | 11 | params.use_gpu = True 12 | 13 | params.image_sample_size = 18*16 14 | params.search_area_scale = 5 15 | 16 | # Learning parameters 17 | params.sample_memory_size = 50 18 | params.learning_rate = 0.01 19 | params.init_samples_minimum_weight = 0.25 20 | params.train_skipping = 20 21 | params.output_sigma_factor = 1/4 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | params.net_opt_iter = 10 26 | params.net_opt_update_iter = 2 27 | 28 | # Init augmentation parameters 29 | params.use_augmentation = True 30 | params.augmentation = {'fliplr': True, 31 | 'rotate': [10, -10, 45, -45], 32 | 'blur': [(3,1), (1, 3), (2, 2)], 33 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 34 | 'dropout': (2, 0.2)} 35 | 36 | params.augmentation_expansion_factor = 2 37 | params.random_shift_factor = 1/3 38 | 39 | # Localization parameters 40 | params.window_output = True 41 | params.use_clipped_window = True 42 | params.effective_search_area = 10.0 43 | params.apply_window_to_dimp_score = True 44 | 45 | params.dimp_threshold = 0.05 46 | params.target_not_found_threshold_fused = 0.05 47 | 48 | params.reset_state_during_occlusion = False 49 | params.prev_feat_remove_subpixel_shift = True 50 | params.move_feat_to_center = True 51 | 52 | params.update_scale_when_uncertain = True 53 | 54 | # IoUnet parameters 55 | params.use_iou_net = True 56 | params.iounet_augmentation = False 57 | params.iounet_use_log_scale = True 58 | params.iounet_k = 3 59 | params.num_init_random_boxes = 9 60 | params.box_jitter_pos = 0.1 61 | params.box_jitter_sz = 0.5 62 | params.maximal_aspect_ratio = 6 63 | params.box_refinement_iter = 5 64 | params.box_refinement_step_length = 1 65 | params.box_refinement_step_decay = 1 66 | 67 | params.remove_offset_in_fused_score = True 68 | params.score_downsample_factor = 1 69 | 70 | params.net = NetWithBackbone(net_path='kys.pth', 71 | use_gpu=params.use_gpu) 72 | 73 | params.vot_anno_conversion_type = 'preserve_area' 74 | return params 75 | -------------------------------------------------------------------------------- /ltr/models/transformer/position_encoding.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class 
NerfPositionalEncoding(nn.Module): 7 | def __init__(self, depth=10, sine_type='lin_sine', avoid_aliasing=False, max_spatial_resolution=None): 8 | ''' 9 | out_dim = in_dim * depth * 2 10 | ''' 11 | super().__init__() 12 | if sine_type == 'lin_sine': 13 | self.bases = [i+1 for i in range(depth)] 14 | elif sine_type == 'exp_sine': 15 | self.bases = [2**i for i in range(depth)] 16 | print(f'using {sine_type} as positional encoding') 17 | 18 | if avoid_aliasing and max_spatial_resolution == None: 19 | raise ValueError('Please specify the maxima spatial resolution (h, w) of the feature map') 20 | elif avoid_aliasing: 21 | self.factor = max_spatial_resolution/depth 22 | else: 23 | self.factor = 1. 24 | 25 | @torch.no_grad() 26 | def forward(self, inputs): 27 | out = torch.cat([torch.sin(i * self.factor * math.pi * inputs) for i in self.bases] + 28 | [torch.cos(i * self.factor * math.pi * inputs) for i in self.bases], axis=-1) 29 | assert torch.isnan(out).any() == False 30 | return out 31 | 32 | 33 | class PositionEmbeddingSine(nn.Module): 34 | """ 35 | This is a more standard version of the position embedding, very similar to the one 36 | used by the Attention is all you need paper, generalized to work on images. 37 | """ 38 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None, sine_type='lin_sine', 39 | avoid_aliazing=False, max_spatial_resolution=None): 40 | super().__init__() 41 | self.num_pos_feats = num_pos_feats 42 | self.temperature = temperature 43 | self.normalize = normalize 44 | if not isinstance(max_spatial_resolution, (list, tuple)): 45 | max_spatial_resolution = (max_spatial_resolution, max_spatial_resolution) 46 | self.sine = NerfPositionalEncoding(num_pos_feats // 2, sine_type, avoid_aliazing, max(max_spatial_resolution)) 47 | 48 | @torch.no_grad() 49 | def forward(self, mask): 50 | assert mask is not None 51 | not_mask = ~mask 52 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 53 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 54 | eps = 1e-6 55 | y_embed = (y_embed-0.5) / (y_embed[:, -1:, :] + eps) 56 | x_embed = (x_embed-0.5) / (x_embed[:, :, -1:] + eps) 57 | pos = torch.stack([x_embed, y_embed], dim=-1) 58 | return self.sine(pos).permute(0, 3, 1, 2) 59 | -------------------------------------------------------------------------------- /pytracking/features/net_wrappers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from pytracking.utils.loading import load_network 3 | 4 | 5 | class NetWrapper: 6 | """Used for wrapping networks in pytracking. 7 | Network modules and functions can be accessed directly as if they were members of this class.""" 8 | _rec_iter=0 9 | def __init__(self, net_path, use_gpu=True, initialize=False, **kwargs): 10 | self.net_path = net_path 11 | self.use_gpu = use_gpu 12 | self.net = None 13 | self.net_kwargs = kwargs 14 | if initialize: 15 | self.initialize() 16 | 17 | def __getattr__(self, name): 18 | if self._rec_iter > 0: 19 | self._rec_iter = 0 20 | return None 21 | self._rec_iter += 1 22 | try: 23 | ret_val = getattr(self.net, name) 24 | except Exception as e: 25 | self._rec_iter = 0 26 | raise e 27 | self._rec_iter = 0 28 | return ret_val 29 | 30 | def load_network(self): 31 | self.net = load_network(self.net_path, **self.net_kwargs) 32 | if self.use_gpu: 33 | self.cuda() 34 | self.eval() 35 | 36 | def initialize(self): 37 | self.load_network() 38 | 39 | 40 | class NetWithBackbone(NetWrapper): 41 | """Wraps a network with a common backbone. 
42 | Assumes the network have a 'extract_backbone_features(image)' function.""" 43 | 44 | def __init__(self, net_path, use_gpu=True, initialize=False, image_format='rgb', 45 | mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), **kwargs): 46 | super().__init__(net_path, use_gpu, initialize, **kwargs) 47 | 48 | self.image_format = image_format 49 | self._mean = torch.Tensor(mean).view(1, -1, 1, 1) 50 | self._std = torch.Tensor(std).view(1, -1, 1, 1) 51 | 52 | def initialize(self, image_format='rgb', mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): 53 | super().initialize() 54 | 55 | def preprocess_image(self, im: torch.Tensor): 56 | """Normalize the image with the mean and standard deviation used by the network.""" 57 | 58 | if self.image_format in ['rgb', 'bgr']: 59 | im = im/255 60 | 61 | if self.image_format in ['bgr', 'bgr255']: 62 | im = im[:, [2, 1, 0], :, :] 63 | im -= self._mean 64 | im /= self._std 65 | 66 | if self.use_gpu: 67 | im = im.cuda() 68 | 69 | return im 70 | 71 | def extract_backbone(self, im: torch.Tensor): 72 | """Extract backbone features from the network. 73 | Expects a float tensor image with pixel range [0, 255].""" 74 | im = self.preprocess_image(im) 75 | return self.net.extract_backbone_features(im) 76 | -------------------------------------------------------------------------------- /ltr/models/loss/segmentation.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | import ltr.models.loss.lovasz_loss as lovasz_loss 6 | 7 | 8 | class LovaszSegLoss(nn.Module): 9 | def __init__(self, classes=[1,], per_image=True): 10 | super().__init__() 11 | 12 | self.classes = classes 13 | self.per_image=per_image 14 | 15 | def forward(self, input, target): 16 | return lovasz_loss.lovasz_softmax(probas=torch.sigmoid(input), labels=target, per_image=self.per_image, classes=self.classes) 17 | 18 | 19 | def one_hot(labels: torch.Tensor, 20 | num_classes: int, 21 | device = None, 22 | dtype = None, 23 | eps = 1e-6) -> torch.Tensor: 24 | r"""Converts an integer label x-D tensor to a one-hot (x+1)-D tensor. 25 | Args: 26 | labels (torch.Tensor) : tensor with labels of shape :math:`(N, *)`, 27 | where N is batch size. Each value is an integer 28 | representing correct classification. 29 | num_classes (int): number of classes in labels. 30 | device (Optional[torch.device]): the desired device of returned tensor. 31 | Default: if None, uses the current device for the default tensor type 32 | (see torch.set_default_tensor_type()). device will be the CPU for CPU 33 | tensor types and the current CUDA device for CUDA tensor types. 34 | dtype (Optional[torch.dtype]): the desired data type of returned 35 | tensor. Default: if None, infers data type from values. 36 | Returns: 37 | torch.Tensor: the labels in one hot tensor of shape :math:`(N, C, *)`, 38 | Examples:: 39 | #>>> labels = torch.LongTensor([[[0, 1], [2, 0]]]) 40 | #>>> kornia.losses.one_hot(labels, num_classes=3) 41 | tensor([[[[1., 0.], 42 | [0., 1.]], 43 | [[0., 1.], 44 | [0., 0.]], 45 | [[0., 0.], 46 | [1., 0.]]]] 47 | """ 48 | if not torch.is_tensor(labels): 49 | raise TypeError("Input labels type is not a torch.Tensor. Got {}" 50 | .format(type(labels))) 51 | if not labels.dtype == torch.int64: 52 | raise ValueError( 53 | "labels must be of the same dtype torch.int64. 
Got: {}" .format( 54 | labels.dtype)) 55 | if num_classes < 1: 56 | raise ValueError("The number of classes must be bigger than one." 57 | " Got: {}".format(num_classes)) 58 | shape = labels.shape 59 | one_hot = torch.zeros((shape[0], num_classes, shape[1], shape[2])).to(device) 60 | return one_hot.scatter_(1, labels.unsqueeze(1), 1.0) + eps 61 | -------------------------------------------------------------------------------- /ltr/models/loss/kl_regression.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | 6 | 7 | class KLRegression(nn.Module): 8 | """KL-divergence loss for probabilistic regression. 9 | It is computed using Monte Carlo (MC) samples from an arbitrary distribution.""" 10 | 11 | def __init__(self, eps=0.0): 12 | super().__init__() 13 | self.eps = eps 14 | 15 | def forward(self, scores, sample_density, gt_density, mc_dim=-1): 16 | """Args: 17 | scores: predicted score values 18 | sample_density: probability density of the sample distribution 19 | gt_density: probability density of the ground truth distribution 20 | mc_dim: dimension of the MC samples""" 21 | 22 | exp_val = scores - torch.log(sample_density + self.eps) 23 | 24 | L = torch.logsumexp(exp_val, dim=mc_dim) - math.log(scores.shape[mc_dim]) - \ 25 | torch.mean(scores * (gt_density / (sample_density + self.eps)), dim=mc_dim) 26 | 27 | return L.mean() 28 | 29 | 30 | class MLRegression(nn.Module): 31 | """Maximum likelihood loss for probabilistic regression. 32 | It is computed using Monte Carlo (MC) samples from an arbitrary distribution.""" 33 | 34 | def __init__(self, eps=0.0): 35 | super().__init__() 36 | self.eps = eps 37 | 38 | def forward(self, scores, sample_density, gt_density=None, mc_dim=-1): 39 | """Args: 40 | scores: predicted score values. First sample must be ground-truth 41 | sample_density: probability density of the sample distribution 42 | gt_density: not used 43 | mc_dim: dimension of the MC samples. Only mc_dim=1 supported""" 44 | 45 | assert mc_dim == 1 46 | assert (sample_density[:,0,...] == -1).all() 47 | 48 | exp_val = scores[:, 1:, ...] - torch.log(sample_density[:, 1:, ...] + self.eps) 49 | 50 | L = torch.logsumexp(exp_val, dim=mc_dim) - math.log(scores.shape[mc_dim] - 1) - scores[:, 0, ...] 51 | loss = L.mean() 52 | return loss 53 | 54 | 55 | class KLRegressionGrid(nn.Module): 56 | """KL-divergence loss for probabilistic regression. 
57 | It is computed using the grid integration strategy.""" 58 | 59 | def forward(self, scores, gt_density, grid_dim=-1, grid_scale=1.0): 60 | """Args: 61 | scores: predicted score values 62 | gt_density: probability density of the ground truth distribution 63 | grid_dim: dimension(s) of the grid 64 | grid_scale: area of one grid cell""" 65 | 66 | score_corr = grid_scale * torch.sum(scores * gt_density, dim=grid_dim) 67 | 68 | L = torch.logsumexp(scores, dim=grid_dim) + math.log(grid_scale) - score_corr 69 | 70 | return L.mean() 71 | -------------------------------------------------------------------------------- /ltr/dataset/base_image_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from ltr.data.image_loader import jpeg4py_loader 3 | 4 | 5 | class BaseImageDataset(torch.utils.data.Dataset): 6 | """ Base class for image datasets """ 7 | 8 | def __init__(self, name, root, image_loader=jpeg4py_loader): 9 | """ 10 | args: 11 | root - The root path to the dataset 12 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 13 | is used by default. 14 | """ 15 | self.name = name 16 | self.root = root 17 | self.image_loader = image_loader 18 | 19 | self.image_list = [] # Contains the list of sequences. 20 | self.class_list = [] 21 | 22 | def __len__(self): 23 | """ Returns size of the dataset 24 | returns: 25 | int - number of samples in the dataset 26 | """ 27 | return self.get_num_images() 28 | 29 | def __getitem__(self, index): 30 | """ Not to be used! Check get_frames() instead. 31 | """ 32 | return None 33 | 34 | def get_name(self): 35 | """ Name of the dataset 36 | 37 | returns: 38 | string - Name of the dataset 39 | """ 40 | raise NotImplementedError 41 | 42 | def get_num_images(self): 43 | """ Number of sequences in a dataset 44 | 45 | returns: 46 | int - number of sequences in the dataset.""" 47 | return len(self.image_list) 48 | 49 | def has_class_info(self): 50 | return False 51 | 52 | def get_class_name(self, image_id): 53 | return None 54 | 55 | def get_num_classes(self): 56 | return len(self.class_list) 57 | 58 | def get_class_list(self): 59 | return self.class_list 60 | 61 | def get_images_in_class(self, class_name): 62 | raise NotImplementedError 63 | 64 | def has_segmentation_info(self): 65 | return False 66 | 67 | def get_image_info(self, seq_id): 68 | """ Returns information about a particular image, 69 | 70 | args: 71 | seq_id - index of the image 72 | 73 | returns: 74 | Dict 75 | """ 76 | raise NotImplementedError 77 | 78 | def get_image(self, image_id, anno=None): 79 | """ Get a image 80 | 81 | args: 82 | image_id - index of image 83 | anno(None) - The annotation for the sequence (see get_sequence_info). If None, they will be loaded. 84 | 85 | returns: 86 | image - 87 | anno - 88 | dict - A dict containing meta information about the sequence, e.g. class of the target object. 
89 | 90 | """ 91 | raise NotImplementedError 92 | 93 | -------------------------------------------------------------------------------- /pytracking/evaluation/environment.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | 4 | 5 | class EnvSettings: 6 | def __init__(self): 7 | pytracking_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) 8 | 9 | self.results_path = '{}/tracking_results/'.format(pytracking_path) 10 | self.segmentation_path = '{}/segmentation_results/'.format(pytracking_path) 11 | self.network_path = '{}/networks/'.format(pytracking_path) 12 | self.result_plot_path = '{}/result_plots/'.format(pytracking_path) 13 | self.otb_path = '' 14 | self.nfs_path = '' 15 | self.uav_path = '' 16 | self.tpl_path = '' 17 | self.vot_path = '' 18 | self.got10k_path = '' 19 | self.lasot_path = '' 20 | self.lasot_extension_subset_path = '' 21 | self.trackingnet_path = '' 22 | self.oxuva_path = '' 23 | self.davis_dir = '' 24 | self.youtubevos_dir = '' 25 | 26 | self.got_packed_results_path = '' 27 | self.got_reports_path = '' 28 | self.tn_packed_results_path = '' 29 | 30 | 31 | def create_default_local_file(): 32 | comment = {'results_path': 'Where to store tracking results', 33 | 'network_path': 'Where tracking networks are stored.'} 34 | 35 | path = os.path.join(os.path.dirname(__file__), 'local.py') 36 | with open(path, 'w') as f: 37 | settings = EnvSettings() 38 | 39 | f.write('from pytracking.evaluation.environment import EnvSettings\n\n') 40 | f.write('def local_env_settings():\n') 41 | f.write(' settings = EnvSettings()\n\n') 42 | f.write(' # Set your local paths here.\n\n') 43 | 44 | for attr in dir(settings): 45 | comment_str = None 46 | if attr in comment: 47 | comment_str = comment[attr] 48 | attr_val = getattr(settings, attr) 49 | if not attr.startswith('__') and not callable(attr_val): 50 | if comment_str is None: 51 | f.write(' settings.{} = \'{}\'\n'.format(attr, attr_val)) 52 | else: 53 | f.write(' settings.{} = \'{}\' # {}\n'.format(attr, attr_val, comment_str)) 54 | f.write('\n return settings\n\n') 55 | 56 | 57 | def env_settings(): 58 | env_module_name = 'pytracking.evaluation.local' 59 | try: 60 | env_module = importlib.import_module(env_module_name) 61 | return env_module.local_env_settings() 62 | except: 63 | env_file = os.path.join(os.path.dirname(__file__), 'local.py') 64 | 65 | # Create a default file 66 | create_default_local_file() 67 | raise RuntimeError('YOU HAVE NOT SETUP YOUR local.py!!!\n Go to "{}" and set all the paths you need. 
' 68 | 'Then try to run again.'.format(env_file)) -------------------------------------------------------------------------------- /pytracking/parameter/kys/default_vot.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | 5 | def parameters(): 6 | params = TrackerParams() 7 | 8 | params.debug = 0 9 | params.visualization = False 10 | 11 | params.use_gpu = True 12 | 13 | params.image_sample_size = 14*16 14 | params.search_area_scale = 4 15 | 16 | # Learning parameters 17 | params.sample_memory_size = 250 18 | params.learning_rate = 0.0075 19 | params.init_samples_minimum_weight = 0.0 20 | params.train_skipping = 10 21 | 22 | # Net optimization params 23 | params.update_classifier = True 24 | params.net_opt_iter = 25 25 | params.net_opt_update_iter = 3 26 | params.net_opt_hn_iter = 3 27 | 28 | params.output_sigma_factor = 1/4 29 | 30 | # Init augmentation parameters 31 | params.use_augmentation = True 32 | params.augmentation = {'fliplr': True, 33 | 'rotate': [5, -5, 10, -10, 20, -20, 30, -30, 45, -45, -60, 60], 34 | 'blur': [(2, 0.2), (0.2, 2), (3, 1), (1, 3), (2, 2)], 35 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6, -0.6)], 36 | 'dropout': (7, 0.2)} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 1 / 3 40 | 41 | # localization parameters 42 | params.window_output = True 43 | params.use_clipped_window = True 44 | params.effective_search_area = 4.0 45 | params.apply_window_to_dimp_score = True 46 | 47 | params.target_not_found_threshold_fused = 0.05 48 | params.dimp_threshold = 0.05 49 | 50 | params.reset_state_during_occlusion = True 51 | 52 | params.prev_feat_remove_subpixel_shift = True 53 | params.move_feat_to_center = True 54 | 55 | params.perform_hn_mining_dimp = True 56 | params.hard_negative_threshold = 0.5 57 | params.target_neighborhood_scale_safe = 2.2 58 | params.hard_negative_learning_rate = 0.02 59 | params.update_scale_when_uncertain = True 60 | 61 | # IoUnet parameters 62 | params.use_iou_net = True 63 | params.iounet_augmentation = False 64 | params.iounet_use_log_scale = True 65 | params.iounet_k = 3 66 | params.num_init_random_boxes = 9 67 | params.box_jitter_pos = 0.1 68 | params.box_jitter_sz = 0.5 69 | params.maximal_aspect_ratio = 6 70 | params.box_refinement_iter = 5 71 | params.box_refinement_step_length = 1 72 | params.box_refinement_step_decay = 1 73 | 74 | params.remove_offset_in_fused_score = True 75 | params.score_downsample_factor = 1 76 | 77 | params.net = NetWithBackbone(net_path='kys.pth', 78 | use_gpu=params.use_gpu) 79 | 80 | params.vot_anno_conversion_type = 'preserve_area' 81 | return params 82 | -------------------------------------------------------------------------------- /pytracking/run_tracker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | env_path = os.path.join(os.path.dirname(__file__), '..') 6 | if env_path not in sys.path: 7 | sys.path.append(env_path) 8 | 9 | from pytracking.evaluation import get_dataset 10 | from pytracking.evaluation.running import run_dataset 11 | from pytracking.evaluation import Tracker 12 | 13 | 14 | def run_tracker(tracker_name, tracker_param, run_id=None, dataset_name='otb', sequence=None, debug=0, threads=0, 15 | visdom_info=None): 16 | """Run tracker on sequence or dataset. 
17 | args: 18 | tracker_name: Name of tracking method. 19 | tracker_param: Name of parameter file. 20 | run_id: The run id. 21 | dataset_name: Name of dataset (otb, nfs, uav, tpl, vot, tn, gott, gotv, lasot). 22 | sequence: Sequence number or name. 23 | debug: Debug level. 24 | threads: Number of threads. 25 | visdom_info: Dict optionally containing 'use_visdom', 'server' and 'port' for Visdom visualization. 26 | """ 27 | 28 | visdom_info = {} if visdom_info is None else visdom_info 29 | 30 | dataset = get_dataset(dataset_name) 31 | 32 | if sequence is not None: 33 | dataset = [dataset[sequence]] 34 | 35 | trackers = [Tracker(tracker_name, tracker_param, run_id)] 36 | 37 | run_dataset(dataset, trackers, debug, threads, visdom_info=visdom_info) 38 | 39 | 40 | def main(): 41 | parser = argparse.ArgumentParser(description='Run tracker on sequence or dataset.') 42 | parser.add_argument('tracker_name', type=str, help='Name of tracking method.') 43 | parser.add_argument('tracker_param', type=str, help='Name of parameter file.') 44 | parser.add_argument('--runid', type=int, default=None, help='The run id.') 45 | parser.add_argument('--dataset_name', type=str, default='otb', help='Name of dataset (otb, nfs, uav, tpl, vot, tn, gott, gotv, lasot).') 46 | parser.add_argument('--sequence', type=str, default=None, help='Sequence number or name.') 47 | parser.add_argument('--debug', type=int, default=0, help='Debug level.') 48 | parser.add_argument('--threads', type=int, default=0, help='Number of threads.') 49 | parser.add_argument('--use_visdom', type=bool, default=True, help='Flag to enable visdom.') 50 | parser.add_argument('--visdom_server', type=str, default='127.0.0.1', help='Server for visdom.') 51 | parser.add_argument('--visdom_port', type=int, default=8097, help='Port for visdom.') 52 | 53 | args = parser.parse_args() 54 | 55 | try: 56 | seq_name = int(args.sequence) 57 | except: 58 | seq_name = args.sequence 59 | 60 | run_tracker(args.tracker_name, args.tracker_param, args.runid, args.dataset_name, seq_name, args.debug, 61 | args.threads, {'use_visdom': args.use_visdom, 'server': args.visdom_server, 'port': args.visdom_port}) 62 | 63 | 64 | if __name__ == '__main__': 65 | main() 66 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/prdimp18.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 18*16 13 | params.search_area_scale = 5 14 | 15 | # Learning parameters 16 | params.sample_memory_size = 50 17 | params.learning_rate = 0.01 18 | params.init_samples_minimum_weight = 0.25 19 | params.train_skipping = 20 20 | 21 | # Net optimization params 22 | params.update_classifier = True 23 | params.net_opt_iter = 10 24 | params.net_opt_update_iter = 2 25 | params.net_opt_hn_iter = 1 26 | 27 | # Detection parameters 28 | params.window_output = False 29 | 30 | # Init augmentation parameters 31 | params.use_augmentation = True 32 | params.augmentation = {'fliplr': True, 33 | 'rotate': [10, -10, 45, -45], 34 | 'blur': [(3,1), (1, 3), (2, 2)], 35 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 36 | 'dropout': (2, 0.2)} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 
1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.score_preprocess = 'softmax' 44 | params.target_not_found_threshold = 0.04 45 | params.distractor_threshold = 0.8 46 | params.hard_negative_threshold = 0.5 47 | params.target_neighborhood_scale = 2.2 48 | params.dispalcement_scale = 0.8 49 | params.hard_negative_learning_rate = 0.02 50 | params.update_scale_when_uncertain = True 51 | 52 | # IoUnet parameters 53 | params.box_refinement_space = 'relative' 54 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 55 | params.iounet_k = 3 # Top-k average to estimate final box 56 | params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 57 | params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 58 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 59 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 60 | params.box_refinement_iter = 10 # Number of iterations for refining the boxes 61 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 62 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 63 | 64 | params.net = NetWithBackbone(net_path='prdimp18.pth.tar', 65 | use_gpu=params.use_gpu) 66 | 67 | params.vot_anno_conversion_type = 'preserve_area' 68 | 69 | return params 70 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/super_dimp.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 22*16 13 | params.search_area_scale = 6 14 | params.border_mode = 'inside_major' 15 | params.patch_max_scale_change = 1.5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 50 19 | params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | params.train_skipping = 20 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | params.net_opt_iter = 10 26 | params.net_opt_update_iter = 2 27 | params.net_opt_hn_iter = 1 28 | 29 | # Detection parameters 30 | params.window_output = False 31 | 32 | # Init augmentation parameters 33 | params.use_augmentation = True 34 | params.augmentation = {'fliplr': True, 35 | 'rotate': [10, -10, 45, -45], 36 | 'blur': [(3,1), (1, 3), (2, 2)], 37 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 38 | 'dropout': (2, 0.2)} 39 | 40 | params.augmentation_expansion_factor = 2 41 | params.random_shift_factor = 1/3 42 | 43 | # Advanced localization parameters 44 | params.advanced_localization = True 45 | params.target_not_found_threshold = 0.25 46 | params.distractor_threshold = 0.8 47 | params.hard_negative_threshold = 0.5 48 | params.target_neighborhood_scale = 2.2 49 | params.dispalcement_scale = 0.8 50 | params.hard_negative_learning_rate = 0.02 51 | params.update_scale_when_uncertain = True 52 | 53 | # IoUnet parameters 54 | params.box_refinement_space = 'relative' 55 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 56 | params.iounet_k = 3 # Top-k average to estimate final box 57 | 
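# A hedged sketch of how an ATOM/DiMP-style tracker typically consumes the IoU-Net
# settings in this block (the actual logic lives in the tracker/bbreg code, not in
# this parameter file); the helper names below are illustrative only:
#
#   proposals = [cls_box] + jitter(cls_box, params.num_init_random_boxes,
#                                  params.box_jitter_pos, params.box_jitter_sz)
#   for _ in range(params.box_refinement_iter):
#       proposals += params.box_refinement_step_length * grad_iou(proposals)
#   final_box = top_k_mean(proposals, k=params.iounet_k)   # the "Top-k average"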
params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 58 | params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 59 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 60 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 61 | params.box_refinement_iter = 10 # Number of iterations for refining the boxes 62 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 63 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 64 | 65 | params.net = NetWithBackbone(net_path='super_dimp.pth.tar', 66 | use_gpu=params.use_gpu) 67 | 68 | params.vot_anno_conversion_type = 'preserve_area' 69 | 70 | return params 71 | -------------------------------------------------------------------------------- /ltr/models/kys/cost_volume.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | from spatial_correlation_sampler import SpatialCorrelationSampler 6 | 7 | 8 | class CostVolume(nn.Module): 9 | def __init__(self, kernel_size, max_displacement, stride=1, abs_coordinate_output=False): 10 | super().__init__() 11 | self.correlation_layer = SpatialCorrelationSampler(kernel_size, 2*max_displacement + 1, stride, 12 | int((kernel_size-1)/2)) 13 | self.abs_coordinate_output = abs_coordinate_output 14 | 15 | def forward(self, feat1, feat2): 16 | assert feat1.dim() == 4 and feat2.dim() == 4, 'Expect 4 dimensional inputs' 17 | 18 | batch_size = feat1.shape[0] 19 | 20 | cost_volume = self.correlation_layer(feat1, feat2) 21 | 22 | if self.abs_coordinate_output: 23 | cost_volume = cost_volume.view(batch_size, -1, cost_volume.shape[-2], cost_volume.shape[-1]) 24 | cost_volume = remap_cost_volume(cost_volume) 25 | 26 | return cost_volume.view(batch_size, -1, cost_volume.shape[-2], cost_volume.shape[-1]) 27 | 28 | 29 | def remap_cost_volume(cost_volume): 30 | """ 31 | 32 | :param cost_volume: cost volume of shape (batch, (2*md-1)*(2*md-1), rows, cols), where md is the maximum displacement 33 | allowed when computing the cost volume. 
34 | :return: cost_volume_remapped: The input cost volume is remapped to shape (batch, rows, cols, rows, cols) 35 | """ 36 | 37 | if cost_volume.dim() != 4: 38 | raise ValueError('input cost_volume should have 4 dimensions') 39 | 40 | [batch_size, d_, num_rows, num_cols] = cost_volume.size() 41 | d_sqrt_ = np.sqrt(d_) 42 | 43 | if not d_sqrt_.is_integer(): 44 | raise ValueError("Invalid cost volume") 45 | 46 | cost_volume = cost_volume.view(batch_size, int(d_sqrt_), int(d_sqrt_), num_rows, num_cols) 47 | 48 | cost_volume_remapped = torch.zeros((batch_size, num_rows, num_cols, 49 | num_rows, num_cols), 50 | dtype=cost_volume.dtype, 51 | device=cost_volume.device) 52 | 53 | if cost_volume.size()[1] % 2 != 1: 54 | raise ValueError 55 | 56 | md = int((cost_volume.size()[1]-1)/2) 57 | 58 | for r in range(num_rows): 59 | for c in range(num_cols): 60 | r1_ = r - md 61 | r2_ = r1_ + 2*md + 1 62 | c1_ = c - md 63 | c2_ = c1_ + 2*md + 1 64 | 65 | r1_pad_ = max(-r1_, 0) 66 | r2_pad_ = max(r2_ - cost_volume_remapped.shape[1], 0) 67 | 68 | c1_pad_ = max(-c1_, 0) 69 | c2_pad_ = max(c2_ - cost_volume_remapped.shape[2], 0) 70 | 71 | d_ = cost_volume.size()[1] 72 | cost_volume_remapped[:, r1_+r1_pad_:r2_-r2_pad_, c1_+c1_pad_:c2_-c2_pad_, r, c] = \ 73 | cost_volume[:, r1_pad_:d_-r2_pad_, c1_pad_:d_-c2_pad_, r, c] 74 | 75 | return cost_volume_remapped 76 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/prdimp50.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 22*16 13 | params.search_area_scale = 6 14 | params.border_mode = 'inside_major' 15 | params.patch_max_scale_change = 1.5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 50 19 | params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | params.train_skipping = 20 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | params.net_opt_iter = 10 26 | params.net_opt_update_iter = 2 27 | params.net_opt_hn_iter = 1 28 | 29 | # Detection parameters 30 | params.window_output = False 31 | 32 | # Init augmentation parameters 33 | params.use_augmentation = True 34 | params.augmentation = {'fliplr': True, 35 | 'rotate': [10, -10, 45, -45], 36 | 'blur': [(3,1), (1, 3), (2, 2)], 37 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 38 | 'dropout': (2, 0.2)} 39 | 40 | params.augmentation_expansion_factor = 2 41 | params.random_shift_factor = 1/3 42 | 43 | # Advanced localization parameters 44 | params.advanced_localization = True 45 | params.score_preprocess = 'softmax' 46 | params.target_not_found_threshold = 0.04 47 | params.distractor_threshold = 0.8 48 | params.hard_negative_threshold = 0.5 49 | params.target_neighborhood_scale = 2.2 50 | params.dispalcement_scale = 0.8 51 | params.hard_negative_learning_rate = 0.02 52 | params.update_scale_when_uncertain = True 53 | 54 | # IoUnet parameters 55 | params.box_refinement_space = 'relative' 56 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 57 | params.iounet_k = 3 # Top-k average to estimate final box 58 | params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 59 
| params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 60 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 61 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 62 | params.box_refinement_iter = 10 # Number of iterations for refining the boxes 63 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 64 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 65 | 66 | params.net = NetWithBackbone(net_path='prdimp50.pth.tar', 67 | use_gpu=params.use_gpu) 68 | 69 | params.vot_anno_conversion_type = 'preserve_area' 70 | 71 | return params 72 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp_simple/super_dimp_simple.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 22*16 13 | params.search_area_scale = 6 14 | params.border_mode = 'inside_major' 15 | params.patch_max_scale_change = 1.5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 50 19 | params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | params.train_skipping = 20 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | params.net_opt_iter = 10 26 | params.net_opt_update_iter = 2 27 | params.net_opt_hn_iter = 1 28 | 29 | # Detection parameters 30 | params.window_output = False 31 | 32 | # Init augmentation parameters 33 | params.use_augmentation = True 34 | params.augmentation = {'fliplr': True, 35 | 'rotate': [10, -10, 45, -45], 36 | 'blur': [(3,1), (1, 3), (2, 2)], 37 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 38 | 'dropout': (2, 0.2)} 39 | 40 | params.augmentation_expansion_factor = 2 41 | params.random_shift_factor = 1/3 42 | 43 | # Advanced localization parameters 44 | params.advanced_localization = True 45 | params.target_not_found_threshold = 0.25 46 | params.distractor_threshold = 0.8 47 | params.hard_negative_threshold = 0.5 48 | params.target_neighborhood_scale = 2.2 49 | params.dispalcement_scale = 0.8 50 | params.hard_negative_learning_rate = 0.02 51 | params.update_scale_when_uncertain = True 52 | 53 | # IoUnet parameters 54 | params.box_refinement_space = 'relative' 55 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 56 | params.iounet_k = 3 # Top-k average to estimate final box 57 | params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 58 | params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 59 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 60 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 61 | params.box_refinement_iter = 10 # Number of iterations for refining the boxes 62 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 63 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 64 | 65 | params.net = NetWithBackbone(net_path='super_dimp_simple.pth.tar', 66 | use_gpu=params.use_gpu) 67 | 68 | params.vot_anno_conversion_type = 'preserve_area' 69 | 
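# Usage sketch (hedged): a parameter module like this one is normally selected by
# name through the evaluation entry point shown in pytracking/run_tracker.py, e.g.
#
#   python pytracking/run_tracker.py dimp_simple super_dimp_simple --dataset_name otb
#
# where, assuming the standard layout of this repository, the first argument picks
# the tracker package (pytracking/tracker/dimp_simple) and the second picks this
# parameter file (pytracking/parameter/dimp_simple/super_dimp_simple.py).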
70 | params.use_gt_box = True 71 | 72 | return params 73 | -------------------------------------------------------------------------------- /pytracking/evaluation/mobifacedataset.py: -------------------------------------------------------------------------------- 1 | from pytracking.evaluation.data import Sequence, BaseDataset, SequenceList 2 | import glob 3 | import numpy as np 4 | import os.path as osp 5 | from collections import OrderedDict 6 | import pandas as pd 7 | 8 | 9 | class MobifaceDataset(BaseDataset): 10 | """ Mobiface dataset. 11 | Publication: 12 | MobiFace: A Novel Dataset for Mobile Face Tracking in the Wild 13 | Yiming Lin, Shiyang Cheng, Jie Shen, Maja Pantic 14 | arXiv:1805.09749, 2018 15 | https://arxiv.org/pdf/1805.09749v2 16 | 17 | Download dataset from https://mobiface.github.io/ 18 | """ 19 | def __init__(self, split): 20 | """ 21 | args: 22 | split - Split to use. Can be i) 'train': official training set, ii) 'test': official test set, iii) 'all': whole dataset. 23 | """ 24 | super().__init__() 25 | self.base_path = self.env_settings.mobiface_path 26 | self.sequence_list = self._get_sequence_list(split) 27 | self.split = split 28 | 29 | def get_sequence_list(self): 30 | return SequenceList([self._construct_sequence(s) for s in self.sequence_list]) 31 | 32 | def _get_sequence_list(self, split): 33 | 34 | self.train_meta_fn = osp.join(self.base_path, 'train.meta.csv') 35 | self.test_meta_fn = osp.join(self.base_path, 'test.meta.csv') 36 | self.train_meta = pd.read_csv(self.train_meta_fn,index_col=0).transpose().to_dict() 37 | self.test_meta = pd.read_csv(self.test_meta_fn,index_col=0).transpose().to_dict() 38 | if split == 'train': 39 | self.meta = self.train_meta 40 | elif split == 'test': 41 | self.meta = self.test_meta 42 | else: 43 | self.meta = {**self.train_meta, **self.test_meta} # In Python 3.5 or greater 44 | self.meta = OrderedDict(sorted(self.meta.items(), key=lambda t: t[0])) 45 | self.anno_files = [] 46 | for k,v in self.meta.items(): 47 | if k in self.train_meta.keys(): 48 | self.anno_files.append(osp.abspath(osp.join(self.base_path,'train', k+'.annot.csv'))) 49 | else: 50 | self.anno_files.append(osp.abspath(osp.join(self.base_path,'test', k+'.annot.csv'))) 51 | self.seq_names = sorted(list(self.meta.keys())) 52 | self.seq_dirs = [fn[:-len('.annot.csv')] for fn in self.anno_files] 53 | return self.seq_names 54 | 55 | def _construct_sequence(self, sequence_name): 56 | index = self.seq_names.index(sequence_name) 57 | img_files = sorted(glob.glob(self.seq_dirs[index]+'/*.jpg')) 58 | if len(img_files) == 0: 59 | img_files = sorted(glob.glob(self.seq_dirs[index]+'.png')) 60 | with open(self.anno_files[index], 'r') as f: 61 | anno = np.loadtxt(f, delimiter=',', skiprows=1, dtype=int) 62 | anno = anno[:,1:] 63 | assert anno.shape[1] == 4 64 | 65 | return Sequence(sequence_name, img_files, anno.reshape(-1, 4)) 66 | 67 | def __len__(self): 68 | return len(self.sequence_list) 69 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/prdimp50_vot18.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 14 * 16 13 | params.search_area_scale = 4 14 | params.feature_size_odd = False 15 | 16 | # 
Learning parameters 17 | params.sample_memory_size = 250 18 | params.learning_rate = 0.01 19 | params.init_samples_minimum_weight = 0.0 20 | params.train_skipping = 1 21 | 22 | # Net optimization params 23 | params.update_classifier = True 24 | params.net_opt_iter = 25 25 | params.net_opt_update_iter = 1 26 | params.net_opt_hn_iter = 1 27 | 28 | # Detection parameters 29 | params.window_output = True 30 | 31 | # Init augmentation parameters 32 | params.use_augmentation = True 33 | params.augmentation = {'fliplr': True, 34 | 'rotate': [5, -5, 10, -10, 20, -20, 30, -30, 45, -45, -60, 60], 35 | 'blur': [(2, 0.2), (0.2, 2), (3, 1), (1, 3), (2, 2)], 36 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6, -0.6)], 37 | 'dropout': (7, 0.2)} 38 | 39 | params.augmentation_expansion_factor = 2 40 | params.random_shift_factor = 1/3 41 | 42 | # Advanced localization parameters 43 | params.advanced_localization = True 44 | # params.score_preprocess = 'softmax' 45 | params.target_not_found_threshold = 0.00 46 | params.distractor_threshold = 99999 47 | params.hard_negative_threshold = 999999 48 | params.target_neighborhood_scale = 2.2 49 | params.dispalcement_scale = 0.7 50 | params.perform_hn_without_windowing = True 51 | params.hard_negative_learning_rate = 0.02 52 | params.update_scale_when_uncertain = True 53 | 54 | # IoUnet parameters 55 | params.box_refinement_space = 'relative' 56 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 57 | params.iounet_k = 3 # Top-k average to estimate final box 58 | params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 59 | params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 60 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 61 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 62 | params.box_refinement_iter = 10 # Number of iterations for refining the boxes 63 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 64 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 65 | 66 | params.net = NetWithBackbone(net_path='prdimp50.pth.tar', 67 | use_gpu=params.use_gpu) 68 | 69 | params.vot_anno_conversion_type = 'preserve_area' 70 | 71 | return params 72 | -------------------------------------------------------------------------------- /pytracking/evaluation/lagotdataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | from collections import OrderedDict 5 | from pytracking.evaluation.data import Sequence, BaseDataset, SequenceList 6 | 7 | 8 | class LaGOTDataset(BaseDataset): 9 | def __init__(self, sot_mode=False): 10 | super().__init__() 11 | self.sot_mode = sot_mode 12 | self.base_path = self.env_settings.lasot_path 13 | if sot_mode: 14 | self.anno_path = os.path.join(self.env_settings.lagot_path, 15 | 'LaGOT_one_object_per_sequence_annotations_final.json') 16 | else: 17 | self.anno_path = os.path.join(self.env_settings.lagot_path, 18 | 'LaGOT_multiple_object_per_sequence_annotations_final.json') 19 | 20 | self.annos = self._load_annotations() 21 | self.sequence_list = list(self.annos.keys()) 22 | 23 | def _load_annotations(self): 24 | with open(self.anno_path, 'r') as f: 25 | anno = json.load(f) 26 | 27 | return anno 28 | 29 | def get_sequence_list(self): 30 | return SequenceList([self._construct_sequence(s) for s in 
self.sequence_list]) 31 | 32 | def _construct_sequence(self, sequence_name): 33 | if self.sot_mode: 34 | ground_truth_rect = np.array(self.annos[sequence_name]['xywh']) 35 | frames_list = [os.path.join(self.base_path, p) for p in self.annos[sequence_name]['frames']] 36 | target_visible = np.ones(ground_truth_rect.shape[0], dtype=np.bool) 37 | target_visible[::3] = np.all(ground_truth_rect[::3] >= 0, axis=1) 38 | return Sequence(sequence_name, frames_list, 'LaGOT', ground_truth_rect.reshape(-1, 4), 39 | target_visible=target_visible) 40 | else: 41 | frames_list = [f'{self.base_path}/{p}' for p in self.annos[sequence_name]['frames']] 42 | 43 | track_ids = list(self.annos[sequence_name]['xywh'].keys()) 44 | 45 | gt_bboxes = OrderedDict() 46 | 47 | for tid, boxes in self.annos[sequence_name]['xywh'].items(): 48 | gt_bboxes[tid] = np.array(boxes) 49 | 50 | init_data = dict() 51 | for tid, boxes in gt_bboxes.items(): 52 | im_id = 0 53 | init_box = boxes[im_id] 54 | 55 | if np.all(init_box > -1): 56 | if im_id not in init_data: 57 | init_data[im_id] = {'object_ids': [tid], 'bbox': {tid: np.array(init_box)}} 58 | else: 59 | init_data[im_id]['object_ids'].append(tid) 60 | init_data[im_id]['bbox'][tid] = np.array(init_box) 61 | 62 | assert set(init_data[0]['object_ids']) == set(track_ids) 63 | gt_bboxes = OrderedDict({key: val for key, val in gt_bboxes.items() if key in track_ids}) 64 | 65 | return Sequence(name=sequence_name, frames=frames_list, dataset='LaGOT', ground_truth_rect=gt_bboxes, 66 | init_data=init_data, object_ids=track_ids, 67 | multiobj_mode=True) 68 | 69 | def __len__(self): 70 | return len(self.sequence_list) 71 | 72 | def _get_sequence_list(self): 73 | return list(self.annos.keys()) 74 | -------------------------------------------------------------------------------- /ltr/models/kys/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def shift_features(feat, relative_translation_vector): 7 | T_mat = torch.eye(2).repeat(feat.shape[0], 1, 1).to(feat.device) 8 | T_mat = torch.cat((T_mat, relative_translation_vector.view(-1, 2, 1)), dim=2) 9 | 10 | grid = F.affine_grid(T_mat, feat.shape) 11 | 12 | feat_out = F.grid_sample(feat, grid) 13 | return feat_out 14 | 15 | 16 | class CenterShiftFeatures(nn.Module): 17 | def __init__(self, feature_stride): 18 | super().__init__() 19 | self.feature_stride = feature_stride 20 | 21 | def forward(self, feat, anno): 22 | anno = anno.view(-1, 4) 23 | c_x = (anno[:, 0] + anno[:, 2] * 0.5) / self.feature_stride 24 | c_y = (anno[:, 1] + anno[:, 3] * 0.5) / self.feature_stride 25 | 26 | t_x = 2 * (c_x - feat.shape[-1] * 0.5) / feat.shape[-1] 27 | t_y = 2 * (c_y - feat.shape[-2] * 0.5) / feat.shape[-2] 28 | 29 | t = torch.cat((t_x.view(-1, 1), t_y.view(-1, 1)), dim=1) 30 | 31 | feat_out = shift_features(feat, t) 32 | return feat_out 33 | 34 | 35 | class DiMPScoreJittering(): 36 | def __init__(self, p_zero=0.0, distractor_ratio=1.0, p_distractor=0, max_distractor_enhance_factor=1, 37 | min_distractor_enhance_factor=0.75): 38 | """ Jitters predicted score map by randomly enhancing distractor peaks and masking out target peaks""" 39 | self.p_zero = p_zero 40 | self.distractor_ratio = distractor_ratio 41 | self.p_distractor = p_distractor 42 | self.max_distractor_enhance_factor = max_distractor_enhance_factor 43 | self.min_distractor_enhance_factor = min_distractor_enhance_factor 44 | 45 | def rand(self, sz, min_val, max_val): 46 | 
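# Draws `sz` uniform samples in [min_val, max_val); min_val and max_val are expected
# to be tensors here, so the samples are created directly on min_val's device.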
return torch.rand(sz, device=min_val.device) * (max_val - min_val) + min_val 47 | 48 | def __call__(self, score, label): 49 | score_shape = score.shape 50 | 51 | score = score.view(-1, score_shape[-2]*score_shape[-1]) 52 | num_score_maps = score.shape[0] 53 | 54 | label = label.view(score.shape) 55 | 56 | dist_roll_value = torch.rand(num_score_maps).to(score.device) 57 | 58 | score_c = score.clone().detach() 59 | score_neg = score_c * (label < 1e-4).float() 60 | score_pos = score_c * (label > 0.2).float() 61 | 62 | target_max_val, _ = torch.max(score_pos, dim=1) 63 | dist_max_val, dist_id = torch.max(score_neg, dim=1) 64 | 65 | jitter_score = (dist_roll_value < self.p_distractor) & ((dist_max_val / target_max_val) > self.distractor_ratio) 66 | 67 | for i in range(num_score_maps): 68 | score_c[i, dist_id[i]] = self.rand(1, target_max_val[i]*self.min_distractor_enhance_factor, 69 | target_max_val[i]*self.max_distractor_enhance_factor) 70 | 71 | zero_roll_value = torch.rand(num_score_maps).to(score.device) 72 | zero_score = (zero_roll_value < self.p_zero) & ~jitter_score 73 | 74 | score_c[zero_score, :] = 0 75 | 76 | score_jittered = score*(1.0 - (jitter_score | zero_score).float()).view(num_score_maps, 1).float() + \ 77 | score_c*(jitter_score | zero_score).float().view(num_score_maps, 1).float() 78 | 79 | return score_jittered.view(score_shape) 80 | -------------------------------------------------------------------------------- /ltr/data/bounding_box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def rect_to_rel(bb, sz_norm=None): 5 | """Convert standard rectangular parametrization of the bounding box [x, y, w, h] 6 | to relative parametrization [cx/sw, cy/sh, log(w), log(h)], where [cx, cy] is the center coordinate. 7 | args: 8 | bb - N x 4 tensor of boxes. 9 | sz_norm - [N] x 2 tensor of value of [sw, sh] (optional). sw=w and sh=h if not given. 10 | """ 11 | 12 | c = bb[...,:2] + 0.5 * bb[...,2:] 13 | if sz_norm is None: 14 | c_rel = c / bb[...,2:] 15 | else: 16 | c_rel = c / sz_norm 17 | sz_rel = torch.log(bb[...,2:]) 18 | return torch.cat((c_rel, sz_rel), dim=-1) 19 | 20 | 21 | def rel_to_rect(bb, sz_norm=None): 22 | """Inverts the effect of rect_to_rel. See above.""" 23 | 24 | sz = torch.exp(bb[...,2:]) 25 | if sz_norm is None: 26 | c = bb[...,:2] * sz 27 | else: 28 | c = bb[...,:2] * sz_norm 29 | tl = c - 0.5 * sz 30 | return torch.cat((tl, sz), dim=-1) 31 | 32 | 33 | def masks_to_bboxes(mask, fmt='c'): 34 | 35 | """ Convert a mask tensor to one or more bounding boxes. 36 | Note: This function is a bit new, make sure it does what it says. /Andreas 37 | :param mask: Tensor of masks, shape = (..., H, W) 38 | :param fmt: bbox layout. 
'c' => "center + size" or (x_center, y_center, width, height) 39 | 't' => "top left + size" or (x_left, y_top, width, height) 40 | 'v' => "vertices" or (x_left, y_top, x_right, y_bottom) 41 | :return: tensor containing a batch of bounding boxes, shape = (..., 4) 42 | """ 43 | batch_shape = mask.shape[:-2] 44 | mask = mask.reshape((-1, *mask.shape[-2:])) 45 | bboxes = [] 46 | 47 | for m in mask: 48 | mx = m.sum(dim=-2).nonzero() 49 | my = m.sum(dim=-1).nonzero() 50 | bb = [mx.min(), my.min(), mx.max(), my.max()] if (len(mx) > 0 and len(my) > 0) else [0, 0, 0, 0] 51 | bboxes.append(bb) 52 | 53 | bboxes = torch.tensor(bboxes, dtype=torch.float32, device=mask.device) 54 | bboxes = bboxes.reshape(batch_shape + (4,)) 55 | 56 | if fmt == 'v': 57 | return bboxes 58 | 59 | x1 = bboxes[..., :2] 60 | s = bboxes[..., 2:] - x1 + 1 61 | 62 | if fmt == 'c': 63 | return torch.cat((x1 + 0.5 * s, s), dim=-1) 64 | elif fmt == 't': 65 | return torch.cat((x1, s), dim=-1) 66 | 67 | raise ValueError("Undefined bounding box layout '%s'" % fmt) 68 | 69 | 70 | def masks_to_bboxes_multi(mask, ids, fmt='c'): 71 | assert mask.dim() == 2 72 | bboxes = [] 73 | 74 | for id in ids: 75 | mx = (mask == id).sum(dim=-2).nonzero() 76 | my = (mask == id).float().sum(dim=-1).nonzero() 77 | bb = [mx.min(), my.min(), mx.max(), my.max()] if (len(mx) > 0 and len(my) > 0) else [0, 0, 0, 0] 78 | 79 | bb = torch.tensor(bb, dtype=torch.float32, device=mask.device) 80 | 81 | x1 = bb[:2] 82 | s = bb[2:] - x1 + 1 83 | 84 | if fmt == 'v': 85 | pass 86 | elif fmt == 'c': 87 | bb = torch.cat((x1 + 0.5 * s, s), dim=-1) 88 | elif fmt == 't': 89 | bb = torch.cat((x1, s), dim=-1) 90 | else: 91 | raise ValueError("Undefined bounding box layout '%s'" % fmt) 92 | bboxes.append(bb) 93 | 94 | return bboxes 95 | -------------------------------------------------------------------------------- /pytracking/parameter/keep_track/default.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone, NetWrapper 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 30*16 13 | params.search_area_scale = 8 14 | params.border_mode = 'inside_major' 15 | params.patch_max_scale_change = 1.5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 50 19 | params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | params.train_skipping = 20 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | params.net_opt_iter = 10 26 | params.net_opt_update_iter = 2 27 | params.net_opt_hn_iter = 1 28 | 29 | # Detection parameters 30 | params.window_output = False 31 | 32 | # Init augmentation parameters 33 | params.use_augmentation = True 34 | params.augmentation = {'fliplr': True, 35 | 'rotate': [10, -10, 45, -45], 36 | 'blur': [(3,1), (1, 3), (2, 2)], 37 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 38 | 'dropout': (2, 0.2)} 39 | 40 | params.augmentation_expansion_factor = 2 41 | params.random_shift_factor = 1/3 42 | 43 | # Advanced localization parameters 44 | params.advanced_localization = True 45 | params.target_not_found_threshold = 0.25 46 | params.distractor_threshold = 0.8 47 | params.hard_negative_threshold = 0.5 48 | params.target_neighborhood_scale = 2.2 49 | params.dispalcement_scale = 0.8 50 | params.hard_negative_learning_rate = 
0.02 51 | params.update_scale_when_uncertain = True 52 | 53 | # IoUnet parameters 54 | params.box_refinement_space = 'relative' 55 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 56 | params.iounet_k = 3 # Top-k average to estimate final box 57 | params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 58 | params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 59 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 60 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 61 | params.box_refinement_iter = 10 # Number of iterations for refining the boxes 62 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 63 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 64 | 65 | # KeepTrack parameters 66 | params.use_certainty_for_weight_computation = True 67 | params.certainty_for_weight_computation_ths = 0.5 68 | params.target_candidate_matching_net = NetWrapper(net_path='keep_track.pth.tar', use_gpu=params.use_gpu) 69 | 70 | params.vot_anno_conversion_type = 'preserve_area' 71 | params.net = NetWithBackbone(net_path='super_dimp_simple.pth.tar', use_gpu=params.use_gpu) 72 | 73 | params.visualize_candidate_matching = False 74 | params.visualize_candidate_assignment_matrix = False 75 | 76 | return params 77 | -------------------------------------------------------------------------------- /ltr/dataset/ecssd.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .base_image_dataset import BaseImageDataset 3 | from ltr.data.image_loader import jpeg4py_loader, opencv_loader, imread_indexed 4 | import torch 5 | from collections import OrderedDict 6 | from ltr.admin.environment import env_settings 7 | from ltr.data.bounding_box_utils import masks_to_bboxes 8 | 9 | 10 | class ECSSD(BaseImageDataset): 11 | """ 12 | Extended Complex Scene Saliency Dataset (ECSSD) 13 | 14 | Publication: 15 | Hierarchical Image Saliency Detection on Extended CSSD 16 | Jianping Shi, Qiong Yan, Li Xu, Jiaya Jia 17 | TPAMI, 2016 18 | https://arxiv.org/pdf/1408.5418.pdf 19 | 20 | Download the dataset from http://www.cse.cuhk.edu.hk/leojia/projects/hsaliency/dataset.html 21 | """ 22 | def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, min_area=None): 23 | """ 24 | args: 25 | root - path to ECSSD root folder 26 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 27 | is used by default. 28 | data_fraction - Fraction of dataset to be used. The complete dataset is used by default 29 | min_area - Objects with area less than min_area are filtered out. 
Default is 0.0 30 | """ 31 | root = env_settings().ecssd_dir if root is None else root 32 | super().__init__('ECSSD', root, image_loader) 33 | 34 | self.image_list = self._load_dataset(min_area=min_area) 35 | 36 | if data_fraction is not None: 37 | raise NotImplementedError 38 | 39 | def _load_dataset(self, min_area=None): 40 | images = [] 41 | 42 | for i in range(1, 1001): 43 | a = imread_indexed(os.path.join(self.root, 'ground_truth_mask', '{:04d}.png'.format(i))) 44 | 45 | if min_area is None or (a > 0).sum() > min_area: 46 | images.append(i) 47 | 48 | return images 49 | 50 | def get_name(self): 51 | return 'ecssd' 52 | 53 | def has_segmentation_info(self): 54 | return True 55 | 56 | def get_image_info(self, im_id): 57 | mask = imread_indexed(os.path.join(self.root, 'ground_truth_mask', '{:04d}.png'.format(self.image_list[im_id]))) 58 | 59 | mask = torch.Tensor(mask == 255) 60 | bbox = masks_to_bboxes(mask, fmt='t').view(4,) 61 | 62 | valid = (bbox[2] > 0) & (bbox[3] > 0) 63 | visible = valid.clone().byte() 64 | 65 | return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible} 66 | 67 | def get_meta_info(self, im_id): 68 | object_meta = OrderedDict({'object_class_name': None, 69 | 'motion_class': None, 70 | 'major_class': None, 71 | 'root_class': None, 72 | 'motion_adverb': None}) 73 | 74 | return object_meta 75 | 76 | def get_image(self, image_id, anno=None): 77 | frame = self.image_loader(os.path.join(self.root, 'images', '{:04d}.jpg'.format(self.image_list[image_id]))) 78 | 79 | if anno is None: 80 | anno = self.get_image_info(image_id) 81 | 82 | object_meta = self.get_meta_info(image_id) 83 | 84 | return frame, anno, object_meta 85 | -------------------------------------------------------------------------------- /ltr/dataset/hku_is.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .base_image_dataset import BaseImageDataset 3 | from ltr.data.image_loader import jpeg4py_loader, opencv_loader, imread_indexed 4 | import torch 5 | from collections import OrderedDict 6 | from ltr.admin.environment import env_settings 7 | from ltr.data.bounding_box_utils import masks_to_bboxes 8 | 9 | 10 | class HKUIS(BaseImageDataset): 11 | """ 12 | HKU-IS salient object detection dataset 13 | 14 | Publication: 15 | Visual saliency based on multiscale deep features 16 | Guanbin Li and Yizhou Yu 17 | CVPR, 2015 18 | https://arxiv.org/pdf/1503.08663.pdf 19 | 20 | Download dataset from https://sites.google.com/site/ligb86/hkuis 21 | """ 22 | 23 | def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, min_area=None): 24 | """ 25 | args: 26 | root - path to HKU-IS root folder 27 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 28 | is used by default. 29 | data_fraction - Fraction of dataset to be used. The complete dataset is used by default 30 | min_area - Objects with area less than min_area are filtered out. 
Default is 0.0 31 | """ 32 | root = env_settings().hkuis_dir if root is None else root 33 | super().__init__('HKUIS', root, image_loader) 34 | 35 | self.image_list, self.anno_list = self._load_dataset(min_area=min_area) 36 | 37 | if data_fraction is not None: 38 | raise NotImplementedError 39 | 40 | def _load_dataset(self, min_area=None): 41 | files_list = os.listdir(os.path.join(self.root, 'imgs')) 42 | image_list = [f[:-4] for f in files_list] 43 | 44 | images = [] 45 | annos = [] 46 | 47 | for f in image_list: 48 | a = imread_indexed(os.path.join(self.root, 'gt', '{}.png'.format(f))) 49 | 50 | if min_area is None or (a > 0).sum() > min_area: 51 | im = opencv_loader(os.path.join(self.root, 'imgs', '{}.png'.format(f))) 52 | images.append(im) 53 | annos.append(a) 54 | 55 | return images, annos 56 | 57 | def get_name(self): 58 | return 'hku-is' 59 | 60 | def has_segmentation_info(self): 61 | return True 62 | 63 | def get_image_info(self, im_id): 64 | mask = self.anno_list[im_id] 65 | mask = torch.Tensor(mask == 255) 66 | bbox = masks_to_bboxes(mask, fmt='t').view(4,) 67 | 68 | valid = (bbox[2] > 0) & (bbox[3] > 0) 69 | visible = valid.clone().byte() 70 | 71 | return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible} 72 | 73 | def get_meta_info(self, im_id): 74 | object_meta = OrderedDict({'object_class_name': None, 75 | 'motion_class': None, 76 | 'major_class': None, 77 | 'root_class': None, 78 | 'motion_adverb': None}) 79 | 80 | return object_meta 81 | 82 | def get_image(self, image_id, anno=None): 83 | frame = self.image_list[image_id] 84 | 85 | if anno is None: 86 | anno = self.get_image_info(image_id) 87 | 88 | object_meta = self.get_meta_info(image_id) 89 | 90 | return frame, anno, object_meta 91 | -------------------------------------------------------------------------------- /pytracking/parameter/keep_track/default_fast.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone, NetWrapper 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 22*16 13 | params.search_area_scale = 6 14 | params.border_mode = 'inside_major' 15 | params.patch_max_scale_change = 1.5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 50 19 | params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | params.train_skipping = 20 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | params.net_opt_iter = 10 26 | params.net_opt_update_iter = 2 27 | params.net_opt_hn_iter = 1 28 | 29 | # Detection parameters 30 | params.window_output = False 31 | 32 | # Init augmentation parameters 33 | params.use_augmentation = True 34 | params.augmentation = {'fliplr': True, 35 | 'rotate': [10, -10, 45, -45], 36 | 'blur': [(3,1), (1, 3), (2, 2)], 37 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 38 | 'dropout': (2, 0.2)} 39 | 40 | params.augmentation_expansion_factor = 2 41 | params.random_shift_factor = 1/3 42 | 43 | # Advanced localization parameters 44 | params.advanced_localization = True 45 | params.target_not_found_threshold = 0.25 46 | params.distractor_threshold = 0.8 47 | params.hard_negative_threshold = 0.5 48 | params.target_neighborhood_scale = 2.2 49 | params.dispalcement_scale = 0.8 50 | params.hard_negative_learning_rate = 0.02 51 | 
params.update_scale_when_uncertain = True 52 | 53 | # IoUnet parameters 54 | params.box_refinement_space = 'relative' 55 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 56 | params.iounet_k = 3 # Top-k average to estimate final box 57 | params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 58 | params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 59 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 60 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 61 | params.box_refinement_iter = 3 # Number of iterations for refining the boxes 62 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 63 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 64 | 65 | # KeepTrack parameters 66 | params.use_certainty_for_weight_computation = True 67 | params.certainty_for_weight_computation_ths = 0.5 68 | params.local_max_candidate_score_th = 0.1 69 | params.target_candidate_matching_net = NetWrapper(net_path='keep_track.pth.tar', use_gpu=params.use_gpu) 70 | 71 | params.vot_anno_conversion_type = 'preserve_area' 72 | params.net = NetWithBackbone(net_path='super_dimp_simple.pth.tar', use_gpu=params.use_gpu) 73 | 74 | params.visualize_candidate_matching = False 75 | params.visualize_candidate_assignment_matrix = False 76 | 77 | return params 78 | -------------------------------------------------------------------------------- /ltr/dataset/msra10k.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .base_image_dataset import BaseImageDataset 3 | from ltr.data.image_loader import jpeg4py_loader, imread_indexed 4 | import torch 5 | from collections import OrderedDict 6 | from ltr.admin.environment import env_settings 7 | from ltr.data.bounding_box_utils import masks_to_bboxes 8 | 9 | 10 | class MSRA10k(BaseImageDataset): 11 | """ 12 | MSRA10k salient object detection dataset 13 | 14 | Publication: 15 | Global contrast based salient region detection 16 | Ming-Ming Cheng, Niloy J. Mitra, Xiaolei Huang, Philip H. S. Torr, and Shi-Min Hu 17 | TPAMI, 2015 18 | https://mmcheng.net/mftp/Papers/SaliencyTPAMI.pdf 19 | 20 | Download dataset from https://mmcheng.net/msra10k/ 21 | """ 22 | 23 | def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, min_area=None): 24 | """ 25 | args: 26 | root - path to MSRA10k root folder 27 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 28 | is used by default. 29 | data_fraction - Fraction of dataset to be used. The complete dataset is used by default 30 | min_area - Objects with area less than min_area are filtered out. 
Default is 0.0 31 | """ 32 | root = env_settings().msra10k_dir if root is None else root 33 | super().__init__('MSRA10k', root, image_loader) 34 | 35 | self.image_list = self._load_dataset(min_area=min_area) 36 | 37 | if data_fraction is not None: 38 | raise NotImplementedError 39 | 40 | def _load_dataset(self, min_area=None): 41 | files_list = os.listdir(os.path.join(self.root, 'Imgs')) 42 | image_list = [f[:-4] for f in files_list if f[-3:] == 'jpg'] 43 | 44 | images = [] 45 | 46 | for f in image_list: 47 | a = imread_indexed(os.path.join(self.root, 'Imgs', '{}.png'.format(f))) 48 | 49 | if min_area is None or (a > 0).sum() > min_area: 50 | images.append(f) 51 | 52 | return images 53 | 54 | def get_name(self): 55 | return 'msra10k' 56 | 57 | def has_segmentation_info(self): 58 | return True 59 | 60 | def get_image_info(self, im_id): 61 | mask = imread_indexed(os.path.join(self.root, 'Imgs', '{}.png'.format(self.image_list[im_id]))) 62 | mask = torch.Tensor(mask == 255) 63 | bbox = masks_to_bboxes(mask, fmt='t').view(4,) 64 | 65 | valid = (bbox[2] > 0) & (bbox[3] > 0) 66 | visible = valid.clone().byte() 67 | 68 | return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible} 69 | 70 | def get_meta_info(self, im_id): 71 | object_meta = OrderedDict({'object_class_name': None, 72 | 'motion_class': None, 73 | 'major_class': None, 74 | 'root_class': None, 75 | 'motion_adverb': None}) 76 | 77 | return object_meta 78 | 79 | def get_image(self, image_id, anno=None): 80 | frame = self.image_loader(os.path.join(self.root, 'Imgs', '{}.jpg'.format(self.image_list[image_id]))) 81 | 82 | if anno is None: 83 | anno = self.get_image_info(image_id) 84 | 85 | object_meta = self.get_meta_info(image_id) 86 | 87 | return frame, anno, object_meta 88 | -------------------------------------------------------------------------------- /pytracking/evaluation/got10kdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pytracking.evaluation.data import Sequence, BaseDataset, SequenceList 3 | from pytracking.utils.load_text import load_text 4 | import os 5 | from PIL import Image 6 | from pathlib import Path 7 | 8 | 9 | class GOT10KDataset(BaseDataset): 10 | """ GOT-10k dataset. 
11 | 12 | Publication: 13 | GOT-10k: A Large High-Diversity Benchmark for Generic Object Tracking in the Wild 14 | Lianghua Huang, Xin Zhao, and Kaiqi Huang 15 | arXiv:1810.11981, 2018 16 | https://arxiv.org/pdf/1810.11981.pdf 17 | 18 | Download dataset from http://got-10k.aitestunion.com/downloads 19 | """ 20 | def __init__(self, split, vos_mode=False): 21 | super().__init__() 22 | # Split can be test, val, or ltrval (a validation split consisting of videos from the official train set) 23 | if split == 'test' or split == 'val': 24 | self.base_path = os.path.join(self.env_settings.got10k_path, split) 25 | else: 26 | self.base_path = os.path.join(self.env_settings.got10k_path, 'train') 27 | 28 | self.sequence_list = self._get_sequence_list(split) 29 | self.split = split 30 | 31 | self.vos_mode = vos_mode 32 | 33 | self.mask_path = None 34 | if self.vos_mode: 35 | self.mask_path = self.env_settings.got10k_mask_path 36 | 37 | def get_sequence_list(self): 38 | return SequenceList([self._construct_sequence(s) for s in self.sequence_list]) 39 | 40 | def _construct_sequence(self, sequence_name): 41 | anno_path = '{}/{}/groundtruth.txt'.format(self.base_path, sequence_name) 42 | 43 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64) 44 | 45 | frames_path = '{}/{}'.format(self.base_path, sequence_name) 46 | frame_list = [frame for frame in os.listdir(frames_path) if frame.endswith(".jpg")] 47 | frame_list.sort(key=lambda f: int(f[:-4])) 48 | frames_list = [os.path.join(frames_path, frame) for frame in frame_list] 49 | 50 | masks = None 51 | if self.vos_mode: 52 | seq_mask_path = '{}/{}'.format(self.mask_path, sequence_name) 53 | masks = [self._load_mask(Path(self._get_anno_frame_path(seq_mask_path, f[:-3] + 'png'))) for f in 54 | frame_list[0:1]] 55 | 56 | return Sequence(sequence_name, frames_list, 'got10k', ground_truth_rect.reshape(-1, 4), 57 | ground_truth_seg=masks) 58 | 59 | @staticmethod 60 | def _load_mask(path): 61 | if not path.exists(): 62 | print('Error: Could not read: ', path, flush=True) 63 | return None 64 | im = np.array(Image.open(path)) 65 | im = np.atleast_3d(im)[..., 0] 66 | return im 67 | 68 | def _get_anno_frame_path(self, seq_path, frame_name): 69 | return os.path.join(seq_path, frame_name) 70 | 71 | def __len__(self): 72 | return len(self.sequence_list) 73 | 74 | def _get_sequence_list(self, split): 75 | with open('{}/list.txt'.format(self.base_path)) as f: 76 | sequence_list = f.read().splitlines() 77 | 78 | if split == 'ltrval': 79 | with open('{}/got10k_val_split.txt'.format(self.env_settings.dataspec_path)) as f: 80 | seq_ids = f.read().splitlines() 81 | 82 | sequence_list = [sequence_list[int(x)] for x in seq_ids] 83 | return sequence_list 84 | -------------------------------------------------------------------------------- /ltr/models/target_classifier/features.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from torchvision.models.resnet import BasicBlock, Bottleneck 5 | from ltr.models.layers.normalization import InstanceL2Norm 6 | from ltr.models.layers.transform import InterpCat 7 | 8 | 9 | def residual_basic_block(feature_dim=256, num_blocks=1, l2norm=True, final_conv=False, norm_scale=1.0, out_dim=None, 10 | interp_cat=False, final_relu=False, init_pool=False): 11 | """Construct a network block based on the BasicBlock used in ResNet 18 and 34.""" 12 | if out_dim is None: 13 | out_dim = feature_dim 14 | feat_layers = 
[] 15 | if interp_cat: 16 | feat_layers.append(InterpCat()) 17 | if init_pool: 18 | feat_layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 19 | for i in range(num_blocks): 20 | odim = feature_dim if i < num_blocks - 1 + int(final_conv) else out_dim 21 | feat_layers.append(BasicBlock(feature_dim, odim)) 22 | if final_conv: 23 | feat_layers.append(nn.Conv2d(feature_dim, out_dim, kernel_size=3, padding=1, bias=False)) 24 | if final_relu: 25 | feat_layers.append(nn.ReLU(inplace=True)) 26 | if l2norm: 27 | feat_layers.append(InstanceL2Norm(scale=norm_scale)) 28 | return nn.Sequential(*feat_layers) 29 | 30 | 31 | def residual_basic_block_pool(feature_dim=256, num_blocks=1, l2norm=True, final_conv=False, norm_scale=1.0, out_dim=None, 32 | pool=True): 33 | """Construct a network block based on the BasicBlock used in ResNet.""" 34 | if out_dim is None: 35 | out_dim = feature_dim 36 | feat_layers = [] 37 | for i in range(num_blocks): 38 | odim = feature_dim if i < num_blocks - 1 + int(final_conv) else out_dim 39 | feat_layers.append(BasicBlock(feature_dim, odim)) 40 | if final_conv: 41 | feat_layers.append(nn.Conv2d(feature_dim, out_dim, kernel_size=3, padding=1, bias=False)) 42 | if pool: 43 | feat_layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 44 | if l2norm: 45 | feat_layers.append(InstanceL2Norm(scale=norm_scale)) 46 | 47 | return nn.Sequential(*feat_layers) 48 | 49 | 50 | def residual_bottleneck(feature_dim=256, num_blocks=1, l2norm=True, final_conv=False, norm_scale=1.0, out_dim=None, 51 | interp_cat=False, final_relu=False, final_pool=False, input_dim=None, final_stride=1): 52 | """Construct a network block based on the Bottleneck block used in ResNet.""" 53 | if out_dim is None: 54 | out_dim = feature_dim 55 | if input_dim is None: 56 | input_dim = 4*feature_dim 57 | dim = input_dim 58 | feat_layers = [] 59 | if interp_cat: 60 | feat_layers.append(InterpCat()) 61 | for i in range(num_blocks): 62 | planes = feature_dim if i < num_blocks - 1 + int(final_conv) else out_dim // 4 63 | feat_layers.append(Bottleneck(dim, planes)) 64 | dim = 4*feature_dim 65 | if final_conv: 66 | feat_layers.append(nn.Conv2d(dim, out_dim, kernel_size=3, padding=1, bias=False, stride=final_stride)) 67 | if final_relu: 68 | feat_layers.append(nn.ReLU(inplace=True)) 69 | if final_pool: 70 | feat_layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 71 | if l2norm: 72 | feat_layers.append(InstanceL2Norm(scale=norm_scale)) 73 | return nn.Sequential(*feat_layers) 74 | 75 | -------------------------------------------------------------------------------- /ltr/dataset/lasotvos.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | import numpy as np 4 | import torch 5 | import pandas 6 | import csv 7 | from PIL import Image 8 | from ltr.dataset.lasot import Lasot 9 | from ltr.data.image_loader import jpeg4py_loader, imread_indexed 10 | 11 | 12 | class LasotVOS(Lasot): 13 | """ Lasot video object segmentation dataset. 
14 | """ 15 | 16 | def __init__(self, anno_path=None, split='train'): 17 | super().__init__(split=split) 18 | self.anno_path = anno_path 19 | self.skip_interval = 5 20 | 21 | @staticmethod 22 | def _load_anno(path): 23 | if not path.exists(): 24 | print('path', path, flush=True) 25 | return None 26 | im = np.array(Image.open(path)) 27 | im = np.atleast_3d(im)[..., 0] 28 | # im = imread_indexed(path) 29 | return im 30 | 31 | def _get_anno_sequence_path(self, seq_id): 32 | return os.path.join(self.anno_path, self.sequence_list[seq_id]) 33 | 34 | def _get_anno_frame_path(self, seq_path, frame_id): 35 | frame_number = 1 + frame_id * self.skip_interval 36 | return os.path.join(seq_path, '{:08}.png'.format(frame_number)) # frames start from 1 37 | 38 | ######################### 39 | def _read_bb_anno(self, seq_path): 40 | bb_anno_file = os.path.join(seq_path, "groundtruth.txt") 41 | gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, 42 | low_memory=False).values 43 | gt = torch.tensor(gt) 44 | gt = gt[:1000:self.skip_interval] 45 | return gt 46 | 47 | def _read_target_visible(self, seq_path): 48 | # Read full occlusion and out_of_view 49 | occlusion_file = os.path.join(seq_path, "full_occlusion.txt") 50 | out_of_view_file = os.path.join(seq_path, "out_of_view.txt") 51 | 52 | with open(occlusion_file, 'r', newline='') as f: 53 | occlusion = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 54 | with open(out_of_view_file, 'r') as f: 55 | out_of_view = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 56 | 57 | target_visible = ~occlusion & ~out_of_view 58 | 59 | target_visible = target_visible[:1000:self.skip_interval] 60 | 61 | return target_visible 62 | 63 | def _get_frame_path(self, seq_path, frame_id): 64 | frame_number = 1 + frame_id * self.skip_interval 65 | return os.path.join(seq_path, 'img', '{:08}.jpg'.format(frame_number)) # frames start from 1 66 | 67 | ######################### 68 | def get_frames(self, seq_id, frame_ids, anno=None): 69 | seq_path = self._get_sequence_path(seq_id) 70 | 71 | # TODO FIX Me ?? This is not used by the LWL sampler 72 | obj_meta = None 73 | # obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]] 74 | 75 | frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids] 76 | 77 | if anno is None: 78 | anno = self.get_sequence_info(seq_id) 79 | 80 | anno_frames = {} 81 | for key, value in anno.items(): 82 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 83 | 84 | anno_seq_path = self._get_anno_sequence_path(seq_id) 85 | 86 | labels = [self._load_anno(Path(self._get_anno_frame_path(anno_seq_path, f))) for f in frame_ids] 87 | labels = [torch.Tensor(lb) for lb in labels] 88 | anno_frames['mask'] = labels 89 | 90 | return frame_list, anno_frames, obj_meta 91 | -------------------------------------------------------------------------------- /ltr/dataset/synthetic_video.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from .base_video_dataset import BaseVideoDataset 3 | from ltr.data.bounding_box_utils import masks_to_bboxes 4 | 5 | 6 | class SyntheticVideo(BaseVideoDataset): 7 | """ 8 | Create a synthetic video dataset from an image dataset by applying a random transformation to images. 
9 | """ 10 | def __init__(self, base_image_dataset, transform=None): 11 | """ 12 | args: 13 | base_image_dataset - Image dataset used for generating synthetic videos 14 | transform - Set of transforms to be applied to the images to generate synthetic video. 15 | """ 16 | super().__init__(base_image_dataset.get_name() + '_syn_vid', base_image_dataset.root, 17 | base_image_dataset.image_loader) 18 | self.base_image_dataset = base_image_dataset 19 | self.transform = transform 20 | 21 | def get_name(self): 22 | return self.name 23 | 24 | def is_video_sequence(self): 25 | return False 26 | 27 | def has_class_info(self): 28 | return self.base_image_dataset.has_class_info() 29 | 30 | def has_occlusion_info(self): 31 | return True 32 | 33 | def get_num_sequences(self): 34 | return self.base_image_dataset.get_num_images() 35 | 36 | def get_num_classes(self): 37 | return len(self.class_list) 38 | 39 | def get_sequences_in_class(self, class_name): 40 | return self.get_images_in_class[class_name] 41 | 42 | def get_sequence_info(self, seq_id): 43 | image_info = self.base_image_dataset.get_image_info(seq_id) 44 | 45 | image_info = {k: v.unsqueeze(0) for k, v in image_info.items()} 46 | return image_info 47 | 48 | def get_class_name(self, seq_id): 49 | return self.base_image_dataset.get_class_name(seq_id) 50 | 51 | def get_frames(self, seq_id, frame_ids, anno=None): 52 | frame, anno, object_meta = self.base_image_dataset.get_image(seq_id, anno=anno) 53 | 54 | frame_list = [frame.copy() for _ in frame_ids] 55 | 56 | if anno is None: 57 | anno = self.get_sequence_info(seq_id) 58 | 59 | anno_frames = {} 60 | for key, value in anno.items(): 61 | anno_frames[key] = [value[0].clone() for f_id in frame_ids] 62 | 63 | if self.transform is not None: 64 | if 'mask' in anno_frames.keys(): 65 | frame_list, anno_frames['bbox'], anno_frames['mask'] = self.transform(image=frame_list, 66 | bbox=anno_frames['bbox'], 67 | mask=anno_frames['mask'], 68 | joint=False) 69 | 70 | anno_frames['bbox'] = [masks_to_bboxes(m, fmt='t') for m in anno_frames['mask']] 71 | else: 72 | frame_list, anno_frames['bbox'] = self.transform(image=frame_list, 73 | bbox=anno_frames['bbox'], 74 | joint=False) 75 | 76 | object_meta = OrderedDict({'object_class_name': self.get_class_name(seq_id), 77 | 'motion_class': None, 78 | 'major_class': None, 79 | 'root_class': None, 80 | 'motion_adverb': None}) 81 | 82 | return frame_list, anno_frames, object_meta 83 | -------------------------------------------------------------------------------- /ltr/dataset/base_video_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from ltr.data.image_loader import jpeg4py_loader 3 | 4 | 5 | class BaseVideoDataset(torch.utils.data.Dataset): 6 | """ Base class for video datasets """ 7 | 8 | def __init__(self, name, root, image_loader=jpeg4py_loader): 9 | """ 10 | args: 11 | root - The root path to the dataset 12 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 13 | is used by default. 14 | """ 15 | self.name = name 16 | self.root = root 17 | self.image_loader = image_loader 18 | 19 | self.sequence_list = [] # Contains the list of sequences. 20 | self.class_list = [] 21 | 22 | def __len__(self): 23 | """ Returns size of the dataset 24 | returns: 25 | int - number of samples in the dataset 26 | """ 27 | return self.get_num_sequences() 28 | 29 | def __getitem__(self, index): 30 | """ Not to be used! Check get_frames() instead. 
31 | """ 32 | return None 33 | 34 | def is_video_sequence(self): 35 | """ Returns whether the dataset is a video dataset or an image dataset 36 | 37 | returns: 38 | bool - True if a video dataset 39 | """ 40 | return True 41 | 42 | def is_synthetic_video_dataset(self): 43 | """ Returns whether the dataset contains real videos or synthetic 44 | 45 | returns: 46 | bool - True if a video dataset 47 | """ 48 | return False 49 | 50 | def get_name(self): 51 | """ Name of the dataset 52 | 53 | returns: 54 | string - Name of the dataset 55 | """ 56 | raise NotImplementedError 57 | 58 | def get_num_sequences(self): 59 | """ Number of sequences in a dataset 60 | 61 | returns: 62 | int - number of sequences in the dataset.""" 63 | return len(self.sequence_list) 64 | 65 | def is_mot_dataset(self): 66 | return False 67 | 68 | def has_class_info(self): 69 | return False 70 | 71 | def has_occlusion_info(self): 72 | return False 73 | 74 | def get_num_classes(self): 75 | return len(self.class_list) 76 | 77 | def get_class_list(self): 78 | return self.class_list 79 | 80 | def get_sequences_in_class(self, class_name): 81 | raise NotImplementedError 82 | 83 | def has_segmentation_info(self): 84 | return False 85 | 86 | def get_sequence_info(self, seq_id): 87 | """ Returns information about a particular sequences, 88 | 89 | args: 90 | seq_id - index of the sequence 91 | 92 | returns: 93 | Dict 94 | """ 95 | raise NotImplementedError 96 | 97 | def get_frames(self, seq_id, frame_ids, anno=None): 98 | """ Get a set of frames from a particular sequence 99 | 100 | args: 101 | seq_id - index of sequence 102 | frame_ids - a list of frame numbers 103 | anno(None) - The annotation for the sequence (see get_sequence_info). If None, they will be loaded. 104 | 105 | returns: 106 | list - List of frames corresponding to frame_ids 107 | list - List of dicts for each frame 108 | dict - A dict containing meta information about the sequence, e.g. class of the target object. 109 | 110 | """ 111 | raise NotImplementedError 112 | 113 | -------------------------------------------------------------------------------- /pytracking/evaluation/trackingnetdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pytracking.evaluation.data import Sequence, BaseDataset, SequenceList 3 | import os 4 | from pytracking.utils.load_text import load_text 5 | from pathlib import Path 6 | from PIL import Image 7 | 8 | 9 | class TrackingNetDataset(BaseDataset): 10 | """ TrackingNet test set. 11 | 12 | Publication: 13 | TrackingNet: A Large-Scale Dataset and Benchmark for Object Tracking in the Wild. 14 | Matthias Mueller,Adel Bibi, Silvio Giancola, Salman Al-Subaihi and Bernard Ghanem 15 | ECCV, 2018 16 | https://ivul.kaust.edu.sa/Documents/Publications/2018/TrackingNet%20A%20Large%20Scale%20Dataset%20and%20Benchmark%20for%20Object%20Tracking%20in%20the%20Wild.pdf 17 | 18 | Download the dataset using the toolkit https://github.com/SilvioGiancola/TrackingNet-devkit. 
19 | """ 20 | def __init__(self, load_frames=True, vos_mode=False): 21 | super().__init__() 22 | self.base_path = self.env_settings.trackingnet_path 23 | self.load_frames = load_frames 24 | 25 | sets = 'TEST' 26 | if not isinstance(sets, (list, tuple)): 27 | if sets == 'TEST': 28 | sets = ['TEST'] 29 | elif sets == 'TRAIN': 30 | sets = ['TRAIN_{}'.format(i) for i in range(5)] 31 | 32 | self.sequence_list = self._list_sequences(self.base_path, sets) 33 | 34 | self.vos_mode = vos_mode 35 | 36 | self.mask_path = None 37 | if self.vos_mode: 38 | self.mask_path = self.env_settings.trackingnet_mask_path 39 | 40 | def get_sequence_list(self): 41 | return SequenceList([self._construct_sequence(set, seq_name) for set, seq_name in self.sequence_list]) 42 | 43 | def _construct_sequence(self, set, sequence_name): 44 | anno_path = '{}/{}/anno/{}.txt'.format(self.base_path, set, sequence_name) 45 | 46 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64, backend='numpy') 47 | 48 | if self.load_frames: 49 | frames_path = '{}/{}/frames/{}'.format(self.base_path, set, sequence_name) 50 | frame_list = [frame for frame in os.listdir(frames_path) if frame.endswith(".jpg")] 51 | frame_list.sort(key=lambda f: int(f[:-4])) 52 | frames_list = [os.path.join(frames_path, frame) for frame in frame_list] 53 | else: 54 | frames_list = [] 55 | frame_list = [] 56 | 57 | masks = None 58 | if self.vos_mode: 59 | seq_mask_path = '{}/{}'.format(self.mask_path, sequence_name) 60 | masks = [self._load_mask(Path(self._get_anno_frame_path(seq_mask_path, f[:-3] + 'png'))) for f in 61 | frame_list[0:1]] 62 | 63 | return Sequence(sequence_name, frames_list, 'trackingnet', 64 | ground_truth_rect.reshape(-1, 4), ground_truth_seg=masks) 65 | 66 | @staticmethod 67 | def _load_mask(path): 68 | if not path.exists(): 69 | print('Error: Could not read: ', path, flush=True) 70 | return None 71 | im = np.array(Image.open(path)) 72 | im = np.atleast_3d(im)[..., 0] 73 | return im 74 | 75 | def _get_anno_frame_path(self, seq_path, frame_name): 76 | return os.path.join(seq_path, frame_name) 77 | 78 | def __len__(self): 79 | return len(self.sequence_list) 80 | 81 | def _list_sequences(self, root, set_ids): 82 | sequence_list = [] 83 | 84 | for s in set_ids: 85 | anno_dir = os.path.join(root, s, "anno") 86 | sequences_cur_set = [(s, os.path.splitext(f)[0]) for f in os.listdir(anno_dir) if f.endswith('.txt')] 87 | 88 | sequence_list += sequences_cur_set 89 | 90 | return sequence_list 91 | -------------------------------------------------------------------------------- /ltr/models/bbreg/atom.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import ltr.models.backbone as backbones 3 | import ltr.models.bbreg as bbmodels 4 | from ltr import model_constructor 5 | 6 | 7 | class ATOMnet(nn.Module): 8 | """ ATOM network module""" 9 | def __init__(self, feature_extractor, bb_regressor, bb_regressor_layer, extractor_grad=True): 10 | """ 11 | args: 12 | feature_extractor - backbone feature extractor 13 | bb_regressor - IoU prediction module 14 | bb_regressor_layer - List containing the name of the layers from feature_extractor, which are input to 15 | bb_regressor 16 | extractor_grad - Bool indicating whether backbone feature extractor requires gradients 17 | """ 18 | super(ATOMnet, self).__init__() 19 | 20 | self.feature_extractor = feature_extractor 21 | self.bb_regressor = bb_regressor 22 | self.bb_regressor_layer = bb_regressor_layer 23 | 24 | if not 
extractor_grad: 25 | for p in self.feature_extractor.parameters(): 26 | p.requires_grad_(False) 27 | 28 | def forward(self, train_imgs, test_imgs, train_bb, test_proposals): 29 | """ Forward pass 30 | Note: If the training is done in sequence mode, that is, test_imgs.dim() == 5, then the batch dimension 31 | corresponds to the first dimensions. test_imgs is thus of the form [sequence, batch, feature, row, col] 32 | """ 33 | num_sequences = train_imgs.shape[-4] 34 | num_train_images = train_imgs.shape[0] if train_imgs.dim() == 5 else 1 35 | num_test_images = test_imgs.shape[0] if test_imgs.dim() == 5 else 1 36 | 37 | # Extract backbone features 38 | train_feat = self.extract_backbone_features(train_imgs.reshape(-1, *train_imgs.shape[-3:])) 39 | test_feat = self.extract_backbone_features(test_imgs.reshape(-1, *test_imgs.shape[-3:])) 40 | 41 | train_feat_iou = [feat for feat in train_feat.values()] 42 | test_feat_iou = [feat for feat in test_feat.values()] 43 | 44 | # Obtain iou prediction 45 | iou_pred = self.bb_regressor(train_feat_iou, test_feat_iou, 46 | train_bb.reshape(num_train_images, num_sequences, 4), 47 | test_proposals.reshape(num_train_images, num_sequences, -1, 4)) 48 | return iou_pred 49 | 50 | def extract_backbone_features(self, im, layers=None): 51 | if layers is None: 52 | layers = self.bb_regressor_layer 53 | return self.feature_extractor(im, layers) 54 | 55 | def extract_features(self, im, layers): 56 | return self.feature_extractor(im, layers) 57 | 58 | 59 | 60 | @model_constructor 61 | def atom_resnet18(iou_input_dim=(256,256), iou_inter_dim=(256,256), backbone_pretrained=True): 62 | # backbone 63 | backbone_net = backbones.resnet18(pretrained=backbone_pretrained) 64 | 65 | # Bounding box regressor 66 | iou_predictor = bbmodels.AtomIoUNet(pred_input_dim=iou_input_dim, pred_inter_dim=iou_inter_dim) 67 | 68 | net = ATOMnet(feature_extractor=backbone_net, bb_regressor=iou_predictor, bb_regressor_layer=['layer2', 'layer3'], 69 | extractor_grad=False) 70 | 71 | return net 72 | 73 | 74 | @model_constructor 75 | def atom_resnet50(iou_input_dim=(256,256), iou_inter_dim=(256,256), backbone_pretrained=True): 76 | # backbone 77 | backbone_net = backbones.resnet50(pretrained=backbone_pretrained) 78 | 79 | # Bounding box regressor 80 | iou_predictor = bbmodels.AtomIoUNet(input_dim=(4*128,4*256), pred_input_dim=iou_input_dim, pred_inter_dim=iou_inter_dim) 81 | 82 | net = ATOMnet(feature_extractor=backbone_net, bb_regressor=iou_predictor, bb_regressor_layer=['layer2', 'layer3'], 83 | extractor_grad=False) 84 | 85 | return net 86 | -------------------------------------------------------------------------------- /pytracking/VOT/vot.py: -------------------------------------------------------------------------------- 1 | """ 2 | \file vot.py 3 | 4 | @brief Python utility functions for VOT integration 5 | 6 | @author Luka Cehovin, Alessio Dore 7 | 8 | @date 2016, 2019 9 | 10 | """ 11 | 12 | import sys 13 | import copy 14 | import collections 15 | 16 | try: 17 | import trax 18 | except ImportError: 19 | raise Exception('TraX support not found. 
Please add trax module to Python path.') 20 | 21 | Rectangle = collections.namedtuple('Rectangle', ['x', 'y', 'width', 'height']) 22 | Point = collections.namedtuple('Point', ['x', 'y']) 23 | Polygon = collections.namedtuple('Polygon', ['points']) 24 | 25 | class VOT(object): 26 | """ Base class for Python VOT integration """ 27 | def __init__(self, region_format, channels=None): 28 | """ Constructor 29 | 30 | Args: 31 | region_format: Region format options 32 | """ 33 | assert(region_format in [trax.Region.RECTANGLE, trax.Region.POLYGON]) 34 | 35 | if channels is None: 36 | channels = ['color'] 37 | elif channels == 'rgbd': 38 | channels = ['color', 'depth'] 39 | elif channels == 'rgbt': 40 | channels = ['color', 'ir'] 41 | elif channels == 'ir': 42 | channels = ['ir'] 43 | else: 44 | raise Exception('Illegal configuration {}.'.format(channels)) 45 | 46 | self._trax = trax.Server([region_format], [trax.Image.PATH], channels) 47 | 48 | request = self._trax.wait() 49 | assert(request.type == 'initialize') 50 | if isinstance(request.region, trax.Polygon): 51 | self._region = Polygon([Point(x[0], x[1]) for x in request.region]) 52 | else: 53 | self._region = Rectangle(*request.region.bounds()) 54 | self._image = [str(x) for k, x in request.image.items()] 55 | if len(self._image) == 1: 56 | self._image = self._image[0] 57 | self._trax.status(request.region) 58 | 59 | def region(self): 60 | """ 61 | Send configuration message to the client and receive the initialization 62 | region and the path of the first image 63 | 64 | Returns: 65 | initialization region 66 | """ 67 | 68 | return self._region 69 | 70 | def report(self, region, confidence = None): 71 | """ 72 | Report the tracking results to the client 73 | 74 | Arguments: 75 | region: region for the frame 76 | """ 77 | assert(isinstance(region, Rectangle) or isinstance(region, Polygon)) 78 | if isinstance(region, Polygon): 79 | tregion = trax.Polygon.create([(x.x, x.y) for x in region.points]) 80 | else: 81 | tregion = trax.Rectangle.create(region.x, region.y, region.width, region.height) 82 | properties = {} 83 | if not confidence is None: 84 | properties['confidence'] = confidence 85 | self._trax.status(tregion, properties) 86 | 87 | def frame(self): 88 | """ 89 | Get a frame (image path) from client 90 | 91 | Returns: 92 | absolute path of the image 93 | """ 94 | if hasattr(self, "_image"): 95 | image = self._image 96 | del self._image 97 | return tuple(image) 98 | 99 | request = self._trax.wait() 100 | 101 | if request.type == 'frame': 102 | image = [str(x) for k, x in request.image.items()] 103 | if len(image) == 1: 104 | image = image[0] 105 | return tuple(image) 106 | else: 107 | return None 108 | 109 | 110 | def quit(self): 111 | if hasattr(self, '_trax'): 112 | self._trax.quit() 113 | 114 | def __del__(self): 115 | self.quit() 116 | 117 | -------------------------------------------------------------------------------- /ltr/data/image_loader.py: -------------------------------------------------------------------------------- 1 | import jpeg4py 2 | import cv2 as cv 3 | from PIL import Image 4 | import numpy as np 5 | 6 | davis_palette = np.repeat(np.expand_dims(np.arange(0,256), 1), 3, 1).astype(np.uint8) 7 | davis_palette[:22, :] = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], 8 | [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128], 9 | [64, 0, 0], [191, 0, 0], [64, 128, 0], [191, 128, 0], 10 | [64, 0, 128], [191, 0, 128], [64, 128, 128], [191, 128, 128], 11 | [0, 64, 0], [128, 64, 0], [0, 191, 0], [128, 191, 0], 12 | 
[0, 64, 128], [128, 64, 128]] 13 | 14 | 15 | def default_image_loader(path): 16 | """The default image loader, reads the image from the given path. It first tries to use the jpeg4py_loader, 17 | but reverts to the opencv_loader if the former is not available.""" 18 | if default_image_loader.use_jpeg4py is None: 19 | # Try using jpeg4py 20 | im = jpeg4py_loader(path) 21 | if im is None: 22 | default_image_loader.use_jpeg4py = False 23 | print('Using opencv_loader instead.') 24 | else: 25 | default_image_loader.use_jpeg4py = True 26 | return im 27 | if default_image_loader.use_jpeg4py: 28 | return jpeg4py_loader(path) 29 | return opencv_loader(path) 30 | 31 | default_image_loader.use_jpeg4py = None 32 | 33 | 34 | def jpeg4py_loader(path): 35 | """ Image reading using jpeg4py https://github.com/ajkxyz/jpeg4py""" 36 | try: 37 | return jpeg4py.JPEG(path).decode() 38 | except Exception as e: 39 | print('ERROR: Could not read image "{}"'.format(path)) 40 | print(e) 41 | return None 42 | 43 | 44 | def opencv_loader(path): 45 | """ Read image using opencv's imread function and returns it in rgb format""" 46 | try: 47 | im = cv.imread(path, cv.IMREAD_COLOR) 48 | 49 | # convert to rgb and return 50 | return cv.cvtColor(im, cv.COLOR_BGR2RGB) 51 | except Exception as e: 52 | print('ERROR: Could not read image "{}"'.format(path)) 53 | print(e) 54 | return None 55 | 56 | 57 | def jpeg4py_loader_w_failsafe(path): 58 | """ Image reading using jpeg4py https://github.com/ajkxyz/jpeg4py""" 59 | try: 60 | return jpeg4py.JPEG(path).decode() 61 | except: 62 | try: 63 | im = cv.imread(path, cv.IMREAD_COLOR) 64 | 65 | # convert to rgb and return 66 | return cv.cvtColor(im, cv.COLOR_BGR2RGB) 67 | except Exception as e: 68 | print('ERROR: Could not read image "{}"'.format(path)) 69 | print(e) 70 | return None 71 | 72 | 73 | def opencv_seg_loader(path): 74 | """ Read segmentation annotation using opencv's imread function""" 75 | try: 76 | return cv.imread(path) 77 | except Exception as e: 78 | print('ERROR: Could not read image "{}"'.format(path)) 79 | print(e) 80 | return None 81 | 82 | 83 | def imread_indexed(filename): 84 | """ Load indexed image with given filename. Used to read segmentation annotations.""" 85 | 86 | im = Image.open(filename) 87 | 88 | annotation = np.atleast_3d(im)[...,0] 89 | return annotation 90 | 91 | 92 | def imwrite_indexed(filename, array, color_palette=None): 93 | """ Save indexed image as png. 
Used to save segmentation annotation.""" 94 | 95 | if color_palette is None: 96 | color_palette = davis_palette 97 | 98 | if np.atleast_3d(array).shape[2] != 1: 99 | raise Exception("Saving indexed PNGs requires 2D array.") 100 | 101 | im = Image.fromarray(array.astype('uint8')) 102 | im.putpalette(color_palette.ravel()) 103 | im.save(filename, format='PNG') -------------------------------------------------------------------------------- /pytracking/parameter/rts/rts50.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | 5 | def parameters(): 6 | params = TrackerParams() 7 | 8 | ########################################## 9 | # General parameters 10 | ########################################## 11 | 12 | params.debug = 0 13 | params.visualization = False 14 | params.multiobj_mode = 'parallel' 15 | params.use_gpu = True 16 | 17 | ########################################## 18 | # Bounding box init network 19 | ########################################## 20 | params.sta_image_sample_size = (30 * 16, 52 * 16) 21 | params.sta_search_area_scale = 4.0 22 | 23 | params.sta_net = NetWithBackbone(net_path='sta.pth.tar', 24 | use_gpu=params.use_gpu, 25 | image_format='bgr255', 26 | mean=[102.9801, 115.9465, 122.7717], 27 | std=[1.0, 1.0, 1.0] 28 | ) 29 | 30 | params.sta_net.load_network() 31 | 32 | ########################################## 33 | # Segmentation Branch parameters 34 | ########################################## 35 | params.seg_to_bb_mode = 'var' 36 | params.min_mask_area = 100 37 | 38 | params.image_sample_size = (30 * 16, 52 * 16) 39 | params.search_area_scale = 6.0 40 | params.border_mode = 'inside_major' 41 | params.patch_max_scale_change = None 42 | params.max_scale_change = (0.8, 1.2) 43 | 44 | # Learning parameters 45 | params.sample_memory_size = 32 46 | params.learning_rate = 0.1 47 | params.init_samples_minimum_weight = 0.25 48 | params.train_skipping = 20 49 | 50 | # Net optimization params 51 | params.update_target_model = True 52 | params.net_opt_iter = 20 53 | params.net_opt_update_iter = 3 54 | 55 | # Main network 56 | params.net = NetWithBackbone(net_path='rts50.pth', 57 | use_gpu=params.use_gpu, 58 | image_format='bgr255', 59 | mean=[102.9801, 115.9465, 122.7717], 60 | std=[1.0, 1.0, 1.0], 61 | clf_filter_size=4, 62 | fusion_type="add" 63 | ) 64 | params.net.load_network() 65 | 66 | ########################################## 67 | # Classifier Branch parameters 68 | ########################################## 69 | 70 | # General parameters 71 | params.clf_image_sample_size = params.image_sample_size 72 | params.clf_search_area_scale = params.search_area_scale 73 | params.clf_border_mode = params.border_mode 74 | params.clf_patch_max_scale_change = params.patch_max_scale_change 75 | 76 | # Learning parameters 77 | params.clf_sample_memory_size = 50 78 | params.clf_learning_rate = 0.01 79 | params.clf_train_skipping = 20 80 | 81 | # Net optimization 82 | params.update_classifier = True 83 | params.clf_net_opt_iter = 10 84 | params.clf_net_opt_update_iter = 2 85 | params.clf_net_opt_hn_iter = 1 86 | params.clf_output_sigma_factor = 0.25 87 | 88 | # Advanced localization parameters 89 | params.clf_advanced_localization = True 90 | params.clf_target_not_found_threshold = 0.30 91 | params.clf_target_not_found_threshold_too_small = 0.50 92 | params.clf_distractor_threshold = 10000 93 | params.clf_hard_negative_threshold = 10000 94 | 
params.clf_target_neighborhood_scale = 2.2 95 | params.clf_displacement_scale = 0.8 96 | params.clf_hard_negative_learning_rate = 0.02 97 | 98 | # Augmentations parameters 99 | params.clf_use_augmentation = True 100 | params.clf_augmentation = { 101 | 'fliplr': True, 102 | 'blur': [(3, 1), (1, 3), (2, 2)], 103 | } 104 | 105 | return params 106 | -------------------------------------------------------------------------------- /pytracking/tracker/atom/optim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from pytracking import optimization, TensorList, operation 3 | import math 4 | 5 | 6 | class FactorizedConvProblem(optimization.L2Problem): 7 | def __init__(self, training_samples: TensorList, y:TensorList, filter_reg: torch.Tensor, projection_reg, params, sample_weights: TensorList, 8 | projection_activation, response_activation): 9 | self.training_samples = training_samples 10 | self.y = y 11 | self.filter_reg = filter_reg 12 | self.sample_weights = sample_weights 13 | self.params = params 14 | self.projection_reg = projection_reg 15 | self.projection_activation = projection_activation 16 | self.response_activation = response_activation 17 | 18 | self.diag_M = self.filter_reg.concat(projection_reg) 19 | 20 | def __call__(self, x: TensorList): 21 | """ 22 | Compute residuals 23 | :param x: [filters, projection_matrices] 24 | :return: [data_terms, filter_regularizations, proj_mat_regularizations] 25 | """ 26 | filter = x[:len(x)//2] # w2 in paper 27 | P = x[len(x)//2:] # w1 in paper 28 | 29 | # Do first convolution 30 | compressed_samples = operation.conv1x1(self.training_samples, P).apply(self.projection_activation) 31 | 32 | # Do second convolution 33 | residuals = operation.conv2d(compressed_samples, filter, mode='same').apply(self.response_activation) 34 | 35 | # Compute data residuals 36 | residuals = residuals - self.y 37 | 38 | residuals = self.sample_weights.sqrt().view(-1, 1, 1, 1) * residuals 39 | 40 | # Add regularization for projection matrix 41 | residuals.extend(self.filter_reg.apply(math.sqrt) * filter) 42 | 43 | # Add regularization for projection matrix 44 | residuals.extend(self.projection_reg.apply(math.sqrt) * P) 45 | 46 | return residuals 47 | 48 | 49 | def ip_input(self, a: TensorList, b: TensorList): 50 | num = len(a) // 2 # Number of filters 51 | a_filter = a[:num] 52 | b_filter = b[:num] 53 | a_P = a[num:] 54 | b_P = b[num:] 55 | 56 | # Filter inner product 57 | # ip_out = a_filter.reshape(-1) @ b_filter.reshape(-1) 58 | ip_out = operation.conv2d(a_filter, b_filter).view(-1) 59 | 60 | # Add projection matrix part 61 | # ip_out += a_P.reshape(-1) @ b_P.reshape(-1) 62 | ip_out += operation.conv2d(a_P.view(1,-1,1,1), b_P.view(1,-1,1,1)).view(-1) 63 | 64 | # Have independent inner products for each filter 65 | return ip_out.concat(ip_out.clone()) 66 | 67 | def M1(self, x: TensorList): 68 | return x / self.diag_M 69 | 70 | 71 | class ConvProblem(optimization.L2Problem): 72 | def __init__(self, training_samples: TensorList, y:TensorList, filter_reg: torch.Tensor, sample_weights: TensorList, response_activation): 73 | self.training_samples = training_samples 74 | self.y = y 75 | self.filter_reg = filter_reg 76 | self.sample_weights = sample_weights 77 | self.response_activation = response_activation 78 | 79 | def __call__(self, x: TensorList): 80 | """ 81 | Compute residuals 82 | :param x: [filters] 83 | :return: [data_terms, filter_regularizations] 84 | """ 85 | # Do convolution and compute residuals 86 | residuals 
= operation.conv2d(self.training_samples, x, mode='same').apply(self.response_activation) 87 | residuals = residuals - self.y 88 | 89 | residuals = self.sample_weights.sqrt().view(-1, 1, 1, 1) * residuals 90 | 91 | # Add regularization for projection matrix 92 | residuals.extend(self.filter_reg.apply(math.sqrt) * x) 93 | 94 | return residuals 95 | 96 | def ip_input(self, a: TensorList, b: TensorList): 97 | # return a.reshape(-1) @ b.reshape(-1) 98 | # return (a * b).sum() 99 | return operation.conv2d(a, b).view(-1) 100 | --------------------------------------------------------------------------------
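
A note on how the two problem classes in optim.py are consumed: FactorizedConvProblem and ConvProblem only define stacked residual vectors; an outer Gauss-Newton / conjugate-gradient style optimizer in pytracking then minimizes half the squared norm of that vector. The following self-contained sketch uses plain PyTorch instead of the TensorList/operation helpers, toy shapes, and omits the response activation; all names and values are illustrative, not taken from the repository. It shows that the stacked residual of ConvProblem (weighted data residuals followed by the sqrt-weighted regularization term) corresponds to the usual weighted least-squares objective.

import math
import torch
import torch.nn.functional as F

torch.manual_seed(0)

# Illustrative toy dimensions; none of these values come from the repository.
sample = torch.randn(1, 32, 18, 18)      # one training feature map
label = torch.randn(1, 1, 18, 18)        # desired correlation response
filt = torch.randn(1, 32, 5, 5)          # classification filter
sample_weight = 0.25                     # per-sample weight
filter_reg = 1e-2                        # filter regularization

def residuals(f):
    """Stack weighted data residuals and the sqrt-weighted regularization term,
    mirroring the structure of ConvProblem.__call__ above (activation omitted)."""
    pred = F.conv2d(sample, f, padding=2)              # 'same'-style convolution
    r_data = math.sqrt(sample_weight) * (pred - label)
    r_reg = math.sqrt(filter_reg) * f
    return torch.cat([r_data.reshape(-1), r_reg.reshape(-1)])

r = residuals(filt)
loss = 0.5 * (r * r).sum()

# The same objective written out explicitly: weighted L2 data term + regularizer.
pred = F.conv2d(sample, filt, padding=2)
loss_direct = (0.5 * sample_weight * ((pred - label) ** 2).sum()
               + 0.5 * filter_reg * (filt ** 2).sum())
assert torch.allclose(loss, loss_direct)

Stacking the regularization term into the residual vector (rather than adding it to a scalar loss) is what lets the same conjugate-gradient machinery treat data and regularization contributions uniformly when solving the normal equations.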