├── ltr ├── admin │ ├── __init__.py │ ├── settings.py │ ├── multigpu.py │ ├── tensorboard.py │ ├── stats.py │ ├── model_constructor.py │ └── environment.py ├── models │ ├── __init__.py │ ├── kys │ │ ├── __init__.py │ │ ├── conv_gru.py │ │ ├── cost_volume.py │ │ └── utils.py │ ├── lwl │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── initializer.py │ │ └── loss_residual_modules.py │ ├── meta │ │ └── __init__.py │ ├── rts │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── initializer.py │ │ ├── learners_fusion.py │ │ └── loss_residual_modules.py │ ├── layers │ │ ├── __init__.py │ │ ├── normalization.py │ │ ├── transform.py │ │ ├── blocks.py │ │ ├── distance.py │ │ └── activation.py │ ├── tracking │ │ └── __init__.py │ ├── transformer │ │ ├── __init__.py │ │ └── position_encoding.py │ ├── target_candidate_matching │ │ └── __init__.py │ ├── bbreg │ │ ├── __init__.py │ │ └── atom.py │ ├── target_classifier │ │ ├── __init__.py │ │ └── features.py │ ├── loss │ │ ├── __init__.py │ │ ├── bbr_loss.py │ │ ├── target_candidate_matching_loss.py │ │ ├── segmentation.py │ │ └── kl_regression.py │ └── backbone │ │ ├── __init__.py │ │ └── base.py ├── train_settings │ ├── __init__.py │ ├── bbreg │ │ └── __init__.py │ ├── dimp │ │ └── __init__.py │ ├── kys │ │ └── __init__.py │ ├── lwl │ │ └── __init__.py │ ├── rts │ │ └── __init__.py │ ├── tamos │ │ └── __init__.py │ ├── tomp │ │ └── __init__.py │ └── keep_track │ │ └── __init__.py ├── data │ ├── __init__.py │ ├── bounding_box_utils.py │ └── image_loader.py ├── trainers │ └── __init__.py ├── actors │ ├── __init__.py │ ├── base_actor.py │ └── bbreg.py ├── __init__.py ├── dataset │ ├── __init__.py │ ├── got10kvos.py │ ├── base_image_dataset.py │ ├── ecssd.py │ ├── hku_is.py │ ├── msra10k.py │ ├── lasotvos.py │ ├── synthetic_video.py │ └── base_video_dataset.py ├── data_specs │ └── lasot_train_val_split.txt └── run_training.py ├── pytracking ├── analysis │ └── __init__.py ├── features │ ├── __init__.py │ ├── color.py │ ├── util.py │ └── net_wrappers.py ├── parameter │ ├── __init__.py │ ├── atom │ │ └── __init__.py │ ├── dimp │ │ ├── __init__.py │ │ ├── dimp18.py │ │ ├── dimp50.py │ │ ├── dimp50_vot19.py │ │ ├── dimp50_vot18.py │ │ ├── dimp18_vot18.py │ │ ├── prdimp18.py │ │ ├── super_dimp.py │ │ ├── prdimp50.py │ │ └── prdimp50_vot18.py │ ├── eco │ │ └── __init__.py │ ├── kys │ │ ├── __init__.py │ │ ├── default.py │ │ └── default_vot.py │ ├── lwl │ │ ├── __init__.py │ │ ├── lwl_ytvos.py │ │ └── lwl_boxinit.py │ ├── rts │ │ ├── __init__.py │ │ └── rts50.py │ ├── tamos │ │ ├── __init__.py │ │ ├── tamos_resnet50.py │ │ └── tamos_swin_base.py │ ├── tomp │ │ ├── __init__.py │ │ ├── tomp101.py │ │ └── tomp50.py │ ├── keep_track │ │ ├── __init__.py │ │ ├── default.py │ │ └── default_fast.py │ └── dimp_simple │ │ ├── __init__.py │ │ └── super_dimp_simple.py ├── tracker │ ├── __init__.py │ ├── base │ │ ├── __init__.py │ │ └── basetracker.py │ ├── eco │ │ └── __init__.py │ ├── atom │ │ ├── __init__.py │ │ └── optim.py │ ├── dimp │ │ └── __init__.py │ ├── kys │ │ └── __init__.py │ ├── lwl │ │ └── __init__.py │ ├── rts │ │ └── __init__.py │ ├── tomp │ │ └── __init__.py │ ├── tamos │ │ └── __init__.py │ ├── keep_track │ │ └── __init__.py │ └── dimp_simple │ │ └── __init__.py ├── experiments │ ├── __init__.py │ └── myexperiments.py ├── util_scripts │ ├── __init__.py │ ├── pack_got10k_results.py │ └── pack_trackingnet_results.py ├── utils │ ├── __init__.py │ ├── convert_vot_anno_to_rect.py │ ├── loading.py │ ├── params.py │ └── load_text.py ├── .figs │ ├── NFS.png │ ├── LaSOT.png │ ├── 
OTB-100.png │ ├── UAV123.png │ ├── visdom.png │ ├── ToMP_teaser.png │ ├── atom_overview.png │ ├── dimp_overview.png │ ├── kys_overview.png │ ├── lwtl_overview.png │ ├── rts_overview.png │ ├── TaMOs_overview.png │ ├── ETTrack_overview.png │ └── KeepTrack_teaser.png ├── libs │ ├── __init__.py │ ├── tensordict.py │ └── operation.py ├── evaluation │ ├── __init__.py │ ├── environment.py │ ├── mobifacedataset.py │ ├── lagotdataset.py │ ├── got10kdataset.py │ └── trackingnetdataset.py ├── __init__.py ├── VOT │ ├── trackers.ini │ ├── tracker_DiMP.m │ └── vot.py ├── run_vot.py ├── run_experiment.py ├── run_video.py ├── run_webcam.py └── run_tracker.py ├── .gitmodules └── .gitignore /ltr/admin/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/kys/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/lwl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/meta/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/rts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/layers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/tracking/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/features/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/tracker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/transformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/bbreg/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/dimp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/kys/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/lwl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/rts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/tamos/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/tomp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/atom/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/eco/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/kys/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/lwl/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/rts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/tamos/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/tomp/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/util_scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/train_settings/keep_track/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/keep_track/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/models/target_candidate_matching/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp_simple/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ltr/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .loader import LTRLoader -------------------------------------------------------------------------------- /ltr/models/bbreg/__init__.py: -------------------------------------------------------------------------------- 1 | from .atom_iou_net import AtomIoUNet 2 | -------------------------------------------------------------------------------- /pytracking/tracker/base/__init__.py: -------------------------------------------------------------------------------- 1 | from .basetracker import BaseTracker -------------------------------------------------------------------------------- /ltr/models/target_classifier/__init__.py: -------------------------------------------------------------------------------- 1 | from .linear_filter import LinearFilter 2 | -------------------------------------------------------------------------------- /pytracking/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .params import TrackerParams, FeatureParams, Choice -------------------------------------------------------------------------------- /ltr/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_trainer import BaseTrainer 2 | from .ltr_trainer import LTRTrainer -------------------------------------------------------------------------------- /pytracking/.figs/NFS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/NFS.png -------------------------------------------------------------------------------- /pytracking/libs/__init__.py: -------------------------------------------------------------------------------- 1 | from .tensorlist import TensorList 2 | from .tensordict import TensorDict -------------------------------------------------------------------------------- /pytracking/.figs/LaSOT.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/LaSOT.png -------------------------------------------------------------------------------- /pytracking/tracker/eco/__init__.py: -------------------------------------------------------------------------------- 1 | from .eco import ECO 2 | 3 | def get_tracker_class(): 4 | return ECO -------------------------------------------------------------------------------- /pytracking/.figs/OTB-100.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/OTB-100.png -------------------------------------------------------------------------------- /pytracking/.figs/UAV123.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/UAV123.png -------------------------------------------------------------------------------- /pytracking/.figs/visdom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/visdom.png -------------------------------------------------------------------------------- /pytracking/tracker/atom/__init__.py: -------------------------------------------------------------------------------- 1 | from .atom import ATOM 2 | 3 | def get_tracker_class(): 4 | return ATOM -------------------------------------------------------------------------------- /pytracking/tracker/dimp/__init__.py: -------------------------------------------------------------------------------- 1 | from .dimp import DiMP 2 | 3 | def get_tracker_class(): 4 | return DiMP -------------------------------------------------------------------------------- /pytracking/tracker/kys/__init__.py: -------------------------------------------------------------------------------- 1 | from .kys import KYS 2 | 3 | 4 | def get_tracker_class(): 5 | return KYS -------------------------------------------------------------------------------- /pytracking/tracker/lwl/__init__.py: -------------------------------------------------------------------------------- 1 | from .lwl import LWL 2 | 3 | 4 | def get_tracker_class(): 5 | return LWL -------------------------------------------------------------------------------- /pytracking/tracker/rts/__init__.py: -------------------------------------------------------------------------------- 1 | from .rts import RTS 2 | 3 | 4 | def get_tracker_class(): 5 | return RTS -------------------------------------------------------------------------------- /pytracking/tracker/tomp/__init__.py: -------------------------------------------------------------------------------- 1 | from .tomp import ToMP 2 | 3 | def get_tracker_class(): 4 | return ToMP -------------------------------------------------------------------------------- /pytracking/tracker/tamos/__init__.py: -------------------------------------------------------------------------------- 1 | from .tamos import TaMOs 2 | 3 | def get_tracker_class(): 4 | return TaMOs -------------------------------------------------------------------------------- /pytracking/.figs/ToMP_teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/ToMP_teaser.png -------------------------------------------------------------------------------- /pytracking/.figs/atom_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/atom_overview.png -------------------------------------------------------------------------------- /pytracking/.figs/dimp_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/dimp_overview.png -------------------------------------------------------------------------------- 
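Each tracker package above (atom, dimp, kys, lwl, rts, tomp, tamos, ...) exposes a get_tracker_class() entry point, so a tracker implementation can be resolved from its name at runtime; the repository's run scripts reach these modules through pytracking.evaluation.Tracker. A minimal sketch of the pattern, assuming pytracking is importable on the Python path (the load_tracker_class helper below is illustrative and is not a file in the repository):

import importlib

def load_tracker_class(tracker_name):
    # e.g. tracker_name = 'dimp' imports pytracking.tracker.dimp and returns its tracker class
    tracker_module = importlib.import_module('pytracking.tracker.{}'.format(tracker_name))
    return tracker_module.get_tracker_class()

# Example usage: pair the class with a parameter module of the same tracker family.
# params = importlib.import_module('pytracking.parameter.dimp.dimp50').parameters()
# tracker = load_tracker_class('dimp')(params)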
/pytracking/.figs/kys_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/kys_overview.png -------------------------------------------------------------------------------- /pytracking/.figs/lwtl_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/lwtl_overview.png -------------------------------------------------------------------------------- /pytracking/.figs/rts_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/rts_overview.png -------------------------------------------------------------------------------- /pytracking/.figs/TaMOs_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/TaMOs_overview.png -------------------------------------------------------------------------------- /pytracking/.figs/ETTrack_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/ETTrack_overview.png -------------------------------------------------------------------------------- /pytracking/.figs/KeepTrack_teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/visionml/pytracking/HEAD/pytracking/.figs/KeepTrack_teaser.png -------------------------------------------------------------------------------- /pytracking/tracker/keep_track/__init__.py: -------------------------------------------------------------------------------- 1 | from .keep_track import KeepTrack 2 | 3 | def get_tracker_class(): 4 | return KeepTrack -------------------------------------------------------------------------------- /ltr/actors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_actor import BaseActor 2 | from .bbreg import AtomActor 3 | from .tracking import DiMPActor, KYSActor -------------------------------------------------------------------------------- /pytracking/tracker/dimp_simple/__init__.py: -------------------------------------------------------------------------------- 1 | from .dimp_simple import DiMPSimple 2 | 3 | def get_tracker_class(): 4 | return DiMPSimple -------------------------------------------------------------------------------- /ltr/__init__.py: -------------------------------------------------------------------------------- 1 | from .admin.loading import load_network 2 | from .admin.model_constructor import model_constructor 3 | from .admin.multigpu import MultiGPU -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "ltr/external/PreciseRoIPooling"] 2 | path = ltr/external/PreciseRoIPooling 3 | url = https://github.com/vacancy/PreciseRoIPooling.git 4 | -------------------------------------------------------------------------------- /pytracking/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import Sequence 2 | from .tracker import Tracker, trackerlist 3 | from .datasets import 
get_dataset, get_dataset_attributes -------------------------------------------------------------------------------- /ltr/models/loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .target_classification import LBHinge, LBHingev2, IsTargetCellLoss, TrackingClassificationAccuracy, FocalLoss 2 | from .segmentation import LovaszSegLoss -------------------------------------------------------------------------------- /ltr/models/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import resnet18, resnet50, resnet101, resnet_baby 2 | from .resnet18_vggm import resnet18_vggmconv1 3 | from .swin_transformer_flex import swin_base384_flex -------------------------------------------------------------------------------- /ltr/admin/settings.py: -------------------------------------------------------------------------------- 1 | from ltr.admin.environment import env_settings 2 | 3 | 4 | class Settings: 5 | """ Training settings, e.g. the paths to datasets and networks.""" 6 | def __init__(self): 7 | self.set_default() 8 | 9 | def set_default(self): 10 | self.env = env_settings() 11 | self.use_gpu = True 12 | 13 | 14 | -------------------------------------------------------------------------------- /pytracking/__init__.py: -------------------------------------------------------------------------------- 1 | from pytracking.libs import TensorList, TensorDict 2 | import pytracking.libs.complex as complex 3 | import pytracking.libs.operation as operation 4 | import pytracking.libs.fourier as fourier 5 | import pytracking.libs.dcf as dcf 6 | import pytracking.libs.optimization as optimization 7 | from pytracking.run_tracker import run_tracker 8 | from pytracking.run_webcam import run_webcam 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.idea 2 | *~ 3 | *__pycache__* 4 | *.pyc 5 | *.pytest_cache 6 | *.ipynb_checkpoints/ 7 | ltr/admin/local.py 8 | ltr/run_ltr_local.py 9 | ltr/train_settings/*/debug.py 10 | pytracking/parameter/*/debug.py 11 | pytracking/networks/ 12 | pytracking/tracking_results/ 13 | pytracking/segmentation_results/ 14 | pytracking/result_plots/ 15 | pytracking/evaluation/local.py 16 | pytracking/run_local.py 17 | -------------------------------------------------------------------------------- /ltr/admin/multigpu.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def is_multi_gpu(net): 5 | return isinstance(net, (MultiGPU, nn.DataParallel)) 6 | 7 | 8 | class MultiGPU(nn.DataParallel): 9 | """Wraps a network to allow simple multi-GPU training.""" 10 | def __getattr__(self, item): 11 | try: 12 | return super().__getattr__(item) 13 | except: 14 | pass 15 | return getattr(self.module, item) -------------------------------------------------------------------------------- /pytracking/VOT/trackers.ini: -------------------------------------------------------------------------------- 1 | [DiMP] # 2 | label = DiMP 3 | protocol = traxpython 4 | 5 | command = import pytracking.run_vot as run_vot; run_vot.run_vot2020('dimp', 'dimp50') # Set the tracker name and the parameter name 6 | 7 | # Specify a path to trax python wrapper if it is not visible (separate by ; if using multiple paths) 8 | paths = PATH_TO_PYTRACKING 9 | 10 | # Additional environment paths 11 | #env_PATH = 
;${PATH} 12 | 13 | -------------------------------------------------------------------------------- /ltr/models/lwl/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def adaptive_cat(seq, dim=0, ref_tensor=0, mode='bilinear'): 6 | sz = seq[ref_tensor].shape[-2:] 7 | t = torch.cat([interpolate(t, sz, mode=mode) for t in seq], dim=dim) 8 | return t 9 | 10 | 11 | def interpolate(t, sz, mode='bilinear'): 12 | sz = sz.tolist() if torch.is_tensor(sz) else sz 13 | align = {} if mode == 'nearest' else dict(align_corners=False) 14 | return F.interpolate(t, sz, mode=mode, **align) if t.shape[-2:] != sz else t 15 | 16 | -------------------------------------------------------------------------------- /ltr/models/rts/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def adaptive_cat(seq, dim=0, ref_tensor=0, mode='bilinear'): 6 | sz = seq[ref_tensor].shape[-2:] 7 | t = torch.cat([interpolate(t, sz, mode=mode) for t in seq], dim=dim) 8 | return t 9 | 10 | 11 | def interpolate(t, sz, mode='bilinear'): 12 | sz = sz.tolist() if torch.is_tensor(sz) else sz 13 | align = {} if mode == 'nearest' else dict(align_corners=False) 14 | return F.interpolate(t, sz, mode=mode, **align) if t.shape[-2:] != sz else t 15 | 16 | -------------------------------------------------------------------------------- /pytracking/experiments/myexperiments.py: -------------------------------------------------------------------------------- 1 | from pytracking.evaluation import Tracker, get_dataset, trackerlist 2 | 3 | 4 | def atom_nfs_uav(): 5 | # Run three runs of ATOM on NFS and UAV datasets 6 | trackers = trackerlist('atom', 'default', range(3)) 7 | 8 | dataset = get_dataset('nfs', 'uav') 9 | return trackers, dataset 10 | 11 | 12 | def uav_test(): 13 | # Run DiMP18, ATOM and ECO on the UAV dataset 14 | trackers = trackerlist('dimp', 'dimp18', range(1)) + \ 15 | trackerlist('atom', 'default', range(1)) + \ 16 | trackerlist('eco', 'default', range(1)) 17 | 18 | dataset = get_dataset('uav') 19 | return trackers, dataset 20 | -------------------------------------------------------------------------------- /pytracking/features/color.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from pytracking.features.featurebase import FeatureBase 3 | 4 | 5 | class RGB(FeatureBase): 6 | """RGB feature normalized to [-0.5, 0.5].""" 7 | def dim(self): 8 | return 3 9 | 10 | def stride(self): 11 | return self.pool_stride 12 | 13 | def extract(self, im: torch.Tensor): 14 | return im/255 - 0.5 15 | 16 | 17 | class Grayscale(FeatureBase): 18 | """Grayscale feature normalized to [-0.5, 0.5].""" 19 | def dim(self): 20 | return 1 21 | 22 | def stride(self): 23 | return self.pool_stride 24 | 25 | def extract(self, im: torch.Tensor): 26 | return torch.mean(im/255 - 0.5, 1, keepdim=True) 27 | -------------------------------------------------------------------------------- /ltr/models/lwl/initializer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class FilterInitializerZero(nn.Module): 5 | """Initializes a target model with zeros. 6 | args: 7 | filter_size: Size of the filter. 
8 | feature_dim: Input feature dimensionality.""" 9 | 10 | def __init__(self, filter_size=1, num_filters=1, feature_dim=256, filter_groups=1): 11 | super().__init__() 12 | 13 | self.filter_size = (num_filters, feature_dim//filter_groups, filter_size, filter_size) 14 | 15 | def forward(self, feat, mask=None): 16 | assert feat.dim() == 5 17 | # num_sequences = feat.shape[1] if feat.dim() == 5 else 1 18 | num_sequences = feat.shape[1] 19 | 20 | return feat.new_zeros(num_sequences, *self.filter_size) 21 | -------------------------------------------------------------------------------- /ltr/models/rts/initializer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class FilterInitializerZero(nn.Module): 5 | """Initializes a target model with zeros. 6 | args: 7 | filter_size: Size of the filter. 8 | feature_dim: Input feature dimensionality.""" 9 | 10 | def __init__(self, filter_size=1, num_filters=1, feature_dim=256, filter_groups=1): 11 | super().__init__() 12 | 13 | self.filter_size = (num_filters, feature_dim//filter_groups, filter_size, filter_size) 14 | 15 | def forward(self, feat, mask=None): 16 | assert feat.dim() == 5 17 | # num_sequences = feat.shape[1] if feat.dim() == 5 else 1 18 | num_sequences = feat.shape[1] 19 | 20 | return feat.new_zeros(num_sequences, *self.filter_size) 21 | -------------------------------------------------------------------------------- /ltr/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .lasot import Lasot 2 | from .lasotvos import LasotVOS 3 | from .got10k import Got10k 4 | from .got10kvos import Got10kVOS 5 | from .tracking_net import TrackingNet 6 | from .imagenetvid import ImagenetVID 7 | from .coco import MSCOCO 8 | from .coco_seq import MSCOCOSeq 9 | from .youtubevos import YouTubeVOS 10 | from .davis import Davis 11 | from .lvis import LVIS 12 | from .ecssd import ECSSD 13 | from .msra10k import MSRA10k 14 | from .hku_is import HKUIS 15 | from .sbd import SBD 16 | from .synthetic_video import SyntheticVideo 17 | from .synthetic_video_blend import SyntheticVideoBlend 18 | from .lasot_candidate_matching import LasotCandidateMatching 19 | from .coco_mot_seq import MSCOCOMOTSeq 20 | from .imagenetvid_mot import ImagenetVIDMOT 21 | from .tao_burst import TAOBURST 22 | -------------------------------------------------------------------------------- /ltr/models/layers/normalization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class InstanceL2Norm(nn.Module): 7 | """Instance L2 normalization.
8 | """ 9 | def __init__(self, size_average=True, eps=1e-5, scale=1.0): 10 | super().__init__() 11 | self.size_average = size_average 12 | self.eps = eps 13 | self.scale = scale 14 | 15 | def forward(self, input): 16 | if self.size_average: 17 | return input * (self.scale * ((input.shape[1] * input.shape[2] * input.shape[3]) / ( 18 | torch.sum((input * input).view(input.shape[0], 1, 1, -1), dim=3, keepdim=True) + self.eps)).sqrt()) 19 | else: 20 | return input * (self.scale / (torch.sum((input * input).view(input.shape[0], 1, 1, -1), dim=3, keepdim=True) + self.eps).sqrt()) 21 | 22 | -------------------------------------------------------------------------------- /ltr/models/layers/transform.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | 6 | 7 | def interpolate(x, sz): 8 | """Interpolate 4D tensor x to size sz.""" 9 | sz = sz.tolist() if torch.is_tensor(sz) else sz 10 | return F.interpolate(x, sz, mode='bilinear', align_corners=False) if x.shape[-2:] != sz else x 11 | 12 | 13 | class InterpCat(nn.Module): 14 | """Interpolate and concatenate features of different resolutions.""" 15 | 16 | def forward(self, input): 17 | if isinstance(input, (dict, OrderedDict)): 18 | input = list(input.values()) 19 | 20 | output_shape = None 21 | for x in input: 22 | if output_shape is None or output_shape[0] > x.shape[-2]: 23 | output_shape = x.shape[-2:] 24 | 25 | return torch.cat([interpolate(x, output_shape) for x in input], dim=-3) 26 | -------------------------------------------------------------------------------- /pytracking/run_vot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | env_path = os.path.join(os.path.dirname(__file__), '..') 6 | if env_path not in sys.path: 7 | sys.path.append(env_path) 8 | 9 | from pytracking.evaluation import Tracker 10 | 11 | 12 | def run_vot2020(tracker_name, tracker_param, run_id=None, debug=0, visdom_info=None): 13 | tracker = Tracker(tracker_name, tracker_param, run_id) 14 | tracker.run_vot2020(debug, visdom_info) 15 | 16 | 17 | def run_vot(tracker_name, tracker_param, run_id=None): 18 | tracker = Tracker(tracker_name, tracker_param, run_id) 19 | tracker.run_vot() 20 | 21 | 22 | def main(): 23 | parser = argparse.ArgumentParser(description='Run VOT.') 24 | parser.add_argument('tracker_name', type=str) 25 | parser.add_argument('tracker_param', type=str) 26 | parser.add_argument('--run_id', type=int, default=None) 27 | 28 | args = parser.parse_args() 29 | 30 | run_vot(args.tracker_name, args.tracker_param, args.run_id) 31 | 32 | 33 | if __name__ == '__main__': 34 | main() 35 | -------------------------------------------------------------------------------- /pytracking/features/util.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from pytracking.features.featurebase import FeatureBase 3 | 4 | 5 | class Concatenate(FeatureBase): 6 | """A feature that concatenates other features. 7 | args: 8 | features: List of features to concatenate. 
9 | """ 10 | def __init__(self, features, pool_stride = None, normalize_power = None, use_for_color = True, use_for_gray = True): 11 | super(Concatenate, self).__init__(pool_stride, normalize_power, use_for_color, use_for_gray) 12 | self.features = features 13 | 14 | self.input_stride = self.features[0].stride() 15 | 16 | for feat in self.features: 17 | if self.input_stride != feat.stride(): 18 | raise ValueError('Strides for the features must be the same for a bultiresolution feature.') 19 | 20 | def dim(self): 21 | return sum([f.dim() for f in self.features]) 22 | 23 | def stride(self): 24 | return self.pool_stride * self.input_stride 25 | 26 | def extract(self, im: torch.Tensor): 27 | return torch.cat([f.get_feature(im) for f in self.features], 1) -------------------------------------------------------------------------------- /pytracking/utils/convert_vot_anno_to_rect.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def convert_vot_anno_to_rect(vot_anno, type): 5 | if len(vot_anno) == 4: 6 | return vot_anno 7 | 8 | if type == 'union': 9 | x1 = min(vot_anno[0::2]) 10 | x2 = max(vot_anno[0::2]) 11 | y1 = min(vot_anno[1::2]) 12 | y2 = max(vot_anno[1::2]) 13 | return [x1, y1, x2 - x1, y2 - y1] 14 | elif type == 'preserve_area': 15 | if len(vot_anno) != 8: 16 | raise ValueError 17 | 18 | vot_anno = np.array(vot_anno) 19 | cx = np.mean(vot_anno[0::2]) 20 | cy = np.mean(vot_anno[1::2]) 21 | 22 | x1 = min(vot_anno[0::2]) 23 | x2 = max(vot_anno[0::2]) 24 | y1 = min(vot_anno[1::2]) 25 | y2 = max(vot_anno[1::2]) 26 | 27 | A1 = np.linalg.norm(vot_anno[0:2] - vot_anno[2: 4]) * np.linalg.norm(vot_anno[2: 4] - vot_anno[4:6]) 28 | A2 = (x2 - x1) * (y2 - y1) 29 | s = np.sqrt(A1 / A2) 30 | w = s * (x2 - x1) + 1 31 | h = s * (y2 - y1) + 1 32 | 33 | x = cx - 0.5*w 34 | y = cy - 0.5*h 35 | return [x, y, w, h] 36 | else: 37 | raise ValueError 38 | -------------------------------------------------------------------------------- /pytracking/tracker/base/basetracker.py: -------------------------------------------------------------------------------- 1 | from _collections import OrderedDict 2 | 3 | class BaseTracker: 4 | """Base class for all trackers.""" 5 | 6 | def __init__(self, params): 7 | self.params = params 8 | self.visdom = None 9 | 10 | 11 | def predicts_segmentation_mask(self): 12 | return False 13 | 14 | 15 | def initialize(self, image, info: dict) -> dict: 16 | """Overload this function in your tracker. This should initialize the model.""" 17 | raise NotImplementedError 18 | 19 | 20 | def track(self, image, info: dict = None) -> dict: 21 | """Overload this function in your tracker. 
This should track in the frame and update the model.""" 22 | raise NotImplementedError 23 | 24 | 25 | def visdom_draw_tracking(self, image, box, segmentation=None): 26 | if box is None: 27 | box = [] 28 | elif isinstance(box, OrderedDict): 29 | box = [v for k, v in box.items()] 30 | elif isinstance(box, list): 31 | box = box 32 | else: 33 | box = (box,) 34 | if segmentation is None: 35 | self.visdom.register((image, *box), 'Tracking', 1, 'Tracking') 36 | else: 37 | self.visdom.register((image, *box, segmentation), 'Tracking', 1, 'Tracking') -------------------------------------------------------------------------------- /pytracking/VOT/tracker_DiMP.m: -------------------------------------------------------------------------------- 1 | % Set path to the python in the pytracking conda environment 2 | python_path = 'PATH_TO_CONDA_INSTALLATION/envs/pytracking/bin/python'; 3 | 4 | % Set path to pytracking 5 | pytracking_path = 'PATH_TO_VISIONML/pytracking'; 6 | 7 | % Set path to trax installation. Check 8 | % https://trax.readthedocs.io/en/latest/tutorial_compiling.html for 9 | % compilation information 10 | trax_path = 'PATH_TO_VOT_TOOLKIT/native/trax'; 11 | 12 | tracker_name = 'dimp'; % Name of the tracker to evaluate 13 | runfile_name = 'dimp18_vot'; % Name of the parameter file to use 14 | debug = 0; 15 | 16 | %% 17 | tracker_label = [tracker_name, '_', runfile_name]; 18 | 19 | % Generate python command 20 | tracker_command = sprintf(['%s -c "import sys; sys.path.append(''%s'');', ... 21 | 'sys.path.append(''%s/support/python'');', ... 22 | 'import run_vot;', ... 23 | 'run_vot.run_vot(''%s'', ''%s'', debug=%d)"'],... 24 | python_path, pytracking_path, trax_path, ... 25 | tracker_name, runfile_name, debug); 26 | 27 | 28 | tracker_interpreter = python_path; 29 | 30 | tracker_linkpath = {[trax_path, '/build'],... 31 | [trax_path, '/build/support/client'],... 
32 | [trax_path, '/build/support/opencv']}; -------------------------------------------------------------------------------- /pytracking/parameter/lwl/lwl_ytvos.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | 5 | def parameters(): 6 | params = TrackerParams() 7 | 8 | params.debug = 0 9 | params.visualization = False 10 | 11 | params.seg_to_bb_mode = 'var' 12 | params.max_scale_change = (0.95, 1.1) 13 | params.min_mask_area = 100 14 | 15 | params.use_gpu = True 16 | 17 | params.image_sample_size = (30 * 16, 52 * 16) 18 | params.search_area_scale = 5.0 19 | params.border_mode = 'inside_major' 20 | params.patch_max_scale_change = None 21 | 22 | # Learning parameters 23 | params.sample_memory_size = 32 24 | params.learning_rate = 0.1 25 | params.init_samples_minimum_weight = 0.25 26 | params.train_skipping = 1 27 | 28 | # Net optimization params 29 | params.update_target_model = True 30 | params.net_opt_iter = 20 31 | params.net_opt_update_iter = 3 32 | 33 | params.net = NetWithBackbone(net_path='lwl_stage2.pth', 34 | use_gpu=params.use_gpu, 35 | image_format='bgr255', 36 | mean=[102.9801, 115.9465, 122.7717], 37 | std=[1.0, 1.0, 1.0] 38 | ) 39 | 40 | return params 41 | -------------------------------------------------------------------------------- /ltr/admin/tensorboard.py: -------------------------------------------------------------------------------- 1 | import os 2 | from collections import OrderedDict 3 | try: 4 | from torch.utils.tensorboard import SummaryWriter 5 | except: 6 | print('WARNING: You are using tensorboardX instead, since your pytorch version is too old.') 7 | from tensorboardX import SummaryWriter 8 | 9 | 10 | class TensorboardWriter: 11 | def __init__(self, directory, loader_names): 12 | self.directory = directory 13 | self.writer = OrderedDict({name: SummaryWriter(os.path.join(self.directory, name)) for name in loader_names}) 14 | 15 | def write_info(self, module_name, script_name, description): 16 | tb_info_writer = SummaryWriter(os.path.join(self.directory, 'info')) 17 | tb_info_writer.add_text('Module_name', module_name) 18 | tb_info_writer.add_text('Script_name', script_name) 19 | tb_info_writer.add_text('Description', description) 20 | tb_info_writer.close() 21 | 22 | def write_epoch(self, stats: OrderedDict, epoch: int, ind=-1): 23 | for loader_name, loader_stats in stats.items(): 24 | if loader_stats is None: 25 | continue 26 | for var_name, val in loader_stats.items(): 27 | if hasattr(val, 'history') and getattr(val, 'has_new_data', True): 28 | self.writer[loader_name].add_scalar(var_name, val.history[ind], epoch) -------------------------------------------------------------------------------- /pytracking/libs/tensordict.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch 3 | import copy 4 | 5 | 6 | class TensorDict(OrderedDict): 7 | """Container mainly used for dicts of torch tensors.
Extends OrderedDict with pytorch functionality.""" 8 | 9 | def concat(self, other): 10 | """Concatenates two dicts without copying internal data.""" 11 | return TensorDict(self, **other) 12 | 13 | def copy(self): 14 | return TensorDict(super(TensorDict, self).copy()) 15 | 16 | def __deepcopy__(self, memodict={}): 17 | return TensorDict(copy.deepcopy(list(self), memodict)) 18 | 19 | def __getattr__(self, name): 20 | if not hasattr(torch.Tensor, name): 21 | raise AttributeError('\'TensorDict\' object has not attribute \'{}\''.format(name)) 22 | 23 | def apply_attr(*args, **kwargs): 24 | return TensorDict({n: getattr(e, name)(*args, **kwargs) if hasattr(e, name) else e for n, e in self.items()}) 25 | return apply_attr 26 | 27 | def attribute(self, attr: str, *args): 28 | return TensorDict({n: getattr(e, attr, *args) for n, e in self.items()}) 29 | 30 | def apply(self, fn, *args, **kwargs): 31 | return TensorDict({n: fn(e, *args, **kwargs) for n, e in self.items()}) 32 | 33 | @staticmethod 34 | def _iterable(a): 35 | return isinstance(a, (TensorDict, list)) 36 | 37 | -------------------------------------------------------------------------------- /pytracking/utils/loading.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ltr.admin.loading as ltr_loading 3 | from pytracking.evaluation.environment import env_settings 4 | 5 | 6 | def load_network(net_path, **kwargs): 7 | """Load network for tracking. 8 | args: 9 | net_path - Path to network. If it is not an absolute path, it is relative to the network_path in the local.py. 10 | See ltr.admin.loading.load_network for further details. 11 | **kwargs - Additional key-word arguments that are sent to ltr.admin.loading.load_network. 12 | """ 13 | kwargs['backbone_pretrained'] = False 14 | if os.path.isabs(net_path): 15 | path_full = net_path 16 | net, _ = ltr_loading.load_network(path_full, **kwargs) 17 | elif isinstance(env_settings().network_path, (list, tuple)): 18 | net = None 19 | for p in env_settings().network_path: 20 | path_full = os.path.join(p, net_path) 21 | try: 22 | net, _ = ltr_loading.load_network(path_full, **kwargs) 23 | break 24 | except Exception as e: 25 | print(e) 26 | pass 27 | 28 | assert net is not None, 'Failed to load network' 29 | else: 30 | path_full = os.path.join(env_settings().network_path, net_path) 31 | net, _ = ltr_loading.load_network(path_full, **kwargs) 32 | 33 | return net 34 | -------------------------------------------------------------------------------- /pytracking/parameter/lwl/lwl_boxinit.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | 5 | def parameters(): 6 | params = TrackerParams() 7 | 8 | params.debug = 0 9 | params.visualization = False 10 | 11 | params.seg_to_bb_mode = 'var' 12 | params.max_scale_change = (0.95, 1.1) 13 | params.min_mask_area = 100 14 | 15 | params.use_gpu = True 16 | 17 | params.image_sample_size = (30 * 16, 52 * 16) 18 | params.search_area_scale = 5.0 19 | params.border_mode = 'inside_major' 20 | params.patch_max_scale_change = None 21 | 22 | # Learning parameters 23 | params.sample_memory_size = 32 24 | params.learning_rate = 0.2 25 | params.init_samples_minimum_weight = 0 26 | params.train_skipping = 5 27 | 28 | # Net optimization params 29 | params.update_target_model = True 30 | params.net_opt_iter = 20 31 | params.net_opt_update_iter = 5 32 | 33 | 
params.init_with_box = True 34 | params.lower_init_weight = True 35 | 36 | params.net = NetWithBackbone(net_path='lwl_boxinit.pth', 37 | use_gpu=params.use_gpu, 38 | image_format='bgr255', 39 | mean=[102.9801, 115.9465, 122.7717], 40 | std=[1.0, 1.0, 1.0]) 41 | 42 | params.vot_anno_conversion_type = 'preserve_area' 43 | 44 | return params 45 | -------------------------------------------------------------------------------- /ltr/models/rts/learners_fusion.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | 5 | 6 | class LearnersFusion(nn.Module): 7 | """ """ 8 | def __init__(self, fusion_type): 9 | super().__init__() 10 | self.fusion_type = fusion_type 11 | 12 | if self.fusion_type == 'concat': 13 | self.fusion_conv1 = nn.Conv2d(32, 16, kernel_size=3, padding=1, stride=1) 14 | 15 | for m in self.modules(): 16 | if isinstance(m, nn.Conv2d): 17 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 18 | m.weight.data.normal_(0, math.sqrt(2. / n)) 19 | elif isinstance(m, nn.BatchNorm2d): 20 | m.weight.data.fill_(1) 21 | m.bias.data.zero_() 22 | 23 | 24 | def forward(self, seg_learner_out, clf_learner_out): 25 | 26 | assert seg_learner_out.shape == clf_learner_out.shape 27 | assert seg_learner_out.shape[0] == 1 28 | 29 | if self.fusion_type == 'add': 30 | return seg_learner_out + clf_learner_out 31 | 32 | if self.fusion_type == 'concat': 33 | concat_output = torch.cat([seg_learner_out, clf_learner_out], dim=2) 34 | concat_output = concat_output.squeeze(0) 35 | concat_output = self.fusion_conv1(concat_output) 36 | concat_output = concat_output.unsqueeze(0) 37 | 38 | return concat_output 39 | 40 | print("Type of fusion not recognized") 41 | assert False 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /ltr/actors/base_actor.py: -------------------------------------------------------------------------------- 1 | from pytracking import TensorDict 2 | 3 | 4 | class BaseActor: 5 | """ Base class for actor. The actor class handles the passing of the data through the network 6 | and calculation the loss""" 7 | def __init__(self, net, objective): 8 | """ 9 | args: 10 | net - The network to train 11 | objective - The loss function 12 | """ 13 | self.net = net 14 | self.objective = objective 15 | 16 | def __call__(self, data: TensorDict): 17 | """ Called in each training iteration. Should pass in input data through the network, calculate the loss, and 18 | return the training stats for the input data 19 | args: 20 | data - A TensorDict containing all the necessary data blocks. 21 | 22 | returns: 23 | loss - loss for the input data 24 | stats - a dict containing detailed losses 25 | """ 26 | raise NotImplementedError 27 | 28 | def to(self, device): 29 | """ Move the network to device 30 | args: 31 | device - device to use. 'cpu' or 'cuda' 32 | """ 33 | self.net.to(device) 34 | 35 | def train(self, mode=True): 36 | """ Set whether the network is in train mode. 37 | args: 38 | mode (True) - Bool specifying whether in training mode. 
39 | """ 40 | self.net.train(mode) 41 | 42 | def eval(self): 43 | """ Set network to eval mode""" 44 | self.train(False) -------------------------------------------------------------------------------- /pytracking/utils/params.py: -------------------------------------------------------------------------------- 1 | from pytracking import TensorList 2 | import random 3 | 4 | 5 | class TrackerParams: 6 | """Class for tracker parameters.""" 7 | def set_default_values(self, default_vals: dict): 8 | for name, val in default_vals.items(): 9 | if not hasattr(self, name): 10 | setattr(self, name, val) 11 | 12 | def get(self, name: str, *default): 13 | """Get a parameter value with the given name. If it does not exists, it return the default value given as a 14 | second argument or returns an error if no default value is given.""" 15 | if len(default) > 1: 16 | raise ValueError('Can only give one default value.') 17 | 18 | if not default: 19 | return getattr(self, name) 20 | 21 | return getattr(self, name, default[0]) 22 | 23 | def has(self, name: str): 24 | """Check if there exist a parameter with the given name.""" 25 | return hasattr(self, name) 26 | 27 | 28 | class FeatureParams: 29 | """Class for feature specific parameters""" 30 | def __init__(self, *args, **kwargs): 31 | if len(args) > 0: 32 | raise ValueError 33 | 34 | for name, val in kwargs.items(): 35 | if isinstance(val, list): 36 | setattr(self, name, TensorList(val)) 37 | else: 38 | setattr(self, name, val) 39 | 40 | 41 | def Choice(*args): 42 | """Can be used to sample random parameter values.""" 43 | return random.choice(args) 44 | -------------------------------------------------------------------------------- /ltr/models/layers/blocks.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | def conv_block(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1, bias=True, 5 | batch_norm=True, relu=True, padding_mode='zeros'): 6 | layers = [] 7 | assert padding_mode == 'zeros' or padding_mode == 'replicate' 8 | 9 | if padding_mode == 'replicate' and padding > 0: 10 | assert isinstance(padding, int) 11 | layers.append(nn.ReflectionPad2d(padding)) 12 | padding = 0 13 | 14 | layers.append(nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, 15 | padding=padding, dilation=dilation, bias=bias)) 16 | if batch_norm: 17 | layers.append(nn.BatchNorm2d(out_planes)) 18 | if relu: 19 | layers.append(nn.ReLU(inplace=True)) 20 | return nn.Sequential(*layers) 21 | 22 | 23 | class LinearBlock(nn.Module): 24 | def __init__(self, in_planes, out_planes, input_sz, bias=True, batch_norm=True, relu=True): 25 | super().__init__() 26 | self.linear = nn.Linear(in_planes*input_sz*input_sz, out_planes, bias=bias) 27 | self.bn = nn.BatchNorm2d(out_planes) if batch_norm else None 28 | self.relu = nn.ReLU(inplace=True) if relu else None 29 | 30 | def forward(self, x): 31 | x = self.linear(x.reshape(x.shape[0], -1)) 32 | if self.bn is not None: 33 | x = self.bn(x.reshape(x.shape[0], x.shape[1], 1, 1)) 34 | if self.relu is not None: 35 | x = self.relu(x) 36 | return x.reshape(x.shape[0], -1) -------------------------------------------------------------------------------- /ltr/models/layers/distance.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class DistanceMap(nn.Module): 7 | """Generate a distance map from a origin center location. 
8 | args: 9 | num_bins: Number of bins in the map. 10 | bin_displacement: Displacement of the bins. 11 | """ 12 | def __init__(self, num_bins, bin_displacement=1.0): 13 | super().__init__() 14 | self.num_bins = num_bins 15 | self.bin_displacement = bin_displacement 16 | 17 | def forward(self, center, output_sz): 18 | """Create the distance map. 19 | args: 20 | center: Torch tensor with (y,x) center position. Dims (batch, 2) 21 | output_sz: Size of output distance map. 2-dimensional tuple.""" 22 | 23 | center = center.view(-1,2) 24 | 25 | bin_centers = torch.arange(self.num_bins, dtype=torch.float32, device=center.device).view(1, -1, 1, 1) 26 | 27 | k0 = torch.arange(output_sz[0], dtype=torch.float32, device=center.device).view(1,1,-1,1) 28 | k1 = torch.arange(output_sz[1], dtype=torch.float32, device=center.device).view(1,1,1,-1) 29 | 30 | d0 = k0 - center[:,0].view(-1,1,1,1) 31 | d1 = k1 - center[:,1].view(-1,1,1,1) 32 | 33 | dist = torch.sqrt(d0*d0 + d1*d1) 34 | bin_diff = dist / self.bin_displacement - bin_centers 35 | 36 | bin_val = torch.cat((F.relu(1.0 - torch.abs(bin_diff[:,:-1,:,:]), inplace=True), 37 | (1.0 + bin_diff[:,-1:,:,:]).clamp(0, 1)), dim=1) 38 | 39 | return bin_val 40 | 41 | 42 | -------------------------------------------------------------------------------- /pytracking/utils/load_text.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def load_text_numpy(path, delimiter, dtype): 6 | if isinstance(delimiter, (tuple, list)): 7 | for d in delimiter: 8 | try: 9 | ground_truth_rect = np.loadtxt(path, delimiter=d, dtype=dtype) 10 | return ground_truth_rect 11 | except: 12 | pass 13 | 14 | raise Exception('Could not read file {}'.format(path)) 15 | else: 16 | ground_truth_rect = np.loadtxt(path, delimiter=delimiter, dtype=dtype) 17 | return ground_truth_rect 18 | 19 | 20 | def load_text_pandas(path, delimiter, dtype): 21 | if isinstance(delimiter, (tuple, list)): 22 | for d in delimiter: 23 | try: 24 | ground_truth_rect = pd.read_csv(path, delimiter=d, header=None, dtype=dtype, na_filter=False, 25 | low_memory=False).values 26 | return ground_truth_rect 27 | except Exception as e: 28 | pass 29 | 30 | raise Exception('Could not read file {}'.format(path)) 31 | else: 32 | ground_truth_rect = pd.read_csv(path, delimiter=delimiter, header=None, dtype=dtype, na_filter=False, 33 | low_memory=False).values 34 | return ground_truth_rect 35 | 36 | 37 | def load_text(path, delimiter=' ', dtype=np.float32, backend='numpy'): 38 | if backend == 'numpy': 39 | return load_text_numpy(path, delimiter, dtype) 40 | elif backend == 'pandas': 41 | return load_text_pandas(path, delimiter, dtype) 42 | -------------------------------------------------------------------------------- /pytracking/run_experiment.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import importlib 5 | 6 | env_path = os.path.join(os.path.dirname(__file__), '..') 7 | if env_path not in sys.path: 8 | sys.path.append(env_path) 9 | 10 | from pytracking.evaluation.running import run_dataset 11 | 12 | 13 | def run_experiment(experiment_module: str, experiment_name: str, debug=0, threads=0): 14 | """Run experiment. 15 | args: 16 | experiment_module: Name of experiment module in the experiments/ folder. 17 | experiment_name: Name of the experiment function. 18 | debug: Debug level. 19 | threads: Number of threads. 
20 | """ 21 | expr_module = importlib.import_module('pytracking.experiments.{}'.format(experiment_module)) 22 | expr_func = getattr(expr_module, experiment_name) 23 | trackers, dataset = expr_func() 24 | print('Running: {} {}'.format(experiment_module, experiment_name)) 25 | run_dataset(dataset, trackers, debug, threads) 26 | 27 | 28 | def main(): 29 | parser = argparse.ArgumentParser(description='Run tracker.') 30 | parser.add_argument('experiment_module', type=str, help='Name of experiment module in the experiments/ folder.') 31 | parser.add_argument('experiment_name', type=str, help='Name of the experiment function.') 32 | parser.add_argument('--debug', type=int, default=0, help='Debug level.') 33 | parser.add_argument('--threads', type=int, default=0, help='Number of threads.') 34 | 35 | args = parser.parse_args() 36 | 37 | run_experiment(args.experiment_module, args.experiment_name, args.debug, args.threads) 38 | 39 | 40 | if __name__ == '__main__': 41 | main() 42 | -------------------------------------------------------------------------------- /pytracking/libs/operation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from pytracking.libs.tensorlist import tensor_operation, TensorList 4 | 5 | 6 | @tensor_operation 7 | def conv2d(input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor = None, stride=1, padding=0, dilation=1, groups=1, mode=None): 8 | """Standard conv2d. Returns the input if weight=None.""" 9 | 10 | if weight is None: 11 | return input 12 | 13 | ind = None 14 | if mode is not None: 15 | if padding != 0: 16 | raise ValueError('Cannot input both padding and mode.') 17 | if mode == 'same': 18 | padding = (weight.shape[2]//2, weight.shape[3]//2) 19 | if weight.shape[2] % 2 == 0 or weight.shape[3] % 2 == 0: 20 | ind = (slice(-1) if weight.shape[2] % 2 == 0 else slice(None), 21 | slice(-1) if weight.shape[3] % 2 == 0 else slice(None)) 22 | elif mode == 'valid': 23 | padding = (0, 0) 24 | elif mode == 'full': 25 | padding = (weight.shape[2]-1, weight.shape[3]-1) 26 | else: 27 | raise ValueError('Unknown mode for padding.') 28 | 29 | out = F.conv2d(input, weight, bias=bias, stride=stride, padding=padding, dilation=dilation, groups=groups) 30 | if ind is None: 31 | return out 32 | return out[:,:,ind[0],ind[1]] 33 | 34 | 35 | @tensor_operation 36 | def conv1x1(input: torch.Tensor, weight: torch.Tensor): 37 | """Do a convolution with a 1x1 kernel weights. Implemented with matmul, which can be faster than using conv.""" 38 | 39 | if weight is None: 40 | return input 41 | 42 | return torch.conv2d(input, weight) 43 | -------------------------------------------------------------------------------- /pytracking/run_video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | env_path = os.path.join(os.path.dirname(__file__), '..') 6 | if env_path not in sys.path: 7 | sys.path.append(env_path) 8 | 9 | from pytracking.evaluation import Tracker 10 | 11 | 12 | def run_video(tracker_name, tracker_param, videofile, optional_box=None, debug=None, save_results=False): 13 | """Run the tracker on your webcam. 14 | args: 15 | tracker_name: Name of tracking method. 16 | tracker_param: Name of parameter file. 17 | debug: Debug level. 
18 | """ 19 | tracker = Tracker(tracker_name, tracker_param) 20 | tracker.run_video_generic(videofilepath=videofile, optional_box=optional_box, debug=debug, save_results=save_results) 21 | 22 | def main(): 23 | parser = argparse.ArgumentParser(description='Run the tracker on your webcam.') 24 | parser.add_argument('tracker_name', type=str, help='Name of tracking method.') 25 | parser.add_argument('tracker_param', type=str, help='Name of parameter file.') 26 | parser.add_argument('videofile', type=str, help='path to a video file.') 27 | parser.add_argument('--optional_box', type=float, default=None, nargs="+", help='optional_box with format x y w h.') 28 | parser.add_argument('--debug', type=int, default=0, help='Debug level.') 29 | parser.add_argument('--save_results', dest='save_results', action='store_true', help='Save bounding boxes') 30 | parser.set_defaults(save_results=False) 31 | 32 | args = parser.parse_args() 33 | 34 | run_video(args.tracker_name, args.tracker_param,args.videofile, args.optional_box, args.debug, args.save_results) 35 | 36 | 37 | if __name__ == '__main__': 38 | main() 39 | -------------------------------------------------------------------------------- /pytracking/run_webcam.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | env_path = os.path.join(os.path.dirname(__file__), '..') 6 | if env_path not in sys.path: 7 | sys.path.append(env_path) 8 | 9 | from pytracking.evaluation import Tracker 10 | 11 | 12 | def run_webcam(tracker_name, tracker_param, debug=None, visdom_info=None): 13 | """Run the tracker on your webcam. 14 | args: 15 | tracker_name: Name of tracking method. 16 | tracker_param: Name of parameter file. 17 | debug: Debug level. 18 | visdom_info: Dict optionally containing 'use_visdom', 'server' and 'port' for Visdom visualization. 
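        example (illustrative; server and port match the defaults used by main() below):
            run_webcam('dimp', 'dimp50', debug=0, visdom_info={'use_visdom': True, 'server': '127.0.0.1', 'port': 8097})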
19 | """ 20 | visdom_info = {} if visdom_info is None else visdom_info 21 | tracker = Tracker(tracker_name, tracker_param) 22 | tracker.run_video_generic(debug=debug, visdom_info=visdom_info) 23 | 24 | 25 | def main(): 26 | parser = argparse.ArgumentParser(description='Run the tracker on your webcam.') 27 | parser.add_argument('tracker_name', type=str, help='Name of tracking method.') 28 | parser.add_argument('tracker_param', type=str, help='Name of parameter file.') 29 | parser.add_argument('--debug', type=int, default=0, help='Debug level.') 30 | parser.add_argument('--use_visdom', type=bool, default=True, help='Flag to enable visdom') 31 | parser.add_argument('--visdom_server', type=str, default='127.0.0.1', help='Server for visdom') 32 | parser.add_argument('--visdom_port', type=int, default=8097, help='Port for visdom') 33 | 34 | args = parser.parse_args() 35 | 36 | visdom_info = {'use_visdom': args.use_visdom, 'server': args.visdom_server, 'port': args.visdom_port} 37 | run_webcam(args.tracker_name, args.tracker_param, args.debug, visdom_info) 38 | 39 | 40 | if __name__ == '__main__': 41 | main() -------------------------------------------------------------------------------- /ltr/models/lwl/loss_residual_modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | import ltr.models.layers.filter as filter_layer 5 | from pytracking import TensorList 6 | 7 | 8 | class LWTLResidual(nn.Module): 9 | """ Computes the residuals W(y_t)*(T_tau(x_t) - E(y_t) and lambda*tau in the few-shot learner loss (3) in the 10 | paper """ 11 | def __init__(self, init_filter_reg=1e-2, filter_dilation_factors=None): 12 | super().__init__() 13 | self.filter_reg = nn.Parameter(init_filter_reg * torch.ones(1)) 14 | self.filter_dilation_factors = filter_dilation_factors 15 | 16 | def forward(self, meta_parameter: TensorList, feat, label, sample_weight=None): 17 | # Assumes multiple filters, i.e. (sequences, filters, feat_dim, fH, fW) 18 | filter = meta_parameter[0] 19 | 20 | num_images = feat.shape[0] 21 | num_sequences = feat.shape[1] if feat.dim() == 5 else 1 22 | 23 | # Compute scores 24 | scores = filter_layer.apply_filter(feat, filter, dilation_factors=self.filter_dilation_factors) 25 | 26 | if sample_weight is None: 27 | sample_weight = math.sqrt(1.0 / num_images) 28 | elif isinstance(sample_weight, torch.Tensor): 29 | if sample_weight.numel() == scores.numel(): 30 | sample_weight = sample_weight.view(scores.shape) 31 | elif sample_weight.dim() == 1: 32 | sample_weight = sample_weight.view(-1, 1, 1, 1, 1) 33 | 34 | label = label.view(scores.shape) 35 | 36 | data_residual = sample_weight * (scores - label) 37 | 38 | # Compute regularization residual. 
Put batch in second dimension 39 | reg_residual = self.filter_reg*filter.view(1, num_sequences, -1) 40 | 41 | return TensorList([data_residual, reg_residual]) 42 | -------------------------------------------------------------------------------- /ltr/models/rts/loss_residual_modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | import ltr.models.layers.filter as filter_layer 5 | from pytracking import TensorList 6 | 7 | 8 | class RTSResidual(nn.Module): 9 | """ Computes the residuals W(y_t)*(T_tau(x_t) - E(y_t) and lambda*tau in the few-shot learner loss (3) in the 10 | paper """ 11 | def __init__(self, init_filter_reg=1e-2, filter_dilation_factors=None): 12 | super().__init__() 13 | self.filter_reg = nn.Parameter(init_filter_reg * torch.ones(1)) 14 | self.filter_dilation_factors = filter_dilation_factors 15 | 16 | def forward(self, meta_parameter: TensorList, feat, label, sample_weight=None): 17 | # Assumes multiple filters, i.e. (sequences, filters, feat_dim, fH, fW) 18 | filter = meta_parameter[0] 19 | 20 | num_images = feat.shape[0] 21 | num_sequences = feat.shape[1] if feat.dim() == 5 else 1 22 | 23 | # Compute scores 24 | scores = filter_layer.apply_filter(feat, filter, dilation_factors=self.filter_dilation_factors) 25 | 26 | if sample_weight is None: 27 | sample_weight = math.sqrt(1.0 / num_images) 28 | elif isinstance(sample_weight, torch.Tensor): 29 | if sample_weight.numel() == scores.numel(): 30 | sample_weight = sample_weight.view(scores.shape) 31 | elif sample_weight.dim() == 1: 32 | sample_weight = sample_weight.view(-1, 1, 1, 1, 1) 33 | 34 | label = label.view(scores.shape) 35 | 36 | data_residual = sample_weight * (scores - label) 37 | 38 | # Compute regularization residual. 
Put batch in second dimension 39 | reg_residual = self.filter_reg*filter.view(1, num_sequences, -1) 40 | 41 | return TensorList([data_residual, reg_residual]) 42 | -------------------------------------------------------------------------------- /ltr/data_specs/lasot_train_val_split.txt: -------------------------------------------------------------------------------- 1 | airplane-10 2 | basketball-10 3 | basketball-12 4 | basketball-13 5 | basketball-8 6 | bicycle-1 7 | bird-9 8 | boat-14 9 | boat-19 10 | book-17 11 | book-4 12 | bottle-6 13 | bus-14 14 | car-15 15 | car-18 16 | car-20 17 | car-5 18 | cat-11 19 | cat-15 20 | cat-17 21 | cat-9 22 | cattle-15 23 | chameleon-10 24 | chameleon-12 25 | chameleon-18 26 | crab-14 27 | crab-17 28 | crocodile-18 29 | crocodile-9 30 | cup-2 31 | deer-19 32 | deer-5 33 | deer-6 34 | electricfan-11 35 | elephant-3 36 | flag-11 37 | flag-13 38 | flag-4 39 | flag-7 40 | fox-10 41 | fox-11 42 | fox-15 43 | fox-7 44 | frog-8 45 | gametarget-8 46 | gecko-13 47 | gecko-14 48 | gecko-17 49 | goldfish-1 50 | goldfish-9 51 | gorilla-15 52 | gorilla-20 53 | guitar-20 54 | hand-14 55 | hat-14 56 | hippo-3 57 | kangaroo-12 58 | kangaroo-15 59 | kangaroo-3 60 | kite-19 61 | kite-5 62 | licenseplate-17 63 | licenseplate-2 64 | licenseplate-9 65 | lion-13 66 | microphone-12 67 | microphone-15 68 | microphone-8 69 | monkey-5 70 | mouse-12 71 | person-20 72 | pig-1 73 | pig-19 74 | pig-20 75 | pool-11 76 | pool-5 77 | pool-6 78 | rabbit-12 79 | racing-3 80 | robot-12 81 | robot-4 82 | rubicCube-8 83 | sepia-20 84 | shark-11 85 | shark-8 86 | sheep-12 87 | sheep-13 88 | sheep-17 89 | sheep-20 90 | sheep-8 91 | skateboard-1 92 | spider-7 93 | spider-9 94 | squirrel-15 95 | squirrel-9 96 | surfboard-14 97 | surfboard-18 98 | surfboard-6 99 | swing-13 100 | tank-10 101 | tank-8 102 | tiger-16 103 | tiger-19 104 | train-6 105 | truck-11 106 | turtle-11 107 | turtle-18 108 | turtle-4 109 | turtle-7 110 | umbrella-10 111 | umbrella-12 112 | umbrella-18 113 | yoyo-11 114 | yoyo-14 115 | yoyo-20 116 | zebra-12 117 | zebra-13 118 | zebra-5 119 | zebra-6 120 | zebra-7 121 | -------------------------------------------------------------------------------- /ltr/models/backbone/base.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Backbone(nn.Module): 6 | """Base class for backbone networks. Handles freezing layers etc. 7 | args: 8 | frozen_layers - Name of layers to freeze. Either list of strings, 'none' or 'all'. Default: 'none'. 9 | """ 10 | def __init__(self, frozen_layers=()): 11 | super().__init__() 12 | 13 | if isinstance(frozen_layers, str): 14 | if frozen_layers.lower() == 'none': 15 | frozen_layers = () 16 | elif frozen_layers.lower() != 'all': 17 | raise ValueError('Unknown option for frozen layers: \"{}\". 
Should be \"all\", \"none\" or list of layer names.'.format(frozen_layers)) 18 | 19 | self.frozen_layers = frozen_layers 20 | self._is_frozen_nograd = False 21 | 22 | 23 | def train(self, mode=True): 24 | super().train(mode) 25 | if mode == True: 26 | self._set_frozen_to_eval() 27 | if not self._is_frozen_nograd: 28 | self._set_frozen_to_nograd() 29 | self._is_frozen_nograd = True 30 | return self 31 | 32 | 33 | def _set_frozen_to_eval(self): 34 | if isinstance(self.frozen_layers, str) and self.frozen_layers.lower() == 'all': 35 | self.eval() 36 | else: 37 | for layer in self.frozen_layers: 38 | getattr(self, layer).eval() 39 | 40 | 41 | def _set_frozen_to_nograd(self): 42 | if isinstance(self.frozen_layers, str) and self.frozen_layers.lower() == 'all': 43 | for p in self.parameters(): 44 | p.requires_grad_(False) 45 | else: 46 | for layer in self.frozen_layers: 47 | for p in getattr(self, layer).parameters(): 48 | p.requires_grad_(False) -------------------------------------------------------------------------------- /pytracking/util_scripts/pack_got10k_results.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import shutil 4 | from pytracking.evaluation.environment import env_settings 5 | 6 | 7 | def pack_got10k_results(tracker_name, param_name, output_name): 8 | """ Packs got10k results into a zip folder which can be directly uploaded to the evaluation server. The packed 9 | file is saved in the folder env_settings().got_packed_results_path 10 | 11 | args: 12 | tracker_name - name of the tracker 13 | param_name - name of the parameter file 14 | output_name - name of the packed zip file 15 | """ 16 | output_path = os.path.join(env_settings().got_packed_results_path, output_name) 17 | 18 | if not os.path.exists(output_path): 19 | os.makedirs(output_path) 20 | 21 | results_path = env_settings().results_path 22 | for i in range(1,181): 23 | seq_name = 'GOT-10k_Test_{:06d}'.format(i) 24 | 25 | seq_output_path = '{}/{}'.format(output_path, seq_name) 26 | if not os.path.exists(seq_output_path): 27 | os.makedirs(seq_output_path) 28 | 29 | for run_id in range(3): 30 | res = np.loadtxt('{}/{}/{}_{:03d}/{}.txt'.format(results_path, tracker_name, param_name, run_id, seq_name), dtype=np.float64) 31 | times = np.loadtxt( 32 | '{}/{}/{}_{:03d}/{}_time.txt'.format(results_path, tracker_name, param_name, run_id, seq_name), 33 | dtype=np.float64) 34 | 35 | np.savetxt('{}/{}_{:03d}.txt'.format(seq_output_path, seq_name, run_id+1), res, delimiter=',', fmt='%f') 36 | np.savetxt('{}/{}_time.txt'.format(seq_output_path, seq_name), times, fmt='%f') 37 | 38 | # Generate ZIP file 39 | shutil.make_archive(output_path, 'zip', output_path) 40 | 41 | # Remove raw text files 42 | shutil.rmtree(output_path) 43 | -------------------------------------------------------------------------------- /pytracking/parameter/tamos/tamos_resnet50.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.train_feature_size = [24, 36] 13 | params.feature_stride = 16 14 | params.image_sample_size = [params.feature_stride*tfs for tfs in params.train_feature_size] 15 | params.search_area_scale = 5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 2 19 
| params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | 22 | # Net optimization params 23 | params.update_classifier = True 24 | 25 | # Detection parameters 26 | params.window_output = False 27 | 28 | # Init augmentation parameters 29 | params.use_augmentation = False 30 | params.augmentation = {} 31 | 32 | params.augmentation_expansion_factor = 2 33 | params.random_shift_factor = 1/3 34 | 35 | # Advanced localization parameters 36 | params.advanced_localization = True 37 | params.target_not_found_threshold = 0.25 38 | params.distractor_threshold = 0.8 39 | params.hard_negative_threshold = 0.5 40 | params.target_neighborhood_scale = 1.5 41 | params.dispalcement_scale = 0.8 42 | params.hard_negative_learning_rate = 0.02 43 | params.update_scale_when_uncertain = True 44 | params.conf_ths = 0.85 45 | params.search_area_rescaling_at_occlusion = False 46 | 47 | params.net = NetWithBackbone(net_path='tamos_resnet50.pth.tar', use_gpu=params.use_gpu) 48 | 49 | params.vot_anno_conversion_type = 'preserve_area' 50 | 51 | params.use_gt_box = True 52 | params.plot_iou = True 53 | params.normalize_scores = True 54 | 55 | return params 56 | -------------------------------------------------------------------------------- /ltr/admin/stats.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class StatValue: 4 | def __init__(self): 5 | self.clear() 6 | 7 | def reset(self): 8 | self.val = 0 9 | 10 | def clear(self): 11 | self.reset() 12 | self.history = [] 13 | 14 | def update(self, val): 15 | self.val = val 16 | self.history.append(self.val) 17 | 18 | 19 | class AverageMeter(object): 20 | """Computes and stores the average and current value""" 21 | def __init__(self): 22 | self.clear() 23 | self.has_new_data = False 24 | 25 | def reset(self): 26 | self.avg = 0 27 | self.val = 0 28 | self.sum = 0 29 | self.count = 0 30 | 31 | def clear(self): 32 | self.reset() 33 | self.history = [] 34 | 35 | def update(self, val, n=1): 36 | self.val = val 37 | self.sum += val * n 38 | self.count += n 39 | self.avg = self.sum / self.count 40 | 41 | def new_epoch(self): 42 | if self.count > 0: 43 | self.history.append(self.avg) 44 | self.reset() 45 | self.has_new_data = True 46 | else: 47 | self.has_new_data = False 48 | 49 | 50 | def topk_accuracy(output, target, topk=(1,)): 51 | """Computes the precision@k for the specified values of k""" 52 | single_input = not isinstance(topk, (tuple, list)) 53 | if single_input: 54 | topk = (topk,) 55 | 56 | maxk = max(topk) 57 | batch_size = target.size(0) 58 | 59 | _, pred = output.topk(maxk, 1, True, True) 60 | pred = pred.t() 61 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 62 | 63 | res = [] 64 | for k in topk: 65 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)[0] 66 | res.append(correct_k * 100.0 / batch_size) 67 | 68 | if single_input: 69 | return res[0] 70 | 71 | return res 72 | -------------------------------------------------------------------------------- /pytracking/parameter/tamos/tamos_swin_base.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.train_feature_size = [24, 36] 13 | params.feature_stride = 16 14 | params.image_sample_size = [params.feature_stride*tfs for tfs in 
params.train_feature_size] 15 | params.search_area_scale = 5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 2 19 | params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | # params.train_skipping = 20 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | 26 | # Detection parameters 27 | params.window_output = False 28 | 29 | # Init augmentation parameters 30 | params.use_augmentation = False 31 | params.augmentation = {} 32 | 33 | params.augmentation_expansion_factor = 2 34 | params.random_shift_factor = 1/3 35 | 36 | # Advanced localization parameters 37 | params.advanced_localization = True 38 | params.target_not_found_threshold = 0.25 39 | params.distractor_threshold = 0.8 40 | params.hard_negative_threshold = 0.5 41 | params.target_neighborhood_scale = 1.5 42 | params.dispalcement_scale = 0.8 43 | params.hard_negative_learning_rate = 0.02 44 | params.update_scale_when_uncertain = True 45 | params.conf_ths = 0.85 46 | params.search_area_rescaling_at_occlusion = False 47 | 48 | params.net = NetWithBackbone(net_path='tamos_swin_base.pth.tar', use_gpu=params.use_gpu) 49 | 50 | params.vot_anno_conversion_type = 'preserve_area' 51 | 52 | params.use_gt_box = True 53 | params.plot_iou = True 54 | params.normalize_scores = True 55 | 56 | return params 57 | -------------------------------------------------------------------------------- /pytracking/util_scripts/pack_trackingnet_results.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import shutil 4 | from pytracking.evaluation.environment import env_settings 5 | from pytracking.evaluation.datasets import get_dataset 6 | 7 | 8 | def pack_trackingnet_results(tracker_name, param_name, run_id=None, output_name=None): 9 | """ Packs trackingnet results into a zip folder which can be directly uploaded to the evaluation server. 
The packed 10 | file is saved in the folder env_settings().tn_packed_results_path 11 | 12 | args: 13 | tracker_name - name of the tracker 14 | param_name - name of the parameter file 15 | run_id - run id for the tracker 16 | output_name - name of the packed zip file 17 | """ 18 | 19 | if output_name is None: 20 | if run_id is None: 21 | output_name = '{}_{}'.format(tracker_name, param_name) 22 | else: 23 | output_name = '{}_{}_{:03d}'.format(tracker_name, param_name, run_id) 24 | 25 | output_path = os.path.join(env_settings().tn_packed_results_path, output_name) 26 | 27 | if not os.path.exists(output_path): 28 | os.makedirs(output_path) 29 | 30 | results_path = env_settings().results_path 31 | 32 | tn_dataset = get_dataset('trackingnet') 33 | 34 | for seq in tn_dataset: 35 | seq_name = seq.name 36 | 37 | if run_id is None: 38 | seq_results_path = '{}/{}/{}/{}.txt'.format(results_path, tracker_name, param_name, seq_name) 39 | else: 40 | seq_results_path = '{}/{}/{}_{:03d}/{}.txt'.format(results_path, tracker_name, param_name, run_id, seq_name) 41 | 42 | results = np.loadtxt(seq_results_path, dtype=np.float64) 43 | 44 | np.savetxt('{}/{}.txt'.format(output_path, seq_name), results, delimiter=',', fmt='%.2f') 45 | 46 | # Generate ZIP file 47 | shutil.make_archive(output_path, 'zip', output_path) 48 | 49 | # Remove raw text files 50 | shutil.rmtree(output_path) 51 | -------------------------------------------------------------------------------- /pytracking/parameter/tomp/tomp101.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.train_feature_size = 18 13 | params.feature_stride = 16 14 | params.image_sample_size = params.train_feature_size*params.feature_stride 15 | params.search_area_scale = 5 16 | params.border_mode = 'inside_major' 17 | params.patch_max_scale_change = 1.5 18 | 19 | # Learning parameters 20 | params.sample_memory_size = 2 21 | params.learning_rate = 0.01 22 | params.init_samples_minimum_weight = 0.25 23 | params.train_skipping = 20 24 | 25 | # Net optimization params 26 | params.update_classifier = True 27 | params.net_opt_iter = 10 28 | params.net_opt_update_iter = 2 29 | params.net_opt_hn_iter = 1 30 | 31 | # Detection parameters 32 | params.window_output = False 33 | 34 | # Init augmentation parameters 35 | params.use_augmentation = False 36 | params.augmentation = {} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.target_not_found_threshold = 0.25 44 | params.distractor_threshold = 0.8 45 | params.hard_negative_threshold = 0.5 46 | params.target_neighborhood_scale = 2.2 47 | params.dispalcement_scale = 0.8 48 | params.hard_negative_learning_rate = 0.02 49 | params.update_scale_when_uncertain = True 50 | params.conf_ths = 0.9 51 | params.search_area_rescaling_at_occlusion = True 52 | 53 | params.net = NetWithBackbone(net_path='tomp101.pth.tar', use_gpu=params.use_gpu) 54 | 55 | params.vot_anno_conversion_type = 'preserve_area' 56 | 57 | params.use_gt_box = True 58 | params.plot_iou = True 59 | 60 | return params 61 | -------------------------------------------------------------------------------- 
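A minimal, illustrative sketch (not part of the repository) of how a parameter module such as tomp101 above is typically consumed: the run scripts build a Tracker from the tracker name and the parameter file name, exactly as run_video.py earlier in this listing does. The video path below is a placeholder.

# Illustrative usage sketch only -- assumes the 'tomp' tracker and the tomp101 parameter file shipped with this repository.
from pytracking.evaluation import Tracker

tracker = Tracker('tomp', 'tomp101')  # tracker name, parameter file name
tracker.run_video_generic(videofilepath='/path/to/video.mp4', debug=0)

--------------------------------------------------------------------------------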
/pytracking/parameter/tomp/tomp50.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.train_feature_size = 18 13 | params.feature_stride = 16 14 | params.image_sample_size = params.train_feature_size*params.feature_stride 15 | params.search_area_scale = 5 16 | params.border_mode = 'inside_major' 17 | params.patch_max_scale_change = 1.5 18 | 19 | # Learning parameters 20 | params.sample_memory_size = 2 21 | params.learning_rate = 0.01 22 | params.init_samples_minimum_weight = 0.25 23 | params.train_skipping = 20 24 | 25 | # Net optimization params 26 | params.update_classifier = True 27 | params.net_opt_iter = 10 28 | params.net_opt_update_iter = 2 29 | params.net_opt_hn_iter = 1 30 | 31 | # Detection parameters 32 | params.window_output = False 33 | 34 | # Init augmentation parameters 35 | params.use_augmentation = False 36 | params.augmentation = {} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.target_not_found_threshold = 0.25 44 | params.distractor_threshold = 0.8 45 | params.hard_negative_threshold = 0.5 46 | params.target_neighborhood_scale = 2.2 47 | params.dispalcement_scale = 0.8 48 | params.hard_negative_learning_rate = 0.02 49 | params.update_scale_when_uncertain = True 50 | params.conf_ths = 0.9 51 | params.search_area_rescaling_at_occlusion = True 52 | 53 | params.net = NetWithBackbone(net_path='tomp50.pth.tar', use_gpu=params.use_gpu) 54 | 55 | params.vot_anno_conversion_type = 'preserve_area' 56 | 57 | params.use_gt_box = True 58 | params.plot_iou = True 59 | 60 | return params 61 | -------------------------------------------------------------------------------- /ltr/run_training.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import importlib 5 | import multiprocessing 6 | import cv2 as cv 7 | import torch.backends.cudnn 8 | 9 | env_path = os.path.join(os.path.dirname(__file__), '..') 10 | if env_path not in sys.path: 11 | sys.path.append(env_path) 12 | 13 | import ltr.admin.settings as ws_settings 14 | 15 | 16 | def run_training(train_module, train_name, cudnn_benchmark=True): 17 | """Run a train scripts in train_settings. 18 | args: 19 | train_module: Name of module in the "train_settings/" folder. 20 | train_name: Name of the train settings file. 21 | cudnn_benchmark: Use cudnn benchmark or not (default is True). 
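        example (illustrative; 'dimp' exists under train_settings/, the settings file name is a placeholder):
            run_training('dimp', 'dimp50', cudnn_benchmark=True)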
22 | """ 23 | 24 | # This is needed to avoid strange crashes related to opencv 25 | cv.setNumThreads(0) 26 | 27 | torch.backends.cudnn.benchmark = cudnn_benchmark 28 | 29 | print('Training: {} {}'.format(train_module, train_name)) 30 | 31 | settings = ws_settings.Settings() 32 | settings.module_name = train_module 33 | settings.script_name = train_name 34 | settings.project_path = 'ltr/{}/{}'.format(train_module, train_name) 35 | 36 | expr_module = importlib.import_module('ltr.train_settings.{}.{}'.format(train_module, train_name)) 37 | expr_func = getattr(expr_module, 'run') 38 | 39 | expr_func(settings) 40 | 41 | 42 | def main(): 43 | parser = argparse.ArgumentParser(description='Run a train scripts in train_settings.') 44 | parser.add_argument('train_module', type=str, help='Name of module in the "train_settings/" folder.') 45 | parser.add_argument('train_name', type=str, help='Name of the train settings file.') 46 | parser.add_argument('--cudnn_benchmark', type=bool, default=True, help='Set cudnn benchmark on (1) or off (0) (default is on).') 47 | 48 | args = parser.parse_args() 49 | 50 | run_training(args.train_module, args.train_name, args.cudnn_benchmark) 51 | 52 | 53 | if __name__ == '__main__': 54 | multiprocessing.set_start_method('spawn', force=True) 55 | main() 56 | -------------------------------------------------------------------------------- /ltr/models/loss/bbr_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class GIoULoss(nn.Module): 6 | def __init__(self): 7 | super().__init__() 8 | 9 | def forward(self, pred, target, weights=None): 10 | if pred.dim() == 4: 11 | pred = pred.unsqueeze(0) 12 | 13 | pred = pred.permute(0, 1, 3, 4, 2).reshape(-1, 4) # nf x ns x x 4 x h x w 14 | target = target.permute(0, 1, 3, 4, 2).reshape(-1, 4) #nf x ns x 4 x h x w 15 | 16 | pred_left = pred[:, 0] 17 | pred_top = pred[:, 1] 18 | pred_right = pred[:, 2] 19 | pred_bottom = pred[:, 3] 20 | 21 | target_left = target[:, 0] 22 | target_top = target[:, 1] 23 | target_right = target[:, 2] 24 | target_bottom = target[:, 3] 25 | 26 | target_area = (target_left + target_right) * \ 27 | (target_top + target_bottom) 28 | pred_area = (pred_left + pred_right) * \ 29 | (pred_top + pred_bottom) 30 | 31 | w_intersect = torch.min(pred_left, target_left) + torch.min(pred_right, target_right) 32 | g_w_intersect = torch.max(pred_left, target_left) + torch.max( 33 | pred_right, target_right) 34 | h_intersect = torch.min(pred_bottom, target_bottom) + torch.min(pred_top, target_top) 35 | g_h_intersect = torch.max(pred_bottom, target_bottom) + torch.max(pred_top, target_top) 36 | ac_union = g_w_intersect * g_h_intersect + 1e-7 37 | area_intersect = w_intersect * h_intersect 38 | area_union = target_area + pred_area - area_intersect + 1e-7 39 | ious = (area_intersect) / (area_union) 40 | gious = ious - (ac_union - area_union) / ac_union 41 | 42 | losses = 1 - gious 43 | 44 | if weights is not None and weights.sum() > 0: 45 | weights = weights.reshape(-1) # nf x ns x 1 x h x w 46 | loss_mean = losses[weights>0].mean() 47 | ious = ious[weights>0] 48 | else: 49 | loss_mean = losses.mean() 50 | 51 | return loss_mean, ious 52 | -------------------------------------------------------------------------------- /ltr/models/layers/activation.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | 
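# softmax_reg below is a softmax with an optional constant term 'reg' appended along 'dim'
# before normalization and removed afterwards, so it regularizes the denominator:
# each output becomes exp(x_i) / (sum_j exp(x_j) + exp(reg)).
# Illustrative check (values are made up): for x = torch.zeros(1, 3),
# softmax_reg(x, dim=1, reg=0.0) gives 1 / (3 + 1) = 0.25 per element instead of 1/3.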
def softmax_reg(x: torch.Tensor, dim, reg=None): 8 | """Softmax with optional denominator regularization.""" 9 | if reg is None: 10 | return torch.softmax(x, dim=dim) 11 | dim %= x.dim() 12 | if isinstance(reg, (float, int)): 13 | reg = x.new_tensor([reg]) 14 | reg = reg.expand([1 if d==dim else x.shape[d] for d in range(x.dim())]) 15 | x = torch.cat((x, reg), dim=dim) 16 | return torch.softmax(x, dim=dim)[[slice(-1) if d==dim else slice(None) for d in range(x.dim())]] 17 | 18 | 19 | 20 | class MLU(nn.Module): 21 | r"""MLU activation 22 | """ 23 | def __init__(self, min_val, inplace=False): 24 | super().__init__() 25 | self.min_val = min_val 26 | self.inplace = inplace 27 | 28 | def forward(self, input): 29 | return F.elu(F.leaky_relu(input, 1/self.min_val, inplace=self.inplace), self.min_val, inplace=self.inplace) 30 | 31 | 32 | class LeakyReluPar(nn.Module): 33 | r"""LeakyRelu parametric activation 34 | """ 35 | 36 | def forward(self, x, a): 37 | return (1.0 - a)/2.0 * torch.abs(x) + (1.0 + a)/2.0 * x 38 | 39 | class LeakyReluParDeriv(nn.Module): 40 | r"""Derivative of the LeakyRelu parametric activation, wrt x. 41 | """ 42 | 43 | def forward(self, x, a): 44 | return (1.0 - a)/2.0 * torch.sign(x.detach()) + (1.0 + a)/2.0 45 | 46 | 47 | class BentIdentPar(nn.Module): 48 | r"""BentIdent parametric activation 49 | """ 50 | def __init__(self, b=1.0): 51 | super().__init__() 52 | self.b = b 53 | 54 | def forward(self, x, a): 55 | return (1.0 - a)/2.0 * (torch.sqrt(x*x + 4.0*self.b*self.b) - 2.0*self.b) + (1.0 + a)/2.0 * x 56 | 57 | 58 | class BentIdentParDeriv(nn.Module): 59 | r"""BentIdent parametric activation deriv 60 | """ 61 | def __init__(self, b=1.0): 62 | super().__init__() 63 | self.b = b 64 | 65 | def forward(self, x, a): 66 | return (1.0 - a)/2.0 * (x / torch.sqrt(x*x + 4.0*self.b*self.b)) + (1.0 + a)/2.0 67 | 68 | -------------------------------------------------------------------------------- /ltr/admin/model_constructor.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | import importlib 3 | 4 | 5 | def model_constructor(f): 6 | """ Wraps the function 'f' which returns the network. An extra field 'constructor' is added to the network returned 7 | by 'f'. This field contains an instance of the 'NetConstructor' class, which contains the information needed to 8 | re-construct the network, such as the name of the function 'f', the function arguments etc. Thus, the network can 9 | be easily constructed from a saved checkpoint by calling NetConstructor.get() function. 10 | """ 11 | @wraps(f) 12 | def f_wrapper(*args, **kwds): 13 | net_constr = NetConstructor(f.__name__, f.__module__, args, kwds) 14 | output = f(*args, **kwds) 15 | if isinstance(output, (tuple, list)): 16 | # Assume first argument is the network 17 | output[0].constructor = net_constr 18 | else: 19 | output.constructor = net_constr 20 | return output 21 | return f_wrapper 22 | 23 | 24 | class NetConstructor: 25 | """ Class to construct networks. Takes as input the function name (e.g. atom_resnet18), the name of the module 26 | which contains the network function (e.g. ltr.models.bbreg.atom) and the arguments for the network 27 | function. 
The class object can then be stored along with the network weights to re-construct the network.""" 28 | def __init__(self, fun_name, fun_module, args, kwds): 29 | """ 30 | args: 31 | fun_name - The function which returns the network 32 | fun_module - the module which contains the network function 33 | args - arguments which are passed to the network function 34 | kwds - arguments which are passed to the network function 35 | """ 36 | self.fun_name = fun_name 37 | self.fun_module = fun_module 38 | self.args = args 39 | self.kwds = kwds 40 | 41 | def get(self): 42 | """ Rebuild the network by calling the network function with the correct arguments. """ 43 | net_module = importlib.import_module(self.fun_module) 44 | net_fun = getattr(net_module, self.fun_name) 45 | return net_fun(*self.args, **self.kwds) 46 | -------------------------------------------------------------------------------- /ltr/admin/environment.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | from collections import OrderedDict 4 | 5 | 6 | def create_default_local_file(): 7 | path = os.path.join(os.path.dirname(__file__), 'local.py') 8 | 9 | empty_str = '\'\'' 10 | default_settings = OrderedDict({ 11 | 'workspace_dir': empty_str, 12 | 'tensorboard_dir': 'self.workspace_dir + \'/tensorboard/\'', 13 | 'pretrained_networks': 'self.workspace_dir + \'/pretrained_networks/\'', 14 | 'pregenerated_masks': empty_str, 15 | 'lasot_dir': empty_str, 16 | 'got10k_dir': empty_str, 17 | 'trackingnet_dir': empty_str, 18 | 'coco_dir': empty_str, 19 | 'lvis_dir': empty_str, 20 | 'sbd_dir': empty_str, 21 | 'imagenet_dir': empty_str, 22 | 'imagenetdet_dir': empty_str, 23 | 'ecssd_dir': empty_str, 24 | 'hkuis_dir': empty_str, 25 | 'msra10k_dir': empty_str, 26 | 'davis_dir': empty_str, 27 | 'youtubevos_dir': empty_str, 28 | 'lasot_candidate_matching_dataset_path': empty_str}) 29 | 30 | comment = {'workspace_dir': 'Base directory for saving network checkpoints.', 31 | 'tensorboard_dir': 'Directory for tensorboard files.'} 32 | 33 | with open(path, 'w') as f: 34 | f.write('class EnvironmentSettings:\n') 35 | f.write(' def __init__(self):\n') 36 | 37 | for attr, attr_val in default_settings.items(): 38 | comment_str = None 39 | if attr in comment: 40 | comment_str = comment[attr] 41 | if comment_str is None: 42 | f.write(' self.{} = {}\n'.format(attr, attr_val)) 43 | else: 44 | f.write(' self.{} = {} # {}\n'.format(attr, attr_val, comment_str)) 45 | 46 | 47 | def env_settings(): 48 | env_module_name = 'ltr.admin.local' 49 | try: 50 | env_module = importlib.import_module(env_module_name) 51 | return env_module.EnvironmentSettings() 52 | except: 53 | env_file = os.path.join(os.path.dirname(__file__), 'local.py') 54 | 55 | create_default_local_file() 56 | raise RuntimeError('YOU HAVE NOT SETUP YOUR local.py!!!\n Go to "{}" and set all the paths you need. 
Then try to run again.'.format(env_file)) 57 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/dimp18.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 18*16 13 | params.search_area_scale = 5 14 | 15 | # Learning parameters 16 | params.sample_memory_size = 50 17 | params.learning_rate = 0.01 18 | params.init_samples_minimum_weight = 0.25 19 | params.train_skipping = 20 20 | 21 | # Net optimization params 22 | params.update_classifier = True 23 | params.net_opt_iter = 10 24 | params.net_opt_update_iter = 2 25 | params.net_opt_hn_iter = 1 26 | 27 | # Detection parameters 28 | params.window_output = False 29 | 30 | # Init augmentation parameters 31 | params.use_augmentation = True 32 | params.augmentation = {'fliplr': True, 33 | 'rotate': [10, -10, 45, -45], 34 | 'blur': [(3,1), (1, 3), (2, 2)], 35 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 36 | 'dropout': (2, 0.2)} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.target_not_found_threshold = 0.25 44 | params.distractor_threshold = 0.8 45 | params.hard_negative_threshold = 0.5 46 | params.target_neighborhood_scale = 2.2 47 | params.dispalcement_scale = 0.8 48 | params.hard_negative_learning_rate = 0.02 49 | params.update_scale_when_uncertain = True 50 | 51 | # IoUnet parameters 52 | params.iounet_augmentation = False 53 | params.iounet_use_log_scale = True 54 | params.iounet_k = 3 55 | params.num_init_random_boxes = 9 56 | params.box_jitter_pos = 0.1 57 | params.box_jitter_sz = 0.5 58 | params.maximal_aspect_ratio = 6 59 | params.box_refinement_iter = 5 60 | params.box_refinement_step_length = 1 61 | params.box_refinement_step_decay = 1 62 | 63 | params.net = NetWithBackbone(net_path='dimp18.pth', 64 | use_gpu=params.use_gpu) 65 | 66 | params.vot_anno_conversion_type = 'preserve_area' 67 | 68 | return params 69 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/dimp50.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 18*16 13 | params.search_area_scale = 5 14 | 15 | # Learning parameters 16 | params.sample_memory_size = 50 17 | params.learning_rate = 0.01 18 | params.init_samples_minimum_weight = 0.25 19 | params.train_skipping = 20 20 | 21 | # Net optimization params 22 | params.update_classifier = True 23 | params.net_opt_iter = 10 24 | params.net_opt_update_iter = 2 25 | params.net_opt_hn_iter = 1 26 | 27 | # Detection parameters 28 | params.window_output = False 29 | 30 | # Init augmentation parameters 31 | params.use_augmentation = True 32 | params.augmentation = {'fliplr': True, 33 | 'rotate': [10, -10, 45, -45], 34 | 'blur': [(3,1), (1, 3), (2, 2)], 35 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), 
(-0.6,-0.6)], 36 | 'dropout': (2, 0.2)} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.target_not_found_threshold = 0.25 44 | params.distractor_threshold = 0.8 45 | params.hard_negative_threshold = 0.5 46 | params.target_neighborhood_scale = 2.2 47 | params.dispalcement_scale = 0.8 48 | params.hard_negative_learning_rate = 0.02 49 | params.update_scale_when_uncertain = True 50 | 51 | # IoUnet parameters 52 | params.iounet_augmentation = False 53 | params.iounet_use_log_scale = True 54 | params.iounet_k = 3 55 | params.num_init_random_boxes = 9 56 | params.box_jitter_pos = 0.1 57 | params.box_jitter_sz = 0.5 58 | params.maximal_aspect_ratio = 6 59 | params.box_refinement_iter = 5 60 | params.box_refinement_step_length = 1 61 | params.box_refinement_step_decay = 1 62 | 63 | params.net = NetWithBackbone(net_path='dimp50.pth', 64 | use_gpu=params.use_gpu) 65 | 66 | params.vot_anno_conversion_type = 'preserve_area' 67 | 68 | return params 69 | -------------------------------------------------------------------------------- /ltr/actors/bbreg.py: -------------------------------------------------------------------------------- 1 | from . import BaseActor 2 | 3 | 4 | class AtomActor(BaseActor): 5 | """ Actor for training the IoU-Net in ATOM""" 6 | def __call__(self, data): 7 | """ 8 | args: 9 | data - The input data, should contain the fields 'train_images', 'test_images', 'train_anno', 10 | 'test_proposals' and 'proposal_iou'. 11 | 12 | returns: 13 | loss - the training loss 14 | states - dict containing detailed losses 15 | """ 16 | # Run network to obtain IoU prediction for each proposal in 'test_proposals' 17 | iou_pred = self.net(data['train_images'], data['test_images'], data['train_anno'], data['test_proposals']) 18 | 19 | iou_pred = iou_pred.view(-1, iou_pred.shape[2]) 20 | iou_gt = data['proposal_iou'].view(-1, data['proposal_iou'].shape[2]) 21 | 22 | # Compute loss 23 | loss = self.objective(iou_pred, iou_gt) 24 | 25 | # Return training stats 26 | stats = {'Loss/total': loss.item(), 27 | 'Loss/iou': loss.item()} 28 | 29 | return loss, stats 30 | 31 | 32 | class AtomBBKLActor(BaseActor): 33 | """ Actor for training the IoU-Net in ATOM with BBKL""" 34 | def __call__(self, data): 35 | """ 36 | args: 37 | data - The input data, should contain the fields 'train_images', 'test_images', 'train_anno', 38 | 'test_proposals', 'proposal_density', and 'gt_density'. 
39 | 40 | returns: 41 | loss - the training loss 42 | states - dict containing detailed losses 43 | """ 44 | # Run network to obtain IoU prediction for each proposal in 'test_proposals' 45 | bb_scores = self.net(data['train_images'], data['test_images'], data['train_anno'], data['test_proposals']) 46 | 47 | bb_scores = bb_scores.view(-1, bb_scores.shape[2]) 48 | proposal_density = data['proposal_density'].view(-1, data['proposal_density'].shape[2]) 49 | gt_density = data['gt_density'].view(-1, data['gt_density'].shape[2]) 50 | 51 | # Compute loss 52 | loss = self.objective(bb_scores, sample_density=proposal_density, gt_density=gt_density, mc_dim=1) 53 | 54 | # Return training stats 55 | stats = {'Loss/total': loss.item(), 56 | 'Loss/bb_ce': loss.item()} 57 | 58 | return loss, stats 59 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/dimp50_vot19.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 16 * 16 13 | params.search_area_scale = 4.5 14 | 15 | # Learning parameters 16 | params.sample_memory_size = 100 17 | params.learning_rate = 0.0075 18 | params.init_samples_minimum_weight = 0.0 19 | params.train_skipping = 10 20 | 21 | # Net optimization params 22 | params.update_classifier = True 23 | params.net_opt_iter = 15 24 | params.net_opt_update_iter = 2 25 | params.net_opt_hn_iter = 2 26 | 27 | # Detection parameters 28 | params.window_output = True 29 | 30 | # Init augmentation parameters 31 | params.use_augmentation = True 32 | params.augmentation = {'fliplr': True, 33 | 'rotate': [-5, 10, -30, 60], 34 | 'blur': [(2, 0.2), (1, 3)], 35 | 'relativeshift': [(0.6, 0.6), (-0.6, -0.6)], 36 | 'dropout': (3, 0.2)} 37 | 38 | params.augmentation_expansion_factor = 1.4 39 | params.random_shift_factor = 1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.target_not_found_threshold = 0.0 44 | params.distractor_threshold = 100 45 | params.hard_negative_threshold = 0.45 46 | params.target_neighborhood_scale = 2.2 47 | params.dispalcement_scale = 0.7 48 | 49 | params.perform_hn_without_windowing = True 50 | 51 | params.hard_negative_learning_rate = 0.02 52 | params.update_scale_when_uncertain = True 53 | 54 | # IoUnet parameters 55 | params.iounet_augmentation = False 56 | params.iounet_use_log_scale = True 57 | params.iounet_k = 3 58 | params.num_init_random_boxes = 9 59 | params.box_jitter_pos = 0.1 60 | params.box_jitter_sz = 0.5 61 | params.maximal_aspect_ratio = 6 62 | params.box_refinement_iter = 3 63 | params.box_refinement_step_length = 1 64 | params.box_refinement_step_decay = 1 65 | 66 | params.net = NetWithBackbone(net_path='dimp50.pth', 67 | use_gpu=params.use_gpu) 68 | 69 | params.vot_anno_conversion_type = 'preserve_area' 70 | 71 | return params 72 | -------------------------------------------------------------------------------- /ltr/dataset/got10kvos.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | import numpy as np 4 | import torch 5 | from PIL import Image 6 | from ltr.dataset.got10k import Got10k 7 | from ltr.data.image_loader import jpeg4py_loader, imread_indexed 8 | 9 | 
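# Got10kVOS below extends the Got10k box dataset with per-frame segmentation masks read from
# <anno_path>/<sequence_name>/<frame>.png (see _get_anno_sequence_path / _get_anno_frame_path further down).
# Illustrative construction (the mask directory is a placeholder):
#   dataset = Got10kVOS(anno_path='/path/to/got10k_mask_annotations', split='train')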
10 | class Got10kVOS(Got10k): 11 | """ Got10K video object segmentation dataset. 12 | """ 13 | 14 | def __init__(self, anno_path=None, split='train'): 15 | super().__init__(split=split) 16 | self.anno_path = anno_path 17 | 18 | # TODO this prevents a crash, because that particular sequence does not have masks. 19 | # Once the missing mask is added, the following code can be removed (handled in base) 20 | self.sequence_list = [i for i in self.sequence_list if i not in ['GOT-10k_Train_004419']] 21 | 22 | self.sequence_meta_info = self._load_meta_info() 23 | self.seq_per_class = self._build_seq_per_class() 24 | 25 | self.class_list = list(self.seq_per_class.keys()) 26 | self.class_list.sort() 27 | 28 | @staticmethod 29 | def _load_anno(path): 30 | if not path.exists(): 31 | print('path', path, flush=True) 32 | return None 33 | im = np.array(Image.open(path)) 34 | im = np.atleast_3d(im)[..., 0] 35 | return im 36 | 37 | def _get_anno_sequence_path(self, seq_id): 38 | return os.path.join(self.anno_path, self.sequence_list[seq_id]) 39 | 40 | def _get_anno_frame_path(self, seq_path, frame_id): 41 | return os.path.join(seq_path, '{:08}.png'.format(frame_id + 1)) # frames start from 1 42 | 43 | def get_frames(self, seq_id, frame_ids, anno=None): 44 | seq_path = self._get_sequence_path(seq_id) 45 | obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]] 46 | 47 | frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids] 48 | 49 | if anno is None: 50 | anno = self.get_sequence_info(seq_id) 51 | 52 | anno_frames = {} 53 | for key, value in anno.items(): 54 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 55 | 56 | anno_seq_path = self._get_anno_sequence_path(seq_id) 57 | 58 | labels = [self._load_anno(Path(self._get_anno_frame_path(anno_seq_path, f))) for f in frame_ids] 59 | labels = [torch.Tensor(lb) for lb in labels] 60 | anno_frames['mask'] = labels 61 | 62 | return frame_list, anno_frames, obj_meta 63 | -------------------------------------------------------------------------------- /ltr/models/kys/conv_gru.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from ltr.models.layers.blocks import conv_block 4 | 5 | 6 | class ConvGRUCell(nn.Module): 7 | def __init__(self, input_dim, hidden_dim, kernel_size, padding_mode='zeros'): 8 | " Referenced from https://github.com/happyjin/ConvGRU-pytorch" 9 | super(ConvGRUCell, self).__init__() 10 | self.hidden_dim = hidden_dim 11 | 12 | if padding_mode == 'zeros': 13 | if not isinstance(kernel_size, (list, tuple)): 14 | kernel_size = (kernel_size, kernel_size) 15 | 16 | padding = kernel_size[0] // 2, kernel_size[1] // 2 17 | self.conv_reset = nn.Conv2d(input_dim + hidden_dim, self.hidden_dim, kernel_size, padding=padding) 18 | self.conv_update = nn.Conv2d(input_dim + hidden_dim, self.hidden_dim, kernel_size, padding=padding) 19 | 20 | self.conv_state_new = nn.Conv2d(input_dim+hidden_dim, self.hidden_dim, kernel_size, padding=padding) 21 | else: 22 | self.conv_reset = conv_block(input_dim + hidden_dim, hidden_dim, kernel_size=kernel_size, stride=1, 23 | padding=int(kernel_size // 2), batch_norm=False, relu=False, 24 | padding_mode=padding_mode) 25 | 26 | self.conv_update = conv_block(input_dim + hidden_dim, hidden_dim, kernel_size=kernel_size, stride=1, 27 | padding=int(kernel_size // 2), batch_norm=False, relu=False, 28 | padding_mode=padding_mode) 29 | 30 | self.conv_state_new = conv_block(input_dim + hidden_dim, hidden_dim, 
kernel_size=kernel_size, stride=1, 31 | padding=int(kernel_size // 2), batch_norm=False, relu=False, 32 | padding_mode=padding_mode) 33 | 34 | def forward(self, input, state_cur): 35 | input_state_cur = torch.cat([input, state_cur], dim=1) 36 | 37 | reset_gate = torch.sigmoid(self.conv_reset(input_state_cur)) 38 | update_gate = torch.sigmoid(self.conv_update(input_state_cur)) 39 | 40 | input_state_cur_reset = torch.cat([input, reset_gate*state_cur], dim=1) 41 | state_new = torch.tanh(self.conv_state_new(input_state_cur_reset)) 42 | 43 | state_next = (1.0 - update_gate) * state_cur + update_gate * state_new 44 | return state_next 45 | -------------------------------------------------------------------------------- /ltr/models/loss/target_candidate_matching_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | def recall(m, gt_m): 6 | mask = (gt_m > -1).float() 7 | return ((m == gt_m) * mask).sum(1) / mask.sum(1) 8 | 9 | 10 | def precision(m, gt_m): 11 | mask = ((m > -1) & (gt_m >= -1)).float() 12 | prec = ((m == gt_m) * mask).sum(1) / torch.max(mask.sum(1), torch.ones_like(mask.sum(1))) 13 | no_match_mask = (gt_m > -1).sum(1) == 0 14 | prec[no_match_mask] = float('NaN') 15 | return prec 16 | 17 | 18 | class TargetCandidateMatchingLoss(nn.Module): 19 | def __init__(self, nll_balancing=0.5, nll_weight=1.): 20 | super().__init__() 21 | self.nll_balancing = nll_balancing 22 | self.nll_weight = nll_weight 23 | 24 | 25 | def metrics(self, matches1, gt_matches1, **kwargs): 26 | rec = recall(matches1, gt_matches1[0]) 27 | prec = precision(matches1, gt_matches1[0]) 28 | return {'match_recall': rec, 'match_precision': prec} 29 | 30 | def forward(self, gt_assignment, gt_matches0, gt_matches1, log_assignment, bin_score, **kwargs): 31 | gt_assignment = gt_assignment[0] 32 | gt_matches0 = gt_matches0[0] 33 | gt_matches1 = gt_matches1[0] 34 | 35 | losses = {'total': 0} 36 | 37 | positive = gt_assignment.float() 38 | neg0 = (gt_matches0 == -1).float() 39 | neg1 = (gt_matches1 == -1).float() 40 | 41 | num_pos = torch.max(positive.sum((1, 2)), positive.new_tensor(1)) 42 | num_neg = torch.max(neg0.sum(1) + neg1.sum(1), neg0.new_tensor(1)) 43 | 44 | nll_pos = -(log_assignment[:, :-1, :-1] * positive).sum((1, 2)) 45 | 46 | nll_pos /= num_pos 47 | nll_neg0 = -(log_assignment[:, :-1, -1] * neg0).sum(1) 48 | nll_neg1 = -(log_assignment[:, -1, :-1] * neg1).sum(1) 49 | nll_neg = (nll_neg0 + nll_neg1) / num_neg 50 | 51 | nll = (self.nll_balancing * nll_pos + (1 - self.nll_balancing) * nll_neg) 52 | 53 | losses['assignment_nll'] = nll 54 | 55 | if self.nll_weight > 0: 56 | losses['total'] = nll * self.nll_weight 57 | 58 | # Some statistics 59 | losses['nll_pos'] = nll_pos 60 | losses['nll_neg'] = nll_neg 61 | losses['num_matchable'] = num_pos 62 | losses['num_unmatchable'] = num_neg 63 | losses['sinkhorn_norm'] = log_assignment.exp()[:, :-1].sum(2).mean(1) 64 | losses['bin_score'] = bin_score[None] 65 | 66 | return losses 67 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/dimp50_vot18.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 14 * 16 13 | 
params.search_area_scale = 4 14 | 15 | # Learning parameters 16 | params.sample_memory_size = 250 17 | params.learning_rate = 0.0075 18 | params.init_samples_minimum_weight = 0.0 19 | params.train_skipping = 10 20 | 21 | # Net optimization params 22 | params.update_classifier = True 23 | params.net_opt_iter = 25 24 | params.net_opt_update_iter = 3 25 | params.net_opt_hn_iter = 3 26 | 27 | # Detection parameters 28 | params.window_output = True 29 | 30 | # Init augmentation parameters 31 | params.use_augmentation = True 32 | params.augmentation = {'fliplr': True, 33 | 'rotate': [5, -5, 10, -10, 20, -20, 30, -30, 45, -45, -60, 60], 34 | 'blur': [(2, 0.2), (0.2, 2), (3, 1), (1, 3), (2, 2)], 35 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6, -0.6)], 36 | 'dropout': (7, 0.2)} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.target_not_found_threshold = 0.0 44 | params.distractor_threshold = 100 45 | params.hard_negative_threshold = 0.45 46 | params.target_neighborhood_scale = 2.2 47 | params.dispalcement_scale = 0.7 48 | 49 | params.perform_hn_without_windowing = True 50 | 51 | params.hard_negative_learning_rate = 0.02 52 | params.update_scale_when_uncertain = True 53 | 54 | # IoUnet parameters 55 | params.iounet_augmentation = False 56 | params.iounet_use_log_scale = True 57 | params.iounet_k = 3 58 | params.num_init_random_boxes = 9 59 | params.box_jitter_pos = 0.1 60 | params.box_jitter_sz = 0.5 61 | params.maximal_aspect_ratio = 6 62 | params.box_refinement_iter = 5 63 | params.box_refinement_step_length = 1 64 | params.box_refinement_step_decay = 1 65 | 66 | params.net = NetWithBackbone(net_path='dimp50.pth', 67 | use_gpu=params.use_gpu) 68 | 69 | params.vot_anno_conversion_type = 'preserve_area' 70 | 71 | return params 72 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/dimp18_vot18.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 14 * 16 13 | params.search_area_scale = 4 14 | params.feature_size_odd = False 15 | 16 | # Learning parameters 17 | params.sample_memory_size = 250 18 | params.learning_rate = 0.0075 19 | params.init_samples_minimum_weight = 0.0 20 | params.train_skipping = 10 21 | 22 | # Net optimization params 23 | params.update_classifier = True 24 | params.net_opt_iter = 25 25 | params.net_opt_update_iter = 3 26 | params.net_opt_hn_iter = 3 27 | 28 | # Detection parameters 29 | params.window_output = True 30 | 31 | # Init augmentation parameters 32 | params.use_augmentation = True 33 | params.augmentation = {'fliplr': True, 34 | 'rotate': [5, -5, 10, -10, 20, -20, 30, -30, 45, -45, -60, 60], 35 | 'blur': [(2, 0.2), (0.2, 2), (3, 1), (1, 3), (2, 2)], 36 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6, -0.6)], 37 | 'dropout': (7, 0.2)} 38 | 39 | params.augmentation_expansion_factor = 2 40 | params.random_shift_factor = 1/3 41 | 42 | # Advanced localization parameters 43 | params.advanced_localization = True 44 | params.target_not_found_threshold = 0.0 45 | params.distractor_threshold = 100 46 | params.hard_negative_threshold = 
0.45 47 | params.target_neighborhood_scale = 2.2 48 | params.dispalcement_scale = 0.7 49 | 50 | params.perform_hn_without_windowing = True 51 | 52 | params.hard_negative_learning_rate = 0.02 53 | params.update_scale_when_uncertain = True 54 | 55 | # IoUnet parameters 56 | params.iounet_augmentation = False 57 | params.iounet_use_log_scale = True 58 | params.iounet_k = 3 59 | params.num_init_random_boxes = 9 60 | params.box_jitter_pos = 0.1 61 | params.box_jitter_sz = 0.5 62 | params.maximal_aspect_ratio = 6 63 | params.box_refinement_iter = 5 64 | params.box_refinement_step_length = 1 65 | params.box_refinement_step_decay = 1 66 | 67 | params.net = NetWithBackbone(net_path='dimp18.pth', 68 | use_gpu=params.use_gpu) 69 | 70 | params.vot_anno_conversion_type = 'preserve_area' 71 | 72 | return params 73 | -------------------------------------------------------------------------------- /pytracking/parameter/kys/default.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | 5 | def parameters(): 6 | params = TrackerParams() 7 | 8 | params.debug = 0 9 | params.visualization = False 10 | 11 | params.use_gpu = True 12 | 13 | params.image_sample_size = 18*16 14 | params.search_area_scale = 5 15 | 16 | # Learning parameters 17 | params.sample_memory_size = 50 18 | params.learning_rate = 0.01 19 | params.init_samples_minimum_weight = 0.25 20 | params.train_skipping = 20 21 | params.output_sigma_factor = 1/4 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | params.net_opt_iter = 10 26 | params.net_opt_update_iter = 2 27 | 28 | # Init augmentation parameters 29 | params.use_augmentation = True 30 | params.augmentation = {'fliplr': True, 31 | 'rotate': [10, -10, 45, -45], 32 | 'blur': [(3,1), (1, 3), (2, 2)], 33 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 34 | 'dropout': (2, 0.2)} 35 | 36 | params.augmentation_expansion_factor = 2 37 | params.random_shift_factor = 1/3 38 | 39 | # Localization parameters 40 | params.window_output = True 41 | params.use_clipped_window = True 42 | params.effective_search_area = 10.0 43 | params.apply_window_to_dimp_score = True 44 | 45 | params.dimp_threshold = 0.05 46 | params.target_not_found_threshold_fused = 0.05 47 | 48 | params.reset_state_during_occlusion = False 49 | params.prev_feat_remove_subpixel_shift = True 50 | params.move_feat_to_center = True 51 | 52 | params.update_scale_when_uncertain = True 53 | 54 | # IoUnet parameters 55 | params.use_iou_net = True 56 | params.iounet_augmentation = False 57 | params.iounet_use_log_scale = True 58 | params.iounet_k = 3 59 | params.num_init_random_boxes = 9 60 | params.box_jitter_pos = 0.1 61 | params.box_jitter_sz = 0.5 62 | params.maximal_aspect_ratio = 6 63 | params.box_refinement_iter = 5 64 | params.box_refinement_step_length = 1 65 | params.box_refinement_step_decay = 1 66 | 67 | params.remove_offset_in_fused_score = True 68 | params.score_downsample_factor = 1 69 | 70 | params.net = NetWithBackbone(net_path='kys.pth', 71 | use_gpu=params.use_gpu) 72 | 73 | params.vot_anno_conversion_type = 'preserve_area' 74 | return params 75 | -------------------------------------------------------------------------------- /ltr/models/transformer/position_encoding.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | from torch import nn 4 | 5 | 6 | class 
NerfPositionalEncoding(nn.Module): 7 | def __init__(self, depth=10, sine_type='lin_sine', avoid_aliasing=False, max_spatial_resolution=None): 8 | ''' 9 | out_dim = in_dim * depth * 2 10 | ''' 11 | super().__init__() 12 | if sine_type == 'lin_sine': 13 | self.bases = [i+1 for i in range(depth)] 14 | elif sine_type == 'exp_sine': 15 | self.bases = [2**i for i in range(depth)] 16 | print(f'using {sine_type} as positional encoding') 17 | 18 | if avoid_aliasing and max_spatial_resolution == None: 19 | raise ValueError('Please specify the maxima spatial resolution (h, w) of the feature map') 20 | elif avoid_aliasing: 21 | self.factor = max_spatial_resolution/depth 22 | else: 23 | self.factor = 1. 24 | 25 | @torch.no_grad() 26 | def forward(self, inputs): 27 | out = torch.cat([torch.sin(i * self.factor * math.pi * inputs) for i in self.bases] + 28 | [torch.cos(i * self.factor * math.pi * inputs) for i in self.bases], axis=-1) 29 | assert torch.isnan(out).any() == False 30 | return out 31 | 32 | 33 | class PositionEmbeddingSine(nn.Module): 34 | """ 35 | This is a more standard version of the position embedding, very similar to the one 36 | used by the Attention is all you need paper, generalized to work on images. 37 | """ 38 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None, sine_type='lin_sine', 39 | avoid_aliazing=False, max_spatial_resolution=None): 40 | super().__init__() 41 | self.num_pos_feats = num_pos_feats 42 | self.temperature = temperature 43 | self.normalize = normalize 44 | if not isinstance(max_spatial_resolution, (list, tuple)): 45 | max_spatial_resolution = (max_spatial_resolution, max_spatial_resolution) 46 | self.sine = NerfPositionalEncoding(num_pos_feats // 2, sine_type, avoid_aliazing, max(max_spatial_resolution)) 47 | 48 | @torch.no_grad() 49 | def forward(self, mask): 50 | assert mask is not None 51 | not_mask = ~mask 52 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 53 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 54 | eps = 1e-6 55 | y_embed = (y_embed-0.5) / (y_embed[:, -1:, :] + eps) 56 | x_embed = (x_embed-0.5) / (x_embed[:, :, -1:] + eps) 57 | pos = torch.stack([x_embed, y_embed], dim=-1) 58 | return self.sine(pos).permute(0, 3, 1, 2) 59 | -------------------------------------------------------------------------------- /pytracking/features/net_wrappers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from pytracking.utils.loading import load_network 3 | 4 | 5 | class NetWrapper: 6 | """Used for wrapping networks in pytracking. 7 | Network modules and functions can be accessed directly as if they were members of this class.""" 8 | _rec_iter=0 9 | def __init__(self, net_path, use_gpu=True, initialize=False, **kwargs): 10 | self.net_path = net_path 11 | self.use_gpu = use_gpu 12 | self.net = None 13 | self.net_kwargs = kwargs 14 | if initialize: 15 | self.initialize() 16 | 17 | def __getattr__(self, name): 18 | if self._rec_iter > 0: 19 | self._rec_iter = 0 20 | return None 21 | self._rec_iter += 1 22 | try: 23 | ret_val = getattr(self.net, name) 24 | except Exception as e: 25 | self._rec_iter = 0 26 | raise e 27 | self._rec_iter = 0 28 | return ret_val 29 | 30 | def load_network(self): 31 | self.net = load_network(self.net_path, **self.net_kwargs) 32 | if self.use_gpu: 33 | self.cuda() 34 | self.eval() 35 | 36 | def initialize(self): 37 | self.load_network() 38 | 39 | 40 | class NetWithBackbone(NetWrapper): 41 | """Wraps a network with a common backbone. 
42 | Assumes the network have a 'extract_backbone_features(image)' function.""" 43 | 44 | def __init__(self, net_path, use_gpu=True, initialize=False, image_format='rgb', 45 | mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), **kwargs): 46 | super().__init__(net_path, use_gpu, initialize, **kwargs) 47 | 48 | self.image_format = image_format 49 | self._mean = torch.Tensor(mean).view(1, -1, 1, 1) 50 | self._std = torch.Tensor(std).view(1, -1, 1, 1) 51 | 52 | def initialize(self, image_format='rgb', mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): 53 | super().initialize() 54 | 55 | def preprocess_image(self, im: torch.Tensor): 56 | """Normalize the image with the mean and standard deviation used by the network.""" 57 | 58 | if self.image_format in ['rgb', 'bgr']: 59 | im = im/255 60 | 61 | if self.image_format in ['bgr', 'bgr255']: 62 | im = im[:, [2, 1, 0], :, :] 63 | im -= self._mean 64 | im /= self._std 65 | 66 | if self.use_gpu: 67 | im = im.cuda() 68 | 69 | return im 70 | 71 | def extract_backbone(self, im: torch.Tensor): 72 | """Extract backbone features from the network. 73 | Expects a float tensor image with pixel range [0, 255].""" 74 | im = self.preprocess_image(im) 75 | return self.net.extract_backbone_features(im) 76 | -------------------------------------------------------------------------------- /ltr/models/loss/segmentation.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | import ltr.models.loss.lovasz_loss as lovasz_loss 6 | 7 | 8 | class LovaszSegLoss(nn.Module): 9 | def __init__(self, classes=[1,], per_image=True): 10 | super().__init__() 11 | 12 | self.classes = classes 13 | self.per_image=per_image 14 | 15 | def forward(self, input, target): 16 | return lovasz_loss.lovasz_softmax(probas=torch.sigmoid(input), labels=target, per_image=self.per_image, classes=self.classes) 17 | 18 | 19 | def one_hot(labels: torch.Tensor, 20 | num_classes: int, 21 | device = None, 22 | dtype = None, 23 | eps = 1e-6) -> torch.Tensor: 24 | r"""Converts an integer label x-D tensor to a one-hot (x+1)-D tensor. 25 | Args: 26 | labels (torch.Tensor) : tensor with labels of shape :math:`(N, *)`, 27 | where N is batch size. Each value is an integer 28 | representing correct classification. 29 | num_classes (int): number of classes in labels. 30 | device (Optional[torch.device]): the desired device of returned tensor. 31 | Default: if None, uses the current device for the default tensor type 32 | (see torch.set_default_tensor_type()). device will be the CPU for CPU 33 | tensor types and the current CUDA device for CUDA tensor types. 34 | dtype (Optional[torch.dtype]): the desired data type of returned 35 | tensor. Default: if None, infers data type from values. 36 | Returns: 37 | torch.Tensor: the labels in one hot tensor of shape :math:`(N, C, *)`, 38 | Examples:: 39 | #>>> labels = torch.LongTensor([[[0, 1], [2, 0]]]) 40 | #>>> kornia.losses.one_hot(labels, num_classes=3) 41 | tensor([[[[1., 0.], 42 | [0., 1.]], 43 | [[0., 1.], 44 | [0., 0.]], 45 | [[0., 0.], 46 | [1., 0.]]]] 47 | """ 48 | if not torch.is_tensor(labels): 49 | raise TypeError("Input labels type is not a torch.Tensor. Got {}" 50 | .format(type(labels))) 51 | if not labels.dtype == torch.int64: 52 | raise ValueError( 53 | "labels must be of the same dtype torch.int64. 
Got: {}" .format( 54 | labels.dtype)) 55 | if num_classes < 1: 56 | raise ValueError("The number of classes must be bigger than one." 57 | " Got: {}".format(num_classes)) 58 | shape = labels.shape 59 | one_hot = torch.zeros((shape[0], num_classes, shape[1], shape[2])).to(device) 60 | return one_hot.scatter_(1, labels.unsqueeze(1), 1.0) + eps 61 | -------------------------------------------------------------------------------- /ltr/models/loss/kl_regression.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | from torch.nn import functional as F 5 | 6 | 7 | class KLRegression(nn.Module): 8 | """KL-divergence loss for probabilistic regression. 9 | It is computed using Monte Carlo (MC) samples from an arbitrary distribution.""" 10 | 11 | def __init__(self, eps=0.0): 12 | super().__init__() 13 | self.eps = eps 14 | 15 | def forward(self, scores, sample_density, gt_density, mc_dim=-1): 16 | """Args: 17 | scores: predicted score values 18 | sample_density: probability density of the sample distribution 19 | gt_density: probability density of the ground truth distribution 20 | mc_dim: dimension of the MC samples""" 21 | 22 | exp_val = scores - torch.log(sample_density + self.eps) 23 | 24 | L = torch.logsumexp(exp_val, dim=mc_dim) - math.log(scores.shape[mc_dim]) - \ 25 | torch.mean(scores * (gt_density / (sample_density + self.eps)), dim=mc_dim) 26 | 27 | return L.mean() 28 | 29 | 30 | class MLRegression(nn.Module): 31 | """Maximum likelihood loss for probabilistic regression. 32 | It is computed using Monte Carlo (MC) samples from an arbitrary distribution.""" 33 | 34 | def __init__(self, eps=0.0): 35 | super().__init__() 36 | self.eps = eps 37 | 38 | def forward(self, scores, sample_density, gt_density=None, mc_dim=-1): 39 | """Args: 40 | scores: predicted score values. First sample must be ground-truth 41 | sample_density: probability density of the sample distribution 42 | gt_density: not used 43 | mc_dim: dimension of the MC samples. Only mc_dim=1 supported""" 44 | 45 | assert mc_dim == 1 46 | assert (sample_density[:,0,...] == -1).all() 47 | 48 | exp_val = scores[:, 1:, ...] - torch.log(sample_density[:, 1:, ...] + self.eps) 49 | 50 | L = torch.logsumexp(exp_val, dim=mc_dim) - math.log(scores.shape[mc_dim] - 1) - scores[:, 0, ...] 51 | loss = L.mean() 52 | return loss 53 | 54 | 55 | class KLRegressionGrid(nn.Module): 56 | """KL-divergence loss for probabilistic regression. 
57 | It is computed using the grid integration strategy.""" 58 | 59 | def forward(self, scores, gt_density, grid_dim=-1, grid_scale=1.0): 60 | """Args: 61 | scores: predicted score values 62 | gt_density: probability density of the ground truth distribution 63 | grid_dim: dimension(s) of the grid 64 | grid_scale: area of one grid cell""" 65 | 66 | score_corr = grid_scale * torch.sum(scores * gt_density, dim=grid_dim) 67 | 68 | L = torch.logsumexp(scores, dim=grid_dim) + math.log(grid_scale) - score_corr 69 | 70 | return L.mean() 71 | -------------------------------------------------------------------------------- /ltr/dataset/base_image_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from ltr.data.image_loader import jpeg4py_loader 3 | 4 | 5 | class BaseImageDataset(torch.utils.data.Dataset): 6 | """ Base class for image datasets """ 7 | 8 | def __init__(self, name, root, image_loader=jpeg4py_loader): 9 | """ 10 | args: 11 | root - The root path to the dataset 12 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 13 | is used by default. 14 | """ 15 | self.name = name 16 | self.root = root 17 | self.image_loader = image_loader 18 | 19 | self.image_list = [] # Contains the list of sequences. 20 | self.class_list = [] 21 | 22 | def __len__(self): 23 | """ Returns size of the dataset 24 | returns: 25 | int - number of samples in the dataset 26 | """ 27 | return self.get_num_images() 28 | 29 | def __getitem__(self, index): 30 | """ Not to be used! Check get_frames() instead. 31 | """ 32 | return None 33 | 34 | def get_name(self): 35 | """ Name of the dataset 36 | 37 | returns: 38 | string - Name of the dataset 39 | """ 40 | raise NotImplementedError 41 | 42 | def get_num_images(self): 43 | """ Number of sequences in a dataset 44 | 45 | returns: 46 | int - number of sequences in the dataset.""" 47 | return len(self.image_list) 48 | 49 | def has_class_info(self): 50 | return False 51 | 52 | def get_class_name(self, image_id): 53 | return None 54 | 55 | def get_num_classes(self): 56 | return len(self.class_list) 57 | 58 | def get_class_list(self): 59 | return self.class_list 60 | 61 | def get_images_in_class(self, class_name): 62 | raise NotImplementedError 63 | 64 | def has_segmentation_info(self): 65 | return False 66 | 67 | def get_image_info(self, seq_id): 68 | """ Returns information about a particular image, 69 | 70 | args: 71 | seq_id - index of the image 72 | 73 | returns: 74 | Dict 75 | """ 76 | raise NotImplementedError 77 | 78 | def get_image(self, image_id, anno=None): 79 | """ Get a image 80 | 81 | args: 82 | image_id - index of image 83 | anno(None) - The annotation for the sequence (see get_sequence_info). If None, they will be loaded. 84 | 85 | returns: 86 | image - 87 | anno - 88 | dict - A dict containing meta information about the sequence, e.g. class of the target object. 
89 | 90 | """ 91 | raise NotImplementedError 92 | 93 | -------------------------------------------------------------------------------- /pytracking/evaluation/environment.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | 4 | 5 | class EnvSettings: 6 | def __init__(self): 7 | pytracking_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) 8 | 9 | self.results_path = '{}/tracking_results/'.format(pytracking_path) 10 | self.segmentation_path = '{}/segmentation_results/'.format(pytracking_path) 11 | self.network_path = '{}/networks/'.format(pytracking_path) 12 | self.result_plot_path = '{}/result_plots/'.format(pytracking_path) 13 | self.otb_path = '' 14 | self.nfs_path = '' 15 | self.uav_path = '' 16 | self.tpl_path = '' 17 | self.vot_path = '' 18 | self.got10k_path = '' 19 | self.lasot_path = '' 20 | self.lasot_extension_subset_path = '' 21 | self.trackingnet_path = '' 22 | self.oxuva_path = '' 23 | self.davis_dir = '' 24 | self.youtubevos_dir = '' 25 | 26 | self.got_packed_results_path = '' 27 | self.got_reports_path = '' 28 | self.tn_packed_results_path = '' 29 | 30 | 31 | def create_default_local_file(): 32 | comment = {'results_path': 'Where to store tracking results', 33 | 'network_path': 'Where tracking networks are stored.'} 34 | 35 | path = os.path.join(os.path.dirname(__file__), 'local.py') 36 | with open(path, 'w') as f: 37 | settings = EnvSettings() 38 | 39 | f.write('from pytracking.evaluation.environment import EnvSettings\n\n') 40 | f.write('def local_env_settings():\n') 41 | f.write(' settings = EnvSettings()\n\n') 42 | f.write(' # Set your local paths here.\n\n') 43 | 44 | for attr in dir(settings): 45 | comment_str = None 46 | if attr in comment: 47 | comment_str = comment[attr] 48 | attr_val = getattr(settings, attr) 49 | if not attr.startswith('__') and not callable(attr_val): 50 | if comment_str is None: 51 | f.write(' settings.{} = \'{}\'\n'.format(attr, attr_val)) 52 | else: 53 | f.write(' settings.{} = \'{}\' # {}\n'.format(attr, attr_val, comment_str)) 54 | f.write('\n return settings\n\n') 55 | 56 | 57 | def env_settings(): 58 | env_module_name = 'pytracking.evaluation.local' 59 | try: 60 | env_module = importlib.import_module(env_module_name) 61 | return env_module.local_env_settings() 62 | except: 63 | env_file = os.path.join(os.path.dirname(__file__), 'local.py') 64 | 65 | # Create a default file 66 | create_default_local_file() 67 | raise RuntimeError('YOU HAVE NOT SETUP YOUR local.py!!!\n Go to "{}" and set all the paths you need. 
' 68 | 'Then try to run again.'.format(env_file)) -------------------------------------------------------------------------------- /pytracking/parameter/kys/default_vot.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | 5 | def parameters(): 6 | params = TrackerParams() 7 | 8 | params.debug = 0 9 | params.visualization = False 10 | 11 | params.use_gpu = True 12 | 13 | params.image_sample_size = 14*16 14 | params.search_area_scale = 4 15 | 16 | # Learning parameters 17 | params.sample_memory_size = 250 18 | params.learning_rate = 0.0075 19 | params.init_samples_minimum_weight = 0.0 20 | params.train_skipping = 10 21 | 22 | # Net optimization params 23 | params.update_classifier = True 24 | params.net_opt_iter = 25 25 | params.net_opt_update_iter = 3 26 | params.net_opt_hn_iter = 3 27 | 28 | params.output_sigma_factor = 1/4 29 | 30 | # Init augmentation parameters 31 | params.use_augmentation = True 32 | params.augmentation = {'fliplr': True, 33 | 'rotate': [5, -5, 10, -10, 20, -20, 30, -30, 45, -45, -60, 60], 34 | 'blur': [(2, 0.2), (0.2, 2), (3, 1), (1, 3), (2, 2)], 35 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6, -0.6)], 36 | 'dropout': (7, 0.2)} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 1 / 3 40 | 41 | # localization parameters 42 | params.window_output = True 43 | params.use_clipped_window = True 44 | params.effective_search_area = 4.0 45 | params.apply_window_to_dimp_score = True 46 | 47 | params.target_not_found_threshold_fused = 0.05 48 | params.dimp_threshold = 0.05 49 | 50 | params.reset_state_during_occlusion = True 51 | 52 | params.prev_feat_remove_subpixel_shift = True 53 | params.move_feat_to_center = True 54 | 55 | params.perform_hn_mining_dimp = True 56 | params.hard_negative_threshold = 0.5 57 | params.target_neighborhood_scale_safe = 2.2 58 | params.hard_negative_learning_rate = 0.02 59 | params.update_scale_when_uncertain = True 60 | 61 | # IoUnet parameters 62 | params.use_iou_net = True 63 | params.iounet_augmentation = False 64 | params.iounet_use_log_scale = True 65 | params.iounet_k = 3 66 | params.num_init_random_boxes = 9 67 | params.box_jitter_pos = 0.1 68 | params.box_jitter_sz = 0.5 69 | params.maximal_aspect_ratio = 6 70 | params.box_refinement_iter = 5 71 | params.box_refinement_step_length = 1 72 | params.box_refinement_step_decay = 1 73 | 74 | params.remove_offset_in_fused_score = True 75 | params.score_downsample_factor = 1 76 | 77 | params.net = NetWithBackbone(net_path='kys.pth', 78 | use_gpu=params.use_gpu) 79 | 80 | params.vot_anno_conversion_type = 'preserve_area' 81 | return params 82 | -------------------------------------------------------------------------------- /pytracking/run_tracker.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | 5 | env_path = os.path.join(os.path.dirname(__file__), '..') 6 | if env_path not in sys.path: 7 | sys.path.append(env_path) 8 | 9 | from pytracking.evaluation import get_dataset 10 | from pytracking.evaluation.running import run_dataset 11 | from pytracking.evaluation import Tracker 12 | 13 | 14 | def run_tracker(tracker_name, tracker_param, run_id=None, dataset_name='otb', sequence=None, debug=0, threads=0, 15 | visdom_info=None): 16 | """Run tracker on sequence or dataset. 
17 | args: 18 | tracker_name: Name of tracking method. 19 | tracker_param: Name of parameter file. 20 | run_id: The run id. 21 | dataset_name: Name of dataset (otb, nfs, uav, tpl, vot, tn, gott, gotv, lasot). 22 | sequence: Sequence number or name. 23 | debug: Debug level. 24 | threads: Number of threads. 25 | visdom_info: Dict optionally containing 'use_visdom', 'server' and 'port' for Visdom visualization. 26 | """ 27 | 28 | visdom_info = {} if visdom_info is None else visdom_info 29 | 30 | dataset = get_dataset(dataset_name) 31 | 32 | if sequence is not None: 33 | dataset = [dataset[sequence]] 34 | 35 | trackers = [Tracker(tracker_name, tracker_param, run_id)] 36 | 37 | run_dataset(dataset, trackers, debug, threads, visdom_info=visdom_info) 38 | 39 | 40 | def main(): 41 | parser = argparse.ArgumentParser(description='Run tracker on sequence or dataset.') 42 | parser.add_argument('tracker_name', type=str, help='Name of tracking method.') 43 | parser.add_argument('tracker_param', type=str, help='Name of parameter file.') 44 | parser.add_argument('--runid', type=int, default=None, help='The run id.') 45 | parser.add_argument('--dataset_name', type=str, default='otb', help='Name of dataset (otb, nfs, uav, tpl, vot, tn, gott, gotv, lasot).') 46 | parser.add_argument('--sequence', type=str, default=None, help='Sequence number or name.') 47 | parser.add_argument('--debug', type=int, default=0, help='Debug level.') 48 | parser.add_argument('--threads', type=int, default=0, help='Number of threads.') 49 | parser.add_argument('--use_visdom', type=bool, default=True, help='Flag to enable visdom.') 50 | parser.add_argument('--visdom_server', type=str, default='127.0.0.1', help='Server for visdom.') 51 | parser.add_argument('--visdom_port', type=int, default=8097, help='Port for visdom.') 52 | 53 | args = parser.parse_args() 54 | 55 | try: 56 | seq_name = int(args.sequence) 57 | except: 58 | seq_name = args.sequence 59 | 60 | run_tracker(args.tracker_name, args.tracker_param, args.runid, args.dataset_name, seq_name, args.debug, 61 | args.threads, {'use_visdom': args.use_visdom, 'server': args.visdom_server, 'port': args.visdom_port}) 62 | 63 | 64 | if __name__ == '__main__': 65 | main() 66 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/prdimp18.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 18*16 13 | params.search_area_scale = 5 14 | 15 | # Learning parameters 16 | params.sample_memory_size = 50 17 | params.learning_rate = 0.01 18 | params.init_samples_minimum_weight = 0.25 19 | params.train_skipping = 20 20 | 21 | # Net optimization params 22 | params.update_classifier = True 23 | params.net_opt_iter = 10 24 | params.net_opt_update_iter = 2 25 | params.net_opt_hn_iter = 1 26 | 27 | # Detection parameters 28 | params.window_output = False 29 | 30 | # Init augmentation parameters 31 | params.use_augmentation = True 32 | params.augmentation = {'fliplr': True, 33 | 'rotate': [10, -10, 45, -45], 34 | 'blur': [(3,1), (1, 3), (2, 2)], 35 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 36 | 'dropout': (2, 0.2)} 37 | 38 | params.augmentation_expansion_factor = 2 39 | params.random_shift_factor = 
1/3 40 | 41 | # Advanced localization parameters 42 | params.advanced_localization = True 43 | params.score_preprocess = 'softmax' 44 | params.target_not_found_threshold = 0.04 45 | params.distractor_threshold = 0.8 46 | params.hard_negative_threshold = 0.5 47 | params.target_neighborhood_scale = 2.2 48 | params.dispalcement_scale = 0.8 49 | params.hard_negative_learning_rate = 0.02 50 | params.update_scale_when_uncertain = True 51 | 52 | # IoUnet parameters 53 | params.box_refinement_space = 'relative' 54 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 55 | params.iounet_k = 3 # Top-k average to estimate final box 56 | params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 57 | params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 58 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 59 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 60 | params.box_refinement_iter = 10 # Number of iterations for refining the boxes 61 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 62 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 63 | 64 | params.net = NetWithBackbone(net_path='prdimp18.pth.tar', 65 | use_gpu=params.use_gpu) 66 | 67 | params.vot_anno_conversion_type = 'preserve_area' 68 | 69 | return params 70 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/super_dimp.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 22*16 13 | params.search_area_scale = 6 14 | params.border_mode = 'inside_major' 15 | params.patch_max_scale_change = 1.5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 50 19 | params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | params.train_skipping = 20 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | params.net_opt_iter = 10 26 | params.net_opt_update_iter = 2 27 | params.net_opt_hn_iter = 1 28 | 29 | # Detection parameters 30 | params.window_output = False 31 | 32 | # Init augmentation parameters 33 | params.use_augmentation = True 34 | params.augmentation = {'fliplr': True, 35 | 'rotate': [10, -10, 45, -45], 36 | 'blur': [(3,1), (1, 3), (2, 2)], 37 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 38 | 'dropout': (2, 0.2)} 39 | 40 | params.augmentation_expansion_factor = 2 41 | params.random_shift_factor = 1/3 42 | 43 | # Advanced localization parameters 44 | params.advanced_localization = True 45 | params.target_not_found_threshold = 0.25 46 | params.distractor_threshold = 0.8 47 | params.hard_negative_threshold = 0.5 48 | params.target_neighborhood_scale = 2.2 49 | params.dispalcement_scale = 0.8 50 | params.hard_negative_learning_rate = 0.02 51 | params.update_scale_when_uncertain = True 52 | 53 | # IoUnet parameters 54 | params.box_refinement_space = 'relative' 55 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 56 | params.iounet_k = 3 # Top-k average to estimate final box 57 | 
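# A hedged sketch of how an ATOM/DiMP-style tracker typically consumes the IoU-Net
# settings in this block (the actual logic lives in the tracker/bbreg code, not in
# this parameter file); the helper names below are illustrative only:
#
#   proposals = [cls_box] + jitter(cls_box, params.num_init_random_boxes,
#                                  params.box_jitter_pos, params.box_jitter_sz)
#   for _ in range(params.box_refinement_iter):
#       proposals += params.box_refinement_step_length * grad_iou(proposals)
#   final_box = top_k_mean(proposals, k=params.iounet_k)   # the "Top-k average"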
params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 58 | params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 59 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 60 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 61 | params.box_refinement_iter = 10 # Number of iterations for refining the boxes 62 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 63 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 64 | 65 | params.net = NetWithBackbone(net_path='super_dimp.pth.tar', 66 | use_gpu=params.use_gpu) 67 | 68 | params.vot_anno_conversion_type = 'preserve_area' 69 | 70 | return params 71 | -------------------------------------------------------------------------------- /ltr/models/kys/cost_volume.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | from spatial_correlation_sampler import SpatialCorrelationSampler 6 | 7 | 8 | class CostVolume(nn.Module): 9 | def __init__(self, kernel_size, max_displacement, stride=1, abs_coordinate_output=False): 10 | super().__init__() 11 | self.correlation_layer = SpatialCorrelationSampler(kernel_size, 2*max_displacement + 1, stride, 12 | int((kernel_size-1)/2)) 13 | self.abs_coordinate_output = abs_coordinate_output 14 | 15 | def forward(self, feat1, feat2): 16 | assert feat1.dim() == 4 and feat2.dim() == 4, 'Expect 4 dimensional inputs' 17 | 18 | batch_size = feat1.shape[0] 19 | 20 | cost_volume = self.correlation_layer(feat1, feat2) 21 | 22 | if self.abs_coordinate_output: 23 | cost_volume = cost_volume.view(batch_size, -1, cost_volume.shape[-2], cost_volume.shape[-1]) 24 | cost_volume = remap_cost_volume(cost_volume) 25 | 26 | return cost_volume.view(batch_size, -1, cost_volume.shape[-2], cost_volume.shape[-1]) 27 | 28 | 29 | def remap_cost_volume(cost_volume): 30 | """ 31 | 32 | :param cost_volume: cost volume of shape (batch, (2*md-1)*(2*md-1), rows, cols), where md is the maximum displacement 33 | allowed when computing the cost volume. 
34 | :return: cost_volume_remapped: The input cost volume is remapped to shape (batch, rows, cols, rows, cols) 35 | """ 36 | 37 | if cost_volume.dim() != 4: 38 | raise ValueError('input cost_volume should have 4 dimensions') 39 | 40 | [batch_size, d_, num_rows, num_cols] = cost_volume.size() 41 | d_sqrt_ = np.sqrt(d_) 42 | 43 | if not d_sqrt_.is_integer(): 44 | raise ValueError("Invalid cost volume") 45 | 46 | cost_volume = cost_volume.view(batch_size, int(d_sqrt_), int(d_sqrt_), num_rows, num_cols) 47 | 48 | cost_volume_remapped = torch.zeros((batch_size, num_rows, num_cols, 49 | num_rows, num_cols), 50 | dtype=cost_volume.dtype, 51 | device=cost_volume.device) 52 | 53 | if cost_volume.size()[1] % 2 != 1: 54 | raise ValueError 55 | 56 | md = int((cost_volume.size()[1]-1)/2) 57 | 58 | for r in range(num_rows): 59 | for c in range(num_cols): 60 | r1_ = r - md 61 | r2_ = r1_ + 2*md + 1 62 | c1_ = c - md 63 | c2_ = c1_ + 2*md + 1 64 | 65 | r1_pad_ = max(-r1_, 0) 66 | r2_pad_ = max(r2_ - cost_volume_remapped.shape[1], 0) 67 | 68 | c1_pad_ = max(-c1_, 0) 69 | c2_pad_ = max(c2_ - cost_volume_remapped.shape[2], 0) 70 | 71 | d_ = cost_volume.size()[1] 72 | cost_volume_remapped[:, r1_+r1_pad_:r2_-r2_pad_, c1_+c1_pad_:c2_-c2_pad_, r, c] = \ 73 | cost_volume[:, r1_pad_:d_-r2_pad_, c1_pad_:d_-c2_pad_, r, c] 74 | 75 | return cost_volume_remapped 76 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/prdimp50.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 22*16 13 | params.search_area_scale = 6 14 | params.border_mode = 'inside_major' 15 | params.patch_max_scale_change = 1.5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 50 19 | params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | params.train_skipping = 20 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | params.net_opt_iter = 10 26 | params.net_opt_update_iter = 2 27 | params.net_opt_hn_iter = 1 28 | 29 | # Detection parameters 30 | params.window_output = False 31 | 32 | # Init augmentation parameters 33 | params.use_augmentation = True 34 | params.augmentation = {'fliplr': True, 35 | 'rotate': [10, -10, 45, -45], 36 | 'blur': [(3,1), (1, 3), (2, 2)], 37 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 38 | 'dropout': (2, 0.2)} 39 | 40 | params.augmentation_expansion_factor = 2 41 | params.random_shift_factor = 1/3 42 | 43 | # Advanced localization parameters 44 | params.advanced_localization = True 45 | params.score_preprocess = 'softmax' 46 | params.target_not_found_threshold = 0.04 47 | params.distractor_threshold = 0.8 48 | params.hard_negative_threshold = 0.5 49 | params.target_neighborhood_scale = 2.2 50 | params.dispalcement_scale = 0.8 51 | params.hard_negative_learning_rate = 0.02 52 | params.update_scale_when_uncertain = True 53 | 54 | # IoUnet parameters 55 | params.box_refinement_space = 'relative' 56 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 57 | params.iounet_k = 3 # Top-k average to estimate final box 58 | params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 59 
| params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 60 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 61 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 62 | params.box_refinement_iter = 10 # Number of iterations for refining the boxes 63 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 64 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 65 | 66 | params.net = NetWithBackbone(net_path='prdimp50.pth.tar', 67 | use_gpu=params.use_gpu) 68 | 69 | params.vot_anno_conversion_type = 'preserve_area' 70 | 71 | return params 72 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp_simple/super_dimp_simple.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 22*16 13 | params.search_area_scale = 6 14 | params.border_mode = 'inside_major' 15 | params.patch_max_scale_change = 1.5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 50 19 | params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | params.train_skipping = 20 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | params.net_opt_iter = 10 26 | params.net_opt_update_iter = 2 27 | params.net_opt_hn_iter = 1 28 | 29 | # Detection parameters 30 | params.window_output = False 31 | 32 | # Init augmentation parameters 33 | params.use_augmentation = True 34 | params.augmentation = {'fliplr': True, 35 | 'rotate': [10, -10, 45, -45], 36 | 'blur': [(3,1), (1, 3), (2, 2)], 37 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 38 | 'dropout': (2, 0.2)} 39 | 40 | params.augmentation_expansion_factor = 2 41 | params.random_shift_factor = 1/3 42 | 43 | # Advanced localization parameters 44 | params.advanced_localization = True 45 | params.target_not_found_threshold = 0.25 46 | params.distractor_threshold = 0.8 47 | params.hard_negative_threshold = 0.5 48 | params.target_neighborhood_scale = 2.2 49 | params.dispalcement_scale = 0.8 50 | params.hard_negative_learning_rate = 0.02 51 | params.update_scale_when_uncertain = True 52 | 53 | # IoUnet parameters 54 | params.box_refinement_space = 'relative' 55 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 56 | params.iounet_k = 3 # Top-k average to estimate final box 57 | params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 58 | params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 59 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 60 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 61 | params.box_refinement_iter = 10 # Number of iterations for refining the boxes 62 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 63 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 64 | 65 | params.net = NetWithBackbone(net_path='super_dimp_simple.pth.tar', 66 | use_gpu=params.use_gpu) 67 | 68 | params.vot_anno_conversion_type = 'preserve_area' 69 | 
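# Usage sketch (hedged): a parameter module like this one is normally selected by
# name through the evaluation entry point shown in pytracking/run_tracker.py, e.g.
#
#   python pytracking/run_tracker.py dimp_simple super_dimp_simple --dataset_name otb
#
# where, assuming the standard layout of this repository, the first argument picks
# the tracker package (pytracking/tracker/dimp_simple) and the second picks this
# parameter file (pytracking/parameter/dimp_simple/super_dimp_simple.py).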
70 | params.use_gt_box = True 71 | 72 | return params 73 | -------------------------------------------------------------------------------- /pytracking/evaluation/mobifacedataset.py: -------------------------------------------------------------------------------- 1 | from pytracking.evaluation.data import Sequence, BaseDataset, SequenceList 2 | import glob 3 | import numpy as np 4 | import os.path as osp 5 | from collections import OrderedDict 6 | import pandas as pd 7 | 8 | 9 | class MobifaceDataset(BaseDataset): 10 | """ Mobiface dataset. 11 | Publication: 12 | MobiFace: A Novel Dataset for Mobile Face Tracking in the Wild 13 | Yiming Lin, Shiyang Cheng, Jie Shen, Maja Pantic 14 | arXiv:1805.09749, 2018 15 | https://arxiv.org/pdf/1805.09749v2 16 | 17 | Download dataset from https://mobiface.github.io/ 18 | """ 19 | def __init__(self, split): 20 | """ 21 | args: 22 | split - Split to use. Can be i) 'train': official training set, ii) 'test': official test set, iii) 'all': whole dataset. 23 | """ 24 | super().__init__() 25 | self.base_path = self.env_settings.mobiface_path 26 | self.sequence_list = self._get_sequence_list(split) 27 | self.split = split 28 | 29 | def get_sequence_list(self): 30 | return SequenceList([self._construct_sequence(s) for s in self.sequence_list]) 31 | 32 | def _get_sequence_list(self, split): 33 | 34 | self.train_meta_fn = osp.join(self.base_path, 'train.meta.csv') 35 | self.test_meta_fn = osp.join(self.base_path, 'test.meta.csv') 36 | self.train_meta = pd.read_csv(self.train_meta_fn,index_col=0).transpose().to_dict() 37 | self.test_meta = pd.read_csv(self.test_meta_fn,index_col=0).transpose().to_dict() 38 | if split == 'train': 39 | self.meta = self.train_meta 40 | elif split == 'test': 41 | self.meta = self.test_meta 42 | else: 43 | self.meta = {**self.train_meta, **self.test_meta} # In Python 3.5 or greater 44 | self.meta = OrderedDict(sorted(self.meta.items(), key=lambda t: t[0])) 45 | self.anno_files = [] 46 | for k,v in self.meta.items(): 47 | if k in self.train_meta.keys(): 48 | self.anno_files.append(osp.abspath(osp.join(self.base_path,'train', k+'.annot.csv'))) 49 | else: 50 | self.anno_files.append(osp.abspath(osp.join(self.base_path,'test', k+'.annot.csv'))) 51 | self.seq_names = sorted(list(self.meta.keys())) 52 | self.seq_dirs = [fn[:-len('.annot.csv')] for fn in self.anno_files] 53 | return self.seq_names 54 | 55 | def _construct_sequence(self, sequence_name): 56 | index = self.seq_names.index(sequence_name) 57 | img_files = sorted(glob.glob(self.seq_dirs[index]+'/*.jpg')) 58 | if len(img_files) == 0: 59 | img_files = sorted(glob.glob(self.seq_dirs[index]+'.png')) 60 | with open(self.anno_files[index], 'r') as f: 61 | anno = np.loadtxt(f, delimiter=',', skiprows=1, dtype=int) 62 | anno = anno[:,1:] 63 | assert anno.shape[1] == 4 64 | 65 | return Sequence(sequence_name, img_files, anno.reshape(-1, 4)) 66 | 67 | def __len__(self): 68 | return len(self.sequence_list) 69 | -------------------------------------------------------------------------------- /pytracking/parameter/dimp/prdimp50_vot18.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 14 * 16 13 | params.search_area_scale = 4 14 | params.feature_size_odd = False 15 | 16 | # 
Learning parameters 17 | params.sample_memory_size = 250 18 | params.learning_rate = 0.01 19 | params.init_samples_minimum_weight = 0.0 20 | params.train_skipping = 1 21 | 22 | # Net optimization params 23 | params.update_classifier = True 24 | params.net_opt_iter = 25 25 | params.net_opt_update_iter = 1 26 | params.net_opt_hn_iter = 1 27 | 28 | # Detection parameters 29 | params.window_output = True 30 | 31 | # Init augmentation parameters 32 | params.use_augmentation = True 33 | params.augmentation = {'fliplr': True, 34 | 'rotate': [5, -5, 10, -10, 20, -20, 30, -30, 45, -45, -60, 60], 35 | 'blur': [(2, 0.2), (0.2, 2), (3, 1), (1, 3), (2, 2)], 36 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6, -0.6)], 37 | 'dropout': (7, 0.2)} 38 | 39 | params.augmentation_expansion_factor = 2 40 | params.random_shift_factor = 1/3 41 | 42 | # Advanced localization parameters 43 | params.advanced_localization = True 44 | # params.score_preprocess = 'softmax' 45 | params.target_not_found_threshold = 0.00 46 | params.distractor_threshold = 99999 47 | params.hard_negative_threshold = 999999 48 | params.target_neighborhood_scale = 2.2 49 | params.dispalcement_scale = 0.7 50 | params.perform_hn_without_windowing = True 51 | params.hard_negative_learning_rate = 0.02 52 | params.update_scale_when_uncertain = True 53 | 54 | # IoUnet parameters 55 | params.box_refinement_space = 'relative' 56 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 57 | params.iounet_k = 3 # Top-k average to estimate final box 58 | params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 59 | params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 60 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 61 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 62 | params.box_refinement_iter = 10 # Number of iterations for refining the boxes 63 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 64 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 65 | 66 | params.net = NetWithBackbone(net_path='prdimp50.pth.tar', 67 | use_gpu=params.use_gpu) 68 | 69 | params.vot_anno_conversion_type = 'preserve_area' 70 | 71 | return params 72 | -------------------------------------------------------------------------------- /pytracking/evaluation/lagotdataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | from collections import OrderedDict 5 | from pytracking.evaluation.data import Sequence, BaseDataset, SequenceList 6 | 7 | 8 | class LaGOTDataset(BaseDataset): 9 | def __init__(self, sot_mode=False): 10 | super().__init__() 11 | self.sot_mode = sot_mode 12 | self.base_path = self.env_settings.lasot_path 13 | if sot_mode: 14 | self.anno_path = os.path.join(self.env_settings.lagot_path, 15 | 'LaGOT_one_object_per_sequence_annotations_final.json') 16 | else: 17 | self.anno_path = os.path.join(self.env_settings.lagot_path, 18 | 'LaGOT_multiple_object_per_sequence_annotations_final.json') 19 | 20 | self.annos = self._load_annotations() 21 | self.sequence_list = list(self.annos.keys()) 22 | 23 | def _load_annotations(self): 24 | with open(self.anno_path, 'r') as f: 25 | anno = json.load(f) 26 | 27 | return anno 28 | 29 | def get_sequence_list(self): 30 | return SequenceList([self._construct_sequence(s) for s in 
self.sequence_list]) 31 | 32 | def _construct_sequence(self, sequence_name): 33 | if self.sot_mode: 34 | ground_truth_rect = np.array(self.annos[sequence_name]['xywh']) 35 | frames_list = [os.path.join(self.base_path, p) for p in self.annos[sequence_name]['frames']] 36 | target_visible = np.ones(ground_truth_rect.shape[0], dtype=np.bool) 37 | target_visible[::3] = np.all(ground_truth_rect[::3] >= 0, axis=1) 38 | return Sequence(sequence_name, frames_list, 'LaGOT', ground_truth_rect.reshape(-1, 4), 39 | target_visible=target_visible) 40 | else: 41 | frames_list = [f'{self.base_path}/{p}' for p in self.annos[sequence_name]['frames']] 42 | 43 | track_ids = list(self.annos[sequence_name]['xywh'].keys()) 44 | 45 | gt_bboxes = OrderedDict() 46 | 47 | for tid, boxes in self.annos[sequence_name]['xywh'].items(): 48 | gt_bboxes[tid] = np.array(boxes) 49 | 50 | init_data = dict() 51 | for tid, boxes in gt_bboxes.items(): 52 | im_id = 0 53 | init_box = boxes[im_id] 54 | 55 | if np.all(init_box > -1): 56 | if im_id not in init_data: 57 | init_data[im_id] = {'object_ids': [tid], 'bbox': {tid: np.array(init_box)}} 58 | else: 59 | init_data[im_id]['object_ids'].append(tid) 60 | init_data[im_id]['bbox'][tid] = np.array(init_box) 61 | 62 | assert set(init_data[0]['object_ids']) == set(track_ids) 63 | gt_bboxes = OrderedDict({key: val for key, val in gt_bboxes.items() if key in track_ids}) 64 | 65 | return Sequence(name=sequence_name, frames=frames_list, dataset='LaGOT', ground_truth_rect=gt_bboxes, 66 | init_data=init_data, object_ids=track_ids, 67 | multiobj_mode=True) 68 | 69 | def __len__(self): 70 | return len(self.sequence_list) 71 | 72 | def _get_sequence_list(self): 73 | return list(self.annos.keys()) 74 | -------------------------------------------------------------------------------- /ltr/models/kys/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | def shift_features(feat, relative_translation_vector): 7 | T_mat = torch.eye(2).repeat(feat.shape[0], 1, 1).to(feat.device) 8 | T_mat = torch.cat((T_mat, relative_translation_vector.view(-1, 2, 1)), dim=2) 9 | 10 | grid = F.affine_grid(T_mat, feat.shape) 11 | 12 | feat_out = F.grid_sample(feat, grid) 13 | return feat_out 14 | 15 | 16 | class CenterShiftFeatures(nn.Module): 17 | def __init__(self, feature_stride): 18 | super().__init__() 19 | self.feature_stride = feature_stride 20 | 21 | def forward(self, feat, anno): 22 | anno = anno.view(-1, 4) 23 | c_x = (anno[:, 0] + anno[:, 2] * 0.5) / self.feature_stride 24 | c_y = (anno[:, 1] + anno[:, 3] * 0.5) / self.feature_stride 25 | 26 | t_x = 2 * (c_x - feat.shape[-1] * 0.5) / feat.shape[-1] 27 | t_y = 2 * (c_y - feat.shape[-2] * 0.5) / feat.shape[-2] 28 | 29 | t = torch.cat((t_x.view(-1, 1), t_y.view(-1, 1)), dim=1) 30 | 31 | feat_out = shift_features(feat, t) 32 | return feat_out 33 | 34 | 35 | class DiMPScoreJittering(): 36 | def __init__(self, p_zero=0.0, distractor_ratio=1.0, p_distractor=0, max_distractor_enhance_factor=1, 37 | min_distractor_enhance_factor=0.75): 38 | """ Jitters predicted score map by randomly enhancing distractor peaks and masking out target peaks""" 39 | self.p_zero = p_zero 40 | self.distractor_ratio = distractor_ratio 41 | self.p_distractor = p_distractor 42 | self.max_distractor_enhance_factor = max_distractor_enhance_factor 43 | self.min_distractor_enhance_factor = min_distractor_enhance_factor 44 | 45 | def rand(self, sz, min_val, max_val): 46 | 
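# Draws `sz` uniform samples in [min_val, max_val); min_val and max_val are expected
# to be tensors here, so the samples are created directly on min_val's device.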
return torch.rand(sz, device=min_val.device) * (max_val - min_val) + min_val 47 | 48 | def __call__(self, score, label): 49 | score_shape = score.shape 50 | 51 | score = score.view(-1, score_shape[-2]*score_shape[-1]) 52 | num_score_maps = score.shape[0] 53 | 54 | label = label.view(score.shape) 55 | 56 | dist_roll_value = torch.rand(num_score_maps).to(score.device) 57 | 58 | score_c = score.clone().detach() 59 | score_neg = score_c * (label < 1e-4).float() 60 | score_pos = score_c * (label > 0.2).float() 61 | 62 | target_max_val, _ = torch.max(score_pos, dim=1) 63 | dist_max_val, dist_id = torch.max(score_neg, dim=1) 64 | 65 | jitter_score = (dist_roll_value < self.p_distractor) & ((dist_max_val / target_max_val) > self.distractor_ratio) 66 | 67 | for i in range(num_score_maps): 68 | score_c[i, dist_id[i]] = self.rand(1, target_max_val[i]*self.min_distractor_enhance_factor, 69 | target_max_val[i]*self.max_distractor_enhance_factor) 70 | 71 | zero_roll_value = torch.rand(num_score_maps).to(score.device) 72 | zero_score = (zero_roll_value < self.p_zero) & ~jitter_score 73 | 74 | score_c[zero_score, :] = 0 75 | 76 | score_jittered = score*(1.0 - (jitter_score | zero_score).float()).view(num_score_maps, 1).float() + \ 77 | score_c*(jitter_score | zero_score).float().view(num_score_maps, 1).float() 78 | 79 | return score_jittered.view(score_shape) 80 | -------------------------------------------------------------------------------- /ltr/data/bounding_box_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def rect_to_rel(bb, sz_norm=None): 5 | """Convert standard rectangular parametrization of the bounding box [x, y, w, h] 6 | to relative parametrization [cx/sw, cy/sh, log(w), log(h)], where [cx, cy] is the center coordinate. 7 | args: 8 | bb - N x 4 tensor of boxes. 9 | sz_norm - [N] x 2 tensor of value of [sw, sh] (optional). sw=w and sh=h if not given. 10 | """ 11 | 12 | c = bb[...,:2] + 0.5 * bb[...,2:] 13 | if sz_norm is None: 14 | c_rel = c / bb[...,2:] 15 | else: 16 | c_rel = c / sz_norm 17 | sz_rel = torch.log(bb[...,2:]) 18 | return torch.cat((c_rel, sz_rel), dim=-1) 19 | 20 | 21 | def rel_to_rect(bb, sz_norm=None): 22 | """Inverts the effect of rect_to_rel. See above.""" 23 | 24 | sz = torch.exp(bb[...,2:]) 25 | if sz_norm is None: 26 | c = bb[...,:2] * sz 27 | else: 28 | c = bb[...,:2] * sz_norm 29 | tl = c - 0.5 * sz 30 | return torch.cat((tl, sz), dim=-1) 31 | 32 | 33 | def masks_to_bboxes(mask, fmt='c'): 34 | 35 | """ Convert a mask tensor to one or more bounding boxes. 36 | Note: This function is a bit new, make sure it does what it says. /Andreas 37 | :param mask: Tensor of masks, shape = (..., H, W) 38 | :param fmt: bbox layout. 
'c' => "center + size" or (x_center, y_center, width, height) 39 | 't' => "top left + size" or (x_left, y_top, width, height) 40 | 'v' => "vertices" or (x_left, y_top, x_right, y_bottom) 41 | :return: tensor containing a batch of bounding boxes, shape = (..., 4) 42 | """ 43 | batch_shape = mask.shape[:-2] 44 | mask = mask.reshape((-1, *mask.shape[-2:])) 45 | bboxes = [] 46 | 47 | for m in mask: 48 | mx = m.sum(dim=-2).nonzero() 49 | my = m.sum(dim=-1).nonzero() 50 | bb = [mx.min(), my.min(), mx.max(), my.max()] if (len(mx) > 0 and len(my) > 0) else [0, 0, 0, 0] 51 | bboxes.append(bb) 52 | 53 | bboxes = torch.tensor(bboxes, dtype=torch.float32, device=mask.device) 54 | bboxes = bboxes.reshape(batch_shape + (4,)) 55 | 56 | if fmt == 'v': 57 | return bboxes 58 | 59 | x1 = bboxes[..., :2] 60 | s = bboxes[..., 2:] - x1 + 1 61 | 62 | if fmt == 'c': 63 | return torch.cat((x1 + 0.5 * s, s), dim=-1) 64 | elif fmt == 't': 65 | return torch.cat((x1, s), dim=-1) 66 | 67 | raise ValueError("Undefined bounding box layout '%s'" % fmt) 68 | 69 | 70 | def masks_to_bboxes_multi(mask, ids, fmt='c'): 71 | assert mask.dim() == 2 72 | bboxes = [] 73 | 74 | for id in ids: 75 | mx = (mask == id).sum(dim=-2).nonzero() 76 | my = (mask == id).float().sum(dim=-1).nonzero() 77 | bb = [mx.min(), my.min(), mx.max(), my.max()] if (len(mx) > 0 and len(my) > 0) else [0, 0, 0, 0] 78 | 79 | bb = torch.tensor(bb, dtype=torch.float32, device=mask.device) 80 | 81 | x1 = bb[:2] 82 | s = bb[2:] - x1 + 1 83 | 84 | if fmt == 'v': 85 | pass 86 | elif fmt == 'c': 87 | bb = torch.cat((x1 + 0.5 * s, s), dim=-1) 88 | elif fmt == 't': 89 | bb = torch.cat((x1, s), dim=-1) 90 | else: 91 | raise ValueError("Undefined bounding box layout '%s'" % fmt) 92 | bboxes.append(bb) 93 | 94 | return bboxes 95 | -------------------------------------------------------------------------------- /pytracking/parameter/keep_track/default.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone, NetWrapper 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 30*16 13 | params.search_area_scale = 8 14 | params.border_mode = 'inside_major' 15 | params.patch_max_scale_change = 1.5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 50 19 | params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | params.train_skipping = 20 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | params.net_opt_iter = 10 26 | params.net_opt_update_iter = 2 27 | params.net_opt_hn_iter = 1 28 | 29 | # Detection parameters 30 | params.window_output = False 31 | 32 | # Init augmentation parameters 33 | params.use_augmentation = True 34 | params.augmentation = {'fliplr': True, 35 | 'rotate': [10, -10, 45, -45], 36 | 'blur': [(3,1), (1, 3), (2, 2)], 37 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 38 | 'dropout': (2, 0.2)} 39 | 40 | params.augmentation_expansion_factor = 2 41 | params.random_shift_factor = 1/3 42 | 43 | # Advanced localization parameters 44 | params.advanced_localization = True 45 | params.target_not_found_threshold = 0.25 46 | params.distractor_threshold = 0.8 47 | params.hard_negative_threshold = 0.5 48 | params.target_neighborhood_scale = 2.2 49 | params.dispalcement_scale = 0.8 50 | params.hard_negative_learning_rate = 
0.02 51 | params.update_scale_when_uncertain = True 52 | 53 | # IoUnet parameters 54 | params.box_refinement_space = 'relative' 55 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 56 | params.iounet_k = 3 # Top-k average to estimate final box 57 | params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 58 | params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 59 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 60 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 61 | params.box_refinement_iter = 10 # Number of iterations for refining the boxes 62 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 63 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 64 | 65 | # KeepTrack parameters 66 | params.use_certainty_for_weight_computation = True 67 | params.certainty_for_weight_computation_ths = 0.5 68 | params.target_candidate_matching_net = NetWrapper(net_path='keep_track.pth.tar', use_gpu=params.use_gpu) 69 | 70 | params.vot_anno_conversion_type = 'preserve_area' 71 | params.net = NetWithBackbone(net_path='super_dimp_simple.pth.tar', use_gpu=params.use_gpu) 72 | 73 | params.visualize_candidate_matching = False 74 | params.visualize_candidate_assignment_matrix = False 75 | 76 | return params 77 | -------------------------------------------------------------------------------- /ltr/dataset/ecssd.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .base_image_dataset import BaseImageDataset 3 | from ltr.data.image_loader import jpeg4py_loader, opencv_loader, imread_indexed 4 | import torch 5 | from collections import OrderedDict 6 | from ltr.admin.environment import env_settings 7 | from ltr.data.bounding_box_utils import masks_to_bboxes 8 | 9 | 10 | class ECSSD(BaseImageDataset): 11 | """ 12 | Extended Complex Scene Saliency Dataset (ECSSD) 13 | 14 | Publication: 15 | Hierarchical Image Saliency Detection on Extended CSSD 16 | Jianping Shi, Qiong Yan, Li Xu, Jiaya Jia 17 | TPAMI, 2016 18 | https://arxiv.org/pdf/1408.5418.pdf 19 | 20 | Download the dataset from http://www.cse.cuhk.edu.hk/leojia/projects/hsaliency/dataset.html 21 | """ 22 | def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, min_area=None): 23 | """ 24 | args: 25 | root - path to ECSSD root folder 26 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 27 | is used by default. 28 | data_fraction - Fraction of dataset to be used. The complete dataset is used by default 29 | min_area - Objects with area less than min_area are filtered out. 
Default is 0.0 30 | """ 31 | root = env_settings().ecssd_dir if root is None else root 32 | super().__init__('ECSSD', root, image_loader) 33 | 34 | self.image_list = self._load_dataset(min_area=min_area) 35 | 36 | if data_fraction is not None: 37 | raise NotImplementedError 38 | 39 | def _load_dataset(self, min_area=None): 40 | images = [] 41 | 42 | for i in range(1, 1001): 43 | a = imread_indexed(os.path.join(self.root, 'ground_truth_mask', '{:04d}.png'.format(i))) 44 | 45 | if min_area is None or (a > 0).sum() > min_area: 46 | images.append(i) 47 | 48 | return images 49 | 50 | def get_name(self): 51 | return 'ecssd' 52 | 53 | def has_segmentation_info(self): 54 | return True 55 | 56 | def get_image_info(self, im_id): 57 | mask = imread_indexed(os.path.join(self.root, 'ground_truth_mask', '{:04d}.png'.format(self.image_list[im_id]))) 58 | 59 | mask = torch.Tensor(mask == 255) 60 | bbox = masks_to_bboxes(mask, fmt='t').view(4,) 61 | 62 | valid = (bbox[2] > 0) & (bbox[3] > 0) 63 | visible = valid.clone().byte() 64 | 65 | return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible} 66 | 67 | def get_meta_info(self, im_id): 68 | object_meta = OrderedDict({'object_class_name': None, 69 | 'motion_class': None, 70 | 'major_class': None, 71 | 'root_class': None, 72 | 'motion_adverb': None}) 73 | 74 | return object_meta 75 | 76 | def get_image(self, image_id, anno=None): 77 | frame = self.image_loader(os.path.join(self.root, 'images', '{:04d}.jpg'.format(self.image_list[image_id]))) 78 | 79 | if anno is None: 80 | anno = self.get_image_info(image_id) 81 | 82 | object_meta = self.get_meta_info(image_id) 83 | 84 | return frame, anno, object_meta 85 | -------------------------------------------------------------------------------- /ltr/dataset/hku_is.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .base_image_dataset import BaseImageDataset 3 | from ltr.data.image_loader import jpeg4py_loader, opencv_loader, imread_indexed 4 | import torch 5 | from collections import OrderedDict 6 | from ltr.admin.environment import env_settings 7 | from ltr.data.bounding_box_utils import masks_to_bboxes 8 | 9 | 10 | class HKUIS(BaseImageDataset): 11 | """ 12 | HKU-IS salient object detection dataset 13 | 14 | Publication: 15 | Visual saliency based on multiscale deep features 16 | Guanbin Li and Yizhou Yu 17 | CVPR, 2015 18 | https://arxiv.org/pdf/1503.08663.pdf 19 | 20 | Download dataset from https://sites.google.com/site/ligb86/hkuis 21 | """ 22 | 23 | def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, min_area=None): 24 | """ 25 | args: 26 | root - path to HKU-IS root folder 27 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 28 | is used by default. 29 | data_fraction - Fraction of dataset to be used. The complete dataset is used by default 30 | min_area - Objects with area less than min_area are filtered out. 
Default is 0.0 31 | """ 32 | root = env_settings().hkuis_dir if root is None else root 33 | super().__init__('HKUIS', root, image_loader) 34 | 35 | self.image_list, self.anno_list = self._load_dataset(min_area=min_area) 36 | 37 | if data_fraction is not None: 38 | raise NotImplementedError 39 | 40 | def _load_dataset(self, min_area=None): 41 | files_list = os.listdir(os.path.join(self.root, 'imgs')) 42 | image_list = [f[:-4] for f in files_list] 43 | 44 | images = [] 45 | annos = [] 46 | 47 | for f in image_list: 48 | a = imread_indexed(os.path.join(self.root, 'gt', '{}.png'.format(f))) 49 | 50 | if min_area is None or (a > 0).sum() > min_area: 51 | im = opencv_loader(os.path.join(self.root, 'imgs', '{}.png'.format(f))) 52 | images.append(im) 53 | annos.append(a) 54 | 55 | return images, annos 56 | 57 | def get_name(self): 58 | return 'hku-is' 59 | 60 | def has_segmentation_info(self): 61 | return True 62 | 63 | def get_image_info(self, im_id): 64 | mask = self.anno_list[im_id] 65 | mask = torch.Tensor(mask == 255) 66 | bbox = masks_to_bboxes(mask, fmt='t').view(4,) 67 | 68 | valid = (bbox[2] > 0) & (bbox[3] > 0) 69 | visible = valid.clone().byte() 70 | 71 | return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible} 72 | 73 | def get_meta_info(self, im_id): 74 | object_meta = OrderedDict({'object_class_name': None, 75 | 'motion_class': None, 76 | 'major_class': None, 77 | 'root_class': None, 78 | 'motion_adverb': None}) 79 | 80 | return object_meta 81 | 82 | def get_image(self, image_id, anno=None): 83 | frame = self.image_list[image_id] 84 | 85 | if anno is None: 86 | anno = self.get_image_info(image_id) 87 | 88 | object_meta = self.get_meta_info(image_id) 89 | 90 | return frame, anno, object_meta 91 | -------------------------------------------------------------------------------- /pytracking/parameter/keep_track/default_fast.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone, NetWrapper 3 | 4 | def parameters(): 5 | params = TrackerParams() 6 | 7 | params.debug = 0 8 | params.visualization = False 9 | 10 | params.use_gpu = True 11 | 12 | params.image_sample_size = 22*16 13 | params.search_area_scale = 6 14 | params.border_mode = 'inside_major' 15 | params.patch_max_scale_change = 1.5 16 | 17 | # Learning parameters 18 | params.sample_memory_size = 50 19 | params.learning_rate = 0.01 20 | params.init_samples_minimum_weight = 0.25 21 | params.train_skipping = 20 22 | 23 | # Net optimization params 24 | params.update_classifier = True 25 | params.net_opt_iter = 10 26 | params.net_opt_update_iter = 2 27 | params.net_opt_hn_iter = 1 28 | 29 | # Detection parameters 30 | params.window_output = False 31 | 32 | # Init augmentation parameters 33 | params.use_augmentation = True 34 | params.augmentation = {'fliplr': True, 35 | 'rotate': [10, -10, 45, -45], 36 | 'blur': [(3,1), (1, 3), (2, 2)], 37 | 'relativeshift': [(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)], 38 | 'dropout': (2, 0.2)} 39 | 40 | params.augmentation_expansion_factor = 2 41 | params.random_shift_factor = 1/3 42 | 43 | # Advanced localization parameters 44 | params.advanced_localization = True 45 | params.target_not_found_threshold = 0.25 46 | params.distractor_threshold = 0.8 47 | params.hard_negative_threshold = 0.5 48 | params.target_neighborhood_scale = 2.2 49 | params.dispalcement_scale = 0.8 50 | params.hard_negative_learning_rate = 0.02 51 | 
params.update_scale_when_uncertain = True 52 | 53 | # IoUnet parameters 54 | params.box_refinement_space = 'relative' 55 | params.iounet_augmentation = False # Use the augmented samples to compute the modulation vector 56 | params.iounet_k = 3 # Top-k average to estimate final box 57 | params.num_init_random_boxes = 9 # Num extra random boxes in addition to the classifier prediction 58 | params.box_jitter_pos = 0.1 # How much to jitter the translation for random boxes 59 | params.box_jitter_sz = 0.5 # How much to jitter the scale for random boxes 60 | params.maximal_aspect_ratio = 6 # Limit on the aspect ratio 61 | params.box_refinement_iter = 3 # Number of iterations for refining the boxes 62 | params.box_refinement_step_length = 2.5e-3 # 1 # Gradient step length in the bounding box refinement 63 | params.box_refinement_step_decay = 1 # Multiplicative step length decay (1 means no decay) 64 | 65 | # KeepTrack parameters 66 | params.use_certainty_for_weight_computation = True 67 | params.certainty_for_weight_computation_ths = 0.5 68 | params.local_max_candidate_score_th = 0.1 69 | params.target_candidate_matching_net = NetWrapper(net_path='keep_track.pth.tar', use_gpu=params.use_gpu) 70 | 71 | params.vot_anno_conversion_type = 'preserve_area' 72 | params.net = NetWithBackbone(net_path='super_dimp_simple.pth.tar', use_gpu=params.use_gpu) 73 | 74 | params.visualize_candidate_matching = False 75 | params.visualize_candidate_assignment_matrix = False 76 | 77 | return params 78 | -------------------------------------------------------------------------------- /ltr/dataset/msra10k.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .base_image_dataset import BaseImageDataset 3 | from ltr.data.image_loader import jpeg4py_loader, imread_indexed 4 | import torch 5 | from collections import OrderedDict 6 | from ltr.admin.environment import env_settings 7 | from ltr.data.bounding_box_utils import masks_to_bboxes 8 | 9 | 10 | class MSRA10k(BaseImageDataset): 11 | """ 12 | MSRA10k salient object detection dataset 13 | 14 | Publication: 15 | Global contrast based salient region detection 16 | Ming-Ming Cheng, Niloy J. Mitra, Xiaolei Huang, Philip H. S. Torr, and Shi-Min Hu 17 | TPAMI, 2015 18 | https://mmcheng.net/mftp/Papers/SaliencyTPAMI.pdf 19 | 20 | Download dataset from https://mmcheng.net/msra10k/ 21 | """ 22 | 23 | def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, min_area=None): 24 | """ 25 | args: 26 | root - path to MSRA10k root folder 27 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 28 | is used by default. 29 | data_fraction - Fraction of dataset to be used. The complete dataset is used by default 30 | min_area - Objects with area less than min_area are filtered out. 
Default is 0.0 31 | """ 32 | root = env_settings().msra10k_dir if root is None else root 33 | super().__init__('MSRA10k', root, image_loader) 34 | 35 | self.image_list = self._load_dataset(min_area=min_area) 36 | 37 | if data_fraction is not None: 38 | raise NotImplementedError 39 | 40 | def _load_dataset(self, min_area=None): 41 | files_list = os.listdir(os.path.join(self.root, 'Imgs')) 42 | image_list = [f[:-4] for f in files_list if f[-3:] == 'jpg'] 43 | 44 | images = [] 45 | 46 | for f in image_list: 47 | a = imread_indexed(os.path.join(self.root, 'Imgs', '{}.png'.format(f))) 48 | 49 | if min_area is None or (a > 0).sum() > min_area: 50 | images.append(f) 51 | 52 | return images 53 | 54 | def get_name(self): 55 | return 'msra10k' 56 | 57 | def has_segmentation_info(self): 58 | return True 59 | 60 | def get_image_info(self, im_id): 61 | mask = imread_indexed(os.path.join(self.root, 'Imgs', '{}.png'.format(self.image_list[im_id]))) 62 | mask = torch.Tensor(mask == 255) 63 | bbox = masks_to_bboxes(mask, fmt='t').view(4,) 64 | 65 | valid = (bbox[2] > 0) & (bbox[3] > 0) 66 | visible = valid.clone().byte() 67 | 68 | return {'bbox': bbox, 'mask': mask, 'valid': valid, 'visible': visible} 69 | 70 | def get_meta_info(self, im_id): 71 | object_meta = OrderedDict({'object_class_name': None, 72 | 'motion_class': None, 73 | 'major_class': None, 74 | 'root_class': None, 75 | 'motion_adverb': None}) 76 | 77 | return object_meta 78 | 79 | def get_image(self, image_id, anno=None): 80 | frame = self.image_loader(os.path.join(self.root, 'Imgs', '{}.jpg'.format(self.image_list[image_id]))) 81 | 82 | if anno is None: 83 | anno = self.get_image_info(image_id) 84 | 85 | object_meta = self.get_meta_info(image_id) 86 | 87 | return frame, anno, object_meta 88 | -------------------------------------------------------------------------------- /pytracking/evaluation/got10kdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pytracking.evaluation.data import Sequence, BaseDataset, SequenceList 3 | from pytracking.utils.load_text import load_text 4 | import os 5 | from PIL import Image 6 | from pathlib import Path 7 | 8 | 9 | class GOT10KDataset(BaseDataset): 10 | """ GOT-10k dataset. 
11 | 12 | Publication: 13 | GOT-10k: A Large High-Diversity Benchmark for Generic Object Tracking in the Wild 14 | Lianghua Huang, Xin Zhao, and Kaiqi Huang 15 | arXiv:1810.11981, 2018 16 | https://arxiv.org/pdf/1810.11981.pdf 17 | 18 | Download dataset from http://got-10k.aitestunion.com/downloads 19 | """ 20 | def __init__(self, split, vos_mode=False): 21 | super().__init__() 22 | # Split can be test, val, or ltrval (a validation split consisting of videos from the official train set) 23 | if split == 'test' or split == 'val': 24 | self.base_path = os.path.join(self.env_settings.got10k_path, split) 25 | else: 26 | self.base_path = os.path.join(self.env_settings.got10k_path, 'train') 27 | 28 | self.sequence_list = self._get_sequence_list(split) 29 | self.split = split 30 | 31 | self.vos_mode = vos_mode 32 | 33 | self.mask_path = None 34 | if self.vos_mode: 35 | self.mask_path = self.env_settings.got10k_mask_path 36 | 37 | def get_sequence_list(self): 38 | return SequenceList([self._construct_sequence(s) for s in self.sequence_list]) 39 | 40 | def _construct_sequence(self, sequence_name): 41 | anno_path = '{}/{}/groundtruth.txt'.format(self.base_path, sequence_name) 42 | 43 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64) 44 | 45 | frames_path = '{}/{}'.format(self.base_path, sequence_name) 46 | frame_list = [frame for frame in os.listdir(frames_path) if frame.endswith(".jpg")] 47 | frame_list.sort(key=lambda f: int(f[:-4])) 48 | frames_list = [os.path.join(frames_path, frame) for frame in frame_list] 49 | 50 | masks = None 51 | if self.vos_mode: 52 | seq_mask_path = '{}/{}'.format(self.mask_path, sequence_name) 53 | masks = [self._load_mask(Path(self._get_anno_frame_path(seq_mask_path, f[:-3] + 'png'))) for f in 54 | frame_list[0:1]] 55 | 56 | return Sequence(sequence_name, frames_list, 'got10k', ground_truth_rect.reshape(-1, 4), 57 | ground_truth_seg=masks) 58 | 59 | @staticmethod 60 | def _load_mask(path): 61 | if not path.exists(): 62 | print('Error: Could not read: ', path, flush=True) 63 | return None 64 | im = np.array(Image.open(path)) 65 | im = np.atleast_3d(im)[..., 0] 66 | return im 67 | 68 | def _get_anno_frame_path(self, seq_path, frame_name): 69 | return os.path.join(seq_path, frame_name) 70 | 71 | def __len__(self): 72 | return len(self.sequence_list) 73 | 74 | def _get_sequence_list(self, split): 75 | with open('{}/list.txt'.format(self.base_path)) as f: 76 | sequence_list = f.read().splitlines() 77 | 78 | if split == 'ltrval': 79 | with open('{}/got10k_val_split.txt'.format(self.env_settings.dataspec_path)) as f: 80 | seq_ids = f.read().splitlines() 81 | 82 | sequence_list = [sequence_list[int(x)] for x in seq_ids] 83 | return sequence_list 84 | -------------------------------------------------------------------------------- /ltr/models/target_classifier/features.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from torchvision.models.resnet import BasicBlock, Bottleneck 5 | from ltr.models.layers.normalization import InstanceL2Norm 6 | from ltr.models.layers.transform import InterpCat 7 | 8 | 9 | def residual_basic_block(feature_dim=256, num_blocks=1, l2norm=True, final_conv=False, norm_scale=1.0, out_dim=None, 10 | interp_cat=False, final_relu=False, init_pool=False): 11 | """Construct a network block based on the BasicBlock used in ResNet 18 and 34.""" 12 | if out_dim is None: 13 | out_dim = feature_dim 14 | feat_layers = 
[] 15 | if interp_cat: 16 | feat_layers.append(InterpCat()) 17 | if init_pool: 18 | feat_layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 19 | for i in range(num_blocks): 20 | odim = feature_dim if i < num_blocks - 1 + int(final_conv) else out_dim 21 | feat_layers.append(BasicBlock(feature_dim, odim)) 22 | if final_conv: 23 | feat_layers.append(nn.Conv2d(feature_dim, out_dim, kernel_size=3, padding=1, bias=False)) 24 | if final_relu: 25 | feat_layers.append(nn.ReLU(inplace=True)) 26 | if l2norm: 27 | feat_layers.append(InstanceL2Norm(scale=norm_scale)) 28 | return nn.Sequential(*feat_layers) 29 | 30 | 31 | def residual_basic_block_pool(feature_dim=256, num_blocks=1, l2norm=True, final_conv=False, norm_scale=1.0, out_dim=None, 32 | pool=True): 33 | """Construct a network block based on the BasicBlock used in ResNet.""" 34 | if out_dim is None: 35 | out_dim = feature_dim 36 | feat_layers = [] 37 | for i in range(num_blocks): 38 | odim = feature_dim if i < num_blocks - 1 + int(final_conv) else out_dim 39 | feat_layers.append(BasicBlock(feature_dim, odim)) 40 | if final_conv: 41 | feat_layers.append(nn.Conv2d(feature_dim, out_dim, kernel_size=3, padding=1, bias=False)) 42 | if pool: 43 | feat_layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 44 | if l2norm: 45 | feat_layers.append(InstanceL2Norm(scale=norm_scale)) 46 | 47 | return nn.Sequential(*feat_layers) 48 | 49 | 50 | def residual_bottleneck(feature_dim=256, num_blocks=1, l2norm=True, final_conv=False, norm_scale=1.0, out_dim=None, 51 | interp_cat=False, final_relu=False, final_pool=False, input_dim=None, final_stride=1): 52 | """Construct a network block based on the Bottleneck block used in ResNet.""" 53 | if out_dim is None: 54 | out_dim = feature_dim 55 | if input_dim is None: 56 | input_dim = 4*feature_dim 57 | dim = input_dim 58 | feat_layers = [] 59 | if interp_cat: 60 | feat_layers.append(InterpCat()) 61 | for i in range(num_blocks): 62 | planes = feature_dim if i < num_blocks - 1 + int(final_conv) else out_dim // 4 63 | feat_layers.append(Bottleneck(dim, planes)) 64 | dim = 4*feature_dim 65 | if final_conv: 66 | feat_layers.append(nn.Conv2d(dim, out_dim, kernel_size=3, padding=1, bias=False, stride=final_stride)) 67 | if final_relu: 68 | feat_layers.append(nn.ReLU(inplace=True)) 69 | if final_pool: 70 | feat_layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 71 | if l2norm: 72 | feat_layers.append(InstanceL2Norm(scale=norm_scale)) 73 | return nn.Sequential(*feat_layers) 74 | 75 | -------------------------------------------------------------------------------- /ltr/dataset/lasotvos.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import os 3 | import numpy as np 4 | import torch 5 | import pandas 6 | import csv 7 | from PIL import Image 8 | from ltr.dataset.lasot import Lasot 9 | from ltr.data.image_loader import jpeg4py_loader, imread_indexed 10 | 11 | 12 | class LasotVOS(Lasot): 13 | """ Lasot video object segmentation dataset. 
14 | """ 15 | 16 | def __init__(self, anno_path=None, split='train'): 17 | super().__init__(split=split) 18 | self.anno_path = anno_path 19 | self.skip_interval = 5 20 | 21 | @staticmethod 22 | def _load_anno(path): 23 | if not path.exists(): 24 | print('path', path, flush=True) 25 | return None 26 | im = np.array(Image.open(path)) 27 | im = np.atleast_3d(im)[..., 0] 28 | # im = imread_indexed(path) 29 | return im 30 | 31 | def _get_anno_sequence_path(self, seq_id): 32 | return os.path.join(self.anno_path, self.sequence_list[seq_id]) 33 | 34 | def _get_anno_frame_path(self, seq_path, frame_id): 35 | frame_number = 1 + frame_id * self.skip_interval 36 | return os.path.join(seq_path, '{:08}.png'.format(frame_number)) # frames start from 1 37 | 38 | ######################### 39 | def _read_bb_anno(self, seq_path): 40 | bb_anno_file = os.path.join(seq_path, "groundtruth.txt") 41 | gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, 42 | low_memory=False).values 43 | gt = torch.tensor(gt) 44 | gt = gt[:1000:self.skip_interval] 45 | return gt 46 | 47 | def _read_target_visible(self, seq_path): 48 | # Read full occlusion and out_of_view 49 | occlusion_file = os.path.join(seq_path, "full_occlusion.txt") 50 | out_of_view_file = os.path.join(seq_path, "out_of_view.txt") 51 | 52 | with open(occlusion_file, 'r', newline='') as f: 53 | occlusion = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 54 | with open(out_of_view_file, 'r') as f: 55 | out_of_view = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 56 | 57 | target_visible = ~occlusion & ~out_of_view 58 | 59 | target_visible = target_visible[:1000:self.skip_interval] 60 | 61 | return target_visible 62 | 63 | def _get_frame_path(self, seq_path, frame_id): 64 | frame_number = 1 + frame_id * self.skip_interval 65 | return os.path.join(seq_path, 'img', '{:08}.jpg'.format(frame_number)) # frames start from 1 66 | 67 | ######################### 68 | def get_frames(self, seq_id, frame_ids, anno=None): 69 | seq_path = self._get_sequence_path(seq_id) 70 | 71 | # TODO FIX Me ?? This is not used by the LWL sampler 72 | obj_meta = None 73 | # obj_meta = self.sequence_meta_info[self.sequence_list[seq_id]] 74 | 75 | frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids] 76 | 77 | if anno is None: 78 | anno = self.get_sequence_info(seq_id) 79 | 80 | anno_frames = {} 81 | for key, value in anno.items(): 82 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 83 | 84 | anno_seq_path = self._get_anno_sequence_path(seq_id) 85 | 86 | labels = [self._load_anno(Path(self._get_anno_frame_path(anno_seq_path, f))) for f in frame_ids] 87 | labels = [torch.Tensor(lb) for lb in labels] 88 | anno_frames['mask'] = labels 89 | 90 | return frame_list, anno_frames, obj_meta 91 | -------------------------------------------------------------------------------- /ltr/dataset/synthetic_video.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from .base_video_dataset import BaseVideoDataset 3 | from ltr.data.bounding_box_utils import masks_to_bboxes 4 | 5 | 6 | class SyntheticVideo(BaseVideoDataset): 7 | """ 8 | Create a synthetic video dataset from an image dataset by applying a random transformation to images. 
9 | """ 10 | def __init__(self, base_image_dataset, transform=None): 11 | """ 12 | args: 13 | base_image_dataset - Image dataset used for generating synthetic videos 14 | transform - Set of transforms to be applied to the images to generate synthetic video. 15 | """ 16 | super().__init__(base_image_dataset.get_name() + '_syn_vid', base_image_dataset.root, 17 | base_image_dataset.image_loader) 18 | self.base_image_dataset = base_image_dataset 19 | self.transform = transform 20 | 21 | def get_name(self): 22 | return self.name 23 | 24 | def is_video_sequence(self): 25 | return False 26 | 27 | def has_class_info(self): 28 | return self.base_image_dataset.has_class_info() 29 | 30 | def has_occlusion_info(self): 31 | return True 32 | 33 | def get_num_sequences(self): 34 | return self.base_image_dataset.get_num_images() 35 | 36 | def get_num_classes(self): 37 | return len(self.class_list) 38 | 39 | def get_sequences_in_class(self, class_name): 40 | return self.get_images_in_class[class_name] 41 | 42 | def get_sequence_info(self, seq_id): 43 | image_info = self.base_image_dataset.get_image_info(seq_id) 44 | 45 | image_info = {k: v.unsqueeze(0) for k, v in image_info.items()} 46 | return image_info 47 | 48 | def get_class_name(self, seq_id): 49 | return self.base_image_dataset.get_class_name(seq_id) 50 | 51 | def get_frames(self, seq_id, frame_ids, anno=None): 52 | frame, anno, object_meta = self.base_image_dataset.get_image(seq_id, anno=anno) 53 | 54 | frame_list = [frame.copy() for _ in frame_ids] 55 | 56 | if anno is None: 57 | anno = self.get_sequence_info(seq_id) 58 | 59 | anno_frames = {} 60 | for key, value in anno.items(): 61 | anno_frames[key] = [value[0].clone() for f_id in frame_ids] 62 | 63 | if self.transform is not None: 64 | if 'mask' in anno_frames.keys(): 65 | frame_list, anno_frames['bbox'], anno_frames['mask'] = self.transform(image=frame_list, 66 | bbox=anno_frames['bbox'], 67 | mask=anno_frames['mask'], 68 | joint=False) 69 | 70 | anno_frames['bbox'] = [masks_to_bboxes(m, fmt='t') for m in anno_frames['mask']] 71 | else: 72 | frame_list, anno_frames['bbox'] = self.transform(image=frame_list, 73 | bbox=anno_frames['bbox'], 74 | joint=False) 75 | 76 | object_meta = OrderedDict({'object_class_name': self.get_class_name(seq_id), 77 | 'motion_class': None, 78 | 'major_class': None, 79 | 'root_class': None, 80 | 'motion_adverb': None}) 81 | 82 | return frame_list, anno_frames, object_meta 83 | -------------------------------------------------------------------------------- /ltr/dataset/base_video_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from ltr.data.image_loader import jpeg4py_loader 3 | 4 | 5 | class BaseVideoDataset(torch.utils.data.Dataset): 6 | """ Base class for video datasets """ 7 | 8 | def __init__(self, name, root, image_loader=jpeg4py_loader): 9 | """ 10 | args: 11 | root - The root path to the dataset 12 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 13 | is used by default. 14 | """ 15 | self.name = name 16 | self.root = root 17 | self.image_loader = image_loader 18 | 19 | self.sequence_list = [] # Contains the list of sequences. 20 | self.class_list = [] 21 | 22 | def __len__(self): 23 | """ Returns size of the dataset 24 | returns: 25 | int - number of samples in the dataset 26 | """ 27 | return self.get_num_sequences() 28 | 29 | def __getitem__(self, index): 30 | """ Not to be used! Check get_frames() instead. 
31 | """ 32 | return None 33 | 34 | def is_video_sequence(self): 35 | """ Returns whether the dataset is a video dataset or an image dataset 36 | 37 | returns: 38 | bool - True if a video dataset 39 | """ 40 | return True 41 | 42 | def is_synthetic_video_dataset(self): 43 | """ Returns whether the dataset contains real videos or synthetic 44 | 45 | returns: 46 | bool - True if a video dataset 47 | """ 48 | return False 49 | 50 | def get_name(self): 51 | """ Name of the dataset 52 | 53 | returns: 54 | string - Name of the dataset 55 | """ 56 | raise NotImplementedError 57 | 58 | def get_num_sequences(self): 59 | """ Number of sequences in a dataset 60 | 61 | returns: 62 | int - number of sequences in the dataset.""" 63 | return len(self.sequence_list) 64 | 65 | def is_mot_dataset(self): 66 | return False 67 | 68 | def has_class_info(self): 69 | return False 70 | 71 | def has_occlusion_info(self): 72 | return False 73 | 74 | def get_num_classes(self): 75 | return len(self.class_list) 76 | 77 | def get_class_list(self): 78 | return self.class_list 79 | 80 | def get_sequences_in_class(self, class_name): 81 | raise NotImplementedError 82 | 83 | def has_segmentation_info(self): 84 | return False 85 | 86 | def get_sequence_info(self, seq_id): 87 | """ Returns information about a particular sequences, 88 | 89 | args: 90 | seq_id - index of the sequence 91 | 92 | returns: 93 | Dict 94 | """ 95 | raise NotImplementedError 96 | 97 | def get_frames(self, seq_id, frame_ids, anno=None): 98 | """ Get a set of frames from a particular sequence 99 | 100 | args: 101 | seq_id - index of sequence 102 | frame_ids - a list of frame numbers 103 | anno(None) - The annotation for the sequence (see get_sequence_info). If None, they will be loaded. 104 | 105 | returns: 106 | list - List of frames corresponding to frame_ids 107 | list - List of dicts for each frame 108 | dict - A dict containing meta information about the sequence, e.g. class of the target object. 109 | 110 | """ 111 | raise NotImplementedError 112 | 113 | -------------------------------------------------------------------------------- /pytracking/evaluation/trackingnetdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from pytracking.evaluation.data import Sequence, BaseDataset, SequenceList 3 | import os 4 | from pytracking.utils.load_text import load_text 5 | from pathlib import Path 6 | from PIL import Image 7 | 8 | 9 | class TrackingNetDataset(BaseDataset): 10 | """ TrackingNet test set. 11 | 12 | Publication: 13 | TrackingNet: A Large-Scale Dataset and Benchmark for Object Tracking in the Wild. 14 | Matthias Mueller,Adel Bibi, Silvio Giancola, Salman Al-Subaihi and Bernard Ghanem 15 | ECCV, 2018 16 | https://ivul.kaust.edu.sa/Documents/Publications/2018/TrackingNet%20A%20Large%20Scale%20Dataset%20and%20Benchmark%20for%20Object%20Tracking%20in%20the%20Wild.pdf 17 | 18 | Download the dataset using the toolkit https://github.com/SilvioGiancola/TrackingNet-devkit. 
19 | """ 20 | def __init__(self, load_frames=True, vos_mode=False): 21 | super().__init__() 22 | self.base_path = self.env_settings.trackingnet_path 23 | self.load_frames = load_frames 24 | 25 | sets = 'TEST' 26 | if not isinstance(sets, (list, tuple)): 27 | if sets == 'TEST': 28 | sets = ['TEST'] 29 | elif sets == 'TRAIN': 30 | sets = ['TRAIN_{}'.format(i) for i in range(5)] 31 | 32 | self.sequence_list = self._list_sequences(self.base_path, sets) 33 | 34 | self.vos_mode = vos_mode 35 | 36 | self.mask_path = None 37 | if self.vos_mode: 38 | self.mask_path = self.env_settings.trackingnet_mask_path 39 | 40 | def get_sequence_list(self): 41 | return SequenceList([self._construct_sequence(set, seq_name) for set, seq_name in self.sequence_list]) 42 | 43 | def _construct_sequence(self, set, sequence_name): 44 | anno_path = '{}/{}/anno/{}.txt'.format(self.base_path, set, sequence_name) 45 | 46 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64, backend='numpy') 47 | 48 | if self.load_frames: 49 | frames_path = '{}/{}/frames/{}'.format(self.base_path, set, sequence_name) 50 | frame_list = [frame for frame in os.listdir(frames_path) if frame.endswith(".jpg")] 51 | frame_list.sort(key=lambda f: int(f[:-4])) 52 | frames_list = [os.path.join(frames_path, frame) for frame in frame_list] 53 | else: 54 | frames_list = [] 55 | frame_list = [] 56 | 57 | masks = None 58 | if self.vos_mode: 59 | seq_mask_path = '{}/{}'.format(self.mask_path, sequence_name) 60 | masks = [self._load_mask(Path(self._get_anno_frame_path(seq_mask_path, f[:-3] + 'png'))) for f in 61 | frame_list[0:1]] 62 | 63 | return Sequence(sequence_name, frames_list, 'trackingnet', 64 | ground_truth_rect.reshape(-1, 4), ground_truth_seg=masks) 65 | 66 | @staticmethod 67 | def _load_mask(path): 68 | if not path.exists(): 69 | print('Error: Could not read: ', path, flush=True) 70 | return None 71 | im = np.array(Image.open(path)) 72 | im = np.atleast_3d(im)[..., 0] 73 | return im 74 | 75 | def _get_anno_frame_path(self, seq_path, frame_name): 76 | return os.path.join(seq_path, frame_name) 77 | 78 | def __len__(self): 79 | return len(self.sequence_list) 80 | 81 | def _list_sequences(self, root, set_ids): 82 | sequence_list = [] 83 | 84 | for s in set_ids: 85 | anno_dir = os.path.join(root, s, "anno") 86 | sequences_cur_set = [(s, os.path.splitext(f)[0]) for f in os.listdir(anno_dir) if f.endswith('.txt')] 87 | 88 | sequence_list += sequences_cur_set 89 | 90 | return sequence_list 91 | -------------------------------------------------------------------------------- /ltr/models/bbreg/atom.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import ltr.models.backbone as backbones 3 | import ltr.models.bbreg as bbmodels 4 | from ltr import model_constructor 5 | 6 | 7 | class ATOMnet(nn.Module): 8 | """ ATOM network module""" 9 | def __init__(self, feature_extractor, bb_regressor, bb_regressor_layer, extractor_grad=True): 10 | """ 11 | args: 12 | feature_extractor - backbone feature extractor 13 | bb_regressor - IoU prediction module 14 | bb_regressor_layer - List containing the name of the layers from feature_extractor, which are input to 15 | bb_regressor 16 | extractor_grad - Bool indicating whether backbone feature extractor requires gradients 17 | """ 18 | super(ATOMnet, self).__init__() 19 | 20 | self.feature_extractor = feature_extractor 21 | self.bb_regressor = bb_regressor 22 | self.bb_regressor_layer = bb_regressor_layer 23 | 24 | if not 
extractor_grad: 25 | for p in self.feature_extractor.parameters(): 26 | p.requires_grad_(False) 27 | 28 | def forward(self, train_imgs, test_imgs, train_bb, test_proposals): 29 | """ Forward pass 30 | Note: If the training is done in sequence mode, that is, test_imgs.dim() == 5, then the batch dimension 31 | corresponds to the first dimensions. test_imgs is thus of the form [sequence, batch, feature, row, col] 32 | """ 33 | num_sequences = train_imgs.shape[-4] 34 | num_train_images = train_imgs.shape[0] if train_imgs.dim() == 5 else 1 35 | num_test_images = test_imgs.shape[0] if test_imgs.dim() == 5 else 1 36 | 37 | # Extract backbone features 38 | train_feat = self.extract_backbone_features(train_imgs.reshape(-1, *train_imgs.shape[-3:])) 39 | test_feat = self.extract_backbone_features(test_imgs.reshape(-1, *test_imgs.shape[-3:])) 40 | 41 | train_feat_iou = [feat for feat in train_feat.values()] 42 | test_feat_iou = [feat for feat in test_feat.values()] 43 | 44 | # Obtain iou prediction 45 | iou_pred = self.bb_regressor(train_feat_iou, test_feat_iou, 46 | train_bb.reshape(num_train_images, num_sequences, 4), 47 | test_proposals.reshape(num_train_images, num_sequences, -1, 4)) 48 | return iou_pred 49 | 50 | def extract_backbone_features(self, im, layers=None): 51 | if layers is None: 52 | layers = self.bb_regressor_layer 53 | return self.feature_extractor(im, layers) 54 | 55 | def extract_features(self, im, layers): 56 | return self.feature_extractor(im, layers) 57 | 58 | 59 | 60 | @model_constructor 61 | def atom_resnet18(iou_input_dim=(256,256), iou_inter_dim=(256,256), backbone_pretrained=True): 62 | # backbone 63 | backbone_net = backbones.resnet18(pretrained=backbone_pretrained) 64 | 65 | # Bounding box regressor 66 | iou_predictor = bbmodels.AtomIoUNet(pred_input_dim=iou_input_dim, pred_inter_dim=iou_inter_dim) 67 | 68 | net = ATOMnet(feature_extractor=backbone_net, bb_regressor=iou_predictor, bb_regressor_layer=['layer2', 'layer3'], 69 | extractor_grad=False) 70 | 71 | return net 72 | 73 | 74 | @model_constructor 75 | def atom_resnet50(iou_input_dim=(256,256), iou_inter_dim=(256,256), backbone_pretrained=True): 76 | # backbone 77 | backbone_net = backbones.resnet50(pretrained=backbone_pretrained) 78 | 79 | # Bounding box regressor 80 | iou_predictor = bbmodels.AtomIoUNet(input_dim=(4*128,4*256), pred_input_dim=iou_input_dim, pred_inter_dim=iou_inter_dim) 81 | 82 | net = ATOMnet(feature_extractor=backbone_net, bb_regressor=iou_predictor, bb_regressor_layer=['layer2', 'layer3'], 83 | extractor_grad=False) 84 | 85 | return net 86 | -------------------------------------------------------------------------------- /pytracking/VOT/vot.py: -------------------------------------------------------------------------------- 1 | """ 2 | \file vot.py 3 | 4 | @brief Python utility functions for VOT integration 5 | 6 | @author Luka Cehovin, Alessio Dore 7 | 8 | @date 2016, 2019 9 | 10 | """ 11 | 12 | import sys 13 | import copy 14 | import collections 15 | 16 | try: 17 | import trax 18 | except ImportError: 19 | raise Exception('TraX support not found. 
Please add trax module to Python path.') 20 | 21 | Rectangle = collections.namedtuple('Rectangle', ['x', 'y', 'width', 'height']) 22 | Point = collections.namedtuple('Point', ['x', 'y']) 23 | Polygon = collections.namedtuple('Polygon', ['points']) 24 | 25 | class VOT(object): 26 | """ Base class for Python VOT integration """ 27 | def __init__(self, region_format, channels=None): 28 | """ Constructor 29 | 30 | Args: 31 | region_format: Region format options 32 | """ 33 | assert(region_format in [trax.Region.RECTANGLE, trax.Region.POLYGON]) 34 | 35 | if channels is None: 36 | channels = ['color'] 37 | elif channels == 'rgbd': 38 | channels = ['color', 'depth'] 39 | elif channels == 'rgbt': 40 | channels = ['color', 'ir'] 41 | elif channels == 'ir': 42 | channels = ['ir'] 43 | else: 44 | raise Exception('Illegal configuration {}.'.format(channels)) 45 | 46 | self._trax = trax.Server([region_format], [trax.Image.PATH], channels) 47 | 48 | request = self._trax.wait() 49 | assert(request.type == 'initialize') 50 | if isinstance(request.region, trax.Polygon): 51 | self._region = Polygon([Point(x[0], x[1]) for x in request.region]) 52 | else: 53 | self._region = Rectangle(*request.region.bounds()) 54 | self._image = [str(x) for k, x in request.image.items()] 55 | if len(self._image) == 1: 56 | self._image = self._image[0] 57 | self._trax.status(request.region) 58 | 59 | def region(self): 60 | """ 61 | Send configuration message to the client and receive the initialization 62 | region and the path of the first image 63 | 64 | Returns: 65 | initialization region 66 | """ 67 | 68 | return self._region 69 | 70 | def report(self, region, confidence = None): 71 | """ 72 | Report the tracking results to the client 73 | 74 | Arguments: 75 | region: region for the frame 76 | """ 77 | assert(isinstance(region, Rectangle) or isinstance(region, Polygon)) 78 | if isinstance(region, Polygon): 79 | tregion = trax.Polygon.create([(x.x, x.y) for x in region.points]) 80 | else: 81 | tregion = trax.Rectangle.create(region.x, region.y, region.width, region.height) 82 | properties = {} 83 | if not confidence is None: 84 | properties['confidence'] = confidence 85 | self._trax.status(tregion, properties) 86 | 87 | def frame(self): 88 | """ 89 | Get a frame (image path) from client 90 | 91 | Returns: 92 | absolute path of the image 93 | """ 94 | if hasattr(self, "_image"): 95 | image = self._image 96 | del self._image 97 | return tuple(image) 98 | 99 | request = self._trax.wait() 100 | 101 | if request.type == 'frame': 102 | image = [str(x) for k, x in request.image.items()] 103 | if len(image) == 1: 104 | image = image[0] 105 | return tuple(image) 106 | else: 107 | return None 108 | 109 | 110 | def quit(self): 111 | if hasattr(self, '_trax'): 112 | self._trax.quit() 113 | 114 | def __del__(self): 115 | self.quit() 116 | 117 | -------------------------------------------------------------------------------- /ltr/data/image_loader.py: -------------------------------------------------------------------------------- 1 | import jpeg4py 2 | import cv2 as cv 3 | from PIL import Image 4 | import numpy as np 5 | 6 | davis_palette = np.repeat(np.expand_dims(np.arange(0,256), 1), 3, 1).astype(np.uint8) 7 | davis_palette[:22, :] = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], 8 | [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128], 9 | [64, 0, 0], [191, 0, 0], [64, 128, 0], [191, 128, 0], 10 | [64, 0, 128], [191, 0, 128], [64, 128, 128], [191, 128, 128], 11 | [0, 64, 0], [128, 64, 0], [0, 191, 0], [128, 191, 0], 12 | 
[0, 64, 128], [128, 64, 128]] 13 | 14 | 15 | def default_image_loader(path): 16 | """The default image loader, reads the image from the given path. It first tries to use the jpeg4py_loader, 17 | but reverts to the opencv_loader if the former is not available.""" 18 | if default_image_loader.use_jpeg4py is None: 19 | # Try using jpeg4py 20 | im = jpeg4py_loader(path) 21 | if im is None: 22 | default_image_loader.use_jpeg4py = False 23 | print('Using opencv_loader instead.') 24 | else: 25 | default_image_loader.use_jpeg4py = True 26 | return im 27 | if default_image_loader.use_jpeg4py: 28 | return jpeg4py_loader(path) 29 | return opencv_loader(path) 30 | 31 | default_image_loader.use_jpeg4py = None 32 | 33 | 34 | def jpeg4py_loader(path): 35 | """ Image reading using jpeg4py https://github.com/ajkxyz/jpeg4py""" 36 | try: 37 | return jpeg4py.JPEG(path).decode() 38 | except Exception as e: 39 | print('ERROR: Could not read image "{}"'.format(path)) 40 | print(e) 41 | return None 42 | 43 | 44 | def opencv_loader(path): 45 | """ Read image using opencv's imread function and returns it in rgb format""" 46 | try: 47 | im = cv.imread(path, cv.IMREAD_COLOR) 48 | 49 | # convert to rgb and return 50 | return cv.cvtColor(im, cv.COLOR_BGR2RGB) 51 | except Exception as e: 52 | print('ERROR: Could not read image "{}"'.format(path)) 53 | print(e) 54 | return None 55 | 56 | 57 | def jpeg4py_loader_w_failsafe(path): 58 | """ Image reading using jpeg4py https://github.com/ajkxyz/jpeg4py""" 59 | try: 60 | return jpeg4py.JPEG(path).decode() 61 | except: 62 | try: 63 | im = cv.imread(path, cv.IMREAD_COLOR) 64 | 65 | # convert to rgb and return 66 | return cv.cvtColor(im, cv.COLOR_BGR2RGB) 67 | except Exception as e: 68 | print('ERROR: Could not read image "{}"'.format(path)) 69 | print(e) 70 | return None 71 | 72 | 73 | def opencv_seg_loader(path): 74 | """ Read segmentation annotation using opencv's imread function""" 75 | try: 76 | return cv.imread(path) 77 | except Exception as e: 78 | print('ERROR: Could not read image "{}"'.format(path)) 79 | print(e) 80 | return None 81 | 82 | 83 | def imread_indexed(filename): 84 | """ Load indexed image with given filename. Used to read segmentation annotations.""" 85 | 86 | im = Image.open(filename) 87 | 88 | annotation = np.atleast_3d(im)[...,0] 89 | return annotation 90 | 91 | 92 | def imwrite_indexed(filename, array, color_palette=None): 93 | """ Save indexed image as png. 
Used to save segmentation annotation.""" 94 | 95 | if color_palette is None: 96 | color_palette = davis_palette 97 | 98 | if np.atleast_3d(array).shape[2] != 1: 99 | raise Exception("Saving indexed PNGs requires 2D array.") 100 | 101 | im = Image.fromarray(array.astype('uint8')) 102 | im.putpalette(color_palette.ravel()) 103 | im.save(filename, format='PNG') -------------------------------------------------------------------------------- /pytracking/parameter/rts/rts50.py: -------------------------------------------------------------------------------- 1 | from pytracking.utils import TrackerParams 2 | from pytracking.features.net_wrappers import NetWithBackbone 3 | 4 | 5 | def parameters(): 6 | params = TrackerParams() 7 | 8 | ########################################## 9 | # General parameters 10 | ########################################## 11 | 12 | params.debug = 0 13 | params.visualization = False 14 | params.multiobj_mode = 'parallel' 15 | params.use_gpu = True 16 | 17 | ########################################## 18 | # Bounding box init network 19 | ########################################## 20 | params.sta_image_sample_size = (30 * 16, 52 * 16) 21 | params.sta_search_area_scale = 4.0 22 | 23 | params.sta_net = NetWithBackbone(net_path='sta.pth.tar', 24 | use_gpu=params.use_gpu, 25 | image_format='bgr255', 26 | mean=[102.9801, 115.9465, 122.7717], 27 | std=[1.0, 1.0, 1.0] 28 | ) 29 | 30 | params.sta_net.load_network() 31 | 32 | ########################################## 33 | # Segmentation Branch parameters 34 | ########################################## 35 | params.seg_to_bb_mode = 'var' 36 | params.min_mask_area = 100 37 | 38 | params.image_sample_size = (30 * 16, 52 * 16) 39 | params.search_area_scale = 6.0 40 | params.border_mode = 'inside_major' 41 | params.patch_max_scale_change = None 42 | params.max_scale_change = (0.8, 1.2) 43 | 44 | # Learning parameters 45 | params.sample_memory_size = 32 46 | params.learning_rate = 0.1 47 | params.init_samples_minimum_weight = 0.25 48 | params.train_skipping = 20 49 | 50 | # Net optimization params 51 | params.update_target_model = True 52 | params.net_opt_iter = 20 53 | params.net_opt_update_iter = 3 54 | 55 | # Main network 56 | params.net = NetWithBackbone(net_path='rts50.pth', 57 | use_gpu=params.use_gpu, 58 | image_format='bgr255', 59 | mean=[102.9801, 115.9465, 122.7717], 60 | std=[1.0, 1.0, 1.0], 61 | clf_filter_size=4, 62 | fusion_type="add" 63 | ) 64 | params.net.load_network() 65 | 66 | ########################################## 67 | # Classifier Branch parameters 68 | ########################################## 69 | 70 | # General parameters 71 | params.clf_image_sample_size = params.image_sample_size 72 | params.clf_search_area_scale = params.search_area_scale 73 | params.clf_border_mode = params.border_mode 74 | params.clf_patch_max_scale_change = params.patch_max_scale_change 75 | 76 | # Learning parameters 77 | params.clf_sample_memory_size = 50 78 | params.clf_learning_rate = 0.01 79 | params.clf_train_skipping = 20 80 | 81 | # Net optimization 82 | params.update_classifier = True 83 | params.clf_net_opt_iter = 10 84 | params.clf_net_opt_update_iter = 2 85 | params.clf_net_opt_hn_iter = 1 86 | params.clf_output_sigma_factor = 0.25 87 | 88 | # Advanced localization parameters 89 | params.clf_advanced_localization = True 90 | params.clf_target_not_found_threshold = 0.30 91 | params.clf_target_not_found_threshold_too_small = 0.50 92 | params.clf_distractor_threshold = 10000 93 | params.clf_hard_negative_threshold = 10000 94 | 
params.clf_target_neighborhood_scale = 2.2 95 | params.clf_displacement_scale = 0.8 96 | params.clf_hard_negative_learning_rate = 0.02 97 | 98 | # Augmentations parameters 99 | params.clf_use_augmentation = True 100 | params.clf_augmentation = { 101 | 'fliplr': True, 102 | 'blur': [(3, 1), (1, 3), (2, 2)], 103 | } 104 | 105 | return params 106 | -------------------------------------------------------------------------------- /pytracking/tracker/atom/optim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from pytracking import optimization, TensorList, operation 3 | import math 4 | 5 | 6 | class FactorizedConvProblem(optimization.L2Problem): 7 | def __init__(self, training_samples: TensorList, y:TensorList, filter_reg: torch.Tensor, projection_reg, params, sample_weights: TensorList, 8 | projection_activation, response_activation): 9 | self.training_samples = training_samples 10 | self.y = y 11 | self.filter_reg = filter_reg 12 | self.sample_weights = sample_weights 13 | self.params = params 14 | self.projection_reg = projection_reg 15 | self.projection_activation = projection_activation 16 | self.response_activation = response_activation 17 | 18 | self.diag_M = self.filter_reg.concat(projection_reg) 19 | 20 | def __call__(self, x: TensorList): 21 | """ 22 | Compute residuals 23 | :param x: [filters, projection_matrices] 24 | :return: [data_terms, filter_regularizations, proj_mat_regularizations] 25 | """ 26 | filter = x[:len(x)//2] # w2 in paper 27 | P = x[len(x)//2:] # w1 in paper 28 | 29 | # Do first convolution 30 | compressed_samples = operation.conv1x1(self.training_samples, P).apply(self.projection_activation) 31 | 32 | # Do second convolution 33 | residuals = operation.conv2d(compressed_samples, filter, mode='same').apply(self.response_activation) 34 | 35 | # Compute data residuals 36 | residuals = residuals - self.y 37 | 38 | residuals = self.sample_weights.sqrt().view(-1, 1, 1, 1) * residuals 39 | 40 | # Add regularization for projection matrix 41 | residuals.extend(self.filter_reg.apply(math.sqrt) * filter) 42 | 43 | # Add regularization for projection matrix 44 | residuals.extend(self.projection_reg.apply(math.sqrt) * P) 45 | 46 | return residuals 47 | 48 | 49 | def ip_input(self, a: TensorList, b: TensorList): 50 | num = len(a) // 2 # Number of filters 51 | a_filter = a[:num] 52 | b_filter = b[:num] 53 | a_P = a[num:] 54 | b_P = b[num:] 55 | 56 | # Filter inner product 57 | # ip_out = a_filter.reshape(-1) @ b_filter.reshape(-1) 58 | ip_out = operation.conv2d(a_filter, b_filter).view(-1) 59 | 60 | # Add projection matrix part 61 | # ip_out += a_P.reshape(-1) @ b_P.reshape(-1) 62 | ip_out += operation.conv2d(a_P.view(1,-1,1,1), b_P.view(1,-1,1,1)).view(-1) 63 | 64 | # Have independent inner products for each filter 65 | return ip_out.concat(ip_out.clone()) 66 | 67 | def M1(self, x: TensorList): 68 | return x / self.diag_M 69 | 70 | 71 | class ConvProblem(optimization.L2Problem): 72 | def __init__(self, training_samples: TensorList, y:TensorList, filter_reg: torch.Tensor, sample_weights: TensorList, response_activation): 73 | self.training_samples = training_samples 74 | self.y = y 75 | self.filter_reg = filter_reg 76 | self.sample_weights = sample_weights 77 | self.response_activation = response_activation 78 | 79 | def __call__(self, x: TensorList): 80 | """ 81 | Compute residuals 82 | :param x: [filters] 83 | :return: [data_terms, filter_regularizations] 84 | """ 85 | # Do convolution and compute residuals 86 | residuals 
= operation.conv2d(self.training_samples, x, mode='same').apply(self.response_activation) 87 | residuals = residuals - self.y 88 | 89 | residuals = self.sample_weights.sqrt().view(-1, 1, 1, 1) * residuals 90 | 91 | # Add regularization for projection matrix 92 | residuals.extend(self.filter_reg.apply(math.sqrt) * x) 93 | 94 | return residuals 95 | 96 | def ip_input(self, a: TensorList, b: TensorList): 97 | # return a.reshape(-1) @ b.reshape(-1) 98 | # return (a * b).sum() 99 | return operation.conv2d(a, b).view(-1) 100 | --------------------------------------------------------------------------------
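
A note on how the two problem classes in optim.py are consumed: FactorizedConvProblem and ConvProblem only define stacked residual vectors; an outer Gauss-Newton / conjugate-gradient style optimizer in pytracking then minimizes half the squared norm of that vector. The following self-contained sketch uses plain PyTorch instead of the TensorList/operation helpers, toy shapes, and omits the response activation; all names and values are illustrative, not taken from the repository. It shows that the stacked residual of ConvProblem (weighted data residuals followed by the sqrt-weighted regularization term) corresponds to the usual weighted least-squares objective.

import math
import torch
import torch.nn.functional as F

torch.manual_seed(0)

# Illustrative toy dimensions; none of these values come from the repository.
sample = torch.randn(1, 32, 18, 18)      # one training feature map
label = torch.randn(1, 1, 18, 18)        # desired correlation response
filt = torch.randn(1, 32, 5, 5)          # classification filter
sample_weight = 0.25                     # per-sample weight
filter_reg = 1e-2                        # filter regularization

def residuals(f):
    """Stack weighted data residuals and the sqrt-weighted regularization term,
    mirroring the structure of ConvProblem.__call__ above (activation omitted)."""
    pred = F.conv2d(sample, f, padding=2)              # 'same'-style convolution
    r_data = math.sqrt(sample_weight) * (pred - label)
    r_reg = math.sqrt(filter_reg) * f
    return torch.cat([r_data.reshape(-1), r_reg.reshape(-1)])

r = residuals(filt)
loss = 0.5 * (r * r).sum()

# The same objective written out explicitly: weighted L2 data term + regularizer.
pred = F.conv2d(sample, filt, padding=2)
loss_direct = (0.5 * sample_weight * ((pred - label) ** 2).sum()
               + 0.5 * filter_reg * (filt ** 2).sum())
assert torch.allclose(loss, loss_direct)

Stacking the regularization term into the residual vector (rather than adding it to a scalar loss) is what lets the same conjugate-gradient machinery treat data and regularization contributions uniformly when solving the normal equations.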