├── models ├── __init__.py ├── PreciseRoIPooling │ ├── pytorch │ │ ├── prroi_pool │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── prroi_pool.py │ │ │ ├── src │ │ │ │ ├── prroi_pooling_gpu.h │ │ │ │ ├── prroi_pooling_gpu_impl.cuh │ │ │ │ └── prroi_pooling_gpu.c │ │ │ └── functional.py │ │ └── tests │ │ │ └── test_prroi_pooling2d.py │ ├── _assets │ │ └── prroi_visualization.png │ ├── LICENSE │ ├── .gitignore │ ├── src │ │ └── prroi_pooling_gpu_impl.cuh │ └── README.md ├── cornerdet │ ├── __init__.py │ └── cornerdet.py ├── attention │ ├── __init__.py │ └── attention.py ├── neck │ ├── __init__.py │ └── neck.py ├── backbone │ └── __init__.py └── siamese │ ├── __init__.py │ └── siamese.py ├── track ├── __init__.py └── run_CGACD.py ├── train └── __init__.py ├── toolkit ├── __init__.py ├── utils │ ├── __init__.py │ ├── setup.py │ ├── misc.py │ ├── c_region.pxd │ ├── src │ │ ├── region.h │ │ └── buffer.h │ ├── statistics.py │ └── region.pyx ├── visualization │ ├── __init__.py │ ├── draw_utils.py │ ├── draw_eao.py │ ├── draw_f1.py │ └── draw_success_precision.py ├── evaluation │ ├── __init__.py │ ├── ar_benchmark.py │ ├── f1_benchmark.py │ └── eao_benchmark.py └── datasets │ ├── dataset.py │ ├── __init__.py │ ├── uav.py │ ├── got10k.py │ ├── nfs.py │ ├── trackingnet.py │ ├── lasot.py │ ├── otb.py │ └── video.py ├── training_dataset ├── coco │ ├── pycocotools │ │ ├── __init__.py │ │ ├── Makefile │ │ ├── setup.py │ │ ├── common │ │ │ ├── maskApi.h │ │ │ └── gason.h │ │ └── mask.py │ ├── visual.py │ ├── gen_json_clean.py │ └── par_crop.py ├── vid │ ├── visual.py │ ├── parse_vid.py │ ├── gen_json_clean.py │ └── par_crop.py ├── det │ ├── visual.py │ ├── gen_json_clean.py │ └── par_crop.py ├── y2b │ └── gen_json_clean.py └── got10k │ ├── gen_json_clean.py │ └── par_crop.py ├── .gitignore ├── requirement.txt ├── train_cgacd_resnet.sh ├── test.sh ├── setup.py ├── utils ├── log_helper.py ├── misc.py ├── model_load.py └── utils.py ├── experiments ├── cgacd_resnet │ └── cgacd_resnet.yml └── cgacd_resnet_otb │ └── cgacd_resnet_otb.yml ├── README.md └── tools └── eval.py /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /track/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /toolkit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /training_dataset/coco/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /models/PreciseRoIPooling/pytorch/prroi_pool/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | /_prroi_pooling 3 | -------------------------------------------------------------------------------- /toolkit/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import region 2 | from .statistics import * 3 | -------------------------------------------------------------------------------- /models/PreciseRoIPooling/_assets/prroi_visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/feiaxyt/CGACD/HEAD/models/PreciseRoIPooling/_assets/prroi_visualization.png -------------------------------------------------------------------------------- /toolkit/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .draw_f1 import draw_f1 2 | from .draw_success_precision import draw_success_precision 3 | from .draw_eao import draw_eao 4 | -------------------------------------------------------------------------------- /toolkit/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .ar_benchmark import AccuracyRobustnessBenchmark 2 | from .eao_benchmark import EAOBenchmark 3 | from .ope_benchmark import OPEBenchmark 4 | from .f1_benchmark import F1Benchmark 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dataset 2 | *.pyc 3 | checkpoint 4 | result 5 | hp_search_result 6 | runs 7 | *.o 8 | *.so 9 | checkpoint* 10 | *.model 11 | results 12 | reports 13 | .idea 14 | *.pth 15 | .ipynb_checkpoints 16 | raw_results -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pytorch==1.1.0 3 | torchvision==0.3.0 4 | opencv-python==3.4.3.18 5 | pyyaml 6 | yacs 7 | tqdm 8 | colorama 9 | matplotlib 10 | cython 11 | tensorboardX 12 | futures 13 | easydict 14 | numba -------------------------------------------------------------------------------- /models/cornerdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .cornerdet import SepCornerDet 2 | 3 | CORNERDET = { 4 | 'SepCornerDet': SepCornerDet, 5 | } 6 | 7 | 8 | def get_cornerdet(name, **kwargs): 9 | return CORNERDET[name](**kwargs) 10 | -------------------------------------------------------------------------------- /models/attention/__init__.py: -------------------------------------------------------------------------------- 1 | from .attention import PixelAttention 2 | 3 | ATTENTION = { 4 | 'PixelAttention': PixelAttention, 5 | } 6 | 7 | 8 | def get_attention(name, **kwargs): 9 | return ATTENTION[name](**kwargs) 10 | -------------------------------------------------------------------------------- /train_cgacd_resnet.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=$PWD:$PYTHONPATH 2 | 3 | #CUDA_VISIBLE_DEVICES=1 4 | python train/train.py \ 5 | --config=experiments/cgacd_resnet/cgacd_resnet.yml \ 6 | -b 64 \ 7 | -j 16 \ 8 | --save_name cgacd_resnet 9 | -------------------------------------------------------------------------------- /toolkit/utils/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | 5 | setup( 6 | ext_modules = cythonize([Extension("region", ["region.pyx", "src/region.c"])]), 7 | ) 8 | 9 | -------------------------------------------------------------------------------- 
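Note: toolkit/utils/__init__.py does "from . import region", so the Cython extension defined by the setup.py above must be compiled before the evaluation toolkit can be imported. A minimal sketch, assuming Cython and a C compiler are available (the same command the repository uses for pycocotools):

    cd toolkit/utils && python setup.py build_ext --inplace

The repository-level setup.py further below builds the same extension under the package name toolkit.utils.region.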
/models/neck/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from .neck import AdjustLayer 4 | 5 | 6 | NECKS = { 7 | 'AdjustLayer': AdjustLayer 8 | } 9 | 10 | 11 | def get_neck(name, **kwargs): 12 | return NECKS[name](**kwargs) 13 | -------------------------------------------------------------------------------- /models/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from .resnet import resnet18, resnet50 4 | 5 | 6 | BACKBONES = { 7 | 'resnet50': resnet50, 8 | } 9 | 10 | 11 | def get_backbone(name, **kwargs): 12 | return BACKBONES[name](**kwargs) 13 | -------------------------------------------------------------------------------- /models/siamese/__init__.py: -------------------------------------------------------------------------------- 1 | from .siamese import UPChannelSiamese, DepthwiseSiamese 2 | 3 | 4 | def get_siamese(name, **kwargs): 5 | SIAMESE = { 6 | 'UPChannelSiamese': UPChannelSiamese, 7 | 'DepthwiseSiamese': DepthwiseSiamese 8 | } 9 | return SIAMESE[name](**kwargs) 10 | -------------------------------------------------------------------------------- /training_dataset/coco/pycocotools/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | # install pycocotools locally 3 | python setup.py build_ext --inplace 4 | rm -rf build 5 | 6 | install: 7 | # install pycocotools to the Python site-packages 8 | python setup.py build_ext install 9 | rm -rf build 10 | clean: 11 | rm _mask.c _mask.cpython-36m-x86_64-linux-gnu.so 12 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=$PWD:$PYTHONPATH 2 | tracker_name="CGACD_VOT" 3 | config_file="experiments/cgacd_resnet/cgacd_resnet.yml" 4 | START=11 5 | END=19 6 | for s in $(seq $START 1 $END) 7 | do 8 | python tools/test.py \ 9 | --model "checkpoint/"$tracker_name"/checkpoint_epoch"$s".pth" \ 10 | --config "config/"$config_file \ 11 | --dataset "VOT2018" \ 12 | --save_name $tracker_name"_"$s 13 | done 14 | -------------------------------------------------------------------------------- /models/PreciseRoIPooling/pytorch/prroi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : __init__.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited. 
11 | 12 | from .prroi_pool import * 13 | 14 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_utils.py: -------------------------------------------------------------------------------- 1 | 2 | COLOR = ((1, 0, 0), 3 | (0, 1, 0), 4 | (1, 0, 1), 5 | (1, 1, 0), 6 | (0 , 162/255, 232/255), 7 | (0.5, 0.5, 0.5), 8 | (0, 0, 1), 9 | (0, 1, 1), 10 | (136/255, 0 , 21/255), 11 | (255/255, 127/255, 39/255), 12 | (0, 0, 0)) 13 | 14 | LINE_STYLE = ['-', '--', ':', '-', '--', ':', '-', '--', ':', '-'] 15 | 16 | MARKER_STYLE = ['o', 'v', '<', '*', 'D', 'x', '.', 'x', '<', '.'] 17 | -------------------------------------------------------------------------------- /models/neck/neck.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class AdjustLayer(nn.Module): 8 | def __init__(self, in_channels, out_channels): 9 | super(AdjustLayer, self).__init__() 10 | self.downsample = nn.Sequential( 11 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False), 12 | nn.BatchNorm2d(out_channels), 13 | ) 14 | 15 | def forward(self, x): 16 | x = self.downsample(x) 17 | return x -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | 5 | ext_modules = [ 6 | Extension( 7 | name='toolkit.utils.region', 8 | sources=[ 9 | 'toolkit/utils/region.pyx', 10 | 'toolkit/utils/src/region.c', 11 | ], 12 | include_dirs=[ 13 | 'toolkit/utils/src' 14 | ] 15 | ) 16 | ] 17 | 18 | setup( 19 | name='toolkit', 20 | packages=['toolkit'], 21 | ext_modules=cythonize(ext_modules) 22 | ) 23 | -------------------------------------------------------------------------------- /training_dataset/coco/pycocotools/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['common/maskApi.c', '_mask.pyx'], 13 | include_dirs = [np.get_include(), 'common'], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | packages=['pycocotools'], 20 | package_dir = {'pycocotools': '.'}, 21 | version='2.0', 22 | ext_modules= 23 | cythonize(ext_modules) 24 | ) 25 | -------------------------------------------------------------------------------- /models/PreciseRoIPooling/pytorch/prroi_pool/prroi_pool.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : prroi_pool.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited. 
11 | 12 | import torch.nn as nn 13 | 14 | from .functional import prroi_pool2d 15 | 16 | __all__ = ['PrRoIPool2D'] 17 | 18 | 19 | class PrRoIPool2D(nn.Module): 20 | def __init__(self, pooled_height, pooled_width, spatial_scale): 21 | super().__init__() 22 | 23 | self.pooled_height = int(pooled_height) 24 | self.pooled_width = int(pooled_width) 25 | self.spatial_scale = float(spatial_scale) 26 | 27 | def forward(self, features, rois): 28 | return prroi_pool2d(features, rois, self.pooled_height, self.pooled_width, self.spatial_scale) 29 | -------------------------------------------------------------------------------- /models/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu.h 3 | * Author : Jiayuan Mao, Tete Xiao 4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 5 | * Date : 07/13/2018 6 | * 7 | * Distributed under terms of the MIT license. 8 | * Copyright (c) 2017 Megvii Technology Limited. 9 | */ 10 | 11 | int prroi_pooling_forward_cuda(THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, int pooled_height, int pooled_width, float spatial_scale); 12 | 13 | int prroi_pooling_backward_cuda( 14 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff, 15 | int pooled_height, int pooled_width, float spatial_scale 16 | ); 17 | 18 | int prroi_pooling_coor_backward_cuda( 19 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff, 20 | int pooled_height, int pooled_width, float spatial_scal 21 | ); 22 | 23 | -------------------------------------------------------------------------------- /toolkit/utils/misc.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author fangyi.zhang@vipl.ict.ac.cn 3 | """ 4 | import numpy as np 5 | 6 | def determine_thresholds(confidence, resolution=100): 7 | """choose threshold according to confidence 8 | 9 | Args: 10 | confidence: list or numpy array or numpy array 11 | reolution: number of threshold to choose 12 | 13 | Restures: 14 | threshold: numpy array 15 | """ 16 | if isinstance(confidence, list): 17 | confidence = np.array(confidence) 18 | confidence = confidence.flatten() 19 | confidence = confidence[~np.isnan(confidence)] 20 | confidence.sort() 21 | 22 | assert len(confidence) > resolution and resolution > 2 23 | 24 | thresholds = np.ones((resolution)) 25 | thresholds[0] = - np.inf 26 | thresholds[-1] = np.inf 27 | delta = np.floor(len(confidence) / (resolution - 2)) 28 | idxs = np.linspace(delta, len(confidence)-delta, resolution-2, dtype=np.int32) 29 | thresholds[1:-1] = confidence[idxs] 30 | return thresholds 31 | -------------------------------------------------------------------------------- /utils/log_helper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
2 | 3 | import os 4 | import logging 5 | import math 6 | import sys 7 | 8 | logs = set() 9 | 10 | def get_format(logger, level): 11 | format_str = '[%(asctime)s-%(filename)s#%(lineno)3d] %(message)s' 12 | formatter = logging.Formatter(format_str) 13 | return formatter 14 | 15 | def init_log(name, level=logging.INFO, format_func=get_format): 16 | if (name, level) in logs: 17 | return 18 | logs.add((name, level)) 19 | logger = logging.getLogger(name) 20 | logger.setLevel(level) 21 | ch = logging.StreamHandler() 22 | ch.setLevel(level) 23 | formatter = format_func(logger, level) 24 | ch.setFormatter(formatter) 25 | logger.addHandler(ch) 26 | return logger 27 | 28 | def add_file_handler(name, log_file, level=logging.INFO): 29 | logger = logging.getLogger(name) 30 | fh = logging.FileHandler(log_file) 31 | fh.setFormatter(get_format(logger, level)) 32 | logger.addHandler(fh) 33 | 34 | init_log('global') -------------------------------------------------------------------------------- /models/PreciseRoIPooling/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jiayuan Mao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /training_dataset/coco/visual.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from pycocotools.coco import COCO 7 | import cv2 8 | import numpy as np 9 | 10 | color_bar = np.random.randint(0, 255, (90, 3)) 11 | 12 | visual = True 13 | 14 | dataDir = '.' 
15 | dataType = 'val2017' 16 | annFile = '{}/annotations/instances_{}.json'.format(dataDir,dataType) 17 | coco = COCO(annFile) 18 | 19 | for img_id in coco.imgs: 20 | img = coco.loadImgs(img_id)[0] 21 | annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None) 22 | anns = coco.loadAnns(annIds) 23 | im = cv2.imread('{}/{}/{}'.format(dataDir, dataType, img['file_name'])) 24 | for ann in anns: 25 | rect = ann['bbox'] 26 | c = ann['category_id'] 27 | if visual: 28 | pt1 = (int(rect[0]), int(rect[1])) 29 | pt2 = (int(rect[0]+rect[2]-1), int(rect[1]+rect[3]-1)) 30 | cv2.rectangle(im, pt1, pt2, color_bar[c-1], 3) 31 | cv2.imshow('img', im) 32 | cv2.waitKey(200) 33 | print('done') 34 | 35 | -------------------------------------------------------------------------------- /toolkit/utils/c_region.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "src/region.h": 2 | ctypedef enum region_type "RegionType": 3 | EMTPY 4 | SPECIAL 5 | RECTANGEL 6 | POLYGON 7 | MASK 8 | 9 | ctypedef struct region_bounds: 10 | float top 11 | float bottom 12 | float left 13 | float right 14 | 15 | ctypedef struct region_rectangle: 16 | float x 17 | float y 18 | float width 19 | float height 20 | 21 | # ctypedef struct region_mask: 22 | # int x 23 | # int y 24 | # int width 25 | # int height 26 | # char *data 27 | 28 | ctypedef struct region_polygon: 29 | int count 30 | float *x 31 | float *y 32 | 33 | ctypedef union region_container_data: 34 | region_rectangle rectangle 35 | region_polygon polygon 36 | # region_mask mask 37 | int special 38 | 39 | ctypedef struct region_container: 40 | region_type type 41 | region_container_data data 42 | 43 | # ctypedef struct region_overlap: 44 | # float overlap 45 | # float only1 46 | # float only2 47 | 48 | # region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds) 49 | 50 | float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds) 51 | -------------------------------------------------------------------------------- /toolkit/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tqdm import tqdm 3 | 4 | class Dataset(object): 5 | def __init__(self, name, dataset_root): 6 | self.name = name 7 | self.dataset_root = dataset_root 8 | self.videos = None 9 | 10 | def __getitem__(self, idx): 11 | if isinstance(idx, str): 12 | return self.videos[idx] 13 | elif isinstance(idx, int): 14 | return self.videos[sorted(list(self.videos.keys()))[idx]] 15 | 16 | def __len__(self): 17 | return len(self.videos) 18 | 19 | def __iter__(self): 20 | keys = sorted(list(self.videos.keys())) 21 | for key in keys: 22 | yield self.videos[key] 23 | 24 | def set_tracker(self, path, tracker_names, ): 25 | """ 26 | Args: 27 | path: path to tracker results, 28 | tracker_names: list of tracker name 29 | """ 30 | self.tracker_path = path 31 | self.tracker_names = [] 32 | seq_nums = len(self.videos) 33 | for tracker in tracker_names: 34 | t_path = os.path.join(path, tracker) 35 | if 'VOT' in self.name: 36 | t_path = os.path.join(path, tracker, 'baseline') 37 | seqs = os.listdir(t_path) 38 | if len(seqs) == seq_nums: 39 | self.tracker_names.append(tracker) 40 | 41 | # for video in tqdm(self.videos.values(), 42 | # desc='loading tacker result', ncols=100): 43 | # video.load_tracker(path, tracker_names) 44 | 
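# A minimal usage sketch of the Dataset interface above (the paths and the
# tracker name are hypothetical; it assumes the <name>.json annotation file
# and raw tracking results have already been prepared, and uses the
# DatasetFactory defined in toolkit/datasets/__init__.py):
#
#     dataset = DatasetFactory.create_dataset(name='OTB2015',
#                                             dataset_root='dataset/OTB2015')
#     dataset.set_tracker('results/OTB2015', ['CGACD'])
#     for video in dataset:        # __iter__ yields videos sorted by name
#         pass
#     first_video = dataset[0]     # __getitem__ accepts an int index or a video name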
-------------------------------------------------------------------------------- /utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | import os 3 | 4 | from colorama import Fore, Style 5 | 6 | 7 | __all__ = ['commit', 'describe'] 8 | 9 | 10 | def _exec(cmd): 11 | f = os.popen(cmd, 'r', 1) 12 | return f.read().strip() 13 | 14 | def _em(s): 15 | return f'{s}***' 16 | 17 | 18 | def _describe(model, lines=None, spaces=0): 19 | head = " " * spaces 20 | for name, p in model.named_parameters(): 21 | if '.' in name: 22 | continue 23 | if p.requires_grad: 24 | name = _em(name) 25 | line = "{head}- {name}".format(head=head, name=name) 26 | lines.append(line) 27 | 28 | for name, m in model.named_children(): 29 | space_num = len(name) + spaces + 1 30 | #if m.training: 31 | # name = _em(name) 32 | line = "{head}.{name} ({type})".format( 33 | head=head, 34 | name=name, 35 | type=m.__class__.__name__) 36 | lines.append(line) 37 | _describe(m, lines, space_num) 38 | 39 | 40 | def commit(): 41 | root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../')) 42 | cmd = "cd {}; git log | head -n1 | awk '{{print $2}}'".format(root) 43 | commit = _exec(cmd) 44 | cmd = "cd {}; git log --oneline | head -n1".format(root) 45 | commit_log = _exec(cmd) 46 | return "commit : {}\n log : {}".format(commit, commit_log) 47 | 48 | 49 | def describe(net, name=None): 50 | num = 0 51 | lines = [] 52 | if name is not None: 53 | lines.append(name) 54 | num = len(name) 55 | _describe(net, lines, num) 56 | return "\n".join(lines) 57 | -------------------------------------------------------------------------------- /toolkit/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .vot import VOTDataset, VOTLTDataset 2 | from .otb import OTBDataset 3 | from .uav import UAVDataset 4 | from .lasot import LaSOTDataset 5 | from .nfs import NFSDataset 6 | from .trackingnet import TrackingNetDataset 7 | from .got10k import GOT10kDataset 8 | 9 | class DatasetFactory(object): 10 | @staticmethod 11 | def create_dataset(**kwargs): 12 | """ 13 | Args: 14 | name: dataset name 'OTB2015', 'LaSOT', 'UAV123', 'NFS240', 'NFS30', 15 | 'VOT2018', 'VOT2016', 'VOT2018-LT' 16 | dataset_root: dataset root 17 | load_img: wether to load image 18 | Return: 19 | dataset 20 | """ 21 | assert 'name' in kwargs, "should provide dataset name" 22 | name = kwargs['name'] 23 | if 'OTB' in name: 24 | dataset = OTBDataset(**kwargs) 25 | elif 'LaSOT' == name: 26 | dataset = LaSOTDataset(**kwargs) 27 | elif 'UAV' in name: 28 | dataset = UAVDataset(**kwargs) 29 | elif 'NFS' in name: 30 | dataset = NFSDataset(**kwargs) 31 | elif 'VOT2018' == name or 'VOT2016' == name or 'VOT2019' == name: 32 | dataset = VOTDataset(**kwargs) 33 | elif 'VOT2018-LT' == name: 34 | dataset = VOTLTDataset(**kwargs) 35 | elif 'TrackingNet' == name: 36 | dataset = TrackingNetDataset(**kwargs) 37 | elif 'GOT-10k' == name: 38 | dataset = GOT10kDataset(**kwargs) 39 | elif 'got10k_val' == name: 40 | dataset = GOT10kDataset(**kwargs) 41 | else: 42 | raise Exception("unknow dataset {}".format(kwargs['name'])) 43 | return dataset 44 | 45 | -------------------------------------------------------------------------------- /models/PreciseRoIPooling/pytorch/tests/test_prroi_pooling2d.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : test_prroi_pooling2d.py 3 | # Author : 
Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 18/02/2018 6 | # 7 | # This file is part of Jacinle. 8 | 9 | import unittest 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | from prroi_pool import PrRoIPool2D 15 | from jactorch.utils.unittest import TorchTestCase 16 | 17 | 18 | 19 | 20 | class TestPrRoIPool2D(TorchTestCase): 21 | def test_forward(self): 22 | pool = PrRoIPool2D(7, 7, spatial_scale=0.5) 23 | features = torch.rand((4, 16, 24, 32)).cuda() 24 | rois = torch.tensor([ 25 | [0, 0, 0, 14, 14], 26 | [1, 14, 14, 28, 28], 27 | ]).float().cuda() 28 | 29 | out = pool(features, rois) 30 | out_gold = F.avg_pool2d(features, kernel_size=2, stride=1) 31 | 32 | self.assertTensorClose(out, torch.stack(( 33 | out_gold[0, :, :7, :7], 34 | out_gold[1, :, 7:14, 7:14], 35 | ), dim=0)) 36 | 37 | def test_backward_shapeonly(self): 38 | pool = PrRoIPool2D(2, 2, spatial_scale=0.5) 39 | 40 | features = torch.rand((4, 2, 24, 32)).cuda() 41 | rois = torch.tensor([ 42 | [0, 0, 0, 4, 4], 43 | [1, 14, 14, 18, 18], 44 | ]).float().cuda() 45 | features.requires_grad = rois.requires_grad = True 46 | out = pool(features, rois) 47 | 48 | loss = out.sum() 49 | loss.backward() 50 | 51 | self.assertTupleEqual(features.size(), features.grad.size()) 52 | self.assertTupleEqual(rois.size(), rois.grad.size()) 53 | 54 | 55 | if __name__ == '__main__': 56 | unittest.main() 57 | -------------------------------------------------------------------------------- /models/PreciseRoIPooling/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | .vim-template* 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | -------------------------------------------------------------------------------- /models/PreciseRoIPooling/src/prroi_pooling_gpu_impl.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu_impl.cuh 3 | * Author : Tete Xiao, Jiayuan Mao 4 | * Email : jasonhsiao97@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | * Copyright (c) 2017 Megvii Technology Limited. 8 | */ 9 | 10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH 11 | #define PRROI_POOLING_GPU_IMPL_CUH 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #define F_DEVPTR_IN const float * 18 | #define F_DEVPTR_OUT float * 19 | 20 | void PrRoIPoolingForwardGpu( 21 | cudaStream_t stream, 22 | F_DEVPTR_IN bottom_data, 23 | F_DEVPTR_IN bottom_rois, 24 | F_DEVPTR_OUT top_data, 25 | const int channels_, const int height_, const int width_, 26 | const int pooled_height_, const int pooled_width_, 27 | const float spatial_scale_, 28 | const int top_count); 29 | 30 | void PrRoIPoolingBackwardGpu( 31 | cudaStream_t stream, 32 | F_DEVPTR_IN bottom_data, 33 | F_DEVPTR_IN bottom_rois, 34 | F_DEVPTR_IN top_data, 35 | F_DEVPTR_IN top_diff, 36 | F_DEVPTR_OUT bottom_diff, 37 | const int channels_, const int height_, const int width_, 38 | const int pooled_height_, const int pooled_width_, 39 | const float spatial_scale_, 40 | const int top_count, const int bottom_count); 41 | 42 | void PrRoIPoolingCoorBackwardGpu( 43 | cudaStream_t stream, 44 | F_DEVPTR_IN bottom_data, 45 | F_DEVPTR_IN bottom_rois, 46 | F_DEVPTR_IN top_data, 47 | F_DEVPTR_IN top_diff, 48 | F_DEVPTR_OUT bottom_diff, 49 | const int channels_, const int height_, const int width_, 50 | const int pooled_height_, const int pooled_width_, 51 | const float spatial_scale_, 52 | const int top_count, const int bottom_count); 53 | 54 | #ifdef __cplusplus 55 | } /* !extern "C" */ 56 | #endif 57 | 58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */ 59 | 60 | -------------------------------------------------------------------------------- /models/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu_impl.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu_impl.cuh 3 | * Author : Tete Xiao, Jiayuan Mao 4 | * Email : jasonhsiao97@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 
7 | * Copyright (c) 2017 Megvii Technology Limited. 8 | */ 9 | 10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH 11 | #define PRROI_POOLING_GPU_IMPL_CUH 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #define F_DEVPTR_IN const float * 18 | #define F_DEVPTR_OUT float * 19 | 20 | void PrRoIPoolingForwardGpu( 21 | cudaStream_t stream, 22 | F_DEVPTR_IN bottom_data, 23 | F_DEVPTR_IN bottom_rois, 24 | F_DEVPTR_OUT top_data, 25 | const int channels_, const int height_, const int width_, 26 | const int pooled_height_, const int pooled_width_, 27 | const float spatial_scale_, 28 | const int top_count); 29 | 30 | void PrRoIPoolingBackwardGpu( 31 | cudaStream_t stream, 32 | F_DEVPTR_IN bottom_data, 33 | F_DEVPTR_IN bottom_rois, 34 | F_DEVPTR_IN top_data, 35 | F_DEVPTR_IN top_diff, 36 | F_DEVPTR_OUT bottom_diff, 37 | const int channels_, const int height_, const int width_, 38 | const int pooled_height_, const int pooled_width_, 39 | const float spatial_scale_, 40 | const int top_count, const int bottom_count); 41 | 42 | void PrRoIPoolingCoorBackwardGpu( 43 | cudaStream_t stream, 44 | F_DEVPTR_IN bottom_data, 45 | F_DEVPTR_IN bottom_rois, 46 | F_DEVPTR_IN top_data, 47 | F_DEVPTR_IN top_diff, 48 | F_DEVPTR_OUT bottom_diff, 49 | const int channels_, const int height_, const int width_, 50 | const int pooled_height_, const int pooled_width_, 51 | const float spatial_scale_, 52 | const int top_count, const int bottom_count); 53 | 54 | #ifdef __cplusplus 55 | } /* !extern "C" */ 56 | #endif 57 | 58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */ 59 | 60 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_eao.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import pickle 4 | 5 | from matplotlib import rc 6 | from .draw_utils import COLOR, MARKER_STYLE 7 | 8 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) 9 | rc('text', usetex=True) 10 | 11 | def draw_eao(result): 12 | fig = plt.figure() 13 | ax = fig.add_subplot(111, projection='polar') 14 | angles = np.linspace(0, 2*np.pi, 8, endpoint=True) 15 | 16 | attr2value = [] 17 | for i, (tracker_name, ret) in enumerate(result.items()): 18 | value = list(ret.values()) 19 | attr2value.append(value) 20 | value.append(value[0]) 21 | attr2value = np.array(attr2value) 22 | max_value = np.max(attr2value, axis=0) 23 | min_value = np.min(attr2value, axis=0) 24 | for i, (tracker_name, ret) in enumerate(result.items()): 25 | value = list(ret.values()) 26 | value.append(value[0]) 27 | value = np.array(value) 28 | value *= (1 / max_value) 29 | plt.plot(angles, value, linestyle='-', color=COLOR[i], marker=MARKER_STYLE[i], 30 | label=tracker_name, linewidth=1.5, markersize=6) 31 | 32 | attrs = ["Overall", "Camera motion", 33 | "Illumination change","Motion Change", 34 | "Size change","Occlusion", 35 | "Unassigned"] 36 | attr_value = [] 37 | for attr, maxv, minv in zip(attrs, max_value, min_value): 38 | attr_value.append(attr + "\n({:.3f},{:.3f})".format(minv, maxv)) 39 | ax.set_thetagrids(angles[:-1] * 180/np.pi, attr_value) 40 | ax.spines['polar'].set_visible(False) 41 | ax.legend(loc='upper center', bbox_to_anchor=(0.5,-0.07), frameon=False, ncol=5) 42 | ax.grid(b=False) 43 | ax.set_ylim(0, 1.18) 44 | ax.set_yticks([]) 45 | plt.show() 46 | 47 | if __name__ == '__main__': 48 | result = pickle.load(open("../../result.pkl", 'rb')) 49 | draw_eao(result) 50 | 
-------------------------------------------------------------------------------- /training_dataset/vid/visual.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from os.path import join 7 | from os import listdir 8 | import cv2 9 | import numpy as np 10 | import glob 11 | import xml.etree.ElementTree as ET 12 | 13 | visual = False 14 | color_bar = np.random.randint(0, 255, (90, 3)) 15 | 16 | VID_base_path = './ILSVRC2015' 17 | ann_base_path = join(VID_base_path, 'Annotations/VID/train/') 18 | img_base_path = join(VID_base_path, 'Data/VID/train/') 19 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e'}) 20 | for sub_set in sub_sets: 21 | sub_set_base_path = join(ann_base_path, sub_set) 22 | videos = sorted(listdir(sub_set_base_path)) 23 | for vi, video in enumerate(videos): 24 | print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos))) 25 | 26 | video_base_path = join(sub_set_base_path, video) 27 | xmls = sorted(glob.glob(join(video_base_path, '*.xml'))) 28 | for xml in xmls: 29 | f = dict() 30 | xmltree = ET.parse(xml) 31 | size = xmltree.findall('size')[0] 32 | frame_sz = [int(it.text) for it in size] 33 | objects = xmltree.findall('object') 34 | if visual: 35 | im = cv2.imread(xml.replace('xml', 'JPEG').replace('Annotations', 'Data')) 36 | for object_iter in objects: 37 | trackid = int(object_iter.find('trackid').text) 38 | bndbox = object_iter.find('bndbox') 39 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text), 40 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)] 41 | if visual: 42 | pt1 = (int(bbox[0]), int(bbox[1])) 43 | pt2 = (int(bbox[2]), int(bbox[3])) 44 | cv2.rectangle(im, pt1, pt2, color_bar[trackid], 3) 45 | if visual: 46 | cv2.imshow('img', im) 47 | cv2.waitKey(1) 48 | 49 | print('done!') 50 | -------------------------------------------------------------------------------- /training_dataset/det/visual.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from os.path import join 7 | from os import listdir 8 | import cv2 9 | import numpy as np 10 | import glob 11 | import xml.etree.ElementTree as ET 12 | 13 | visual = False 14 | color_bar = np.random.randint(0, 255, (90, 3)) 15 | 16 | VID_base_path = './ILSVRC2015' 17 | ann_base_path = join(VID_base_path, 'Annotations/DET/train/') 18 | img_base_path = join(VID_base_path, 'Data/DET/train/') 19 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'}) 20 | for sub_set in sub_sets: 21 | sub_set_base_path = join(ann_base_path, sub_set) 22 | class_names = sorted(listdir(sub_set_base_path)) 23 | for vi, class_name in enumerate(class_names): 24 | print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(class_names))) 25 | 26 | class_base_path = join(sub_set_base_path, class_name) 27 | xmls = sorted(glob.glob(join(class_base_path, '*.xml'))) 28 | for xml in xmls: 29 | f = dict() 30 | xmltree = ET.parse(xml) 31 | size = xmltree.findall('size')[0] 32 | frame_sz = [int(it.text) for it in size] 33 | objects = xmltree.findall('object') 34 | 
# if visual: 35 | img_path = xml.replace('xml', 'JPEG').replace('Annotations', 'Data') 36 | im = cv2.imread(img_path) 37 | for object_iter in objects: 38 | bndbox = object_iter.find('bndbox') 39 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text), 40 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)] 41 | if visual: 42 | pt1 = (int(bbox[0]), int(bbox[1])) 43 | pt2 = (int(bbox[2]), int(bbox[3])) 44 | cv2.rectangle(im, pt1, pt2, color_bar[vi], 3) 45 | if visual: 46 | cv2.imshow('img', im) 47 | cv2.waitKey(500) 48 | 49 | print('done!') 50 | -------------------------------------------------------------------------------- /toolkit/datasets/uav.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from tqdm import tqdm 5 | from glob import glob 6 | 7 | from .dataset import Dataset 8 | from .video import Video 9 | 10 | class UAVVideo(Video): 11 | """ 12 | Args: 13 | name: video name 14 | root: dataset root 15 | video_dir: video directory 16 | init_rect: init rectangle 17 | img_names: image names 18 | gt_rect: groundtruth rectangle 19 | attr: attribute of video 20 | """ 21 | def __init__(self, name, root, video_dir, init_rect, img_names, 22 | gt_rect, attr, load_img=False): 23 | super(UAVVideo, self).__init__(name, root, video_dir, 24 | init_rect, img_names, gt_rect, attr, load_img) 25 | 26 | 27 | class UAVDataset(Dataset): 28 | """ 29 | Args: 30 | name: dataset name, should be 'UAV123', 'UAV20L' 31 | dataset_root: dataset root 32 | load_img: wether to load all imgs 33 | """ 34 | def __init__(self, name, dataset_root, load_img=False): 35 | super(UAVDataset, self).__init__(name, dataset_root) 36 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 37 | meta_data = json.load(f) 38 | 39 | # load videos 40 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 41 | self.videos = {} 42 | for video in pbar: 43 | pbar.set_postfix_str(video) 44 | self.videos[video] = UAVVideo(video, 45 | dataset_root, 46 | meta_data[video]['video_dir'], 47 | meta_data[video]['init_rect'], 48 | meta_data[video]['img_names'], 49 | meta_data[video]['gt_rect'], 50 | meta_data[video]['attr']) 51 | 52 | # set attr 53 | attr = [] 54 | for x in self.videos.values(): 55 | attr += x.attr 56 | attr = set(attr) 57 | self.attr = {} 58 | self.attr['ALL'] = list(self.videos.keys()) 59 | for x in attr: 60 | self.attr[x] = [] 61 | for k, v in self.videos.items(): 62 | for attr_ in v.attr: 63 | self.attr[attr_].append(k) 64 | 65 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_f1.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from matplotlib import rc 5 | from .draw_utils import COLOR, LINE_STYLE 6 | 7 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) 8 | rc('text', usetex=True) 9 | 10 | def draw_f1(result, bold_name=None): 11 | # drawing f1 contour 12 | fig, ax = plt.subplots() 13 | for f1 in np.arange(0.1, 1, 0.1): 14 | recall = np.arange(f1, 1+0.01, 0.01) 15 | precision = f1 * recall / (2 * recall - f1) 16 | ax.plot(recall, precision, color=[0,1,0], linestyle='-', linewidth=0.5) 17 | ax.plot(precision, recall, color=[0,1,0], linestyle='-', linewidth=0.5) 18 | ax.grid(b=True) 19 | ax.set_aspect(1) 20 | plt.xlabel('Recall') 21 | plt.ylabel('Precision') 22 | plt.axis([0, 1, 0, 1]) 23 | plt.title(r'\textbf{VOT2018-LT Precision vs Recall}') 
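    # The contour equation above follows from F1 = 2*P*R/(P+R): fixing F1 and
    # solving for precision gives P = F1*R / (2*R - F1), which is what is
    # plotted (in both orientations) for each F1 level in 0.1..0.9.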
24 | 25 | # draw result line 26 | all_precision = {} 27 | all_recall = {} 28 | best_f1 = {} 29 | best_idx = {} 30 | for tracker_name, ret in result.items(): 31 | precision = np.mean(list(ret['precision'].values()), axis=0) 32 | recall = np.mean(list(ret['recall'].values()), axis=0) 33 | f1 = 2 * precision * recall / (precision + recall) 34 | max_idx = np.argmax(f1) 35 | all_precision[tracker_name] = precision 36 | all_recall[tracker_name] = recall 37 | best_f1[tracker_name] = f1[max_idx] 38 | best_idx[tracker_name] = max_idx 39 | 40 | for idx, (tracker_name, best_f1) in \ 41 | enumerate(sorted(best_f1.items(), key=lambda x:x[1], reverse=True)): 42 | if tracker_name == bold_name: 43 | label = r"\textbf{[%.3f] Ours}" % (best_f1) 44 | else: 45 | label = "[%.3f] " % (best_f1) + tracker_name 46 | recall = all_recall[tracker_name][:-1] 47 | precision = all_precision[tracker_name][:-1] 48 | ax.plot(recall, precision, color=COLOR[idx], linestyle='-', 49 | label=label) 50 | f1_idx = best_idx[tracker_name] 51 | ax.plot(recall[f1_idx], precision[f1_idx], color=[0,0,0], marker='o', 52 | markerfacecolor=COLOR[idx], markersize=5) 53 | ax.legend(loc='lower right', labelspacing=0.2) 54 | plt.xticks(np.arange(0, 1+0.1, 0.1)) 55 | plt.yticks(np.arange(0, 1+0.1, 0.1)) 56 | plt.show() 57 | 58 | if __name__ == '__main__': 59 | draw_f1(None) 60 | -------------------------------------------------------------------------------- /training_dataset/coco/pycocotools/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. 
*/ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /training_dataset/vid/parse_vid.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from os.path import join 7 | from os import listdir 8 | import json 9 | import glob 10 | import xml.etree.ElementTree as ET 11 | 12 | VID_base_path = '/ssd/feiji/Research/Data/ILSVRC2015' 13 | ann_base_path = join(VID_base_path, 'Annotations/VID/train/') 14 | img_base_path = join(VID_base_path, 'Data/VID/train/') 15 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e'}) 16 | 17 | vid = [] 18 | for sub_set in sub_sets: 19 | sub_set_base_path = join(ann_base_path, sub_set) 20 | videos = sorted(listdir(sub_set_base_path)) 21 | s = [] 22 | for vi, video in enumerate(videos): 23 | print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos))) 24 | v = dict() 25 | v['base_path'] = join(sub_set, video) 26 | v['frame'] = [] 27 | video_base_path = join(sub_set_base_path, video) 28 | xmls = sorted(glob.glob(join(video_base_path, '*.xml'))) 29 | for xml in xmls: 30 | f = dict() 31 | xmltree = ET.parse(xml) 32 | size = xmltree.findall('size')[0] 33 | frame_sz = [int(it.text) for it in size] 34 | objects = xmltree.findall('object') 35 | objs = [] 36 | for object_iter in objects: 37 | trackid = int(object_iter.find('trackid').text) 38 | name = (object_iter.find('name')).text 39 | bndbox = object_iter.find('bndbox') 40 | occluded = int(object_iter.find('occluded').text) 41 | o = dict() 42 | o['c'] = name 43 | o['bbox'] = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text), 44 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)] 45 | o['trackid'] = trackid 46 | o['occ'] = occluded 47 | objs.append(o) 48 | f['frame_sz'] = frame_sz 49 | f['img_path'] = xml.split('/')[-1].replace('xml', 'JPEG') 50 | f['objs'] = objs 51 | v['frame'].append(f) 52 | s.append(v) 53 | vid.append(s) 54 | print('save json (raw vid info), please wait 1 min~') 55 | json.dump(vid, open('vid.json', 'w'), indent=4, sort_keys=True) 56 | print('done!') 57 | -------------------------------------------------------------------------------- /training_dataset/y2b/gen_json_clean.py: -------------------------------------------------------------------------------- 1 | from os.path import join, isdir 2 | from os import mkdir 3 | import numpy as np 4 | import cv2 5 | import glob 6 | import json 7 | 8 | def check_neg(bbox): 9 | x1, y1, x2, y2 = bbox 10 | w, h = x2 - x1, y2 -y1 11 | if w <= 0 or h <= 0: 12 | return False 13 | return True 14 | 15 | def check_size(frame_sz, bbox): 16 | #min_ratio = 0.1 17 | max_ratio = 0.75 18 | # only accept objects >10% and <75% of the total frame 19 | area_ratio = 
np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz))) 20 | ok = (area_ratio < max_ratio) # and (area_ratio > min_ratio) 21 | return ok 22 | 23 | 24 | def check_borders(frame_sz, bbox): 25 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2 26 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \ 27 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \ 28 | ((frame_sz[1] - bbox[3]) > dist_from_border) 29 | return ok 30 | 31 | data_file = 'train.json' 32 | path_format = "{}.{}.{}.jpg" 33 | root = "/ssd/feiji/Research/Data/y2b_crop511" 34 | anno = json.load(open(data_file, 'r')) 35 | wh_file = 'train_wh.json' 36 | wh = json.load(open(wh_file, 'r')) 37 | out = {} 38 | n_videos = 0 39 | for video, tracks in anno.items(): 40 | new_tracks = {} 41 | video_id = video.split('/')[-1] 42 | if not(video_id in wh): 43 | continue 44 | frame_sz = wh[video_id] 45 | for track, frames in tracks.items(): 46 | new_frames = {} 47 | valid_num = 0 48 | for frame, bbox in frames.items(): 49 | new_info = {} 50 | image_path = join(root, video, path_format.format(frame, track, 'x')) 51 | new_info['bbox'] = bbox 52 | new_info['valid'] = 0 53 | if check_neg(bbox) and check_size(frame_sz, bbox) and check_borders(frame_sz, bbox): 54 | new_info['valid'] = 1 55 | valid_num += 1 56 | new_frames[frame] = new_info 57 | #new_frames['track_category'] = video.split('/')[0] 58 | 59 | if valid_num > 0: 60 | new_tracks[track] = new_frames 61 | if len(new_tracks) > 0: 62 | out[video] = new_tracks 63 | n_videos += 1 64 | print('video: {:d}'.format(n_videos)) 65 | 66 | json.dump(out, open('train_largeclean.json', 'w'), indent=4, sort_keys=True) 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /experiments/cgacd_resnet/cgacd_resnet.yml: -------------------------------------------------------------------------------- 1 | backbone: 2 | type: 'resnet50' 3 | pretrained: 'resnet50.model' 4 | unfix_layers: ['layer3', 'layer2'] 5 | unfix_steps: [10, 10] 6 | unfix_lr: [0.1, 0.1] 7 | kwargs: 8 | used_layers: [3] 9 | 10 | adjust: 11 | adjust: true 12 | type: 'AdjustLayer' 13 | kwargs: 14 | in_channels: 1024 15 | out_channels: 256 16 | 17 | siamese: 18 | type: 'UPChannelSiamese' 19 | 20 | attention: 21 | attention: true 22 | type: 'PixelAttention' 23 | 24 | cornerdet: 25 | cornerdet: true 26 | type: 'SepCornerDet' 27 | 28 | train: 29 | roi_augmentation: 30 | ratio: 0.5 31 | shift: 8 32 | scale: 0.1 33 | creg_weight: 0.25 34 | epoch: 20 35 | pretrain_epoch: 1 36 | response_size: 25 37 | template_pool_size: 5 38 | search_pool_size: 7 39 | train_dataset: 40 | names: 41 | - 'youtubebb' 42 | - 'got10k' 43 | - 'vid' 44 | - 'coco' 45 | - 'det' 46 | youtubebb: 47 | num_use: 100000 48 | root: '/ssd/feiji/Research/Data/y2b_crop511' 49 | anno: '/home/feiji/Research/Data/data_preprocess/y2b/train_largeclean.json' 50 | got10k: 51 | num_use: 100000 52 | root: '/ssd/feiji/Research/Data/GOT-10k_crop511' 53 | anno: '/home/feiji/Research/Data/data_preprocess/got10k/train_largeclean.json' 54 | vid: 55 | num_use: 50000 56 | root: '/ssd/feiji/Research/Data/VID_crop511' 57 | anno: '/home/feiji/Research/Data/data_preprocess/vid/train_largeclean.json' 58 | coco: 59 | num_use: 50000 60 | root: '/ssd/feiji/Research/Data/COCO_crop511' 61 | anno: '/home/feiji/Research/Data/data_preprocess/coco/train2017_largeclean.json' 62 | det: 63 | num_use: 50000 64 | root: 
'/ssd/feiji/Research/Data/DET_crop511' 65 | anno: '/home/feiji/Research/Data/data_preprocess/det/train_largeclean.json' 66 | 67 | video_per_epoch: 350000 68 | 69 | augmentation: 70 | neg: 0.2 71 | gray: 0.25 72 | search: 73 | shift: 64 74 | scale: 0.18 75 | blur: 0.2 76 | 77 | lr: 78 | type: 'log' 79 | start_lr: 0.001 80 | end_lr: 0.0001 81 | pretrain: 82 | start_lr: 0.0005 83 | type: 'step' 84 | step: 1 85 | epoch: 1 86 | warmup: 87 | start_lr: 0.0006 88 | end_lr: 0.001 89 | type: 'step' 90 | step: 1 91 | epoch: 4 92 | 93 | track: 94 | response_size: 25 95 | penalty_k: 0.055 96 | window_influence: 0.42 97 | lr: 0.2 98 | -------------------------------------------------------------------------------- /experiments/cgacd_resnet_otb/cgacd_resnet_otb.yml: -------------------------------------------------------------------------------- 1 | backbone: 2 | type: 'resnet50' 3 | pretrained: 'resnet50.model' 4 | unfix_layers: ['layer3', 'layer2'] 5 | unfix_steps: [10, 10] 6 | unfix_lr: [0.1, 0.1] 7 | kwargs: 8 | used_layers: [3] 9 | 10 | adjust: 11 | adjust: true 12 | type: 'AdjustLayer' 13 | kwargs: 14 | in_channels: 1024 15 | out_channels: 256 16 | 17 | siamese: 18 | type: 'UPChannelSiamese' 19 | 20 | attention: 21 | attention: true 22 | type: 'PixelAttention' 23 | 24 | cornerdet: 25 | cornerdet: true 26 | type: 'SepCornerDet' 27 | 28 | train: 29 | roi_augmentation: 30 | ratio: 0.5 31 | shift: 16 32 | scale: 0.1 33 | creg_weight: 0.25 34 | epoch: 20 35 | pretrain_epoch: 1 36 | response_size: 25 37 | template_pool_size: 5 38 | search_pool_size: 7 39 | train_dataset: 40 | names: 41 | - 'youtubebb' 42 | - 'got10k' 43 | - 'vid' 44 | - 'coco' 45 | - 'det' 46 | youtubebb: 47 | num_use: 100000 48 | root: '/ssd/feiji/Research/Data/y2b_crop511' 49 | anno: '/home/feiji/Research/Data/data_preprocess/y2b/train_largeclean.json' 50 | got10k: 51 | num_use: 100000 52 | root: '/ssd/feiji/Research/Data/GOT-10k_crop511' 53 | anno: '/home/feiji/Research/Data/data_preprocess/got10k/train_largeclean.json' 54 | vid: 55 | num_use: 50000 56 | root: '/ssd/feiji/Research/Data/VID_crop511' 57 | anno: '/home/feiji/Research/Data/data_preprocess/vid/train_largeclean.json' 58 | coco: 59 | num_use: 50000 60 | root: '/ssd/feiji/Research/Data/COCO_crop511' 61 | anno: '/home/feiji/Research/Data/data_preprocess/coco/train2017_largeclean.json' 62 | det: 63 | num_use: 50000 64 | root: '/ssd/feiji/Research/Data/DET_crop511' 65 | anno: '/home/feiji/Research/Data/data_preprocess/det/train_largeclean.json' 66 | 67 | video_per_epoch: 350000 68 | 69 | augmentation: 70 | neg: 0.2 71 | gray: 0.25 72 | search: 73 | shift: 64 74 | scale: 0.18 75 | blur: 0.2 76 | 77 | lr: 78 | type: 'log' 79 | start_lr: 0.001 80 | end_lr: 0.0001 81 | pretrain: 82 | start_lr: 0.0005 83 | type: 'step' 84 | step: 1 85 | epoch: 1 86 | warmup: 87 | start_lr: 0.0006 88 | end_lr: 0.001 89 | type: 'step' 90 | step: 1 91 | epoch: 4 92 | 93 | track: 94 | response_size: 25 95 | penalty_k: 0.055 96 | window_influence: 0.42 97 | lr: 0.2 98 | -------------------------------------------------------------------------------- /models/PreciseRoIPooling/pytorch/prroi_pool/functional.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : functional.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 
10 | # Copyright (c) 2017 Megvii Technology Limited. 11 | 12 | import torch 13 | import torch.autograd as ag 14 | 15 | try: 16 | from os.path import join as pjoin, dirname 17 | from torch.utils.cpp_extension import load as load_extension 18 | root_dir = pjoin(dirname(__file__), 'src') 19 | _prroi_pooling = load_extension( 20 | '_prroi_pooling', 21 | [pjoin(root_dir, 'prroi_pooling_gpu.c'), pjoin(root_dir, 'prroi_pooling_gpu_impl.cu')], 22 | verbose=True 23 | ) 24 | except ImportError: 25 | raise ImportError('Can not compile Precise RoI Pooling library.') 26 | 27 | __all__ = ['prroi_pool2d'] 28 | 29 | 30 | class PrRoIPool2DFunction(ag.Function): 31 | @staticmethod 32 | def forward(ctx, features, rois, pooled_height, pooled_width, spatial_scale): 33 | assert 'FloatTensor' in features.type() and 'FloatTensor' in rois.type(), \ 34 | 'Precise RoI Pooling only takes float input, got {} for features and {} for rois.'.format(features.type(), rois.type()) 35 | 36 | pooled_height = int(pooled_height) 37 | pooled_width = int(pooled_width) 38 | spatial_scale = float(spatial_scale) 39 | 40 | features = features.contiguous() 41 | rois = rois.contiguous() 42 | params = (pooled_height, pooled_width, spatial_scale) 43 | 44 | if features.is_cuda: 45 | output = _prroi_pooling.prroi_pooling_forward_cuda(features, rois, *params) 46 | ctx.params = params 47 | # everything here is contiguous. 48 | ctx.save_for_backward(features, rois, output) 49 | else: 50 | raise NotImplementedError('Precise RoI Pooling only supports GPU (cuda) implememtations.') 51 | 52 | return output 53 | 54 | @staticmethod 55 | def backward(ctx, grad_output): 56 | features, rois, output = ctx.saved_tensors 57 | grad_input = grad_coor = None 58 | 59 | if features.requires_grad: 60 | grad_output = grad_output.contiguous() 61 | grad_input = _prroi_pooling.prroi_pooling_backward_cuda(features, rois, output, grad_output, *ctx.params) 62 | if rois.requires_grad: 63 | grad_output = grad_output.contiguous() 64 | grad_coor = _prroi_pooling.prroi_pooling_coor_backward_cuda(features, rois, output, grad_output, *ctx.params) 65 | 66 | return grad_input, grad_coor, None, None, None 67 | 68 | 69 | prroi_pool2d = PrRoIPool2DFunction.apply 70 | 71 | -------------------------------------------------------------------------------- /training_dataset/coco/gen_json_clean.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from pycocotools.coco import COCO 7 | from os.path import join 8 | import numpy as np 9 | import json 10 | 11 | def check_neg(bbox): 12 | x1, y1, x2, y2 = bbox 13 | w, h = x2 - x1, y2 -y1 14 | if w <= 0 or h <= 0: 15 | return False 16 | return True 17 | 18 | def check_size(frame_sz, bbox): 19 | #min_ratio = 0.1 20 | max_ratio = 0.75 21 | # only accept objects >10% and <75% of the total frame 22 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz))) 23 | ok = (area_ratio < max_ratio) # and (area_ratio > min_ratio) 24 | return ok 25 | 26 | 27 | def check_borders(frame_sz, bbox): 28 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2 29 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \ 30 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \ 31 | ((frame_sz[1] - bbox[3]) > dist_from_border) 32 | return ok 33 | 
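# The three helpers above filter annotations before they are written out:
# check_neg drops boxes with non-positive width or height, check_size drops
# objects whose box-to-frame area ratio (square-rooted) exceeds 0.75, and
# check_borders drops boxes that come within roughly 5% of their mean side
# length of any image border. Only annotations passing all three checks are
# kept (marked 'valid': 1) in the json generated below.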
34 | 35 | dataDir = '/home/feiji/Research/Data/COCO' 36 | for data_subset in ['val2017', 'train2017']: 37 | dataset = dict() 38 | annFile = '{}/annotations/instances_{}.json'.format(dataDir, data_subset) 39 | coco = COCO(annFile) 40 | n_imgs = len(coco.imgs) 41 | for n, img_id in enumerate(coco.imgs): 42 | print('subset: {} image id: {:04d} / {:04d}'.format(data_subset, n, n_imgs)) 43 | img = coco.loadImgs(img_id)[0] 44 | annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None) 45 | anns = coco.loadAnns(annIds) 46 | crop_base_path = join(data_subset, img['file_name'].split('/')[-1].split('.')[0]) 47 | frame_sz = [img['width'], img['height']] 48 | 49 | for track_id, ann in enumerate(anns): 50 | info = {} 51 | rect = ann['bbox'] 52 | if rect[2] <= 0 or rect[3] <= 0: # lead nan error in cls. 53 | continue 54 | bbox = [rect[0], rect[1], rect[0]+rect[2]-1, rect[1]+rect[3]-1] # x1,y1,x2,y2 55 | if check_neg(bbox) and check_size(frame_sz, bbox) and check_borders(frame_sz, bbox): 56 | if crop_base_path not in dataset: 57 | dataset[crop_base_path] = dict() 58 | info['valid'] = 1 59 | info['bbox'] = bbox 60 | dataset[crop_base_path]['{:02d}'.format(track_id)] = {'000000': info} 61 | #dataset[crop_base_path]['{:02d}'.format(track_id)]['track_category'] = ann['category_id'] 62 | 63 | print('save json (dataset), please wait 20 seconds~') 64 | json.dump(dataset, open('{}_largeclean.json'.format(data_subset), 'w'), indent=4, sort_keys=True) 65 | print('done!') 66 | 67 | -------------------------------------------------------------------------------- /toolkit/datasets/got10k.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import os 4 | 5 | from tqdm import tqdm 6 | 7 | from .dataset import Dataset 8 | from .video import Video 9 | 10 | class GOT10kVideo(Video): 11 | """ 12 | Args: 13 | name: video name 14 | root: dataset root 15 | video_dir: video directory 16 | init_rect: init rectangle 17 | img_names: image names 18 | gt_rect: groundtruth rectangle 19 | attr: attribute of video 20 | """ 21 | def __init__(self, name, root, video_dir, init_rect, img_names, 22 | gt_rect, attr, load_img=False): 23 | super(GOT10kVideo, self).__init__(name, root, video_dir, 24 | init_rect, img_names, gt_rect, attr, load_img) 25 | 26 | # def load_tracker(self, path, tracker_names=None): 27 | # """ 28 | # Args: 29 | # path(str): path to result 30 | # tracker_name(list): name of tracker 31 | # """ 32 | # if not tracker_names: 33 | # tracker_names = [x.split('/')[-1] for x in glob(path) 34 | # if os.path.isdir(x)] 35 | # if isinstance(tracker_names, str): 36 | # tracker_names = [tracker_names] 37 | # # self.pred_trajs = {} 38 | # for name in tracker_names: 39 | # traj_file = os.path.join(path, name, self.name+'.txt') 40 | # if os.path.exists(traj_file): 41 | # with open(traj_file, 'r') as f : 42 | # self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 43 | # for x in f.readlines()] 44 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 45 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 46 | # else: 47 | 48 | # self.tracker_names = list(self.pred_trajs.keys()) 49 | 50 | class GOT10kDataset(Dataset): 51 | """ 52 | Args: 53 | name: dataset name, should be "NFS30" or "NFS240" 54 | dataset_root, dataset root dir 55 | """ 56 | def __init__(self, name, dataset_root, load_img=False): 57 | super(GOT10kDataset, self).__init__(name, dataset_root) 58 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 59 | meta_data = json.load(f) 
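        # Layout of the json (a sketch inferred from the keys read below; the
        # actual files come from the dataset jsons referenced in the README):
        # {
        #     "<video_name>": {
        #         "video_dir": "<relative directory of the frames>",
        #         "init_rect": <first-frame bounding box>,
        #         "img_names": ["<frame path>", ...],
        #         "gt_rect":   [<per-frame bounding box>, ...]
        #     },
        #     ...
        # }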
60 | 61 | # load videos 62 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 63 | self.videos = {} 64 | for video in pbar: 65 | pbar.set_postfix_str(video) 66 | self.videos[video] = GOT10kVideo(video, 67 | dataset_root, 68 | meta_data[video]['video_dir'], 69 | meta_data[video]['init_rect'], 70 | meta_data[video]['img_names'], 71 | meta_data[video]['gt_rect'], 72 | None) 73 | self.attr = {} 74 | self.attr['ALL'] = list(self.videos.keys()) 75 | -------------------------------------------------------------------------------- /toolkit/datasets/nfs.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | 12 | class NFSVideo(Video): 13 | """ 14 | Args: 15 | name: video name 16 | root: dataset root 17 | video_dir: video directory 18 | init_rect: init rectangle 19 | img_names: image names 20 | gt_rect: groundtruth rectangle 21 | attr: attribute of video 22 | """ 23 | def __init__(self, name, root, video_dir, init_rect, img_names, 24 | gt_rect, attr, load_img=False): 25 | super(NFSVideo, self).__init__(name, root, video_dir, 26 | init_rect, img_names, gt_rect, attr, load_img) 27 | 28 | # def load_tracker(self, path, tracker_names=None): 29 | # """ 30 | # Args: 31 | # path(str): path to result 32 | # tracker_name(list): name of tracker 33 | # """ 34 | # if not tracker_names: 35 | # tracker_names = [x.split('/')[-1] for x in glob(path) 36 | # if os.path.isdir(x)] 37 | # if isinstance(tracker_names, str): 38 | # tracker_names = [tracker_names] 39 | # # self.pred_trajs = {} 40 | # for name in tracker_names: 41 | # traj_file = os.path.join(path, name, self.name+'.txt') 42 | # if os.path.exists(traj_file): 43 | # with open(traj_file, 'r') as f : 44 | # self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 45 | # for x in f.readlines()] 46 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 47 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 48 | # else: 49 | 50 | # self.tracker_names = list(self.pred_trajs.keys()) 51 | 52 | class NFSDataset(Dataset): 53 | """ 54 | Args: 55 | name: dataset name, should be "NFS30" or "NFS240" 56 | dataset_root, dataset root dir 57 | """ 58 | def __init__(self, name, dataset_root, load_img=False): 59 | super(NFSDataset, self).__init__(name, dataset_root) 60 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 61 | meta_data = json.load(f) 62 | 63 | # load videos 64 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 65 | self.videos = {} 66 | for video in pbar: 67 | pbar.set_postfix_str(video) 68 | self.videos[video] = NFSVideo(video, 69 | dataset_root, 70 | meta_data[video]['video_dir'], 71 | meta_data[video]['init_rect'], 72 | meta_data[video]['img_names'], 73 | meta_data[video]['gt_rect'], 74 | None) 75 | 76 | self.attr = {} 77 | self.attr['ALL'] = list(self.videos.keys()) 78 | -------------------------------------------------------------------------------- /toolkit/datasets/trackingnet.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | class TrackingNetVideo(Video): 12 | """ 13 | Args: 14 | name: video name 15 | root: dataset root 16 | video_dir: video directory 17 | 
init_rect: init rectangle 18 | img_names: image names 19 | gt_rect: groundtruth rectangle 20 | attr: attribute of video 21 | """ 22 | def __init__(self, name, root, video_dir, init_rect, img_names, 23 | gt_rect, attr, load_img=False): 24 | super(TrackingNetVideo, self).__init__(name, root, video_dir, 25 | init_rect, img_names, gt_rect, attr, load_img) 26 | 27 | # def load_tracker(self, path, tracker_names=None): 28 | # """ 29 | # Args: 30 | # path(str): path to result 31 | # tracker_name(list): name of tracker 32 | # """ 33 | # if not tracker_names: 34 | # tracker_names = [x.split('/')[-1] for x in glob(path) 35 | # if os.path.isdir(x)] 36 | # if isinstance(tracker_names, str): 37 | # tracker_names = [tracker_names] 38 | # # self.pred_trajs = {} 39 | # for name in tracker_names: 40 | # traj_file = os.path.join(path, name, self.name+'.txt') 41 | # if os.path.exists(traj_file): 42 | # with open(traj_file, 'r') as f : 43 | # self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 44 | # for x in f.readlines()] 45 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 46 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 47 | # else: 48 | 49 | # self.tracker_names = list(self.pred_trajs.keys()) 50 | 51 | class TrackingNetDataset(Dataset): 52 | """ 53 | Args: 54 | name: dataset name, should be "NFS30" or "NFS240" 55 | dataset_root, dataset root dir 56 | """ 57 | def __init__(self, name, dataset_root, load_img=False): 58 | super(TrackingNetDataset, self).__init__(name, dataset_root) 59 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 60 | meta_data = json.load(f) 61 | 62 | # load videos 63 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 64 | self.videos = {} 65 | for video in pbar: 66 | pbar.set_postfix_str(video) 67 | self.videos[video] = TrackingNetVideo(video, 68 | dataset_root, 69 | meta_data[video]['video_dir'], 70 | meta_data[video]['init_rect'], 71 | meta_data[video]['img_names'], 72 | meta_data[video]['gt_rect'], 73 | None) 74 | self.attr = {} 75 | self.attr['ALL'] = list(self.videos.keys()) 76 | -------------------------------------------------------------------------------- /training_dataset/det/gen_json_clean.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from os.path import join, isdir 7 | from os import mkdir 8 | import glob 9 | import numpy as np 10 | import xml.etree.ElementTree as ET 11 | import json 12 | 13 | def check_neg(bbox): 14 | x1, y1, x2, y2 = bbox 15 | w, h = x2 - x1, y2 -y1 16 | if w <= 0 or h <= 0: 17 | return False 18 | return True 19 | 20 | def check_size(frame_sz, bbox): 21 | #min_ratio = 0.1 22 | max_ratio = 0.75 23 | # only accept objects >10% and <75% of the total frame 24 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz))) 25 | ok = (area_ratio < max_ratio) # and (area_ratio > min_ratio) 26 | return ok 27 | 28 | 29 | def check_borders(frame_sz, bbox): 30 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2 31 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \ 32 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \ 33 | ((frame_sz[1] - bbox[3]) > dist_from_border) 34 | return ok 35 | 36 | js = {} 37 | VID_base_path = '/home/feiji/Research/Data/ILSVRC2015' 
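# Point VID_base_path at your local ILSVRC2015 root; the script walks the DET
# annotations under it and dumps the cleaned index to train_largeclean.json.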
38 | ann_base_path = join(VID_base_path, 'Annotations/DET/train/') 39 | sub_sets = ('ILSVRC2013_train', 'ILSVRC2014_train_0000', 'ILSVRC2014_train_0001','ILSVRC2014_train_0002','ILSVRC2014_train_0003','ILSVRC2014_train_0004','ILSVRC2014_train_0005','ILSVRC2014_train_0006', 'val') 40 | for sub_set in sub_sets: 41 | sub_set_base_path = join(ann_base_path, sub_set) 42 | 43 | if 'ILSVRC2013_train' == sub_set: 44 | xmls = sorted(glob.glob(join(sub_set_base_path, '*', '*.xml'))) 45 | else: 46 | xmls = sorted(glob.glob(join(sub_set_base_path, '*.xml'))) 47 | n_imgs = len(xmls) 48 | for f, xml in enumerate(xmls): 49 | print('subset: {} frame id: {:08d} / {:08d}'.format(sub_set, f, n_imgs)) 50 | xmltree = ET.parse(xml) 51 | objects = xmltree.findall('object') 52 | size = xmltree.find('size') 53 | video = join(sub_set, xml.split('/')[-1].split('.')[0]) 54 | 55 | for id, object_iter in enumerate(objects): 56 | info = {} 57 | bndbox = object_iter.find('bndbox') 58 | frame_sz = [int(size.find('width').text), int(size.find('height').text)] 59 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text), 60 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)] 61 | if check_neg(bbox) and check_size(frame_sz, bbox) and check_borders(frame_sz, bbox): 62 | info['valid'] = 1 63 | info['bbox'] = bbox 64 | frame = '%06d' % (0) 65 | obj = '%02d' % (id) 66 | if video not in js: 67 | js[video] = {} 68 | if obj not in js[video]: 69 | js[video][obj] = {} 70 | js[video][obj][frame] = info 71 | #js[video][obj]['track_category'] = str(object_iter.find('name').text) 72 | 73 | json.dump(js, open('train_largeclean.json', 'w'), indent=4, sort_keys=True) 74 | 75 | 76 | -------------------------------------------------------------------------------- /utils/model_load.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import logging 9 | 10 | import torch 11 | 12 | 13 | logger = logging.getLogger('global') 14 | 15 | 16 | def check_keys(model, pretrained_state_dict): 17 | ckpt_keys = set(pretrained_state_dict.keys()) 18 | model_keys = set(model.state_dict().keys()) 19 | used_pretrained_keys = model_keys & ckpt_keys 20 | unused_pretrained_keys = ckpt_keys - model_keys 21 | missing_keys = model_keys - ckpt_keys 22 | # filter 'num_batches_tracked' 23 | missing_keys = [x for x in missing_keys 24 | if not x.endswith('num_batches_tracked')] 25 | if len(missing_keys) > 0: 26 | logger.info('[Warning] missing keys: {}'.format(missing_keys)) 27 | logger.info('missing keys:{}'.format(len(missing_keys))) 28 | if len(unused_pretrained_keys) > 0: 29 | logger.info('[Warning] unused_pretrained_keys: {}'.format( 30 | unused_pretrained_keys)) 31 | logger.info('unused checkpoint keys:{}'.format( 32 | len(unused_pretrained_keys))) 33 | logger.info('used keys:{}'.format(len(used_pretrained_keys))) 34 | assert len(used_pretrained_keys) > 0, \ 35 | 'load NONE from pretrained checkpoint' 36 | return True 37 | 38 | 39 | def remove_prefix(state_dict, prefix): 40 | ''' Old style model is stored with all names of parameters 41 | share common prefix 'module.' 
''' 42 | logger.info('remove prefix \'{}\''.format(prefix)) 43 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 44 | return {f(key): value for key, value in state_dict.items()} 45 | 46 | 47 | def load_pretrain(model, pretrained_path): 48 | logger.info('load pretrained model from {}'.format(pretrained_path)) 49 | device = torch.cuda.current_device() 50 | pretrained_dict = torch.load(pretrained_path, 51 | map_location=lambda storage, loc: storage.cuda(device)) 52 | if "state_dict" in pretrained_dict.keys(): 53 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 54 | 'module.') 55 | else: 56 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 57 | 58 | try: 59 | check_keys(model, pretrained_dict) 60 | except: 61 | logger.info('[Warning]: using pretrain as features.\ 62 | Adding "features." as prefix') 63 | new_dict = {} 64 | for k, v in pretrained_dict.items(): 65 | k = 'features.' + k 66 | new_dict[k] = v 67 | pretrained_dict = new_dict 68 | check_keys(model, pretrained_dict) 69 | model.load_state_dict(pretrained_dict, strict=False) 70 | return model 71 | 72 | 73 | def restore_from(model, ckpt_path): 74 | device = torch.cuda.current_device() 75 | ckpt = torch.load(ckpt_path, 76 | map_location=lambda storage, loc: storage.cuda(device)) 77 | epoch = ckpt['epoch'] 78 | 79 | ckpt_model_dict = remove_prefix(ckpt['state_dict'], 'module.') 80 | check_keys(model, ckpt_model_dict) 81 | model.load_state_dict(ckpt_model_dict, strict=False) 82 | 83 | return model, epoch 84 | -------------------------------------------------------------------------------- /models/attention/attention.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | from config.config import cfg 6 | 7 | class Attention(nn.Module): 8 | def __init__(self): 9 | super(Attention, self).__init__() 10 | 11 | def forward(self, z_f, x_f): 12 | raise NotImplementedError 13 | 14 | class PixelAttention(Attention): 15 | def __init__(self, feat_in=256): 16 | super(PixelAttention, self).__init__() 17 | self.feat_in = feat_in 18 | 19 | self.spatial_pool_agl = nn.Sequential( 20 | nn.Conv2d(25, 32, 3), 21 | nn.BatchNorm2d(32), 22 | nn.ReLU(inplace=True), 23 | nn.Conv2d(32, 32, 3), 24 | nn.BatchNorm2d(32), 25 | nn.ReLU(inplace=True), 26 | nn.ConvTranspose2d(32, 32, 3), 27 | nn.BatchNorm2d(32), 28 | nn.ReLU(inplace=True), 29 | nn.ConvTranspose2d(32, 1, 3), 30 | nn.Sigmoid(), 31 | ) 32 | 33 | self.spatial_pool_agr = nn.Sequential( 34 | nn.Conv2d(25, 32, 3), 35 | nn.BatchNorm2d(32), 36 | nn.ReLU(inplace=True), 37 | nn.Conv2d(32, 32, 3), 38 | nn.BatchNorm2d(32), 39 | nn.ReLU(inplace=True), 40 | nn.ConvTranspose2d(32, 32, 3), 41 | nn.BatchNorm2d(32), 42 | nn.ReLU(inplace=True), 43 | nn.ConvTranspose2d(32, 1, 3), 44 | nn.Sigmoid(), 45 | ) 46 | 47 | self.channel_pool_ag = nn.Sequential( 48 | nn.Linear(feat_in, feat_in//4), 49 | nn.ReLU(inplace=True), 50 | nn.Linear(feat_in//4, feat_in), 51 | ) 52 | 53 | self.channel_maxpool = nn.MaxPool2d(cfg.train.search_pool_size - cfg.train.template_pool_size + 1) 54 | self.channel_avgpool = nn.AvgPool2d(cfg.train.search_pool_size - cfg.train.template_pool_size + 1) 55 | self.channel_activation = nn.Sigmoid() 56 | for m in self.modules(): 57 | if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)): 58 | nn.init.kaiming_normal_( 59 | m.weight.data, mode='fan_out', nonlinearity='relu') 60 | elif isinstance(m, nn.BatchNorm2d): 61 | 
m.weight.data.fill_(1.0) 62 | m.bias.data.zero_() 63 | 64 | def forward(self, z_f, x_f): 65 | b, c, h, w = z_f.shape 66 | kernel = z_f.reshape(b,c,h*w).permute(0,2,1).reshape(-1, c, 1, 1) 67 | b, c, h, w = x_f.shape 68 | xf_reshape = x_f.reshape(1, -1, h, w) 69 | pixel_corr = F.conv2d(xf_reshape, kernel, groups=b).reshape(b, -1, h, w)# / c 70 | b, c, h, w = pixel_corr.shape 71 | spatial_att_l = self.spatial_pool_agl(pixel_corr) 72 | spatial_att_r = self.spatial_pool_agr(pixel_corr) 73 | b, c, h, w = z_f.shape 74 | kernel = z_f.reshape(b*c, 1, h, w) 75 | b, c, h, w = x_f.shape 76 | xf_reshape = x_f.reshape(1, b*c, h, w) 77 | depth_corr = F.conv2d(xf_reshape, kernel, groups=b*c) 78 | depth_corr = depth_corr.reshape(b, c, depth_corr.shape[-2], depth_corr.shape[-1]) 79 | channel_max_pool = self.channel_maxpool(depth_corr).squeeze() 80 | channel_avg_pool = self.channel_avgpool(depth_corr).squeeze() 81 | channel_att = self.channel_activation(self.channel_pool_ag(channel_max_pool) + self.channel_pool_ag(channel_avg_pool)).unsqueeze(-1).unsqueeze(-1) 82 | 83 | x_f = x_f * channel_att 84 | x_f_l = x_f * spatial_att_l 85 | x_f_r = x_f * spatial_att_r 86 | return x_f_l, x_f_r -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # [CGACD](https://openaccess.thecvf.com/content_CVPR_2020/html/Du_Correlation-Guided_Attention_for_Corner_Detection_Based_Visual_Tracking_CVPR_2020_paper.html) 2 | 3 | ## 1. Environment setup 4 | This code has been tested on Ubuntu 18.04, Python 3.7, Pytorch 1.1.0, CUDA 10.0. Please install related libraries before running this code: 5 | ```bash 6 | pip install -r requirements.txt 7 | python setup.py build_ext --inplace 8 | ``` 9 | ### Add CGACD to your PYTHONPATH 10 | ```bash 11 | export PYTHONPATH=/path/to/CGACD:$PYTHONPATH 12 | ``` 13 | 14 | 15 | ## 2. Test 16 | Download the pretrained model: [OTB and VOT](https://pan.baidu.com/s/11z74ZUGAPhupPLNrbGN5NQ) (code: 16s0) and put them into `checkpoint` directory. 17 | 18 | Download testing datasets and put them into `dataset` directory. Jsons of commonly used datasets can be downloaded from [BaiduYun](https://pan.baidu.com/s/1js0Qhykqqur7_lNRtle1tA#list/path=%2F) or [Google driver](https://drive.google.com/drive/folders/1TC8obz4TvlbvTRWbS4Cn4VwwJ8tXs2sv?usp=sharing). If you want to test the tracker on a new dataset, please refer to [pysot-toolkit](https://github.com/StrangerZhang/pysot-toolkit) to set test_dataset. 19 | 20 | ```bash 21 | python tools/test.py \ 22 | --dataset VOT2018 \ # dataset_name 23 | --model checkpoint/CGACD_VOT.pth \ # tracker_name 24 | --save_name CGACD_VOT 25 | ``` 26 | 27 | The testing result will be saved in the `results/dataset_name/tracker_name` directory. 28 | 29 | ## 3. Train 30 | ### Prepare training datasets 31 | 32 | Download the datasets: 33 | * [VID](http://image-net.org/challenges/LSVRC/2017/) 34 | * [YOUTUBEBB](https://research.google.com/youtube-bb/) 35 | * [DET](http://image-net.org/challenges/LSVRC/2017/) 36 | * [COCO](http://cocodataset.org) 37 | * [GOT-10K](http://got-10k.aitestunion.com/downloads) 38 | 39 | Scripts to prepare training dataset are listed in `training_dataset` directory. 
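Each sub-folder's `gen_json_clean.py` filters the raw annotations (degenerate, oversized, or border-touching boxes are dropped) and writes a cleaned index such as `train_largeclean.json`. The dataset roots are hard-coded in the scripts (e.g. `dataDir` in the COCO script), so edit them to point at your local copies before running. An illustrative run for COCO:

```bash
cd training_dataset/coco
# edit the hard-coded dataDir path in gen_json_clean.py first
python gen_json_clean.py   # writes train2017_largeclean.json and val2017_largeclean.json
```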
40 | 41 | ### Download pretrained backbones 42 | Download pretrained backbones from [google driver](https://drive.google.com/drive/folders/1DuXVWVYIeynAcvt9uxtkuleV6bs6e3T9) or [BaiduYun](https://pan.baidu.com/s/1pYe73PjkQx4Ph9cd3ePfCQ) (code: 5o1d) and put them into `pretrained_net` directory. 43 | 44 | ### Train a model 45 | To train the CGACD model, run `train.py` with the desired configs: 46 | 47 | ```bash 48 | python tools/train.py 49 | --config=experiments/cgacd_resnet/cgacd_resnet.yml \ 50 | -b 64 \ 51 | -j 16 \ 52 | --save_name cgacd_resnet 53 | ``` 54 | 55 | We use two RTX2080TI for training. 56 | 57 | ## 4. Evaluation 58 | We provide the tracking [results](https://pan.baidu.com/s/1fM36M19LUgd3hI0QFnwkdw) (code: qw69 ) of OTB2015, VOT2018, UAV123, and LaSOT. If you want to evaluate the tracker, please put those results into `results` directory. 59 | 60 | ``` 61 | python eval.py \ 62 | -p ./results \ # result path 63 | -d VOT2018 \ # dataset_name 64 | -t CGACD_VOT # tracker_name 65 | ``` 66 | 67 | ## 5. Acknowledgement 68 | The code is implemented based on [pysot](https://github.com/STVIR/pysot) and [PreciseRoIPooling](https://github.com/vacancy/PreciseRoIPooling). We would like to express our sincere thanks to the contributors. 69 | 70 | 71 | ## 6. Cite 72 | If you use CGACD in your work please cite our paper: 73 | > @InProceedings{Du_2020_CVPR, 74 | author = {Du, Fei and Liu, Peng and Zhao, Wei and Tang, Xianglong}, 75 | title = {Correlation-Guided Attention for Corner Detection Based Visual Tracking}, 76 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 77 | month = {June}, 78 | year = {2020} 79 | } 80 | 81 | 82 | -------------------------------------------------------------------------------- /track/run_CGACD.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn.functional as F 3 | import torch 4 | from matplotlib import pyplot as plt 5 | import pdb 6 | from utils.utils import get_subwindow_tracking 7 | from config.config import cfg 8 | 9 | def tracker_eval(net, x_crop, target_pos, template_bbox, target_sz, window, scale_z): 10 | target, penalty, score, best_pscore_id = net.track(x_crop, target_sz, template_bbox, window) 11 | #pdb.set_trace() 12 | target = target / scale_z 13 | #import pdb 14 | #pdb.set_trace() 15 | target_sz = target_sz / scale_z 16 | lr = penalty[best_pscore_id] * score[best_pscore_id] * cfg.track.lr 17 | 18 | res_x = target[0] + target_pos[0] 19 | res_y = target[1] + target_pos[1] 20 | 21 | res_w = target_sz[0] * (1 - lr) + target[2] * lr 22 | res_h = target_sz[1] * (1 - lr) + target[3] * lr 23 | 24 | target_pos = np.array([res_x, res_y]) 25 | target_sz = np.array([res_w, res_h]) 26 | return target_pos, target_sz, score[best_pscore_id] 27 | 28 | 29 | def CGACD_init(im, target_pos, target_sz, net): 30 | state = dict() 31 | state['im_h'] = im.shape[0] 32 | state['im_w'] = im.shape[1] 33 | 34 | avg_chans = np.mean(im, axis=(0, 1)) 35 | 36 | wc_z = target_sz[0] + cfg.track.contex_amount * sum(target_sz) 37 | hc_z = target_sz[1] + cfg.track.contex_amount * sum(target_sz) 38 | s_z = round(np.sqrt(wc_z * hc_z)) 39 | # initialize the exemplar 40 | z_crop = get_subwindow_tracking(im, target_pos, cfg.track.template_size, s_z, avg_chans) 41 | 42 | scale_z = cfg.track.template_size / s_z 43 | w, h = target_sz[0] * scale_z, target_sz[1] * scale_z 44 | cx, cy = cfg.track.template_size//2, cfg.track.template_size//2 45 | template_bbox = [cx - w*0.5, cy - h*0.5, cx 
+ w*0.5, cy + h*0.5] 46 | 47 | z = torch.from_numpy(np.transpose(z_crop, (2, 0, 1))).float().unsqueeze(0) 48 | net.template(z.cuda()) 49 | 50 | if cfg.track.windowing == 'cosine': 51 | window = np.outer(np.hanning(cfg.track.response_size), np.hanning(cfg.track.response_size)) 52 | elif cfg.track.windowing == 'uniform': 53 | window = np.ones((cfg.track.response_size, cfg.track.response_size)) 54 | window = window.flatten() 55 | 56 | state['net'] = net 57 | state['avg_chans'] = avg_chans 58 | state['window'] = window 59 | state['target_pos'] = target_pos 60 | state['target_sz'] = target_sz 61 | state['template_bbox'] = template_bbox 62 | return state 63 | 64 | 65 | def CGACD_track(state, im): 66 | net = state['net'] 67 | avg_chans = state['avg_chans'] 68 | window = state['window'] 69 | target_pos = state['target_pos'] 70 | target_sz = state['target_sz'] 71 | template_bbox = state['template_bbox'] 72 | wc_z = target_sz[1] + cfg.track.contex_amount * sum(target_sz) 73 | hc_z = target_sz[0] + cfg.track.contex_amount * sum(target_sz) 74 | s_z = np.sqrt(wc_z * hc_z) 75 | scale_z = cfg.track.template_size / s_z 76 | s_x = s_z * (cfg.track.search_size / cfg.track.template_size) 77 | 78 | # extract scaled crops for search region x at previous target position 79 | x_crop = get_subwindow_tracking(im, target_pos, cfg.track.search_size, round(s_x), avg_chans) 80 | 81 | x_crop = torch.from_numpy(np.transpose(x_crop, (2, 0, 1))).float().unsqueeze(0) 82 | 83 | target_pos, target_sz, best_score = tracker_eval(net, x_crop.cuda(), target_pos, template_bbox, target_sz * scale_z, window, scale_z) 84 | target_pos[0] = max(0, min(state['im_w'], target_pos[0])) 85 | target_pos[1] = max(0, min(state['im_h'], target_pos[1])) 86 | target_sz[0] = max(10, min(state['im_w'], target_sz[0])) 87 | target_sz[1] = max(10, min(state['im_h'], target_sz[1])) 88 | state['target_pos'] = target_pos 89 | state['target_sz'] = target_sz 90 | state['best_score'] = best_score 91 | return state 92 | -------------------------------------------------------------------------------- /training_dataset/got10k/gen_json_clean.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from os.path import join, isdir 7 | from os import listdir 8 | import json 9 | import numpy as np 10 | import glob 11 | import cv2 12 | from pathlib import Path 13 | 14 | base_path = '/home/feiji/Research/Data/GOT-10k' 15 | sub_sets= sorted({'train', 'val'}) 16 | 17 | def check_neg(bbox): 18 | x1, y1, x2, y2 = bbox 19 | w, h = x2 - x1, y2 -y1 20 | if w <= 0 or h <= 0: 21 | return False 22 | return True 23 | 24 | def check_size(frame_sz, bbox): 25 | #min_ratio = 0.1 26 | max_ratio = 0.75 27 | # only accept objects >10% and <75% of the total frame 28 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz))) 29 | ok = (area_ratio < max_ratio) # and (area_ratio > min_ratio) 30 | return ok 31 | 32 | 33 | def check_borders(frame_sz, bbox): 34 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2 35 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \ 36 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \ 37 | ((frame_sz[1] - bbox[3]) > dist_from_border) 38 | return ok 39 | 40 | def isinvalid(name): 41 | allname = 'GOT-10k_Train_008628' + 
'GOT-10k_Train_008630' + 'GOT-10k_Train_009058' + \ 42 | 'GOT-10k_Train_009059' + 'GOT-10k_Train_008633' + 'GOT-10k_Train_008632' + \ 43 | 'GOT-10k_Train_008625' + 'GOT-10k_Train_008623' + 'GOT-10k_Train_008637' + \ 44 | 'GOT-10k_Train_008627' + 'GOT-10k_Train_008629' + 'GOT-10k_Train_008634' + \ 45 | 'GOT-10k_Train_008626' + 'GOT-10k_Train_005996' + 'GOT-10k_Train_004419' 46 | 47 | if allname.find(name) != -1: 48 | return True 49 | return False 50 | 51 | 52 | snippets = dict() 53 | n_snippets = 0 54 | n_videos = 0 55 | for subset in sub_sets: 56 | sub_set_base_path = join(base_path, subset) 57 | videos = sorted(listdir(sub_set_base_path)) 58 | for video in videos: 59 | if not isdir(join(sub_set_base_path, video)): 60 | continue 61 | if isinvalid(video): 62 | continue 63 | n_videos += 1 64 | ground_truth_file = join(sub_set_base_path, video, 'groundtruth.txt') 65 | full_occlusion_file = join(sub_set_base_path, video, 'absence.label') 66 | #cover = join(sub_set_base_path, video, 'cover.label') 67 | gt = np.genfromtxt(ground_truth_file, delimiter=',', dtype=float).astype(np.int) 68 | fo = np.genfromtxt(full_occlusion_file, dtype=int) 69 | subdir_paths = sorted(glob.glob(join(sub_set_base_path, video, '*.jpg'))) 70 | snippets[join(subset, video)] = dict() 71 | snippet = dict() 72 | track_id = 0 73 | valid_num = 0 74 | img = cv2.imread(subdir_paths[0]) 75 | frame_sz = [img.shape[1], img.shape[0]] 76 | for i, img in enumerate(subdir_paths): 77 | info = {} 78 | filename = Path(img).stem 79 | bbox = gt[i] 80 | fo_i = fo[i] 81 | bbox = [int(bbox[0]), int(bbox[1]), int(bbox[0]+bbox[2]), int(bbox[1]+bbox[3])] 82 | info['valid'] = 0 83 | if (not fo_i) and check_neg(bbox) and check_size(frame_sz, bbox) and check_borders(frame_sz, bbox): 84 | info['valid'] = 1 85 | valid_num += 1 86 | info['bbox'] = bbox 87 | snippet['{:06d}'.format(int(filename))] = info 88 | #snippet['track_category'] = 0 89 | if valid_num > 1: 90 | snippets[join(subset, video)]['{:02d}'.format(track_id)] = snippet 91 | n_snippets += 1 92 | print('video: {:d} snippets_num: {:d}'.format(n_videos, n_snippets)) 93 | 94 | json.dump(snippets, open('train_largeclean.json', 'w'), indent=4, sort_keys=True) 95 | print('done!') 96 | -------------------------------------------------------------------------------- /toolkit/datasets/lasot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | class LaSOTVideo(Video): 12 | """ 13 | Args: 14 | name: video name 15 | root: dataset root 16 | video_dir: video directory 17 | init_rect: init rectangle 18 | img_names: image names 19 | gt_rect: groundtruth rectangle 20 | attr: attribute of video 21 | """ 22 | def __init__(self, name, root, video_dir, init_rect, img_names, 23 | gt_rect, attr, absent, load_img=False): 24 | super(LaSOTVideo, self).__init__(name, root, video_dir, 25 | init_rect, img_names, gt_rect, attr, load_img) 26 | self.absent = np.array(absent, np.int8) 27 | 28 | def load_tracker(self, path, tracker_names=None, store=True): 29 | """ 30 | Args: 31 | path(str): path to result 32 | tracker_name(list): name of tracker 33 | """ 34 | if not tracker_names: 35 | tracker_names = [x.split('/')[-1] for x in glob(path) 36 | if os.path.isdir(x)] 37 | if isinstance(tracker_names, str): 38 | tracker_names = [tracker_names] 39 | for name in tracker_names: 40 | traj_file = os.path.join(path, 
name, self.name+'.txt') 41 | if os.path.exists(traj_file): 42 | with open(traj_file, 'r') as f : 43 | pred_traj = [list(map(float, x.strip().split(','))) 44 | for x in f.readlines()] 45 | else: 46 | print("File not exists: ", traj_file) 47 | if self.name == 'monkey-17': 48 | pred_traj = pred_traj[:len(self.gt_traj)] 49 | if store: 50 | self.pred_trajs[name] = pred_traj 51 | else: 52 | return pred_traj 53 | self.tracker_names = list(self.pred_trajs.keys()) 54 | 55 | 56 | 57 | class LaSOTDataset(Dataset): 58 | """ 59 | Args: 60 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50' 61 | dataset_root: dataset root 62 | load_img: wether to load all imgs 63 | """ 64 | def __init__(self, name, dataset_root, load_img=False): 65 | super(LaSOTDataset, self).__init__(name, dataset_root) 66 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 67 | meta_data = json.load(f) 68 | 69 | # load videos 70 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 71 | self.videos = {} 72 | for video in pbar: 73 | pbar.set_postfix_str(video) 74 | for i, nm in enumerate(meta_data[video]['img_names']): 75 | for_name = nm.split('-')[0] 76 | meta_data[video]['img_names'][i] = os.path.join(for_name, nm) 77 | self.videos[video] = LaSOTVideo(video, 78 | dataset_root, 79 | meta_data[video]['video_dir'], 80 | meta_data[video]['init_rect'], 81 | meta_data[video]['img_names'], 82 | meta_data[video]['gt_rect'], 83 | meta_data[video]['attr'], 84 | meta_data[video]['absent']) 85 | 86 | # set attr 87 | attr = [] 88 | for x in self.videos.values(): 89 | attr += x.attr 90 | attr = set(attr) 91 | self.attr = {} 92 | self.attr['ALL'] = list(self.videos.keys()) 93 | for x in attr: 94 | self.attr[x] = [] 95 | for k, v in self.videos.items(): 96 | for attr_ in v.attr: 97 | self.attr[attr_].append(k) 98 | 99 | 100 | -------------------------------------------------------------------------------- /models/PreciseRoIPooling/README.md: -------------------------------------------------------------------------------- 1 | # PreciseRoIPooling 2 | This repo implements the **Precise RoI Pooling** (PrRoI Pooling), proposed in the paper **Acquisition of Localization Confidence for Accurate Object Detection** published at ECCV 2018 (Oral Presentation). 3 | 4 | **Acquisition of Localization Confidence for Accurate Object Detection** 5 | 6 | _Borui Jiang*, Ruixuan Luo*, Jiayuan Mao*, Tete Xiao, Yuning Jiang_ (* indicates equal contribution.) 7 | 8 | https://arxiv.org/abs/1807.11590 9 | 10 | ## Brief 11 | 12 | In short, Precise RoI Pooling is an integration-based (bilinear interpolation) average pooling method for RoI Pooling. It avoids any quantization and has a continuous gradient on bounding box coordinates. It is: 13 | 14 | - different from the original RoI Pooling proposed in [Fast R-CNN](https://arxiv.org/abs/1504.08083). PrRoI Pooling uses average pooling instead of max pooling for each bin and has a continuous gradient on bounding box coordinates. That is, one can take the derivatives of some loss function w.r.t the coordinates of each RoI and optimize the RoI coordinates. 15 | - different from the RoI Align proposed in [Mask R-CNN](https://arxiv.org/abs/1703.06870). PrRoI Pooling uses a full integration-based average pooling instead of sampling a constant number of points. This makes the gradient w.r.t. the coordinates continuous. 16 | 17 | For a better illustration, we illustrate RoI Pooling, RoI Align and PrRoI Pooing in the following figure. 
More details including the gradient computation can be found in our paper. 18 | 19 |
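In symbols (notation paraphrased from the paper): writing $f(x, y)$ for the feature map made continuous by bilinear interpolation, a bin with corners $(x_1, y_1)$ and $(x_2, y_2)$ is pooled by a full two-dimensional integral rather than by sampling a fixed set of points:

$$
\mathrm{PrPool}\left(bin, \mathcal{F}\right) = \frac{\int_{y_1}^{y_2}\int_{x_1}^{x_2} f(x, y)\,\mathrm{d}x\,\mathrm{d}y}{(x_2 - x_1)\times(y_2 - y_1)}
$$

Since this expression is differentiable in $x_1, y_1, x_2, y_2$ as well as in the feature values, the gradient w.r.t. the RoI coordinates is continuous.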
20 | 21 | ## Implementation 22 | 23 | PrRoI Pooling was originally implemented by [Tete Xiao](http://tetexiao.com/) based on MegBrain, an (internal) deep learning framework built by Megvii Inc. It was later adapted into open-source deep learning frameworks. Currently, we only support PyTorch. Unfortunately, we don't have any specific plan for the adaptation into other frameworks such as TensorFlow, but any contributions (pull requests) will be more than welcome. 24 | 25 | ## Usage (PyTorch 1.0) 26 | 27 | In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 1.0+ and only supports CUDA (CPU mode is not implemented). 28 | Since we use PyTorch JIT for cxx/cuda code compilation, to use the module in your code, simply do: 29 | 30 | ``` 31 | from prroi_pool import PrRoIPool2D 32 | 33 | avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale) 34 | roi_features = avg_pool(features, rois) 35 | 36 | # for those who want to use the "functional" 37 | 38 | from prroi_pool.functional import prroi_pool2d 39 | roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale) 40 | ``` 41 | 42 | 43 | ## Usage (PyTorch 0.4) 44 | 45 | **!!! Please first checkout to the branch pytorch0.4.** 46 | 47 | In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 0.4 and only supports CUDA (CPU mode is not implemented). 48 | To use the PrRoI Pooling module, first goto `pytorch/prroi_pool` and execute `./travis.sh` to compile the essential components (you may need `nvcc` for this step). To use the module in your code, simply do: 49 | 50 | ``` 51 | from prroi_pool import PrRoIPool2D 52 | 53 | avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale) 54 | roi_features = avg_pool(features, rois) 55 | 56 | # for those who want to use the "functional" 57 | 58 | from prroi_pool.functional import prroi_pool2d 59 | roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale) 60 | ``` 61 | 62 | Here, 63 | 64 | - RoI is an `m * 5` float tensor of format `(batch_index, x0, y0, x1, y1)`, following the convention in the original Caffe implementation of RoI Pooling, although in some frameworks the batch indices are provided by an integer tensor. 65 | - `spatial_scale` is multiplied to the RoIs. For example, if your feature maps are down-sampled by a factor of 16 (w.r.t. the input image), you should use a spatial scale of `1/16`. 66 | - The coordinates for RoI follows the [L, R) convension. That is, `(0, 0, 4, 4)` denotes a box of size `4x4`. 
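Putting the three conventions together, a minimal sketch (the tensor sizes, the stride-16 backbone, and the example boxes are illustrative values only):

```
import torch
from prroi_pool import PrRoIPool2D

features = torch.rand(2, 256, 32, 32).cuda()   # feature maps from a stride-16 backbone (assumed)
rois = torch.tensor([
    [0,  0.0,  0.0,  64.0,  64.0],             # (batch_index, x0, y0, x1, y1) in image pixels
    [1, 32.0, 48.0, 128.0, 112.0],
], dtype=torch.float32).cuda()

avg_pool = PrRoIPool2D(7, 7, 1.0 / 16)         # pooled_height, pooled_width, spatial_scale
roi_features = avg_pool(features, rois)        # -> (2, 256, 7, 7)
```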
67 | -------------------------------------------------------------------------------- /toolkit/utils/src/region.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 4; tab-width: 4 -*- */ 2 | 3 | #ifndef _REGION_H_ 4 | #define _REGION_H_ 5 | 6 | #ifdef TRAX_STATIC_DEFINE 7 | # define __TRAX_EXPORT 8 | #else 9 | # ifndef __TRAX_EXPORT 10 | # if defined(_MSC_VER) 11 | # ifdef trax_EXPORTS 12 | /* We are building this library */ 13 | # define __TRAX_EXPORT __declspec(dllexport) 14 | # else 15 | /* We are using this library */ 16 | # define __TRAX_EXPORT __declspec(dllimport) 17 | # endif 18 | # elif defined(__GNUC__) 19 | # ifdef trax_EXPORTS 20 | /* We are building this library */ 21 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 22 | # else 23 | /* We are using this library */ 24 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 25 | # endif 26 | # endif 27 | # endif 28 | #endif 29 | 30 | #ifndef MAX 31 | #define MAX(a,b) (((a) > (b)) ? (a) : (b)) 32 | #endif 33 | 34 | #ifndef MIN 35 | #define MIN(a,b) (((a) < (b)) ? (a) : (b)) 36 | #endif 37 | 38 | #define TRAX_DEFAULT_CODE 0 39 | 40 | #define REGION_LEGACY_RASTERIZATION 1 41 | 42 | #ifdef __cplusplus 43 | extern "C" { 44 | #endif 45 | 46 | typedef enum region_type {EMPTY, SPECIAL, RECTANGLE, POLYGON, MASK} region_type; 47 | 48 | typedef struct region_bounds { 49 | 50 | float top; 51 | float bottom; 52 | float left; 53 | float right; 54 | 55 | } region_bounds; 56 | 57 | typedef struct region_polygon { 58 | 59 | int count; 60 | 61 | float* x; 62 | float* y; 63 | 64 | } region_polygon; 65 | 66 | typedef struct region_mask { 67 | 68 | int x; 69 | int y; 70 | 71 | int width; 72 | int height; 73 | 74 | char* data; 75 | 76 | } region_mask; 77 | 78 | typedef struct region_rectangle { 79 | 80 | float x; 81 | float y; 82 | float width; 83 | float height; 84 | 85 | } region_rectangle; 86 | 87 | typedef struct region_container { 88 | enum region_type type; 89 | union { 90 | region_rectangle rectangle; 91 | region_polygon polygon; 92 | region_mask mask; 93 | int special; 94 | } data; 95 | } region_container; 96 | 97 | typedef struct region_overlap { 98 | 99 | float overlap; 100 | float only1; 101 | float only2; 102 | 103 | } region_overlap; 104 | 105 | extern const region_bounds region_no_bounds; 106 | 107 | __TRAX_EXPORT int region_set_flags(int mask); 108 | 109 | __TRAX_EXPORT int region_clear_flags(int mask); 110 | 111 | __TRAX_EXPORT region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds); 112 | 113 | __TRAX_EXPORT float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds); 114 | 115 | __TRAX_EXPORT region_bounds region_create_bounds(float left, float top, float right, float bottom); 116 | 117 | __TRAX_EXPORT region_bounds region_compute_bounds(const region_container* region); 118 | 119 | __TRAX_EXPORT int region_parse(const char* buffer, region_container** region); 120 | 121 | __TRAX_EXPORT char* region_string(region_container* region); 122 | 123 | __TRAX_EXPORT void region_print(FILE* out, region_container* region); 124 | 125 | __TRAX_EXPORT region_container* region_convert(const region_container* region, region_type type); 126 | 127 | __TRAX_EXPORT void region_release(region_container** region); 128 | 129 | __TRAX_EXPORT region_container* region_create_special(int code); 130 | 131 | 
__TRAX_EXPORT region_container* region_create_rectangle(float x, float y, float width, float height); 132 | 133 | __TRAX_EXPORT region_container* region_create_polygon(int count); 134 | 135 | __TRAX_EXPORT int region_contains_point(region_container* r, float x, float y); 136 | 137 | __TRAX_EXPORT void region_get_mask(region_container* r, char* mask, int width, int height); 138 | 139 | __TRAX_EXPORT void region_get_mask_offset(region_container* r, char* mask, int x, int y, int width, int height); 140 | 141 | #ifdef __cplusplus 142 | } 143 | #endif 144 | 145 | #endif 146 | -------------------------------------------------------------------------------- /training_dataset/coco/pycocotools/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? 
JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /training_dataset/vid/gen_json_clean.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from os.path import join 7 | from os import listdir 8 | import json 9 | import numpy as np 10 | 11 | print('load json (raw vid info), please wait 20 seconds~') 12 | vid = json.load(open('vid.json', 'r')) 13 | 14 | def check_neg(bbox): 15 | x1, y1, x2, y2 = bbox 16 | w, h = x2 - x1, y2 -y1 17 | if w <= 0 or h <= 0: 18 | return False 19 | return True 20 | 21 | def check_size(frame_sz, bbox): 22 | #min_ratio = 0.1 23 | max_ratio = 0.75 24 | # only accept objects >10% and <75% of the total frame 25 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz))) 26 | ok 
= (area_ratio < max_ratio) # and (area_ratio > min_ratio) 27 | return ok 28 | 29 | 30 | def check_borders(frame_sz, bbox): 31 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2 32 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \ 33 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \ 34 | ((frame_sz[1] - bbox[3]) > dist_from_border) 35 | return ok 36 | 37 | 38 | snippets = dict() 39 | n_snippets = 0 40 | n_videos = 0 41 | for subset in vid: 42 | for video in subset: 43 | n_videos += 1 44 | frames = video['frame'] 45 | id_set = [] 46 | id_frames = [[]] * 60 # at most 60 objects 47 | for f, frame in enumerate(frames): 48 | objs = frame['objs'] 49 | frame_sz = frame['frame_sz'] 50 | for obj in objs: 51 | trackid = obj['trackid'] 52 | occluded = obj['occ'] 53 | bbox = obj['bbox'] 54 | # if occluded: 55 | # continue 56 | # 57 | obj['valid'] = 0 58 | if not(occluded) and check_neg(bbox) and check_size(frame_sz, bbox) and check_borders(frame_sz, bbox): 59 | obj['valid'] = 1 60 | # 61 | # if obj['c'] in ['n01674464', 'n01726692', 'n04468005', 'n02062744']: 62 | # continue 63 | 64 | if trackid not in id_set: 65 | id_set.append(trackid) 66 | id_frames[trackid] = [] 67 | id_frames[trackid].append(f) 68 | if len(id_set) > 0: 69 | snippets[video['base_path']] = dict() 70 | for selected in id_set: 71 | frame_ids = sorted(id_frames[selected]) 72 | sequences = np.split(frame_ids, np.array(np.where(np.diff(frame_ids) > 1)[0]) + 1) 73 | sequences = [s for s in sequences if len(s) > 1] # remove isolated frame. 74 | for seq in sequences: 75 | snippet = dict() 76 | valid_num = 0 77 | for frame_id in seq: 78 | info = {} 79 | frame = frames[frame_id] 80 | for obj in frame['objs']: 81 | if obj['trackid'] == selected: 82 | o = obj 83 | continue 84 | info['bbox'] = o['bbox'] 85 | info['valid'] = o['valid'] 86 | if o['valid'] == 1: 87 | valid_num+=1 88 | snippet[frame['img_path'].split('.')[0]] = info 89 | #snippet['track_category'] = o['c'] 90 | if valid_num > 0: 91 | snippets[video['base_path']]['{:02d}'.format(selected)] = snippet 92 | n_snippets += 1 93 | print('video: {:d} snippets_num: {:d}'.format(n_videos, n_snippets)) 94 | 95 | train = {k:v for (k,v) in snippets.items() if 'train' in k} 96 | val = {k:v for (k,v) in snippets.items() if 'val' in k} 97 | 98 | json.dump(train, open('train_largeclean.json', 'w'), indent=4, sort_keys=True) 99 | json.dump(val, open('val_largeclean.json', 'w'), indent=4, sort_keys=True) 100 | print('done!') 101 | -------------------------------------------------------------------------------- /models/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu.c 3 | * Author : Jiayuan Mao, Tete Xiao 4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 5 | * Date : 07/13/2018 6 | * 7 | * Distributed under terms of the MIT license. 8 | * Copyright (c) 2017 Megvii Technology Limited. 
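 *
 * This translation unit exposes the CUDA forward, backward and
 * coordinate-backward kernels to PyTorch through the PYBIND11_MODULE
 * block at the bottom of the file.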
9 | */ 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | #include 18 | 19 | #include "prroi_pooling_gpu_impl.cuh" 20 | 21 | 22 | at::Tensor prroi_pooling_forward_cuda(const at::Tensor &features, const at::Tensor &rois, int pooled_height, int pooled_width, float spatial_scale) { 23 | int nr_rois = rois.size(0); 24 | int nr_channels = features.size(1); 25 | int height = features.size(2); 26 | int width = features.size(3); 27 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 28 | auto output = at::zeros({nr_rois, nr_channels, pooled_height, pooled_width}, features.options()); 29 | 30 | if (output.numel() == 0) { 31 | THCudaCheck(cudaGetLastError()); 32 | return output; 33 | } 34 | 35 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 36 | PrRoIPoolingForwardGpu( 37 | stream, features.data(), rois.data(), output.data(), 38 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 39 | top_count 40 | ); 41 | 42 | THCudaCheck(cudaGetLastError()); 43 | return output; 44 | } 45 | 46 | at::Tensor prroi_pooling_backward_cuda( 47 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff, 48 | int pooled_height, int pooled_width, float spatial_scale) { 49 | 50 | auto features_diff = at::zeros_like(features); 51 | 52 | int nr_rois = rois.size(0); 53 | int batch_size = features.size(0); 54 | int nr_channels = features.size(1); 55 | int height = features.size(2); 56 | int width = features.size(3); 57 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 58 | int bottom_count = batch_size * nr_channels * height * width; 59 | 60 | if (output.numel() == 0) { 61 | THCudaCheck(cudaGetLastError()); 62 | return features_diff; 63 | } 64 | 65 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 66 | PrRoIPoolingBackwardGpu( 67 | stream, 68 | features.data(), rois.data(), output.data(), output_diff.data(), 69 | features_diff.data(), 70 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 71 | top_count, bottom_count 72 | ); 73 | 74 | THCudaCheck(cudaGetLastError()); 75 | return features_diff; 76 | } 77 | 78 | at::Tensor prroi_pooling_coor_backward_cuda( 79 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff, 80 | int pooled_height, int pooled_width, float spatial_scale) { 81 | 82 | auto coor_diff = at::zeros_like(rois); 83 | 84 | int nr_rois = rois.size(0); 85 | int nr_channels = features.size(1); 86 | int height = features.size(2); 87 | int width = features.size(3); 88 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 89 | int bottom_count = nr_rois * 5; 90 | 91 | if (output.numel() == 0) { 92 | THCudaCheck(cudaGetLastError()); 93 | return coor_diff; 94 | } 95 | 96 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 97 | PrRoIPoolingCoorBackwardGpu( 98 | stream, 99 | features.data(), rois.data(), output.data(), output_diff.data(), 100 | coor_diff.data(), 101 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 102 | top_count, bottom_count 103 | ); 104 | 105 | THCudaCheck(cudaGetLastError()); 106 | return coor_diff; 107 | } 108 | 109 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 110 | m.def("prroi_pooling_forward_cuda", &prroi_pooling_forward_cuda, "PRRoIPooling_forward"); 111 | m.def("prroi_pooling_backward_cuda", &prroi_pooling_backward_cuda, "PRRoIPooling_backward"); 112 | m.def("prroi_pooling_coor_backward_cuda", 
&prroi_pooling_coor_backward_cuda, "PRRoIPooling_backward_coor"); 113 | } 114 | -------------------------------------------------------------------------------- /models/siamese/siamese.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | def xcorr_depthwise(x, kernel): 9 | """depthwise cross correlation 10 | """ 11 | batch = kernel.size(0) 12 | channel = kernel.size(1) 13 | x = x.view(1, batch*channel, x.size(2), x.size(3)) 14 | kernel = kernel.view(batch*channel, 1, kernel.size(2), kernel.size(3)) 15 | out = F.conv2d(x, kernel, groups=batch*channel) 16 | out = out.view(batch, channel, out.size(2), out.size(3)) 17 | return out 18 | 19 | 20 | def xcorr_up(x, kernel): 21 | batch_sz = kernel.shape[0] 22 | kernel = kernel.reshape(-1, x.shape[1], 23 | kernel.shape[2], kernel.shape[3]) 24 | out = F.conv2d( 25 | x.reshape(1, -1, x.shape[2], x.shape[3]), kernel, groups=batch_sz) 26 | out = out.reshape(batch_sz, -1, out.shape[2], out.shape[3]) 27 | return out 28 | 29 | 30 | class UPXCorr(nn.Module): 31 | def __init__(self, out_channels, adjust, feat_in=256, feat_out=256): 32 | super(UPXCorr, self).__init__() 33 | self.conv_kernel = nn.Conv2d(feat_in, feat_out * out_channels, 3) 34 | self.conv_search = nn.Conv2d(feat_in, feat_out, 3) 35 | if adjust: 36 | self.adjust = nn.Conv2d(out_channels, out_channels, 1) 37 | else: 38 | self.adjust = lambda x: x 39 | 40 | def forward(self, z_f, x_f): 41 | kernel = self.conv_kernel(z_f) 42 | search = self.conv_search(x_f) 43 | out = xcorr_up(search, kernel) 44 | return self.adjust(out) 45 | 46 | 47 | class DepthwiseXCorr(nn.Module): 48 | def __init__(self, feat_in=256, feat_out=256, out_channels=1, kernel_size=3): 49 | super(DepthwiseXCorr, self).__init__() 50 | self.conv_kernel = nn.Sequential( 51 | nn.Conv2d(feat_in, feat_out, kernel_size=kernel_size, bias=False), 52 | nn.BatchNorm2d(feat_out), 53 | nn.ReLU(inplace=True), 54 | ) 55 | self.conv_search = nn.Sequential( 56 | nn.Conv2d(feat_in, feat_out, kernel_size=kernel_size, bias=False), 57 | nn.BatchNorm2d(feat_out), 58 | nn.ReLU(inplace=True), 59 | ) 60 | self.head = nn.Sequential( 61 | nn.Conv2d(feat_out, feat_out, kernel_size=1, bias=False), 62 | nn.BatchNorm2d(feat_out), 63 | nn.ReLU(inplace=True), 64 | nn.Conv2d(feat_out, out_channels, kernel_size=1) 65 | ) 66 | self.kernel = None 67 | 68 | def forward(self, kernel, search): 69 | kernel = self.conv_kernel(kernel) 70 | search = self.conv_search(search) 71 | feature = xcorr_depthwise(search, kernel) 72 | out = self.head(feature) 73 | return out 74 | 75 | 76 | class Siamese(nn.Module): 77 | def __init__(self): 78 | super(Siamese, self).__init__() 79 | 80 | def forward(self, z_f, x_f): 81 | raise NotImplementedError 82 | 83 | 84 | def normal_init(m, mean, stddev): 85 | m.weight.data.normal_(mean, stddev) 86 | m.bias.data.zero_() 87 | 88 | 89 | class UPChannelSiamese(Siamese): 90 | def __init__(self, feat_in=256, feature_out=256): 91 | super(UPChannelSiamese, self).__init__() 92 | self.cls = UPXCorr(1, False, feat_in, feature_out) 93 | self.loc = UPXCorr(4, True, feat_in, feature_out) 94 | for m in self.modules(): 95 | if isinstance(m, nn.Conv2d): 96 | normal_init(m, 0, 0.001) 97 | 98 | def forward(self, z_f, x_f): 99 | loc = self.loc(z_f[:,:,4:-4,4:-4], x_f) 100 | cls = self.cls(z_f[:,:,4:-4,4:-4], x_f) 101 | return loc, cls 102 | 103 | 104 | class DepthwiseSiamese(Siamese): 105 | 
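    # Each head below reduces the template (kernel) and search features with a
    # 3x3 conv + BN + ReLU, correlates them channel-by-channel via
    # xcorr_depthwise, and maps the response with 1x1 convs to 1 channel
    # (classification) or 4 channels (localization).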
def __init__(self, feat_in=256, feature_out=256): 106 | super(DepthwiseSiamese, self).__init__() 107 | self.cls = DepthwiseXCorr(feat_in, feature_out, 1) 108 | self.loc = DepthwiseXCorr(feat_in, feature_out, 4) 109 | 110 | for m in self.modules(): 111 | if isinstance(m, nn.Conv2d): 112 | nn.init.kaiming_normal_( 113 | m.weight.data, mode='fan_out', nonlinearity='relu') 114 | elif isinstance(m, nn.BatchNorm2d): 115 | m.weight.data.fill_(1.0) 116 | m.bias.data.zero_() 117 | 118 | def forward(self, z_f, x_f): 119 | cls = self.cls(z_f, x_f) 120 | loc = self.loc(z_f, x_f) 121 | return loc, cls -------------------------------------------------------------------------------- /models/cornerdet/cornerdet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | class CornerDet(nn.Module): 8 | def __init__(self): 9 | super(CornerDet, self).__init__() 10 | 11 | def forward(self, x_f): 12 | raise NotImplementedError 13 | 14 | 15 | class SepCornerDet(CornerDet): 16 | def __init__(self, feat_in=256): 17 | super(SepCornerDet, self).__init__() 18 | 19 | self.up0_l = nn.Sequential( 20 | nn.Conv2d(feat_in, 256, 3, padding=1), 21 | nn.BatchNorm2d(256), 22 | nn.ReLU(inplace=True), 23 | nn.Conv2d(256, 64, 1), 24 | nn.BatchNorm2d(64), 25 | nn.ReLU(inplace=True), 26 | ) 27 | 28 | self.up1_l = nn.Sequential( 29 | nn.Conv2d(64, 64, 3, padding=1), 30 | nn.BatchNorm2d(64), 31 | nn.ReLU(inplace=True), 32 | nn.Conv2d(64, 32, 1), 33 | nn.BatchNorm2d(32), 34 | nn.ReLU(inplace=True), 35 | ) 36 | 37 | self.up2_l = nn.Sequential( 38 | nn.Conv2d(32, 32, 3, padding=1), 39 | nn.BatchNorm2d(32), 40 | nn.ReLU(inplace=True), 41 | nn.Conv2d(32, 1, 1), 42 | ) 43 | 44 | self.up0_r = nn.Sequential( 45 | nn.Conv2d(feat_in, 256, 3, padding=1), 46 | nn.BatchNorm2d(256), 47 | nn.ReLU(inplace=True), 48 | nn.Conv2d(256, 64, 1), 49 | nn.BatchNorm2d(64), 50 | nn.ReLU(inplace=True), 51 | ) 52 | 53 | self.up1_r = nn.Sequential( 54 | nn.Conv2d(64, 64, 3, padding=1), 55 | nn.BatchNorm2d(64), 56 | nn.ReLU(inplace=True), 57 | nn.Conv2d(64, 32, 1), 58 | nn.BatchNorm2d(32), 59 | nn.ReLU(inplace=True), 60 | ) 61 | 62 | self.up2_r = nn.Sequential( 63 | nn.Conv2d(32, 32, 3, padding=1), 64 | nn.BatchNorm2d(32), 65 | nn.ReLU(inplace=True), 66 | nn.Conv2d(32, 1, 1), 67 | ) 68 | 69 | for m in self.modules(): 70 | if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)): 71 | nn.init.kaiming_normal_( 72 | m.weight.data, mode='fan_out', nonlinearity='relu') 73 | elif isinstance(m, nn.BatchNorm2d): 74 | m.weight.data.fill_(1.0) 75 | m.bias.data.zero_() 76 | 77 | def forward(self, x_f): 78 | x_f_l, x_f_r = x_f 79 | x_f_l = self.up0_l(x_f_l) 80 | resolution = x_f_l.shape[-1] 81 | x_f_l = self.up1_l(F.interpolate( 82 | x_f_l, size=(resolution*2+1, resolution*2+1))) 83 | resolution = x_f_l.shape[-1] 84 | heat_map_l = self.up2_l(F.interpolate( 85 | x_f_l, size=(resolution*2+1, resolution*2+1))) 86 | 87 | batch_sz = x_f_l.shape[0] 88 | left_top_map = F.softmax(heat_map_l.squeeze().reshape(batch_sz, -1), 1).reshape( 89 | batch_sz, heat_map_l.shape[-2], heat_map_l.shape[-1]) 90 | 91 | x_f_r = self.up0_r(x_f_r) 92 | resolution = x_f_r.shape[-1] 93 | x_f_r = self.up1_r(F.interpolate( 94 | x_f_r, size=(resolution*2+1, resolution*2+1))) 95 | resolution = x_f_r.shape[-1] 96 | heat_map_r = self.up2_r(F.interpolate( 97 | x_f_r, size=(resolution*2+1, resolution*2+1))) 98 | batch_sz = x_f_r.shape[0] 99 | right_bottom_map = 
F.softmax(heat_map_r.squeeze().reshape(batch_sz, -1), 1).reshape( 100 | batch_sz, heat_map_r.shape[-2], heat_map_r.shape[-1]) 101 | 102 | heatmap_size = left_top_map.shape[-1] 103 | xx, yy = np.meshgrid([dx for dx in range(int(heatmap_size))], 104 | [dy for dy in range(int(heatmap_size))]) 105 | heatmap_xx = torch.from_numpy(xx).float().cuda() 106 | heatmap_yy = torch.from_numpy(yy).float().cuda() 107 | 108 | x1 = ((left_top_map * heatmap_xx).sum(-1).sum(-1) / 109 | heatmap_xx.shape[-1]).reshape(-1, 1) 110 | y1 = ((left_top_map * heatmap_yy).sum(-1).sum(-1) / 111 | heatmap_xx.shape[-2]).reshape(-1, 1) 112 | x2 = ((right_bottom_map * heatmap_xx).sum(-1).sum(-1) / 113 | heatmap_xx.shape[-1]).reshape(-1, 1) 114 | y2 = ((right_bottom_map * heatmap_yy).sum(-1).sum(-1) / 115 | heatmap_xx.shape[-2]).reshape(-1, 1) 116 | 117 | result_target = torch.cat((x1, y1, x2, y2), 1) 118 | 119 | return result_target, left_top_map.shape[-1] -------------------------------------------------------------------------------- /toolkit/datasets/otb.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | class OTBVideo(Video): 14 | """ 15 | Args: 16 | name: video name 17 | root: dataset root 18 | video_dir: video directory 19 | init_rect: init rectangle 20 | img_names: image names 21 | gt_rect: groundtruth rectangle 22 | attr: attribute of video 23 | """ 24 | def __init__(self, name, root, video_dir, init_rect, img_names, 25 | gt_rect, attr, load_img=False): 26 | super(OTBVideo, self).__init__(name, root, video_dir, 27 | init_rect, img_names, gt_rect, attr, load_img) 28 | 29 | def load_tracker(self, path, tracker_names=None, store=True): 30 | """ 31 | Args: 32 | path(str): path to result 33 | tracker_name(list): name of tracker 34 | """ 35 | if not tracker_names: 36 | tracker_names = [x.split('/')[-1] for x in glob(path) 37 | if os.path.isdir(x)] 38 | if isinstance(tracker_names, str): 39 | tracker_names = [tracker_names] 40 | for name in tracker_names: 41 | traj_file = os.path.join(path, name, self.name+'.txt') 42 | if not os.path.exists(traj_file): 43 | if self.name == 'FleetFace': 44 | txt_name = 'fleetface.txt' 45 | elif self.name == 'Jogging-1': 46 | txt_name = 'jogging_1.txt' 47 | elif self.name == 'Jogging-2': 48 | txt_name = 'jogging_2.txt' 49 | elif self.name == 'Skating2-1': 50 | txt_name = 'skating2_1.txt' 51 | elif self.name == 'Skating2-2': 52 | txt_name = 'skating2_2.txt' 53 | elif self.name == 'FaceOcc1': 54 | txt_name = 'faceocc1.txt' 55 | elif self.name == 'FaceOcc2': 56 | txt_name = 'faceocc2.txt' 57 | elif self.name == 'Human4-2': 58 | txt_name = 'human4_2.txt' 59 | else: 60 | txt_name = self.name[0].lower()+self.name[1:]+'.txt' 61 | traj_file = os.path.join(path, name, txt_name) 62 | if os.path.exists(traj_file): 63 | with open(traj_file, 'r') as f : 64 | pred_traj = [list(map(float, x.strip().split(','))) 65 | for x in f.readlines()] 66 | if len(pred_traj) != len(self.gt_traj): 67 | print(name, len(pred_traj), len(self.gt_traj), self.name) 68 | if store: 69 | self.pred_trajs[name] = pred_traj 70 | else: 71 | return pred_traj 72 | else: 73 | print(traj_file) 74 | self.tracker_names = list(self.pred_trajs.keys()) 75 | 76 | 77 | 78 | class OTBDataset(Dataset): 79 | """ 80 | Args: 81 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50' 82 | 
dataset_root: dataset root 83 | load_img: wether to load all imgs 84 | """ 85 | def __init__(self, name, dataset_root, load_img=False): 86 | super(OTBDataset, self).__init__(name, dataset_root) 87 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 88 | meta_data = json.load(f) 89 | 90 | # load videos 91 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 92 | self.videos = {} 93 | for video in pbar: 94 | pbar.set_postfix_str(video) 95 | self.videos[video] = OTBVideo(video, 96 | dataset_root, 97 | meta_data[video]['video_dir'], 98 | meta_data[video]['init_rect'], 99 | meta_data[video]['img_names'], 100 | meta_data[video]['gt_rect'], 101 | meta_data[video]['attr'], 102 | load_img) 103 | 104 | # set attr 105 | attr = [] 106 | for x in self.videos.values(): 107 | attr += x.attr 108 | attr = set(attr) 109 | self.attr = {} 110 | self.attr['ALL'] = list(self.videos.keys()) 111 | for x in attr: 112 | self.attr[x] = [] 113 | for k, v in self.videos.items(): 114 | for attr_ in v.attr: 115 | self.attr[attr_].append(k) 116 | -------------------------------------------------------------------------------- /training_dataset/coco/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | #import pycocotools._mask as _mask 4 | from . import _mask 5 | 6 | # Interface for manipulating masks stored in RLE format. 7 | # 8 | # RLE is a simple yet efficient format for storing binary masks. RLE 9 | # first divides a vector (or vectorized image) into a series of piecewise 10 | # constant regions and then for each piece simply stores the length of 11 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 12 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 13 | # (note that the odd counts are always the numbers of zeros). Instead of 14 | # storing the counts directly, additional compression is achieved with a 15 | # variable bitrate representation based on a common scheme called LEB128. 16 | # 17 | # Compression is greatest given large piecewise constant regions. 18 | # Specifically, the size of the RLE is proportional to the number of 19 | # *boundaries* in M (or for an image the number of boundaries in the y 20 | # direction). Assuming fairly simple shapes, the RLE representation is 21 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 22 | # is substantially lower, especially for large simple objects (large n). 23 | # 24 | # Many common operations on masks can be computed directly using the RLE 25 | # (without need for decoding). This includes computations such as area, 26 | # union, intersection, etc. All of these operations are linear in the 27 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 28 | # of the object. Computing these operations on the original mask is O(n). 29 | # Thus, using the RLE can result in substantial computational savings. 30 | # 31 | # The following API functions are defined: 32 | # encode - Encode binary masks using RLE. 33 | # decode - Decode binary masks encoded via RLE. 34 | # merge - Compute union or intersection of encoded masks. 35 | # iou - Compute intersection over union between masks. 36 | # area - Compute area of encoded masks. 37 | # toBbox - Get bounding boxes surrounding encoded masks. 38 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 
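#
#  Worked example (added sketch, not part of the original notes; assumes the compiled
#  _mask extension is importable, e.g. via a standard pycocotools install or after
#  building this vendored copy with "python setup.py build_ext --inplace"):
#    import numpy as np
#    from pycocotools import mask as mask_util
#    m = np.asfortranarray(np.zeros((10, 10, 1), dtype=np.uint8))
#    m[2:5, 3:8, 0] = 1                            # a 3x5 foreground patch
#    rle = mask_util.encode(m)[0]                  # RLE dict with 'size' and 'counts'
#    mask_util.area(rle)                           # -> 15
#    mask_util.toBbox(rle)                         # -> [3., 2., 5., 3.] as [x y w h]
#    (mask_util.decode(rle) == m[:, :, 0]).all()   # lossless round trip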
39 | # 40 | # Usage: 41 | # Rs = encode( masks ) 42 | # masks = decode( Rs ) 43 | # R = merge( Rs, intersect=false ) 44 | # o = iou( dt, gt, iscrowd ) 45 | # a = area( Rs ) 46 | # bbs = toBbox( Rs ) 47 | # Rs = frPyObjects( [pyObjects], h, w ) 48 | # 49 | # In the API the following formats are used: 50 | # Rs - [dict] Run-length encoding of binary masks 51 | # R - dict Run-length encoding of binary mask 52 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 53 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 54 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 55 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 56 | # dt,gt - May be either bounding boxes or encoded masks 57 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 58 | # 59 | # Finally, a note about the intersection over union (iou) computation. 60 | # The standard iou of a ground truth (gt) and detected (dt) object is 61 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 62 | # For "crowd" regions, we use a modified criteria. If a gt object is 63 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 64 | # Choosing gt' in the crowd gt that best matches the dt can be done using 65 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 66 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 67 | # For crowd gt regions we use this modified criteria above for the iou. 68 | # 69 | # To compile run "python setup.py build_ext --inplace" 70 | # Please do not contact us for help with compiling. 71 | # 72 | # Microsoft COCO Toolbox. version 2.0 73 | # Data, paper, and tutorials available at: http://mscoco.org/ 74 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
75 | # Licensed under the Simplified BSD License [see coco/license.txt] 76 | 77 | iou = _mask.iou 78 | merge = _mask.merge 79 | frPyObjects = _mask.frPyObjects 80 | 81 | def encode(bimask): 82 | if len(bimask.shape) == 3: 83 | return _mask.encode(bimask) 84 | elif len(bimask.shape) == 2: 85 | h, w = bimask.shape 86 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 87 | 88 | def decode(rleObjs): 89 | if type(rleObjs) == list: 90 | return _mask.decode(rleObjs) 91 | else: 92 | return _mask.decode([rleObjs])[:,:,0] 93 | 94 | def area(rleObjs): 95 | if type(rleObjs) == list: 96 | return _mask.area(rleObjs) 97 | else: 98 | return _mask.area([rleObjs])[0] 99 | 100 | def toBbox(rleObjs): 101 | if type(rleObjs) == list: 102 | return _mask.toBbox(rleObjs) 103 | else: 104 | return _mask.toBbox([rleObjs])[0] 105 | -------------------------------------------------------------------------------- /toolkit/utils/src/buffer.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef __STRING_BUFFER_H 3 | #define __STRING_BUFFER_H 4 | 5 | // Enable MinGW secure API for _snprintf_s 6 | #define MINGW_HAS_SECURE_API 1 7 | 8 | #ifdef _MSC_VER 9 | #define __INLINE __inline 10 | #else 11 | #define __INLINE inline 12 | #endif 13 | 14 | #include <stdlib.h> 15 | #include <string.h> 16 | #include <stdarg.h> 17 | 18 | typedef struct string_buffer { 19 | char* buffer; 20 | int position; 21 | int size; 22 | } string_buffer; 23 | 24 | typedef struct string_list { 25 | char** buffer; 26 | int position; 27 | int size; 28 | } string_list; 29 | 30 | #define BUFFER_INCREMENT_STEP 4096 31 | 32 | static __INLINE string_buffer* buffer_create(int L) { 33 | string_buffer* B = (string_buffer*) malloc(sizeof(string_buffer)); 34 | B->size = L; 35 | B->buffer = (char*) malloc(sizeof(char) * B->size); 36 | B->position = 0; 37 | return B; 38 | } 39 | 40 | static __INLINE void buffer_reset(string_buffer* B) { 41 | B->position = 0; 42 | } 43 | 44 | static __INLINE void buffer_destroy(string_buffer** B) { 45 | if (!(*B)) return; 46 | if ((*B)->buffer) { 47 | free((*B)->buffer); 48 | (*B)->buffer = NULL; 49 | } 50 | free((*B)); 51 | (*B) = NULL; 52 | } 53 | 54 | static __INLINE char* buffer_extract(const string_buffer* B) { 55 | char *S = (char*) malloc(sizeof(char) * (B->position + 1)); 56 | memcpy(S, B->buffer, B->position); 57 | S[B->position] = '\0'; 58 | return S; 59 | } 60 | 61 | static __INLINE int buffer_size(const string_buffer* B) { 62 | return B->position; 63 | } 64 | 65 | static __INLINE void buffer_push(string_buffer* B, char C) { 66 | int required = 1; 67 | if (required > B->size - B->position) { 68 | B->size = B->position + BUFFER_INCREMENT_STEP; 69 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 70 | } 71 | B->buffer[B->position] = C; 72 | B->position += required; 73 | } 74 | 75 | static __INLINE void buffer_append(string_buffer* B, const char *format, ...)
{ 76 | 77 | int required; 78 | va_list args; 79 | 80 | #if defined(__OS2__) || defined(__WINDOWS__) || defined(WIN32) || defined(_MSC_VER) 81 | 82 | va_start(args, format); 83 | required = _vscprintf(format, args) + 1; 84 | va_end(args); 85 | if (required >= B->size - B->position) { 86 | B->size = B->position + required + 1; 87 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 88 | } 89 | va_start(args, format); 90 | required = _vsnprintf_s(&(B->buffer[B->position]), B->size - B->position, _TRUNCATE, format, args); 91 | va_end(args); 92 | B->position += required; 93 | 94 | #else 95 | va_start(args, format); 96 | required = vsnprintf(&(B->buffer[B->position]), B->size - B->position, format, args); 97 | va_end(args); 98 | if (required >= B->size - B->position) { 99 | B->size = B->position + required + 1; 100 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 101 | va_start(args, format); 102 | required = vsnprintf(&(B->buffer[B->position]), B->size - B->position, format, args); 103 | va_end(args); 104 | } 105 | B->position += required; 106 | #endif 107 | 108 | } 109 | 110 | static __INLINE string_list* list_create(int L) { 111 | string_list* B = (string_list*) malloc(sizeof(string_list)); 112 | B->size = L; 113 | B->buffer = (char**) malloc(sizeof(char*) * B->size); 114 | memset(B->buffer, 0, sizeof(char*) * B->size); 115 | B->position = 0; 116 | return B; 117 | } 118 | 119 | static __INLINE void list_reset(string_list* B) { 120 | int i; 121 | for (i = 0; i < B->position; i++) { 122 | if (B->buffer[i]) free(B->buffer[i]); 123 | B->buffer[i] = NULL; 124 | } 125 | B->position = 0; 126 | } 127 | 128 | static __INLINE void list_destroy(string_list **B) { 129 | int i; 130 | 131 | if (!(*B)) return; 132 | 133 | for (i = 0; i < (*B)->position; i++) { 134 | if ((*B)->buffer[i]) free((*B)->buffer[i]); (*B)->buffer[i] = NULL; 135 | } 136 | 137 | if ((*B)->buffer) { 138 | free((*B)->buffer); (*B)->buffer = NULL; 139 | } 140 | 141 | free((*B)); 142 | (*B) = NULL; 143 | } 144 | 145 | static __INLINE char* list_get(const string_list *B, int I) { 146 | if (I < 0 || I >= B->position) { 147 | return NULL; 148 | } else { 149 | if (!B->buffer[I]) { 150 | return NULL; 151 | } else { 152 | char *S; 153 | int length = strlen(B->buffer[I]); 154 | S = (char*) malloc(sizeof(char) * (length + 1)); 155 | memcpy(S, B->buffer[I], length + 1); 156 | return S; 157 | } 158 | } 159 | } 160 | 161 | static __INLINE int list_size(const string_list *B) { 162 | return B->position; 163 | } 164 | 165 | static __INLINE void list_append(string_list *B, char* S) { 166 | int required = 1; 167 | int length = strlen(S); 168 | if (required > B->size - B->position) { 169 | B->size = B->position + 16; 170 | B->buffer = (char**) realloc(B->buffer, sizeof(char*) * B->size); 171 | } 172 | B->buffer[B->position] = (char*) malloc(sizeof(char) * (length + 1)); 173 | memcpy(B->buffer[B->position], S, length + 1); 174 | B->position += required; 175 | } 176 | 177 | // This version of the append does not copy the string but simply takes the control of its allocation 178 | static __INLINE void list_append_direct(string_list *B, char* S) { 179 | int required = 1; 180 | // int length = strlen(S); 181 | if (required > B->size - B->position) { 182 | B->size = B->position + 16; 183 | B->buffer = (char**) realloc(B->buffer, sizeof(char*) * B->size); 184 | } 185 | B->buffer[B->position] = S; 186 | B->position += required; 187 | } 188 | 189 | 190 | #endif 191 | 
-------------------------------------------------------------------------------- /toolkit/visualization/draw_success_precision.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from .draw_utils import COLOR, LINE_STYLE 5 | 6 | def draw_success_precision(success_ret, name, videos, attr, precision_ret=None, 7 | norm_precision_ret=None, bold_name=None, axis=[0, 1]): 8 | # success plot 9 | fig, ax = plt.subplots() 10 | ax.grid(b=True) 11 | ax.set_aspect(1) 12 | plt.xlabel('Overlap threshold') 13 | plt.ylabel('Success rate') 14 | if attr == 'ALL': 15 | plt.title(r'\textbf{Success plots of OPE on %s}' % (name)) 16 | else: 17 | plt.title(r'\textbf{Success plots of OPE - %s}' % (attr)) 18 | plt.axis([0, 1]+axis) 19 | success = {} 20 | thresholds = np.arange(0, 1.05, 0.05) 21 | for tracker_name in success_ret.keys(): 22 | value = [v for k, v in success_ret[tracker_name].items() if k in videos] 23 | success[tracker_name] = np.mean(value) 24 | for idx, (tracker_name, auc) in \ 25 | enumerate(sorted(success.items(), key=lambda x:x[1], reverse=True)): 26 | if tracker_name == bold_name: 27 | label = r"\textbf{[%.3f] %s}" % (auc, tracker_name) 28 | else: 29 | label = "[%.3f] " % (auc) + tracker_name 30 | value = [v for k, v in success_ret[tracker_name].items() if k in videos] 31 | plt.plot(thresholds, np.mean(value, axis=0), 32 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 33 | ax.legend(loc='lower left', labelspacing=0.2) 34 | ax.autoscale(enable=True, axis='both', tight=True) 35 | xmin, xmax, ymin, ymax = plt.axis() 36 | ax.autoscale(enable=False) 37 | ymax += 0.03 38 | plt.axis([xmin, xmax, ymin, ymax]) 39 | plt.xticks(np.arange(xmin, xmax+0.01, 0.1)) 40 | plt.yticks(np.arange(ymin, ymax, 0.1)) 41 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 42 | plt.show() 43 | 44 | if precision_ret: 45 | # norm precision plot 46 | fig, ax = plt.subplots() 47 | ax.grid(b=True) 48 | ax.set_aspect(50) 49 | plt.xlabel('Location error threshold') 50 | plt.ylabel('Precision') 51 | if attr == 'ALL': 52 | plt.title(r'\textbf{Precision plots of OPE on %s}' % (name)) 53 | else: 54 | plt.title(r'\textbf{Precision plots of OPE - %s}' % (attr)) 55 | plt.axis([0, 50]+axis) 56 | precision = {} 57 | thresholds = np.arange(0, 51, 1) 58 | for tracker_name in precision_ret.keys(): 59 | value = [v for k, v in precision_ret[tracker_name].items() if k in videos] 60 | precision[tracker_name] = np.mean(value, axis=0)[20] 61 | for idx, (tracker_name, pre) in \ 62 | enumerate(sorted(precision.items(), key=lambda x:x[1], reverse=True)): 63 | if tracker_name == bold_name: 64 | label = r"\textbf{[%.3f] %s}" % (pre, tracker_name) 65 | else: 66 | label = "[%.3f] " % (pre) + tracker_name 67 | value = [v for k, v in precision_ret[tracker_name].items() if k in videos] 68 | plt.plot(thresholds, np.mean(value, axis=0), 69 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 70 | ax.legend(loc='lower right', labelspacing=0.2) 71 | ax.autoscale(enable=True, axis='both', tight=True) 72 | xmin, xmax, ymin, ymax = plt.axis() 73 | ax.autoscale(enable=False) 74 | ymax += 0.03 75 | plt.axis([xmin, xmax, ymin, ymax]) 76 | plt.xticks(np.arange(xmin, xmax+0.01, 5)) 77 | plt.yticks(np.arange(ymin, ymax, 0.1)) 78 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 79 | plt.show() 80 | 81 | # norm precision plot 82 | if norm_precision_ret: 83 | fig, ax = plt.subplots() 84 | ax.grid(b=True) 85 | plt.xlabel('Location error 
threshold') 86 | plt.ylabel('Precision') 87 | if attr == 'ALL': 88 | plt.title(r'\textbf{Normalized Precision plots of OPE on %s}' % (name)) 89 | else: 90 | plt.title(r'\textbf{Normalized Precision plots of OPE - %s}' % (attr)) 91 | norm_precision = {} 92 | thresholds = np.arange(0, 51, 1) / 100 93 | for tracker_name in precision_ret.keys(): 94 | value = [v for k, v in norm_precision_ret[tracker_name].items() if k in videos] 95 | norm_precision[tracker_name] = np.mean(value, axis=0)[20] 96 | for idx, (tracker_name, pre) in \ 97 | enumerate(sorted(norm_precision.items(), key=lambda x:x[1], reverse=True)): 98 | if tracker_name == bold_name: 99 | label = r"\textbf{[%.3f] %s}" % (pre, tracker_name) 100 | else: 101 | label = "[%.3f] " % (pre) + tracker_name 102 | value = [v for k, v in norm_precision_ret[tracker_name].items() if k in videos] 103 | plt.plot(thresholds, np.mean(value, axis=0), 104 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 105 | ax.legend(loc='lower right', labelspacing=0.2) 106 | ax.autoscale(enable=True, axis='both', tight=True) 107 | xmin, xmax, ymin, ymax = plt.axis() 108 | ax.autoscale(enable=False) 109 | ymax += 0.03 110 | plt.axis([xmin, xmax, ymin, ymax]) 111 | plt.xticks(np.arange(xmin, xmax+0.01, 0.05)) 112 | plt.yticks(np.arange(ymin, ymax, 0.1)) 113 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 114 | plt.show() 115 | -------------------------------------------------------------------------------- /toolkit/datasets/video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import re 4 | import numpy as np 5 | import json 6 | 7 | from glob import glob 8 | 9 | class Video(object): 10 | def __init__(self, name, root, video_dir, init_rect, img_names, 11 | gt_rect, attr, load_img=False): 12 | self.name = name 13 | self.video_dir = video_dir 14 | self.init_rect = init_rect 15 | self.gt_traj = gt_rect 16 | self.attr = attr 17 | self.pred_trajs = {} 18 | self.img_names = [os.path.join(root, x) for x in img_names] 19 | self.imgs = None 20 | 21 | if load_img: 22 | self.imgs = [cv2.imread(x) for x in self.img_names] 23 | self.width = self.imgs[0].shape[1] 24 | self.height = self.imgs[0].shape[0] 25 | else: 26 | img = cv2.imread(self.img_names[0]) 27 | assert img is not None, self.img_names[0] 28 | self.width = img.shape[1] 29 | self.height = img.shape[0] 30 | 31 | def load_tracker(self, path, tracker_names=None, store=True): 32 | """ 33 | Args: 34 | path(str): path to result 35 | tracker_name(list): name of tracker 36 | """ 37 | if not tracker_names: 38 | tracker_names = [x.split('/')[-1] for x in glob(path) 39 | if os.path.isdir(x)] 40 | if isinstance(tracker_names, str): 41 | tracker_names = [tracker_names] 42 | for name in tracker_names: 43 | traj_file = os.path.join(path, name, self.name+'.txt') 44 | if os.path.exists(traj_file): 45 | with open(traj_file, 'r') as f : 46 | pred_traj = [list(map(float, x.strip().split(','))) 47 | for x in f.readlines()] 48 | if len(pred_traj) != len(self.gt_traj): 49 | print(name, len(pred_traj), len(self.gt_traj), self.name) 50 | if store: 51 | self.pred_trajs[name] = pred_traj 52 | else: 53 | return pred_traj 54 | else: 55 | print(traj_file) 56 | self.tracker_names = list(self.pred_trajs.keys()) 57 | 58 | def load_img(self): 59 | if self.imgs is None: 60 | self.imgs = [cv2.imread(x) for x in self.img_names] 61 | self.width = self.imgs[0].shape[1] 62 | self.height = self.imgs[0].shape[0] 63 | 64 | def free_img(self): 65 | self.imgs = None 66 | 67 | def 
__len__(self): 68 | return len(self.img_names) 69 | 70 | def __getitem__(self, idx): 71 | if self.imgs is None: 72 | return cv2.imread(self.img_names[idx]), self.gt_traj[idx] 73 | else: 74 | return self.imgs[idx], self.gt_traj[idx] 75 | 76 | def __iter__(self): 77 | for i in range(len(self.img_names)): 78 | if self.imgs is not None: 79 | yield self.imgs[i], self.gt_traj[i] 80 | else: 81 | yield cv2.imread(self.img_names[i]), self.gt_traj[i] 82 | 83 | def draw_box(self, roi, img, linewidth, color, name=None): 84 | """ 85 | roi: rectangle or polygon 86 | img: numpy array img 87 | linewith: line width of the bbox 88 | """ 89 | if len(roi) > 6 and len(roi) % 2 == 0: 90 | pts = np.array(roi, np.int32).reshape(-1, 1, 2) 91 | color = tuple(map(int, color)) 92 | img = cv2.polylines(img, [pts], True, color, linewidth) 93 | pt = (pts[0, 0, 0], pts[0, 0, 1]-5) 94 | if name: 95 | img = cv2.putText(img, name, pt, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 1) 96 | elif len(roi) == 4: 97 | if not np.isnan(roi[0]): 98 | roi = list(map(int, roi)) 99 | color = tuple(map(int, color)) 100 | img = cv2.rectangle(img, (roi[0], roi[1]), (roi[0]+roi[2], roi[1]+roi[3]), 101 | color, linewidth) 102 | if name: 103 | img = cv2.putText(img, name, (roi[0], roi[1]-5), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 1) 104 | return img 105 | 106 | def show(self, pred_trajs={}, linewidth=2, show_name=False): 107 | """ 108 | pred_trajs: dict of pred_traj, {'tracker_name': list of traj} 109 | pred_traj should contain polygon or rectangle(x, y, width, height) 110 | linewith: line width of the bbox 111 | """ 112 | assert self.imgs is not None 113 | video = [] 114 | cv2.namedWindow(self.name, cv2.WINDOW_NORMAL) 115 | colors = {} 116 | if len(pred_trajs) == 0 and len(self.pred_trajs) > 0: 117 | pred_trajs = self.pred_trajs 118 | for i, (roi, img) in enumerate(zip(self.gt_traj, 119 | self.imgs[self.start_frame:self.end_frame+1])): 120 | img = img.copy() 121 | if len(img.shape) == 2: 122 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 123 | else: 124 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 125 | img = self.draw_box(roi, img, linewidth, (0, 255, 0), 126 | 'gt' if show_name else None) 127 | for name, trajs in pred_trajs.items(): 128 | if name not in colors: 129 | color = tuple(np.random.randint(0, 256, 3)) 130 | colors[name] = color 131 | else: 132 | color = colors[name] 133 | img = self.draw_box(trajs[0][i], img, linewidth, color, 134 | name if show_name else None) 135 | cv2.putText(img, str(i+self.start_frame), (5, 20), 136 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 0), 2) 137 | cv2.imshow(self.name, img) 138 | cv2.waitKey(40) 139 | video.append(img.copy()) 140 | return video 141 | -------------------------------------------------------------------------------- /training_dataset/got10k/par_crop.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from os.path import join, isdir 7 | from os import listdir, mkdir, makedirs 8 | import cv2 9 | import numpy as np 10 | import glob 11 | import xml.etree.ElementTree as ET 12 | from concurrent import futures 13 | from pathlib import Path 14 | import sys 15 | import time 16 | 17 | base_path = '/ssd/feiji/Research/Data/GOT-10k' 18 | sub_sets= sorted({'train', 'val'}) 19 | # Print iterations progress (thanks StackOverflow) 20 
| def printProgress(iteration, total, prefix='', suffix='', decimals=1, barLength=100): 21 | """ 22 | Call in a loop to create terminal progress bar 23 | @params: 24 | iteration - Required : current iteration (Int) 25 | total - Required : total iterations (Int) 26 | prefix - Optional : prefix string (Str) 27 | suffix - Optional : suffix string (Str) 28 | decimals - Optional : positive number of decimals in percent complete (Int) 29 | barLength - Optional : character length of bar (Int) 30 | """ 31 | formatStr = "{0:." + str(decimals) + "f}" 32 | percents = formatStr.format(100 * (iteration / float(total))) 33 | filledLength = int(round(barLength * iteration / float(total))) 34 | bar = '' * filledLength + '-' * (barLength - filledLength) 35 | sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)), 36 | if iteration == total: 37 | sys.stdout.write('\x1b[2K\r') 38 | sys.stdout.flush() 39 | 40 | 41 | def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)): 42 | a = (out_sz-1) / (bbox[2]-bbox[0]) 43 | b = (out_sz-1) / (bbox[3]-bbox[1]) 44 | c = -a * bbox[0] 45 | d = -b * bbox[1] 46 | mapping = np.array([[a, 0, c], 47 | [0, b, d]]).astype(np.float) 48 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), borderMode=cv2.BORDER_CONSTANT, borderValue=padding) 49 | return crop 50 | 51 | 52 | def pos_s_2_bbox(pos, s): 53 | return [pos[0]-s/2, pos[1]-s/2, pos[0]+s/2, pos[1]+s/2] 54 | 55 | 56 | def crop_like_SiamFC(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)): 57 | target_pos = [(bbox[2]+bbox[0])/2., (bbox[3]+bbox[1])/2.] 58 | target_size = [bbox[2]-bbox[0], bbox[3]-bbox[1]] 59 | wc_z = target_size[1] + context_amount * sum(target_size) 60 | hc_z = target_size[0] + context_amount * sum(target_size) 61 | s_z = np.sqrt(wc_z * hc_z) 62 | scale_z = exemplar_size / s_z 63 | d_search = (instanc_size - exemplar_size) / 2 64 | pad = d_search / scale_z 65 | s_x = s_z + 2 * pad 66 | 67 | z = crop_hwc(image, pos_s_2_bbox(target_pos, s_z), exemplar_size, padding) 68 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding) 69 | return z, x 70 | 71 | 72 | def crop_like_SiamFCx(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)): 73 | target_pos = [(bbox[2]+bbox[0])/2., (bbox[3]+bbox[1])/2.] 
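    # The remaining lines implement the SiamFC-style crop rule: add context_amount * (w + h)
    # of context around the target, take the geometric mean as the exemplar scale s_z, then
    # grow it so the search crop s_x maps to instanc_size pixels when the exemplar maps to
    # exemplar_size pixels. Worked example (illustrative numbers only): w = h = 100 and
    # context_amount = 0.5 give wc_z = hc_z = 200 and s_z = 200; with exemplar_size = 127 and
    # instanc_size = 511 (the default in main()), scale_z = 0.635, d_search = 192,
    # pad = 192 / 0.635 ~= 302.4, so s_x ~= 804.7 source pixels.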
74 | target_size = [bbox[2]-bbox[0], bbox[3]-bbox[1]] 75 | wc_z = target_size[1] + context_amount * sum(target_size) 76 | hc_z = target_size[0] + context_amount * sum(target_size) 77 | s_z = np.sqrt(wc_z * hc_z) 78 | scale_z = exemplar_size / s_z 79 | d_search = (instanc_size - exemplar_size) / 2 80 | pad = d_search / scale_z 81 | s_x = s_z + 2 * pad 82 | 83 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding) 84 | return x 85 | 86 | 87 | def crop_video(sub_set, video, crop_path, instanc_size): 88 | sub_set_base_path = join(base_path, sub_set) 89 | ground_truth_file = join(sub_set_base_path, video, 'groundtruth.txt') 90 | full_occlusion_file = join(sub_set_base_path, video, 'absence.label') 91 | cover = join(sub_set_base_path, video, 'cover.label') 92 | gt = np.genfromtxt(ground_truth_file, delimiter=',', dtype=float).astype(np.int) 93 | fo = np.genfromtxt(full_occlusion_file, dtype=int) 94 | 95 | video_crop_base_path = join(crop_path, sub_set, video) 96 | if not isdir(video_crop_base_path): makedirs(video_crop_base_path) 97 | 98 | subdir_paths = sorted(glob.glob(join(sub_set_base_path, video, '*.jpg'))) 99 | 100 | trackid = 0 101 | for i, img in enumerate(subdir_paths): 102 | filename = Path(img).stem 103 | bbox = gt[i] # x,y,w,h 104 | im = cv2.imread(img) 105 | avg_chans = np.mean(im, axis=(0, 1)) 106 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] 107 | x = crop_like_SiamFCx(im, bbox, instanc_size=instanc_size, padding=avg_chans) 108 | cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(int(filename), trackid)), x) 109 | 110 | 111 | def main(instanc_size=511, num_threads=24): 112 | crop_path = '/ssd/feiji/Research/Data/GOT-10k_crop{:d}'.format(instanc_size) 113 | if not isdir(crop_path): mkdir(crop_path) 114 | 115 | for sub_set in sub_sets: 116 | sub_set_base_path = join(base_path, sub_set) 117 | videos = sorted(listdir(sub_set_base_path)) 118 | n_videos = len(videos) 119 | with futures.ProcessPoolExecutor(max_workers=num_threads) as executor: 120 | fs = [executor.submit(crop_video, sub_set, video, crop_path, instanc_size) for video in videos] 121 | for i, f in enumerate(futures.as_completed(fs)): 122 | # Write progress to error so that it can be seen 123 | printProgress(i, n_videos, prefix=sub_set, suffix='Done ', barLength=40) 124 | 125 | 126 | if __name__ == '__main__': 127 | since = time.time() 128 | main(int(sys.argv[1]), int(sys.argv[2])) 129 | time_elapsed = time.time() - since 130 | print('Total complete in {:.0f}m {:.0f}s'.format( 131 | time_elapsed // 60, time_elapsed % 60)) 132 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import torch 4 | from PIL import Image 5 | from collections import OrderedDict 6 | import glob 7 | from os.path import realpath, join 8 | 9 | def smooth_l1_loss(bbox_pred, bbox_targets, sigma=3.0): 10 | sigma_2 = sigma ** 2 11 | in_box_diff = bbox_pred - bbox_targets 12 | abs_in_box_diff = torch.abs(in_box_diff) 13 | smoothL1_sign = (abs_in_box_diff < 1. / sigma_2).detach().float() 14 | out_loss_box = torch.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \ 15 | + (abs_in_box_diff - (0.5 / sigma_2)) * (1. 
- smoothL1_sign) 16 | loss_box = out_loss_box.sum() / out_loss_box.shape[0] 17 | return loss_box 18 | 19 | def l1_loss(bbox_pred, bbox_targets): 20 | loss = (bbox_pred - bbox_targets).abs() 21 | return loss.sum().div(bbox_pred.shape[0]) 22 | 23 | def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)): 24 | bbox = [float(x) for x in bbox] 25 | a = (out_sz-1) / (bbox[2]-bbox[0]) 26 | b = (out_sz-1) / (bbox[3]-bbox[1]) 27 | c = -a * bbox[0] 28 | d = -b * bbox[1] 29 | mapping = np.array([[a, 0, c], 30 | [0, b, d]]).astype(np.float) 31 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), borderMode=cv2.BORDER_CONSTANT, borderValue=padding) 32 | return crop 33 | 34 | def read_image(path): 35 | with open(path, 'rb') as fb: 36 | with Image.open(fb) as img: 37 | return img.convert('RGB') 38 | 39 | def cxy_wh_2_bbox(cxy, wh): 40 | return np.array([cxy[0] - wh[0] / 2, cxy[1] - wh[1] / 2, cxy[0] + wh[0] / 2, cxy[1] + wh[1] / 2]) # 0-index 41 | 42 | 43 | def get_subwindow_tracking(im, pos, model_sz, original_sz, avg_chans): 44 | 45 | if isinstance(pos, float): 46 | pos = [pos, pos] 47 | sz = original_sz 48 | im_sz = im.shape 49 | c = (original_sz+1) / 2 50 | context_xmin = round(pos[0] - c) # floor(pos(2) - sz(2) / 2); 51 | context_xmax = context_xmin + sz - 1 52 | context_ymin = round(pos[1] - c) # floor(pos(1) - sz(1) / 2); 53 | context_ymax = context_ymin + sz - 1 54 | left_pad = int(max(0., -context_xmin)) 55 | top_pad = int(max(0., -context_ymin)) 56 | right_pad = int(max(0., context_xmax - im_sz[1] + 1)) 57 | bottom_pad = int(max(0., context_ymax - im_sz[0] + 1)) 58 | 59 | context_xmin = context_xmin + left_pad 60 | context_xmax = context_xmax + left_pad 61 | context_ymin = context_ymin + top_pad 62 | context_ymax = context_ymax + top_pad 63 | 64 | # zzp: a more easy speed version 65 | r, c, k = im.shape 66 | if any([top_pad, bottom_pad, left_pad, right_pad]): 67 | te_im = np.zeros((r + top_pad + bottom_pad, c + left_pad + right_pad, k), np.uint8) # 0 is better than 1 initialization 68 | te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im 69 | if top_pad: 70 | te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans 71 | if bottom_pad: 72 | te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans 73 | if left_pad: 74 | te_im[:, 0:left_pad, :] = avg_chans 75 | if right_pad: 76 | te_im[:, c + left_pad:, :] = avg_chans 77 | im_patch_original = te_im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :] 78 | else: 79 | im_patch_original = im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :] 80 | 81 | if not np.array_equal(model_sz, original_sz): 82 | im_patch = cv2.resize(im_patch_original, (int(model_sz), int(model_sz))) # zzp: use cv to get a better speed 83 | else: 84 | im_patch = im_patch_original 85 | 86 | return im_patch 87 | 88 | 89 | def to_numpy(tensor): 90 | if torch.is_tensor(tensor): 91 | return tensor.cpu().numpy() 92 | elif type(tensor).__module__ != 'numpy': 93 | raise ValueError("Cannot convert {} to numpy array" 94 | .format(type(tensor))) 95 | return tensor 96 | 97 | 98 | def to_torch(ndarray): 99 | if type(ndarray).__module__ == 'numpy': 100 | return torch.from_numpy(ndarray) 101 | elif not torch.is_tensor(ndarray): 102 | raise ValueError("Cannot convert {} to torch tensor" 103 | .format(type(ndarray))) 104 | return ndarray 105 | 106 | 107 | def im_to_numpy(img): 108 | img = to_numpy(img) 109 | img = np.transpose(img, (1, 2, 0)) # H*W*C 110 | return img 111 | 112 | 113 | def im_to_torch(img): 114 | img = 
np.transpose(img, (2, 0, 1)) # C*H*W 115 | img = to_torch(img).float() 116 | return img 117 | 118 | 119 | def torch_to_img(img): 120 | img = to_numpy(torch.squeeze(img, 0)) 121 | img = np.transpose(img, (1, 2, 0)) # H*W*C 122 | return img 123 | 124 | 125 | def cxy_wh_2_rect(pos, sz): 126 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) # 0-index 127 | 128 | 129 | def rect_2_cxy_wh(rect): 130 | return np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2]), np.array([rect[2], rect[3]]) # 0-index 131 | 132 | 133 | def cxy_wh_2_rect1(pos, sz): 134 | return np.array([pos[0]-sz[0]/2+1, pos[1]-sz[1]/2+1, sz[0], sz[1]]) # 1-index 135 | 136 | 137 | def rect1_2_cxy_wh(rect): 138 | return np.array([rect[0]+rect[2]/2-1, rect[1]+rect[3]/2-1]), np.array([rect[2], rect[3]]) # 0-index 139 | 140 | 141 | def get_axis_aligned_bbox(region): 142 | nv = region.size 143 | if nv == 8: 144 | cx = np.mean(region[0::2]) 145 | cy = np.mean(region[1::2]) 146 | x1 = min(region[0::2]) 147 | x2 = max(region[0::2]) 148 | y1 = min(region[1::2]) 149 | y2 = max(region[1::2]) 150 | A1 = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6]) 151 | A2 = (x2 - x1) * (y2 - y1) 152 | s = np.sqrt(A1 / A2) 153 | w = s * (x2 - x1) + 1 154 | h = s * (y2 - y1) + 1 155 | else: 156 | x = region[0] 157 | y = region[1] 158 | w = region[2] 159 | h = region[3] 160 | cx = x+w/2 161 | cy = y+h/2 162 | return cx, cy, w, h 163 | -------------------------------------------------------------------------------- /training_dataset/det/par_crop.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from os.path import join, isdir 7 | from os import mkdir, makedirs 8 | import cv2 9 | import numpy as np 10 | import glob 11 | import xml.etree.ElementTree as ET 12 | from concurrent import futures 13 | import time 14 | import sys 15 | 16 | 17 | # Print iterations progress (thanks StackOverflow) 18 | def printProgress(iteration, total, prefix='', suffix='', decimals=1, barLength=100): 19 | """ 20 | Call in a loop to create terminal progress bar 21 | @params: 22 | iteration - Required : current iteration (Int) 23 | total - Required : total iterations (Int) 24 | prefix - Optional : prefix string (Str) 25 | suffix - Optional : suffix string (Str) 26 | decimals - Optional : positive number of decimals in percent complete (Int) 27 | barLength - Optional : character length of bar (Int) 28 | """ 29 | formatStr = "{0:." 
+ str(decimals) + "f}" 30 | percents = formatStr.format(100 * (iteration / float(total))) 31 | filledLength = int(round(barLength * iteration / float(total))) 32 | bar = '' * filledLength + '-' * (barLength - filledLength) 33 | sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)), 34 | if iteration == total: 35 | sys.stdout.write('\x1b[2K\r') 36 | sys.stdout.flush() 37 | 38 | 39 | def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)): 40 | a = (out_sz - 1) / (bbox[2] - bbox[0]) 41 | b = (out_sz - 1) / (bbox[3] - bbox[1]) 42 | c = -a * bbox[0] 43 | d = -b * bbox[1] 44 | mapping = np.array([[a, 0, c], 45 | [0, b, d]]).astype(np.float) 46 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), borderMode=cv2.BORDER_CONSTANT, borderValue=padding) 47 | return crop 48 | 49 | 50 | def pos_s_2_bbox(pos, s): 51 | return [pos[0] - s / 2, pos[1] - s / 2, pos[0] + s / 2, pos[1] + s / 2] 52 | 53 | 54 | def crop_like_SiamFC(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)): 55 | target_pos = [(bbox[2] + bbox[0]) / 2., (bbox[3] + bbox[1]) / 2.] 56 | target_size = [bbox[2] - bbox[0], bbox[3] - bbox[1]] 57 | wc_z = target_size[1] + context_amount * sum(target_size) 58 | hc_z = target_size[0] + context_amount * sum(target_size) 59 | s_z = np.sqrt(wc_z * hc_z) 60 | scale_z = exemplar_size / s_z 61 | d_search = (instanc_size - exemplar_size) / 2 62 | pad = d_search / scale_z 63 | s_x = s_z + 2 * pad 64 | 65 | z = crop_hwc(image, pos_s_2_bbox(target_pos, s_z), exemplar_size, padding) 66 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding) 67 | return z, x 68 | 69 | 70 | def crop_like_SiamFCx(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)): 71 | target_pos = [(bbox[2] + bbox[0]) / 2., (bbox[3] + bbox[1]) / 2.] 
72 | target_size = [bbox[2] - bbox[0], bbox[3] - bbox[1]] 73 | wc_z = target_size[1] + context_amount * sum(target_size) 74 | hc_z = target_size[0] + context_amount * sum(target_size) 75 | s_z = np.sqrt(wc_z * hc_z) 76 | scale_z = exemplar_size / s_z 77 | d_search = (instanc_size - exemplar_size) / 2 78 | pad = d_search / scale_z 79 | s_x = s_z + 2 * pad 80 | 81 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding) 82 | return x 83 | 84 | 85 | def crop_xml(xml, sub_set_crop_path, instanc_size=511): 86 | xmltree = ET.parse(xml) 87 | objects = xmltree.findall('object') 88 | 89 | frame_crop_base_path = join(sub_set_crop_path, xml.split('/')[-1].split('.')[0]) 90 | if not isdir(frame_crop_base_path): makedirs(frame_crop_base_path) 91 | 92 | img_path = xml.replace('xml', 'JPEG').replace('Annotations', 'Data') 93 | 94 | im = cv2.imread(img_path) 95 | avg_chans = np.mean(im, axis=(0, 1)) 96 | 97 | for id, object_iter in enumerate(objects): 98 | bndbox = object_iter.find('bndbox') 99 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text), 100 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)] 101 | 102 | # z, x = crop_like_SiamFC(im, bbox, instanc_size=instanc_size, padding=avg_chans) 103 | # x = crop_like_SiamFCx(im, bbox, instanc_size=instanc_size, padding=avg_chans) 104 | # cv2.imwrite(join(frame_crop_base_path, '{:06d}.{:02d}.z.jpg'.format(0, id)), z) 105 | x = crop_like_SiamFCx(im, bbox, instanc_size=instanc_size, padding=avg_chans) 106 | cv2.imwrite(join(frame_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(0, id)), x) 107 | 108 | 109 | def main(instanc_size=511, num_threads=24): 110 | crop_path = '/ssd/feiji/Research/Data/DET_crop{:d}'.format(instanc_size) 111 | if not isdir(crop_path): mkdir(crop_path) 112 | VID_base_path = '/ssd/feiji/Research/Data/ILSVRC2015' 113 | ann_base_path = join(VID_base_path, 'Annotations/DET/train/') 114 | sub_sets = ['ILSVRC2013_train', 'ILSVRC2014_train_0000', 'ILSVRC2014_train_0001','ILSVRC2014_train_0002','ILSVRC2014_train_0003','ILSVRC2014_train_0004','ILSVRC2014_train_0005','ILSVRC2014_train_0006', 'val'] 115 | for sub_set in sub_sets: 116 | sub_set_base_path = join(ann_base_path, sub_set) 117 | if 'ILSVRC2013_train' == sub_set: 118 | xmls = sorted(glob.glob(join(sub_set_base_path, '*', '*.xml'))) 119 | else: 120 | xmls = sorted(glob.glob(join(sub_set_base_path, '*.xml'))) 121 | 122 | n_imgs = len(xmls) 123 | sub_set_crop_path = join(crop_path, sub_set) 124 | with futures.ProcessPoolExecutor(max_workers=num_threads) as executor: 125 | fs = [executor.submit(crop_xml, xml, sub_set_crop_path, instanc_size) for xml in xmls] 126 | for i, f in enumerate(futures.as_completed(fs)): 127 | printProgress(i, n_imgs, prefix=sub_set, suffix='Done ', barLength=80) 128 | 129 | 130 | if __name__ == '__main__': 131 | since = time.time() 132 | main(int(sys.argv[1]), int(sys.argv[2])) 133 | time_elapsed = time.time() - since 134 | print('Total complete in {:.0f}m {:.0f}s'.format( 135 | time_elapsed // 60, time_elapsed % 60)) 136 | -------------------------------------------------------------------------------- /toolkit/evaluation/ar_benchmark.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author 3 | """ 4 | 5 | import warnings 6 | import itertools 7 | import numpy as np 8 | 9 | from colorama import Style, Fore 10 | from ..utils import calculate_failures, calculate_accuracy 11 | 12 | class AccuracyRobustnessBenchmark: 13 | """ 14 | Args: 15 | dataset: 16 | burnin: 17 | """ 18 | 
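    # Minimal usage sketch (illustrative only; `dataset` and the tracker name are
    # placeholders for a toolkit dataset with loadable tracker results):
    #   benchmark = AccuracyRobustnessBenchmark(dataset)
    #   result = benchmark.eval(['my_tracker'])
    #   benchmark.show_result(result)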
def __init__(self, dataset, burnin=10): 19 | self.dataset = dataset 20 | self.burnin = burnin 21 | 22 | def eval(self, eval_trackers=None): 23 | """ 24 | Args: 25 | eval_tags: list of tag 26 | eval_trackers: list of tracker name 27 | Returns: 28 | ret: dict of results 29 | """ 30 | if eval_trackers is None: 31 | eval_trackers = self.dataset.tracker_names 32 | if isinstance(eval_trackers, str): 33 | eval_trackers = [eval_trackers] 34 | 35 | result = {} 36 | for tracker_name in eval_trackers: 37 | accuracy, failures = self._calculate_accuracy_robustness(tracker_name) 38 | result[tracker_name] = {'overlaps': accuracy, 39 | 'failures': failures} 40 | return result 41 | 42 | def show_result(self, result, eao_result=None, show_video_level=False, helight_threshold=0.5): 43 | """pretty print result 44 | Args: 45 | result: returned dict from function eval 46 | """ 47 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12) 48 | if eao_result is not None: 49 | header = "|{:^"+str(tracker_name_len)+"}|{:^10}|{:^12}|{:^13}|{:^7}|" 50 | header = header.format('Tracker Name', 51 | 'Accuracy', 'Robustness', 'Lost Number', 'EAO') 52 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^10.3f}|{:^12.3f}|{:^13.1f}|{:^7.3f}|" 53 | else: 54 | header = "|{:^"+str(tracker_name_len)+"}|{:^10}|{:^12}|{:^13}|" 55 | header = header.format('Tracker Name', 56 | 'Accuracy', 'Robustness', 'Lost Number') 57 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^10.3f}|{:^12.3f}|{:^13.1f}|" 58 | bar = '-'*len(header) 59 | print(bar) 60 | print(header) 61 | print(bar) 62 | if eao_result is not None: 63 | tracker_eao = sorted(eao_result.items(), 64 | key=lambda x:x[1]['all'], 65 | reverse=True)[:20] 66 | tracker_names = [x[0] for x in tracker_eao] 67 | else: 68 | tracker_names = list(result.keys()) 69 | for tracker_name in tracker_names: 70 | # for tracker_name, ret in result.items(): 71 | ret = result[tracker_name] 72 | overlaps = list(itertools.chain(*ret['overlaps'].values())) 73 | accuracy = np.nanmean(overlaps) 74 | length = sum([len(x) for x in ret['overlaps'].values()]) 75 | failures = list(ret['failures'].values()) 76 | lost_number = np.mean(np.sum(failures, axis=0)) 77 | robustness = np.mean(np.sum(np.array(failures), axis=0) / length) * 100 78 | if eao_result is None: 79 | print(formatter.format(tracker_name, accuracy, robustness, lost_number)) 80 | else: 81 | print(formatter.format(tracker_name, accuracy, robustness, lost_number, eao_result[tracker_name]['all'])) 82 | print(bar) 83 | 84 | if show_video_level and len(result) < 10: 85 | print('\n\n') 86 | header1 = "|{:^14}|".format("Tracker name") 87 | header2 = "|{:^14}|".format("Video name") 88 | for tracker_name in result.keys(): 89 | header1 += ("{:^17}|").format(tracker_name) 90 | header2 += "{:^8}|{:^8}|".format("Acc", "LN") 91 | print('-'*len(header1)) 92 | print(header1) 93 | print('-'*len(header1)) 94 | print(header2) 95 | print('-'*len(header1)) 96 | videos = list(result[tracker_name]['overlaps'].keys()) 97 | for video in videos: 98 | row = "|{:^14}|".format(video) 99 | for tracker_name in result.keys(): 100 | overlaps = result[tracker_name]['overlaps'][video] 101 | accuracy = np.nanmean(overlaps) 102 | failures = result[tracker_name]['failures'][video] 103 | lost_number = np.mean(failures) 104 | 105 | accuracy_str = "{:^8.3f}".format(accuracy) 106 | if accuracy < helight_threshold: 107 | row += f'{Fore.RED}{accuracy_str}{Style.RESET_ALL}|' 108 | else: 109 | row += accuracy_str+'|' 110 | lost_num_str = "{:^8.3f}".format(lost_number) 111 | if 
lost_number > 0: 112 | row += f'{Fore.RED}{lost_num_str}{Style.RESET_ALL}|' 113 | else: 114 | row += lost_num_str+'|' 115 | print(row) 116 | print('-'*len(header1)) 117 | 118 | def _calculate_accuracy_robustness(self, tracker_name): 119 | overlaps = {} 120 | failures = {} 121 | all_length = {} 122 | for i in range(len(self.dataset)): 123 | video = self.dataset[i] 124 | gt_traj = video.gt_traj 125 | if tracker_name not in video.pred_trajs: 126 | tracker_trajs = video.load_tracker(self.dataset.tracker_path, tracker_name, False) 127 | else: 128 | tracker_trajs = video.pred_trajs[tracker_name] 129 | overlaps_group = [] 130 | num_failures_group = [] 131 | for tracker_traj in tracker_trajs: 132 | num_failures = calculate_failures(tracker_traj)[0] 133 | overlaps_ = calculate_accuracy(tracker_traj, gt_traj, 134 | burnin=10, bound=(video.width, video.height))[1] 135 | overlaps_group.append(overlaps_) 136 | num_failures_group.append(num_failures) 137 | with warnings.catch_warnings(): 138 | warnings.simplefilter("ignore", category=RuntimeWarning) 139 | overlaps[video.name] = np.nanmean(overlaps_group, axis=0).tolist() 140 | failures[video.name] = num_failures_group 141 | return overlaps, failures 142 | -------------------------------------------------------------------------------- /training_dataset/coco/par_crop.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from pycocotools.coco import COCO 7 | import cv2 8 | import numpy as np 9 | from os.path import join, isdir 10 | from os import mkdir, makedirs 11 | from concurrent import futures 12 | import sys 13 | import time 14 | import argparse 15 | 16 | parser = argparse.ArgumentParser(description='COCO Parallel Preprocessing for SiamMask') 17 | parser.add_argument('--exemplar_size', type=int, default=127, help='size of exemplar') 18 | parser.add_argument('--context_amount', type=float, default=0.5, help='context amount') 19 | parser.add_argument('--search_size', type=int, default=255, help='size of cropped search region') 20 | parser.add_argument('--enable_mask', action='store_false', help='whether crop mask') 21 | parser.add_argument('--num_threads', type=int, default=24, help='number of threads') 22 | args = parser.parse_args() 23 | 24 | 25 | # Print iterations progress (thanks StackOverflow) 26 | def printProgress(iteration, total, prefix='', suffix='', decimals=1, barLength=100): 27 | """ 28 | Call in a loop to create terminal progress bar 29 | @params: 30 | iteration - Required : current iteration (Int) 31 | total - Required : total iterations (Int) 32 | prefix - Optional : prefix string (Str) 33 | suffix - Optional : suffix string (Str) 34 | decimals - Optional : positive number of decimals in percent complete (Int) 35 | barLength - Optional : character length of bar (Int) 36 | """ 37 | formatStr = "{0:." 
+ str(decimals) + "f}" 38 | percents = formatStr.format(100 * (iteration / float(total))) 39 | filledLength = int(round(barLength * iteration / float(total))) 40 | bar = '' * filledLength + '-' * (barLength - filledLength) 41 | sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)), 42 | if iteration == total: 43 | sys.stdout.write('\x1b[2K\r') 44 | sys.stdout.flush() 45 | 46 | 47 | def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)): 48 | a = (out_sz-1) / (bbox[2]-bbox[0]) 49 | b = (out_sz-1) / (bbox[3]-bbox[1]) 50 | c = -a * bbox[0] 51 | d = -b * bbox[1] 52 | mapping = np.array([[a, 0, c], 53 | [0, b, d]]).astype(np.float) 54 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), 55 | borderMode=cv2.BORDER_CONSTANT, borderValue=padding) 56 | return crop 57 | 58 | 59 | def pos_s_2_bbox(pos, s): 60 | return [pos[0]-s/2, pos[1]-s/2, pos[0]+s/2, pos[1]+s/2] 61 | 62 | 63 | def crop_like_SiamFCx(image, bbox, exemplar_size=127, context_amount=0.5, search_size=255, padding=(0, 0, 0)): 64 | target_pos = [(bbox[2]+bbox[0])/2., (bbox[3]+bbox[1])/2.] 65 | target_size = [bbox[2]-bbox[0]+1, bbox[3]-bbox[1]+1] 66 | wc_z = target_size[1] + context_amount * sum(target_size) 67 | hc_z = target_size[0] + context_amount * sum(target_size) 68 | s_z = np.sqrt(wc_z * hc_z) 69 | scale_z = exemplar_size / s_z 70 | d_search = (search_size - exemplar_size) / 2 71 | pad = d_search / scale_z 72 | s_x = s_z + 2 * pad 73 | 74 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), search_size, padding) 75 | return x 76 | 77 | 78 | def crop_img(img, anns, set_crop_base_path, set_img_base_path, 79 | exemplar_size=127, context_amount=0.5, search_size=511, enable_mask=True): 80 | frame_crop_base_path = join(set_crop_base_path, img['file_name'].split('/')[-1].split('.')[0]) 81 | if not isdir(frame_crop_base_path): makedirs(frame_crop_base_path) 82 | 83 | im = cv2.imread('{}/{}'.format(set_img_base_path, img['file_name'])) 84 | avg_chans = np.mean(im, axis=(0, 1)) 85 | for track_id, ann in enumerate(anns): 86 | rect = ann['bbox'] 87 | if rect[2] <= 0 or rect[3] <= 0: 88 | continue 89 | bbox = [rect[0], rect[1], rect[0]+rect[2]-1, rect[1]+rect[3]-1] 90 | 91 | x = crop_like_SiamFCx(im, bbox, exemplar_size=exemplar_size, context_amount=context_amount, 92 | search_size=search_size, padding=avg_chans) 93 | cv2.imwrite(join(frame_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(0, track_id)), x) 94 | 95 | if enable_mask: 96 | im_mask = coco.annToMask(ann).astype(np.float32) 97 | x = (crop_like_SiamFCx(im_mask, bbox, exemplar_size=exemplar_size, context_amount=context_amount, 98 | search_size=search_size) > 0.5).astype(np.uint8) * 255 99 | cv2.imwrite(join(frame_crop_base_path, '{:06d}.{:02d}.m.png'.format(0, track_id)), x) 100 | 101 | 102 | def main(exemplar_size=127, context_amount=0.5, search_size=511, enable_mask=True, num_threads=24): 103 | global coco # will used for generate mask 104 | data_dir = '/ssd/feiji/Research/Data/COCO' 105 | crop_path = '/ssd/feiji/Research/Data/COCO_crop{:d}'.format(search_size) 106 | if not isdir(crop_path): mkdir(crop_path) 107 | 108 | for data_subset in ['val2017', 'train2017']: 109 | set_crop_base_path = join(crop_path, data_subset) 110 | set_img_base_path = join(data_dir, data_subset) 111 | 112 | anno_file = '{}/annotations/instances_{}.json'.format(data_dir, data_subset) 113 | coco = COCO(anno_file) 114 | n_imgs = len(coco.imgs) 115 | with futures.ProcessPoolExecutor(max_workers=num_threads) as executor: 116 | fs = [executor.submit(crop_img, coco.loadImgs(id)[0], 117 | 
coco.loadAnns(coco.getAnnIds(imgIds=id, iscrowd=None)), 118 | set_crop_base_path, set_img_base_path, 119 | exemplar_size, context_amount, search_size, 120 | enable_mask) for id in coco.imgs] 121 | for i, f in enumerate(futures.as_completed(fs)): 122 | printProgress(i, n_imgs, prefix=data_subset, suffix='Done ', barLength=40) 123 | print('done') 124 | 125 | 126 | if __name__ == '__main__': 127 | since = time.time() 128 | main(args.exemplar_size, args.context_amount, args.search_size, False, args.num_threads) 129 | time_elapsed = time.time() - since 130 | print('Total complete in {:.0f}m {:.0f}s'.format( 131 | time_elapsed // 60, time_elapsed % 60)) 132 | -------------------------------------------------------------------------------- /training_dataset/vid/par_crop.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # SiamMask 3 | # Licensed under The MIT License 4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn) 5 | # -------------------------------------------------------- 6 | from os.path import join, isdir 7 | from os import listdir, mkdir, makedirs 8 | import cv2 9 | import numpy as np 10 | import glob 11 | import xml.etree.ElementTree as ET 12 | from concurrent import futures 13 | import sys 14 | import time 15 | 16 | VID_base_path = '/ssd/feiji/Research/Data/ILSVRC2015' 17 | ann_base_path = join(VID_base_path, 'Annotations/VID/train/') 18 | sub_sets= sorted({'a', 'b', 'c', 'd', 'e'}) 19 | # Print iterations progress (thanks StackOverflow) 20 | def printProgress(iteration, total, prefix='', suffix='', decimals=1, barLength=100): 21 | """ 22 | Call in a loop to create terminal progress bar 23 | @params: 24 | iteration - Required : current iteration (Int) 25 | total - Required : total iterations (Int) 26 | prefix - Optional : prefix string (Str) 27 | suffix - Optional : suffix string (Str) 28 | decimals - Optional : positive number of decimals in percent complete (Int) 29 | barLength - Optional : character length of bar (Int) 30 | """ 31 | formatStr = "{0:." + str(decimals) + "f}" 32 | percents = formatStr.format(100 * (iteration / float(total))) 33 | filledLength = int(round(barLength * iteration / float(total))) 34 | bar = '' * filledLength + '-' * (barLength - filledLength) 35 | sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)), 36 | if iteration == total: 37 | sys.stdout.write('\x1b[2K\r') 38 | sys.stdout.flush() 39 | 40 | 41 | def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)): 42 | a = (out_sz-1) / (bbox[2]-bbox[0]) 43 | b = (out_sz-1) / (bbox[3]-bbox[1]) 44 | c = -a * bbox[0] 45 | d = -b * bbox[1] 46 | mapping = np.array([[a, 0, c], 47 | [0, b, d]]).astype(np.float) 48 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), borderMode=cv2.BORDER_CONSTANT, borderValue=padding) 49 | return crop 50 | 51 | 52 | def pos_s_2_bbox(pos, s): 53 | return [pos[0]-s/2, pos[1]-s/2, pos[0]+s/2, pos[1]+s/2] 54 | 55 | 56 | def crop_like_SiamFC(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)): 57 | target_pos = [(bbox[2]+bbox[0])/2., (bbox[3]+bbox[1])/2.] 
58 | target_size = [bbox[2]-bbox[0], bbox[3]-bbox[1]] 59 | wc_z = target_size[1] + context_amount * sum(target_size) 60 | hc_z = target_size[0] + context_amount * sum(target_size) 61 | s_z = np.sqrt(wc_z * hc_z) 62 | scale_z = exemplar_size / s_z 63 | d_search = (instanc_size - exemplar_size) / 2 64 | pad = d_search / scale_z 65 | s_x = s_z + 2 * pad 66 | 67 | z = crop_hwc(image, pos_s_2_bbox(target_pos, s_z), exemplar_size, padding) 68 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding) 69 | return z, x 70 | 71 | 72 | def crop_like_SiamFCx(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)): 73 | target_pos = [(bbox[2]+bbox[0])/2., (bbox[3]+bbox[1])/2.] 74 | target_size = [bbox[2]-bbox[0], bbox[3]-bbox[1]] 75 | wc_z = target_size[1] + context_amount * sum(target_size) 76 | hc_z = target_size[0] + context_amount * sum(target_size) 77 | s_z = np.sqrt(wc_z * hc_z) 78 | scale_z = exemplar_size / s_z 79 | d_search = (instanc_size - exemplar_size) / 2 80 | pad = d_search / scale_z 81 | s_x = s_z + 2 * pad 82 | 83 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding) 84 | return x 85 | 86 | 87 | def crop_video(sub_set, video, crop_path, instanc_size): 88 | video_crop_base_path = join(crop_path, sub_set, video) 89 | if not isdir(video_crop_base_path): makedirs(video_crop_base_path) 90 | 91 | sub_set_base_path = join(ann_base_path, sub_set) 92 | xmls = sorted(glob.glob(join(sub_set_base_path, video, '*.xml'))) 93 | for xml in xmls: 94 | xmltree = ET.parse(xml) 95 | # size = xmltree.findall('size')[0] 96 | # frame_sz = [int(it.text) for it in size] 97 | objects = xmltree.findall('object') 98 | objs = [] 99 | filename = xmltree.findall('filename')[0].text 100 | 101 | im = cv2.imread(xml.replace('xml', 'JPEG').replace('Annotations', 'Data')) 102 | avg_chans = np.mean(im, axis=(0, 1)) 103 | for object_iter in objects: 104 | trackid = int(object_iter.find('trackid').text) 105 | # name = (object_iter.find('name')).text 106 | bndbox = object_iter.find('bndbox') 107 | # occluded = int(object_iter.find('occluded').text) 108 | 109 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text), 110 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)] 111 | # z, x = crop_like_SiamFC(im, bbox, instanc_size=instanc_size, padding=avg_chans) 112 | # cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.z.jpg'.format(int(filename), trackid)), z) 113 | # cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(int(filename), trackid)), x) 114 | 115 | x = crop_like_SiamFCx(im, bbox, instanc_size=instanc_size, padding=avg_chans) 116 | cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(int(filename), trackid)), x) 117 | 118 | 119 | def main(instanc_size=511, num_threads=24): 120 | crop_path = '/ssd/feiji/Research/Data/VID_crop{:d}'.format(instanc_size) 121 | if not isdir(crop_path): mkdir(crop_path) 122 | 123 | for sub_set in sub_sets: 124 | sub_set_base_path = join(ann_base_path, sub_set) 125 | videos = sorted(listdir(sub_set_base_path)) 126 | n_videos = len(videos) 127 | with futures.ProcessPoolExecutor(max_workers=num_threads) as executor: 128 | fs = [executor.submit(crop_video, sub_set, video, crop_path, instanc_size) for video in videos] 129 | for i, f in enumerate(futures.as_completed(fs)): 130 | # Write progress to error so that it can be seen 131 | printProgress(i, n_videos, prefix=sub_set, suffix='Done ', barLength=40) 132 | 133 | 134 | if __name__ == '__main__': 135 | since = 
time.time() 136 | main(int(sys.argv[1]), int(sys.argv[2])) 137 | time_elapsed = time.time() - since 138 | print('Total complete in {:.0f}m {:.0f}s'.format( 139 | time_elapsed // 60, time_elapsed % 60)) 140 | -------------------------------------------------------------------------------- /toolkit/evaluation/f1_benchmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from glob import glob 5 | from tqdm import tqdm 6 | from colorama import Style, Fore 7 | 8 | from ..utils import determine_thresholds, calculate_accuracy, calculate_f1 9 | 10 | class F1Benchmark: 11 | def __init__(self, dataset): 12 | """ 13 | Args: 14 | result_path: 15 | """ 16 | self.dataset = dataset 17 | 18 | def eval(self, eval_trackers=None): 19 | """ 20 | Args: 21 | eval_tags: list of tag 22 | eval_trackers: list of tracker name 23 | Returns: 24 | eao: dict of results 25 | """ 26 | if eval_trackers is None: 27 | eval_trackers = self.dataset.tracker_names 28 | if isinstance(eval_trackers, str): 29 | eval_trackers = [eval_trackers] 30 | 31 | ret = {} 32 | for tracker_name in eval_trackers: 33 | precision, recall, f1 = self._cal_precision_reall(tracker_name) 34 | ret[tracker_name] = {"precision": precision, 35 | "recall": recall, 36 | "f1": f1 37 | } 38 | return ret 39 | 40 | def _cal_precision_reall(self, tracker_name): 41 | score = [] 42 | # for i in range(len(self.dataset)): 43 | # video = self.dataset[i] 44 | for video in self.dataset: 45 | if tracker_name not in video.confidence: 46 | score += video.load_tracker(self.dataset.tracker_path, tracker_name, False)[1] 47 | else: 48 | score += video.confidence[tracker_name] 49 | score = np.array(score) 50 | thresholds = determine_thresholds(score)[::-1] 51 | 52 | precision = {} 53 | recall = {} 54 | f1 = {} 55 | for i in range(len(self.dataset)): 56 | video = self.dataset[i] 57 | gt_traj = video.gt_traj 58 | N = sum([1 for x in gt_traj if len(x) > 1]) 59 | if tracker_name not in video.pred_trajs: 60 | tracker_traj, score = video.load_tracker(self.dataset.tracker_path, tracker_name, False) 61 | else: 62 | tracker_traj = video.pred_trajs[tracker_name] 63 | score = video.confidence[tracker_name] 64 | overlaps = calculate_accuracy(tracker_traj, gt_traj, \ 65 | bound=(video.width,video.height))[1] 66 | f1[video.name], precision[video.name], recall[video.name] = \ 67 | calculate_f1(overlaps, score, (video.width,video.height),thresholds, N) 68 | return precision, recall, f1 69 | 70 | def show_result(self, result, show_video_level=False, helight_threshold=0.5): 71 | """pretty print result 72 | Args: 73 | result: returned dict from function eval 74 | """ 75 | # sort tracker according to f1 76 | sorted_tracker = {} 77 | for tracker_name, ret in result.items(): 78 | precision = np.mean(list(ret['precision'].values()), axis=0) 79 | recall = np.mean(list(ret['recall'].values()), axis=0) 80 | f1 = 2 * precision * recall / (precision + recall) 81 | max_idx = np.argmax(f1) 82 | sorted_tracker[tracker_name] = (precision[max_idx], recall[max_idx], 83 | f1[max_idx]) 84 | sorted_tracker_ = sorted(sorted_tracker.items(), 85 | key=lambda x:x[1][2], 86 | reverse=True)[:20] 87 | tracker_names = [x[0] for x in sorted_tracker_] 88 | 89 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12) 90 | header = "|{:^"+str(tracker_name_len)+"}|{:^11}|{:^8}|{:^7}|" 91 | header = header.format('Tracker Name', 92 | 'Precision', 'Recall', 'F1') 93 | bar = '-' * len(header) 94 | formatter = 
"|{:^"+str(tracker_name_len)+"}|{:^11.3f}|{:^8.3f}|{:^7.3f}|" 95 | print(bar) 96 | print(header) 97 | print(bar) 98 | # for tracker_name, ret in result.items(): 99 | # precision = np.mean(list(ret['precision'].values()), axis=0) 100 | # recall = np.mean(list(ret['recall'].values()), axis=0) 101 | # f1 = 2 * precision * recall / (precision + recall) 102 | # max_idx = np.argmax(f1) 103 | for tracker_name in tracker_names: 104 | precision = sorted_tracker[tracker_name][0] 105 | recall = sorted_tracker[tracker_name][1] 106 | f1 = sorted_tracker[tracker_name][2] 107 | print(formatter.format(tracker_name, precision, recall, f1)) 108 | print(bar) 109 | 110 | if show_video_level and len(result) < 10: 111 | print('\n\n') 112 | header1 = "|{:^14}|".format("Tracker name") 113 | header2 = "|{:^14}|".format("Video name") 114 | for tracker_name in result.keys(): 115 | # col_len = max(20, len(tracker_name)) 116 | header1 += ("{:^28}|").format(tracker_name) 117 | header2 += "{:^11}|{:^8}|{:^7}|".format("Precision", "Recall", "F1") 118 | print('-'*len(header1)) 119 | print(header1) 120 | print('-'*len(header1)) 121 | print(header2) 122 | print('-'*len(header1)) 123 | videos = list(result[tracker_name]['precision'].keys()) 124 | for video in videos: 125 | row = "|{:^14}|".format(video) 126 | for tracker_name in result.keys(): 127 | precision = result[tracker_name]['precision'][video] 128 | recall = result[tracker_name]['recall'][video] 129 | f1 = result[tracker_name]['f1'][video] 130 | max_idx = np.argmax(f1) 131 | precision_str = "{:^11.3f}".format(precision[max_idx]) 132 | if precision[max_idx] < helight_threshold: 133 | row += f'{Fore.RED}{precision_str}{Style.RESET_ALL}|' 134 | else: 135 | row += precision_str+'|' 136 | recall_str = "{:^8.3f}".format(recall[max_idx]) 137 | if recall[max_idx] < helight_threshold: 138 | row += f'{Fore.RED}{recall_str}{Style.RESET_ALL}|' 139 | else: 140 | row += recall_str+'|' 141 | f1_str = "{:^7.3f}".format(f1[max_idx]) 142 | if f1[max_idx] < helight_threshold: 143 | row += f'{Fore.RED}{f1_str}{Style.RESET_ALL}|' 144 | else: 145 | row += f1_str+'|' 146 | print(row) 147 | print('-'*len(header1)) 148 | -------------------------------------------------------------------------------- /toolkit/utils/statistics.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author fangyi.zhang@vipl.ict.ac.cn 3 | """ 4 | 5 | import numpy as np 6 | from numba import jit 7 | from . 
import region 8 | 9 | def calculate_failures(trajectory): 10 | """ Calculate number of failures 11 | Args: 12 | trajectory: list of bbox 13 | Returns: 14 | num_failures: number of failures 15 | failures: failures point in trajectory, start with 0 16 | """ 17 | failures = [i for i, x in zip(range(len(trajectory)), trajectory) 18 | if len(x) == 1 and x[0] == 2] 19 | num_failures = len(failures) 20 | return num_failures, failures 21 | 22 | def calculate_accuracy(pred_trajectory, gt_trajectory, 23 | burnin=0, ignore_unknown=True, bound=None): 24 | """Caculate accuracy socre as average overlap over the entire sequence 25 | Args: 26 | trajectory: list of bbox 27 | gt_trajectory: list of bbox 28 | burnin: number of frames that have to be ignored after the failure 29 | ignore_unknown: ignore frames where the overlap is unknown 30 | bound: bounding region 31 | Return: 32 | acc: average overlap 33 | overlaps: per frame overlaps 34 | """ 35 | pred_trajectory_ = pred_trajectory 36 | if not ignore_unknown: 37 | unkown = [len(x)==1 and x[0] == 0 for x in pred_trajectory] 38 | 39 | if burnin > 0: 40 | pred_trajectory_ = pred_trajectory[:] 41 | mask = [len(x)==1 and x[0] == 1 for x in pred_trajectory] 42 | for i in range(len(mask)): 43 | if mask[i]: 44 | for j in range(burnin): 45 | if i + j < len(mask): 46 | pred_trajectory_[i+j] = [0] 47 | min_len = min(len(pred_trajectory_), len(gt_trajectory)) 48 | overlaps = region.vot_overlap_traj(pred_trajectory_[:min_len], 49 | gt_trajectory[:min_len], bound) 50 | 51 | if not ignore_unknown: 52 | overlaps = [x if u else 0 for u in unkown] 53 | 54 | acc = 0 55 | if len(overlaps) > 0: 56 | acc = np.nanmean(overlaps) 57 | return acc, overlaps 58 | 59 | # def caculate_expected_overlap(pred_trajectorys, gt_trajectorys, skip_init, traj_length=None, 60 | # weights=None, tags=['all']): 61 | # """ Caculate expected overlap 62 | # Args: 63 | # pred_trajectory: list of bbox 64 | # gt_trajectory: list of bbox 65 | # traj_length: a list of sequence length for which the overlap should be evaluated 66 | # weights: a list of per-sequence weights that indicate how much does each sequence 67 | # contribute to the estimate 68 | # tags: set list of tags for which to perform calculation 69 | # """ 70 | # overlaps = [calculate_accuracy(pred, gt)[1] 71 | # for pred, gt in zip(pred_trajectorys, gt_trajectorys)] 72 | # failures = [calculate_accuracy(pred, gt)[1] 73 | # for pred, gt in zip(pred_trajectorys, gt_trajectorys)] 74 | # 75 | # if traj_length is None: 76 | # traj_length = range(1, max([len(x) for x in gt_trajectorys])+1) 77 | # traj_length = list(set(traj_length)) 78 | 79 | @jit(nopython=True) 80 | def overlap_ratio(rect1, rect2): 81 | '''Compute overlap ratio between two rects 82 | Args 83 | rect:2d array of N x [x,y,w,h] 84 | Return: 85 | iou 86 | ''' 87 | # if rect1.ndim==1: 88 | # rect1 = rect1[np.newaxis, :] 89 | # if rect2.ndim==1: 90 | # rect2 = rect2[np.newaxis, :] 91 | left = np.maximum(rect1[:,0], rect2[:,0]) 92 | right = np.minimum(rect1[:,0]+rect1[:,2], rect2[:,0]+rect2[:,2]) 93 | top = np.maximum(rect1[:,1], rect2[:,1]) 94 | bottom = np.minimum(rect1[:,1]+rect1[:,3], rect2[:,1]+rect2[:,3]) 95 | 96 | intersect = np.maximum(0,right - left) * np.maximum(0,bottom - top) 97 | union = rect1[:,2]*rect1[:,3] + rect2[:,2]*rect2[:,3] - intersect 98 | iou = intersect / union 99 | iou = np.maximum(np.minimum(1, iou), 0) 100 | return iou 101 | 102 | @jit(nopython=True) 103 | def success_overlap(gt_bb, result_bb, n_frame): 104 | thresholds_overlap = np.arange(0, 1.05, 0.05) 105 | 
success = np.zeros(len(thresholds_overlap)) 106 | iou = np.ones(len(gt_bb)) * (-1) 107 | mask = np.sum(gt_bb > 0, axis=1) == 4 108 | iou[mask] = overlap_ratio(gt_bb[mask], result_bb[mask]) 109 | for i in range(len(thresholds_overlap)): 110 | success[i] = np.sum(iou > thresholds_overlap[i]) / float(n_frame) 111 | return success 112 | 113 | @jit(nopython=True) 114 | def success_error(gt_center, result_center, thresholds, n_frame): 115 | # n_frame = len(gt_center) 116 | success = np.zeros(len(thresholds)) 117 | dist = np.ones(len(gt_center)) * (-1) 118 | mask = np.sum(gt_center > 0, axis=1) == 2 119 | dist[mask] = np.sqrt(np.sum( 120 | np.power(gt_center[mask] - result_center[mask], 2), axis=1)) 121 | for i in range(len(thresholds)): 122 | success[i] = np.sum(dist <= thresholds[i]) / float(n_frame) 123 | return success 124 | 125 | @jit(nopython=True) 126 | def determine_thresholds(scores, resolution=100): 127 | """ 128 | Args: 129 | scores: 1d array of score 130 | """ 131 | scores = np.sort(scores[np.logical_not(np.isnan(scores))]) 132 | delta = np.floor(len(scores) / (resolution - 2)) 133 | idxs = np.floor(np.linspace(delta-1, len(scores)-delta, resolution-2)+0.5).astype(np.int32) 134 | thresholds = np.zeros((resolution)) 135 | thresholds[0] = - np.inf 136 | thresholds[-1] = np.inf 137 | thresholds[1:-1] = scores[idxs] 138 | return thresholds 139 | 140 | @jit(nopython=True) 141 | def calculate_f1(overlaps, score, bound, thresholds, N): 142 | overlaps = np.array(overlaps) 143 | overlaps[np.isnan(overlaps)] = 0 144 | score = np.array(score) 145 | score[np.isnan(score)] = 0 146 | precision = np.zeros(len(thresholds)) 147 | recall = np.zeros(len(thresholds)) 148 | for i, th in enumerate(thresholds): 149 | if th == - np.inf: 150 | idx = score > 0 151 | else: 152 | idx = score >= th 153 | if np.sum(idx) == 0: 154 | precision[i] = 1 155 | recall[i] = 0 156 | else: 157 | precision[i] = np.mean(overlaps[idx]) 158 | recall[i] = np.sum(overlaps[idx]) / N 159 | f1 = 2 * precision * recall / (precision + recall) 160 | return f1, precision, recall 161 | 162 | @jit(nopython=True) 163 | def calculate_expected_overlap(fragments, fweights): 164 | max_len = fragments.shape[1] 165 | expected_overlaps = np.zeros((max_len), np.float32) 166 | expected_overlaps[0] = 1 167 | 168 | # TODO Speed Up 169 | for i in range(1, max_len): 170 | mask = np.logical_not(np.isnan(fragments[:, i])) 171 | if np.any(mask): 172 | fragment = fragments[mask, 1:i+1] 173 | seq_mean = np.sum(fragment, 1) / fragment.shape[1] 174 | expected_overlaps[i] = np.sum(seq_mean * 175 | fweights[mask]) / np.sum(fweights[mask]) 176 | return expected_overlaps 177 | -------------------------------------------------------------------------------- /toolkit/evaluation/eao_benchmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | 5 | from glob import glob 6 | 7 | from ..utils import calculate_failures, calculate_accuracy, calculate_expected_overlap 8 | 9 | class EAOBenchmark: 10 | """ 11 | Args: 12 | dataset: 13 | """ 14 | def __init__(self, dataset, skipping=5, tags=['all']): 15 | self.dataset = dataset 16 | self.skipping = skipping 17 | self.tags = tags 18 | # NOTE we not use gmm to generate low, high, peak value 19 | if dataset.name == 'VOT2019': 20 | self.low = 46 21 | self.high = 291 22 | self.peak = 128 23 | elif dataset.name == 'VOT2018' or dataset.name == 'VOT2017': 24 | self.low = 100 25 | self.high = 356 26 | self.peak = 160 27 | elif dataset.name == 
'VOT2016': 28 | self.low = 108 29 | self.high = 371 30 | self.peak = 168 31 | 32 | def eval(self, eval_trackers=None): 33 | """ 34 | Args: 35 | eval_tags: list of tag 36 | eval_trackers: list of tracker name 37 | Returns: 38 | eao: dict of results 39 | """ 40 | if eval_trackers is None: 41 | eval_trackers = self.dataset.tracker_names 42 | if isinstance(eval_trackers, str): 43 | eval_trackers = [eval_trackers] 44 | 45 | ret = {} 46 | for tracker_name in eval_trackers: 47 | eao = self._calculate_eao(tracker_name, self.tags) 48 | ret[tracker_name] = eao 49 | return ret 50 | 51 | def show_result(self, result, topk=10): 52 | """pretty print result 53 | Args: 54 | result: returned dict from function eval 55 | """ 56 | if len(self.tags) == 1: 57 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12) 58 | header = ("|{:^"+str(tracker_name_len)+"}|{:^10}|").format('Tracker Name', 'EAO') 59 | bar = '-'*len(header) 60 | formatter = "|{:^20}|{:^10.3f}|" 61 | print(bar) 62 | print(header) 63 | print(bar) 64 | tracker_eao = sorted(result.items(), 65 | key=lambda x: x[1]['all'], 66 | reverse=True)[:topk] 67 | for tracker_name, eao in tracker_eao: 68 | # for tracker_name, ret in result.items(): 69 | print(formatter.format(tracker_name, eao)) 70 | print(bar) 71 | else: 72 | header = "|{:^20}|".format('Tracker Name') 73 | header += "{:^7}|{:^15}|{:^14}|{:^15}|{:^13}|{:^11}|{:^7}|".format(*self.tags) 74 | bar = '-'*len(header) 75 | formatter = "{:^7.3f}|{:^15.3f}|{:^14.3f}|{:^15.3f}|{:^13.3f}|{:^11.3f}|{:^7.3f}|" 76 | print(bar) 77 | print(header) 78 | print(bar) 79 | sorted_tacker = sorted(result.items(), 80 | key=lambda x: x[1]['all'], 81 | reverse=True)[:topk] 82 | sorted_tacker = [x[0] for x in sorted_tacker] 83 | for tracker_name in sorted_tacker: 84 | # for tracker_name, ret in result.items(): 85 | print("|{:^20}|".format(tracker_name)+formatter.format( 86 | *[result[tracker_name][x] for x in self.tags])) 87 | print(bar) 88 | 89 | def _calculate_eao(self, tracker_name, tags): 90 | all_overlaps = [] 91 | all_failures = [] 92 | video_names = [] 93 | gt_traj_length = [] 94 | # for i in range(len(self.dataset)): 95 | for video in self.dataset: 96 | # video = self.dataset[i] 97 | gt_traj = video.gt_traj 98 | if tracker_name not in video.pred_trajs: 99 | tracker_trajs = video.load_tracker(self.dataset.tracker_path, tracker_name, False) 100 | else: 101 | tracker_trajs = video.pred_trajs[tracker_name] 102 | for tracker_traj in tracker_trajs: 103 | gt_traj_length.append(len(gt_traj)) 104 | video_names.append(video.name) 105 | overlaps = calculate_accuracy(tracker_traj, gt_traj, bound=(video.width-1, video.height-1))[1] 106 | failures = calculate_failures(tracker_traj)[1] 107 | all_overlaps.append(overlaps) 108 | all_failures.append(failures) 109 | fragment_num = sum([len(x)+1 for x in all_failures]) 110 | max_len = max([len(x) for x in all_overlaps]) 111 | if len(tracker_trajs) == 0: 112 | print('Warning: some seqs in {}.{} not found'.format(tracker_name, tags)) 113 | seq_weight = 1 / (len(tracker_trajs) + 1e-10) # division by zero 114 | 115 | eao = {} 116 | for tag in tags: 117 | # prepare segments 118 | fweights = np.ones((fragment_num)) * np.nan 119 | fragments = np.ones((fragment_num, max_len)) * np.nan 120 | seg_counter = 0 121 | for name, traj_len, failures, overlaps in zip(video_names, gt_traj_length, 122 | all_failures, all_overlaps): 123 | if len(failures) > 0: 124 | points = [x+self.skipping for x in failures if 125 | x+self.skipping <= len(overlaps)] 126 | points.insert(0, 0) 127 | 
for i in range(len(points)): 128 | if i != len(points) - 1: 129 | fragment = np.array(overlaps[points[i]:points[i+1]+1]) 130 | fragments[seg_counter, :] = 0 131 | else: 132 | fragment = np.array(overlaps[points[i]:]) 133 | fragment[np.isnan(fragment)] = 0 134 | fragments[seg_counter, :len(fragment)] = fragment 135 | if i != len(points) - 1: 136 | # tag_value = self.dataset[name].tags[tag][points[i]:points[i+1]+1] 137 | tag_value = self.dataset[name].select_tag(tag, points[i], points[i+1]+1) 138 | w = sum(tag_value) / (points[i+1] - points[i]+1) 139 | fweights[seg_counter] = seq_weight * w 140 | else: 141 | # tag_value = self.dataset[name].tags[tag][points[i]:len(overlaps)] 142 | tag_value = self.dataset[name].select_tag(tag, points[i], len(overlaps)) 143 | w = sum(tag_value) / (traj_len - points[i]+1e-16) 144 | fweights[seg_counter] = seq_weight * w# (len(fragment) / (traj_len-points[i])) 145 | seg_counter += 1 146 | else: 147 | # no failure 148 | max_idx = min(len(overlaps), max_len) 149 | fragments[seg_counter, :max_idx] = overlaps[:max_idx] 150 | # tag_value = self.dataset[name].tags[tag][:max_idx] 151 | tag_value = self.dataset[name].select_tag(tag, 0, max_idx) 152 | w = sum(tag_value) / max_idx 153 | fweights[seg_counter] = seq_weight * w 154 | seg_counter += 1 155 | 156 | expected_overlaps = calculate_expected_overlap(fragments, fweights) 157 | # caculate eao 158 | weight = np.zeros((len(expected_overlaps))) 159 | weight[self.low-1:self.high-1+1] = 1 160 | is_valid = np.logical_not(np.isnan(expected_overlaps)) 161 | eao_ = np.sum(expected_overlaps[is_valid] * weight[is_valid]) / np.sum(weight[is_valid]) 162 | eao[tag] = eao_ 163 | return eao 164 | -------------------------------------------------------------------------------- /tools/eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | import argparse 8 | 9 | from glob import glob 10 | from tqdm import tqdm 11 | from multiprocessing import Pool 12 | from toolkit.datasets import OTBDataset, UAVDataset, LaSOTDataset, \ 13 | VOTDataset, NFSDataset, VOTLTDataset, GOT10kDataset 14 | from toolkit.evaluation import OPEBenchmark, AccuracyRobustnessBenchmark, \ 15 | EAOBenchmark, F1Benchmark 16 | 17 | parser = argparse.ArgumentParser(description='tracking evaluation') 18 | parser.add_argument('--tracker_path', '-p', type=str, 19 | help='tracker result path') 20 | parser.add_argument('--dataset', '-d', type=str, 21 | help='dataset name') 22 | parser.add_argument('--num', '-n', default=1, type=int, 23 | help='number of thread to eval') 24 | parser.add_argument('--tracker_prefix', '-t', default='', 25 | type=str, help='tracker name') 26 | parser.add_argument('--show_video_level', '-s', dest='show_video_level', 27 | action='store_true') 28 | parser.set_defaults(show_video_level=False) 29 | args = parser.parse_args() 30 | 31 | 32 | def main(): 33 | tracker_dir = os.path.join(args.tracker_path, args.dataset) 34 | trackers = glob(os.path.join(args.tracker_path, 35 | args.dataset, 36 | args.tracker_prefix+'*')) 37 | trackers = [x.split('/')[-1] for x in trackers] 38 | 39 | assert len(trackers) > 0 40 | args.num = min(args.num, len(trackers)) 41 | 42 | root = os.path.realpath(os.path.join(os.path.dirname(__file__), 43 | '../dataset')) 44 | root = os.path.join(root, args.dataset) 45 | if 'OTB' in args.dataset: 46 | dataset = 
OTBDataset(args.dataset, root) 47 | dataset.set_tracker(tracker_dir, trackers) 48 | trackers = dataset.tracker_names 49 | benchmark = OPEBenchmark(dataset) 50 | success_ret = {} 51 | with Pool(processes=args.num) as pool: 52 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success, 53 | trackers), desc='eval success', total=len(trackers), ncols=100): 54 | success_ret.update(ret) 55 | precision_ret = {} 56 | with Pool(processes=args.num) as pool: 57 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision, 58 | trackers), desc='eval precision', total=len(trackers), ncols=100): 59 | precision_ret.update(ret) 60 | benchmark.show_result(success_ret, precision_ret, 61 | show_video_level=args.show_video_level) 62 | elif 'LaSOT' == args.dataset: 63 | dataset = LaSOTDataset(args.dataset, root) 64 | dataset.set_tracker(tracker_dir, trackers) 65 | trackers = dataset.tracker_names 66 | benchmark = OPEBenchmark(dataset) 67 | success_ret = {} 68 | with Pool(processes=args.num) as pool: 69 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success, 70 | trackers), desc='eval success', total=len(trackers), ncols=100): 71 | success_ret.update(ret) 72 | precision_ret = {} 73 | with Pool(processes=args.num) as pool: 74 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision, 75 | trackers), desc='eval precision', total=len(trackers), ncols=100): 76 | precision_ret.update(ret) 77 | norm_precision_ret = {} 78 | with Pool(processes=args.num) as pool: 79 | for ret in tqdm(pool.imap_unordered(benchmark.eval_norm_precision, 80 | trackers), desc='eval norm precision', total=len(trackers), ncols=100): 81 | norm_precision_ret.update(ret) 82 | benchmark.show_result(success_ret, precision_ret, norm_precision_ret, 83 | show_video_level=args.show_video_level) 84 | elif 'UAV' in args.dataset: 85 | dataset = UAVDataset(args.dataset, root) 86 | dataset.set_tracker(tracker_dir, trackers) 87 | trackers = dataset.tracker_names 88 | benchmark = OPEBenchmark(dataset) 89 | success_ret = {} 90 | with Pool(processes=args.num) as pool: 91 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success, 92 | trackers), desc='eval success', total=len(trackers), ncols=100): 93 | success_ret.update(ret) 94 | precision_ret = {} 95 | with Pool(processes=args.num) as pool: 96 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision, 97 | trackers), desc='eval precision', total=len(trackers), ncols=100): 98 | precision_ret.update(ret) 99 | benchmark.show_result(success_ret, precision_ret, 100 | show_video_level=args.show_video_level) 101 | elif 'got10k' in args.dataset: 102 | dataset = GOT10kDataset(args.dataset, root) 103 | dataset.set_tracker(tracker_dir, trackers) 104 | trackers = dataset.tracker_names 105 | benchmark = OPEBenchmark(dataset) 106 | success_ret = {} 107 | with Pool(processes=args.num) as pool: 108 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success, 109 | trackers), desc='eval success', total=len(trackers), ncols=100): 110 | success_ret.update(ret) 111 | precision_ret = {} 112 | with Pool(processes=args.num) as pool: 113 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision, 114 | trackers), desc='eval precision', total=len(trackers), ncols=100): 115 | precision_ret.update(ret) 116 | benchmark.show_result(success_ret, precision_ret, 117 | show_video_level=args.show_video_level) 118 | elif 'NFS' in args.dataset: 119 | dataset = NFSDataset(args.dataset, root) 120 | dataset.set_tracker(tracker_dir, trackers) 121 | trackers = dataset.tracker_names 122 | benchmark = OPEBenchmark(dataset) 
123 | success_ret = {} 124 | with Pool(processes=args.num) as pool: 125 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success, 126 | trackers), desc='eval success', total=len(trackers), ncols=100): 127 | success_ret.update(ret) 128 | precision_ret = {} 129 | with Pool(processes=args.num) as pool: 130 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision, 131 | trackers), desc='eval precision', total=len(trackers), ncols=100): 132 | precision_ret.update(ret) 133 | benchmark.show_result(success_ret, precision_ret, 134 | show_video_level=args.show_video_level) 135 | elif args.dataset in ['VOT2016', 'VOT2017', 'VOT2018', 'VOT2019']: 136 | dataset = VOTDataset(args.dataset, root) 137 | dataset.set_tracker(tracker_dir, trackers) 138 | trackers = dataset.tracker_names 139 | ar_benchmark = AccuracyRobustnessBenchmark(dataset) 140 | ar_result = {} 141 | with Pool(processes=args.num) as pool: 142 | for ret in tqdm(pool.imap_unordered(ar_benchmark.eval, 143 | trackers), desc='eval ar', total=len(trackers), ncols=100): 144 | ar_result.update(ret) 145 | 146 | benchmark = EAOBenchmark(dataset) 147 | eao_result = {} 148 | with Pool(processes=args.num) as pool: 149 | for ret in tqdm(pool.imap_unordered(benchmark.eval, 150 | trackers), desc='eval eao', total=len(trackers), ncols=100): 151 | eao_result.update(ret) 152 | ar_benchmark.show_result(ar_result, eao_result, 153 | show_video_level=args.show_video_level) 154 | elif 'VOT2018-LT' == args.dataset: 155 | dataset = VOTLTDataset(args.dataset, root) 156 | dataset.set_tracker(tracker_dir, trackers) 157 | trackers = dataset.tracker_names 158 | benchmark = F1Benchmark(dataset) 159 | f1_result = {} 160 | with Pool(processes=args.num) as pool: 161 | for ret in tqdm(pool.imap_unordered(benchmark.eval, 162 | trackers), desc='eval f1', total=len(trackers), ncols=100): 163 | f1_result.update(ret) 164 | benchmark.show_result(f1_result, 165 | show_video_level=args.show_video_level) 166 | 167 | 168 | if __name__ == '__main__': 169 | main() 170 | -------------------------------------------------------------------------------- /toolkit/utils/region.pyx: -------------------------------------------------------------------------------- 1 | """ 2 | @author fangyi.zhang@vipl.ict.ac.cn 3 | """ 4 | # distutils: sources = src/region.c 5 | # distutils: include_dirs = src/ 6 | 7 | from libc.stdlib cimport malloc, free 8 | from libc.stdio cimport sprintf 9 | from libc.string cimport strlen 10 | 11 | cimport c_region 12 | 13 | cpdef enum RegionType: 14 | EMTPY 15 | SPECIAL 16 | RECTANGEL 17 | POLYGON 18 | MASK 19 | 20 | cdef class RegionBounds: 21 | cdef c_region.region_bounds* _c_region_bounds 22 | 23 | def __cinit__(self): 24 | self._c_region_bounds = <c_region.region_bounds*>malloc( 25 | sizeof(c_region.region_bounds)) 26 | if not self._c_region_bounds: 27 | self._c_region_bounds = NULL 28 | raise MemoryError() 29 | 30 | def __init__(self, top, bottom, left, right): 31 | self.set(top, bottom, left, right) 32 | 33 | def __dealloc__(self): 34 | if self._c_region_bounds is not NULL: 35 | free(self._c_region_bounds) 36 | self._c_region_bounds = NULL 37 | 38 | def __str__(self): 39 | return "top: {:.3f} bottom: {:.3f} left: {:.3f} right: {:.3f}".format( 40 | self._c_region_bounds.top, 41 | self._c_region_bounds.bottom, 42 | self._c_region_bounds.left, 43 | self._c_region_bounds.right) 44 | 45 | def get(self): 46 | return (self._c_region_bounds.top, 47 | self._c_region_bounds.bottom, 48 | self._c_region_bounds.left, 49 | self._c_region_bounds.right) 50 | 51 | def set(self, top, bottom, left,
right): 52 | self._c_region_bounds.top = top 53 | self._c_region_bounds.bottom = bottom 54 | self._c_region_bounds.left = left 55 | self._c_region_bounds.right = right 56 | 57 | cdef class Rectangle: 58 | cdef c_region.region_rectangle* _c_region_rectangle 59 | 60 | def __cinit__(self): 61 | self._c_region_rectangle = <c_region.region_rectangle*>malloc( 62 | sizeof(c_region.region_rectangle)) 63 | if not self._c_region_rectangle: 64 | self._c_region_rectangle = NULL 65 | raise MemoryError() 66 | 67 | def __init__(self, x, y, width, height): 68 | self.set(x, y, width, height) 69 | 70 | def __dealloc__(self): 71 | if self._c_region_rectangle is not NULL: 72 | free(self._c_region_rectangle) 73 | self._c_region_rectangle = NULL 74 | 75 | def __str__(self): 76 | return "x: {:.3f} y: {:.3f} width: {:.3f} height: {:.3f}".format( 77 | self._c_region_rectangle.x, 78 | self._c_region_rectangle.y, 79 | self._c_region_rectangle.width, 80 | self._c_region_rectangle.height) 81 | 82 | def set(self, x, y, width, height): 83 | self._c_region_rectangle.x = x 84 | self._c_region_rectangle.y = y 85 | self._c_region_rectangle.width = width 86 | self._c_region_rectangle.height = height 87 | 88 | def get(self): 89 | """ 90 | return: 91 | (x, y, width, height) 92 | """ 93 | return (self._c_region_rectangle.x, 94 | self._c_region_rectangle.y, 95 | self._c_region_rectangle.width, 96 | self._c_region_rectangle.height) 97 | 98 | cdef class Polygon: 99 | cdef c_region.region_polygon* _c_region_polygon 100 | 101 | def __cinit__(self, points): 102 | """ 103 | args: 104 | points: flat tuple of point coordinates 105 | points = (x1, y1, x2, y2, ...) 106 | """ 107 | num = len(points) // 2 108 | self._c_region_polygon = <c_region.region_polygon*>malloc( 109 | sizeof(c_region.region_polygon)) 110 | if not self._c_region_polygon: 111 | self._c_region_polygon = NULL 112 | raise MemoryError() 113 | self._c_region_polygon.count = num 114 | self._c_region_polygon.x = <float*>malloc(sizeof(float) * num) 115 | if not self._c_region_polygon.x: 116 | raise MemoryError() 117 | self._c_region_polygon.y = <float*>malloc(sizeof(float) * num) 118 | if not self._c_region_polygon.y: 119 | raise MemoryError() 120 | 121 | for i in range(num): 122 | self._c_region_polygon.x[i] = points[i*2] 123 | self._c_region_polygon.y[i] = points[i*2+1] 124 | 125 | def __dealloc__(self): 126 | if self._c_region_polygon is not NULL: 127 | if self._c_region_polygon.x is not NULL: 128 | free(self._c_region_polygon.x) 129 | self._c_region_polygon.x = NULL 130 | if self._c_region_polygon.y is not NULL: 131 | free(self._c_region_polygon.y) 132 | self._c_region_polygon.y = NULL 133 | free(self._c_region_polygon) 134 | self._c_region_polygon = NULL 135 | 136 | def __str__(self): 137 | ret = "" 138 | for i in range(self._c_region_polygon.count-1): 139 | ret += "({:.3f} {:.3f}) ".format(self._c_region_polygon.x[i], 140 | self._c_region_polygon.y[i]) 141 | ret += "({:.3f} {:.3f})".format(self._c_region_polygon.x[self._c_region_polygon.count-1], 142 | self._c_region_polygon.y[self._c_region_polygon.count-1]) 143 | return ret 144 | 145 | def vot_overlap(polygon1, polygon2, bounds=None): 146 | """ computing overlap between two polygons 147 | Args: 148 | polygon1: polygon tuple of points 149 | polygon2: polygon tuple of points 150 | bounds: tuple of (left, top, right, bottom) or tuple of (width, height) 151 | Return: 152 | overlap: overlap between two polygons 153 | """ 154 | if len(polygon1) == 1 or len(polygon2) == 1: 155 | return float("nan") 156 | 157 | if len(polygon1) == 4: 158 | polygon1_ = Polygon([polygon1[0], polygon1[1], 159 | polygon1[0]+polygon1[2], polygon1[1], 160 | polygon1[0]+polygon1[2],
polygon1[1]+polygon1[3], 161 | polygon1[0], polygon1[1]+polygon1[3]]) 162 | else: 163 | polygon1_ = Polygon(polygon1) 164 | 165 | if len(polygon2) == 4: 166 | polygon2_ = Polygon([polygon2[0], polygon2[1], 167 | polygon2[0]+polygon2[2], polygon2[1], 168 | polygon2[0]+polygon2[2], polygon2[1]+polygon2[3], 169 | polygon2[0], polygon2[1]+polygon2[3]]) 170 | else: 171 | polygon2_ = Polygon(polygon2) 172 | 173 | if bounds is not None and len(bounds) == 4: 174 | pno_bounds = RegionBounds(bounds[0], bounds[1], bounds[2], bounds[3]) 175 | elif bounds is not None and len(bounds) == 2: 176 | pno_bounds = RegionBounds(0, bounds[1], 0, bounds[0]) 177 | else: 178 | pno_bounds = RegionBounds(-float("inf"), float("inf"), 179 | -float("inf"), float("inf")) 180 | cdef float only1 = 0 181 | cdef float only2 = 0 182 | cdef c_region.region_polygon* c_polygon1 = polygon1_._c_region_polygon 183 | cdef c_region.region_polygon* c_polygon2 = polygon2_._c_region_polygon 184 | cdef c_region.region_bounds no_bounds = pno_bounds._c_region_bounds[0] # dereference 185 | return c_region.compute_polygon_overlap(c_polygon1, 186 | c_polygon2, 187 | &only1, 188 | &only2, 189 | no_bounds) 190 | 191 | def vot_overlap_traj(polygons1, polygons2, bounds=None): 192 | """ computing overlaps between two trajectories 193 | Args: 194 | polygons1: list of polygon 195 | polygons2: list of polygon 196 | bounds: tuple of (left, top, right, bottom) or tuple of (width, height) 197 | Return: 198 | overlaps: overlaps between all pairs of polygons 199 | """ 200 | assert len(polygons1) == len(polygons2) 201 | overlaps = [] 202 | for i in range(len(polygons1)): 203 | overlap = vot_overlap(polygons1[i], polygons2[i], bounds=bounds) 204 | overlaps.append(overlap) 205 | return overlaps 206 | 207 | 208 | def vot_float2str(template, float value): 209 | """ 210 | Args: 211 | template: like "%.3f" in C syntax 212 | value: float value 213 | """ 214 | cdef bytes ptemplate = template.encode() 215 | cdef const char* ctemplate = ptemplate 216 | cdef char* output = <char*>malloc(sizeof(char) * 100) 217 | if not output: 218 | raise MemoryError() 219 | sprintf(output, ctemplate, value) 220 | try: 221 | ret = output[:strlen(output)].decode() 222 | finally: 223 | free(output) 224 | return ret 225 | --------------------------------------------------------------------------------
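A minimal usage sketch for the region module above: it assumes the Cython extension has been built (e.g. with the setup script under toolkit/utils) and that it is importable as toolkit.utils.region; the boxes, polygon, and frame size below are illustrative values, not part of the repository.

from toolkit.utils import region

# Axis-aligned boxes are given as (x, y, w, h); vot_overlap converts them
# to 4-corner polygons internally before computing the bounded overlap.
pred_box = (10.0, 10.0, 40.0, 30.0)
gt_box = (12.0, 12.0, 40.0, 30.0)
iou = region.vot_overlap(pred_box, gt_box, bounds=(640, 480))  # bounds = (width, height)

# A rotated ground truth can be passed directly as a flat polygon (x1, y1, ..., x4, y4).
gt_poly = (12.0, 12.0, 52.0, 12.0, 52.0, 42.0, 12.0, 42.0)
iou_poly = region.vot_overlap(pred_box, gt_poly, bounds=(640, 480))

# Whole trajectories (equal-length lists of boxes/polygons) are compared frame by frame.
overlaps = region.vot_overlap_traj([pred_box, pred_box], [gt_box, gt_poly], bounds=(640, 480))
print(iou, iou_poly, overlaps)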
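The crop_like_SiamFC / crop_like_SiamFCx helpers in the par_crop scripts above all derive the exemplar and search crop sizes with the same arithmetic; the sketch below isolates that computation. The helper name, the correctly spelled instance_size parameter, and the sample box are illustrative only.

import numpy as np

def siamfc_crop_sizes(bbox, context_amount=0.5, exemplar_size=127, instance_size=255):
    # bbox is [xmin, ymin, xmax, ymax]; w, h is the raw target size.
    w, h = bbox[2] - bbox[0], bbox[3] - bbox[1]
    # Each side is padded by context_amount * (w + h); s_z is the geometric mean of the
    # padded sides, i.e. the side of the exemplar crop in image pixels. (The source swaps
    # w and h in wc_z/hc_z; it does not matter because only their product is used.)
    wc_z = h + context_amount * (w + h)
    hc_z = w + context_amount * (w + h)
    s_z = np.sqrt(wc_z * hc_z)
    # The search crop keeps the same scale and adds enough margin so the resized patch
    # grows from exemplar_size to instance_size pixels.
    scale_z = exemplar_size / s_z
    d_search = (instance_size - exemplar_size) / 2
    pad = d_search / scale_z
    s_x = s_z + 2 * pad
    return s_z, s_x

print(siamfc_crop_sizes([100, 100, 150, 180]))  # approximately (129.1, 259.3)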