├── INSTALL.md
├── LICENSE
├── README.md
├── VOT-ST2020+Winners+Presentation.pdf
├── experiments
└── siamreppoints
│ ├── config_vot2018_offline.yaml
│ └── config_vot2019_offline.yaml
├── install.sh
├── requirements.txt
├── setup.py
├── siamreppoints
├── __init__.py
├── core
│ ├── __init__.py
│ ├── config.py
│ └── xcorr.py
├── models
│ ├── __init__.py
│ ├── backbone
│ │ ├── __init__.py
│ │ ├── alexnet.py
│ │ ├── mobile_v2.py
│ │ └── resnet_atrous.py
│ ├── csrc
│ │ ├── ROIAlign.h
│ │ ├── ROIPool.h
│ │ ├── SigmoidFocalLoss.h
│ │ ├── cpu
│ │ │ ├── ROIAlign_cpu.cpp
│ │ │ ├── nms_cpu.cpp
│ │ │ └── vision.h
│ │ ├── cuda
│ │ │ ├── ROIAlign_cuda.cu
│ │ │ ├── ROIPool_cuda.cu
│ │ │ ├── SigmoidFocalLoss_cuda.cu
│ │ │ ├── deform_conv_cuda.cu
│ │ │ ├── deform_conv_kernel_cuda.cu
│ │ │ ├── deform_pool_cuda.cu
│ │ │ ├── deform_pool_kernel_cuda.cu
│ │ │ ├── ml_nms.cu
│ │ │ ├── nms.cu
│ │ │ └── vision.h
│ │ ├── deform_conv.h
│ │ ├── deform_pool.h
│ │ ├── ml_nms.h
│ │ ├── nms.h
│ │ └── vision.cpp
│ ├── head
│ │ ├── __init__.py
│ │ └── rpn.py
│ ├── layers
│ │ ├── __init__.py
│ │ ├── _utils.py
│ │ ├── batch_norm.py
│ │ ├── dcn
│ │ │ ├── __init__.py
│ │ │ ├── deform_conv_func.py
│ │ │ ├── deform_conv_module.py
│ │ │ ├── deform_pool_func.py
│ │ │ └── deform_pool_module.py
│ │ ├── iou_loss.py
│ │ ├── misc.py
│ │ ├── nms.py
│ │ ├── roi_align.py
│ │ ├── roi_pool.py
│ │ ├── scale.py
│ │ ├── sigmoid_focal_loss.py
│ │ └── smooth_l1_loss.py
│ ├── model_builder.py
│ └── neck
│ │ ├── __init__.py
│ │ └── neck.py
├── setup.py
├── tracker
│ ├── __init__.py
│ ├── base_tracker.py
│ ├── siamreppoints_tracker.py
│ └── tracker_builder.py
└── utils
│ ├── __init__.py
│ ├── anchor.py
│ ├── average_meter.py
│ ├── bbox.py
│ ├── distributed.py
│ ├── log_helper.py
│ ├── lr_scheduler.py
│ ├── misc.py
│ └── model_load.py
├── testing_dataset
└── README.md
├── toolkit
├── __init__.py
├── datasets
│ ├── __init__.py
│ ├── dataset.py
│ ├── got10k.py
│ ├── lasot.py
│ ├── nfs.py
│ ├── otb.py
│ ├── trackingnet.py
│ ├── uav.py
│ ├── video.py
│ └── vot.py
├── evaluation
│ ├── __init__.py
│ ├── ar_benchmark.py
│ ├── eao_benchmark.py
│ ├── f1_benchmark.py
│ └── ope_benchmark.py
├── utils
│ ├── __init__.py
│ ├── c_region.pxd
│ ├── misc.py
│ ├── region.c
│ ├── region.pyx
│ ├── src
│ │ ├── buffer.h
│ │ ├── region.c
│ │ └── region.h
│ └── statistics.py
└── visualization
│ ├── __init__.py
│ ├── draw_eao.py
│ ├── draw_f1.py
│ ├── draw_success_precision.py
│ └── draw_utils.py
└── tools
├── eval.py
└── test.py
/INSTALL.md:
--------------------------------------------------------------------------------
1 | # Installation
2 | 
3 | This document contains detailed instructions for installing the dependencies for RPT. We recommend using [install.sh](install.sh). The code is tested on an Ubuntu 16.04 system with an Nvidia GPU (we recommend a 1080 Ti / TITAN Xp).
4 | 
5 | ### Requirements
6 | * Conda with Python 3.6.
7 | * Nvidia GPU.
8 | * PyTorch 1.1.0
9 | * yacs
10 | * pyyaml
11 | * matplotlib
12 | * tqdm
13 | * OpenCV
14 | 
15 | ## Step-by-step instructions
16 | 
17 | #### Create environment and activate
18 | ```bash
19 | conda create --name siamreppoints python=3.6
20 | conda activate siamreppoints
21 | ```
22 | 
23 | #### Install numpy/pytorch/opencv
24 | ```
25 | conda install numpy
26 | conda install pytorch=1.1.0 torchvision cuda90 -c pytorch
27 | pip install opencv-python
28 | ```
29 | 
30 | #### Install other requirements
31 | ```
32 | pip install pyyaml yacs tqdm colorama matplotlib cython tensorboardX
33 | ```
34 | 
35 | #### Build extensions
36 | ```
37 | python setup.py build_ext --inplace
38 | ```
39 | 
40 | #### Build extensions of DCN
41 | ```
42 | cd siamreppoints && python setup.py build_ext --inplace
43 | ```
44 | 
45 | ## Try with scripts
46 | ```
47 | bash install.sh /path/to/your/conda siamreppoints
48 | ```
49 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2020 Lucas
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RPT: Learning Point Set Representation for Siamese Visual Tracking [[ECCVW2020](https://arxiv.org/abs/2008.03467)]
2 | 
3 | 
4 | ## :sunny: Currently, this code only supports the offline version of RPT.
5 | 
6 | 
7 | ## News
8 | - :trophy: **We are the winner of the VOT-2020 Short-Term challenge**
9 | - :trophy: **We ranked 1st on both the public and the sequestered benchmark datasets of the VOT2020 Short-Term challenge**
10 | - :sunny::sunny: **Our [VOT2020-ST Winner presentation](https://github.com/zhanght021/RPT/blob/master/VOT-ST2020%2BWinners%2BPresentation.pdf) has been uploaded**
11 | 
12 | 
13 | ----
14 | ## Spotlight video
15 | 
16 | [![Video Label](https://i0.hdslb.com/bfs/album/1ea9e961083d81f7fed53d22ed8698a1ac2307f9.jpg@518w_1e_1c.jpg)](https://www.bilibili.com/video/BV17v41117cZ)
17 | 
18 | 
19 | ---
20 | ## Models
21 | | Dataset | pattern | A | R | EAO | Config. Filename |
22 | |:---:|:---:|:---:|:---:|:---:|:---:|
23 | | VOT2018 | offline | 0.610 | 0.150 | 0.497 | config_vot2018_offline.yaml |
24 | | VOT2019 | offline | 0.598 | 0.261 | 0.409 | config_vot2019_offline.yaml |
25 | | VOT2018 | online | 0.629 | 0.103 | 0.510 | :smile:coming soon:smile: |
26 | | VOT2019 | online | 0.623 | 0.186 | 0.417 | :smile:coming soon:smile: |
27 | 
28 | - The pretrained model can be downloaded from [[google](https://drive.google.com/file/d/1b9aynlUa4h1ju9Tir3xd6tT6dPzu40rN/view?usp=sharing)] or [[baidu](https://pan.baidu.com/s/18EXDr4DoeD89Vasuf8WCXQ)], extraction code: g4ac.
29 | - The raw results can be downloaded [[here](https://pan.baidu.com/s/1fAovMOR8UAN46f5Dm-sa6A)], extraction code: mkbh.
30 | 
31 | ----
32 | ## Abstract
33 | While remarkable progress has been made in robust visual tracking, accurate target state estimation still remains a highly challenging problem. In this paper, we argue that this issue is closely related to the prevalent bounding box representation, which provides only a coarse spatial extent of the object. Thus, an efficient visual tracking framework is proposed to accurately estimate the target state with a finer representation as a set of representative points. The point set is trained to indicate the semantically and geometrically significant positions of the target region, enabling more fine-grained localization and modeling of object appearance. We further propose a multi-level aggregation strategy to obtain detailed structure information by fusing hierarchical convolution layers. Extensive experiments on several challenging benchmarks including OTB2015, VOT2018, VOT2019 and GOT-10k demonstrate that our method achieves new state-of-the-art performance while running at over 20 FPS.
34 | 
35 | ---
36 | ## Installation
37 | Please find installation instructions in [INSTALL.md](INSTALL.md).
38 | 
39 | ---
40 | ## Quick Start: Using siamreppoints
41 | 
42 | Download the pretrained model and put siamreppoints.model in the correct directory under experiments.
43 | 
44 | ```bash
45 | cd siamreppoints/tools
46 | python test.py \
47 |         --snapshot ./snapshot/siamreppoints.model \ # model path
48 |         --dataset VOT2018 \ # dataset name
49 |         --config ./experiments/siamreppoints/config_vot2018_offline.yaml # config file
50 | ```
51 | 
52 | 
53 | ```bash
54 | cd siamreppoints/tools
55 | python eval.py \
56 |         --tracker_path ./results \ # result path
57 |         --dataset VOT2018 \ # dataset name
58 |         --tracker_prefix 'siam' \ # tracker name
59 |         --num 1 # number of threads for evaluation
60 | ```
61 | 
62 | 
63 | ---
64 | ## Acknowledgement
65 | - [pysot](https://github.com/STVIR/pysot)
66 | 
--------------------------------------------------------------------------------
/VOT-ST2020+Winners+Presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/VOT-ST2020+Winners+Presentation.pdf
--------------------------------------------------------------------------------
/experiments/siamreppoints/config_vot2018_offline.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 5 | KWARGS: 6 | used_layers: [2, 3, 4] 7 | PRETRAINED: 'pretrained_models/resnet50.model' 8 | TRAIN_LAYERS: ['layer1', 'layer2', 'layer3', 'layer4'] 9 | TRAIN_EPOCH: 10 10 | LAYERS_LR: 0.1 11 | 12 | ADJUST: 13 | ADJUST: true 14 | TYPE: "AdjustAllLayer" 15 | KWARGS: 16 | in_channels: [512, 1024, 2048] 17 |
out_channels: [256, 256, 256] 18 | 19 | RPN: 20 | TYPE: 'MultiRPN' 21 | KWARGS: 22 | anchor_num: 5 23 | in_channels: [256, 256, 256] 24 | weighted: true 25 | 26 | MASK: 27 | MASK: false 28 | 29 | ANCHOR: 30 | STRIDE: 8 31 | RATIOS: [0.33, 0.5, 1, 2, 3] 32 | SCALES: [8] 33 | ANCHOR_NUM: 5 34 | 35 | TRACK: 36 | TYPE: 'SiamReppointsTracker' 37 | PENALTY_K: 0.09384699789214077 38 | WINDOW_INFLUENCE: 0.2870488747571366 39 | LR: 0.546949224973851 40 | EXEMPLAR_SIZE: 127 41 | INSTANCE_SIZE: 255 42 | BASE_SIZE: 8 43 | CONTEXT_AMOUNT: 0.5 44 | EXPANSION: 1.00 45 | 46 | TRAIN: 47 | EPOCH: 20 48 | START_EPOCH: 0 49 | BATCH_SIZE: 20 50 | BASE_LR: 0.005 51 | CLS_WEIGHT: 1.0 52 | LOC_WEIGHT: 1.2 53 | RESUME: '' 54 | 55 | LR: 56 | TYPE: 'log' 57 | KWARGS: 58 | start_lr: 0.005 59 | end_lr: 0.0005 60 | LR_WARMUP: 61 | TYPE: 'step' 62 | EPOCH: 5 63 | KWARGS: 64 | start_lr: 0.001 65 | end_lr: 0.005 66 | step: 1 67 | 68 | DATASET: 69 | NAMES: 70 | - 'YOUTUBEBB' 71 | - 'VID' 72 | - 'COCO' 73 | 74 | TEMPLATE: 75 | SHIFT: 4 76 | SCALE: 0.05 77 | BLUR: 0.0 78 | FLIP: 0.0 79 | COLOR: 0.5 80 | 81 | SEARCH: 82 | SHIFT: 64 83 | SCALE: 0.25 84 | BLUR: 0.2 85 | FLIP: 0.0 86 | COLOR: 0.5 87 | 88 | NEG: 0.2 89 | GRAY: 0.0 90 | -------------------------------------------------------------------------------- /experiments/siamreppoints/config_vot2019_offline.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 5 | KWARGS: 6 | used_layers: [2, 3, 4] 7 | PRETRAINED: 'pretrained_models/resnet50.model' 8 | TRAIN_LAYERS: ['layer1', 'layer2', 'layer3', 'layer4'] 9 | TRAIN_EPOCH: 10 10 | LAYERS_LR: 0.1 11 | 12 | ADJUST: 13 | ADJUST: true 14 | TYPE: "AdjustAllLayer" 15 | KWARGS: 16 | in_channels: [512, 1024, 2048] 17 | out_channels: [256, 256, 256] 18 | 19 | RPN: 20 | TYPE: 'MultiRPN' 21 | KWARGS: 22 | anchor_num: 5 23 | in_channels: [256, 256, 256] 24 | weighted: true 25 | 26 | MASK: 27 | MASK: false 28 | 29 | ANCHOR: 30 | STRIDE: 8 31 | RATIOS: [0.33, 0.5, 1, 2, 3] 32 | SCALES: [8] 33 | ANCHOR_NUM: 5 34 | 35 | TRACK: 36 | TYPE: 'SiamReppointsTracker' 37 | PENALTY_K: 0.17380070743842363 38 | WINDOW_INFLUENCE: 0.43745716974756743 39 | LR: 0.4488453012609389 40 | EXEMPLAR_SIZE: 127 41 | INSTANCE_SIZE: 351 42 | BASE_SIZE: 8 43 | CONTEXT_AMOUNT: 0.5 44 | EXPANSION: 1.02 45 | 46 | TRAIN: 47 | EPOCH: 20 48 | START_EPOCH: 0 49 | BATCH_SIZE: 20 50 | BASE_LR: 0.005 51 | CLS_WEIGHT: 1.0 52 | LOC_WEIGHT: 1.2 53 | RESUME: '' 54 | 55 | LR: 56 | TYPE: 'log' 57 | KWARGS: 58 | start_lr: 0.005 59 | end_lr: 0.0005 60 | LR_WARMUP: 61 | TYPE: 'step' 62 | EPOCH: 5 63 | KWARGS: 64 | start_lr: 0.001 65 | end_lr: 0.005 66 | step: 1 67 | 68 | DATASET: 69 | NAMES: 70 | - 'YOUTUBEBB' 71 | - 'VID' 72 | - 'COCO' 73 | 74 | TEMPLATE: 75 | SHIFT: 4 76 | SCALE: 0.05 77 | BLUR: 0.0 78 | FLIP: 0.0 79 | COLOR: 0.5 80 | 81 | SEARCH: 82 | SHIFT: 64 83 | SCALE: 0.25 84 | BLUR: 0.2 85 | FLIP: 0.0 86 | COLOR: 0.5 87 | 88 | NEG: 0.2 89 | GRAY: 0.0 90 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -lt 2 ]; then 4 | echo "ARGS ERROR!" 
5 | echo " bash install.sh /path/to/your/conda env_name" 6 | exit 1 7 | fi 8 | 9 | set -e 10 | 11 | conda_path=$1 12 | env_name=$2 13 | 14 | source $conda_path/etc/profile.d/conda.sh 15 | 16 | echo "****** create environment " $env_name "*****" 17 | # create environment 18 | conda create -y --name $env_name python=3.6 19 | conda activate $env_name 20 | 21 | echo "***** install numpy pytorch opencv *****" 22 | # numpy 23 | conda install -y numpy 24 | # pytorch 25 | # pytorch with cuda80/cuda90 is tested 26 | conda install -y pytorch=1.1.0 torchvision cuda90 -c pytorch 27 | # opencv 28 | pip install opencv-python 29 | # tensorboardX 30 | 31 | echo "***** install other libs *****" 32 | pip install tensorboardX 33 | # libs 34 | pip install pyyaml yacs tqdm colorama matplotlib cython 35 | 36 | echo "***** build extensions *****" 37 | python setup.py build_ext --inplace 38 | 39 | echo "***** build extensions about DCN *****" 40 | cd siamreppoints 41 | python ./setup.py build_ext --inplace 42 | 43 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | yacs 3 | tqdm 4 | pyyaml 5 | matplotlib 6 | colorama 7 | cython 8 | tensorboardX 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | 5 | 6 | ext_modules = [ 7 | Extension( 8 | name='toolkit.utils.region', 9 | sources=[ 10 | 'toolkit/utils/region.pyx', 11 | 'toolkit/utils/src/region.c', 12 | ], 13 | include_dirs=[ 14 | 'toolkit/utils/src' 15 | ] 16 | ) 17 | ] 18 | 19 | setup( 20 | name='toolkit', 21 | packages=['toolkit'], 22 | ext_modules=cythonize(ext_modules) 23 | ) 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /siamreppoints/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/siamreppoints/__init__.py -------------------------------------------------------------------------------- /siamreppoints/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/siamreppoints/core/__init__.py -------------------------------------------------------------------------------- /siamreppoints/core/xcorr.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | 10 | def xcorr_slow(x, kernel): 11 | """for loop to calculate cross correlation, slow version 12 | """ 13 | batch = x.size()[0] 14 | out = [] 15 | for i in range(batch): 16 | px = x[i] 17 | pk = kernel[i] 18 | px = px.view(1, -1, px.size()[1], px.size()[2]) 19 | pk = pk.view(1, -1, pk.size()[1], pk.size()[2]) 20 | po = F.conv2d(px, pk) 21 | out.append(po) 22 | out = torch.cat(out, 0) 23 | return out 24 | 25 | 26 | def xcorr_fast(x, kernel): 27 | """group conv2d to calculate cross correlation, fast version 28 | """ 29 | batch = kernel.size()[0] 30 | 
pk = kernel.view(-1, x.size()[1], kernel.size()[2], kernel.size()[3]) 31 | px = x.view(1, -1, x.size()[2], x.size()[3]) 32 | po = F.conv2d(px, pk, groups=batch) 33 | po = po.view(batch, -1, po.size()[2], po.size()[3]) 34 | return po 35 | 36 | def xcorr_depthwise(x, kernel): 37 | """depthwise cross correlation 38 | """ 39 | batch = kernel.size(0) 40 | channel = kernel.size(1) 41 | x = x.view(1, batch*channel, x.size(2), x.size(3)) 42 | kernel = kernel.view(batch*channel, 1, kernel.size(2), kernel.size(3)) 43 | out = F.conv2d(x, kernel, groups=batch*channel) 44 | out = out.view(batch, channel, out.size(2), out.size(3)) 45 | return out 46 | 47 | def xcorr_depthwise_dilation(x, kernel, padding=(0, 0), dilation=(1, 1)): 48 | """depthwise cross correlation 49 | """ 50 | batch = kernel.size(0) 51 | channel = kernel.size(1) 52 | x = x.view(1, batch*channel, x.size(2), x.size(3)) 53 | kernel = kernel.view(batch*channel, 1, kernel.size(2), kernel.size(3)) 54 | out = F.conv2d(x, kernel, groups=batch*channel, padding=padding, dilation=dilation) 55 | out = out.view(batch, channel, out.size(2), out.size(3)) 56 | return out 57 | 58 | -------------------------------------------------------------------------------- /siamreppoints/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/siamreppoints/models/__init__.py -------------------------------------------------------------------------------- /siamreppoints/models/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from siamreppoints.models.backbone.alexnet import alexnetlegacy, alexnet 9 | from siamreppoints.models.backbone.mobile_v2 import mobilenetv2 10 | from siamreppoints.models.backbone.resnet_atrous import resnet18, resnet34, resnet50 11 | 12 | BACKBONES = { 13 | 'alexnetlegacy': alexnetlegacy, 14 | 'mobilenetv2': mobilenetv2, 15 | 'resnet18': resnet18, 16 | 'resnet34': resnet34, 17 | 'resnet50': resnet50, 18 | 'alexnet': alexnet, 19 | } 20 | 21 | 22 | def get_backbone(name, **kwargs): 23 | return BACKBONES[name](**kwargs) 24 | -------------------------------------------------------------------------------- /siamreppoints/models/backbone/alexnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class AlexNetLegacy(nn.Module): 10 | configs = [3, 96, 256, 384, 384, 256] 11 | 12 | def __init__(self, width_mult=1): 13 | configs = list(map(lambda x: 3 if x == 3 else 14 | int(x*width_mult), AlexNet.configs)) 15 | super(AlexNetLegacy, self).__init__() 16 | self.features = nn.Sequential( 17 | nn.Conv2d(configs[0], configs[1], kernel_size=11, stride=2), 18 | nn.BatchNorm2d(configs[1]), 19 | nn.MaxPool2d(kernel_size=3, stride=2), 20 | nn.ReLU(inplace=True), 21 | nn.Conv2d(configs[1], configs[2], kernel_size=5), 22 | nn.BatchNorm2d(configs[2]), 23 | nn.MaxPool2d(kernel_size=3, stride=2), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(configs[2], configs[3], kernel_size=3), 26 | nn.BatchNorm2d(configs[3]), 27 | 
nn.ReLU(inplace=True), 28 | nn.Conv2d(configs[3], configs[4], kernel_size=3), 29 | nn.BatchNorm2d(configs[4]), 30 | nn.ReLU(inplace=True), 31 | nn.Conv2d(configs[4], configs[5], kernel_size=3), 32 | nn.BatchNorm2d(configs[5]), 33 | ) 34 | self.feature_size = configs[5] 35 | 36 | def forward(self, x): 37 | x = self.features(x) 38 | return x 39 | 40 | 41 | class AlexNet(nn.Module): 42 | configs = [3, 96, 256, 384, 384, 256] 43 | 44 | def __init__(self, width_mult=1): 45 | configs = list(map(lambda x: 3 if x == 3 else 46 | int(x*width_mult), AlexNet.configs)) 47 | super(AlexNet, self).__init__() 48 | self.layer1 = nn.Sequential( 49 | nn.Conv2d(configs[0], configs[1], kernel_size=11, stride=2), 50 | nn.BatchNorm2d(configs[1]), 51 | nn.MaxPool2d(kernel_size=3, stride=2), 52 | nn.ReLU(inplace=True), 53 | ) 54 | self.layer2 = nn.Sequential( 55 | nn.Conv2d(configs[1], configs[2], kernel_size=5), 56 | nn.BatchNorm2d(configs[2]), 57 | nn.MaxPool2d(kernel_size=3, stride=2), 58 | nn.ReLU(inplace=True), 59 | ) 60 | self.layer3 = nn.Sequential( 61 | nn.Conv2d(configs[2], configs[3], kernel_size=3), 62 | nn.BatchNorm2d(configs[3]), 63 | nn.ReLU(inplace=True), 64 | ) 65 | self.layer4 = nn.Sequential( 66 | nn.Conv2d(configs[3], configs[4], kernel_size=3), 67 | nn.BatchNorm2d(configs[4]), 68 | nn.ReLU(inplace=True), 69 | ) 70 | 71 | self.layer5 = nn.Sequential( 72 | nn.Conv2d(configs[4], configs[5], kernel_size=3), 73 | nn.BatchNorm2d(configs[5]), 74 | ) 75 | self.feature_size = configs[5] 76 | 77 | def forward(self, x): 78 | x = self.layer1(x) 79 | x = self.layer2(x) 80 | x = self.layer3(x) 81 | x = self.layer4(x) 82 | x = self.layer5(x) 83 | return x 84 | 85 | 86 | def alexnetlegacy(**kwargs): 87 | return AlexNetLegacy(**kwargs) 88 | 89 | 90 | def alexnet(**kwargs): 91 | return AlexNet(**kwargs) 92 | -------------------------------------------------------------------------------- /siamreppoints/models/backbone/mobile_v2.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | def conv_bn(inp, oup, stride, padding=1): 11 | return nn.Sequential( 12 | nn.Conv2d(inp, oup, 3, stride, padding, bias=False), 13 | nn.BatchNorm2d(oup), 14 | nn.ReLU6(inplace=True) 15 | ) 16 | 17 | 18 | def conv_1x1_bn(inp, oup): 19 | return nn.Sequential( 20 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 21 | nn.BatchNorm2d(oup), 22 | nn.ReLU6(inplace=True) 23 | ) 24 | 25 | 26 | class InvertedResidual(nn.Module): 27 | def __init__(self, inp, oup, stride, expand_ratio, dilation=1): 28 | super(InvertedResidual, self).__init__() 29 | self.stride = stride 30 | 31 | self.use_res_connect = self.stride == 1 and inp == oup 32 | 33 | padding = 2 - stride 34 | if dilation > 1: 35 | padding = dilation 36 | 37 | self.conv = nn.Sequential( 38 | # pw 39 | nn.Conv2d(inp, inp * expand_ratio, 1, 1, 0, bias=False), 40 | nn.BatchNorm2d(inp * expand_ratio), 41 | nn.ReLU6(inplace=True), 42 | # dw 43 | nn.Conv2d(inp * expand_ratio, inp * expand_ratio, 3, 44 | stride, padding, dilation=dilation, 45 | groups=inp * expand_ratio, bias=False), 46 | nn.BatchNorm2d(inp * expand_ratio), 47 | nn.ReLU6(inplace=True), 48 | # pw-linear 49 | nn.Conv2d(inp * expand_ratio, oup, 1, 1, 0, bias=False), 50 | nn.BatchNorm2d(oup), 51 | ) 52 | 53 | def forward(self, x): 54 | if self.use_res_connect: 55 | return x + self.conv(x) 56 
| else: 57 | return self.conv(x) 58 | 59 | 60 | class MobileNetV2(nn.Sequential): 61 | def __init__(self, width_mult=1.0, used_layers=[3, 5, 7]): 62 | super(MobileNetV2, self).__init__() 63 | 64 | self.interverted_residual_setting = [ 65 | # t, c, n, s 66 | [1, 16, 1, 1, 1], 67 | [6, 24, 2, 2, 1], 68 | [6, 32, 3, 2, 1], 69 | [6, 64, 4, 2, 1], 70 | [6, 96, 3, 1, 1], 71 | [6, 160, 3, 2, 1], 72 | [6, 320, 1, 1, 1], 73 | ] 74 | # 0,2,3,4,6 75 | 76 | self.interverted_residual_setting = [ 77 | # t, c, n, s 78 | [1, 16, 1, 1, 1], 79 | [6, 24, 2, 2, 1], 80 | [6, 32, 3, 2, 1], 81 | [6, 64, 4, 1, 2], 82 | [6, 96, 3, 1, 2], 83 | [6, 160, 3, 1, 4], 84 | [6, 320, 1, 1, 4], 85 | ] 86 | 87 | self.channels = [24, 32, 96, 320] 88 | self.channels = [int(c * width_mult) for c in self.channels] 89 | 90 | input_channel = int(32 * width_mult) 91 | self.last_channel = int(1280 * width_mult) \ 92 | if width_mult > 1.0 else 1280 93 | 94 | self.add_module('layer0', conv_bn(3, input_channel, 2, 0)) 95 | 96 | last_dilation = 1 97 | 98 | self.used_layers = used_layers 99 | 100 | for idx, (t, c, n, s, d) in \ 101 | enumerate(self.interverted_residual_setting, start=1): 102 | output_channel = int(c * width_mult) 103 | 104 | layers = [] 105 | 106 | for i in range(n): 107 | if i == 0: 108 | if d == last_dilation: 109 | dd = d 110 | else: 111 | dd = max(d // 2, 1) 112 | layers.append(InvertedResidual(input_channel, 113 | output_channel, s, t, dd)) 114 | else: 115 | layers.append(InvertedResidual(input_channel, 116 | output_channel, 1, t, d)) 117 | input_channel = output_channel 118 | 119 | last_dilation = d 120 | 121 | self.add_module('layer%d' % (idx), nn.Sequential(*layers)) 122 | 123 | def forward(self, x): 124 | outputs = [] 125 | for idx in range(8): 126 | name = "layer%d" % idx 127 | x = getattr(self, name)(x) 128 | outputs.append(x) 129 | p0, p1, p2, p3, p4 = [outputs[i] for i in [1, 2, 3, 5, 7]] 130 | out = [outputs[i] for i in self.used_layers] 131 | if len(out) == 1: 132 | return out[0] 133 | return out 134 | 135 | 136 | def mobilenetv2(**kwargs): 137 | model = MobileNetV2(**kwargs) 138 | return model 139 | 140 | 141 | if __name__ == '__main__': 142 | net = mobilenetv2() 143 | 144 | print(net) 145 | 146 | from torch.autograd import Variable 147 | tensor = Variable(torch.Tensor(1, 3, 255, 255)).cuda() 148 | 149 | net = net.cuda() 150 | 151 | out = net(tensor) 152 | 153 | for i, p in enumerate(out): 154 | print(i, p.size()) 155 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
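// Dispatch layer for the Python bindings: when the input tensor is on the GPU the call is
// routed to the CUDA implementation (available only when built with WITH_CUDA); otherwise
// the forward pass falls back to ROIAlign_forward_cpu, while the backward pass has no CPU
// implementation and raises an error.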
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return 
SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cpu/vision.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cuda/SigmoidFocalLoss_cuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | // This file is modified from https://github.com/pytorch/pytorch/blob/master/modules/detectron/sigmoid_focal_loss_op.cu 3 | // Cheng-Yang Fu 4 | // cyfu@cs.unc.edu 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | // TODO make it in a common file 15 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 16 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 17 | i += blockDim.x * gridDim.x) 18 | 19 | 20 | template 21 | __global__ void SigmoidFocalLossForward(const int nthreads, 22 | const T* logits, 23 | const int* targets, 24 | const int num_classes, 25 | const float gamma, 26 | const float alpha, 27 | const int num, 28 | T* losses) { 29 | CUDA_1D_KERNEL_LOOP(i, nthreads) { 30 | 31 | int n = i / num_classes; 32 | int d = i % num_classes; // current class[0~79]; 33 | int t = targets[n]; // target class [1~80]; 34 | 35 | // Decide it is positive or negative case. 36 | T c1 = (t == (d+1)); 37 | T c2 = (t>=0 & t != (d+1)); 38 | 39 | T zn = (1.0 - alpha); 40 | T zp = (alpha); 41 | 42 | // p = 1. / 1. + expf(-x); p = sigmoid(x) 43 | T p = 1. / (1. + expf(-logits[i])); 44 | 45 | // (1-p)**gamma * log(p) where 46 | T term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN)); 47 | 48 | // p**gamma * log(1-p) 49 | T term2 = powf(p, gamma) * 50 | (-1. * logits[i] * (logits[i] >= 0) - 51 | logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))); 52 | 53 | losses[i] = 0.0; 54 | losses[i] += -c1 * term1 * zp; 55 | losses[i] += -c2 * term2 * zn; 56 | 57 | } // CUDA_1D_KERNEL_LOOP 58 | } // SigmoidFocalLossForward 59 | 60 | 61 | template 62 | __global__ void SigmoidFocalLossBackward(const int nthreads, 63 | const T* logits, 64 | const int* targets, 65 | const T* d_losses, 66 | const int num_classes, 67 | const float gamma, 68 | const float alpha, 69 | const int num, 70 | T* d_logits) { 71 | CUDA_1D_KERNEL_LOOP(i, nthreads) { 72 | 73 | int n = i / num_classes; 74 | int d = i % num_classes; // current class[0~79]; 75 | int t = targets[n]; // target class [1~80], 0 is background; 76 | 77 | // Decide it is positive or negative case. 78 | T c1 = (t == (d+1)); 79 | T c2 = (t>=0 & t != (d+1)); 80 | 81 | T zn = (1.0 - alpha); 82 | T zp = (alpha); 83 | // p = 1. / 1. + expf(-x); p = sigmoid(x) 84 | T p = 1. / (1. + expf(-logits[i])); 85 | 86 | // (1-p)**g * (1 - p - g*p*log(p) 87 | T term1 = powf((1. - p), gamma) * 88 | (1. - p - (p * gamma * logf(max(p, FLT_MIN)))); 89 | 90 | // (p**g) * (g*(1-p)*log(1-p) - p) 91 | T term2 = powf(p, gamma) * 92 | ((-1. * logits[i] * (logits[i] >= 0) - 93 | logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) * 94 | (1. 
- p) * gamma - p); 95 | d_logits[i] = 0.0; 96 | d_logits[i] += -c1 * term1 * zp; 97 | d_logits[i] += -c2 * term2 * zn; 98 | d_logits[i] = d_logits[i] * d_losses[i]; 99 | 100 | } // CUDA_1D_KERNEL_LOOP 101 | } // SigmoidFocalLossBackward 102 | 103 | 104 | at::Tensor SigmoidFocalLoss_forward_cuda( 105 | const at::Tensor& logits, 106 | const at::Tensor& targets, 107 | const int num_classes, 108 | const float gamma, 109 | const float alpha) { 110 | AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); 111 | AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); 112 | AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); 113 | 114 | const int num_samples = logits.size(0); 115 | 116 | auto losses = at::empty({num_samples, logits.size(1)}, logits.options()); 117 | auto losses_size = num_samples * logits.size(1); 118 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 119 | 120 | dim3 grid(std::min(THCCeilDiv((long)losses_size, 512L), 4096L)); 121 | dim3 block(512); 122 | 123 | if (losses.numel() == 0) { 124 | THCudaCheck(cudaGetLastError()); 125 | return losses; 126 | } 127 | 128 | AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_forward", [&] { 129 | SigmoidFocalLossForward<<>>( 130 | losses_size, 131 | logits.contiguous().data(), 132 | targets.contiguous().data(), 133 | num_classes, 134 | gamma, 135 | alpha, 136 | num_samples, 137 | losses.data()); 138 | }); 139 | THCudaCheck(cudaGetLastError()); 140 | return losses; 141 | } 142 | 143 | 144 | at::Tensor SigmoidFocalLoss_backward_cuda( 145 | const at::Tensor& logits, 146 | const at::Tensor& targets, 147 | const at::Tensor& d_losses, 148 | const int num_classes, 149 | const float gamma, 150 | const float alpha) { 151 | AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); 152 | AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); 153 | AT_ASSERTM(d_losses.type().is_cuda(), "d_losses must be a CUDA tensor"); 154 | 155 | AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); 156 | 157 | const int num_samples = logits.size(0); 158 | AT_ASSERTM(logits.size(1) == num_classes, "logits.size(1) should be num_classes"); 159 | 160 | auto d_logits = at::zeros({num_samples, num_classes}, logits.options()); 161 | auto d_logits_size = num_samples * logits.size(1); 162 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 163 | 164 | dim3 grid(std::min(THCCeilDiv((long)d_logits_size, 512L), 4096L)); 165 | dim3 block(512); 166 | 167 | if (d_logits.numel() == 0) { 168 | THCudaCheck(cudaGetLastError()); 169 | return d_logits; 170 | } 171 | 172 | AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_backward", [&] { 173 | SigmoidFocalLossBackward<<>>( 174 | d_logits_size, 175 | logits.contiguous().data(), 176 | targets.contiguous().data(), 177 | d_losses.contiguous().data(), 178 | num_classes, 179 | gamma, 180 | alpha, 181 | num_samples, 182 | d_logits.data()); 183 | }); 184 | 185 | THCudaCheck(cudaGetLastError()); 186 | return d_logits; 187 | } 188 | 189 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cuda/deform_pool_cuda.cu: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 
9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | void DeformablePSROIPoolForward( 20 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 21 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 22 | const int height, const int width, const int num_bbox, 23 | const int channels_trans, const int no_trans, const float spatial_scale, 24 | const int output_dim, const int group_size, const int pooled_size, 25 | const int part_size, const int sample_per_part, const float trans_std); 26 | 27 | void DeformablePSROIPoolBackwardAcc( 28 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 29 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 30 | at::Tensor trans_grad, const int batch, const int channels, 31 | const int height, const int width, const int num_bbox, 32 | const int channels_trans, const int no_trans, const float spatial_scale, 33 | const int output_dim, const int group_size, const int pooled_size, 34 | const int part_size, const int sample_per_part, const float trans_std); 35 | 36 | void deform_psroi_pooling_cuda_forward( 37 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 38 | at::Tensor top_count, const int no_trans, const float spatial_scale, 39 | const int output_dim, const int group_size, const int pooled_size, 40 | const int part_size, const int sample_per_part, const float trans_std) 41 | { 42 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 43 | 44 | const int batch = input.size(0); 45 | const int channels = input.size(1); 46 | const int height = input.size(2); 47 | const int width = input.size(3); 48 | const int channels_trans = no_trans ? 2 : trans.size(1); 49 | 50 | const int num_bbox = bbox.size(0); 51 | if (num_bbox != out.size(0)) 52 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 53 | out.size(0), num_bbox); 54 | 55 | DeformablePSROIPoolForward( 56 | input, bbox, trans, out, top_count, batch, channels, height, width, 57 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 58 | pooled_size, part_size, sample_per_part, trans_std); 59 | } 60 | 61 | void deform_psroi_pooling_cuda_backward( 62 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 63 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 64 | const int no_trans, const float spatial_scale, const int output_dim, 65 | const int group_size, const int pooled_size, const int part_size, 66 | const int sample_per_part, const float trans_std) 67 | { 68 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 69 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 70 | 71 | const int batch = input.size(0); 72 | const int channels = input.size(1); 73 | const int height = input.size(2); 74 | const int width = input.size(3); 75 | const int channels_trans = no_trans ? 
2 : trans.size(1); 76 | 77 | const int num_bbox = bbox.size(0); 78 | if (num_bbox != out_grad.size(0)) 79 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 80 | out_grad.size(0), num_bbox); 81 | 82 | DeformablePSROIPoolBackwardAcc( 83 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 84 | channels, height, width, num_bbox, channels_trans, no_trans, 85 | spatial_scale, output_dim, group_size, pooled_size, part_size, 86 | sample_per_part, trans_std); 87 | } 88 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cuda/ml_nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | if (a[5] != b[5]) { 15 | return 0.0; 16 | } 17 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 18 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 19 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 20 | float interS = width * height; 21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 23 | return interS / (Sa + Sb - interS); 24 | } 25 | 26 | __global__ void ml_nms_kernel(const int n_boxes, const float nms_overlap_thresh, 27 | const float *dev_boxes, unsigned long long *dev_mask) { 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | __shared__ float block_boxes[threadsPerBlock * 6]; 39 | if (threadIdx.x < col_size) { 40 | block_boxes[threadIdx.x * 6 + 0] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 0]; 42 | block_boxes[threadIdx.x * 6 + 1] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 1]; 44 | block_boxes[threadIdx.x * 6 + 2] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 2]; 46 | block_boxes[threadIdx.x * 6 + 3] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 3]; 48 | block_boxes[threadIdx.x * 6 + 4] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 4]; 50 | block_boxes[threadIdx.x * 6 + 5] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 5]; 52 | } 53 | __syncthreads(); 54 | 55 | if (threadIdx.x < row_size) { 56 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 57 | const float *cur_box = dev_boxes + cur_box_idx * 6; 58 | int i = 0; 59 | unsigned long long t = 0; 60 | int start = 0; 61 | if (row_start == col_start) { 62 | start = threadIdx.x + 1; 63 | } 64 | for (i = start; i < col_size; i++) { 65 | if (devIoU(cur_box, block_boxes + i * 6) > nms_overlap_thresh) { 66 | t |= 1ULL << i; 67 | } 68 | } 69 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 70 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 71 | } 72 | } 73 | 74 | // boxes is a N x 6 tensor 75 | at::Tensor ml_nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 76 | using scalar_t = float; 77 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 78 | auto scores = 
boxes.select(1, 4); 79 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 80 | auto boxes_sorted = boxes.index_select(0, order_t); 81 | 82 | int boxes_num = boxes.size(0); 83 | 84 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 85 | 86 | scalar_t* boxes_dev = boxes_sorted.data(); 87 | 88 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 89 | 90 | unsigned long long* mask_dev = NULL; 91 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 92 | // boxes_num * col_blocks * sizeof(unsigned long long))); 93 | 94 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 95 | 96 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 97 | THCCeilDiv(boxes_num, threadsPerBlock)); 98 | dim3 threads(threadsPerBlock); 99 | ml_nms_kernel<<>>(boxes_num, 100 | nms_overlap_thresh, 101 | boxes_dev, 102 | mask_dev); 103 | 104 | std::vector mask_host(boxes_num * col_blocks); 105 | THCudaCheck(cudaMemcpy(&mask_host[0], 106 | mask_dev, 107 | sizeof(unsigned long long) * boxes_num * col_blocks, 108 | cudaMemcpyDeviceToHost)); 109 | 110 | std::vector remv(col_blocks); 111 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 112 | 113 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 114 | int64_t* keep_out = keep.data(); 115 | 116 | int num_to_keep = 0; 117 | for (int i = 0; i < boxes_num; i++) { 118 | int nblock = i / threadsPerBlock; 119 | int inblock = i % threadsPerBlock; 120 | 121 | if (!(remv[nblock] & (1ULL << inblock))) { 122 | keep_out[num_to_keep++] = i; 123 | unsigned long long *p = &mask_host[0] + i * col_blocks; 124 | for (int j = nblock; j < col_blocks; j++) { 125 | remv[j] |= p[j]; 126 | } 127 | } 128 | } 129 | 130 | THCudaFree(state, mask_dev); 131 | // TODO improve this part 132 | return std::get<0>(order_t.index({ 133 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 134 | order_t.device(), keep.scalar_type()) 135 | }).sort(0, false)); 136 | } 137 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cuda/nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
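// Greedy NMS on the GPU: boxes are first sorted by score (column 4), each thread block
// then compares one tile of up to 64 boxes against another tile, and every suppression
// decision is packed as a bit into an unsigned long long mask; the host loop at the end
// walks this bitmask to collect the indices of the boxes that are kept.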
2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 15 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 16 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 17 | float interS = width * height; 18 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 19 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 20 | return interS / (Sa + Sb - interS); 21 | } 22 | 23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 24 | const float *dev_boxes, unsigned long long *dev_mask) { 25 | const int row_start = blockIdx.y; 26 | const int col_start = blockIdx.x; 27 | 28 | // if (row_start > col_start) return; 29 | 30 | const int row_size = 31 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 32 | const int col_size = 33 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 34 | 35 | __shared__ float block_boxes[threadsPerBlock * 5]; 36 | if (threadIdx.x < col_size) { 37 | block_boxes[threadIdx.x * 5 + 0] = 38 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 39 | block_boxes[threadIdx.x * 5 + 1] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 41 | block_boxes[threadIdx.x * 5 + 2] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 | block_boxes[threadIdx.x * 5 + 3] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 45 | block_boxes[threadIdx.x * 5 + 4] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 47 | } 48 | __syncthreads(); 49 | 50 | if (threadIdx.x < row_size) { 51 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 52 | const float *cur_box = dev_boxes + cur_box_idx * 5; 53 | int i = 0; 54 | unsigned long long t = 0; 55 | int start = 0; 56 | if (row_start == col_start) { 57 | start = threadIdx.x + 1; 58 | } 59 | for (i = start; i < col_size; i++) { 60 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 61 | t |= 1ULL << i; 62 | } 63 | } 64 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 65 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 66 | } 67 | } 68 | 69 | // boxes is a N x 5 tensor 70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 71 | using scalar_t = float; 72 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 73 | auto scores = boxes.select(1, 4); 74 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 75 | auto boxes_sorted = boxes.index_select(0, order_t); 76 | 77 | int boxes_num = boxes.size(0); 78 | 79 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 80 | 81 | scalar_t* boxes_dev = boxes_sorted.data(); 82 | 83 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 84 | 85 | unsigned long long* mask_dev = NULL; 86 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 87 | // boxes_num * col_blocks * sizeof(unsigned long long))); 88 | 89 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 90 | 91 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 92 | THCCeilDiv(boxes_num, threadsPerBlock)); 93 | dim3 threads(threadsPerBlock); 94 | nms_kernel<<>>(boxes_num, 95 | nms_overlap_thresh, 96 | boxes_dev, 97 | mask_dev); 98 | 99 | std::vector 
mask_host(boxes_num * col_blocks); 100 | THCudaCheck(cudaMemcpy(&mask_host[0], 101 | mask_dev, 102 | sizeof(unsigned long long) * boxes_num * col_blocks, 103 | cudaMemcpyDeviceToHost)); 104 | 105 | std::vector remv(col_blocks); 106 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 107 | 108 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 109 | int64_t* keep_out = keep.data(); 110 | 111 | int num_to_keep = 0; 112 | for (int i = 0; i < boxes_num; i++) { 113 | int nblock = i / threadsPerBlock; 114 | int inblock = i % threadsPerBlock; 115 | 116 | if (!(remv[nblock] & (1ULL << inblock))) { 117 | keep_out[num_to_keep++] = i; 118 | unsigned long long *p = &mask_host[0] + i * col_blocks; 119 | for (int j = nblock; j < col_blocks; j++) { 120 | remv[j] |= p[j]; 121 | } 122 | } 123 | } 124 | 125 | THCudaFree(state, mask_dev); 126 | // TODO improve this part 127 | return std::get<0>(order_t.index({ 128 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 129 | order_t.device(), keep.scalar_type()) 130 | }).sort(0, false)); 131 | } 132 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor SigmoidFocalLoss_forward_cuda( 7 | const at::Tensor& logits, 8 | const at::Tensor& targets, 9 | const int num_classes, 10 | const float gamma, 11 | const float alpha); 12 | 13 | at::Tensor SigmoidFocalLoss_backward_cuda( 14 | const at::Tensor& logits, 15 | const at::Tensor& targets, 16 | const at::Tensor& d_losses, 17 | const int num_classes, 18 | const float gamma, 19 | const float alpha); 20 | 21 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 22 | const at::Tensor& rois, 23 | const float spatial_scale, 24 | const int pooled_height, 25 | const int pooled_width, 26 | const int sampling_ratio); 27 | 28 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 29 | const at::Tensor& rois, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width, 37 | const int sampling_ratio); 38 | 39 | 40 | std::tuple ROIPool_forward_cuda(const at::Tensor& input, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width); 45 | 46 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 47 | const at::Tensor& input, 48 | const at::Tensor& rois, 49 | const at::Tensor& argmax, 50 | const float spatial_scale, 51 | const int pooled_height, 52 | const int pooled_width, 53 | const int batch_size, 54 | const int channels, 55 | const int height, 56 | const int width); 57 | 58 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 59 | at::Tensor ml_nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 60 | 61 | int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, 62 | at::Tensor offset, at::Tensor output, 63 | at::Tensor columns, at::Tensor ones, int kW, 64 | int kH, int dW, int dH, int padW, int padH, 65 | int dilationW, int dilationH, int group, 66 | int deformable_group, int im2col_step); 67 | 68 | int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, 69 | at::Tensor gradOutput, at::Tensor gradInput, 70 | at::Tensor gradOffset, 
at::Tensor weight, 71 | at::Tensor columns, int kW, int kH, int dW, 72 | int dH, int padW, int padH, int dilationW, 73 | int dilationH, int group, 74 | int deformable_group, int im2col_step); 75 | 76 | int deform_conv_backward_parameters_cuda( 77 | at::Tensor input, at::Tensor offset, at::Tensor gradOutput, 78 | at::Tensor gradWeight, // at::Tensor gradBias, 79 | at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, 80 | int padW, int padH, int dilationW, int dilationH, int group, 81 | int deformable_group, float scale, int im2col_step); 82 | 83 | void modulated_deform_conv_cuda_forward( 84 | at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, 85 | at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, 86 | int kernel_h, int kernel_w, const int stride_h, const int stride_w, 87 | const int pad_h, const int pad_w, const int dilation_h, 88 | const int dilation_w, const int group, const int deformable_group, 89 | const bool with_bias); 90 | 91 | void modulated_deform_conv_cuda_backward( 92 | at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, 93 | at::Tensor offset, at::Tensor mask, at::Tensor columns, 94 | at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, 95 | at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, 96 | int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, 97 | int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, 98 | const bool with_bias); 99 | 100 | void deform_psroi_pooling_cuda_forward( 101 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 102 | at::Tensor top_count, const int no_trans, const float spatial_scale, 103 | const int output_dim, const int group_size, const int pooled_size, 104 | const int part_size, const int sample_per_part, const float trans_std); 105 | 106 | void deform_psroi_pooling_cuda_backward( 107 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 108 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 109 | const int no_trans, const float spatial_scale, const int output_dim, 110 | const int group_size, const int pooled_size, const int part_size, 111 | const int sample_per_part, const float trans_std); 112 | 113 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 114 | const int height, 115 | const int width); 116 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/deform_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
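// Python-facing wrappers for deformable convolution and modulated deformable convolution:
// each function checks whether the input is a CUDA tensor and forwards to the
// corresponding *_cuda implementation declared in cuda/vision.h; there is no CPU
// fallback, so calling these with CPU tensors raises an error.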
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | int deform_conv_forward( 12 | at::Tensor input, 13 | at::Tensor weight, 14 | at::Tensor offset, 15 | at::Tensor output, 16 | at::Tensor columns, 17 | at::Tensor ones, 18 | int kW, 19 | int kH, 20 | int dW, 21 | int dH, 22 | int padW, 23 | int padH, 24 | int dilationW, 25 | int dilationH, 26 | int group, 27 | int deformable_group, 28 | int im2col_step) 29 | { 30 | if (input.type().is_cuda()) { 31 | #ifdef WITH_CUDA 32 | return deform_conv_forward_cuda( 33 | input, weight, offset, output, columns, ones, 34 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 35 | group, deformable_group, im2col_step 36 | ); 37 | #else 38 | AT_ERROR("Not compiled with GPU support"); 39 | #endif 40 | } 41 | AT_ERROR("Not implemented on the CPU"); 42 | } 43 | 44 | 45 | int deform_conv_backward_input( 46 | at::Tensor input, 47 | at::Tensor offset, 48 | at::Tensor gradOutput, 49 | at::Tensor gradInput, 50 | at::Tensor gradOffset, 51 | at::Tensor weight, 52 | at::Tensor columns, 53 | int kW, 54 | int kH, 55 | int dW, 56 | int dH, 57 | int padW, 58 | int padH, 59 | int dilationW, 60 | int dilationH, 61 | int group, 62 | int deformable_group, 63 | int im2col_step) 64 | { 65 | if (input.type().is_cuda()) { 66 | #ifdef WITH_CUDA 67 | return deform_conv_backward_input_cuda( 68 | input, offset, gradOutput, gradInput, gradOffset, weight, columns, 69 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 70 | group, deformable_group, im2col_step 71 | ); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | AT_ERROR("Not implemented on the CPU"); 77 | } 78 | 79 | 80 | int deform_conv_backward_parameters( 81 | at::Tensor input, 82 | at::Tensor offset, 83 | at::Tensor gradOutput, 84 | at::Tensor gradWeight, // at::Tensor gradBias, 85 | at::Tensor columns, 86 | at::Tensor ones, 87 | int kW, 88 | int kH, 89 | int dW, 90 | int dH, 91 | int padW, 92 | int padH, 93 | int dilationW, 94 | int dilationH, 95 | int group, 96 | int deformable_group, 97 | float scale, 98 | int im2col_step) 99 | { 100 | if (input.type().is_cuda()) { 101 | #ifdef WITH_CUDA 102 | return deform_conv_backward_parameters_cuda( 103 | input, offset, gradOutput, gradWeight, columns, ones, 104 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 105 | group, deformable_group, scale, im2col_step 106 | ); 107 | #else 108 | AT_ERROR("Not compiled with GPU support"); 109 | #endif 110 | } 111 | AT_ERROR("Not implemented on the CPU"); 112 | } 113 | 114 | 115 | void modulated_deform_conv_forward( 116 | at::Tensor input, 117 | at::Tensor weight, 118 | at::Tensor bias, 119 | at::Tensor ones, 120 | at::Tensor offset, 121 | at::Tensor mask, 122 | at::Tensor output, 123 | at::Tensor columns, 124 | int kernel_h, 125 | int kernel_w, 126 | const int stride_h, 127 | const int stride_w, 128 | const int pad_h, 129 | const int pad_w, 130 | const int dilation_h, 131 | const int dilation_w, 132 | const int group, 133 | const int deformable_group, 134 | const bool with_bias) 135 | { 136 | if (input.type().is_cuda()) { 137 | #ifdef WITH_CUDA 138 | return modulated_deform_conv_cuda_forward( 139 | input, weight, bias, ones, offset, mask, output, columns, 140 | kernel_h, kernel_w, stride_h, stride_w, 141 | pad_h, pad_w, dilation_h, dilation_w, 142 | group, deformable_group, with_bias 143 | ); 144 | #else 145 | AT_ERROR("Not compiled with GPU support"); 146 | #endif 147 | } 148 | AT_ERROR("Not implemented on the 
CPU"); 149 | } 150 | 151 | 152 | void modulated_deform_conv_backward( 153 | at::Tensor input, 154 | at::Tensor weight, 155 | at::Tensor bias, 156 | at::Tensor ones, 157 | at::Tensor offset, 158 | at::Tensor mask, 159 | at::Tensor columns, 160 | at::Tensor grad_input, 161 | at::Tensor grad_weight, 162 | at::Tensor grad_bias, 163 | at::Tensor grad_offset, 164 | at::Tensor grad_mask, 165 | at::Tensor grad_output, 166 | int kernel_h, 167 | int kernel_w, 168 | int stride_h, 169 | int stride_w, 170 | int pad_h, 171 | int pad_w, 172 | int dilation_h, 173 | int dilation_w, 174 | int group, 175 | int deformable_group, 176 | const bool with_bias) 177 | { 178 | if (input.type().is_cuda()) { 179 | #ifdef WITH_CUDA 180 | return modulated_deform_conv_cuda_backward( 181 | input, weight, bias, ones, offset, mask, columns, 182 | grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output, 183 | kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, 184 | group, deformable_group, with_bias 185 | ); 186 | #else 187 | AT_ERROR("Not compiled with GPU support"); 188 | #endif 189 | } 190 | AT_ERROR("Not implemented on the CPU"); 191 | } -------------------------------------------------------------------------------- /siamreppoints/models/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | const int sample_per_part, 56 | const float trans_std) 57 | { 58 | if (input.type().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/ml_nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor ml_nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const at::Tensor& labels, 13 | const float threshold) { 14 | 15 | if (dets.type().is_cuda()) { 16 | #ifdef WITH_CUDA 17 | // TODO raise error if not compiled with CUDA 18 | if (dets.numel() == 0) 19 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 20 | auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1); 21 | return ml_nms_cuda(b, threshold); 22 | #else 23 | AT_ERROR("Not compiled with GPU support"); 24 | #endif 25 | } 26 | AT_ERROR("CPU version not implemented"); 27 | } 28 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "nms.h" 3 | #include "ml_nms.h" 4 | #include "ROIAlign.h" 5 | #include "ROIPool.h" 6 | #include "SigmoidFocalLoss.h" 7 | #include "deform_conv.h" 8 | #include "deform_pool.h" 9 | 10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 11 | m.def("nms", &nms, "non-maximum suppression"); 12 | m.def("ml_nms", &ml_nms, "multi-label non-maximum suppression"); 13 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 14 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 15 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 16 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 17 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 18 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 19 | // dcn-v2 20 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 21 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 22 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 23 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 24 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 25 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 26 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 27 | } 28 | -------------------------------------------------------------------------------- /siamreppoints/models/head/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from siamreppoints.models.head.rpn import UPChannelRPN, DepthwiseRPN, MultiRPN 9 | 10 | RPNS = { 11 | 'UPChannelRPN': UPChannelRPN, 12 | 'DepthwiseRPN': DepthwiseRPN, 13 | 'MultiRPN': MultiRPN 14 | } 15 | 16 | def get_rpn_head(name, **kwargs): 17 | return RPNS[name](**kwargs) 18 | 19 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d 5 | from .misc import Conv2d 6 | from .misc import DFConv2d 7 | from .misc import ConvTranspose2d 8 | from .misc import BatchNorm2d 9 | from .misc import interpolate 10 | from .nms import nms, ml_nms 11 | from .roi_align import ROIAlign 12 | from .roi_align import roi_align 13 | from .roi_pool import ROIPool 14 | from .roi_pool import roi_pool 15 | from .smooth_l1_loss import smooth_l1_loss 16 | from .sigmoid_focal_loss import SigmoidFocalLoss 17 | from .iou_loss import IOULoss 18 | from .scale import Scale 19 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 20 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, \ 21 | ModulatedDeformConvPack 22 | from .dcn.deform_pool_func import deform_roi_pooling 23 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, \ 24 | ModulatedDeformRoIPoolingPack 25 | 26 | 27 | __all__ = [ 28 | "nms", 29 | "ml_nms", 30 | "roi_align", 31 | "ROIAlign", 32 | "roi_pool", 33 | "ROIPool", 34 | "smooth_l1_loss", 35 | "Conv2d", 36 | "DFConv2d", 37 | "ConvTranspose2d", 38 | "interpolate", 39 | "BatchNorm2d", 40 | "FrozenBatchNorm2d", 41 | "SigmoidFocalLoss", 42 | 'deform_conv', 43 | 'modulated_deform_conv', 44 | 'DeformConv', 45 | 'ModulatedDeformConv', 46 | 'ModulatedDeformConvPack', 47 | 'deform_roi_pooling', 48 | 'DeformRoIPooling', 49 | 'DeformRoIPoolingPack', 50 | 'ModulatedDeformRoIPoolingPack', 51 | "IOULoss", 52 | "Scale" 53 | ] 54 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import glob 3 | import os.path 4 | 5 | import torch 6 | 7 | try: 8 | from torch.utils.cpp_extension import load as load_ext 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | except ImportError: 11 | raise ImportError("The cpp layer extensions requires PyTorch 0.4 or higher") 12 | 13 | 14 | def _load_C_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | this_dir = os.path.dirname(this_dir) 17 | this_dir = os.path.join(this_dir, "csrc") 18 | 19 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 22 | 23 | source = main_file + source_cpu 24 | 25 | extra_cflags = [] 26 | if torch.cuda.is_available() and CUDA_HOME is not None: 27 | source.extend(source_cuda) 28 | extra_cflags = ["-DWITH_CUDA"] 29 | source = [os.path.join(this_dir, s) for s in source] 30 | extra_include_paths = [this_dir] 31 | return load_ext( 32 | "torchvision", 33 | source, 34 | extra_cflags=extra_cflags, 35 | extra_include_paths=extra_include_paths, 36 | ) 37 | 38 | 39 | _C = _load_C_extensions() 40 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/batch_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
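The frozen batch norm defined just below folds the stored running statistics into a fixed per-channel affine transform, so nothing is updated during training. A quick numeric check with made-up statistics (importing the layers package assumes the compiled extension has been built):

```python
import torch
from siamreppoints.models.layers import FrozenBatchNorm2d

bn = FrozenBatchNorm2d(2)
# pretend these buffers were loaded from a pretrained checkpoint
bn.running_mean.copy_(torch.tensor([0.5, -1.0]))
bn.running_var.copy_(torch.tensor([4.0, 0.25]))

x = torch.randn(1, 2, 3, 3)
expected = (x - bn.running_mean.view(1, -1, 1, 1)) * bn.running_var.view(1, -1, 1, 1).rsqrt()
print(torch.allclose(bn(x), expected))   # True, since weight=1 and bias=0 by default
```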
2 | import torch 3 | from torch import nn 4 | 5 | 6 | class FrozenBatchNorm2d(nn.Module): 7 | """ 8 | BatchNorm2d where the batch statistics and the affine parameters 9 | are fixed 10 | """ 11 | 12 | def __init__(self, n): 13 | super(FrozenBatchNorm2d, self).__init__() 14 | self.register_buffer("weight", torch.ones(n)) 15 | self.register_buffer("bias", torch.zeros(n)) 16 | self.register_buffer("running_mean", torch.zeros(n)) 17 | self.register_buffer("running_var", torch.ones(n)) 18 | 19 | def forward(self, x): 20 | scale = self.weight * self.running_var.rsqrt() 21 | bias = self.bias - self.running_mean * scale 22 | scale = scale.reshape(1, -1, 1, 1) 23 | bias = bias.reshape(1, -1, 1, 1) 24 | return x * scale + bias 25 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn) 3 | # -------------------------------------------------------------------------------- /siamreppoints/models/layers/dcn/deform_conv_module.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.modules.utils import _pair 6 | 7 | from .deform_conv_func import deform_conv, modulated_deform_conv 8 | 9 | 10 | class DeformConv(nn.Module): 11 | def __init__( 12 | self, 13 | in_channels, 14 | out_channels, 15 | kernel_size, 16 | stride=1, 17 | padding=0, 18 | dilation=1, 19 | groups=1, 20 | deformable_groups=1, 21 | bias=False 22 | ): 23 | super(DeformConv, self).__init__() 24 | self.with_bias = bias 25 | 26 | assert in_channels % groups == 0, \ 27 | 'in_channels {} cannot be divisible by groups {}'.format( 28 | in_channels, groups) 29 | assert out_channels % groups == 0, \ 30 | 'out_channels {} cannot be divisible by groups {}'.format( 31 | out_channels, groups) 32 | self.in_channels = in_channels 33 | self.out_channels = out_channels 34 | self.kernel_size = _pair(kernel_size) 35 | self.stride = _pair(stride) 36 | self.padding = _pair(padding) 37 | self.dilation = _pair(dilation) 38 | self.groups = groups 39 | self.deformable_groups = deformable_groups 40 | 41 | self.weight = nn.Parameter( 42 | torch.Tensor(out_channels, in_channels // self.groups, 43 | *self.kernel_size)) 44 | if self.with_bias: 45 | self.bias = nn.Parameter(torch.Tensor(out_channels)) 46 | 47 | self.reset_parameters() 48 | 49 | def reset_parameters(self): 50 | n = self.in_channels 51 | for k in self.kernel_size: 52 | n *= k 53 | stdv = 1. / math.sqrt(n) 54 | self.weight.data.uniform_(-stdv, stdv) 55 | if self.with_bias: 56 | torch.nn.init.constant_(self.bias, 0.) 
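`DeformConv` consumes an offset map predicted by a separate layer (its `forward` follows just below). A minimal sketch, assuming the DCN extension has been built and a CUDA device is available, since the CPU path is not implemented:

```python
import torch
import torch.nn as nn
from siamreppoints.models.layers import DeformConv

# for a 3x3 kernel with one deformable group, the offset map needs 2*3*3 = 18 channels
conv = DeformConv(64, 64, kernel_size=3, padding=1).cuda()
offset_pred = nn.Conv2d(64, 18, kernel_size=3, padding=1).cuda()

x = torch.randn(1, 64, 32, 32, device='cuda')
y = conv(x, offset_pred(x))
print(y.shape)   # torch.Size([1, 64, 32, 32])
```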
57 | 58 | def forward(self, input, offset): 59 | y = deform_conv(input, offset, self.weight, self.stride, 60 | self.padding, self.dilation, self.groups, 61 | self.deformable_groups) 62 | if self.with_bias: 63 | assert len(y.size()) == 4 64 | y = y + self.bias.reshape(1, -1, 1, 1) 65 | return y 66 | 67 | def __repr__(self): 68 | return "".join([ 69 | "{}(".format(self.__class__.__name__), 70 | "in_channels={}, ".format(self.in_channels), 71 | "out_channels={}, ".format(self.out_channels), 72 | "kernel_size={}, ".format(self.kernel_size), 73 | "stride={}, ".format(self.stride), 74 | "dilation={}, ".format(self.dilation), 75 | "padding={}, ".format(self.padding), 76 | "groups={}, ".format(self.groups), 77 | "deformable_groups={}, ".format(self.deformable_groups), 78 | "bias={})".format(self.with_bias), 79 | ]) 80 | 81 | 82 | class ModulatedDeformConv(nn.Module): 83 | def __init__( 84 | self, 85 | in_channels, 86 | out_channels, 87 | kernel_size, 88 | stride=1, 89 | padding=0, 90 | dilation=1, 91 | groups=1, 92 | deformable_groups=1, 93 | bias=True 94 | ): 95 | super(ModulatedDeformConv, self).__init__() 96 | self.in_channels = in_channels 97 | self.out_channels = out_channels 98 | self.kernel_size = _pair(kernel_size) 99 | self.stride = stride 100 | self.padding = padding 101 | self.dilation = dilation 102 | self.groups = groups 103 | self.deformable_groups = deformable_groups 104 | self.with_bias = bias 105 | 106 | self.weight = nn.Parameter(torch.Tensor( 107 | out_channels, 108 | in_channels // groups, 109 | *self.kernel_size 110 | )) 111 | if bias: 112 | self.bias = nn.Parameter(torch.Tensor(out_channels)) 113 | else: 114 | self.register_parameter('bias', None) 115 | self.reset_parameters() 116 | 117 | def reset_parameters(self): 118 | n = self.in_channels 119 | for k in self.kernel_size: 120 | n *= k 121 | stdv = 1. 
/ math.sqrt(n) 122 | self.weight.data.uniform_(-stdv, stdv) 123 | if self.bias is not None: 124 | self.bias.data.zero_() 125 | 126 | def forward(self, input, offset, mask): 127 | return modulated_deform_conv( 128 | input, offset, mask, self.weight, self.bias, self.stride, 129 | self.padding, self.dilation, self.groups, self.deformable_groups) 130 | 131 | def __repr__(self): 132 | return "".join([ 133 | "{}(".format(self.__class__.__name__), 134 | "in_channels={}, ".format(self.in_channels), 135 | "out_channels={}, ".format(self.out_channels), 136 | "kernel_size={}, ".format(self.kernel_size), 137 | "stride={}, ".format(self.stride), 138 | "dilation={}, ".format(self.dilation), 139 | "padding={}, ".format(self.padding), 140 | "groups={}, ".format(self.groups), 141 | "deformable_groups={}, ".format(self.deformable_groups), 142 | "bias={})".format(self.with_bias), 143 | ]) 144 | 145 | 146 | class ModulatedDeformConvPack(ModulatedDeformConv): 147 | def __init__(self, 148 | in_channels, 149 | out_channels, 150 | kernel_size, 151 | stride=1, 152 | padding=0, 153 | dilation=1, 154 | groups=1, 155 | deformable_groups=1, 156 | bias=True): 157 | super(ModulatedDeformConvPack, self).__init__( 158 | in_channels, out_channels, kernel_size, stride, padding, dilation, 159 | groups, deformable_groups, bias) 160 | 161 | self.conv_offset_mask = nn.Conv2d( 162 | self.in_channels // self.groups, 163 | self.deformable_groups * 3 * self.kernel_size[0] * 164 | self.kernel_size[1], 165 | kernel_size=self.kernel_size, 166 | stride=_pair(self.stride), 167 | padding=_pair(self.padding), 168 | bias=True) 169 | self.init_offset() 170 | 171 | def init_offset(self): 172 | self.conv_offset_mask.weight.data.zero_() 173 | self.conv_offset_mask.bias.data.zero_() 174 | 175 | def forward(self, input): 176 | out = self.conv_offset_mask(input) 177 | o1, o2, mask = torch.chunk(out, 3, dim=1) 178 | offset = torch.cat((o1, o2), dim=1) 179 | mask = torch.sigmoid(mask) 180 | return modulated_deform_conv( 181 | input, offset, mask, self.weight, self.bias, self.stride, 182 | self.padding, self.dilation, self.groups, self.deformable_groups) 183 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/dcn/deform_pool_func.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from siamreppoints.models import _C 6 | 7 | 8 | class DeformRoIPoolingFunction(Function): 9 | 10 | @staticmethod 11 | def forward( 12 | ctx, 13 | data, 14 | rois, 15 | offset, 16 | spatial_scale, 17 | out_size, 18 | out_channels, 19 | no_trans, 20 | group_size=1, 21 | part_size=None, 22 | sample_per_part=4, 23 | trans_std=.0 24 | ): 25 | ctx.spatial_scale = spatial_scale 26 | ctx.out_size = out_size 27 | ctx.out_channels = out_channels 28 | ctx.no_trans = no_trans 29 | ctx.group_size = group_size 30 | ctx.part_size = out_size if part_size is None else part_size 31 | ctx.sample_per_part = sample_per_part 32 | ctx.trans_std = trans_std 33 | 34 | assert 0.0 <= ctx.trans_std <= 1.0 35 | if not data.is_cuda: 36 | raise NotImplementedError 37 | 38 | n = rois.shape[0] 39 | output = data.new_empty(n, out_channels, out_size, out_size) 40 | output_count = data.new_empty(n, out_channels, out_size, out_size) 41 | _C.deform_psroi_pooling_forward( 42 | data, 43 | rois, 44 | offset, 45 | output, 46 | output_count, 47 | ctx.no_trans, 48 | ctx.spatial_scale, 49 | 
ctx.out_channels, 50 | ctx.group_size, 51 | ctx.out_size, 52 | ctx.part_size, 53 | ctx.sample_per_part, 54 | ctx.trans_std 55 | ) 56 | 57 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 58 | ctx.save_for_backward(data, rois, offset) 59 | ctx.output_count = output_count 60 | 61 | return output 62 | 63 | @staticmethod 64 | @once_differentiable 65 | def backward(ctx, grad_output): 66 | if not grad_output.is_cuda: 67 | raise NotImplementedError 68 | 69 | data, rois, offset = ctx.saved_tensors 70 | output_count = ctx.output_count 71 | grad_input = torch.zeros_like(data) 72 | grad_rois = None 73 | grad_offset = torch.zeros_like(offset) 74 | 75 | _C.deform_psroi_pooling_backward( 76 | grad_output, 77 | data, 78 | rois, 79 | offset, 80 | output_count, 81 | grad_input, 82 | grad_offset, 83 | ctx.no_trans, 84 | ctx.spatial_scale, 85 | ctx.out_channels, 86 | ctx.group_size, 87 | ctx.out_size, 88 | ctx.part_size, 89 | ctx.sample_per_part, 90 | ctx.trans_std 91 | ) 92 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None) 93 | 94 | 95 | deform_roi_pooling = DeformRoIPoolingFunction.apply 96 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/dcn/deform_pool_module.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from .deform_pool_func import deform_roi_pooling 4 | 5 | 6 | class DeformRoIPooling(nn.Module): 7 | 8 | def __init__(self, 9 | spatial_scale, 10 | out_size, 11 | out_channels, 12 | no_trans, 13 | group_size=1, 14 | part_size=None, 15 | sample_per_part=4, 16 | trans_std=.0): 17 | super(DeformRoIPooling, self).__init__() 18 | self.spatial_scale = spatial_scale 19 | self.out_size = out_size 20 | self.out_channels = out_channels 21 | self.no_trans = no_trans 22 | self.group_size = group_size 23 | self.part_size = out_size if part_size is None else part_size 24 | self.sample_per_part = sample_per_part 25 | self.trans_std = trans_std 26 | 27 | def forward(self, data, rois, offset): 28 | if self.no_trans: 29 | offset = data.new_empty(0) 30 | return deform_roi_pooling( 31 | data, rois, offset, self.spatial_scale, self.out_size, 32 | self.out_channels, self.no_trans, self.group_size, self.part_size, 33 | self.sample_per_part, self.trans_std) 34 | 35 | 36 | class DeformRoIPoolingPack(DeformRoIPooling): 37 | 38 | def __init__(self, 39 | spatial_scale, 40 | out_size, 41 | out_channels, 42 | no_trans, 43 | group_size=1, 44 | part_size=None, 45 | sample_per_part=4, 46 | trans_std=.0, 47 | deform_fc_channels=1024): 48 | super(DeformRoIPoolingPack, 49 | self).__init__(spatial_scale, out_size, out_channels, no_trans, 50 | group_size, part_size, sample_per_part, trans_std) 51 | 52 | self.deform_fc_channels = deform_fc_channels 53 | 54 | if not no_trans: 55 | self.offset_fc = nn.Sequential( 56 | nn.Linear(self.out_size * self.out_size * self.out_channels, 57 | self.deform_fc_channels), 58 | nn.ReLU(inplace=True), 59 | nn.Linear(self.deform_fc_channels, self.deform_fc_channels), 60 | nn.ReLU(inplace=True), 61 | nn.Linear(self.deform_fc_channels, 62 | self.out_size * self.out_size * 2)) 63 | self.offset_fc[-1].weight.data.zero_() 64 | self.offset_fc[-1].bias.data.zero_() 65 | 66 | def forward(self, data, rois): 67 | assert data.size(1) == self.out_channels 68 | if self.no_trans: 69 | offset = data.new_empty(0) 70 | return deform_roi_pooling( 71 | data, rois, offset, self.spatial_scale, self.out_size, 72 | self.out_channels, 
self.no_trans, self.group_size, 73 | self.part_size, self.sample_per_part, self.trans_std) 74 | else: 75 | n = rois.shape[0] 76 | offset = data.new_empty(0) 77 | x = deform_roi_pooling(data, rois, offset, self.spatial_scale, 78 | self.out_size, self.out_channels, True, 79 | self.group_size, self.part_size, 80 | self.sample_per_part, self.trans_std) 81 | offset = self.offset_fc(x.view(n, -1)) 82 | offset = offset.view(n, 2, self.out_size, self.out_size) 83 | return deform_roi_pooling( 84 | data, rois, offset, self.spatial_scale, self.out_size, 85 | self.out_channels, self.no_trans, self.group_size, 86 | self.part_size, self.sample_per_part, self.trans_std) 87 | 88 | 89 | class ModulatedDeformRoIPoolingPack(DeformRoIPooling): 90 | 91 | def __init__(self, 92 | spatial_scale, 93 | out_size, 94 | out_channels, 95 | no_trans, 96 | group_size=1, 97 | part_size=None, 98 | sample_per_part=4, 99 | trans_std=.0, 100 | deform_fc_channels=1024): 101 | super(ModulatedDeformRoIPoolingPack, self).__init__( 102 | spatial_scale, out_size, out_channels, no_trans, group_size, 103 | part_size, sample_per_part, trans_std) 104 | 105 | self.deform_fc_channels = deform_fc_channels 106 | 107 | if not no_trans: 108 | self.offset_fc = nn.Sequential( 109 | nn.Linear(self.out_size * self.out_size * self.out_channels, 110 | self.deform_fc_channels), 111 | nn.ReLU(inplace=True), 112 | nn.Linear(self.deform_fc_channels, self.deform_fc_channels), 113 | nn.ReLU(inplace=True), 114 | nn.Linear(self.deform_fc_channels, 115 | self.out_size * self.out_size * 2)) 116 | self.offset_fc[-1].weight.data.zero_() 117 | self.offset_fc[-1].bias.data.zero_() 118 | self.mask_fc = nn.Sequential( 119 | nn.Linear(self.out_size * self.out_size * self.out_channels, 120 | self.deform_fc_channels), 121 | nn.ReLU(inplace=True), 122 | nn.Linear(self.deform_fc_channels, 123 | self.out_size * self.out_size * 1), 124 | nn.Sigmoid()) 125 | self.mask_fc[2].weight.data.zero_() 126 | self.mask_fc[2].bias.data.zero_() 127 | 128 | def forward(self, data, rois): 129 | assert data.size(1) == self.out_channels 130 | if self.no_trans: 131 | offset = data.new_empty(0) 132 | return deform_roi_pooling( 133 | data, rois, offset, self.spatial_scale, self.out_size, 134 | self.out_channels, self.no_trans, self.group_size, 135 | self.part_size, self.sample_per_part, self.trans_std) 136 | else: 137 | n = rois.shape[0] 138 | offset = data.new_empty(0) 139 | x = deform_roi_pooling(data, rois, offset, self.spatial_scale, 140 | self.out_size, self.out_channels, True, 141 | self.group_size, self.part_size, 142 | self.sample_per_part, self.trans_std) 143 | offset = self.offset_fc(x.view(n, -1)) 144 | offset = offset.view(n, 2, self.out_size, self.out_size) 145 | mask = self.mask_fc(x.view(n, -1)) 146 | mask = mask.view(n, 1, self.out_size, self.out_size) 147 | return deform_roi_pooling( 148 | data, rois, offset, self.spatial_scale, self.out_size, 149 | self.out_channels, self.no_trans, self.group_size, 150 | self.part_size, self.sample_per_part, self.trans_std) * mask 151 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/iou_loss.py: -------------------------------------------------------------------------------- 1 | # GIoU and Linear IoU are added by following 2 | # https://github.com/yqyao/FCOS_PLUS/blob/master/maskrcnn_benchmark/layers/iou_loss.py. 
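`IOULoss` below works on (left, top, right, bottom) distances from a sampling location rather than on corner-format boxes; for a single location the IoU it derives reduces to the following small worked sketch (distances are made up):

```python
import torch

pred   = torch.tensor([[4., 4., 6., 6.]])   # l, t, r, b distances for one location
target = torch.tensor([[5., 5., 5., 5.]])

w_inter = torch.min(pred[:, 0], target[:, 0]) + torch.min(pred[:, 2], target[:, 2])
h_inter = torch.min(pred[:, 1], target[:, 1]) + torch.min(pred[:, 3], target[:, 3])
area_p  = (pred[:, 0] + pred[:, 2]) * (pred[:, 1] + pred[:, 3])
area_t  = (target[:, 0] + target[:, 2]) * (target[:, 1] + target[:, 3])
inter   = w_inter * h_inter
iou     = (inter + 1.0) / (area_p + area_t - inter + 1.0)
print(iou)   # tensor([0.6833]); the 'iou' loss variant would then be -log(iou)
```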
3 | import torch 4 | from torch import nn 5 | 6 | 7 | class IOULoss(nn.Module): 8 | def __init__(self, loss_type="iou"): 9 | super(IOULoss, self).__init__() 10 | self.loss_type = loss_type 11 | 12 | def forward(self, pred, target, weight=None): 13 | pred_left = pred[:, 0] 14 | pred_top = pred[:, 1] 15 | pred_right = pred[:, 2] 16 | pred_bottom = pred[:, 3] 17 | 18 | pred_center_x = (pred_left - pred_right) / 2 19 | pred_center_y = (pred_top - pred_bottom) / 2 20 | 21 | target_left = target[:, 0] 22 | target_top = target[:, 1] 23 | target_right = target[:, 2] 24 | target_bottom = target[:, 3] 25 | 26 | target_center_x = (target_left - target_right) / 2 27 | target_center_y = (target_top - target_bottom) / 2 28 | 29 | target_area = (target_left + target_right) * \ 30 | (target_top + target_bottom) 31 | pred_area = (pred_left + pred_right) * \ 32 | (pred_top + pred_bottom) 33 | 34 | w_intersect = torch.min(pred_left, target_left) + torch.min(pred_right, target_right) 35 | g_w_intersect = torch.max(pred_left, target_left) + torch.max( 36 | pred_right, target_right) 37 | h_intersect = torch.min(pred_bottom, target_bottom) + torch.min(pred_top, target_top) 38 | g_h_intersect = torch.max(pred_bottom, target_bottom) + torch.max(pred_top, target_top) 39 | ac_uion = g_w_intersect * g_h_intersect + 1e-7 40 | area_intersect = w_intersect * h_intersect 41 | area_union = target_area + pred_area - area_intersect 42 | ious = (area_intersect + 1.0) / (area_union + 1.0) 43 | gious = ious - (ac_uion - area_union) / ac_uion 44 | 45 | distance_center = (pred_center_x - target_center_x) * (pred_center_x - target_center_x) + (pred_center_y - target_center_y) * (pred_center_y - target_center_y) 46 | distance_intersect = g_w_intersect * g_w_intersect + g_h_intersect * g_h_intersect 47 | dious = ious - (distance_center / distance_intersect) 48 | 49 | if self.loss_type == 'iou': 50 | losses = -torch.log(ious) 51 | elif self.loss_type == 'linear_iou': 52 | losses = 1 - ious 53 | elif self.loss_type == 'giou': 54 | losses = 1 - gious 55 | elif self.loss_type == 'diou': 56 | losses = 1 - dious 57 | else: 58 | raise NotImplementedError 59 | 60 | if weight is not None and weight.sum() > 0: 61 | return (losses * weight).sum() 62 | else: 63 | assert losses.numel() != 0 64 | return losses.sum() 65 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | helper class that supports empty tensors on some nn functions. 4 | 5 | Ideally, add support directly in PyTorch to empty tensors in 6 | those functions. 
7 | 8 | This can be removed once https://github.com/pytorch/pytorch/issues/12013 9 | is implemented 10 | """ 11 | 12 | import math 13 | import torch 14 | from torch.nn.modules.utils import _ntuple 15 | 16 | 17 | class _NewEmptyTensorOp(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, new_shape): 20 | ctx.shape = x.shape 21 | return x.new_empty(new_shape) 22 | 23 | @staticmethod 24 | def backward(ctx, grad): 25 | shape = ctx.shape 26 | return _NewEmptyTensorOp.apply(grad, shape), None 27 | 28 | 29 | class Conv2d(torch.nn.Conv2d): 30 | def forward(self, x): 31 | if x.numel() > 0: 32 | return super(Conv2d, self).forward(x) 33 | # get output shape 34 | 35 | output_shape = [ 36 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 37 | for i, p, di, k, d in zip( 38 | x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride 39 | ) 40 | ] 41 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape 42 | return _NewEmptyTensorOp.apply(x, output_shape) 43 | 44 | 45 | class ConvTranspose2d(torch.nn.ConvTranspose2d): 46 | def forward(self, x): 47 | if x.numel() > 0: 48 | return super(ConvTranspose2d, self).forward(x) 49 | # get output shape 50 | 51 | output_shape = [ 52 | (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op 53 | for i, p, di, k, d, op in zip( 54 | x.shape[-2:], 55 | self.padding, 56 | self.dilation, 57 | self.kernel_size, 58 | self.stride, 59 | self.output_padding, 60 | ) 61 | ] 62 | output_shape = [x.shape[0], self.bias.shape[0]] + output_shape 63 | return _NewEmptyTensorOp.apply(x, output_shape) 64 | 65 | 66 | class BatchNorm2d(torch.nn.BatchNorm2d): 67 | def forward(self, x): 68 | if x.numel() > 0: 69 | return super(BatchNorm2d, self).forward(x) 70 | # get output shape 71 | output_shape = x.shape 72 | return _NewEmptyTensorOp.apply(x, output_shape) 73 | 74 | 75 | def interpolate( 76 | input, size=None, scale_factor=None, mode="nearest", align_corners=None 77 | ): 78 | if input.numel() > 0: 79 | return torch.nn.functional.interpolate( 80 | input, size, scale_factor, mode, align_corners 81 | ) 82 | 83 | def _check_size_scale_factor(dim): 84 | if size is None and scale_factor is None: 85 | raise ValueError("either size or scale_factor should be defined") 86 | if size is not None and scale_factor is not None: 87 | raise ValueError("only one of size or scale_factor should be defined") 88 | if ( 89 | scale_factor is not None 90 | and isinstance(scale_factor, tuple) 91 | and len(scale_factor) != dim 92 | ): 93 | raise ValueError( 94 | "scale_factor shape must match input shape. 
" 95 | "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) 96 | ) 97 | 98 | def _output_size(dim): 99 | _check_size_scale_factor(dim) 100 | if size is not None: 101 | return size 102 | scale_factors = _ntuple(dim)(scale_factor) 103 | # math.floor might return float in py2.7 104 | return [ 105 | int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) 106 | ] 107 | 108 | output_shape = tuple(_output_size(2)) 109 | output_shape = input.shape[:-2] + output_shape 110 | return _NewEmptyTensorOp.apply(input, output_shape) 111 | 112 | 113 | class DFConv2d(torch.nn.Module): 114 | """Deformable convolutional layer""" 115 | def __init__( 116 | self, 117 | in_channels, 118 | out_channels, 119 | with_modulated_dcn=True, 120 | kernel_size=3, 121 | stride=1, 122 | groups=1, 123 | padding=1, 124 | dilation=1, 125 | deformable_groups=1, 126 | bias=False 127 | ): 128 | super(DFConv2d, self).__init__() 129 | if isinstance(kernel_size, (list, tuple)): 130 | assert len(kernel_size) == 2 131 | offset_base_channels = kernel_size[0] * kernel_size[1] 132 | else: 133 | offset_base_channels = kernel_size * kernel_size 134 | if with_modulated_dcn: 135 | from siamreppoints.models.layers import ModulatedDeformConv 136 | offset_channels = offset_base_channels * 3 # default: 27 137 | conv_block = ModulatedDeformConv 138 | else: 139 | from siamreppoints.models.layers import DeformConv 140 | offset_channels = offset_base_channels * 2 # default: 18 141 | conv_block = DeformConv 142 | self.offset = Conv2d( 143 | in_channels, 144 | deformable_groups * offset_channels, 145 | kernel_size=kernel_size, 146 | stride=stride, 147 | padding=padding, 148 | groups=1, 149 | dilation=dilation 150 | ) 151 | for l in [self.offset, ]: 152 | torch.nn.init.kaiming_uniform_(l.weight, a=1) 153 | torch.nn.init.constant_(l.bias, 0.) 154 | self.conv = conv_block( 155 | in_channels, 156 | out_channels, 157 | kernel_size=kernel_size, 158 | stride=stride, 159 | padding=padding, 160 | dilation=dilation, 161 | groups=groups, 162 | deformable_groups=deformable_groups, 163 | bias=bias 164 | ) 165 | self.with_modulated_dcn = with_modulated_dcn 166 | self.kernel_size = kernel_size 167 | self.stride = stride 168 | self.padding = padding 169 | self.dilation = dilation 170 | self.offset_base_channels = offset_base_channels 171 | 172 | def forward(self, x): 173 | assert x.numel() > 0, "only non-empty tensors are supported" 174 | if x.numel() > 0: 175 | if not self.with_modulated_dcn: 176 | offset = self.offset(x) 177 | x = self.conv(x, offset) 178 | else: 179 | offset_mask = self.offset(x) 180 | split_point = self.offset_base_channels * 2 181 | offset = offset_mask[:, :split_point, :, :] 182 | mask = offset_mask[:, split_point:, :, :].sigmoid() 183 | x = self.conv(x, offset, mask) 184 | return x 185 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from siamreppoints.models import _C 4 | 5 | nms = _C.nms 6 | ml_nms = _C.ml_nms 7 | # nms.__doc__ = """ 8 | # This function performs Non-maximum suppresion""" 9 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from siamreppoints.models import _C 9 | 10 | import pdb 11 | 12 | class _ROIAlign(Function): 13 | @staticmethod 14 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 15 | ctx.save_for_backward(roi) 16 | ctx.output_size = _pair(output_size) 17 | ctx.spatial_scale = spatial_scale 18 | ctx.sampling_ratio = sampling_ratio 19 | ctx.input_shape = input.size() 20 | output = _C.roi_align_forward(input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | rois, = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | sampling_ratio = ctx.sampling_ratio 30 | bs, ch, h, w = ctx.input_shape 31 | grad_input = _C.roi_align_backward( 32 | grad_output, 33 | rois, 34 | spatial_scale, 35 | output_size[0], 36 | output_size[1], 37 | bs, 38 | ch, 39 | h, 40 | w, 41 | sampling_ratio, 42 | ) 43 | return grad_input, None, None, None, None 44 | 45 | 46 | roi_align = _ROIAlign.apply 47 | 48 | 49 | class ROIAlign(nn.Module): 50 | def __init__(self, output_size, spatial_scale, sampling_ratio): 51 | super(ROIAlign, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | self.sampling_ratio = sampling_ratio 55 | 56 | def forward(self, input, rois): 57 | return roi_align( 58 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 59 | ) 60 | 61 | def __repr__(self): 62 | tmpstr = self.__class__.__name__ + "(" 63 | tmpstr += "output_size=" + str(self.output_size) 64 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 65 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 66 | tmpstr += ")" 67 | return tmpstr 68 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
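A minimal usage sketch of the `ROIAlign` module above, assuming the compiled extension is available and the usual maskrcnn-benchmark roi layout of (batch_index, x1, y1, x2, y2) per row (all numbers are illustrative):

```python
import torch
from siamreppoints.models.layers import ROIAlign

pool = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 8, sampling_ratio=2)

feat = torch.randn(1, 256, 32, 32, device='cuda')                 # stride-8 feature map
rois = torch.tensor([[0., 16., 16., 112., 112.]], device='cuda')  # in input-image coordinates
print(pool(feat, rois).shape)   # torch.Size([1, 256, 7, 7])
```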
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from siamreppoints.models import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class Scale(nn.Module): 6 | def __init__(self, init_value=1.0): 7 | super(Scale, self).__init__() 8 | self.scale = nn.Parameter(torch.FloatTensor([init_value])) 9 | 10 | def forward(self, input): 11 | return input * self.scale 12 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from siamreppoints.models import _C 7 | 8 | # TODO: Use JIT to replace CUDA implementation in the future. 
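For reference, the per-element focal loss computed below (the CUDA and CPU paths give the same result) reduces to the following for a toy batch; the γ and α used here are just common default values, not necessarily the ones set in the configs:

```python
import torch

gamma, alpha = 2.0, 0.25
logits  = torch.tensor([[2.0, -1.0]])   # one sample, two foreground classes
targets = torch.tensor([1])             # class ids are 1-based; 0 would mean background

p = torch.sigmoid(logits)
class_range = torch.arange(1, logits.shape[1] + 1).unsqueeze(0)
pos = (targets.unsqueeze(1) == class_range).float()
neg = ((targets.unsqueeze(1) != class_range) & (targets.unsqueeze(1) >= 0)).float()
loss = -pos * alpha * (1 - p) ** gamma * torch.log(p) \
       - neg * (1 - alpha) * p ** gamma * torch.log(1 - p)
print(loss)   # the easy positive and the easy negative both contribute only a small loss
```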
9 | class _SigmoidFocalLoss(Function): 10 | @staticmethod 11 | def forward(ctx, logits, targets, gamma, alpha): 12 | ctx.save_for_backward(logits, targets) 13 | num_classes = logits.shape[1] 14 | ctx.num_classes = num_classes 15 | ctx.gamma = gamma 16 | ctx.alpha = alpha 17 | 18 | losses = _C.sigmoid_focalloss_forward( 19 | logits, targets, num_classes, gamma, alpha 20 | ) 21 | return losses 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, d_loss): 26 | logits, targets = ctx.saved_tensors 27 | num_classes = ctx.num_classes 28 | gamma = ctx.gamma 29 | alpha = ctx.alpha 30 | d_loss = d_loss.contiguous() 31 | d_logits = _C.sigmoid_focalloss_backward( 32 | logits, targets, d_loss, num_classes, gamma, alpha 33 | ) 34 | return d_logits, None, None, None, None 35 | 36 | 37 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 38 | 39 | 40 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha): 41 | num_classes = logits.shape[1] 42 | gamma = gamma[0] 43 | alpha = alpha[0] 44 | dtype = targets.dtype 45 | device = targets.device 46 | class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0) 47 | 48 | t = targets.unsqueeze(1) 49 | p = torch.sigmoid(logits) 50 | term1 = (1 - p) ** gamma * torch.log(p) 51 | term2 = p ** gamma * torch.log(1 - p) 52 | return -(t == class_range).float() * term1 * alpha - ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 53 | 54 | 55 | class SigmoidFocalLoss(nn.Module): 56 | def __init__(self, gamma, alpha): 57 | super(SigmoidFocalLoss, self).__init__() 58 | self.gamma = gamma 59 | self.alpha = alpha 60 | 61 | def forward(self, logits, targets): 62 | device = logits.device 63 | if logits.is_cuda: 64 | loss_func = sigmoid_focal_loss_cuda 65 | else: 66 | loss_func = sigmoid_focal_loss_cpu 67 | 68 | loss = loss_func(logits, targets, self.gamma, self.alpha) 69 | #return loss.sum() 70 | return loss 71 | 72 | def __repr__(self): 73 | tmpstr = self.__class__.__name__ + "(" 74 | tmpstr += "gamma=" + str(self.gamma) 75 | tmpstr += ", alpha=" + str(self.alpha) 76 | tmpstr += ")" 77 | return tmpstr 78 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # TODO maybe push this to nn? 6 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 7 | """ 8 | very similar to the smooth_l1_loss from pytorch, but with 9 | the extra beta parameter 10 | """ 11 | n = torch.abs(input - target) 12 | cond = n < beta 13 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 14 | if size_average: 15 | return loss.mean() 16 | return loss.sum() 17 | -------------------------------------------------------------------------------- /siamreppoints/models/model_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
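`smooth_l1_loss` above behaves quadratically for errors below `beta` and linearly above it; a short numeric check (values are arbitrary):

```python
import torch
from siamreppoints.models.layers import smooth_l1_loss

pred   = torch.tensor([0.00, 1.0, 3.0])
target = torch.tensor([0.05, 1.5, 1.0])

# |0.05| < beta -> 0.5 * 0.05**2 / beta; the other two errors are >= beta -> |n| - 0.5 * beta
print(smooth_l1_loss(pred, target, beta=1.0 / 9, size_average=False))   # ~2.40
```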
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | from siamreppoints.core.config import cfg 12 | from siamreppoints.models.backbone import get_backbone 13 | from siamreppoints.models.head import get_rpn_head 14 | from siamreppoints.models.neck import get_neck 15 | 16 | class ModelBuilder(nn.Module): 17 | def __init__(self): 18 | super(ModelBuilder, self).__init__() 19 | 20 | # build backbone 21 | self.backbone = get_backbone(cfg.BACKBONE.TYPE, 22 | **cfg.BACKBONE.KWARGS) 23 | 24 | # build adjust layer 25 | if cfg.ADJUST.ADJUST: 26 | self.neck = get_neck(cfg.ADJUST.TYPE, 27 | **cfg.ADJUST.KWARGS) 28 | 29 | # build rpn head 30 | self.rpn_head = get_rpn_head(cfg.RPN.TYPE, 31 | **cfg.RPN.KWARGS) 32 | 33 | def instance(self, x): 34 | xf = self.backbone(x) 35 | if cfg.ADJUST.ADJUST: 36 | xf = self.neck(xf) 37 | #self.cf = xf[cfg.ADJUST.LAYER-1] 38 | self.cf = torch.cat([xf[2], xf[1]], dim=1) 39 | 40 | def template(self, z): 41 | zf = self.backbone(z) 42 | if cfg.ADJUST.ADJUST: 43 | zf = self.neck(zf) 44 | self.zf = zf 45 | 46 | def track(self, x, instance_size): 47 | xf = self.backbone(x) 48 | if cfg.ADJUST.ADJUST: 49 | xf = self.neck(xf) 50 | 51 | cls, pts_preds_init, pts_preds_refine = self.rpn_head(self.zf, xf, instance_size) 52 | 53 | cls = cls.permute(0, 2, 3, 1) 54 | cls = cls.reshape(cls.shape[0], -1, 1) 55 | cls = torch.sigmoid(cls) 56 | 57 | #self.cf = xf[cfg.ADJUST.LAYER-1] 58 | self.cf = torch.cat([xf[2], xf[1]], dim=1) 59 | return { 60 | 'score': cls, 61 | 'bbox': pts_preds_refine, 62 | } 63 | 64 | -------------------------------------------------------------------------------- /siamreppoints/models/neck/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | from siamreppoints.models.neck.neck import AdjustLayer, AdjustAllLayer 13 | 14 | NECKS = { 15 | 'AdjustLayer': AdjustLayer, 16 | 'AdjustAllLayer': AdjustAllLayer 17 | } 18 | 19 | def get_neck(name, **kwargs): 20 | return NECKS[name](**kwargs) 21 | -------------------------------------------------------------------------------- /siamreppoints/models/neck/neck.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
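The tracking flow through `ModelBuilder` above is: call `template()` once on the exemplar crop, then `track()` on every search-region crop. A rough sketch, assuming `cfg` has been merged from one of the yaml files in `experiments/siamreppoints` and a checkpoint has been loaded; the 127/255 crop sizes are only typical Siamese-tracker values, the real ones come from the config:

```python
import torch
from siamreppoints.models.model_builder import ModelBuilder

model = ModelBuilder().cuda().eval()

z = torch.randn(1, 3, 127, 127, device='cuda')   # exemplar crop from the first frame
x = torch.randn(1, 3, 255, 255, device='cuda')   # search-region crop from the current frame

with torch.no_grad():
    model.template(z)
    outputs = model.track(x, instance_size=255)

# per-location foreground scores and refined point-set/box predictions
print(outputs['score'].shape, outputs['bbox'].shape)
```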
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import torch.nn as nn 9 | import torch 10 | import torch.nn.functional as F 11 | 12 | ##from siamreppoints.models.external.PreciseRoIPooling.pytorch.prroi_pool import PrRoIPool2D 13 | 14 | class AdjustLayer(nn.Module): 15 | def __init__(self, in_channels, out_channels, center_size=7): 16 | super(AdjustLayer, self).__init__() 17 | self.downsample = nn.Sequential( 18 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False), 19 | nn.BatchNorm2d(out_channels), 20 | ) 21 | self.center_size = center_size 22 | 23 | def forward(self, x): 24 | x = self.downsample(x) 25 | if x.size(3) < 20: 26 | l = (x.size(3) - self.center_size) // 2 27 | r = l + self.center_size 28 | x = x[:, :, l:r, l:r] 29 | return x 30 | 31 | 32 | class AdjustAllLayer(nn.Module): 33 | def __init__(self, in_channels, out_channels, center_size=7): 34 | super(AdjustAllLayer, self).__init__() 35 | self.num = len(out_channels) 36 | if self.num == 1: 37 | self.downsample = AdjustLayer(in_channels[0], 38 | out_channels[0], 39 | center_size) 40 | else: 41 | for i in range(self.num): 42 | self.add_module('downsample'+str(i+2), 43 | AdjustLayer(in_channels[i], 44 | out_channels[i], 45 | center_size)) 46 | 47 | def forward(self, features): 48 | if self.num == 1: 49 | return self.downsample(features) 50 | else: 51 | out = [] 52 | for i in range(self.num): 53 | adj_layer = getattr(self, 'downsample'+str(i+2)) 54 | out.append(adj_layer(features[i])) 55 | 56 | return out 57 | -------------------------------------------------------------------------------- /siamreppoints/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
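`AdjustLayer` above reduces channels with a 1x1 conv and, for small (template-sized) feature maps, keeps only the central `center_size` x `center_size` region; search-region features larger than 20 pass through spatially unchanged. A small shape check (channel sizes are illustrative):

```python
import torch
from siamreppoints.models.neck.neck import AdjustLayer

adjust = AdjustLayer(in_channels=1024, out_channels=256, center_size=7)

zf = torch.randn(1, 1024, 15, 15)   # template feature: cropped to the central 7x7
xf = torch.randn(1, 1024, 31, 31)   # search feature: spatial size kept

print(adjust(zf).shape)   # torch.Size([1, 256, 7, 7])
print(adjust(xf).shape)   # torch.Size([1, 256, 31, 31])
```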
3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | 15 | requirements = [ 16 | "torchvision", 17 | "ninja", 18 | "yacs", 19 | "cython", 20 | "matplotlib", 21 | "tqdm", 22 | "opencv-python", 23 | "scikit-image" 24 | ] 25 | 26 | 27 | def get_extensions(): 28 | extensions_dir = os.path.join("models", "csrc") 29 | 30 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 31 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 32 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 33 | sources = main_file + source_cpu 34 | 35 | extension = CppExtension 36 | 37 | extra_compile_args = {"cxx": []} 38 | define_macros = [] 39 | 40 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 41 | extension = CUDAExtension 42 | sources += source_cuda 43 | define_macros += [("WITH_CUDA", None)] 44 | extra_compile_args["nvcc"] = [ 45 | "-DCUDA_HAS_FP16=1", 46 | "-D__CUDA_NO_HALF_OPERATORS__", 47 | "-D__CUDA_NO_HALF_CONVERSIONS__", 48 | "-D__CUDA_NO_HALF2_OPERATORS__", 49 | ] 50 | 51 | include_dirs = [extensions_dir] 52 | 53 | ext_modules = [ 54 | extension( 55 | "models._C", 56 | sources, 57 | include_dirs=include_dirs, 58 | define_macros=define_macros, 59 | extra_compile_args=extra_compile_args 60 | ) 61 | ] 62 | 63 | return ext_modules 64 | 65 | 66 | setup( 67 | name="siamreppoints", 68 | version="0.0.1", 69 | author="UZI", 70 | description="target tracking in pytorch", 71 | packages=find_packages(exclude=("configs", "tests",)), 72 | ext_modules=get_extensions(), 73 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 74 | ) 75 | -------------------------------------------------------------------------------- /siamreppoints/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/siamreppoints/tracker/__init__.py -------------------------------------------------------------------------------- /siamreppoints/tracker/base_tracker.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
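Besides auto-detecting CUDA, the setup script above also honours a `FORCE_CUDA=1` environment variable, which is handy when the kernels have to be compiled in a CPU-only environment (for example a build container) for later GPU use; the selection logic it applies is simply:

```python
import os
import torch
from torch.utils.cpp_extension import CUDA_HOME

use_cuda = (torch.cuda.is_available() and CUDA_HOME is not None) \
    or os.getenv("FORCE_CUDA", "0") == "1"
print("building the CUDA kernels:", use_cuda)
```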
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import cv2 9 | import numpy as np 10 | import torch 11 | 12 | from siamreppoints.core.config import cfg 13 | 14 | 15 | class BaseTracker(object): 16 | """ Base tracker of single objec tracking 17 | """ 18 | def init(self, img, bbox): 19 | """ 20 | args: 21 | img(np.ndarray): BGR image 22 | bbox(list): [x, y, width, height] 23 | x, y need to be 0-based 24 | """ 25 | raise NotImplementedError 26 | 27 | def track(self, img): 28 | """ 29 | args: 30 | img(np.ndarray): BGR image 31 | return: 32 | bbox(list):[x, y, width, height] 33 | """ 34 | raise NotImplementedError 35 | 36 | 37 | class SiameseTracker(BaseTracker): 38 | def get_subwindow(self, im, pos, model_sz, original_sz, avg_chans): 39 | """ 40 | args: 41 | im: bgr based image 42 | pos: center position 43 | model_sz: exemplar size 44 | s_z: original size 45 | avg_chans: channel average 46 | """ 47 | if isinstance(pos, float): 48 | pos = [pos, pos] 49 | sz = original_sz 50 | im_sz = im.shape 51 | c = (original_sz + 1) / 2 52 | # context_xmin = round(pos[0] - c) # py2 and py3 round 53 | context_xmin = np.floor(pos[0] - c + 0.5) 54 | context_xmax = context_xmin + sz - 1 55 | # context_ymin = round(pos[1] - c) 56 | context_ymin = np.floor(pos[1] - c + 0.5) 57 | context_ymax = context_ymin + sz - 1 58 | left_pad = int(max(0., -context_xmin)) 59 | top_pad = int(max(0., -context_ymin)) 60 | right_pad = int(max(0., context_xmax - im_sz[1] + 1)) 61 | bottom_pad = int(max(0., context_ymax - im_sz[0] + 1)) 62 | 63 | context_xmin = context_xmin + left_pad 64 | context_xmax = context_xmax + left_pad 65 | context_ymin = context_ymin + top_pad 66 | context_ymax = context_ymax + top_pad 67 | 68 | r, c, k = im.shape 69 | if any([top_pad, bottom_pad, left_pad, right_pad]): 70 | size = (r + top_pad + bottom_pad, c + left_pad + right_pad, k) 71 | te_im = np.zeros(size, np.uint8) 72 | te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im 73 | if top_pad: 74 | te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans 75 | if bottom_pad: 76 | te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans 77 | if left_pad: 78 | te_im[:, 0:left_pad, :] = avg_chans 79 | if right_pad: 80 | te_im[:, c + left_pad:, :] = avg_chans 81 | im_patch = te_im[int(context_ymin):int(context_ymax + 1), 82 | int(context_xmin):int(context_xmax + 1), :] 83 | else: 84 | im_patch = im[int(context_ymin):int(context_ymax + 1), 85 | int(context_xmin):int(context_xmax + 1), :] 86 | 87 | if not np.array_equal(model_sz, original_sz): 88 | im_patch = cv2.resize(im_patch, (model_sz, model_sz)) 89 | im_patch = im_patch.transpose(2, 0, 1) 90 | im_patch = im_patch[np.newaxis, :, :, :] 91 | im_patch = im_patch.astype(np.float32) 92 | im_patch = torch.from_numpy(im_patch) 93 | if cfg.CUDA: 94 | im_patch = im_patch.cuda() 95 | return im_patch 96 | -------------------------------------------------------------------------------- /siamreppoints/tracker/tracker_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
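`get_subwindow` above crops a square region centred on the target, pads anything that falls outside the frame with the per-channel mean colour, and resizes the patch to the model input size. A usage sketch (the file name and all numbers are hypothetical):

```python
import cv2
import numpy as np
from siamreppoints.tracker.base_tracker import SiameseTracker

tracker = SiameseTracker()
img = cv2.imread('frame_0001.jpg')           # hypothetical BGR frame
avg_chans = np.mean(img, axis=(0, 1))        # per-channel mean used for padding

# take a 200x200 window centred at (x=150, y=120) and resize it to 127x127
patch = tracker.get_subwindow(img, pos=(150.0, 120.0), model_sz=127,
                              original_sz=200, avg_chans=avg_chans)
print(patch.shape)   # torch.Size([1, 3, 127, 127]), moved to the GPU when cfg.CUDA is set
```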
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from siamreppoints.core.config import cfg 9 | from siamreppoints.tracker.siamreppoints_tracker import SiamReppointsTracker 10 | 11 | 12 | TRACKS = { 13 | 'SiamReppointsTracker': SiamReppointsTracker 14 | } 15 | 16 | 17 | def build_tracker(model): 18 | return TRACKS[cfg.TRACK.TYPE](model) 19 | -------------------------------------------------------------------------------- /siamreppoints/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/siamreppoints/utils/__init__.py -------------------------------------------------------------------------------- /siamreppoints/utils/anchor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import math 9 | 10 | import numpy as np 11 | 12 | from siamreppoints.utils.bbox import corner2center, center2corner 13 | 14 | 15 | class Anchors: 16 | """ 17 | This class generate anchors. 18 | """ 19 | def __init__(self, stride, ratios, scales, image_center=0, size=0): 20 | self.stride = stride 21 | self.ratios = ratios 22 | self.scales = scales 23 | self.image_center = image_center 24 | self.size = size 25 | 26 | self.anchor_num = len(self.scales) * len(self.ratios) 27 | 28 | self.anchors = None 29 | 30 | self.generate_anchors() 31 | 32 | def generate_anchors(self): 33 | """ 34 | generate anchors based on predefined configuration 35 | """ 36 | self.anchors = np.zeros((self.anchor_num, 4), dtype=np.float32) 37 | size = self.stride * self.stride 38 | count = 0 39 | for r in self.ratios: 40 | ws = int(math.sqrt(size*1. 
/ r)) 41 | hs = int(ws * r) 42 | 43 | for s in self.scales: 44 | w = ws * s 45 | h = hs * s 46 | self.anchors[count][:] = [-w*0.5, -h*0.5, w*0.5, h*0.5][:] 47 | count += 1 48 | 49 | def generate_all_anchors(self, im_c, size): 50 | """ 51 | im_c: image center 52 | size: image size 53 | """ 54 | if self.image_center == im_c and self.size == size: 55 | return False 56 | self.image_center = im_c 57 | self.size = size 58 | 59 | a0x = im_c - size // 2 * self.stride 60 | ori = np.array([a0x] * 4, dtype=np.float32) 61 | zero_anchors = self.anchors + ori 62 | 63 | x1 = zero_anchors[:, 0] 64 | y1 = zero_anchors[:, 1] 65 | x2 = zero_anchors[:, 2] 66 | y2 = zero_anchors[:, 3] 67 | 68 | x1, y1, x2, y2 = map(lambda x: x.reshape(self.anchor_num, 1, 1), 69 | [x1, y1, x2, y2]) 70 | cx, cy, w, h = corner2center([x1, y1, x2, y2]) 71 | 72 | disp_x = np.arange(0, size).reshape(1, 1, -1) * self.stride 73 | disp_y = np.arange(0, size).reshape(1, -1, 1) * self.stride 74 | 75 | cx = cx + disp_x 76 | cy = cy + disp_y 77 | 78 | # broadcast 79 | zero = np.zeros((self.anchor_num, size, size), dtype=np.float32) 80 | cx, cy, w, h = map(lambda x: x + zero, [cx, cy, w, h]) 81 | x1, y1, x2, y2 = center2corner([cx, cy, w, h]) 82 | 83 | self.all_anchors = (np.stack([x1, y1, x2, y2]).astype(np.float32), 84 | np.stack([cx, cy, w, h]).astype(np.float32)) 85 | return True 86 | -------------------------------------------------------------------------------- /siamreppoints/utils/average_meter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | 9 | class Meter(object): 10 | def __init__(self, name, val, avg): 11 | self.name = name 12 | self.val = val 13 | self.avg = avg 14 | 15 | def __repr__(self): 16 | return "{name}: {val:.6f} ({avg:.6f})".format( 17 | name=self.name, val=self.val, avg=self.avg 18 | ) 19 | 20 | def __format__(self, *tuples, **kwargs): 21 | return self.__repr__() 22 | 23 | 24 | class AverageMeter: 25 | """Computes and stores the average and current value""" 26 | def __init__(self, num=100): 27 | self.num = num 28 | self.reset() 29 | 30 | def reset(self): 31 | self.val = {} 32 | self.sum = {} 33 | self.count = {} 34 | self.history = {} 35 | 36 | def update(self, batch=1, **kwargs): 37 | val = {} 38 | for k in kwargs: 39 | val[k] = kwargs[k] / float(batch) 40 | self.val.update(val) 41 | for k in kwargs: 42 | if k not in self.sum: 43 | self.sum[k] = 0 44 | self.count[k] = 0 45 | self.history[k] = [] 46 | self.sum[k] += kwargs[k] 47 | self.count[k] += batch 48 | for _ in range(batch): 49 | self.history[k].append(val[k]) 50 | 51 | if self.num <= 0: 52 | # < 0, average all 53 | self.history[k] = [] 54 | 55 | # == 0: no average 56 | if self.num == 0: 57 | self.sum[k] = self.val[k] 58 | self.count[k] = 1 59 | 60 | elif len(self.history[k]) > self.num: 61 | pop_num = len(self.history[k]) - self.num 62 | for _ in range(pop_num): 63 | self.sum[k] -= self.history[k][0] 64 | del self.history[k][0] 65 | self.count[k] -= 1 66 | 67 | def __repr__(self): 68 | s = '' 69 | for k in self.sum: 70 | s += self.format_str(k) 71 | return s 72 | 73 | def format_str(self, attr): 74 | return "{name}: {val:.6f} ({avg:.6f}) ".format( 75 | name=attr, 76 | val=float(self.val[attr]), 77 | avg=float(self.sum[attr]) / self.count[attr]) 78 | 79 | def __getattr__(self, attr): 80 | if attr in 
self.__dict__: 81 | return super(AverageMeter, self).__getattr__(attr) 82 | if attr not in self.sum: 83 | print("invalid key '{}'".format(attr)) 84 | return Meter(attr, 0, 0) 85 | return Meter(attr, self.val[attr], self.avg(attr)) 86 | 87 | def avg(self, attr): 88 | return float(self.sum[attr]) / self.count[attr] 89 | 90 | 91 | if __name__ == '__main__': 92 | avg1 = AverageMeter(10) 93 | avg2 = AverageMeter(0) 94 | avg3 = AverageMeter(-1) 95 | 96 | for i in range(20): 97 | avg1.update(s=i) 98 | avg2.update(s=i) 99 | avg3.update(s=i) 100 | 101 | print('iter {}'.format(i)) 102 | print(avg1.s) 103 | print(avg2.s) 104 | print(avg3.s) 105 | -------------------------------------------------------------------------------- /siamreppoints/utils/bbox.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from collections import namedtuple 9 | 10 | import numpy as np 11 | 12 | 13 | Corner = namedtuple('Corner', 'x1 y1 x2 y2') 14 | # alias 15 | BBox = Corner 16 | Center = namedtuple('Center', 'x y w h') 17 | 18 | 19 | def corner2center(corner): 20 | """ convert (x1, y1, x2, y2) to (cx, cy, w, h) 21 | Args: 22 | corner: Corner or np.array (4*N) 23 | Return: 24 | Center or np.array (4 * N) 25 | """ 26 | if isinstance(corner, Corner): 27 | x1, y1, x2, y2 = corner 28 | return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, (x2 - x1), (y2 - y1)) 29 | else: 30 | x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3] 31 | x = (x1 + x2) * 0.5 32 | y = (y1 + y2) * 0.5 33 | w = x2 - x1 34 | h = y2 - y1 35 | return x, y, w, h 36 | 37 | 38 | def center2corner(center): 39 | """ convert (cx, cy, w, h) to (x1, y1, x2, y2) 40 | Args: 41 | center: Center or np.array (4 * N) 42 | Return: 43 | Corner or np.array (4 * N) 44 | """ 45 | if isinstance(center, Center): 46 | x, y, w, h = center 47 | return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5) 48 | else: 49 | x, y, w, h = center[0], center[1], center[2], center[3] 50 | x1 = x - w * 0.5 51 | y1 = y - h * 0.5 52 | x2 = x + w * 0.5 53 | y2 = y + h * 0.5 54 | return x1, y1, x2, y2 55 | 56 | 57 | def IoU(rect1, rect2): 58 | """ calculate intersection over union 59 | Args: 60 | rect1: (x1, y1, x2, y2) 61 | rect2: (x1, y1, x2, y2) 62 | Returns: 63 | iou 64 | """ 65 | # overlap 66 | x1, y1, x2, y2 = rect1[0], rect1[1], rect1[2], rect1[3] 67 | tx1, ty1, tx2, ty2 = rect2[0], rect2[1], rect2[2], rect2[3] 68 | 69 | xx1 = np.maximum(tx1, x1) 70 | yy1 = np.maximum(ty1, y1) 71 | xx2 = np.minimum(tx2, x2) 72 | yy2 = np.minimum(ty2, y2) 73 | 74 | ww = np.maximum(0, xx2 - xx1) 75 | hh = np.maximum(0, yy2 - yy1) 76 | 77 | area = (x2-x1) * (y2-y1) 78 | target_a = (tx2-tx1) * (ty2 - ty1) 79 | inter = ww * hh 80 | iou = inter / (area + target_a - inter) 81 | return iou 82 | 83 | 84 | def cxy_wh_2_rect(pos, sz): 85 | """ convert (cx, cy, w, h) to (x1, y1, w, h), 0-index 86 | """ 87 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) 88 | 89 | 90 | def rect_2_cxy_wh(rect): 91 | """ convert (x1, y1, w, h) to (cx, cy, w, h), 0-index 92 | """ 93 | return np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2]), \ 94 | np.array([rect[2], rect[3]]) 95 | 96 | 97 | def cxy_wh_2_rect1(pos, sz): 98 | """ convert (cx, cy, w, h) to (x1, y1, w, h), 1-index 99 | """ 100 | return np.array([pos[0]-sz[0]/2+1, pos[1]-sz[1]/2+1, sz[0],
sz[1]]) 101 | 102 | 103 | def rect1_2_cxy_wh(rect): 104 | """ convert (x1, y1, w, h) to (cx, cy, w, h), 1-index 105 | """ 106 | return np.array([rect[0]+rect[2]/2-1, rect[1]+rect[3]/2-1]), \ 107 | np.array([rect[2], rect[3]]) 108 | 109 | 110 | def get_axis_aligned_bbox(region): 111 | """ convert region to (cx, cy, w, h) represented by an axis-aligned box 112 | """ 113 | nv = region.size 114 | if nv == 8: 115 | cx = np.mean(region[0::2]) 116 | cy = np.mean(region[1::2]) 117 | x1 = min(region[0::2]) 118 | x2 = max(region[0::2]) 119 | y1 = min(region[1::2]) 120 | y2 = max(region[1::2]) 121 | A1 = np.linalg.norm(region[0:2] - region[2:4]) * \ 122 | np.linalg.norm(region[2:4] - region[4:6]) 123 | A2 = (x2 - x1) * (y2 - y1) 124 | s = np.sqrt(A1 / A2) 125 | w = s * (x2 - x1) + 1 126 | h = s * (y2 - y1) + 1 127 | else: 128 | x = region[0] 129 | y = region[1] 130 | w = region[2] 131 | h = region[3] 132 | cx = x+w/2 133 | cy = y+h/2 134 | return cx, cy, w, h 135 | 136 | 137 | def get_min_max_bbox(region): 138 | """ convert region to (cx, cy, w, h) represented by a min-max box 139 | """ 140 | nv = region.size 141 | if nv == 8: 142 | cx = np.mean(region[0::2]) 143 | cy = np.mean(region[1::2]) 144 | x1 = min(region[0::2]) 145 | x2 = max(region[0::2]) 146 | y1 = min(region[1::2]) 147 | y2 = max(region[1::2]) 148 | w = x2 - x1 149 | h = y2 - y1 150 | else: 151 | x = region[0] 152 | y = region[1] 153 | w = region[2] 154 | h = region[3] 155 | cx = x+w/2 156 | cy = y+h/2 157 | return cx, cy, w, h 158 | -------------------------------------------------------------------------------- /siamreppoints/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import os 9 | import socket 10 | import logging 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.distributed as dist 15 | 16 | from siamreppoints.utils.log_helper import log_once 17 | 18 | logger = logging.getLogger('global') 19 | 20 | 21 | def average_reduce(v): 22 | if get_world_size() == 1: 23 | return v 24 | tensor = torch.cuda.FloatTensor(1) 25 | tensor[0] = v 26 | dist.all_reduce(tensor) 27 | v = tensor[0] / get_world_size() 28 | return v 29 | 30 | 31 | class DistModule(nn.Module): 32 | def __init__(self, module, bn_method=0): 33 | super(DistModule, self).__init__() 34 | self.module = module 35 | self.bn_method = bn_method 36 | if get_world_size() > 1: 37 | broadcast_params(self.module) 38 | else: 39 | self.bn_method = 0 # single process 40 | 41 | def forward(self, *args, **kwargs): 42 | broadcast_buffers(self.module, self.bn_method) 43 | return self.module(*args, **kwargs) 44 | 45 | def train(self, mode=True): 46 | super(DistModule, self).train(mode) 47 | self.module.train(mode) 48 | return self 49 | 50 | 51 | def broadcast_params(model): 52 | """ broadcast model parameters """ 53 | for p in model.state_dict().values(): 54 | dist.broadcast(p, 0) 55 | 56 | 57 | def broadcast_buffers(model, method=0): 58 | """ broadcast model buffers """ 59 | if method == 0: 60 | return 61 | 62 | world_size = get_world_size() 63 | 64 | for b in model._all_buffers(): 65 | if method == 1: # broadcast from the main process 66 | dist.broadcast(b, 0) 67 | elif method == 2: # average 68 | dist.all_reduce(b) 69 | b /= world_size 70 | else: 71 | raise Exception('Invalid buffer broadcast code 
{}'.format(method)) 72 | 73 | 74 | inited = False 75 | 76 | 77 | def _dist_init(): 78 | ''' 79 | if guess right: 80 | ntasks: world_size (process num) 81 | proc_id: rank 82 | ''' 83 | rank = int(os.environ['RANK']) 84 | num_gpus = torch.cuda.device_count() 85 | torch.cuda.set_device(rank % num_gpus) 86 | dist.init_process_group(backend='nccl') 87 | world_size = dist.get_world_size() 88 | return rank, world_size 89 | 90 | 91 | def _get_local_ip(): 92 | try: 93 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 94 | s.connect(('8.8.8.8', 80)) 95 | ip = s.getsockname()[0] 96 | finally: 97 | s.close() 98 | return ip 99 | 100 | 101 | def dist_init(): 102 | global rank, world_size, inited 103 | try: 104 | rank, world_size = _dist_init() 105 | except RuntimeError as e: 106 | if 'public' in e.args[0]: 107 | logger.info(e) 108 | logger.info('Warning: use single process') 109 | rank, world_size = 0, 1 110 | else: 111 | raise RuntimeError(*e.args) 112 | inited = True 113 | return rank, world_size 114 | 115 | 116 | def get_rank(): 117 | if not inited: 118 | raise(Exception('dist not inited')) 119 | return rank 120 | 121 | 122 | def get_world_size(): 123 | if not inited: 124 | raise(Exception('dist not inited')) 125 | return world_size 126 | 127 | 128 | def reduce_gradients(model, _type='sum'): 129 | types = ['sum', 'avg'] 130 | assert _type in types, 'gradients method must be in "{}"'.format(types) 131 | log_once("gradients method is {}".format(_type)) 132 | if get_world_size() > 1: 133 | for param in model.parameters(): 134 | if param.requires_grad: 135 | dist.all_reduce(param.grad.data) 136 | if _type == 'avg': 137 | param.grad.data /= get_world_size() 138 | else: 139 | return None 140 | -------------------------------------------------------------------------------- /siamreppoints/utils/log_helper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import os 9 | import logging 10 | import math 11 | import sys 12 | 13 | 14 | if hasattr(sys, 'frozen'): # support for py2exe 15 | _srcfile = "logging%s__init__%s" % (os.sep, __file__[-4:]) 16 | elif __file__[-4:].lower() in ['.pyc', '.pyo']: 17 | _srcfile = __file__[:-4] + '.py' 18 | else: 19 | _srcfile = __file__ 20 | _srcfile = os.path.normcase(_srcfile) 21 | 22 | logs = set() 23 | 24 | 25 | class Filter: 26 | def __init__(self, flag): 27 | self.flag = flag 28 | 29 | def filter(self, x): 30 | return self.flag 31 | 32 | 33 | class Dummy: 34 | def __init__(self, *arg, **kwargs): 35 | pass 36 | 37 | def __getattr__(self, arg): 38 | def dummy(*args, **kwargs): pass 39 | return dummy 40 | 41 | 42 | def get_format(logger, level): 43 | if 'RANK' in os.environ: 44 | rank = int(os.environ['RANK']) 45 | 46 | if level == logging.INFO: 47 | logger.addFilter(Filter(rank == 0)) 48 | else: 49 | rank = 0 50 | format_str = '[%(asctime)s-rk{}-%(filename)s#%(lineno)3d] %(message)s'.format(rank) 51 | formatter = logging.Formatter(format_str) 52 | return formatter 53 | 54 | 55 | def get_format_custom(logger, level): 56 | if 'RANK' in os.environ: 57 | rank = int(os.environ['RANK']) 58 | if level == logging.INFO: 59 | logger.addFilter(Filter(rank == 0)) 60 | else: 61 | rank = 0 62 | format_str = '[%(asctime)s-rk{}-%(message)s'.format(rank) 63 | formatter = logging.Formatter(format_str) 64 | return formatter 65 | 66 | 67 | def init_log(name, level=logging.INFO, format_func=get_format): 68 | if (name, level) in logs: 69 | return 70 | logs.add((name, level)) 71 | logger = logging.getLogger(name) 72 | logger.setLevel(level) 73 | ch = logging.StreamHandler() 74 | ch.setLevel(level) 75 | formatter = format_func(logger, level) 76 | ch.setFormatter(formatter) 77 | logger.addHandler(ch) 78 | return logger 79 | 80 | 81 | def add_file_handler(name, log_file, level=logging.INFO): 82 | logger = logging.getLogger(name) 83 | fh = logging.FileHandler(log_file) 84 | fh.setFormatter(get_format(logger, level)) 85 | logger.addHandler(fh) 86 | 87 | 88 | init_log('global') 89 | 90 | 91 | def print_speed(i, i_time, n): 92 | """print_speed(index, index_time, total_iteration)""" 93 | logger = logging.getLogger('global') 94 | average_time = i_time 95 | remaining_time = (n - i) * average_time 96 | remaining_day = math.floor(remaining_time / 86400) 97 | remaining_hour = math.floor(remaining_time / 3600 - 98 | remaining_day * 24) 99 | remaining_min = math.floor(remaining_time / 60 - 100 | remaining_day * 1440 - 101 | remaining_hour * 60) 102 | logger.info('Progress: %d / %d [%d%%], Speed: %.3f s/iter, ETA %d:%02d:%02d (D:H:M)\n' % 103 | (i, n, i / n * 100, 104 | average_time, 105 | remaining_day, remaining_hour, remaining_min)) 106 | 107 | 108 | def find_caller(): 109 | def current_frame(): 110 | try: 111 | raise Exception 112 | except: 113 | return sys.exc_info()[2].tb_frame.f_back 114 | 115 | f = current_frame() 116 | if f is not None: 117 | f = f.f_back 118 | rv = "(unknown file)", 0, "(unknown function)" 119 | while hasattr(f, "f_code"): 120 | co = f.f_code 121 | filename = os.path.normcase(co.co_filename) 122 | rv = (co.co_filename, f.f_lineno, co.co_name) 123 | if filename == _srcfile: 124 | f = f.f_back 125 | continue 126 | break 127 | rv = list(rv) 128 | rv[0] = os.path.basename(rv[0]) 129 | return rv 130 | 131 | 132 | class LogOnce: 133 | def 
__init__(self): 134 | self.logged = set() 135 | self.logger = init_log('log_once', format_func=get_format_custom) 136 | 137 | def log(self, strings): 138 | fn, lineno, caller = find_caller() 139 | key = (fn, lineno, caller, strings) 140 | if key in self.logged: 141 | return 142 | self.logged.add(key) 143 | message = "{filename:s}<{caller}>#{lineno:3d}] {strings}".format( 144 | filename=fn, lineno=lineno, strings=strings, caller=caller) 145 | self.logger.info(message) 146 | 147 | 148 | once_logger = LogOnce() 149 | 150 | 151 | def log_once(strings): 152 | once_logger.log(strings) 153 | 154 | 155 | def main(): 156 | for i, lvl in enumerate([logging.DEBUG, logging.INFO, 157 | logging.WARNING, logging.ERROR, 158 | logging.CRITICAL]): 159 | log_name = str(lvl) 160 | init_log(log_name, lvl) 161 | logger = logging.getLogger(log_name) 162 | print('****cur lvl:{}'.format(lvl)) 163 | logger.debug('debug') 164 | logger.info('info') 165 | logger.warning('warning') 166 | logger.error('error') 167 | logger.critical('critiacal') 168 | 169 | 170 | if __name__ == '__main__': 171 | main() 172 | for i in range(10): 173 | log_once('xxx') 174 | -------------------------------------------------------------------------------- /siamreppoints/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import os 9 | 10 | from colorama import Fore, Style 11 | 12 | 13 | __all__ = ['commit', 'describe'] 14 | 15 | 16 | def _exec(cmd): 17 | f = os.popen(cmd, 'r', 1) 18 | return f.read().strip() 19 | 20 | 21 | def _bold(s): 22 | return "\033[1m%s\033[0m" % s 23 | 24 | 25 | def _color(s): 26 | return f'{Fore.RED}{s}{Style.RESET_ALL}' 27 | 28 | 29 | def _describe(model, lines=None, spaces=0): 30 | head = " " * spaces 31 | for name, p in model.named_parameters(): 32 | if '.' in name: 33 | continue 34 | if p.requires_grad: 35 | name = _color(name) 36 | line = "{head}- {name}".format(head=head, name=name) 37 | lines.append(line) 38 | 39 | for name, m in model.named_children(): 40 | space_num = len(name) + spaces + 1 41 | if m.training: 42 | name = _color(name) 43 | line = "{head}.{name} ({type})".format( 44 | head=head, 45 | name=name, 46 | type=m.__class__.__name__) 47 | lines.append(line) 48 | _describe(m, lines, space_num) 49 | 50 | 51 | def commit(): 52 | root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')) 53 | cmd = "cd {}; git log | head -n1 | awk '{{print $2}}'".format(root) 54 | commit = _exec(cmd) 55 | cmd = "cd {}; git log --oneline | head -n1".format(root) 56 | commit_log = _exec(cmd) 57 | return "commit : {}\n log : {}".format(commit, commit_log) 58 | 59 | 60 | def describe(net, name=None): 61 | num = 0 62 | lines = [] 63 | if name is not None: 64 | lines.append(name) 65 | num = len(name) 66 | _describe(net, lines, num) 67 | return "\n".join(lines) 68 | -------------------------------------------------------------------------------- /siamreppoints/utils/model_load.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import logging 9 | 10 | import torch 11 | 12 | 13 | logger = logging.getLogger('global') 14 | 15 | 16 | def check_keys(model, pretrained_state_dict): 17 | ckpt_keys = set(pretrained_state_dict.keys()) 18 | model_keys = set(model.state_dict().keys()) 19 | used_pretrained_keys = model_keys & ckpt_keys 20 | unused_pretrained_keys = ckpt_keys - model_keys 21 | missing_keys = model_keys - ckpt_keys 22 | # filter 'num_batches_tracked' 23 | missing_keys = [x for x in missing_keys 24 | if not x.endswith('num_batches_tracked')] 25 | if len(missing_keys) > 0: 26 | logger.info('[Warning] missing keys: {}'.format(missing_keys)) 27 | logger.info('missing keys:{}'.format(len(missing_keys))) 28 | if len(unused_pretrained_keys) > 0: 29 | logger.info('[Warning] unused_pretrained_keys: {}'.format( 30 | unused_pretrained_keys)) 31 | logger.info('unused checkpoint keys:{}'.format( 32 | len(unused_pretrained_keys))) 33 | logger.info('used keys:{}'.format(len(used_pretrained_keys))) 34 | assert len(used_pretrained_keys) > 0, \ 35 | 'load NONE from pretrained checkpoint' 36 | return True 37 | 38 | 39 | def remove_prefix(state_dict, prefix): 40 | ''' Old style model is stored with all names of parameters 41 | share common prefix 'module.' ''' 42 | logger.info('remove prefix \'{}\''.format(prefix)) 43 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 44 | return {f(key): value for key, value in state_dict.items()} 45 | 46 | 47 | def load_pretrain(model, pretrained_path): 48 | logger.info('load pretrained model from {}'.format(pretrained_path)) 49 | device = torch.cuda.current_device() 50 | pretrained_dict = torch.load(pretrained_path, 51 | map_location=lambda storage, loc: storage.cuda(device)) 52 | if "state_dict" in pretrained_dict.keys(): 53 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 54 | 'module.') 55 | else: 56 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 57 | 58 | try: 59 | check_keys(model, pretrained_dict) 60 | except: 61 | logger.info('[Warning]: using pretrain as features.\ 62 | Adding "features." as prefix') 63 | new_dict = {} 64 | for k, v in pretrained_dict.items(): 65 | k = 'features.' 
+ k 66 | new_dict[k] = v 67 | pretrained_dict = new_dict 68 | check_keys(model, pretrained_dict) 69 | model.load_state_dict(pretrained_dict, strict=False) 70 | return model 71 | 72 | 73 | def restore_from(model, optimizer, ckpt_path): 74 | device = torch.cuda.current_device() 75 | ckpt = torch.load(ckpt_path, 76 | map_location=lambda storage, loc: storage.cuda(device)) 77 | epoch = ckpt['epoch'] 78 | 79 | ckpt_model_dict = remove_prefix(ckpt['state_dict'], 'module.') 80 | check_keys(model, ckpt_model_dict) 81 | model.load_state_dict(ckpt_model_dict, strict=False) 82 | 83 | check_keys(optimizer, ckpt['optimizer']) 84 | optimizer.load_state_dict(ckpt['optimizer']) 85 | return model, optimizer, epoch 86 | -------------------------------------------------------------------------------- /testing_dataset/README.md: -------------------------------------------------------------------------------- 1 | # Testing dataset directory 2 | # Put your testing datasets here 3 | - [x] [VOT2016](http://www.votchallenge.net/vot2016/dataset.html) 4 | - [x] [VOT2018](http://www.votchallenge.net/vot2018/dataset.html) 5 | - [x] [VOT2018-LT](http://www.votchallenge.net/vot2018/dataset.html) 6 | - [x] [OTB100(OTB2015)](http://cvlab.hanyang.ac.kr/tracker_benchmark/datasets.html) 7 | - [x] [UAV123](https://ivul.kaust.edu.sa/Pages/Dataset-UAV123.aspx) 8 | - [x] [NFS](http://ci2cv.net/nfs/index.html) 9 | - [x] [LaSOT](https://cis.temple.edu/lasot/) 10 | - [ ] [TrackingNet (Evaluation on Server)](https://tracking-net.org) 11 | - [ ] [GOT-10k (Evaluation on Server)](http://got-10k.aitestunion.com) 12 | 13 | ## Download Dataset 14 | Download the json annotation files used by our toolkit from [baidu pan](https://pan.baidu.com/s/1js0Qhykqqur7_lNRtle1tA) 15 | 16 | 1. Put CVPR13.json, OTB100.json, OTB50.json in the OTB100 dataset directory (you need to copy Jogging to Jogging-1 and Jogging-2, and copy Skating2 to Skating2-1 and Skating2-2, or use softlinks; see the sketch at the end of this file) 17 | 18 | The directory should have the following format 19 | 20 | | -- OTB100/ 21 | 22 |   | -- Basketball 23 | 24 |   | ...... 25 | 26 |   | -- Woman 27 | 28 |   | -- OTB100.json 29 | 30 |   | -- OTB50.json 31 | 32 |   | -- CVPR13.json 33 | 34 | 2. Put all the other json files in their dataset directories, as in step 1 35 |
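For step 1, a minimal sketch using softlinks (assuming the OTB100 sequences are already extracted under `testing_dataset/OTB100/` and the json files were downloaded to `~/Downloads`; adjust both paths to your own layout):

```bash
cd testing_dataset/OTB100
# OTB100.json/OTB50.json list the two targets of these sequences as separate entries
ln -s Jogging Jogging-1
ln -s Jogging Jogging-2
ln -s Skating2 Skating2-1
ln -s Skating2 Skating2-2
# place the downloaded annotation files next to the sequence folders
cp ~/Downloads/CVPR13.json ~/Downloads/OTB100.json ~/Downloads/OTB50.json .
```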
-------------------------------------------------------------------------------- /toolkit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/toolkit/__init__.py -------------------------------------------------------------------------------- /toolkit/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .vot import VOTDataset, VOTLTDataset 2 | from .otb import OTBDataset 3 | from .uav import UAVDataset 4 | from .lasot import LaSOTDataset 5 | from .nfs import NFSDataset 6 | from .trackingnet import TrackingNetDataset 7 | from .got10k import GOT10kDataset 8 | 9 | class DatasetFactory(object): 10 | @staticmethod 11 | def create_dataset(**kwargs): 12 | """ 13 | Args: 14 | name: dataset name 'OTB2015', 'LaSOT', 'UAV123', 'NFS240', 'NFS30', 15 | 'VOT2018', 'VOT2016', 'VOT2018-LT' 16 | dataset_root: dataset root 17 | load_img: whether to load image 18 | Return: 19 | dataset 20 | """ 21 | assert 'name' in kwargs, "should provide dataset name" 22 | name = kwargs['name'] 23 | if 'OTB' in name: 24 | dataset = OTBDataset(**kwargs) 25 | elif 'LaSOT' == name: 26 | dataset = LaSOTDataset(**kwargs) 27 | elif 'UAV' in name: 28 | dataset = UAVDataset(**kwargs) 29 | elif 'NFS' in name: 30 | dataset = NFSDataset(**kwargs) 31 | elif 'VOT2018' == name or 'VOT2016' == name or 'VOT2019' == name: 32 | dataset = VOTDataset(**kwargs) 33 | elif 'VOT2018-LT' == name: 34 | dataset = VOTLTDataset(**kwargs) 35 | elif 'TrackingNet' == name: 36 | dataset = TrackingNetDataset(**kwargs) 37 | elif 'GOT-10k' == name: 38 | dataset = GOT10kDataset(**kwargs) 39 | else: 40 | raise Exception("unknown dataset {}".format(kwargs['name'])) 41 | return dataset 42 | 43 | -------------------------------------------------------------------------------- /toolkit/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | 3 | class Dataset(object): 4 | def __init__(self, name, dataset_root): 5 | self.name = name 6 | self.dataset_root = dataset_root 7 | self.videos = None 8 | 9 | def __getitem__(self, idx): 10 | if isinstance(idx, str): 11 | return self.videos[idx] 12 | elif isinstance(idx, int): 13 | return self.videos[sorted(list(self.videos.keys()))[idx]] 14 | 15 | def __len__(self): 16 | return len(self.videos) 17 | 18 | def __iter__(self): 19 | keys = sorted(list(self.videos.keys())) 20 | for key in keys: 21 | yield self.videos[key] 22 | 23 | def set_tracker(self, path, tracker_names): 24 | """ 25 | Args: 26 | path: path to tracker results, 27 | tracker_names: list of tracker name 28 | """ 29 | self.tracker_path = path 30 | self.tracker_names = tracker_names 31 | # for video in tqdm(self.videos.values(), 32 | # desc='loading tracker result', ncols=100): 33 | # video.load_tracker(path, tracker_names) 34 | -------------------------------------------------------------------------------- /toolkit/datasets/got10k.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import os 4 | import numpy as np 5 | 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | class GOT10kVideo(Video): 13 | """ 14 | Args: 15 | name: video name 16 | root: dataset root 17 | video_dir: video directory 18 | init_rect:
init rectangle 19 | img_names: image names 20 | gt_rect: groundtruth rectangle 21 | attr: attribute of video 22 | """ 23 | def __init__(self, name, root, video_dir, init_rect, img_names, 24 | gt_rect, attr, load_img=False): 25 | super(GOT10kVideo, self).__init__(name, root, video_dir, 26 | init_rect, img_names, gt_rect, attr, load_img) 27 | 28 | # def load_tracker(self, path, tracker_names=None): 29 | # """ 30 | # Args: 31 | # path(str): path to result 32 | # tracker_name(list): name of tracker 33 | # """ 34 | # if not tracker_names: 35 | # tracker_names = [x.split('/')[-1] for x in glob(path) 36 | # if os.path.isdir(x)] 37 | # if isinstance(tracker_names, str): 38 | # tracker_names = [tracker_names] 39 | # # self.pred_trajs = {} 40 | # for name in tracker_names: 41 | # traj_file = os.path.join(path, name, self.name+'.txt') 42 | # if os.path.exists(traj_file): 43 | # with open(traj_file, 'r') as f : 44 | # self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 45 | # for x in f.readlines()] 46 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 47 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 48 | # else: 49 | 50 | # self.tracker_names = list(self.pred_trajs.keys()) 51 | 52 | class GOT10kDataset(Dataset): 53 | """ 54 | Args: 55 | name: dataset name, should be "NFS30" or "NFS240" 56 | dataset_root, dataset root dir 57 | """ 58 | def __init__(self, name, dataset_root, load_img=False): 59 | super(GOT10kDataset, self).__init__(name, dataset_root) 60 | with open(os.path.join(dataset_root, name+'_new.json'), 'r') as f: 61 | meta_data = json.load(f) 62 | 63 | # load videos 64 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 65 | self.videos = {} 66 | for video in pbar: 67 | pbar.set_postfix_str(video) 68 | self.videos[video] = GOT10kVideo(video, 69 | dataset_root, 70 | meta_data[video]['video_dir'], 71 | meta_data[video]['init_rect'], 72 | meta_data[video]['img_names'], 73 | meta_data[video]['gt_rect'], 74 | None) 75 | self.attr = {} 76 | self.attr['ALL'] = list(self.videos.keys()) 77 | -------------------------------------------------------------------------------- /toolkit/datasets/lasot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | class LaSOTVideo(Video): 12 | """ 13 | Args: 14 | name: video name 15 | root: dataset root 16 | video_dir: video directory 17 | init_rect: init rectangle 18 | img_names: image names 19 | gt_rect: groundtruth rectangle 20 | attr: attribute of video 21 | """ 22 | def __init__(self, name, root, video_dir, init_rect, img_names, 23 | gt_rect, attr, absent, load_img=False): 24 | super(LaSOTVideo, self).__init__(name, root, video_dir, 25 | init_rect, img_names, gt_rect, attr, load_img) 26 | self.absent = np.array(absent, np.int8) 27 | 28 | def load_tracker(self, path, tracker_names=None, store=True): 29 | """ 30 | Args: 31 | path(str): path to result 32 | tracker_name(list): name of tracker 33 | """ 34 | if not tracker_names: 35 | tracker_names = [x.split('/')[-1] for x in glob(path) 36 | if os.path.isdir(x)] 37 | if isinstance(tracker_names, str): 38 | tracker_names = [tracker_names] 39 | for name in tracker_names: 40 | traj_file = os.path.join(path, name, self.name+'.txt') 41 | if os.path.exists(traj_file): 42 | with open(traj_file, 'r') as f : 43 | pred_traj = [list(map(float, 
x.strip().split(','))) 44 | for x in f.readlines()] 45 | else: 46 | print("File not exists: ", traj_file) 47 | if self.name == 'monkey-17': 48 | pred_traj = pred_traj[:len(self.gt_traj)] 49 | if store: 50 | self.pred_trajs[name] = pred_traj 51 | else: 52 | return pred_traj 53 | self.tracker_names = list(self.pred_trajs.keys()) 54 | 55 | 56 | 57 | class LaSOTDataset(Dataset): 58 | """ 59 | Args: 60 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50' 61 | dataset_root: dataset root 62 | load_img: wether to load all imgs 63 | """ 64 | def __init__(self, name, dataset_root, load_img=False): 65 | super(LaSOTDataset, self).__init__(name, dataset_root) 66 | with open(os.path.join(dataset_root, name+'_new.json'), 'r') as f: 67 | meta_data = json.load(f) 68 | 69 | # load videos 70 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 71 | self.videos = {} 72 | for video in pbar: 73 | pbar.set_postfix_str(video) 74 | self.videos[video] = LaSOTVideo(video, 75 | dataset_root, 76 | meta_data[video]['video_dir'], 77 | meta_data[video]['init_rect'], 78 | meta_data[video]['img_names'], 79 | meta_data[video]['gt_rect'], 80 | meta_data[video]['attr'], 81 | meta_data[video]['absent']) 82 | 83 | # set attr 84 | attr = [] 85 | for x in self.videos.values(): 86 | attr += x.attr 87 | attr = set(attr) 88 | self.attr = {} 89 | self.attr['ALL'] = list(self.videos.keys()) 90 | for x in attr: 91 | self.attr[x] = [] 92 | for k, v in self.videos.items(): 93 | for attr_ in v.attr: 94 | self.attr[attr_].append(k) 95 | 96 | 97 | -------------------------------------------------------------------------------- /toolkit/datasets/nfs.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | 12 | class NFSVideo(Video): 13 | """ 14 | Args: 15 | name: video name 16 | root: dataset root 17 | video_dir: video directory 18 | init_rect: init rectangle 19 | img_names: image names 20 | gt_rect: groundtruth rectangle 21 | attr: attribute of video 22 | """ 23 | def __init__(self, name, root, video_dir, init_rect, img_names, 24 | gt_rect, attr, load_img=False): 25 | super(NFSVideo, self).__init__(name, root, video_dir, 26 | init_rect, img_names, gt_rect, attr, load_img) 27 | 28 | # def load_tracker(self, path, tracker_names=None): 29 | # """ 30 | # Args: 31 | # path(str): path to result 32 | # tracker_name(list): name of tracker 33 | # """ 34 | # if not tracker_names: 35 | # tracker_names = [x.split('/')[-1] for x in glob(path) 36 | # if os.path.isdir(x)] 37 | # if isinstance(tracker_names, str): 38 | # tracker_names = [tracker_names] 39 | # # self.pred_trajs = {} 40 | # for name in tracker_names: 41 | # traj_file = os.path.join(path, name, self.name+'.txt') 42 | # if os.path.exists(traj_file): 43 | # with open(traj_file, 'r') as f : 44 | # self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 45 | # for x in f.readlines()] 46 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 47 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 48 | # else: 49 | 50 | # self.tracker_names = list(self.pred_trajs.keys()) 51 | 52 | class NFSDataset(Dataset): 53 | """ 54 | Args: 55 | name: dataset name, should be "NFS30" or "NFS240" 56 | dataset_root, dataset root dir 57 | """ 58 | def __init__(self, name, dataset_root, load_img=False): 59 | super(NFSDataset, self).__init__(name, dataset_root) 
60 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 61 | meta_data = json.load(f) 62 | 63 | # load videos 64 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 65 | self.videos = {} 66 | for video in pbar: 67 | pbar.set_postfix_str(video) 68 | self.videos[video] = NFSVideo(video, 69 | dataset_root, 70 | meta_data[video]['video_dir'], 71 | meta_data[video]['init_rect'], 72 | meta_data[video]['img_names'], 73 | meta_data[video]['gt_rect'], 74 | None) 75 | 76 | self.attr = {} 77 | self.attr['ALL'] = list(self.videos.keys()) 78 | -------------------------------------------------------------------------------- /toolkit/datasets/otb.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | class OTBVideo(Video): 14 | """ 15 | Args: 16 | name: video name 17 | root: dataset root 18 | video_dir: video directory 19 | init_rect: init rectangle 20 | img_names: image names 21 | gt_rect: groundtruth rectangle 22 | attr: attribute of video 23 | """ 24 | def __init__(self, name, root, video_dir, init_rect, img_names, 25 | gt_rect, attr, load_img=False): 26 | super(OTBVideo, self).__init__(name, root, video_dir, 27 | init_rect, img_names, gt_rect, attr, load_img) 28 | 29 | def load_tracker(self, path, tracker_names=None, store=True): 30 | """ 31 | Args: 32 | path(str): path to result 33 | tracker_name(list): name of tracker 34 | """ 35 | if not tracker_names: 36 | tracker_names = [x.split('/')[-1] for x in glob(path) 37 | if os.path.isdir(x)] 38 | if isinstance(tracker_names, str): 39 | tracker_names = [tracker_names] 40 | for name in tracker_names: 41 | traj_file = os.path.join(path, name, self.name+'.txt') 42 | if not os.path.exists(traj_file): 43 | if self.name == 'FleetFace': 44 | txt_name = 'fleetface.txt' 45 | elif self.name == 'Jogging-1': 46 | txt_name = 'jogging_1.txt' 47 | elif self.name == 'Jogging-2': 48 | txt_name = 'jogging_2.txt' 49 | elif self.name == 'Skating2-1': 50 | txt_name = 'skating2_1.txt' 51 | elif self.name == 'Skating2-2': 52 | txt_name = 'skating2_2.txt' 53 | elif self.name == 'FaceOcc1': 54 | txt_name = 'faceocc1.txt' 55 | elif self.name == 'FaceOcc2': 56 | txt_name = 'faceocc2.txt' 57 | elif self.name == 'Human4-2': 58 | txt_name = 'human4_2.txt' 59 | else: 60 | txt_name = self.name[0].lower()+self.name[1:]+'.txt' 61 | traj_file = os.path.join(path, name, txt_name) 62 | if os.path.exists(traj_file): 63 | with open(traj_file, 'r') as f : 64 | pred_traj = [list(map(float, x.strip().split(','))) 65 | for x in f.readlines()] 66 | if len(pred_traj) != len(self.gt_traj): 67 | print(name, len(pred_traj), len(self.gt_traj), self.name) 68 | if store: 69 | self.pred_trajs[name] = pred_traj 70 | else: 71 | return pred_traj 72 | else: 73 | print(traj_file) 74 | self.tracker_names = list(self.pred_trajs.keys()) 75 | 76 | 77 | 78 | class OTBDataset(Dataset): 79 | """ 80 | Args: 81 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50' 82 | dataset_root: dataset root 83 | load_img: wether to load all imgs 84 | """ 85 | def __init__(self, name, dataset_root, load_img=False): 86 | super(OTBDataset, self).__init__(name, dataset_root) 87 | with open(os.path.join(dataset_root, name+'_new.json'), 'r') as f: 88 | meta_data = json.load(f) 89 | 90 | # load videos 91 | pbar = tqdm(meta_data.keys(), desc='loading '+name, 
ncols=100) 92 | self.videos = {} 93 | for video in pbar: 94 | pbar.set_postfix_str(video) 95 | self.videos[video] = OTBVideo(video, 96 | dataset_root, 97 | meta_data[video]['video_dir'], 98 | meta_data[video]['init_rect'], 99 | meta_data[video]['img_names'], 100 | meta_data[video]['gt_rect'], 101 | meta_data[video]['attr'], 102 | load_img) 103 | 104 | # set attr 105 | attr = [] 106 | for x in self.videos.values(): 107 | attr += x.attr 108 | attr = set(attr) 109 | self.attr = {} 110 | self.attr['ALL'] = list(self.videos.keys()) 111 | for x in attr: 112 | self.attr[x] = [] 113 | for k, v in self.videos.items(): 114 | for attr_ in v.attr: 115 | self.attr[attr_].append(k) 116 | -------------------------------------------------------------------------------- /toolkit/datasets/trackingnet.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | class TrackingNetVideo(Video): 12 | """ 13 | Args: 14 | name: video name 15 | root: dataset root 16 | video_dir: video directory 17 | init_rect: init rectangle 18 | img_names: image names 19 | gt_rect: groundtruth rectangle 20 | attr: attribute of video 21 | """ 22 | def __init__(self, name, root, video_dir, init_rect, img_names, 23 | gt_rect, attr, load_img=False): 24 | super(TrackingNetVideo, self).__init__(name, root, video_dir, 25 | init_rect, img_names, gt_rect, attr, load_img) 26 | 27 | # def load_tracker(self, path, tracker_names=None): 28 | # """ 29 | # Args: 30 | # path(str): path to result 31 | # tracker_name(list): name of tracker 32 | # """ 33 | # if not tracker_names: 34 | # tracker_names = [x.split('/')[-1] for x in glob(path) 35 | # if os.path.isdir(x)] 36 | # if isinstance(tracker_names, str): 37 | # tracker_names = [tracker_names] 38 | # # self.pred_trajs = {} 39 | # for name in tracker_names: 40 | # traj_file = os.path.join(path, name, self.name+'.txt') 41 | # if os.path.exists(traj_file): 42 | # with open(traj_file, 'r') as f : 43 | # self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 44 | # for x in f.readlines()] 45 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 46 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 47 | # else: 48 | 49 | # self.tracker_names = list(self.pred_trajs.keys()) 50 | 51 | class TrackingNetDataset(Dataset): 52 | """ 53 | Args: 54 | name: dataset name, should be "NFS30" or "NFS240" 55 | dataset_root, dataset root dir 56 | """ 57 | def __init__(self, name, dataset_root, load_img=False): 58 | super(TrackingNetDataset, self).__init__(name, dataset_root) 59 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 60 | meta_data = json.load(f) 61 | 62 | # load videos 63 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 64 | self.videos = {} 65 | for video in pbar: 66 | pbar.set_postfix_str(video) 67 | self.videos[video] = TrackingNetVideo(video, 68 | dataset_root, 69 | meta_data[video]['video_dir'], 70 | meta_data[video]['init_rect'], 71 | meta_data[video]['img_names'], 72 | meta_data[video]['gt_rect'], 73 | None) 74 | self.attr = {} 75 | self.attr['ALL'] = list(self.videos.keys()) 76 | -------------------------------------------------------------------------------- /toolkit/datasets/uav.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from tqdm import tqdm 5 | from 
glob import glob 6 | 7 | from .dataset import Dataset 8 | from .video import Video 9 | 10 | class UAVVideo(Video): 11 | """ 12 | Args: 13 | name: video name 14 | root: dataset root 15 | video_dir: video directory 16 | init_rect: init rectangle 17 | img_names: image names 18 | gt_rect: groundtruth rectangle 19 | attr: attribute of video 20 | """ 21 | def __init__(self, name, root, video_dir, init_rect, img_names, 22 | gt_rect, attr, load_img=False): 23 | super(UAVVideo, self).__init__(name, root, video_dir, 24 | init_rect, img_names, gt_rect, attr, load_img) 25 | 26 | 27 | class UAVDataset(Dataset): 28 | """ 29 | Args: 30 | name: dataset name, should be 'UAV123', 'UAV20L' 31 | dataset_root: dataset root 32 | load_img: wether to load all imgs 33 | """ 34 | def __init__(self, name, dataset_root, load_img=False): 35 | super(UAVDataset, self).__init__(name, dataset_root) 36 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 37 | meta_data = json.load(f) 38 | 39 | # load videos 40 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 41 | self.videos = {} 42 | for video in pbar: 43 | pbar.set_postfix_str(video) 44 | self.videos[video] = UAVVideo(video, 45 | dataset_root, 46 | meta_data[video]['video_dir'], 47 | meta_data[video]['init_rect'], 48 | meta_data[video]['img_names'], 49 | meta_data[video]['gt_rect'], 50 | meta_data[video]['attr']) 51 | 52 | # set attr 53 | attr = [] 54 | for x in self.videos.values(): 55 | attr += x.attr 56 | attr = set(attr) 57 | self.attr = {} 58 | self.attr['ALL'] = list(self.videos.keys()) 59 | for x in attr: 60 | self.attr[x] = [] 61 | for k, v in self.videos.items(): 62 | for attr_ in v.attr: 63 | self.attr[attr_].append(k) 64 | 65 | -------------------------------------------------------------------------------- /toolkit/datasets/video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import re 4 | import numpy as np 5 | import json 6 | 7 | from glob import glob 8 | 9 | class Video(object): 10 | def __init__(self, name, root, video_dir, init_rect, img_names, 11 | gt_rect, attr, load_img=False): 12 | self.name = name 13 | self.video_dir = video_dir 14 | self.init_rect = init_rect 15 | self.gt_traj = gt_rect 16 | self.attr = attr 17 | self.pred_trajs = {} 18 | self.img_names = [os.path.join(root, x) for x in img_names] 19 | self.imgs = None 20 | 21 | if load_img: 22 | self.imgs = [cv2.imread(x) for x in self.img_names] 23 | self.width = self.imgs[0].shape[1] 24 | self.height = self.imgs[0].shape[0] 25 | else: 26 | img = cv2.imread(self.img_names[0]) 27 | assert img is not None, self.img_names[0] 28 | self.width = img.shape[1] 29 | self.height = img.shape[0] 30 | 31 | def load_tracker(self, path, tracker_names=None, store=True): 32 | """ 33 | Args: 34 | path(str): path to result 35 | tracker_name(list): name of tracker 36 | """ 37 | if not tracker_names: 38 | tracker_names = [x.split('/')[-1] for x in glob(path) 39 | if os.path.isdir(x)] 40 | if isinstance(tracker_names, str): 41 | tracker_names = [tracker_names] 42 | for name in tracker_names: 43 | traj_file = os.path.join(path, name, self.name+'.txt') 44 | if os.path.exists(traj_file): 45 | with open(traj_file, 'r') as f : 46 | pred_traj = [list(map(float, x.strip().split(','))) 47 | for x in f.readlines()] 48 | if len(pred_traj) != len(self.gt_traj): 49 | print(name, len(pred_traj), len(self.gt_traj), self.name) 50 | if store: 51 | self.pred_trajs[name] = pred_traj 52 | else: 53 | return pred_traj 54 | else: 55 | 
print(traj_file) 56 | self.tracker_names = list(self.pred_trajs.keys()) 57 | 58 | def load_img(self): 59 | if self.imgs is None: 60 | self.imgs = [cv2.imread(x) for x in self.img_names] 61 | self.width = self.imgs[0].shape[1] 62 | self.height = self.imgs[0].shape[0] 63 | 64 | def free_img(self): 65 | self.imgs = None 66 | 67 | def __len__(self): 68 | return len(self.img_names) 69 | 70 | def __getitem__(self, idx): 71 | if self.imgs is None: 72 | return cv2.imread(self.img_names[idx]), self.gt_traj[idx] 73 | else: 74 | return self.imgs[idx], self.gt_traj[idx] 75 | 76 | def __iter__(self): 77 | for i in range(len(self.img_names)): 78 | if self.imgs is not None: 79 | yield self.imgs[i], self.gt_traj[i] 80 | else: 81 | yield cv2.imread(self.img_names[i]), self.gt_traj[i] 82 | 83 | def draw_box(self, roi, img, linewidth, color, name=None): 84 | """ 85 | roi: rectangle or polygon 86 | img: numpy array img 87 | linewith: line width of the bbox 88 | """ 89 | if len(roi) > 6 and len(roi) % 2 == 0: 90 | pts = np.array(roi, np.int32).reshape(-1, 1, 2) 91 | color = tuple(map(int, color)) 92 | img = cv2.polylines(img, [pts], True, color, linewidth) 93 | pt = (pts[0, 0, 0], pts[0, 0, 1]-5) 94 | if name: 95 | img = cv2.putText(img, name, pt, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 1) 96 | elif len(roi) == 4: 97 | if not np.isnan(roi[0]): 98 | roi = list(map(int, roi)) 99 | color = tuple(map(int, color)) 100 | img = cv2.rectangle(img, (roi[0], roi[1]), (roi[0]+roi[2], roi[1]+roi[3]), 101 | color, linewidth) 102 | if name: 103 | img = cv2.putText(img, name, (roi[0], roi[1]-5), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 1) 104 | return img 105 | 106 | def show(self, pred_trajs={}, linewidth=2, show_name=False): 107 | """ 108 | pred_trajs: dict of pred_traj, {'tracker_name': list of traj} 109 | pred_traj should contain polygon or rectangle(x, y, width, height) 110 | linewith: line width of the bbox 111 | """ 112 | assert self.imgs is not None 113 | video = [] 114 | cv2.namedWindow(self.name, cv2.WINDOW_NORMAL) 115 | colors = {} 116 | if len(pred_trajs) == 0 and len(self.pred_trajs) > 0: 117 | pred_trajs = self.pred_trajs 118 | for i, (roi, img) in enumerate(zip(self.gt_traj, 119 | self.imgs[self.start_frame:self.end_frame+1])): 120 | img = img.copy() 121 | if len(img.shape) == 2: 122 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 123 | else: 124 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 125 | img = self.draw_box(roi, img, linewidth, (0, 255, 0), 126 | 'gt' if show_name else None) 127 | for name, trajs in pred_trajs.items(): 128 | if name not in colors: 129 | color = tuple(np.random.randint(0, 256, 3)) 130 | colors[name] = color 131 | else: 132 | color = colors[name] 133 | img = self.draw_box(trajs[0][i], img, linewidth, color, 134 | name if show_name else None) 135 | cv2.putText(img, str(i+self.start_frame), (5, 20), 136 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 0), 2) 137 | cv2.imshow(self.name, img) 138 | cv2.waitKey(40) 139 | video.append(img.copy()) 140 | return video 141 | -------------------------------------------------------------------------------- /toolkit/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .ar_benchmark import AccuracyRobustnessBenchmark 2 | from .eao_benchmark import EAOBenchmark 3 | from .ope_benchmark import OPEBenchmark 4 | from .f1_benchmark import F1Benchmark 5 | -------------------------------------------------------------------------------- /toolkit/evaluation/ar_benchmark.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | @author 3 | """ 4 | 5 | import warnings 6 | import itertools 7 | import numpy as np 8 | 9 | from colorama import Style, Fore 10 | from ..utils import calculate_failures, calculate_accuracy 11 | 12 | class AccuracyRobustnessBenchmark: 13 | """ 14 | Args: 15 | dataset: 16 | burnin: 17 | """ 18 | def __init__(self, dataset, burnin=10): 19 | self.dataset = dataset 20 | self.burnin = burnin 21 | 22 | def eval(self, eval_trackers=None): 23 | """ 24 | Args: 25 | eval_tags: list of tag 26 | eval_trackers: list of tracker name 27 | Returns: 28 | ret: dict of results 29 | """ 30 | if eval_trackers is None: 31 | eval_trackers = self.dataset.tracker_names 32 | if isinstance(eval_trackers, str): 33 | eval_trackers = [eval_trackers] 34 | 35 | result = {} 36 | for tracker_name in eval_trackers: 37 | accuracy, failures = self._calculate_accuracy_robustness(tracker_name) 38 | result[tracker_name] = {'overlaps': accuracy, 39 | 'failures': failures} 40 | return result 41 | 42 | def show_result(self, result, eao_result=None, show_video_level=False, helight_threshold=0.5): 43 | """pretty print result 44 | Args: 45 | result: returned dict from function eval 46 | """ 47 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12) 48 | if eao_result is not None: 49 | header = "|{:^"+str(tracker_name_len)+"}|{:^10}|{:^12}|{:^13}|{:^7}|" 50 | header = header.format('Tracker Name', 51 | 'Accuracy', 'Robustness', 'Lost Number', 'EAO') 52 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^10.3f}|{:^12.3f}|{:^13.1f}|{:^7.3f}|" 53 | else: 54 | header = "|{:^"+str(tracker_name_len)+"}|{:^10}|{:^12}|{:^13}|" 55 | header = header.format('Tracker Name', 56 | 'Accuracy', 'Robustness', 'Lost Number') 57 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^10.3f}|{:^12.3f}|{:^13.1f}|" 58 | bar = '-'*len(header) 59 | print(bar) 60 | print(header) 61 | print(bar) 62 | if eao_result is not None: 63 | tracker_eao = sorted(eao_result.items(), 64 | key=lambda x:x[1]['all'], 65 | reverse=True)[:20] 66 | tracker_names = [x[0] for x in tracker_eao] 67 | else: 68 | tracker_names = list(result.keys()) 69 | for tracker_name in tracker_names: 70 | # for tracker_name, ret in result.items(): 71 | ret = result[tracker_name] 72 | overlaps = list(itertools.chain(*ret['overlaps'].values())) 73 | accuracy = np.nanmean(overlaps) 74 | length = sum([len(x) for x in ret['overlaps'].values()]) 75 | failures = list(ret['failures'].values()) 76 | lost_number = np.mean(np.sum(failures, axis=0)) 77 | robustness = np.mean(np.sum(np.array(failures), axis=0) / length) * 100 78 | if eao_result is None: 79 | print(formatter.format(tracker_name, accuracy, robustness, lost_number)) 80 | else: 81 | print(formatter.format(tracker_name, accuracy, robustness, lost_number, eao_result[tracker_name]['all'])) 82 | print(bar) 83 | 84 | if show_video_level and len(result) < 10: 85 | print('\n\n') 86 | header1 = "|{:^14}|".format("Tracker name") 87 | header2 = "|{:^14}|".format("Video name") 88 | for tracker_name in result.keys(): 89 | header1 += ("{:^17}|").format(tracker_name) 90 | header2 += "{:^8}|{:^8}|".format("Acc", "LN") 91 | print('-'*len(header1)) 92 | print(header1) 93 | print('-'*len(header1)) 94 | print(header2) 95 | print('-'*len(header1)) 96 | videos = list(result[tracker_name]['overlaps'].keys()) 97 | for video in videos: 98 | row = "|{:^14}|".format(video) 99 | for tracker_name in result.keys(): 100 | overlaps = result[tracker_name]['overlaps'][video] 101 | 
accuracy = np.nanmean(overlaps) 102 | failures = result[tracker_name]['failures'][video] 103 | lost_number = np.mean(failures) 104 | 105 | accuracy_str = "{:^8.3f}".format(accuracy) 106 | if accuracy < helight_threshold: 107 | row += f'{Fore.RED}{accuracy_str}{Style.RESET_ALL}|' 108 | else: 109 | row += accuracy_str+'|' 110 | lost_num_str = "{:^8.3f}".format(lost_number) 111 | if lost_number > 0: 112 | row += f'{Fore.RED}{lost_num_str}{Style.RESET_ALL}|' 113 | else: 114 | row += lost_num_str+'|' 115 | print(row) 116 | print('-'*len(header1)) 117 | 118 | def _calculate_accuracy_robustness(self, tracker_name): 119 | overlaps = {} 120 | failures = {} 121 | all_length = {} 122 | for i in range(len(self.dataset)): 123 | video = self.dataset[i] 124 | gt_traj = video.gt_traj 125 | if tracker_name not in video.pred_trajs: 126 | tracker_trajs = video.load_tracker(self.dataset.tracker_path, tracker_name, False) 127 | else: 128 | tracker_trajs = video.pred_trajs[tracker_name] 129 | overlaps_group = [] 130 | num_failures_group = [] 131 | for tracker_traj in tracker_trajs: 132 | num_failures = calculate_failures(tracker_traj)[0] 133 | overlaps_ = calculate_accuracy(tracker_traj, gt_traj, 134 | burnin=10, bound=(video.width, video.height))[1] 135 | overlaps_group.append(overlaps_) 136 | num_failures_group.append(num_failures) 137 | with warnings.catch_warnings(): 138 | warnings.simplefilter("ignore", category=RuntimeWarning) 139 | overlaps[video.name] = np.nanmean(overlaps_group, axis=0).tolist() 140 | failures[video.name] = num_failures_group 141 | return overlaps, failures 142 | -------------------------------------------------------------------------------- /toolkit/evaluation/f1_benchmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from glob import glob 5 | from tqdm import tqdm 6 | from colorama import Style, Fore 7 | 8 | from ..utils import determine_thresholds, calculate_accuracy, calculate_f1 9 | 10 | class F1Benchmark: 11 | def __init__(self, dataset): 12 | """ 13 | Args: 14 | result_path: 15 | """ 16 | self.dataset = dataset 17 | 18 | def eval(self, eval_trackers=None): 19 | """ 20 | Args: 21 | eval_tags: list of tag 22 | eval_trackers: list of tracker name 23 | Returns: 24 | eao: dict of results 25 | """ 26 | if eval_trackers is None: 27 | eval_trackers = self.dataset.tracker_names 28 | if isinstance(eval_trackers, str): 29 | eval_trackers = [eval_trackers] 30 | 31 | ret = {} 32 | for tracker_name in eval_trackers: 33 | precision, recall, f1 = self._cal_precision_reall(tracker_name) 34 | ret[tracker_name] = {"precision": precision, 35 | "recall": recall, 36 | "f1": f1 37 | } 38 | return ret 39 | 40 | def _cal_precision_reall(self, tracker_name): 41 | score = [] 42 | # for i in range(len(self.dataset)): 43 | # video = self.dataset[i] 44 | for video in self.dataset: 45 | if tracker_name not in video.confidence: 46 | score += video.load_tracker(self.dataset.tracker_path, tracker_name, False)[1] 47 | else: 48 | score += video.confidence[tracker_name] 49 | score = np.array(score) 50 | thresholds = determine_thresholds(score)[::-1] 51 | 52 | precision = {} 53 | recall = {} 54 | f1 = {} 55 | for i in range(len(self.dataset)): 56 | video = self.dataset[i] 57 | gt_traj = video.gt_traj 58 | N = sum([1 for x in gt_traj if len(x) > 1]) 59 | if tracker_name not in video.pred_trajs: 60 | tracker_traj, score = video.load_tracker(self.dataset.tracker_path, tracker_name, False) 61 | else: 62 | tracker_traj = 
video.pred_trajs[tracker_name] 63 | score = video.confidence[tracker_name] 64 | overlaps = calculate_accuracy(tracker_traj, gt_traj, \ 65 | bound=(video.width,video.height))[1] 66 | f1[video.name], precision[video.name], recall[video.name] = \ 67 | calculate_f1(overlaps, score, (video.width,video.height),thresholds, N) 68 | return precision, recall, f1 69 | 70 | def show_result(self, result, show_video_level=False, helight_threshold=0.5): 71 | """pretty print result 72 | Args: 73 | result: returned dict from function eval 74 | """ 75 | # sort tracker according to f1 76 | sorted_tracker = {} 77 | for tracker_name, ret in result.items(): 78 | precision = np.mean(list(ret['precision'].values()), axis=0) 79 | recall = np.mean(list(ret['recall'].values()), axis=0) 80 | f1 = 2 * precision * recall / (precision + recall) 81 | max_idx = np.argmax(f1) 82 | sorted_tracker[tracker_name] = (precision[max_idx], recall[max_idx], 83 | f1[max_idx]) 84 | sorted_tracker_ = sorted(sorted_tracker.items(), 85 | key=lambda x:x[1][2], 86 | reverse=True)[:20] 87 | tracker_names = [x[0] for x in sorted_tracker_] 88 | 89 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12) 90 | header = "|{:^"+str(tracker_name_len)+"}|{:^11}|{:^8}|{:^7}|" 91 | header = header.format('Tracker Name', 92 | 'Precision', 'Recall', 'F1') 93 | bar = '-' * len(header) 94 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^11.3f}|{:^8.3f}|{:^7.3f}|" 95 | print(bar) 96 | print(header) 97 | print(bar) 98 | # for tracker_name, ret in result.items(): 99 | # precision = np.mean(list(ret['precision'].values()), axis=0) 100 | # recall = np.mean(list(ret['recall'].values()), axis=0) 101 | # f1 = 2 * precision * recall / (precision + recall) 102 | # max_idx = np.argmax(f1) 103 | for tracker_name in tracker_names: 104 | precision = sorted_tracker[tracker_name][0] 105 | recall = sorted_tracker[tracker_name][1] 106 | f1 = sorted_tracker[tracker_name][2] 107 | print(formatter.format(tracker_name, precision, recall, f1)) 108 | print(bar) 109 | 110 | if show_video_level and len(result) < 10: 111 | print('\n\n') 112 | header1 = "|{:^14}|".format("Tracker name") 113 | header2 = "|{:^14}|".format("Video name") 114 | for tracker_name in result.keys(): 115 | # col_len = max(20, len(tracker_name)) 116 | header1 += ("{:^28}|").format(tracker_name) 117 | header2 += "{:^11}|{:^8}|{:^7}|".format("Precision", "Recall", "F1") 118 | print('-'*len(header1)) 119 | print(header1) 120 | print('-'*len(header1)) 121 | print(header2) 122 | print('-'*len(header1)) 123 | videos = list(result[tracker_name]['precision'].keys()) 124 | for video in videos: 125 | row = "|{:^14}|".format(video) 126 | for tracker_name in result.keys(): 127 | precision = result[tracker_name]['precision'][video] 128 | recall = result[tracker_name]['recall'][video] 129 | f1 = result[tracker_name]['f1'][video] 130 | max_idx = np.argmax(f1) 131 | precision_str = "{:^11.3f}".format(precision[max_idx]) 132 | if precision[max_idx] < helight_threshold: 133 | row += f'{Fore.RED}{precision_str}{Style.RESET_ALL}|' 134 | else: 135 | row += precision_str+'|' 136 | recall_str = "{:^8.3f}".format(recall[max_idx]) 137 | if recall[max_idx] < helight_threshold: 138 | row += f'{Fore.RED}{recall_str}{Style.RESET_ALL}|' 139 | else: 140 | row += recall_str+'|' 141 | f1_str = "{:^7.3f}".format(f1[max_idx]) 142 | if f1[max_idx] < helight_threshold: 143 | row += f'{Fore.RED}{f1_str}{Style.RESET_ALL}|' 144 | else: 145 | row += f1_str+'|' 146 | print(row) 147 | print('-'*len(header1)) 148 | 
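Both benchmark classes above expose the same `eval` / `show_result` pattern and are normally driven from `tools/eval.py`. The snippet below is a minimal sketch of that wiring; the `VOTDataset` constructor arguments, the `set_tracker` helper, and the `EAOBenchmark` import are assumptions about parts of the toolkit not shown here and may need adjusting to your paths and tracker names.

```python
# Minimal sketch; dataset/result paths and tracker names are placeholders.
from toolkit.datasets import VOTDataset                      # assumed export
from toolkit.evaluation import AccuracyRobustnessBenchmark, EAOBenchmark

trackers = ['siamreppoints']
dataset = VOTDataset('VOT2018', 'testing_dataset/VOT2018')   # assumed signature
dataset.set_tracker('results/VOT2018', trackers)             # assumed helper

ar_benchmark = AccuracyRobustnessBenchmark(dataset)          # burnin defaults to 10
ar_result = ar_benchmark.eval(trackers)                      # {tracker: {'overlaps', 'failures'}}

eao_result = EAOBenchmark(dataset).eval(trackers)            # fills the optional EAO column

# Prints the |Tracker Name|Accuracy|Robustness|Lost Number|EAO| table defined above.
ar_benchmark.show_result(ar_result, eao_result, show_video_level=False)
```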
--------------------------------------------------------------------------------
/toolkit/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from . import region
2 | from .statistics import *
3 | 
--------------------------------------------------------------------------------
/toolkit/utils/c_region.pxd:
--------------------------------------------------------------------------------
1 | cdef extern from "src/region.h":
2 |     ctypedef enum region_type "RegionType":
3 |         EMPTY
4 |         SPECIAL
5 |         RECTANGLE
6 |         POLYGON
7 |         MASK
8 | 
9 |     ctypedef struct region_bounds:
10 |         float top
11 |         float bottom
12 |         float left
13 |         float right
14 | 
15 |     ctypedef struct region_rectangle:
16 |         float x
17 |         float y
18 |         float width
19 |         float height
20 | 
21 |     # ctypedef struct region_mask:
22 |     #     int x
23 |     #     int y
24 |     #     int width
25 |     #     int height
26 |     #     char *data
27 | 
28 |     ctypedef struct region_polygon:
29 |         int count
30 |         float *x
31 |         float *y
32 | 
33 |     ctypedef union region_container_data:
34 |         region_rectangle rectangle
35 |         region_polygon polygon
36 |         # region_mask mask
37 |         int special
38 | 
39 |     ctypedef struct region_container:
40 |         region_type type
41 |         region_container_data data
42 | 
43 |     # ctypedef struct region_overlap:
44 |     #     float overlap
45 |     #     float only1
46 |     #     float only2
47 | 
48 |     # region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds)
49 | 
50 |     float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds)
51 | 
--------------------------------------------------------------------------------
/toolkit/utils/misc.py:
--------------------------------------------------------------------------------
1 | """
2 | @author fangyi.zhang@vipl.ict.ac.cn
3 | """
4 | import numpy as np
5 | 
6 | def determine_thresholds(confidence, resolution=100):
7 |     """choose thresholds according to confidence scores
8 | 
9 |     Args:
10 |         confidence: list or numpy array
11 |         resolution: number of thresholds to choose
12 | 
13 |     Returns:
14 |         thresholds: numpy array
15 |     """
16 |     if isinstance(confidence, list):
17 |         confidence = np.array(confidence)
18 |     confidence = confidence.flatten()
19 |     confidence = confidence[~np.isnan(confidence)]
20 |     confidence.sort()
21 | 
22 |     assert len(confidence) > resolution and resolution > 2
23 | 
24 |     thresholds = np.ones((resolution))
25 |     thresholds[0] = - np.inf
26 |     thresholds[-1] = np.inf
27 |     delta = np.floor(len(confidence) / (resolution - 2))
28 |     idxs = np.linspace(delta, len(confidence)-delta, resolution-2, dtype=np.int32)
29 |     thresholds[1:-1] = confidence[idxs]
30 |     return thresholds
31 | 
--------------------------------------------------------------------------------
/toolkit/utils/src/buffer.h:
--------------------------------------------------------------------------------
1 | 
2 | #ifndef __STRING_BUFFER_H
3 | #define __STRING_BUFFER_H
4 | 
5 | // Enable MinGW secure API for _snprintf_s
6 | #define MINGW_HAS_SECURE_API 1
7 | 
8 | #ifdef _MSC_VER
9 | #define __INLINE __inline
10 | #else
11 | #define __INLINE inline
12 | #endif
13 | 
14 | #include <stdlib.h>
15 | #include <string.h>
16 | #include <stdarg.h>
17 | 
18 | typedef struct string_buffer {
19 |     char* buffer;
20 |     int position;
21 |     int size;
22 | } string_buffer;
23 | 
24 | typedef struct string_list {
25 |     char** buffer;
26 |     int position;
27 |     int size;
28 | } string_list;
29 | 
30 | #define BUFFER_INCREMENT_STEP 4096
31 | 
32 | static __INLINE string_buffer*
buffer_create(int L) { 33 | string_buffer* B = (string_buffer*) malloc(sizeof(string_buffer)); 34 | B->size = L; 35 | B->buffer = (char*) malloc(sizeof(char) * B->size); 36 | B->position = 0; 37 | return B; 38 | } 39 | 40 | static __INLINE void buffer_reset(string_buffer* B) { 41 | B->position = 0; 42 | } 43 | 44 | static __INLINE void buffer_destroy(string_buffer** B) { 45 | if (!(*B)) return; 46 | if ((*B)->buffer) { 47 | free((*B)->buffer); 48 | (*B)->buffer = NULL; 49 | } 50 | free((*B)); 51 | (*B) = NULL; 52 | } 53 | 54 | static __INLINE char* buffer_extract(const string_buffer* B) { 55 | char *S = (char*) malloc(sizeof(char) * (B->position + 1)); 56 | memcpy(S, B->buffer, B->position); 57 | S[B->position] = '\0'; 58 | return S; 59 | } 60 | 61 | static __INLINE int buffer_size(const string_buffer* B) { 62 | return B->position; 63 | } 64 | 65 | static __INLINE void buffer_push(string_buffer* B, char C) { 66 | int required = 1; 67 | if (required > B->size - B->position) { 68 | B->size = B->position + BUFFER_INCREMENT_STEP; 69 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 70 | } 71 | B->buffer[B->position] = C; 72 | B->position += required; 73 | } 74 | 75 | static __INLINE void buffer_append(string_buffer* B, const char *format, ...) { 76 | 77 | int required; 78 | va_list args; 79 | 80 | #if defined(__OS2__) || defined(__WINDOWS__) || defined(WIN32) || defined(_MSC_VER) 81 | 82 | va_start(args, format); 83 | required = _vscprintf(format, args) + 1; 84 | va_end(args); 85 | if (required >= B->size - B->position) { 86 | B->size = B->position + required + 1; 87 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 88 | } 89 | va_start(args, format); 90 | required = _vsnprintf_s(&(B->buffer[B->position]), B->size - B->position, _TRUNCATE, format, args); 91 | va_end(args); 92 | B->position += required; 93 | 94 | #else 95 | va_start(args, format); 96 | required = vsnprintf(&(B->buffer[B->position]), B->size - B->position, format, args); 97 | va_end(args); 98 | if (required >= B->size - B->position) { 99 | B->size = B->position + required + 1; 100 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 101 | va_start(args, format); 102 | required = vsnprintf(&(B->buffer[B->position]), B->size - B->position, format, args); 103 | va_end(args); 104 | } 105 | B->position += required; 106 | #endif 107 | 108 | } 109 | 110 | static __INLINE string_list* list_create(int L) { 111 | string_list* B = (string_list*) malloc(sizeof(string_list)); 112 | B->size = L; 113 | B->buffer = (char**) malloc(sizeof(char*) * B->size); 114 | memset(B->buffer, 0, sizeof(char*) * B->size); 115 | B->position = 0; 116 | return B; 117 | } 118 | 119 | static __INLINE void list_reset(string_list* B) { 120 | int i; 121 | for (i = 0; i < B->position; i++) { 122 | if (B->buffer[i]) free(B->buffer[i]); 123 | B->buffer[i] = NULL; 124 | } 125 | B->position = 0; 126 | } 127 | 128 | static __INLINE void list_destroy(string_list **B) { 129 | int i; 130 | 131 | if (!(*B)) return; 132 | 133 | for (i = 0; i < (*B)->position; i++) { 134 | if ((*B)->buffer[i]) free((*B)->buffer[i]); (*B)->buffer[i] = NULL; 135 | } 136 | 137 | if ((*B)->buffer) { 138 | free((*B)->buffer); (*B)->buffer = NULL; 139 | } 140 | 141 | free((*B)); 142 | (*B) = NULL; 143 | } 144 | 145 | static __INLINE char* list_get(const string_list *B, int I) { 146 | if (I < 0 || I >= B->position) { 147 | return NULL; 148 | } else { 149 | if (!B->buffer[I]) { 150 | return NULL; 151 | } else { 152 | char *S; 153 | int length = 
strlen(B->buffer[I]); 154 | S = (char*) malloc(sizeof(char) * (length + 1)); 155 | memcpy(S, B->buffer[I], length + 1); 156 | return S; 157 | } 158 | } 159 | } 160 | 161 | static __INLINE int list_size(const string_list *B) { 162 | return B->position; 163 | } 164 | 165 | static __INLINE void list_append(string_list *B, char* S) { 166 | int required = 1; 167 | int length = strlen(S); 168 | if (required > B->size - B->position) { 169 | B->size = B->position + 16; 170 | B->buffer = (char**) realloc(B->buffer, sizeof(char*) * B->size); 171 | } 172 | B->buffer[B->position] = (char*) malloc(sizeof(char) * (length + 1)); 173 | memcpy(B->buffer[B->position], S, length + 1); 174 | B->position += required; 175 | } 176 | 177 | // This version of the append does not copy the string but simply takes the control of its allocation 178 | static __INLINE void list_append_direct(string_list *B, char* S) { 179 | int required = 1; 180 | // int length = strlen(S); 181 | if (required > B->size - B->position) { 182 | B->size = B->position + 16; 183 | B->buffer = (char**) realloc(B->buffer, sizeof(char*) * B->size); 184 | } 185 | B->buffer[B->position] = S; 186 | B->position += required; 187 | } 188 | 189 | 190 | #endif 191 | -------------------------------------------------------------------------------- /toolkit/utils/src/region.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 4; tab-width: 4 -*- */ 2 | 3 | #ifndef _REGION_H_ 4 | #define _REGION_H_ 5 | 6 | #ifdef TRAX_STATIC_DEFINE 7 | # define __TRAX_EXPORT 8 | #else 9 | # ifndef __TRAX_EXPORT 10 | # if defined(_MSC_VER) 11 | # ifdef trax_EXPORTS 12 | /* We are building this library */ 13 | # define __TRAX_EXPORT __declspec(dllexport) 14 | # else 15 | /* We are using this library */ 16 | # define __TRAX_EXPORT __declspec(dllimport) 17 | # endif 18 | # elif defined(__GNUC__) 19 | # ifdef trax_EXPORTS 20 | /* We are building this library */ 21 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 22 | # else 23 | /* We are using this library */ 24 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 25 | # endif 26 | # endif 27 | # endif 28 | #endif 29 | 30 | #ifndef MAX 31 | #define MAX(a,b) (((a) > (b)) ? (a) : (b)) 32 | #endif 33 | 34 | #ifndef MIN 35 | #define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) 36 | #endif 37 | 38 | #define TRAX_DEFAULT_CODE 0 39 | 40 | #define REGION_LEGACY_RASTERIZATION 1 41 | 42 | #ifdef __cplusplus 43 | extern "C" { 44 | #endif 45 | 46 | typedef enum region_type {EMPTY, SPECIAL, RECTANGLE, POLYGON, MASK} region_type; 47 | 48 | typedef struct region_bounds { 49 | 50 | float top; 51 | float bottom; 52 | float left; 53 | float right; 54 | 55 | } region_bounds; 56 | 57 | typedef struct region_polygon { 58 | 59 | int count; 60 | 61 | float* x; 62 | float* y; 63 | 64 | } region_polygon; 65 | 66 | typedef struct region_mask { 67 | 68 | int x; 69 | int y; 70 | 71 | int width; 72 | int height; 73 | 74 | char* data; 75 | 76 | } region_mask; 77 | 78 | typedef struct region_rectangle { 79 | 80 | float x; 81 | float y; 82 | float width; 83 | float height; 84 | 85 | } region_rectangle; 86 | 87 | typedef struct region_container { 88 | enum region_type type; 89 | union { 90 | region_rectangle rectangle; 91 | region_polygon polygon; 92 | region_mask mask; 93 | int special; 94 | } data; 95 | } region_container; 96 | 97 | typedef struct region_overlap { 98 | 99 | float overlap; 100 | float only1; 101 | float only2; 102 | 103 | } region_overlap; 104 | 105 | extern const region_bounds region_no_bounds; 106 | 107 | __TRAX_EXPORT int region_set_flags(int mask); 108 | 109 | __TRAX_EXPORT int region_clear_flags(int mask); 110 | 111 | __TRAX_EXPORT region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds); 112 | 113 | __TRAX_EXPORT float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds); 114 | 115 | __TRAX_EXPORT region_bounds region_create_bounds(float left, float top, float right, float bottom); 116 | 117 | __TRAX_EXPORT region_bounds region_compute_bounds(const region_container* region); 118 | 119 | __TRAX_EXPORT int region_parse(const char* buffer, region_container** region); 120 | 121 | __TRAX_EXPORT char* region_string(region_container* region); 122 | 123 | __TRAX_EXPORT void region_print(FILE* out, region_container* region); 124 | 125 | __TRAX_EXPORT region_container* region_convert(const region_container* region, region_type type); 126 | 127 | __TRAX_EXPORT void region_release(region_container** region); 128 | 129 | __TRAX_EXPORT region_container* region_create_special(int code); 130 | 131 | __TRAX_EXPORT region_container* region_create_rectangle(float x, float y, float width, float height); 132 | 133 | __TRAX_EXPORT region_container* region_create_polygon(int count); 134 | 135 | __TRAX_EXPORT int region_contains_point(region_container* r, float x, float y); 136 | 137 | __TRAX_EXPORT void region_get_mask(region_container* r, char* mask, int width, int height); 138 | 139 | __TRAX_EXPORT void region_get_mask_offset(region_container* r, char* mask, int x, int y, int width, int height); 140 | 141 | #ifdef __cplusplus 142 | } 143 | #endif 144 | 145 | #endif 146 | -------------------------------------------------------------------------------- /toolkit/utils/statistics.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author fangyi.zhang@vipl.ict.ac.cn 3 | """ 4 | import numpy as np 5 | from . 
import region
6 | 
7 | def calculate_failures(trajectory):
8 |     """ Calculate number of failures
9 |     Args:
10 |         trajectory: list of bbox
11 |     Returns:
12 |         num_failures: number of failures
13 |         failures: failure points in trajectory, starting from 0
14 |     """
15 |     failures = [i for i, x in zip(range(len(trajectory)), trajectory)
16 |                 if len(x) == 1 and x[0] == 2]
17 |     num_failures = len(failures)
18 |     return num_failures, failures
19 | 
20 | def calculate_accuracy(pred_trajectory, gt_trajectory,
21 |                        burnin=0, ignore_unknown=True, bound=None):
22 |     """Calculate accuracy score as average overlap over the entire sequence
23 |     Args:
24 |         pred_trajectory: list of bbox
25 |         gt_trajectory: list of bbox
26 |         burnin: number of frames that have to be ignored after the failure
27 |         ignore_unknown: ignore frames where the overlap is unknown
28 |         bound: bounding region
29 |     Return:
30 |         acc: average overlap
31 |         overlaps: per frame overlaps
32 |     """
33 |     pred_trajectory_ = pred_trajectory
34 |     if not ignore_unknown:
35 |         unknown = [len(x)==1 and x[0] == 0 for x in pred_trajectory]
36 | 
37 |     if burnin > 0:
38 |         pred_trajectory_ = pred_trajectory[:]
39 |         mask = [len(x)==1 and x[0] == 1 for x in pred_trajectory]
40 |         for i in range(len(mask)):
41 |             if mask[i]:
42 |                 for j in range(burnin):
43 |                     if i + j < len(mask):
44 |                         pred_trajectory_[i+j] = [0]
45 |     min_len = min(len(pred_trajectory_), len(gt_trajectory))
46 |     overlaps = region.vot_overlap_traj(pred_trajectory_[:min_len],
47 |                                        gt_trajectory[:min_len], bound)
48 | 
49 |     if not ignore_unknown:
50 |         overlaps = [0 if u else o for o, u in zip(overlaps, unknown)]  # unknown frames count as zero overlap
51 | 
52 |     acc = 0
53 |     if len(overlaps) > 0:
54 |         acc = np.nanmean(overlaps)
55 |     return acc, overlaps
56 | 
57 | # def caculate_expected_overlap(pred_trajectorys, gt_trajectorys, skip_init, traj_length=None,
58 | #         weights=None, tags=['all']):
59 | #     """ Caculate expected overlap
60 | #     Args:
61 | #         pred_trajectory: list of bbox
62 | #         gt_trajectory: list of bbox
63 | #         traj_length: a list of sequence length for which the overlap should be evaluated
64 | #         weights: a list of per-sequence weights that indicate how much does each sequence
65 | #                 contribute to the estimate
66 | #         tags: set list of tags for which to perform calculation
67 | #     """
68 | #     overlaps = [calculate_accuracy(pred, gt)[1]
69 | #             for pred, gt in zip(pred_trajectorys, gt_trajectorys)]
70 | #     failures = [calculate_accuracy(pred, gt)[1]
71 | #             for pred, gt in zip(pred_trajectorys, gt_trajectorys)]
72 | #
73 | #     if traj_length is None:
74 | #         traj_length = range(1, max([len(x) for x in gt_trajectorys])+1)
75 | #     traj_length = list(set(traj_length))
76 | 
77 | def overlap_ratio(rect1, rect2):
78 |     '''Compute overlap ratio between two rects
79 |     Args:
80 |         rect1, rect2: 2d array of N x [x,y,w,h]
81 |     Return:
82 |         iou
83 |     '''
84 |     # if rect1.ndim==1:
85 |     #     rect1 = rect1[np.newaxis, :]
86 |     # if rect2.ndim==1:
87 |     #     rect2 = rect2[np.newaxis, :]
88 |     left = np.maximum(rect1[:,0], rect2[:,0])
89 |     right = np.minimum(rect1[:,0]+rect1[:,2], rect2[:,0]+rect2[:,2])
90 |     top = np.maximum(rect1[:,1], rect2[:,1])
91 |     bottom = np.minimum(rect1[:,1]+rect1[:,3], rect2[:,1]+rect2[:,3])
92 | 
93 |     intersect = np.maximum(0,right - left) * np.maximum(0,bottom - top)
94 |     union = rect1[:,2]*rect1[:,3] + rect2[:,2]*rect2[:,3] - intersect
95 |     iou = intersect / union
96 |     iou = np.maximum(np.minimum(1, iou), 0)
97 |     return iou
98 | 
99 | def success_overlap(gt_bb, result_bb, n_frame):
100 |     thresholds_overlap = np.arange(0, 1.05, 0.05)
101 |     success = np.zeros(len(thresholds_overlap))
102 |     iou =
np.ones(len(gt_bb)) * (-1) 103 | # mask = np.sum(gt_bb > 0, axis=1) == 4 #TODO check all dataset 104 | mask = np.sum(gt_bb[:, 2:] > 0, axis=1) == 2 105 | iou[mask] = overlap_ratio(gt_bb[mask], result_bb[mask]) 106 | for i in range(len(thresholds_overlap)): 107 | success[i] = np.sum(iou > thresholds_overlap[i]) / float(n_frame) 108 | return success 109 | 110 | def success_error(gt_center, result_center, thresholds, n_frame): 111 | # n_frame = len(gt_center) 112 | success = np.zeros(len(thresholds)) 113 | dist = np.ones(len(gt_center)) * (-1) 114 | mask = np.sum(gt_center > 0, axis=1) == 2 115 | dist[mask] = np.sqrt(np.sum( 116 | np.power(gt_center[mask] - result_center[mask], 2), axis=1)) 117 | for i in range(len(thresholds)): 118 | success[i] = np.sum(dist <= thresholds[i]) / float(n_frame) 119 | return success 120 | 121 | def determine_thresholds(scores, resolution=100): 122 | """ 123 | Args: 124 | scores: 1d array of score 125 | """ 126 | scores = np.sort(scores[np.logical_not(np.isnan(scores))]) 127 | delta = np.floor(len(scores) / (resolution - 2)) 128 | idxs = np.floor(np.linspace(delta-1, len(scores)-delta, resolution-2)+0.5).astype(np.int32) 129 | thresholds = np.zeros((resolution)) 130 | thresholds[0] = - np.inf 131 | thresholds[-1] = np.inf 132 | thresholds[1:-1] = scores[idxs] 133 | return thresholds 134 | 135 | def calculate_f1(overlaps, score, bound, thresholds, N): 136 | overlaps = np.array(overlaps) 137 | overlaps[np.isnan(overlaps)] = 0 138 | score = np.array(score) 139 | score[np.isnan(score)] = 0 140 | precision = np.zeros(len(thresholds)) 141 | recall = np.zeros(len(thresholds)) 142 | for i, th in enumerate(thresholds): 143 | if th == - np.inf: 144 | idx = score > 0 145 | else: 146 | idx = score >= th 147 | if np.sum(idx) == 0: 148 | precision[i] = 1 149 | recall[i] = 0 150 | else: 151 | precision[i] = np.mean(overlaps[idx]) 152 | recall[i] = np.sum(overlaps[idx]) / N 153 | f1 = 2 * precision * recall / (precision + recall) 154 | return f1, precision, recall 155 | 156 | def calculate_expected_overlap(fragments, fweights): 157 | max_len = fragments.shape[1] 158 | expected_overlaps = np.zeros((max_len), np.float32) 159 | expected_overlaps[0] = 1 160 | 161 | # TODO Speed Up 162 | for i in range(1, max_len): 163 | mask = np.logical_not(np.isnan(fragments[:, i])) 164 | if np.any(mask): 165 | fragment = fragments[mask, 1:i+1] 166 | seq_mean = np.sum(fragment, 1) / fragment.shape[1] 167 | expected_overlaps[i] = np.sum(seq_mean * 168 | fweights[mask]) / np.sum(fweights[mask]) 169 | return expected_overlaps 170 | -------------------------------------------------------------------------------- /toolkit/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .draw_f1 import draw_f1 2 | from .draw_success_precision import draw_success_precision 3 | from .draw_eao import draw_eao 4 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_eao.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import pickle 4 | 5 | from matplotlib import rc 6 | from .draw_utils import COLOR, MARKER_STYLE 7 | 8 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) 9 | rc('text', usetex=True) 10 | 11 | def draw_eao(result): 12 | fig = plt.figure() 13 | ax = fig.add_subplot(111, projection='polar') 14 | angles = np.linspace(0, 2*np.pi, 8, endpoint=True) 15 | 16 | attr2value = [] 17 | 
for i, (tracker_name, ret) in enumerate(result.items()): 18 | value = list(ret.values()) 19 | attr2value.append(value) 20 | value.append(value[0]) 21 | attr2value = np.array(attr2value) 22 | max_value = np.max(attr2value, axis=0) 23 | min_value = np.min(attr2value, axis=0) 24 | for i, (tracker_name, ret) in enumerate(result.items()): 25 | value = list(ret.values()) 26 | value.append(value[0]) 27 | value = np.array(value) 28 | value *= (1 / max_value) 29 | plt.plot(angles, value, linestyle='-', color=COLOR[i], marker=MARKER_STYLE[i], 30 | label=tracker_name, linewidth=1.5, markersize=6) 31 | 32 | attrs = ["Overall", "Camera motion", 33 | "Illumination change","Motion Change", 34 | "Size change","Occlusion", 35 | "Unassigned"] 36 | attr_value = [] 37 | for attr, maxv, minv in zip(attrs, max_value, min_value): 38 | attr_value.append(attr + "\n({:.3f},{:.3f})".format(minv, maxv)) 39 | ax.set_thetagrids(angles[:-1] * 180/np.pi, attr_value) 40 | ax.spines['polar'].set_visible(False) 41 | ax.legend(loc='upper center', bbox_to_anchor=(0.5,-0.07), frameon=False, ncol=5) 42 | ax.grid(b=False) 43 | ax.set_ylim(0, 1.18) 44 | ax.set_yticks([]) 45 | plt.show() 46 | 47 | if __name__ == '__main__': 48 | result = pickle.load(open("../../result.pkl", 'rb')) 49 | draw_eao(result) 50 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_f1.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from matplotlib import rc 5 | from .draw_utils import COLOR, LINE_STYLE 6 | 7 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) 8 | rc('text', usetex=True) 9 | 10 | def draw_f1(result, bold_name=None): 11 | # drawing f1 contour 12 | fig, ax = plt.subplots() 13 | for f1 in np.arange(0.1, 1, 0.1): 14 | recall = np.arange(f1, 1+0.01, 0.01) 15 | precision = f1 * recall / (2 * recall - f1) 16 | ax.plot(recall, precision, color=[0,1,0], linestyle='-', linewidth=0.5) 17 | ax.plot(precision, recall, color=[0,1,0], linestyle='-', linewidth=0.5) 18 | ax.grid(b=True) 19 | ax.set_aspect(1) 20 | plt.xlabel('Recall') 21 | plt.ylabel('Precision') 22 | plt.axis([0, 1, 0, 1]) 23 | plt.title(r'\textbf{VOT2018-LT Precision vs Recall}') 24 | 25 | # draw result line 26 | all_precision = {} 27 | all_recall = {} 28 | best_f1 = {} 29 | best_idx = {} 30 | for tracker_name, ret in result.items(): 31 | precision = np.mean(list(ret['precision'].values()), axis=0) 32 | recall = np.mean(list(ret['recall'].values()), axis=0) 33 | f1 = 2 * precision * recall / (precision + recall) 34 | max_idx = np.argmax(f1) 35 | all_precision[tracker_name] = precision 36 | all_recall[tracker_name] = recall 37 | best_f1[tracker_name] = f1[max_idx] 38 | best_idx[tracker_name] = max_idx 39 | 40 | for idx, (tracker_name, best_f1) in \ 41 | enumerate(sorted(best_f1.items(), key=lambda x:x[1], reverse=True)): 42 | if tracker_name == bold_name: 43 | label = r"\textbf{[%.3f] Ours}" % (best_f1) 44 | else: 45 | label = "[%.3f] " % (best_f1) + tracker_name 46 | recall = all_recall[tracker_name][:-1] 47 | precision = all_precision[tracker_name][:-1] 48 | ax.plot(recall, precision, color=COLOR[idx], linestyle='-', 49 | label=label) 50 | f1_idx = best_idx[tracker_name] 51 | ax.plot(recall[f1_idx], precision[f1_idx], color=[0,0,0], marker='o', 52 | markerfacecolor=COLOR[idx], markersize=5) 53 | ax.legend(loc='lower right', labelspacing=0.2) 54 | plt.xticks(np.arange(0, 1+0.1, 0.1)) 55 | plt.yticks(np.arange(0, 
1+0.1, 0.1)) 56 | plt.show() 57 | 58 | if __name__ == '__main__': 59 | draw_f1(None) 60 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_success_precision.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from .draw_utils import COLOR, LINE_STYLE 5 | 6 | def draw_success_precision(success_ret, name, videos, attr, precision_ret=None, 7 | norm_precision_ret=None, bold_name=None, axis=[0, 1]): 8 | # success plot 9 | fig, ax = plt.subplots() 10 | ax.grid(b=True) 11 | ax.set_aspect(1) 12 | plt.xlabel('Overlap threshold') 13 | plt.ylabel('Success rate') 14 | if attr == 'ALL': 15 | plt.title(r'\textbf{Success plots of OPE on %s}' % (name)) 16 | else: 17 | plt.title(r'\textbf{Success plots of OPE - %s}' % (attr)) 18 | plt.axis([0, 1]+axis) 19 | success = {} 20 | thresholds = np.arange(0, 1.05, 0.05) 21 | for tracker_name in success_ret.keys(): 22 | value = [v for k, v in success_ret[tracker_name].items() if k in videos] 23 | success[tracker_name] = np.mean(value) 24 | for idx, (tracker_name, auc) in \ 25 | enumerate(sorted(success.items(), key=lambda x:x[1], reverse=True)): 26 | if tracker_name == bold_name: 27 | label = r"\textbf{[%.3f] %s}" % (auc, tracker_name) 28 | else: 29 | label = "[%.3f] " % (auc) + tracker_name 30 | value = [v for k, v in success_ret[tracker_name].items() if k in videos] 31 | plt.plot(thresholds, np.mean(value, axis=0), 32 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 33 | ax.legend(loc='lower left', labelspacing=0.2) 34 | ax.autoscale(enable=True, axis='both', tight=True) 35 | xmin, xmax, ymin, ymax = plt.axis() 36 | ax.autoscale(enable=False) 37 | ymax += 0.03 38 | plt.axis([xmin, xmax, ymin, ymax]) 39 | plt.xticks(np.arange(xmin, xmax+0.01, 0.1)) 40 | plt.yticks(np.arange(ymin, ymax, 0.1)) 41 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 42 | plt.show() 43 | 44 | if precision_ret: 45 | # norm precision plot 46 | fig, ax = plt.subplots() 47 | ax.grid(b=True) 48 | ax.set_aspect(50) 49 | plt.xlabel('Location error threshold') 50 | plt.ylabel('Precision') 51 | if attr == 'ALL': 52 | plt.title(r'\textbf{Precision plots of OPE on %s}' % (name)) 53 | else: 54 | plt.title(r'\textbf{Precision plots of OPE - %s}' % (attr)) 55 | plt.axis([0, 50]+axis) 56 | precision = {} 57 | thresholds = np.arange(0, 51, 1) 58 | for tracker_name in precision_ret.keys(): 59 | value = [v for k, v in precision_ret[tracker_name].items() if k in videos] 60 | precision[tracker_name] = np.mean(value, axis=0)[20] 61 | for idx, (tracker_name, pre) in \ 62 | enumerate(sorted(precision.items(), key=lambda x:x[1], reverse=True)): 63 | if tracker_name == bold_name: 64 | label = r"\textbf{[%.3f] %s}" % (pre, tracker_name) 65 | else: 66 | label = "[%.3f] " % (pre) + tracker_name 67 | value = [v for k, v in precision_ret[tracker_name].items() if k in videos] 68 | plt.plot(thresholds, np.mean(value, axis=0), 69 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 70 | ax.legend(loc='lower right', labelspacing=0.2) 71 | ax.autoscale(enable=True, axis='both', tight=True) 72 | xmin, xmax, ymin, ymax = plt.axis() 73 | ax.autoscale(enable=False) 74 | ymax += 0.03 75 | plt.axis([xmin, xmax, ymin, ymax]) 76 | plt.xticks(np.arange(xmin, xmax+0.01, 5)) 77 | plt.yticks(np.arange(ymin, ymax, 0.1)) 78 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 79 | plt.show() 80 | 81 | # norm precision plot 82 | if norm_precision_ret: 83 
| fig, ax = plt.subplots() 84 | ax.grid(b=True) 85 | plt.xlabel('Location error threshold') 86 | plt.ylabel('Precision') 87 | if attr == 'ALL': 88 | plt.title(r'\textbf{Normalized Precision plots of OPE on %s}' % (name)) 89 | else: 90 | plt.title(r'\textbf{Normalized Precision plots of OPE - %s}' % (attr)) 91 | norm_precision = {} 92 | thresholds = np.arange(0, 51, 1) / 100 93 | for tracker_name in precision_ret.keys(): 94 | value = [v for k, v in norm_precision_ret[tracker_name].items() if k in videos] 95 | norm_precision[tracker_name] = np.mean(value, axis=0)[20] 96 | for idx, (tracker_name, pre) in \ 97 | enumerate(sorted(norm_precision.items(), key=lambda x:x[1], reverse=True)): 98 | if tracker_name == bold_name: 99 | label = r"\textbf{[%.3f] %s}" % (pre, tracker_name) 100 | else: 101 | label = "[%.3f] " % (pre) + tracker_name 102 | value = [v for k, v in norm_precision_ret[tracker_name].items() if k in videos] 103 | plt.plot(thresholds, np.mean(value, axis=0), 104 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 105 | ax.legend(loc='lower right', labelspacing=0.2) 106 | ax.autoscale(enable=True, axis='both', tight=True) 107 | xmin, xmax, ymin, ymax = plt.axis() 108 | ax.autoscale(enable=False) 109 | ymax += 0.03 110 | plt.axis([xmin, xmax, ymin, ymax]) 111 | plt.xticks(np.arange(xmin, xmax+0.01, 0.05)) 112 | plt.yticks(np.arange(ymin, ymax, 0.1)) 113 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 114 | plt.show() 115 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_utils.py: -------------------------------------------------------------------------------- 1 | 2 | COLOR = ((1, 0, 0), 3 | (0, 1, 0), 4 | (1, 0, 1), 5 | (1, 1, 0), 6 | (0 , 162/255, 232/255), 7 | (0.5, 0.5, 0.5), 8 | (0, 0, 1), 9 | (0, 1, 1), 10 | (136/255, 0 , 21/255), 11 | (255/255, 127/255, 39/255), 12 | (0, 0, 0)) 13 | 14 | LINE_STYLE = ['-', '--', ':', '-', '--', ':', '-', '--', ':', '-'] 15 | 16 | MARKER_STYLE = ['o', 'v', '<', '*', 'D', 'x', '.', 'x', '<', '.'] 17 | --------------------------------------------------------------------------------
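The plotting helpers consume the benchmark outputs directly: `draw_f1` expects the per-video `precision`/`recall`/`f1` dictionaries returned by `F1Benchmark.eval`, and `draw_eao` expects one EAO value per attribute and tracker (the seven attributes listed in the radar-plot code). A rough sketch, assuming a long-term (VOT-LT style) dataset with per-frame confidences has been built and registered as in the earlier example:

```python
# Rough sketch; `lt_dataset` and `trackers` are placeholders set up as in the earlier
# example, using a long-term dataset that provides per-frame confidence scores.
from toolkit.evaluation import F1Benchmark
from toolkit.visualization import draw_f1, draw_eao

f1_benchmark = F1Benchmark(lt_dataset)
f1_result = f1_benchmark.eval(trackers)        # {tracker: {'precision': ..., 'recall': ..., 'f1': ...}}
f1_benchmark.show_result(f1_result, show_video_level=False)
draw_f1(f1_result, bold_name='siamreppoints')  # precision-recall curves with best-F1 markers

# draw_eao wants {tracker: {attribute: eao}} covering the seven attributes in the radar plot;
# assembling that dictionary from per-tag EAO results is left to the caller here.
# draw_eao(eao_per_attribute)
```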