├── INSTALL.md
├── LICENSE
├── README.md
├── VOT-ST2020+Winners+Presentation.pdf
├── experiments
└── siamreppoints
│ ├── config_vot2018_offline.yaml
│ └── config_vot2019_offline.yaml
├── install.sh
├── requirements.txt
├── setup.py
├── siamreppoints
├── __init__.py
├── core
│ ├── __init__.py
│ ├── config.py
│ └── xcorr.py
├── models
│ ├── __init__.py
│ ├── backbone
│ │ ├── __init__.py
│ │ ├── alexnet.py
│ │ ├── mobile_v2.py
│ │ └── resnet_atrous.py
│ ├── csrc
│ │ ├── ROIAlign.h
│ │ ├── ROIPool.h
│ │ ├── SigmoidFocalLoss.h
│ │ ├── cpu
│ │ │ ├── ROIAlign_cpu.cpp
│ │ │ ├── nms_cpu.cpp
│ │ │ └── vision.h
│ │ ├── cuda
│ │ │ ├── ROIAlign_cuda.cu
│ │ │ ├── ROIPool_cuda.cu
│ │ │ ├── SigmoidFocalLoss_cuda.cu
│ │ │ ├── deform_conv_cuda.cu
│ │ │ ├── deform_conv_kernel_cuda.cu
│ │ │ ├── deform_pool_cuda.cu
│ │ │ ├── deform_pool_kernel_cuda.cu
│ │ │ ├── ml_nms.cu
│ │ │ ├── nms.cu
│ │ │ └── vision.h
│ │ ├── deform_conv.h
│ │ ├── deform_pool.h
│ │ ├── ml_nms.h
│ │ ├── nms.h
│ │ └── vision.cpp
│ ├── head
│ │ ├── __init__.py
│ │ └── rpn.py
│ ├── layers
│ │ ├── __init__.py
│ │ ├── _utils.py
│ │ ├── batch_norm.py
│ │ ├── dcn
│ │ │ ├── __init__.py
│ │ │ ├── deform_conv_func.py
│ │ │ ├── deform_conv_module.py
│ │ │ ├── deform_pool_func.py
│ │ │ └── deform_pool_module.py
│ │ ├── iou_loss.py
│ │ ├── misc.py
│ │ ├── nms.py
│ │ ├── roi_align.py
│ │ ├── roi_pool.py
│ │ ├── scale.py
│ │ ├── sigmoid_focal_loss.py
│ │ └── smooth_l1_loss.py
│ ├── model_builder.py
│ └── neck
│ │ ├── __init__.py
│ │ └── neck.py
├── setup.py
├── tracker
│ ├── __init__.py
│ ├── base_tracker.py
│ ├── siamreppoints_tracker.py
│ └── tracker_builder.py
└── utils
│ ├── __init__.py
│ ├── anchor.py
│ ├── average_meter.py
│ ├── bbox.py
│ ├── distributed.py
│ ├── log_helper.py
│ ├── lr_scheduler.py
│ ├── misc.py
│ └── model_load.py
├── testing_dataset
└── README.md
├── toolkit
├── __init__.py
├── datasets
│ ├── __init__.py
│ ├── dataset.py
│ ├── got10k.py
│ ├── lasot.py
│ ├── nfs.py
│ ├── otb.py
│ ├── trackingnet.py
│ ├── uav.py
│ ├── video.py
│ └── vot.py
├── evaluation
│ ├── __init__.py
│ ├── ar_benchmark.py
│ ├── eao_benchmark.py
│ ├── f1_benchmark.py
│ └── ope_benchmark.py
├── utils
│ ├── __init__.py
│ ├── c_region.pxd
│ ├── misc.py
│ ├── region.c
│ ├── region.pyx
│ ├── src
│ │ ├── buffer.h
│ │ ├── region.c
│ │ └── region.h
│ └── statistics.py
└── visualization
│ ├── __init__.py
│ ├── draw_eao.py
│ ├── draw_f1.py
│ ├── draw_success_precision.py
│ └── draw_utils.py
└── tools
├── eval.py
└── test.py
/INSTALL.md:
--------------------------------------------------------------------------------
1 | # Installation
2 | 
3 | This document contains detailed instructions for installing the dependencies for RPT. We recommend using [install.sh](install.sh). The code is tested on an Ubuntu 16.04 system with an Nvidia GPU (we recommend a 1080 Ti / TITAN Xp).
4 | 
5 | ### Requirements
6 | * Conda with Python 3.6.
7 | * Nvidia GPU.
8 | * PyTorch 1.1.0
9 | * yacs
10 | * pyyaml
11 | * matplotlib
12 | * tqdm
13 | * OpenCV
14 | 
15 | ## Step-by-step instructions
16 | 
17 | #### Create environment and activate
18 | ```bash
19 | conda create --name siamreppoints python=3.6
20 | conda activate siamreppoints
21 | ```
22 | 
23 | #### Install numpy/pytorch/opencv
24 | ```
25 | conda install numpy
26 | conda install pytorch=1.1.0 torchvision cuda90 -c pytorch
27 | pip install opencv-python
28 | ```
29 | 
30 | #### Install other requirements
31 | ```
32 | pip install pyyaml yacs tqdm colorama matplotlib cython tensorboardX
33 | ```
34 | 
35 | #### Build extensions
36 | ```
37 | python setup.py build_ext --inplace
38 | ```
39 | 
40 | #### Build extensions of DCN
41 | ```
42 | cd siamreppoints && python setup.py build_ext --inplace
43 | ```
44 | 
45 | ## Try with scripts
46 | ```
47 | bash install.sh /path/to/your/conda siamreppoints
48 | ```
49 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2020 Lucas
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RPT: Learning Point Set Representation for Siamese Visual Tracking [[ECCVW2020](https://arxiv.org/abs/2008.03467)]
2 | 
3 | 
4 | ## :sunny: Currently, this code only supports the offline version of RPT.
5 | 
6 | 
7 | ## News
8 | - :trophy: **We are the winner of the VOT-2020 Short-Term challenge**
9 | - :trophy: **We ranked 1st on both the public and the sequestered benchmark datasets of the VOT2020 Short-Term challenge**
10 | - :sunny::sunny: **Our [VOT2020-ST Winner presentation](https://github.com/zhanght021/RPT/blob/master/VOT-ST2020%2BWinners%2BPresentation.pdf) has been uploaded**
11 | 
12 | 
13 | ----
14 | ## Spotlight video
15 | 
16 | [![Video Label](https://i0.hdslb.com/bfs/album/1ea9e961083d81f7fed53d22ed8698a1ac2307f9.jpg@518w_1e_1c.jpg)](https://www.bilibili.com/video/BV17v41117cZ)
17 | 
18 | 
19 | ---
20 | ## Models
21 | | Dataset | pattern | A | R | EAO | Config. Filename |
22 | |:---:|:---:|:---:|:---:|:---:|:---:|
23 | | VOT2018 | offline | 0.610 | 0.150 | 0.497 | config_vot2018_offline.yaml |
24 | | VOT2019 | offline | 0.598 | 0.261 | 0.409 | config_vot2019_offline.yaml |
25 | | VOT2018 | online | 0.629 | 0.103 | 0.510 | :smile:coming soon:smile: |
26 | | VOT2019 | online | 0.623 | 0.186 | 0.417 | :smile:coming soon:smile: |
27 | 
28 | - The pretrained model can be downloaded from [[google](https://drive.google.com/file/d/1b9aynlUa4h1ju9Tir3xd6tT6dPzu40rN/view?usp=sharing)] or [[baidu](https://pan.baidu.com/s/18EXDr4DoeD89Vasuf8WCXQ)], extraction code: g4ac.
29 | - The raw results can be downloaded [[here](https://pan.baidu.com/s/1fAovMOR8UAN46f5Dm-sa6A)], extraction code: mkbh.
30 | 
31 | ----
32 | ## Abstract
33 | While remarkable progress has been made in robust visual tracking, accurate target state estimation still remains a highly challenging problem. In this paper, we argue that this issue is closely related to the prevalent bounding box representation, which provides only a coarse spatial extent of the object. Thus, an efficient visual tracking framework is proposed to accurately estimate the target state with a finer representation as a set of representative points. The point set is trained to indicate the semantically and geometrically significant positions of the target region, enabling more fine-grained localization and modeling of object appearance. We further propose a multi-level aggregation strategy to obtain detailed structure information by fusing hierarchical convolution layers. Extensive experiments on several challenging benchmarks including OTB2015, VOT2018, VOT2019 and GOT-10k demonstrate that our method achieves new state-of-the-art performance while running at over 20 FPS.
34 | 
35 | ---
36 | ## Installation
37 | Please find installation instructions in [INSTALL.md](INSTALL.md).
38 | 
39 | ---
40 | ## Quick Start: Using siamreppoints
41 | 
42 | Download the pretrained model and put siamreppoints.model in the correct directory under experiments.
43 | 
44 | ```bash
45 | cd siamreppoints/tools
46 | python test.py \
47 |         --snapshot ./snapshot/siamreppoints.model \ # model path
48 |         --dataset VOT2018 \ # dataset name
49 |         --config ./experiments/siamreppoints/config_vot2018_offline.yaml # config file
50 | ```
51 | 
52 | 
53 | ```bash
54 | cd siamreppoints/tools
55 | python eval.py \
56 |         --tracker_path ./results \ # result path
57 |         --dataset VOT2018 \ # dataset name
58 |         --tracker_prefix 'siam' \ # tracker name
59 |         --num 1 # number of threads for evaluation
60 | ```
61 | 
62 | 
63 | ---
64 | ## Acknowledgement
65 | - [pysot](https://github.com/STVIR/pysot)
66 | 
--------------------------------------------------------------------------------
/VOT-ST2020+Winners+Presentation.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/VOT-ST2020+Winners+Presentation.pdf
--------------------------------------------------------------------------------
/experiments/siamreppoints/config_vot2018_offline.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 5 | KWARGS: 6 | used_layers: [2, 3, 4] 7 | PRETRAINED: 'pretrained_models/resnet50.model' 8 | TRAIN_LAYERS: ['layer1', 'layer2', 'layer3', 'layer4'] 9 | TRAIN_EPOCH: 10 10 | LAYERS_LR: 0.1 11 | 12 | ADJUST: 13 | ADJUST: true 14 | TYPE: "AdjustAllLayer" 15 | KWARGS: 16 | in_channels: [512, 1024, 2048] 17 |
out_channels: [256, 256, 256] 18 | 19 | RPN: 20 | TYPE: 'MultiRPN' 21 | KWARGS: 22 | anchor_num: 5 23 | in_channels: [256, 256, 256] 24 | weighted: true 25 | 26 | MASK: 27 | MASK: false 28 | 29 | ANCHOR: 30 | STRIDE: 8 31 | RATIOS: [0.33, 0.5, 1, 2, 3] 32 | SCALES: [8] 33 | ANCHOR_NUM: 5 34 | 35 | TRACK: 36 | TYPE: 'SiamReppointsTracker' 37 | PENALTY_K: 0.09384699789214077 38 | WINDOW_INFLUENCE: 0.2870488747571366 39 | LR: 0.546949224973851 40 | EXEMPLAR_SIZE: 127 41 | INSTANCE_SIZE: 255 42 | BASE_SIZE: 8 43 | CONTEXT_AMOUNT: 0.5 44 | EXPANSION: 1.00 45 | 46 | TRAIN: 47 | EPOCH: 20 48 | START_EPOCH: 0 49 | BATCH_SIZE: 20 50 | BASE_LR: 0.005 51 | CLS_WEIGHT: 1.0 52 | LOC_WEIGHT: 1.2 53 | RESUME: '' 54 | 55 | LR: 56 | TYPE: 'log' 57 | KWARGS: 58 | start_lr: 0.005 59 | end_lr: 0.0005 60 | LR_WARMUP: 61 | TYPE: 'step' 62 | EPOCH: 5 63 | KWARGS: 64 | start_lr: 0.001 65 | end_lr: 0.005 66 | step: 1 67 | 68 | DATASET: 69 | NAMES: 70 | - 'YOUTUBEBB' 71 | - 'VID' 72 | - 'COCO' 73 | 74 | TEMPLATE: 75 | SHIFT: 4 76 | SCALE: 0.05 77 | BLUR: 0.0 78 | FLIP: 0.0 79 | COLOR: 0.5 80 | 81 | SEARCH: 82 | SHIFT: 64 83 | SCALE: 0.25 84 | BLUR: 0.2 85 | FLIP: 0.0 86 | COLOR: 0.5 87 | 88 | NEG: 0.2 89 | GRAY: 0.0 90 | -------------------------------------------------------------------------------- /experiments/siamreppoints/config_vot2019_offline.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 5 | KWARGS: 6 | used_layers: [2, 3, 4] 7 | PRETRAINED: 'pretrained_models/resnet50.model' 8 | TRAIN_LAYERS: ['layer1', 'layer2', 'layer3', 'layer4'] 9 | TRAIN_EPOCH: 10 10 | LAYERS_LR: 0.1 11 | 12 | ADJUST: 13 | ADJUST: true 14 | TYPE: "AdjustAllLayer" 15 | KWARGS: 16 | in_channels: [512, 1024, 2048] 17 | out_channels: [256, 256, 256] 18 | 19 | RPN: 20 | TYPE: 'MultiRPN' 21 | KWARGS: 22 | anchor_num: 5 23 | in_channels: [256, 256, 256] 24 | weighted: true 25 | 26 | MASK: 27 | MASK: false 28 | 29 | ANCHOR: 30 | STRIDE: 8 31 | RATIOS: [0.33, 0.5, 1, 2, 3] 32 | SCALES: [8] 33 | ANCHOR_NUM: 5 34 | 35 | TRACK: 36 | TYPE: 'SiamReppointsTracker' 37 | PENALTY_K: 0.17380070743842363 38 | WINDOW_INFLUENCE: 0.43745716974756743 39 | LR: 0.4488453012609389 40 | EXEMPLAR_SIZE: 127 41 | INSTANCE_SIZE: 351 42 | BASE_SIZE: 8 43 | CONTEXT_AMOUNT: 0.5 44 | EXPANSION: 1.02 45 | 46 | TRAIN: 47 | EPOCH: 20 48 | START_EPOCH: 0 49 | BATCH_SIZE: 20 50 | BASE_LR: 0.005 51 | CLS_WEIGHT: 1.0 52 | LOC_WEIGHT: 1.2 53 | RESUME: '' 54 | 55 | LR: 56 | TYPE: 'log' 57 | KWARGS: 58 | start_lr: 0.005 59 | end_lr: 0.0005 60 | LR_WARMUP: 61 | TYPE: 'step' 62 | EPOCH: 5 63 | KWARGS: 64 | start_lr: 0.001 65 | end_lr: 0.005 66 | step: 1 67 | 68 | DATASET: 69 | NAMES: 70 | - 'YOUTUBEBB' 71 | - 'VID' 72 | - 'COCO' 73 | 74 | TEMPLATE: 75 | SHIFT: 4 76 | SCALE: 0.05 77 | BLUR: 0.0 78 | FLIP: 0.0 79 | COLOR: 0.5 80 | 81 | SEARCH: 82 | SHIFT: 64 83 | SCALE: 0.25 84 | BLUR: 0.2 85 | FLIP: 0.0 86 | COLOR: 0.5 87 | 88 | NEG: 0.2 89 | GRAY: 0.0 90 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -lt 2 ]; then 4 | echo "ARGS ERROR!" 
5 | echo " bash install.sh /path/to/your/conda env_name" 6 | exit 1 7 | fi 8 | 9 | set -e 10 | 11 | conda_path=$1 12 | env_name=$2 13 | 14 | source $conda_path/etc/profile.d/conda.sh 15 | 16 | echo "****** create environment " $env_name "*****" 17 | # create environment 18 | conda create -y --name $env_name python=3.6 19 | conda activate $env_name 20 | 21 | echo "***** install numpy pytorch opencv *****" 22 | # numpy 23 | conda install -y numpy 24 | # pytorch 25 | # pytorch with cuda80/cuda90 is tested 26 | conda install -y pytorch=1.1.0 torchvision cuda90 -c pytorch 27 | # opencv 28 | pip install opencv-python 29 | # tensorboardX 30 | 31 | echo "***** install other libs *****" 32 | pip install tensorboardX 33 | # libs 34 | pip install pyyaml yacs tqdm colorama matplotlib cython 35 | 36 | echo "***** build extensions *****" 37 | python setup.py build_ext --inplace 38 | 39 | echo "***** build extensions about DCN *****" 40 | cd siamreppoints 41 | python ./setup.py build_ext --inplace 42 | 43 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | opencv-python 2 | yacs 3 | tqdm 4 | pyyaml 5 | matplotlib 6 | colorama 7 | cython 8 | tensorboardX 9 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | 5 | 6 | ext_modules = [ 7 | Extension( 8 | name='toolkit.utils.region', 9 | sources=[ 10 | 'toolkit/utils/region.pyx', 11 | 'toolkit/utils/src/region.c', 12 | ], 13 | include_dirs=[ 14 | 'toolkit/utils/src' 15 | ] 16 | ) 17 | ] 18 | 19 | setup( 20 | name='toolkit', 21 | packages=['toolkit'], 22 | ext_modules=cythonize(ext_modules) 23 | ) 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /siamreppoints/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/siamreppoints/__init__.py -------------------------------------------------------------------------------- /siamreppoints/core/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/siamreppoints/core/__init__.py -------------------------------------------------------------------------------- /siamreppoints/core/xcorr.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | 9 | 10 | def xcorr_slow(x, kernel): 11 | """for loop to calculate cross correlation, slow version 12 | """ 13 | batch = x.size()[0] 14 | out = [] 15 | for i in range(batch): 16 | px = x[i] 17 | pk = kernel[i] 18 | px = px.view(1, -1, px.size()[1], px.size()[2]) 19 | pk = pk.view(1, -1, pk.size()[1], pk.size()[2]) 20 | po = F.conv2d(px, pk) 21 | out.append(po) 22 | out = torch.cat(out, 0) 23 | return out 24 | 25 | 26 | def xcorr_fast(x, kernel): 27 | """group conv2d to calculate cross correlation, fast version 28 | """ 29 | batch = kernel.size()[0] 30 | 
pk = kernel.view(-1, x.size()[1], kernel.size()[2], kernel.size()[3]) 31 | px = x.view(1, -1, x.size()[2], x.size()[3]) 32 | po = F.conv2d(px, pk, groups=batch) 33 | po = po.view(batch, -1, po.size()[2], po.size()[3]) 34 | return po 35 | 36 | def xcorr_depthwise(x, kernel): 37 | """depthwise cross correlation 38 | """ 39 | batch = kernel.size(0) 40 | channel = kernel.size(1) 41 | x = x.view(1, batch*channel, x.size(2), x.size(3)) 42 | kernel = kernel.view(batch*channel, 1, kernel.size(2), kernel.size(3)) 43 | out = F.conv2d(x, kernel, groups=batch*channel) 44 | out = out.view(batch, channel, out.size(2), out.size(3)) 45 | return out 46 | 47 | def xcorr_depthwise_dilation(x, kernel, padding=(0, 0), dilation=(1, 1)): 48 | """depthwise cross correlation 49 | """ 50 | batch = kernel.size(0) 51 | channel = kernel.size(1) 52 | x = x.view(1, batch*channel, x.size(2), x.size(3)) 53 | kernel = kernel.view(batch*channel, 1, kernel.size(2), kernel.size(3)) 54 | out = F.conv2d(x, kernel, groups=batch*channel, padding=padding, dilation=dilation) 55 | out = out.view(batch, channel, out.size(2), out.size(3)) 56 | return out 57 | 58 | -------------------------------------------------------------------------------- /siamreppoints/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/siamreppoints/models/__init__.py -------------------------------------------------------------------------------- /siamreppoints/models/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from siamreppoints.models.backbone.alexnet import alexnetlegacy, alexnet 9 | from siamreppoints.models.backbone.mobile_v2 import mobilenetv2 10 | from siamreppoints.models.backbone.resnet_atrous import resnet18, resnet34, resnet50 11 | 12 | BACKBONES = { 13 | 'alexnetlegacy': alexnetlegacy, 14 | 'mobilenetv2': mobilenetv2, 15 | 'resnet18': resnet18, 16 | 'resnet34': resnet34, 17 | 'resnet50': resnet50, 18 | 'alexnet': alexnet, 19 | } 20 | 21 | 22 | def get_backbone(name, **kwargs): 23 | return BACKBONES[name](**kwargs) 24 | -------------------------------------------------------------------------------- /siamreppoints/models/backbone/alexnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class AlexNetLegacy(nn.Module): 10 | configs = [3, 96, 256, 384, 384, 256] 11 | 12 | def __init__(self, width_mult=1): 13 | configs = list(map(lambda x: 3 if x == 3 else 14 | int(x*width_mult), AlexNet.configs)) 15 | super(AlexNetLegacy, self).__init__() 16 | self.features = nn.Sequential( 17 | nn.Conv2d(configs[0], configs[1], kernel_size=11, stride=2), 18 | nn.BatchNorm2d(configs[1]), 19 | nn.MaxPool2d(kernel_size=3, stride=2), 20 | nn.ReLU(inplace=True), 21 | nn.Conv2d(configs[1], configs[2], kernel_size=5), 22 | nn.BatchNorm2d(configs[2]), 23 | nn.MaxPool2d(kernel_size=3, stride=2), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(configs[2], configs[3], kernel_size=3), 26 | nn.BatchNorm2d(configs[3]), 27 | 
nn.ReLU(inplace=True), 28 | nn.Conv2d(configs[3], configs[4], kernel_size=3), 29 | nn.BatchNorm2d(configs[4]), 30 | nn.ReLU(inplace=True), 31 | nn.Conv2d(configs[4], configs[5], kernel_size=3), 32 | nn.BatchNorm2d(configs[5]), 33 | ) 34 | self.feature_size = configs[5] 35 | 36 | def forward(self, x): 37 | x = self.features(x) 38 | return x 39 | 40 | 41 | class AlexNet(nn.Module): 42 | configs = [3, 96, 256, 384, 384, 256] 43 | 44 | def __init__(self, width_mult=1): 45 | configs = list(map(lambda x: 3 if x == 3 else 46 | int(x*width_mult), AlexNet.configs)) 47 | super(AlexNet, self).__init__() 48 | self.layer1 = nn.Sequential( 49 | nn.Conv2d(configs[0], configs[1], kernel_size=11, stride=2), 50 | nn.BatchNorm2d(configs[1]), 51 | nn.MaxPool2d(kernel_size=3, stride=2), 52 | nn.ReLU(inplace=True), 53 | ) 54 | self.layer2 = nn.Sequential( 55 | nn.Conv2d(configs[1], configs[2], kernel_size=5), 56 | nn.BatchNorm2d(configs[2]), 57 | nn.MaxPool2d(kernel_size=3, stride=2), 58 | nn.ReLU(inplace=True), 59 | ) 60 | self.layer3 = nn.Sequential( 61 | nn.Conv2d(configs[2], configs[3], kernel_size=3), 62 | nn.BatchNorm2d(configs[3]), 63 | nn.ReLU(inplace=True), 64 | ) 65 | self.layer4 = nn.Sequential( 66 | nn.Conv2d(configs[3], configs[4], kernel_size=3), 67 | nn.BatchNorm2d(configs[4]), 68 | nn.ReLU(inplace=True), 69 | ) 70 | 71 | self.layer5 = nn.Sequential( 72 | nn.Conv2d(configs[4], configs[5], kernel_size=3), 73 | nn.BatchNorm2d(configs[5]), 74 | ) 75 | self.feature_size = configs[5] 76 | 77 | def forward(self, x): 78 | x = self.layer1(x) 79 | x = self.layer2(x) 80 | x = self.layer3(x) 81 | x = self.layer4(x) 82 | x = self.layer5(x) 83 | return x 84 | 85 | 86 | def alexnetlegacy(**kwargs): 87 | return AlexNetLegacy(**kwargs) 88 | 89 | 90 | def alexnet(**kwargs): 91 | return AlexNet(**kwargs) 92 | -------------------------------------------------------------------------------- /siamreppoints/models/backbone/mobile_v2.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | def conv_bn(inp, oup, stride, padding=1): 11 | return nn.Sequential( 12 | nn.Conv2d(inp, oup, 3, stride, padding, bias=False), 13 | nn.BatchNorm2d(oup), 14 | nn.ReLU6(inplace=True) 15 | ) 16 | 17 | 18 | def conv_1x1_bn(inp, oup): 19 | return nn.Sequential( 20 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 21 | nn.BatchNorm2d(oup), 22 | nn.ReLU6(inplace=True) 23 | ) 24 | 25 | 26 | class InvertedResidual(nn.Module): 27 | def __init__(self, inp, oup, stride, expand_ratio, dilation=1): 28 | super(InvertedResidual, self).__init__() 29 | self.stride = stride 30 | 31 | self.use_res_connect = self.stride == 1 and inp == oup 32 | 33 | padding = 2 - stride 34 | if dilation > 1: 35 | padding = dilation 36 | 37 | self.conv = nn.Sequential( 38 | # pw 39 | nn.Conv2d(inp, inp * expand_ratio, 1, 1, 0, bias=False), 40 | nn.BatchNorm2d(inp * expand_ratio), 41 | nn.ReLU6(inplace=True), 42 | # dw 43 | nn.Conv2d(inp * expand_ratio, inp * expand_ratio, 3, 44 | stride, padding, dilation=dilation, 45 | groups=inp * expand_ratio, bias=False), 46 | nn.BatchNorm2d(inp * expand_ratio), 47 | nn.ReLU6(inplace=True), 48 | # pw-linear 49 | nn.Conv2d(inp * expand_ratio, oup, 1, 1, 0, bias=False), 50 | nn.BatchNorm2d(oup), 51 | ) 52 | 53 | def forward(self, x): 54 | if self.use_res_connect: 55 | return x + self.conv(x) 56 
| else: 57 | return self.conv(x) 58 | 59 | 60 | class MobileNetV2(nn.Sequential): 61 | def __init__(self, width_mult=1.0, used_layers=[3, 5, 7]): 62 | super(MobileNetV2, self).__init__() 63 | 64 | self.interverted_residual_setting = [ 65 | # t, c, n, s 66 | [1, 16, 1, 1, 1], 67 | [6, 24, 2, 2, 1], 68 | [6, 32, 3, 2, 1], 69 | [6, 64, 4, 2, 1], 70 | [6, 96, 3, 1, 1], 71 | [6, 160, 3, 2, 1], 72 | [6, 320, 1, 1, 1], 73 | ] 74 | # 0,2,3,4,6 75 | 76 | self.interverted_residual_setting = [ 77 | # t, c, n, s 78 | [1, 16, 1, 1, 1], 79 | [6, 24, 2, 2, 1], 80 | [6, 32, 3, 2, 1], 81 | [6, 64, 4, 1, 2], 82 | [6, 96, 3, 1, 2], 83 | [6, 160, 3, 1, 4], 84 | [6, 320, 1, 1, 4], 85 | ] 86 | 87 | self.channels = [24, 32, 96, 320] 88 | self.channels = [int(c * width_mult) for c in self.channels] 89 | 90 | input_channel = int(32 * width_mult) 91 | self.last_channel = int(1280 * width_mult) \ 92 | if width_mult > 1.0 else 1280 93 | 94 | self.add_module('layer0', conv_bn(3, input_channel, 2, 0)) 95 | 96 | last_dilation = 1 97 | 98 | self.used_layers = used_layers 99 | 100 | for idx, (t, c, n, s, d) in \ 101 | enumerate(self.interverted_residual_setting, start=1): 102 | output_channel = int(c * width_mult) 103 | 104 | layers = [] 105 | 106 | for i in range(n): 107 | if i == 0: 108 | if d == last_dilation: 109 | dd = d 110 | else: 111 | dd = max(d // 2, 1) 112 | layers.append(InvertedResidual(input_channel, 113 | output_channel, s, t, dd)) 114 | else: 115 | layers.append(InvertedResidual(input_channel, 116 | output_channel, 1, t, d)) 117 | input_channel = output_channel 118 | 119 | last_dilation = d 120 | 121 | self.add_module('layer%d' % (idx), nn.Sequential(*layers)) 122 | 123 | def forward(self, x): 124 | outputs = [] 125 | for idx in range(8): 126 | name = "layer%d" % idx 127 | x = getattr(self, name)(x) 128 | outputs.append(x) 129 | p0, p1, p2, p3, p4 = [outputs[i] for i in [1, 2, 3, 5, 7]] 130 | out = [outputs[i] for i in self.used_layers] 131 | if len(out) == 1: 132 | return out[0] 133 | return out 134 | 135 | 136 | def mobilenetv2(**kwargs): 137 | model = MobileNetV2(**kwargs) 138 | return model 139 | 140 | 141 | if __name__ == '__main__': 142 | net = mobilenetv2() 143 | 144 | print(net) 145 | 146 | from torch.autograd import Variable 147 | tensor = Variable(torch.Tensor(1, 3, 255, 255)).cuda() 148 | 149 | net = net.cuda() 150 | 151 | out = net(tensor) 152 | 153 | for i, p in enumerate(out): 154 | print(i, p.size()) 155 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
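// Dispatch layer for the Python bindings: when the input tensor is on the GPU the call is
// routed to the CUDA implementation (available only when built with WITH_CUDA); otherwise
// the forward pass falls back to ROIAlign_forward_cpu, while the backward pass has no CPU
// implementation and raises an error.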
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return 
SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cpu/vision.h: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cuda/SigmoidFocalLoss_cuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | // This file is modified from https://github.com/pytorch/pytorch/blob/master/modules/detectron/sigmoid_focal_loss_op.cu 3 | // Cheng-Yang Fu 4 | // cyfu@cs.unc.edu 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | // TODO make it in a common file 15 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 16 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 17 | i += blockDim.x * gridDim.x) 18 | 19 | 20 | template 21 | __global__ void SigmoidFocalLossForward(const int nthreads, 22 | const T* logits, 23 | const int* targets, 24 | const int num_classes, 25 | const float gamma, 26 | const float alpha, 27 | const int num, 28 | T* losses) { 29 | CUDA_1D_KERNEL_LOOP(i, nthreads) { 30 | 31 | int n = i / num_classes; 32 | int d = i % num_classes; // current class[0~79]; 33 | int t = targets[n]; // target class [1~80]; 34 | 35 | // Decide it is positive or negative case. 36 | T c1 = (t == (d+1)); 37 | T c2 = (t>=0 & t != (d+1)); 38 | 39 | T zn = (1.0 - alpha); 40 | T zp = (alpha); 41 | 42 | // p = 1. / 1. + expf(-x); p = sigmoid(x) 43 | T p = 1. / (1. + expf(-logits[i])); 44 | 45 | // (1-p)**gamma * log(p) where 46 | T term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN)); 47 | 48 | // p**gamma * log(1-p) 49 | T term2 = powf(p, gamma) * 50 | (-1. * logits[i] * (logits[i] >= 0) - 51 | logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))); 52 | 53 | losses[i] = 0.0; 54 | losses[i] += -c1 * term1 * zp; 55 | losses[i] += -c2 * term2 * zn; 56 | 57 | } // CUDA_1D_KERNEL_LOOP 58 | } // SigmoidFocalLossForward 59 | 60 | 61 | template 62 | __global__ void SigmoidFocalLossBackward(const int nthreads, 63 | const T* logits, 64 | const int* targets, 65 | const T* d_losses, 66 | const int num_classes, 67 | const float gamma, 68 | const float alpha, 69 | const int num, 70 | T* d_logits) { 71 | CUDA_1D_KERNEL_LOOP(i, nthreads) { 72 | 73 | int n = i / num_classes; 74 | int d = i % num_classes; // current class[0~79]; 75 | int t = targets[n]; // target class [1~80], 0 is background; 76 | 77 | // Decide it is positive or negative case. 78 | T c1 = (t == (d+1)); 79 | T c2 = (t>=0 & t != (d+1)); 80 | 81 | T zn = (1.0 - alpha); 82 | T zp = (alpha); 83 | // p = 1. / 1. + expf(-x); p = sigmoid(x) 84 | T p = 1. / (1. + expf(-logits[i])); 85 | 86 | // (1-p)**g * (1 - p - g*p*log(p) 87 | T term1 = powf((1. - p), gamma) * 88 | (1. - p - (p * gamma * logf(max(p, FLT_MIN)))); 89 | 90 | // (p**g) * (g*(1-p)*log(1-p) - p) 91 | T term2 = powf(p, gamma) * 92 | ((-1. * logits[i] * (logits[i] >= 0) - 93 | logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) * 94 | (1. 
- p) * gamma - p); 95 | d_logits[i] = 0.0; 96 | d_logits[i] += -c1 * term1 * zp; 97 | d_logits[i] += -c2 * term2 * zn; 98 | d_logits[i] = d_logits[i] * d_losses[i]; 99 | 100 | } // CUDA_1D_KERNEL_LOOP 101 | } // SigmoidFocalLossBackward 102 | 103 | 104 | at::Tensor SigmoidFocalLoss_forward_cuda( 105 | const at::Tensor& logits, 106 | const at::Tensor& targets, 107 | const int num_classes, 108 | const float gamma, 109 | const float alpha) { 110 | AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); 111 | AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); 112 | AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); 113 | 114 | const int num_samples = logits.size(0); 115 | 116 | auto losses = at::empty({num_samples, logits.size(1)}, logits.options()); 117 | auto losses_size = num_samples * logits.size(1); 118 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 119 | 120 | dim3 grid(std::min(THCCeilDiv((long)losses_size, 512L), 4096L)); 121 | dim3 block(512); 122 | 123 | if (losses.numel() == 0) { 124 | THCudaCheck(cudaGetLastError()); 125 | return losses; 126 | } 127 | 128 | AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_forward", [&] { 129 | SigmoidFocalLossForward<<>>( 130 | losses_size, 131 | logits.contiguous().data(), 132 | targets.contiguous().data(), 133 | num_classes, 134 | gamma, 135 | alpha, 136 | num_samples, 137 | losses.data()); 138 | }); 139 | THCudaCheck(cudaGetLastError()); 140 | return losses; 141 | } 142 | 143 | 144 | at::Tensor SigmoidFocalLoss_backward_cuda( 145 | const at::Tensor& logits, 146 | const at::Tensor& targets, 147 | const at::Tensor& d_losses, 148 | const int num_classes, 149 | const float gamma, 150 | const float alpha) { 151 | AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); 152 | AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); 153 | AT_ASSERTM(d_losses.type().is_cuda(), "d_losses must be a CUDA tensor"); 154 | 155 | AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); 156 | 157 | const int num_samples = logits.size(0); 158 | AT_ASSERTM(logits.size(1) == num_classes, "logits.size(1) should be num_classes"); 159 | 160 | auto d_logits = at::zeros({num_samples, num_classes}, logits.options()); 161 | auto d_logits_size = num_samples * logits.size(1); 162 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 163 | 164 | dim3 grid(std::min(THCCeilDiv((long)d_logits_size, 512L), 4096L)); 165 | dim3 block(512); 166 | 167 | if (d_logits.numel() == 0) { 168 | THCudaCheck(cudaGetLastError()); 169 | return d_logits; 170 | } 171 | 172 | AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_backward", [&] { 173 | SigmoidFocalLossBackward<<>>( 174 | d_logits_size, 175 | logits.contiguous().data(), 176 | targets.contiguous().data(), 177 | d_losses.contiguous().data(), 178 | num_classes, 179 | gamma, 180 | alpha, 181 | num_samples, 182 | d_logits.data()); 183 | }); 184 | 185 | THCudaCheck(cudaGetLastError()); 186 | return d_logits; 187 | } 188 | 189 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cuda/deform_pool_cuda.cu: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 
9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | void DeformablePSROIPoolForward( 20 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 21 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 22 | const int height, const int width, const int num_bbox, 23 | const int channels_trans, const int no_trans, const float spatial_scale, 24 | const int output_dim, const int group_size, const int pooled_size, 25 | const int part_size, const int sample_per_part, const float trans_std); 26 | 27 | void DeformablePSROIPoolBackwardAcc( 28 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 29 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 30 | at::Tensor trans_grad, const int batch, const int channels, 31 | const int height, const int width, const int num_bbox, 32 | const int channels_trans, const int no_trans, const float spatial_scale, 33 | const int output_dim, const int group_size, const int pooled_size, 34 | const int part_size, const int sample_per_part, const float trans_std); 35 | 36 | void deform_psroi_pooling_cuda_forward( 37 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 38 | at::Tensor top_count, const int no_trans, const float spatial_scale, 39 | const int output_dim, const int group_size, const int pooled_size, 40 | const int part_size, const int sample_per_part, const float trans_std) 41 | { 42 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 43 | 44 | const int batch = input.size(0); 45 | const int channels = input.size(1); 46 | const int height = input.size(2); 47 | const int width = input.size(3); 48 | const int channels_trans = no_trans ? 2 : trans.size(1); 49 | 50 | const int num_bbox = bbox.size(0); 51 | if (num_bbox != out.size(0)) 52 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 53 | out.size(0), num_bbox); 54 | 55 | DeformablePSROIPoolForward( 56 | input, bbox, trans, out, top_count, batch, channels, height, width, 57 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 58 | pooled_size, part_size, sample_per_part, trans_std); 59 | } 60 | 61 | void deform_psroi_pooling_cuda_backward( 62 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 63 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 64 | const int no_trans, const float spatial_scale, const int output_dim, 65 | const int group_size, const int pooled_size, const int part_size, 66 | const int sample_per_part, const float trans_std) 67 | { 68 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 69 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 70 | 71 | const int batch = input.size(0); 72 | const int channels = input.size(1); 73 | const int height = input.size(2); 74 | const int width = input.size(3); 75 | const int channels_trans = no_trans ? 
2 : trans.size(1); 76 | 77 | const int num_bbox = bbox.size(0); 78 | if (num_bbox != out_grad.size(0)) 79 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 80 | out_grad.size(0), num_bbox); 81 | 82 | DeformablePSROIPoolBackwardAcc( 83 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 84 | channels, height, width, num_bbox, channels_trans, no_trans, 85 | spatial_scale, output_dim, group_size, pooled_size, part_size, 86 | sample_per_part, trans_std); 87 | } 88 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cuda/ml_nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | if (a[5] != b[5]) { 15 | return 0.0; 16 | } 17 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 18 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 19 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 20 | float interS = width * height; 21 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 22 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 23 | return interS / (Sa + Sb - interS); 24 | } 25 | 26 | __global__ void ml_nms_kernel(const int n_boxes, const float nms_overlap_thresh, 27 | const float *dev_boxes, unsigned long long *dev_mask) { 28 | const int row_start = blockIdx.y; 29 | const int col_start = blockIdx.x; 30 | 31 | // if (row_start > col_start) return; 32 | 33 | const int row_size = 34 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 35 | const int col_size = 36 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 37 | 38 | __shared__ float block_boxes[threadsPerBlock * 6]; 39 | if (threadIdx.x < col_size) { 40 | block_boxes[threadIdx.x * 6 + 0] = 41 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 0]; 42 | block_boxes[threadIdx.x * 6 + 1] = 43 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 1]; 44 | block_boxes[threadIdx.x * 6 + 2] = 45 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 2]; 46 | block_boxes[threadIdx.x * 6 + 3] = 47 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 3]; 48 | block_boxes[threadIdx.x * 6 + 4] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 4]; 50 | block_boxes[threadIdx.x * 6 + 5] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 6 + 5]; 52 | } 53 | __syncthreads(); 54 | 55 | if (threadIdx.x < row_size) { 56 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 57 | const float *cur_box = dev_boxes + cur_box_idx * 6; 58 | int i = 0; 59 | unsigned long long t = 0; 60 | int start = 0; 61 | if (row_start == col_start) { 62 | start = threadIdx.x + 1; 63 | } 64 | for (i = start; i < col_size; i++) { 65 | if (devIoU(cur_box, block_boxes + i * 6) > nms_overlap_thresh) { 66 | t |= 1ULL << i; 67 | } 68 | } 69 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 70 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 71 | } 72 | } 73 | 74 | // boxes is a N x 6 tensor 75 | at::Tensor ml_nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 76 | using scalar_t = float; 77 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 78 | auto scores = 
boxes.select(1, 4); 79 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 80 | auto boxes_sorted = boxes.index_select(0, order_t); 81 | 82 | int boxes_num = boxes.size(0); 83 | 84 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 85 | 86 | scalar_t* boxes_dev = boxes_sorted.data(); 87 | 88 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 89 | 90 | unsigned long long* mask_dev = NULL; 91 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 92 | // boxes_num * col_blocks * sizeof(unsigned long long))); 93 | 94 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 95 | 96 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 97 | THCCeilDiv(boxes_num, threadsPerBlock)); 98 | dim3 threads(threadsPerBlock); 99 | ml_nms_kernel<<>>(boxes_num, 100 | nms_overlap_thresh, 101 | boxes_dev, 102 | mask_dev); 103 | 104 | std::vector mask_host(boxes_num * col_blocks); 105 | THCudaCheck(cudaMemcpy(&mask_host[0], 106 | mask_dev, 107 | sizeof(unsigned long long) * boxes_num * col_blocks, 108 | cudaMemcpyDeviceToHost)); 109 | 110 | std::vector remv(col_blocks); 111 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 112 | 113 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 114 | int64_t* keep_out = keep.data(); 115 | 116 | int num_to_keep = 0; 117 | for (int i = 0; i < boxes_num; i++) { 118 | int nblock = i / threadsPerBlock; 119 | int inblock = i % threadsPerBlock; 120 | 121 | if (!(remv[nblock] & (1ULL << inblock))) { 122 | keep_out[num_to_keep++] = i; 123 | unsigned long long *p = &mask_host[0] + i * col_blocks; 124 | for (int j = nblock; j < col_blocks; j++) { 125 | remv[j] |= p[j]; 126 | } 127 | } 128 | } 129 | 130 | THCudaFree(state, mask_dev); 131 | // TODO improve this part 132 | return std::get<0>(order_t.index({ 133 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 134 | order_t.device(), keep.scalar_type()) 135 | }).sort(0, false)); 136 | } 137 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cuda/nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
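// Greedy NMS on the GPU: boxes are first sorted by score (column 4), each thread block
// then compares one tile of up to 64 boxes against another tile, and every suppression
// decision is packed as a bit into an unsigned long long mask; the host loop at the end
// walks this bitmask to collect the indices of the boxes that are kept.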
2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 15 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 16 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 17 | float interS = width * height; 18 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 19 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 20 | return interS / (Sa + Sb - interS); 21 | } 22 | 23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 24 | const float *dev_boxes, unsigned long long *dev_mask) { 25 | const int row_start = blockIdx.y; 26 | const int col_start = blockIdx.x; 27 | 28 | // if (row_start > col_start) return; 29 | 30 | const int row_size = 31 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 32 | const int col_size = 33 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 34 | 35 | __shared__ float block_boxes[threadsPerBlock * 5]; 36 | if (threadIdx.x < col_size) { 37 | block_boxes[threadIdx.x * 5 + 0] = 38 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 39 | block_boxes[threadIdx.x * 5 + 1] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 41 | block_boxes[threadIdx.x * 5 + 2] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 | block_boxes[threadIdx.x * 5 + 3] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 45 | block_boxes[threadIdx.x * 5 + 4] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 47 | } 48 | __syncthreads(); 49 | 50 | if (threadIdx.x < row_size) { 51 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 52 | const float *cur_box = dev_boxes + cur_box_idx * 5; 53 | int i = 0; 54 | unsigned long long t = 0; 55 | int start = 0; 56 | if (row_start == col_start) { 57 | start = threadIdx.x + 1; 58 | } 59 | for (i = start; i < col_size; i++) { 60 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 61 | t |= 1ULL << i; 62 | } 63 | } 64 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 65 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 66 | } 67 | } 68 | 69 | // boxes is a N x 5 tensor 70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 71 | using scalar_t = float; 72 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 73 | auto scores = boxes.select(1, 4); 74 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 75 | auto boxes_sorted = boxes.index_select(0, order_t); 76 | 77 | int boxes_num = boxes.size(0); 78 | 79 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 80 | 81 | scalar_t* boxes_dev = boxes_sorted.data(); 82 | 83 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 84 | 85 | unsigned long long* mask_dev = NULL; 86 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 87 | // boxes_num * col_blocks * sizeof(unsigned long long))); 88 | 89 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 90 | 91 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 92 | THCCeilDiv(boxes_num, threadsPerBlock)); 93 | dim3 threads(threadsPerBlock); 94 | nms_kernel<<>>(boxes_num, 95 | nms_overlap_thresh, 96 | boxes_dev, 97 | mask_dev); 98 | 99 | std::vector 
mask_host(boxes_num * col_blocks); 100 | THCudaCheck(cudaMemcpy(&mask_host[0], 101 | mask_dev, 102 | sizeof(unsigned long long) * boxes_num * col_blocks, 103 | cudaMemcpyDeviceToHost)); 104 | 105 | std::vector remv(col_blocks); 106 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 107 | 108 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 109 | int64_t* keep_out = keep.data(); 110 | 111 | int num_to_keep = 0; 112 | for (int i = 0; i < boxes_num; i++) { 113 | int nblock = i / threadsPerBlock; 114 | int inblock = i % threadsPerBlock; 115 | 116 | if (!(remv[nblock] & (1ULL << inblock))) { 117 | keep_out[num_to_keep++] = i; 118 | unsigned long long *p = &mask_host[0] + i * col_blocks; 119 | for (int j = nblock; j < col_blocks; j++) { 120 | remv[j] |= p[j]; 121 | } 122 | } 123 | } 124 | 125 | THCudaFree(state, mask_dev); 126 | // TODO improve this part 127 | return std::get<0>(order_t.index({ 128 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 129 | order_t.device(), keep.scalar_type()) 130 | }).sort(0, false)); 131 | } 132 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor SigmoidFocalLoss_forward_cuda( 7 | const at::Tensor& logits, 8 | const at::Tensor& targets, 9 | const int num_classes, 10 | const float gamma, 11 | const float alpha); 12 | 13 | at::Tensor SigmoidFocalLoss_backward_cuda( 14 | const at::Tensor& logits, 15 | const at::Tensor& targets, 16 | const at::Tensor& d_losses, 17 | const int num_classes, 18 | const float gamma, 19 | const float alpha); 20 | 21 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 22 | const at::Tensor& rois, 23 | const float spatial_scale, 24 | const int pooled_height, 25 | const int pooled_width, 26 | const int sampling_ratio); 27 | 28 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 29 | const at::Tensor& rois, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width, 37 | const int sampling_ratio); 38 | 39 | 40 | std::tuple ROIPool_forward_cuda(const at::Tensor& input, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width); 45 | 46 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 47 | const at::Tensor& input, 48 | const at::Tensor& rois, 49 | const at::Tensor& argmax, 50 | const float spatial_scale, 51 | const int pooled_height, 52 | const int pooled_width, 53 | const int batch_size, 54 | const int channels, 55 | const int height, 56 | const int width); 57 | 58 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 59 | at::Tensor ml_nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 60 | 61 | int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, 62 | at::Tensor offset, at::Tensor output, 63 | at::Tensor columns, at::Tensor ones, int kW, 64 | int kH, int dW, int dH, int padW, int padH, 65 | int dilationW, int dilationH, int group, 66 | int deformable_group, int im2col_step); 67 | 68 | int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, 69 | at::Tensor gradOutput, at::Tensor gradInput, 70 | at::Tensor gradOffset, 
at::Tensor weight, 71 | at::Tensor columns, int kW, int kH, int dW, 72 | int dH, int padW, int padH, int dilationW, 73 | int dilationH, int group, 74 | int deformable_group, int im2col_step); 75 | 76 | int deform_conv_backward_parameters_cuda( 77 | at::Tensor input, at::Tensor offset, at::Tensor gradOutput, 78 | at::Tensor gradWeight, // at::Tensor gradBias, 79 | at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, 80 | int padW, int padH, int dilationW, int dilationH, int group, 81 | int deformable_group, float scale, int im2col_step); 82 | 83 | void modulated_deform_conv_cuda_forward( 84 | at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, 85 | at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, 86 | int kernel_h, int kernel_w, const int stride_h, const int stride_w, 87 | const int pad_h, const int pad_w, const int dilation_h, 88 | const int dilation_w, const int group, const int deformable_group, 89 | const bool with_bias); 90 | 91 | void modulated_deform_conv_cuda_backward( 92 | at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, 93 | at::Tensor offset, at::Tensor mask, at::Tensor columns, 94 | at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, 95 | at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, 96 | int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, 97 | int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, 98 | const bool with_bias); 99 | 100 | void deform_psroi_pooling_cuda_forward( 101 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 102 | at::Tensor top_count, const int no_trans, const float spatial_scale, 103 | const int output_dim, const int group_size, const int pooled_size, 104 | const int part_size, const int sample_per_part, const float trans_std); 105 | 106 | void deform_psroi_pooling_cuda_backward( 107 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 108 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 109 | const int no_trans, const float spatial_scale, const int output_dim, 110 | const int group_size, const int pooled_size, const int part_size, 111 | const int sample_per_part, const float trans_std); 112 | 113 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 114 | const int height, 115 | const int width); 116 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/deform_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
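// Python-facing wrappers for deformable convolution and modulated deformable convolution:
// each function checks whether the input is a CUDA tensor and forwards to the
// corresponding *_cuda implementation declared in cuda/vision.h; there is no CPU
// fallback, so calling these with CPU tensors raises an error.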
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | int deform_conv_forward( 12 | at::Tensor input, 13 | at::Tensor weight, 14 | at::Tensor offset, 15 | at::Tensor output, 16 | at::Tensor columns, 17 | at::Tensor ones, 18 | int kW, 19 | int kH, 20 | int dW, 21 | int dH, 22 | int padW, 23 | int padH, 24 | int dilationW, 25 | int dilationH, 26 | int group, 27 | int deformable_group, 28 | int im2col_step) 29 | { 30 | if (input.type().is_cuda()) { 31 | #ifdef WITH_CUDA 32 | return deform_conv_forward_cuda( 33 | input, weight, offset, output, columns, ones, 34 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 35 | group, deformable_group, im2col_step 36 | ); 37 | #else 38 | AT_ERROR("Not compiled with GPU support"); 39 | #endif 40 | } 41 | AT_ERROR("Not implemented on the CPU"); 42 | } 43 | 44 | 45 | int deform_conv_backward_input( 46 | at::Tensor input, 47 | at::Tensor offset, 48 | at::Tensor gradOutput, 49 | at::Tensor gradInput, 50 | at::Tensor gradOffset, 51 | at::Tensor weight, 52 | at::Tensor columns, 53 | int kW, 54 | int kH, 55 | int dW, 56 | int dH, 57 | int padW, 58 | int padH, 59 | int dilationW, 60 | int dilationH, 61 | int group, 62 | int deformable_group, 63 | int im2col_step) 64 | { 65 | if (input.type().is_cuda()) { 66 | #ifdef WITH_CUDA 67 | return deform_conv_backward_input_cuda( 68 | input, offset, gradOutput, gradInput, gradOffset, weight, columns, 69 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 70 | group, deformable_group, im2col_step 71 | ); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | AT_ERROR("Not implemented on the CPU"); 77 | } 78 | 79 | 80 | int deform_conv_backward_parameters( 81 | at::Tensor input, 82 | at::Tensor offset, 83 | at::Tensor gradOutput, 84 | at::Tensor gradWeight, // at::Tensor gradBias, 85 | at::Tensor columns, 86 | at::Tensor ones, 87 | int kW, 88 | int kH, 89 | int dW, 90 | int dH, 91 | int padW, 92 | int padH, 93 | int dilationW, 94 | int dilationH, 95 | int group, 96 | int deformable_group, 97 | float scale, 98 | int im2col_step) 99 | { 100 | if (input.type().is_cuda()) { 101 | #ifdef WITH_CUDA 102 | return deform_conv_backward_parameters_cuda( 103 | input, offset, gradOutput, gradWeight, columns, ones, 104 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 105 | group, deformable_group, scale, im2col_step 106 | ); 107 | #else 108 | AT_ERROR("Not compiled with GPU support"); 109 | #endif 110 | } 111 | AT_ERROR("Not implemented on the CPU"); 112 | } 113 | 114 | 115 | void modulated_deform_conv_forward( 116 | at::Tensor input, 117 | at::Tensor weight, 118 | at::Tensor bias, 119 | at::Tensor ones, 120 | at::Tensor offset, 121 | at::Tensor mask, 122 | at::Tensor output, 123 | at::Tensor columns, 124 | int kernel_h, 125 | int kernel_w, 126 | const int stride_h, 127 | const int stride_w, 128 | const int pad_h, 129 | const int pad_w, 130 | const int dilation_h, 131 | const int dilation_w, 132 | const int group, 133 | const int deformable_group, 134 | const bool with_bias) 135 | { 136 | if (input.type().is_cuda()) { 137 | #ifdef WITH_CUDA 138 | return modulated_deform_conv_cuda_forward( 139 | input, weight, bias, ones, offset, mask, output, columns, 140 | kernel_h, kernel_w, stride_h, stride_w, 141 | pad_h, pad_w, dilation_h, dilation_w, 142 | group, deformable_group, with_bias 143 | ); 144 | #else 145 | AT_ERROR("Not compiled with GPU support"); 146 | #endif 147 | } 148 | AT_ERROR("Not implemented on the 
CPU"); 149 | } 150 | 151 | 152 | void modulated_deform_conv_backward( 153 | at::Tensor input, 154 | at::Tensor weight, 155 | at::Tensor bias, 156 | at::Tensor ones, 157 | at::Tensor offset, 158 | at::Tensor mask, 159 | at::Tensor columns, 160 | at::Tensor grad_input, 161 | at::Tensor grad_weight, 162 | at::Tensor grad_bias, 163 | at::Tensor grad_offset, 164 | at::Tensor grad_mask, 165 | at::Tensor grad_output, 166 | int kernel_h, 167 | int kernel_w, 168 | int stride_h, 169 | int stride_w, 170 | int pad_h, 171 | int pad_w, 172 | int dilation_h, 173 | int dilation_w, 174 | int group, 175 | int deformable_group, 176 | const bool with_bias) 177 | { 178 | if (input.type().is_cuda()) { 179 | #ifdef WITH_CUDA 180 | return modulated_deform_conv_cuda_backward( 181 | input, weight, bias, ones, offset, mask, columns, 182 | grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output, 183 | kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, 184 | group, deformable_group, with_bias 185 | ); 186 | #else 187 | AT_ERROR("Not compiled with GPU support"); 188 | #endif 189 | } 190 | AT_ERROR("Not implemented on the CPU"); 191 | } -------------------------------------------------------------------------------- /siamreppoints/models/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | const int sample_per_part, 56 | const float trans_std) 57 | { 58 | if (input.type().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/ml_nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor ml_nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const at::Tensor& labels, 13 | const float threshold) { 14 | 15 | if (dets.type().is_cuda()) { 16 | #ifdef WITH_CUDA 17 | // TODO raise error if not compiled with CUDA 18 | if (dets.numel() == 0) 19 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 20 | auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1); 21 | return ml_nms_cuda(b, threshold); 22 | #else 23 | AT_ERROR("Not compiled with GPU support"); 24 | #endif 25 | } 26 | AT_ERROR("CPU version not implemented"); 27 | } 28 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /siamreppoints/models/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "nms.h" 3 | #include "ml_nms.h" 4 | #include "ROIAlign.h" 5 | #include "ROIPool.h" 6 | #include "SigmoidFocalLoss.h" 7 | #include "deform_conv.h" 8 | #include "deform_pool.h" 9 | 10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 11 | m.def("nms", &nms, "non-maximum suppression"); 12 | m.def("ml_nms", &ml_nms, "multi-label non-maximum suppression"); 13 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 14 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 15 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 16 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 17 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 18 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 19 | // dcn-v2 20 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 21 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 22 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 23 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 24 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 25 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 26 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 27 | } 28 | -------------------------------------------------------------------------------- /siamreppoints/models/head/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from siamreppoints.models.head.rpn import UPChannelRPN, DepthwiseRPN, MultiRPN 9 | 10 | RPNS = { 11 | 'UPChannelRPN': UPChannelRPN, 12 | 'DepthwiseRPN': DepthwiseRPN, 13 | 'MultiRPN': MultiRPN 14 | } 15 | 16 | def get_rpn_head(name, **kwargs): 17 | return RPNS[name](**kwargs) 18 | 19 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d 5 | from .misc import Conv2d 6 | from .misc import DFConv2d 7 | from .misc import ConvTranspose2d 8 | from .misc import BatchNorm2d 9 | from .misc import interpolate 10 | from .nms import nms, ml_nms 11 | from .roi_align import ROIAlign 12 | from .roi_align import roi_align 13 | from .roi_pool import ROIPool 14 | from .roi_pool import roi_pool 15 | from .smooth_l1_loss import smooth_l1_loss 16 | from .sigmoid_focal_loss import SigmoidFocalLoss 17 | from .iou_loss import IOULoss 18 | from .scale import Scale 19 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 20 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, \ 21 | ModulatedDeformConvPack 22 | from .dcn.deform_pool_func import deform_roi_pooling 23 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, \ 24 | ModulatedDeformRoIPoolingPack 25 | 26 | 27 | __all__ = [ 28 | "nms", 29 | "ml_nms", 30 | "roi_align", 31 | "ROIAlign", 32 | "roi_pool", 33 | "ROIPool", 34 | "smooth_l1_loss", 35 | "Conv2d", 36 | "DFConv2d", 37 | "ConvTranspose2d", 38 | "interpolate", 39 | "BatchNorm2d", 40 | "FrozenBatchNorm2d", 41 | "SigmoidFocalLoss", 42 | 'deform_conv', 43 | 'modulated_deform_conv', 44 | 'DeformConv', 45 | 'ModulatedDeformConv', 46 | 'ModulatedDeformConvPack', 47 | 'deform_roi_pooling', 48 | 'DeformRoIPooling', 49 | 'DeformRoIPoolingPack', 50 | 'ModulatedDeformRoIPoolingPack', 51 | "IOULoss", 52 | "Scale" 53 | ] 54 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import glob 3 | import os.path 4 | 5 | import torch 6 | 7 | try: 8 | from torch.utils.cpp_extension import load as load_ext 9 | from torch.utils.cpp_extension import CUDA_HOME 10 | except ImportError: 11 | raise ImportError("The cpp layer extensions requires PyTorch 0.4 or higher") 12 | 13 | 14 | def _load_C_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | this_dir = os.path.dirname(this_dir) 17 | this_dir = os.path.join(this_dir, "csrc") 18 | 19 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 22 | 23 | source = main_file + source_cpu 24 | 25 | extra_cflags = [] 26 | if torch.cuda.is_available() and CUDA_HOME is not None: 27 | source.extend(source_cuda) 28 | extra_cflags = ["-DWITH_CUDA"] 29 | source = [os.path.join(this_dir, s) for s in source] 30 | extra_include_paths = [this_dir] 31 | return load_ext( 32 | "torchvision", 33 | source, 34 | extra_cflags=extra_cflags, 35 | extra_include_paths=extra_include_paths, 36 | ) 37 | 38 | 39 | _C = _load_C_extensions() 40 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/batch_norm.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
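The frozen batch norm defined just below folds the stored running statistics into a fixed per-channel affine transform, so nothing is updated during training. A quick numeric check with made-up statistics (importing the layers package assumes the compiled extension has been built):

```python
import torch
from siamreppoints.models.layers import FrozenBatchNorm2d

bn = FrozenBatchNorm2d(2)
# pretend these buffers were loaded from a pretrained checkpoint
bn.running_mean.copy_(torch.tensor([0.5, -1.0]))
bn.running_var.copy_(torch.tensor([4.0, 0.25]))

x = torch.randn(1, 2, 3, 3)
expected = (x - bn.running_mean.view(1, -1, 1, 1)) * bn.running_var.view(1, -1, 1, 1).rsqrt()
print(torch.allclose(bn(x), expected))   # True, since weight=1 and bias=0 by default
```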
2 | import torch 3 | from torch import nn 4 | 5 | 6 | class FrozenBatchNorm2d(nn.Module): 7 | """ 8 | BatchNorm2d where the batch statistics and the affine parameters 9 | are fixed 10 | """ 11 | 12 | def __init__(self, n): 13 | super(FrozenBatchNorm2d, self).__init__() 14 | self.register_buffer("weight", torch.ones(n)) 15 | self.register_buffer("bias", torch.zeros(n)) 16 | self.register_buffer("running_mean", torch.zeros(n)) 17 | self.register_buffer("running_var", torch.ones(n)) 18 | 19 | def forward(self, x): 20 | scale = self.weight * self.running_var.rsqrt() 21 | bias = self.bias - self.running_mean * scale 22 | scale = scale.reshape(1, -1, 1, 1) 23 | bias = bias.reshape(1, -1, 1, 1) 24 | return x * scale + bias 25 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn) 3 | # -------------------------------------------------------------------------------- /siamreppoints/models/layers/dcn/deform_conv_module.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.modules.utils import _pair 6 | 7 | from .deform_conv_func import deform_conv, modulated_deform_conv 8 | 9 | 10 | class DeformConv(nn.Module): 11 | def __init__( 12 | self, 13 | in_channels, 14 | out_channels, 15 | kernel_size, 16 | stride=1, 17 | padding=0, 18 | dilation=1, 19 | groups=1, 20 | deformable_groups=1, 21 | bias=False 22 | ): 23 | super(DeformConv, self).__init__() 24 | self.with_bias = bias 25 | 26 | assert in_channels % groups == 0, \ 27 | 'in_channels {} cannot be divisible by groups {}'.format( 28 | in_channels, groups) 29 | assert out_channels % groups == 0, \ 30 | 'out_channels {} cannot be divisible by groups {}'.format( 31 | out_channels, groups) 32 | self.in_channels = in_channels 33 | self.out_channels = out_channels 34 | self.kernel_size = _pair(kernel_size) 35 | self.stride = _pair(stride) 36 | self.padding = _pair(padding) 37 | self.dilation = _pair(dilation) 38 | self.groups = groups 39 | self.deformable_groups = deformable_groups 40 | 41 | self.weight = nn.Parameter( 42 | torch.Tensor(out_channels, in_channels // self.groups, 43 | *self.kernel_size)) 44 | if self.with_bias: 45 | self.bias = nn.Parameter(torch.Tensor(out_channels)) 46 | 47 | self.reset_parameters() 48 | 49 | def reset_parameters(self): 50 | n = self.in_channels 51 | for k in self.kernel_size: 52 | n *= k 53 | stdv = 1. / math.sqrt(n) 54 | self.weight.data.uniform_(-stdv, stdv) 55 | if self.with_bias: 56 | torch.nn.init.constant_(self.bias, 0.) 
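`DeformConv` consumes an offset map predicted by a separate layer (its `forward` follows just below). A minimal sketch, assuming the DCN extension has been built and a CUDA device is available, since the CPU path is not implemented:

```python
import torch
import torch.nn as nn
from siamreppoints.models.layers import DeformConv

# for a 3x3 kernel with one deformable group, the offset map needs 2*3*3 = 18 channels
conv = DeformConv(64, 64, kernel_size=3, padding=1).cuda()
offset_pred = nn.Conv2d(64, 18, kernel_size=3, padding=1).cuda()

x = torch.randn(1, 64, 32, 32, device='cuda')
y = conv(x, offset_pred(x))
print(y.shape)   # torch.Size([1, 64, 32, 32])
```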
57 | 58 | def forward(self, input, offset): 59 | y = deform_conv(input, offset, self.weight, self.stride, 60 | self.padding, self.dilation, self.groups, 61 | self.deformable_groups) 62 | if self.with_bias: 63 | assert len(y.size()) == 4 64 | y = y + self.bias.reshape(1, -1, 1, 1) 65 | return y 66 | 67 | def __repr__(self): 68 | return "".join([ 69 | "{}(".format(self.__class__.__name__), 70 | "in_channels={}, ".format(self.in_channels), 71 | "out_channels={}, ".format(self.out_channels), 72 | "kernel_size={}, ".format(self.kernel_size), 73 | "stride={}, ".format(self.stride), 74 | "dilation={}, ".format(self.dilation), 75 | "padding={}, ".format(self.padding), 76 | "groups={}, ".format(self.groups), 77 | "deformable_groups={}, ".format(self.deformable_groups), 78 | "bias={})".format(self.with_bias), 79 | ]) 80 | 81 | 82 | class ModulatedDeformConv(nn.Module): 83 | def __init__( 84 | self, 85 | in_channels, 86 | out_channels, 87 | kernel_size, 88 | stride=1, 89 | padding=0, 90 | dilation=1, 91 | groups=1, 92 | deformable_groups=1, 93 | bias=True 94 | ): 95 | super(ModulatedDeformConv, self).__init__() 96 | self.in_channels = in_channels 97 | self.out_channels = out_channels 98 | self.kernel_size = _pair(kernel_size) 99 | self.stride = stride 100 | self.padding = padding 101 | self.dilation = dilation 102 | self.groups = groups 103 | self.deformable_groups = deformable_groups 104 | self.with_bias = bias 105 | 106 | self.weight = nn.Parameter(torch.Tensor( 107 | out_channels, 108 | in_channels // groups, 109 | *self.kernel_size 110 | )) 111 | if bias: 112 | self.bias = nn.Parameter(torch.Tensor(out_channels)) 113 | else: 114 | self.register_parameter('bias', None) 115 | self.reset_parameters() 116 | 117 | def reset_parameters(self): 118 | n = self.in_channels 119 | for k in self.kernel_size: 120 | n *= k 121 | stdv = 1. 
/ math.sqrt(n) 122 | self.weight.data.uniform_(-stdv, stdv) 123 | if self.bias is not None: 124 | self.bias.data.zero_() 125 | 126 | def forward(self, input, offset, mask): 127 | return modulated_deform_conv( 128 | input, offset, mask, self.weight, self.bias, self.stride, 129 | self.padding, self.dilation, self.groups, self.deformable_groups) 130 | 131 | def __repr__(self): 132 | return "".join([ 133 | "{}(".format(self.__class__.__name__), 134 | "in_channels={}, ".format(self.in_channels), 135 | "out_channels={}, ".format(self.out_channels), 136 | "kernel_size={}, ".format(self.kernel_size), 137 | "stride={}, ".format(self.stride), 138 | "dilation={}, ".format(self.dilation), 139 | "padding={}, ".format(self.padding), 140 | "groups={}, ".format(self.groups), 141 | "deformable_groups={}, ".format(self.deformable_groups), 142 | "bias={})".format(self.with_bias), 143 | ]) 144 | 145 | 146 | class ModulatedDeformConvPack(ModulatedDeformConv): 147 | def __init__(self, 148 | in_channels, 149 | out_channels, 150 | kernel_size, 151 | stride=1, 152 | padding=0, 153 | dilation=1, 154 | groups=1, 155 | deformable_groups=1, 156 | bias=True): 157 | super(ModulatedDeformConvPack, self).__init__( 158 | in_channels, out_channels, kernel_size, stride, padding, dilation, 159 | groups, deformable_groups, bias) 160 | 161 | self.conv_offset_mask = nn.Conv2d( 162 | self.in_channels // self.groups, 163 | self.deformable_groups * 3 * self.kernel_size[0] * 164 | self.kernel_size[1], 165 | kernel_size=self.kernel_size, 166 | stride=_pair(self.stride), 167 | padding=_pair(self.padding), 168 | bias=True) 169 | self.init_offset() 170 | 171 | def init_offset(self): 172 | self.conv_offset_mask.weight.data.zero_() 173 | self.conv_offset_mask.bias.data.zero_() 174 | 175 | def forward(self, input): 176 | out = self.conv_offset_mask(input) 177 | o1, o2, mask = torch.chunk(out, 3, dim=1) 178 | offset = torch.cat((o1, o2), dim=1) 179 | mask = torch.sigmoid(mask) 180 | return modulated_deform_conv( 181 | input, offset, mask, self.weight, self.bias, self.stride, 182 | self.padding, self.dilation, self.groups, self.deformable_groups) 183 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/dcn/deform_pool_func.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from siamreppoints.models import _C 6 | 7 | 8 | class DeformRoIPoolingFunction(Function): 9 | 10 | @staticmethod 11 | def forward( 12 | ctx, 13 | data, 14 | rois, 15 | offset, 16 | spatial_scale, 17 | out_size, 18 | out_channels, 19 | no_trans, 20 | group_size=1, 21 | part_size=None, 22 | sample_per_part=4, 23 | trans_std=.0 24 | ): 25 | ctx.spatial_scale = spatial_scale 26 | ctx.out_size = out_size 27 | ctx.out_channels = out_channels 28 | ctx.no_trans = no_trans 29 | ctx.group_size = group_size 30 | ctx.part_size = out_size if part_size is None else part_size 31 | ctx.sample_per_part = sample_per_part 32 | ctx.trans_std = trans_std 33 | 34 | assert 0.0 <= ctx.trans_std <= 1.0 35 | if not data.is_cuda: 36 | raise NotImplementedError 37 | 38 | n = rois.shape[0] 39 | output = data.new_empty(n, out_channels, out_size, out_size) 40 | output_count = data.new_empty(n, out_channels, out_size, out_size) 41 | _C.deform_psroi_pooling_forward( 42 | data, 43 | rois, 44 | offset, 45 | output, 46 | output_count, 47 | ctx.no_trans, 48 | ctx.spatial_scale, 49 | 
ctx.out_channels, 50 | ctx.group_size, 51 | ctx.out_size, 52 | ctx.part_size, 53 | ctx.sample_per_part, 54 | ctx.trans_std 55 | ) 56 | 57 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 58 | ctx.save_for_backward(data, rois, offset) 59 | ctx.output_count = output_count 60 | 61 | return output 62 | 63 | @staticmethod 64 | @once_differentiable 65 | def backward(ctx, grad_output): 66 | if not grad_output.is_cuda: 67 | raise NotImplementedError 68 | 69 | data, rois, offset = ctx.saved_tensors 70 | output_count = ctx.output_count 71 | grad_input = torch.zeros_like(data) 72 | grad_rois = None 73 | grad_offset = torch.zeros_like(offset) 74 | 75 | _C.deform_psroi_pooling_backward( 76 | grad_output, 77 | data, 78 | rois, 79 | offset, 80 | output_count, 81 | grad_input, 82 | grad_offset, 83 | ctx.no_trans, 84 | ctx.spatial_scale, 85 | ctx.out_channels, 86 | ctx.group_size, 87 | ctx.out_size, 88 | ctx.part_size, 89 | ctx.sample_per_part, 90 | ctx.trans_std 91 | ) 92 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None) 93 | 94 | 95 | deform_roi_pooling = DeformRoIPoolingFunction.apply 96 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/dcn/deform_pool_module.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from .deform_pool_func import deform_roi_pooling 4 | 5 | 6 | class DeformRoIPooling(nn.Module): 7 | 8 | def __init__(self, 9 | spatial_scale, 10 | out_size, 11 | out_channels, 12 | no_trans, 13 | group_size=1, 14 | part_size=None, 15 | sample_per_part=4, 16 | trans_std=.0): 17 | super(DeformRoIPooling, self).__init__() 18 | self.spatial_scale = spatial_scale 19 | self.out_size = out_size 20 | self.out_channels = out_channels 21 | self.no_trans = no_trans 22 | self.group_size = group_size 23 | self.part_size = out_size if part_size is None else part_size 24 | self.sample_per_part = sample_per_part 25 | self.trans_std = trans_std 26 | 27 | def forward(self, data, rois, offset): 28 | if self.no_trans: 29 | offset = data.new_empty(0) 30 | return deform_roi_pooling( 31 | data, rois, offset, self.spatial_scale, self.out_size, 32 | self.out_channels, self.no_trans, self.group_size, self.part_size, 33 | self.sample_per_part, self.trans_std) 34 | 35 | 36 | class DeformRoIPoolingPack(DeformRoIPooling): 37 | 38 | def __init__(self, 39 | spatial_scale, 40 | out_size, 41 | out_channels, 42 | no_trans, 43 | group_size=1, 44 | part_size=None, 45 | sample_per_part=4, 46 | trans_std=.0, 47 | deform_fc_channels=1024): 48 | super(DeformRoIPoolingPack, 49 | self).__init__(spatial_scale, out_size, out_channels, no_trans, 50 | group_size, part_size, sample_per_part, trans_std) 51 | 52 | self.deform_fc_channels = deform_fc_channels 53 | 54 | if not no_trans: 55 | self.offset_fc = nn.Sequential( 56 | nn.Linear(self.out_size * self.out_size * self.out_channels, 57 | self.deform_fc_channels), 58 | nn.ReLU(inplace=True), 59 | nn.Linear(self.deform_fc_channels, self.deform_fc_channels), 60 | nn.ReLU(inplace=True), 61 | nn.Linear(self.deform_fc_channels, 62 | self.out_size * self.out_size * 2)) 63 | self.offset_fc[-1].weight.data.zero_() 64 | self.offset_fc[-1].bias.data.zero_() 65 | 66 | def forward(self, data, rois): 67 | assert data.size(1) == self.out_channels 68 | if self.no_trans: 69 | offset = data.new_empty(0) 70 | return deform_roi_pooling( 71 | data, rois, offset, self.spatial_scale, self.out_size, 72 | self.out_channels, 
self.no_trans, self.group_size, 73 | self.part_size, self.sample_per_part, self.trans_std) 74 | else: 75 | n = rois.shape[0] 76 | offset = data.new_empty(0) 77 | x = deform_roi_pooling(data, rois, offset, self.spatial_scale, 78 | self.out_size, self.out_channels, True, 79 | self.group_size, self.part_size, 80 | self.sample_per_part, self.trans_std) 81 | offset = self.offset_fc(x.view(n, -1)) 82 | offset = offset.view(n, 2, self.out_size, self.out_size) 83 | return deform_roi_pooling( 84 | data, rois, offset, self.spatial_scale, self.out_size, 85 | self.out_channels, self.no_trans, self.group_size, 86 | self.part_size, self.sample_per_part, self.trans_std) 87 | 88 | 89 | class ModulatedDeformRoIPoolingPack(DeformRoIPooling): 90 | 91 | def __init__(self, 92 | spatial_scale, 93 | out_size, 94 | out_channels, 95 | no_trans, 96 | group_size=1, 97 | part_size=None, 98 | sample_per_part=4, 99 | trans_std=.0, 100 | deform_fc_channels=1024): 101 | super(ModulatedDeformRoIPoolingPack, self).__init__( 102 | spatial_scale, out_size, out_channels, no_trans, group_size, 103 | part_size, sample_per_part, trans_std) 104 | 105 | self.deform_fc_channels = deform_fc_channels 106 | 107 | if not no_trans: 108 | self.offset_fc = nn.Sequential( 109 | nn.Linear(self.out_size * self.out_size * self.out_channels, 110 | self.deform_fc_channels), 111 | nn.ReLU(inplace=True), 112 | nn.Linear(self.deform_fc_channels, self.deform_fc_channels), 113 | nn.ReLU(inplace=True), 114 | nn.Linear(self.deform_fc_channels, 115 | self.out_size * self.out_size * 2)) 116 | self.offset_fc[-1].weight.data.zero_() 117 | self.offset_fc[-1].bias.data.zero_() 118 | self.mask_fc = nn.Sequential( 119 | nn.Linear(self.out_size * self.out_size * self.out_channels, 120 | self.deform_fc_channels), 121 | nn.ReLU(inplace=True), 122 | nn.Linear(self.deform_fc_channels, 123 | self.out_size * self.out_size * 1), 124 | nn.Sigmoid()) 125 | self.mask_fc[2].weight.data.zero_() 126 | self.mask_fc[2].bias.data.zero_() 127 | 128 | def forward(self, data, rois): 129 | assert data.size(1) == self.out_channels 130 | if self.no_trans: 131 | offset = data.new_empty(0) 132 | return deform_roi_pooling( 133 | data, rois, offset, self.spatial_scale, self.out_size, 134 | self.out_channels, self.no_trans, self.group_size, 135 | self.part_size, self.sample_per_part, self.trans_std) 136 | else: 137 | n = rois.shape[0] 138 | offset = data.new_empty(0) 139 | x = deform_roi_pooling(data, rois, offset, self.spatial_scale, 140 | self.out_size, self.out_channels, True, 141 | self.group_size, self.part_size, 142 | self.sample_per_part, self.trans_std) 143 | offset = self.offset_fc(x.view(n, -1)) 144 | offset = offset.view(n, 2, self.out_size, self.out_size) 145 | mask = self.mask_fc(x.view(n, -1)) 146 | mask = mask.view(n, 1, self.out_size, self.out_size) 147 | return deform_roi_pooling( 148 | data, rois, offset, self.spatial_scale, self.out_size, 149 | self.out_channels, self.no_trans, self.group_size, 150 | self.part_size, self.sample_per_part, self.trans_std) * mask 151 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/iou_loss.py: -------------------------------------------------------------------------------- 1 | # GIoU and Linear IoU are added by following 2 | # https://github.com/yqyao/FCOS_PLUS/blob/master/maskrcnn_benchmark/layers/iou_loss.py. 
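`IOULoss` below works on (left, top, right, bottom) distances from a sampling location rather than on corner-format boxes; for a single location the IoU it derives reduces to the following small worked sketch (distances are made up):

```python
import torch

pred   = torch.tensor([[4., 4., 6., 6.]])   # l, t, r, b distances for one location
target = torch.tensor([[5., 5., 5., 5.]])

w_inter = torch.min(pred[:, 0], target[:, 0]) + torch.min(pred[:, 2], target[:, 2])
h_inter = torch.min(pred[:, 1], target[:, 1]) + torch.min(pred[:, 3], target[:, 3])
area_p  = (pred[:, 0] + pred[:, 2]) * (pred[:, 1] + pred[:, 3])
area_t  = (target[:, 0] + target[:, 2]) * (target[:, 1] + target[:, 3])
inter   = w_inter * h_inter
iou     = (inter + 1.0) / (area_p + area_t - inter + 1.0)
print(iou)   # tensor([0.6833]); the 'iou' loss variant would then be -log(iou)
```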
3 | import torch 4 | from torch import nn 5 | 6 | 7 | class IOULoss(nn.Module): 8 | def __init__(self, loss_type="iou"): 9 | super(IOULoss, self).__init__() 10 | self.loss_type = loss_type 11 | 12 | def forward(self, pred, target, weight=None): 13 | pred_left = pred[:, 0] 14 | pred_top = pred[:, 1] 15 | pred_right = pred[:, 2] 16 | pred_bottom = pred[:, 3] 17 | 18 | pred_center_x = (pred_left - pred_right) / 2 19 | pred_center_y = (pred_top - pred_bottom) / 2 20 | 21 | target_left = target[:, 0] 22 | target_top = target[:, 1] 23 | target_right = target[:, 2] 24 | target_bottom = target[:, 3] 25 | 26 | target_center_x = (target_left - target_right) / 2 27 | target_center_y = (target_top - target_bottom) / 2 28 | 29 | target_area = (target_left + target_right) * \ 30 | (target_top + target_bottom) 31 | pred_area = (pred_left + pred_right) * \ 32 | (pred_top + pred_bottom) 33 | 34 | w_intersect = torch.min(pred_left, target_left) + torch.min(pred_right, target_right) 35 | g_w_intersect = torch.max(pred_left, target_left) + torch.max( 36 | pred_right, target_right) 37 | h_intersect = torch.min(pred_bottom, target_bottom) + torch.min(pred_top, target_top) 38 | g_h_intersect = torch.max(pred_bottom, target_bottom) + torch.max(pred_top, target_top) 39 | ac_uion = g_w_intersect * g_h_intersect + 1e-7 40 | area_intersect = w_intersect * h_intersect 41 | area_union = target_area + pred_area - area_intersect 42 | ious = (area_intersect + 1.0) / (area_union + 1.0) 43 | gious = ious - (ac_uion - area_union) / ac_uion 44 | 45 | distance_center = (pred_center_x - target_center_x) * (pred_center_x - target_center_x) + (pred_center_y - target_center_y) * (pred_center_y - target_center_y) 46 | distance_intersect = g_w_intersect * g_w_intersect + g_h_intersect * g_h_intersect 47 | dious = ious - (distance_center / distance_intersect) 48 | 49 | if self.loss_type == 'iou': 50 | losses = -torch.log(ious) 51 | elif self.loss_type == 'linear_iou': 52 | losses = 1 - ious 53 | elif self.loss_type == 'giou': 54 | losses = 1 - gious 55 | elif self.loss_type == 'diou': 56 | losses = 1 - dious 57 | else: 58 | raise NotImplementedError 59 | 60 | if weight is not None and weight.sum() > 0: 61 | return (losses * weight).sum() 62 | else: 63 | assert losses.numel() != 0 64 | return losses.sum() 65 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | helper class that supports empty tensors on some nn functions. 4 | 5 | Ideally, add support directly in PyTorch to empty tensors in 6 | those functions. 
7 | 8 | This can be removed once https://github.com/pytorch/pytorch/issues/12013 9 | is implemented 10 | """ 11 | 12 | import math 13 | import torch 14 | from torch.nn.modules.utils import _ntuple 15 | 16 | 17 | class _NewEmptyTensorOp(torch.autograd.Function): 18 | @staticmethod 19 | def forward(ctx, x, new_shape): 20 | ctx.shape = x.shape 21 | return x.new_empty(new_shape) 22 | 23 | @staticmethod 24 | def backward(ctx, grad): 25 | shape = ctx.shape 26 | return _NewEmptyTensorOp.apply(grad, shape), None 27 | 28 | 29 | class Conv2d(torch.nn.Conv2d): 30 | def forward(self, x): 31 | if x.numel() > 0: 32 | return super(Conv2d, self).forward(x) 33 | # get output shape 34 | 35 | output_shape = [ 36 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 37 | for i, p, di, k, d in zip( 38 | x.shape[-2:], self.padding, self.dilation, self.kernel_size, self.stride 39 | ) 40 | ] 41 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape 42 | return _NewEmptyTensorOp.apply(x, output_shape) 43 | 44 | 45 | class ConvTranspose2d(torch.nn.ConvTranspose2d): 46 | def forward(self, x): 47 | if x.numel() > 0: 48 | return super(ConvTranspose2d, self).forward(x) 49 | # get output shape 50 | 51 | output_shape = [ 52 | (i - 1) * d - 2 * p + (di * (k - 1) + 1) + op 53 | for i, p, di, k, d, op in zip( 54 | x.shape[-2:], 55 | self.padding, 56 | self.dilation, 57 | self.kernel_size, 58 | self.stride, 59 | self.output_padding, 60 | ) 61 | ] 62 | output_shape = [x.shape[0], self.bias.shape[0]] + output_shape 63 | return _NewEmptyTensorOp.apply(x, output_shape) 64 | 65 | 66 | class BatchNorm2d(torch.nn.BatchNorm2d): 67 | def forward(self, x): 68 | if x.numel() > 0: 69 | return super(BatchNorm2d, self).forward(x) 70 | # get output shape 71 | output_shape = x.shape 72 | return _NewEmptyTensorOp.apply(x, output_shape) 73 | 74 | 75 | def interpolate( 76 | input, size=None, scale_factor=None, mode="nearest", align_corners=None 77 | ): 78 | if input.numel() > 0: 79 | return torch.nn.functional.interpolate( 80 | input, size, scale_factor, mode, align_corners 81 | ) 82 | 83 | def _check_size_scale_factor(dim): 84 | if size is None and scale_factor is None: 85 | raise ValueError("either size or scale_factor should be defined") 86 | if size is not None and scale_factor is not None: 87 | raise ValueError("only one of size or scale_factor should be defined") 88 | if ( 89 | scale_factor is not None 90 | and isinstance(scale_factor, tuple) 91 | and len(scale_factor) != dim 92 | ): 93 | raise ValueError( 94 | "scale_factor shape must match input shape. 
" 95 | "Input is {}D, scale_factor size is {}".format(dim, len(scale_factor)) 96 | ) 97 | 98 | def _output_size(dim): 99 | _check_size_scale_factor(dim) 100 | if size is not None: 101 | return size 102 | scale_factors = _ntuple(dim)(scale_factor) 103 | # math.floor might return float in py2.7 104 | return [ 105 | int(math.floor(input.size(i + 2) * scale_factors[i])) for i in range(dim) 106 | ] 107 | 108 | output_shape = tuple(_output_size(2)) 109 | output_shape = input.shape[:-2] + output_shape 110 | return _NewEmptyTensorOp.apply(input, output_shape) 111 | 112 | 113 | class DFConv2d(torch.nn.Module): 114 | """Deformable convolutional layer""" 115 | def __init__( 116 | self, 117 | in_channels, 118 | out_channels, 119 | with_modulated_dcn=True, 120 | kernel_size=3, 121 | stride=1, 122 | groups=1, 123 | padding=1, 124 | dilation=1, 125 | deformable_groups=1, 126 | bias=False 127 | ): 128 | super(DFConv2d, self).__init__() 129 | if isinstance(kernel_size, (list, tuple)): 130 | assert len(kernel_size) == 2 131 | offset_base_channels = kernel_size[0] * kernel_size[1] 132 | else: 133 | offset_base_channels = kernel_size * kernel_size 134 | if with_modulated_dcn: 135 | from siamreppoints.models.layers import ModulatedDeformConv 136 | offset_channels = offset_base_channels * 3 # default: 27 137 | conv_block = ModulatedDeformConv 138 | else: 139 | from siamreppoints.models.layers import DeformConv 140 | offset_channels = offset_base_channels * 2 # default: 18 141 | conv_block = DeformConv 142 | self.offset = Conv2d( 143 | in_channels, 144 | deformable_groups * offset_channels, 145 | kernel_size=kernel_size, 146 | stride=stride, 147 | padding=padding, 148 | groups=1, 149 | dilation=dilation 150 | ) 151 | for l in [self.offset, ]: 152 | torch.nn.init.kaiming_uniform_(l.weight, a=1) 153 | torch.nn.init.constant_(l.bias, 0.) 154 | self.conv = conv_block( 155 | in_channels, 156 | out_channels, 157 | kernel_size=kernel_size, 158 | stride=stride, 159 | padding=padding, 160 | dilation=dilation, 161 | groups=groups, 162 | deformable_groups=deformable_groups, 163 | bias=bias 164 | ) 165 | self.with_modulated_dcn = with_modulated_dcn 166 | self.kernel_size = kernel_size 167 | self.stride = stride 168 | self.padding = padding 169 | self.dilation = dilation 170 | self.offset_base_channels = offset_base_channels 171 | 172 | def forward(self, x): 173 | assert x.numel() > 0, "only non-empty tensors are supported" 174 | if x.numel() > 0: 175 | if not self.with_modulated_dcn: 176 | offset = self.offset(x) 177 | x = self.conv(x, offset) 178 | else: 179 | offset_mask = self.offset(x) 180 | split_point = self.offset_base_channels * 2 181 | offset = offset_mask[:, :split_point, :, :] 182 | mask = offset_mask[:, split_point:, :, :].sigmoid() 183 | x = self.conv(x, offset, mask) 184 | return x 185 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from ._utils import _C 3 | from siamreppoints.models import _C 4 | 5 | nms = _C.nms 6 | ml_nms = _C.ml_nms 7 | # nms.__doc__ = """ 8 | # This function performs Non-maximum suppresion""" 9 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from siamreppoints.models import _C 9 | 10 | import pdb 11 | 12 | class _ROIAlign(Function): 13 | @staticmethod 14 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 15 | ctx.save_for_backward(roi) 16 | ctx.output_size = _pair(output_size) 17 | ctx.spatial_scale = spatial_scale 18 | ctx.sampling_ratio = sampling_ratio 19 | ctx.input_shape = input.size() 20 | output = _C.roi_align_forward(input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | rois, = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | sampling_ratio = ctx.sampling_ratio 30 | bs, ch, h, w = ctx.input_shape 31 | grad_input = _C.roi_align_backward( 32 | grad_output, 33 | rois, 34 | spatial_scale, 35 | output_size[0], 36 | output_size[1], 37 | bs, 38 | ch, 39 | h, 40 | w, 41 | sampling_ratio, 42 | ) 43 | return grad_input, None, None, None, None 44 | 45 | 46 | roi_align = _ROIAlign.apply 47 | 48 | 49 | class ROIAlign(nn.Module): 50 | def __init__(self, output_size, spatial_scale, sampling_ratio): 51 | super(ROIAlign, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | self.sampling_ratio = sampling_ratio 55 | 56 | def forward(self, input, rois): 57 | return roi_align( 58 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 59 | ) 60 | 61 | def __repr__(self): 62 | tmpstr = self.__class__.__name__ + "(" 63 | tmpstr += "output_size=" + str(self.output_size) 64 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 65 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 66 | tmpstr += ")" 67 | return tmpstr 68 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
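A minimal usage sketch of the `ROIAlign` module above, assuming the compiled extension is available and the usual maskrcnn-benchmark roi layout of (batch_index, x1, y1, x2, y2) per row (all numbers are illustrative):

```python
import torch
from siamreppoints.models.layers import ROIAlign

pool = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 8, sampling_ratio=2)

feat = torch.randn(1, 256, 32, 32, device='cuda')                 # stride-8 feature map
rois = torch.tensor([[0., 16., 16., 112., 112.]], device='cuda')  # in input-image coordinates
print(pool(feat, rois).shape)   # torch.Size([1, 256, 7, 7])
```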
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from siamreppoints.models import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class Scale(nn.Module): 6 | def __init__(self, init_value=1.0): 7 | super(Scale, self).__init__() 8 | self.scale = nn.Parameter(torch.FloatTensor([init_value])) 9 | 10 | def forward(self, input): 11 | return input * self.scale 12 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from siamreppoints.models import _C 7 | 8 | # TODO: Use JIT to replace CUDA implementation in the future. 
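For reference, the per-element focal loss computed below (the CUDA and CPU paths give the same result) reduces to the following for a toy batch; the γ and α used here are just common default values, not necessarily the ones set in the configs:

```python
import torch

gamma, alpha = 2.0, 0.25
logits  = torch.tensor([[2.0, -1.0]])   # one sample, two foreground classes
targets = torch.tensor([1])             # class ids are 1-based; 0 would mean background

p = torch.sigmoid(logits)
class_range = torch.arange(1, logits.shape[1] + 1).unsqueeze(0)
pos = (targets.unsqueeze(1) == class_range).float()
neg = ((targets.unsqueeze(1) != class_range) & (targets.unsqueeze(1) >= 0)).float()
loss = -pos * alpha * (1 - p) ** gamma * torch.log(p) \
       - neg * (1 - alpha) * p ** gamma * torch.log(1 - p)
print(loss)   # the easy positive and the easy negative both contribute only a small loss
```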
9 | class _SigmoidFocalLoss(Function): 10 | @staticmethod 11 | def forward(ctx, logits, targets, gamma, alpha): 12 | ctx.save_for_backward(logits, targets) 13 | num_classes = logits.shape[1] 14 | ctx.num_classes = num_classes 15 | ctx.gamma = gamma 16 | ctx.alpha = alpha 17 | 18 | losses = _C.sigmoid_focalloss_forward( 19 | logits, targets, num_classes, gamma, alpha 20 | ) 21 | return losses 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, d_loss): 26 | logits, targets = ctx.saved_tensors 27 | num_classes = ctx.num_classes 28 | gamma = ctx.gamma 29 | alpha = ctx.alpha 30 | d_loss = d_loss.contiguous() 31 | d_logits = _C.sigmoid_focalloss_backward( 32 | logits, targets, d_loss, num_classes, gamma, alpha 33 | ) 34 | return d_logits, None, None, None, None 35 | 36 | 37 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 38 | 39 | 40 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha): 41 | num_classes = logits.shape[1] 42 | gamma = gamma[0] 43 | alpha = alpha[0] 44 | dtype = targets.dtype 45 | device = targets.device 46 | class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0) 47 | 48 | t = targets.unsqueeze(1) 49 | p = torch.sigmoid(logits) 50 | term1 = (1 - p) ** gamma * torch.log(p) 51 | term2 = p ** gamma * torch.log(1 - p) 52 | return -(t == class_range).float() * term1 * alpha - ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 53 | 54 | 55 | class SigmoidFocalLoss(nn.Module): 56 | def __init__(self, gamma, alpha): 57 | super(SigmoidFocalLoss, self).__init__() 58 | self.gamma = gamma 59 | self.alpha = alpha 60 | 61 | def forward(self, logits, targets): 62 | device = logits.device 63 | if logits.is_cuda: 64 | loss_func = sigmoid_focal_loss_cuda 65 | else: 66 | loss_func = sigmoid_focal_loss_cpu 67 | 68 | loss = loss_func(logits, targets, self.gamma, self.alpha) 69 | #return loss.sum() 70 | return loss 71 | 72 | def __repr__(self): 73 | tmpstr = self.__class__.__name__ + "(" 74 | tmpstr += "gamma=" + str(self.gamma) 75 | tmpstr += ", alpha=" + str(self.alpha) 76 | tmpstr += ")" 77 | return tmpstr 78 | -------------------------------------------------------------------------------- /siamreppoints/models/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # TODO maybe push this to nn? 6 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 7 | """ 8 | very similar to the smooth_l1_loss from pytorch, but with 9 | the extra beta parameter 10 | """ 11 | n = torch.abs(input - target) 12 | cond = n < beta 13 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 14 | if size_average: 15 | return loss.mean() 16 | return loss.sum() 17 | -------------------------------------------------------------------------------- /siamreppoints/models/model_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
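`smooth_l1_loss` above behaves quadratically for errors below `beta` and linearly above it; a short numeric check (values are arbitrary):

```python
import torch
from siamreppoints.models.layers import smooth_l1_loss

pred   = torch.tensor([0.00, 1.0, 3.0])
target = torch.tensor([0.05, 1.5, 1.0])

# |0.05| < beta -> 0.5 * 0.05**2 / beta; the other two errors are >= beta -> |n| - 0.5 * beta
print(smooth_l1_loss(pred, target, beta=1.0 / 9, size_average=False))   # ~2.40
```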
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import torch 9 | import torch.nn as nn 10 | 11 | from siamreppoints.core.config import cfg 12 | from siamreppoints.models.backbone import get_backbone 13 | from siamreppoints.models.head import get_rpn_head 14 | from siamreppoints.models.neck import get_neck 15 | 16 | class ModelBuilder(nn.Module): 17 | def __init__(self): 18 | super(ModelBuilder, self).__init__() 19 | 20 | # build backbone 21 | self.backbone = get_backbone(cfg.BACKBONE.TYPE, 22 | **cfg.BACKBONE.KWARGS) 23 | 24 | # build adjust layer 25 | if cfg.ADJUST.ADJUST: 26 | self.neck = get_neck(cfg.ADJUST.TYPE, 27 | **cfg.ADJUST.KWARGS) 28 | 29 | # build rpn head 30 | self.rpn_head = get_rpn_head(cfg.RPN.TYPE, 31 | **cfg.RPN.KWARGS) 32 | 33 | def instance(self, x): 34 | xf = self.backbone(x) 35 | if cfg.ADJUST.ADJUST: 36 | xf = self.neck(xf) 37 | #self.cf = xf[cfg.ADJUST.LAYER-1] 38 | self.cf = torch.cat([xf[2], xf[1]], dim=1) 39 | 40 | def template(self, z): 41 | zf = self.backbone(z) 42 | if cfg.ADJUST.ADJUST: 43 | zf = self.neck(zf) 44 | self.zf = zf 45 | 46 | def track(self, x, instance_size): 47 | xf = self.backbone(x) 48 | if cfg.ADJUST.ADJUST: 49 | xf = self.neck(xf) 50 | 51 | cls, pts_preds_init, pts_preds_refine = self.rpn_head(self.zf, xf, instance_size) 52 | 53 | cls = cls.permute(0, 2, 3, 1) 54 | cls = cls.reshape(cls.shape[0], -1, 1) 55 | cls = torch.sigmoid(cls) 56 | 57 | #self.cf = xf[cfg.ADJUST.LAYER-1] 58 | self.cf = torch.cat([xf[2], xf[1]], dim=1) 59 | return { 60 | 'score': cls, 61 | 'bbox': pts_preds_refine, 62 | } 63 | 64 | -------------------------------------------------------------------------------- /siamreppoints/models/neck/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.functional as F 11 | 12 | from siamreppoints.models.neck.neck import AdjustLayer, AdjustAllLayer 13 | 14 | NECKS = { 15 | 'AdjustLayer': AdjustLayer, 16 | 'AdjustAllLayer': AdjustAllLayer 17 | } 18 | 19 | def get_neck(name, **kwargs): 20 | return NECKS[name](**kwargs) 21 | -------------------------------------------------------------------------------- /siamreppoints/models/neck/neck.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
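The tracking flow through `ModelBuilder` above is: call `template()` once on the exemplar crop, then `track()` on every search-region crop. A rough sketch, assuming `cfg` has been merged from one of the yaml files in `experiments/siamreppoints` and a checkpoint has been loaded; the 127/255 crop sizes are only typical Siamese-tracker values, the real ones come from the config:

```python
import torch
from siamreppoints.models.model_builder import ModelBuilder

model = ModelBuilder().cuda().eval()

z = torch.randn(1, 3, 127, 127, device='cuda')   # exemplar crop from the first frame
x = torch.randn(1, 3, 255, 255, device='cuda')   # search-region crop from the current frame

with torch.no_grad():
    model.template(z)
    outputs = model.track(x, instance_size=255)

# per-location foreground scores and refined point-set/box predictions
print(outputs['score'].shape, outputs['bbox'].shape)
```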
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import torch.nn as nn 9 | import torch 10 | import torch.nn.functional as F 11 | 12 | ##from siamreppoints.models.external.PreciseRoIPooling.pytorch.prroi_pool import PrRoIPool2D 13 | 14 | class AdjustLayer(nn.Module): 15 | def __init__(self, in_channels, out_channels, center_size=7): 16 | super(AdjustLayer, self).__init__() 17 | self.downsample = nn.Sequential( 18 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False), 19 | nn.BatchNorm2d(out_channels), 20 | ) 21 | self.center_size = center_size 22 | 23 | def forward(self, x): 24 | x = self.downsample(x) 25 | if x.size(3) < 20: 26 | l = (x.size(3) - self.center_size) // 2 27 | r = l + self.center_size 28 | x = x[:, :, l:r, l:r] 29 | return x 30 | 31 | 32 | class AdjustAllLayer(nn.Module): 33 | def __init__(self, in_channels, out_channels, center_size=7): 34 | super(AdjustAllLayer, self).__init__() 35 | self.num = len(out_channels) 36 | if self.num == 1: 37 | self.downsample = AdjustLayer(in_channels[0], 38 | out_channels[0], 39 | center_size) 40 | else: 41 | for i in range(self.num): 42 | self.add_module('downsample'+str(i+2), 43 | AdjustLayer(in_channels[i], 44 | out_channels[i], 45 | center_size)) 46 | 47 | def forward(self, features): 48 | if self.num == 1: 49 | return self.downsample(features) 50 | else: 51 | out = [] 52 | for i in range(self.num): 53 | adj_layer = getattr(self, 'downsample'+str(i+2)) 54 | out.append(adj_layer(features[i])) 55 | 56 | return out 57 | -------------------------------------------------------------------------------- /siamreppoints/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
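`AdjustLayer` above reduces channels with a 1x1 conv and, for small (template-sized) feature maps, keeps only the central `center_size` x `center_size` region; search-region features larger than 20 pass through spatially unchanged. A small shape check (channel sizes are illustrative):

```python
import torch
from siamreppoints.models.neck.neck import AdjustLayer

adjust = AdjustLayer(in_channels=1024, out_channels=256, center_size=7)

zf = torch.randn(1, 1024, 15, 15)   # template feature: cropped to the central 7x7
xf = torch.randn(1, 1024, 31, 31)   # search feature: spatial size kept

print(adjust(zf).shape)   # torch.Size([1, 256, 7, 7])
print(adjust(xf).shape)   # torch.Size([1, 256, 31, 31])
```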
3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | 15 | requirements = [ 16 | "torchvision", 17 | "ninja", 18 | "yacs", 19 | "cython", 20 | "matplotlib", 21 | "tqdm", 22 | "opencv-python", 23 | "scikit-image" 24 | ] 25 | 26 | 27 | def get_extensions(): 28 | extensions_dir = os.path.join("models", "csrc") 29 | 30 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 31 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 32 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 33 | sources = main_file + source_cpu 34 | 35 | extension = CppExtension 36 | 37 | extra_compile_args = {"cxx": []} 38 | define_macros = [] 39 | 40 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 41 | extension = CUDAExtension 42 | sources += source_cuda 43 | define_macros += [("WITH_CUDA", None)] 44 | extra_compile_args["nvcc"] = [ 45 | "-DCUDA_HAS_FP16=1", 46 | "-D__CUDA_NO_HALF_OPERATORS__", 47 | "-D__CUDA_NO_HALF_CONVERSIONS__", 48 | "-D__CUDA_NO_HALF2_OPERATORS__", 49 | ] 50 | 51 | include_dirs = [extensions_dir] 52 | 53 | ext_modules = [ 54 | extension( 55 | "models._C", 56 | sources, 57 | include_dirs=include_dirs, 58 | define_macros=define_macros, 59 | extra_compile_args=extra_compile_args 60 | ) 61 | ] 62 | 63 | return ext_modules 64 | 65 | 66 | setup( 67 | name="siamreppoints", 68 | version="0.0.1", 69 | author="UZI", 70 | description="target tracking in pytorch", 71 | packages=find_packages(exclude=("configs", "tests",)), 72 | ext_modules=get_extensions(), 73 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 74 | ) 75 | -------------------------------------------------------------------------------- /siamreppoints/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/siamreppoints/tracker/__init__.py -------------------------------------------------------------------------------- /siamreppoints/tracker/base_tracker.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
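Besides auto-detecting CUDA, the setup script above also honours a `FORCE_CUDA=1` environment variable, which is handy when the kernels have to be compiled in a CPU-only environment (for example a build container) for later GPU use; the selection logic it applies is simply:

```python
import os
import torch
from torch.utils.cpp_extension import CUDA_HOME

use_cuda = (torch.cuda.is_available() and CUDA_HOME is not None) \
    or os.getenv("FORCE_CUDA", "0") == "1"
print("building the CUDA kernels:", use_cuda)
```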
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import cv2 9 | import numpy as np 10 | import torch 11 | 12 | from siamreppoints.core.config import cfg 13 | 14 | 15 | class BaseTracker(object): 16 | """ Base tracker of single objec tracking 17 | """ 18 | def init(self, img, bbox): 19 | """ 20 | args: 21 | img(np.ndarray): BGR image 22 | bbox(list): [x, y, width, height] 23 | x, y need to be 0-based 24 | """ 25 | raise NotImplementedError 26 | 27 | def track(self, img): 28 | """ 29 | args: 30 | img(np.ndarray): BGR image 31 | return: 32 | bbox(list):[x, y, width, height] 33 | """ 34 | raise NotImplementedError 35 | 36 | 37 | class SiameseTracker(BaseTracker): 38 | def get_subwindow(self, im, pos, model_sz, original_sz, avg_chans): 39 | """ 40 | args: 41 | im: bgr based image 42 | pos: center position 43 | model_sz: exemplar size 44 | s_z: original size 45 | avg_chans: channel average 46 | """ 47 | if isinstance(pos, float): 48 | pos = [pos, pos] 49 | sz = original_sz 50 | im_sz = im.shape 51 | c = (original_sz + 1) / 2 52 | # context_xmin = round(pos[0] - c) # py2 and py3 round 53 | context_xmin = np.floor(pos[0] - c + 0.5) 54 | context_xmax = context_xmin + sz - 1 55 | # context_ymin = round(pos[1] - c) 56 | context_ymin = np.floor(pos[1] - c + 0.5) 57 | context_ymax = context_ymin + sz - 1 58 | left_pad = int(max(0., -context_xmin)) 59 | top_pad = int(max(0., -context_ymin)) 60 | right_pad = int(max(0., context_xmax - im_sz[1] + 1)) 61 | bottom_pad = int(max(0., context_ymax - im_sz[0] + 1)) 62 | 63 | context_xmin = context_xmin + left_pad 64 | context_xmax = context_xmax + left_pad 65 | context_ymin = context_ymin + top_pad 66 | context_ymax = context_ymax + top_pad 67 | 68 | r, c, k = im.shape 69 | if any([top_pad, bottom_pad, left_pad, right_pad]): 70 | size = (r + top_pad + bottom_pad, c + left_pad + right_pad, k) 71 | te_im = np.zeros(size, np.uint8) 72 | te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im 73 | if top_pad: 74 | te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans 75 | if bottom_pad: 76 | te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans 77 | if left_pad: 78 | te_im[:, 0:left_pad, :] = avg_chans 79 | if right_pad: 80 | te_im[:, c + left_pad:, :] = avg_chans 81 | im_patch = te_im[int(context_ymin):int(context_ymax + 1), 82 | int(context_xmin):int(context_xmax + 1), :] 83 | else: 84 | im_patch = im[int(context_ymin):int(context_ymax + 1), 85 | int(context_xmin):int(context_xmax + 1), :] 86 | 87 | if not np.array_equal(model_sz, original_sz): 88 | im_patch = cv2.resize(im_patch, (model_sz, model_sz)) 89 | im_patch = im_patch.transpose(2, 0, 1) 90 | im_patch = im_patch[np.newaxis, :, :, :] 91 | im_patch = im_patch.astype(np.float32) 92 | im_patch = torch.from_numpy(im_patch) 93 | if cfg.CUDA: 94 | im_patch = im_patch.cuda() 95 | return im_patch 96 | -------------------------------------------------------------------------------- /siamreppoints/tracker/tracker_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
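`get_subwindow` above crops a square region centred on the target, pads anything that falls outside the frame with the per-channel mean colour, and resizes the patch to the model input size. A usage sketch (the file name and all numbers are hypothetical):

```python
import cv2
import numpy as np
from siamreppoints.tracker.base_tracker import SiameseTracker

tracker = SiameseTracker()
img = cv2.imread('frame_0001.jpg')           # hypothetical BGR frame
avg_chans = np.mean(img, axis=(0, 1))        # per-channel mean used for padding

# take a 200x200 window centred at (x=150, y=120) and resize it to 127x127
patch = tracker.get_subwindow(img, pos=(150.0, 120.0), model_sz=127,
                              original_sz=200, avg_chans=avg_chans)
print(patch.shape)   # torch.Size([1, 3, 127, 127]), moved to the GPU when cfg.CUDA is set
```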
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from siamreppoints.core.config import cfg 9 | from siamreppoints.tracker.siamreppoints_tracker import SiamReppointsTracker 10 | 11 | 12 | TRACKS = { 13 | 'SiamReppointsTracker': SiamReppointsTracker 14 | } 15 | 16 | 17 | def build_tracker(model): 18 | return TRACKS[cfg.TRACK.TYPE](model) 19 | -------------------------------------------------------------------------------- /siamreppoints/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/siamreppoints/utils/__init__.py -------------------------------------------------------------------------------- /siamreppoints/utils/anchor.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import math 9 | 10 | import numpy as np 11 | 12 | from siamreppoints.utils.bbox import corner2center, center2corner 13 | 14 | 15 | class Anchors: 16 | """ 17 | This class generate anchors. 18 | """ 19 | def __init__(self, stride, ratios, scales, image_center=0, size=0): 20 | self.stride = stride 21 | self.ratios = ratios 22 | self.scales = scales 23 | self.image_center = image_center 24 | self.size = size 25 | 26 | self.anchor_num = len(self.scales) * len(self.ratios) 27 | 28 | self.anchors = None 29 | 30 | self.generate_anchors() 31 | 32 | def generate_anchors(self): 33 | """ 34 | generate anchors based on predefined configuration 35 | """ 36 | self.anchors = np.zeros((self.anchor_num, 4), dtype=np.float32) 37 | size = self.stride * self.stride 38 | count = 0 39 | for r in self.ratios: 40 | ws = int(math.sqrt(size*1. 
/ r)) 41 | hs = int(ws * r) 42 | 43 | for s in self.scales: 44 | w = ws * s 45 | h = hs * s 46 | self.anchors[count][:] = [-w*0.5, -h*0.5, w*0.5, h*0.5][:] 47 | count += 1 48 | 49 | def generate_all_anchors(self, im_c, size): 50 | """ 51 | im_c: image center 52 | size: image size 53 | """ 54 | if self.image_center == im_c and self.size == size: 55 | return False 56 | self.image_center = im_c 57 | self.size = size 58 | 59 | a0x = im_c - size // 2 * self.stride 60 | ori = np.array([a0x] * 4, dtype=np.float32) 61 | zero_anchors = self.anchors + ori 62 | 63 | x1 = zero_anchors[:, 0] 64 | y1 = zero_anchors[:, 1] 65 | x2 = zero_anchors[:, 2] 66 | y2 = zero_anchors[:, 3] 67 | 68 | x1, y1, x2, y2 = map(lambda x: x.reshape(self.anchor_num, 1, 1), 69 | [x1, y1, x2, y2]) 70 | cx, cy, w, h = corner2center([x1, y1, x2, y2]) 71 | 72 | disp_x = np.arange(0, size).reshape(1, 1, -1) * self.stride 73 | disp_y = np.arange(0, size).reshape(1, -1, 1) * self.stride 74 | 75 | cx = cx + disp_x 76 | cy = cy + disp_y 77 | 78 | # broadcast 79 | zero = np.zeros((self.anchor_num, size, size), dtype=np.float32) 80 | cx, cy, w, h = map(lambda x: x + zero, [cx, cy, w, h]) 81 | x1, y1, x2, y2 = center2corner([cx, cy, w, h]) 82 | 83 | self.all_anchors = (np.stack([x1, y1, x2, y2]).astype(np.float32), 84 | np.stack([cx, cy, w, h]).astype(np.float32)) 85 | return True 86 | -------------------------------------------------------------------------------- /siamreppoints/utils/average_meter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | 9 | class Meter(object): 10 | def __init__(self, name, val, avg): 11 | self.name = name 12 | self.val = val 13 | self.avg = avg 14 | 15 | def __repr__(self): 16 | return "{name}: {val:.6f} ({avg:.6f})".format( 17 | name=self.name, val=self.val, avg=self.avg 18 | ) 19 | 20 | def __format__(self, *tuples, **kwargs): 21 | return self.__repr__() 22 | 23 | 24 | class AverageMeter: 25 | """Computes and stores the average and current value""" 26 | def __init__(self, num=100): 27 | self.num = num 28 | self.reset() 29 | 30 | def reset(self): 31 | self.val = {} 32 | self.sum = {} 33 | self.count = {} 34 | self.history = {} 35 | 36 | def update(self, batch=1, **kwargs): 37 | val = {} 38 | for k in kwargs: 39 | val[k] = kwargs[k] / float(batch) 40 | self.val.update(val) 41 | for k in kwargs: 42 | if k not in self.sum: 43 | self.sum[k] = 0 44 | self.count[k] = 0 45 | self.history[k] = [] 46 | self.sum[k] += kwargs[k] 47 | self.count[k] += batch 48 | for _ in range(batch): 49 | self.history[k].append(val[k]) 50 | 51 | if self.num <= 0: 52 | # < 0, average all 53 | self.history[k] = [] 54 | 55 | # == 0: no average 56 | if self.num == 0: 57 | self.sum[k] = self.val[k] 58 | self.count[k] = 1 59 | 60 | elif len(self.history[k]) > self.num: 61 | pop_num = len(self.history[k]) - self.num 62 | for _ in range(pop_num): 63 | self.sum[k] -= self.history[k][0] 64 | del self.history[k][0] 65 | self.count[k] -= 1 66 | 67 | def __repr__(self): 68 | s = '' 69 | for k in self.sum: 70 | s += self.format_str(k) 71 | return s 72 | 73 | def format_str(self, attr): 74 | return "{name}: {val:.6f} ({avg:.6f}) ".format( 75 | name=attr, 76 | val=float(self.val[attr]), 77 | avg=float(self.sum[attr]) / self.count[attr]) 78 | 79 | def __getattr__(self, attr): 80 | if attr in 
self.__dict__: 81 | return super(AverageMeter, self).__getattr__(attr) 82 | if attr not in self.sum: 83 | print("invalid key '{}'".format(attr)) 84 | return Meter(attr, 0, 0) 85 | return Meter(attr, self.val[attr], self.avg(attr)) 86 | 87 | def avg(self, attr): 88 | return float(self.sum[attr]) / self.count[attr] 89 | 90 | 91 | if __name__ == '__main__': 92 | avg1 = AverageMeter(10) 93 | avg2 = AverageMeter(0) 94 | avg3 = AverageMeter(-1) 95 | 96 | for i in range(20): 97 | avg1.update(s=i) 98 | avg2.update(s=i) 99 | avg3.update(s=i) 100 | 101 | print('iter {}'.format(i)) 102 | print(avg1.s) 103 | print(avg2.s) 104 | print(avg3.s) 105 | -------------------------------------------------------------------------------- /siamreppoints/utils/bbox.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from collections import namedtuple 9 | 10 | import numpy as np 11 | 12 | 13 | Corner = namedtuple('Corner', 'x1 y1 x2 y2') 14 | # alias 15 | BBox = Corner 16 | Center = namedtuple('Center', 'x y w h') 17 | 18 | 19 | def corner2center(corner): 20 | """ convert (x1, y1, x2, y2) to (cx, cy, w, h) 21 | Args: 22 | corner: Corner or np.array (4*N) 23 | Return: 24 | Center or np.array (4 * N) 25 | """ 26 | if isinstance(corner, Corner): 27 | x1, y1, x2, y2 = corner 28 | return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, (x2 - x1), (y2 - y1)) 29 | else: 30 | x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3] 31 | x = (x1 + x2) * 0.5 32 | y = (y1 + y2) * 0.5 33 | w = x2 - x1 34 | h = y2 - y1 35 | return x, y, w, h 36 | 37 | 38 | def center2corner(center): 39 | """ convert (cx, cy, w, h) to (x1, y1, x2, y2) 40 | Args: 41 | center: Center or np.array (4 * N) 42 | Return: 43 | Corner or np.array (4 * N) 44 | """ 45 | if isinstance(center, Center): 46 | x, y, w, h = center 47 | return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5) 48 | else: 49 | x, y, w, h = center[0], center[1], center[2], center[3] 50 | x1 = x - w * 0.5 51 | y1 = y - h * 0.5 52 | x2 = x + w * 0.5 53 | y2 = y + h * 0.5 54 | return x1, y1, x2, y2 55 | 56 | 57 | def IoU(rect1, rect2): 58 | """ calculate intersection over union 59 | Args: 60 | rect1: (x1, y1, x2, y2) 61 | rect2: (x1, y1, x2, y2) 62 | Returns: 63 | iou 64 | """ 65 | # overlap 66 | x1, y1, x2, y2 = rect1[0], rect1[1], rect1[2], rect1[3] 67 | tx1, ty1, tx2, ty2 = rect2[0], rect2[1], rect2[2], rect2[3] 68 | 69 | xx1 = np.maximum(tx1, x1) 70 | yy1 = np.maximum(ty1, y1) 71 | xx2 = np.minimum(tx2, x2) 72 | yy2 = np.minimum(ty2, y2) 73 | 74 | ww = np.maximum(0, xx2 - xx1) 75 | hh = np.maximum(0, yy2 - yy1) 76 | 77 | area = (x2-x1) * (y2-y1) 78 | target_a = (tx2-tx1) * (ty2 - ty1) 79 | inter = ww * hh 80 | iou = inter / (area + target_a - inter) 81 | return iou 82 | 83 | 84 | def cxy_wh_2_rect(pos, sz): 85 | """ convert (cx, cy, w, h) to (x1, y1, w, h), 0-index 86 | """ 87 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) 88 | 89 | 90 | def rect_2_cxy_wh(rect): 91 | """ convert (x1, y1, w, h) to (cx, cy, w, h), 0-index 92 | """ 93 | return np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2]), \ 94 | np.array([rect[2], rect[3]]) 95 | 96 | 97 | def cxy_wh_2_rect1(pos, sz): 98 | """ convert (cx, cy, w, h) to (x1, y1, w, h), 1-index 99 | """ 100 | return np.array([pos[0]-sz[0]/2+1, pos[1]-sz[1]/2+1, sz[0],
sz[1]]) 101 | 102 | 103 | def rect1_2_cxy_wh(rect): 104 | """ convert (x1, y1, w, h) to (cx, cy, w, h), 1-index 105 | """ 106 | return np.array([rect[0]+rect[2]/2-1, rect[1]+rect[3]/2-1]), \ 107 | np.array([rect[2], rect[3]]) 108 | 109 | 110 | def get_axis_aligned_bbox(region): 111 | """ convert region to (cx, cy, w, h) represented by an axis-aligned box 112 | """ 113 | nv = region.size 114 | if nv == 8: 115 | cx = np.mean(region[0::2]) 116 | cy = np.mean(region[1::2]) 117 | x1 = min(region[0::2]) 118 | x2 = max(region[0::2]) 119 | y1 = min(region[1::2]) 120 | y2 = max(region[1::2]) 121 | A1 = np.linalg.norm(region[0:2] - region[2:4]) * \ 122 | np.linalg.norm(region[2:4] - region[4:6]) 123 | A2 = (x2 - x1) * (y2 - y1) 124 | s = np.sqrt(A1 / A2) 125 | w = s * (x2 - x1) + 1 126 | h = s * (y2 - y1) + 1 127 | else: 128 | x = region[0] 129 | y = region[1] 130 | w = region[2] 131 | h = region[3] 132 | cx = x+w/2 133 | cy = y+h/2 134 | return cx, cy, w, h 135 | 136 | 137 | def get_min_max_bbox(region): 138 | """ convert region to (cx, cy, w, h) represented by a min-max box 139 | """ 140 | nv = region.size 141 | if nv == 8: 142 | cx = np.mean(region[0::2]) 143 | cy = np.mean(region[1::2]) 144 | x1 = min(region[0::2]) 145 | x2 = max(region[0::2]) 146 | y1 = min(region[1::2]) 147 | y2 = max(region[1::2]) 148 | w = x2 - x1 149 | h = y2 - y1 150 | else: 151 | x = region[0] 152 | y = region[1] 153 | w = region[2] 154 | h = region[3] 155 | cx = x+w/2 156 | cy = y+h/2 157 | return cx, cy, w, h 158 | -------------------------------------------------------------------------------- /siamreppoints/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import os 9 | import socket 10 | import logging 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.distributed as dist 15 | 16 | from siamreppoints.utils.log_helper import log_once 17 | 18 | logger = logging.getLogger('global') 19 | 20 | 21 | def average_reduce(v): 22 | if get_world_size() == 1: 23 | return v 24 | tensor = torch.cuda.FloatTensor(1) 25 | tensor[0] = v 26 | dist.all_reduce(tensor) 27 | v = tensor[0] / get_world_size() 28 | return v 29 | 30 | 31 | class DistModule(nn.Module): 32 | def __init__(self, module, bn_method=0): 33 | super(DistModule, self).__init__() 34 | self.module = module 35 | self.bn_method = bn_method 36 | if get_world_size() > 1: 37 | broadcast_params(self.module) 38 | else: 39 | self.bn_method = 0 # single process 40 | 41 | def forward(self, *args, **kwargs): 42 | broadcast_buffers(self.module, self.bn_method) 43 | return self.module(*args, **kwargs) 44 | 45 | def train(self, mode=True): 46 | super(DistModule, self).train(mode) 47 | self.module.train(mode) 48 | return self 49 | 50 | 51 | def broadcast_params(model): 52 | """ broadcast model parameters """ 53 | for p in model.state_dict().values(): 54 | dist.broadcast(p, 0) 55 | 56 | 57 | def broadcast_buffers(model, method=0): 58 | """ broadcast model buffers """ 59 | if method == 0: 60 | return 61 | 62 | world_size = get_world_size() 63 | 64 | for b in model._all_buffers(): 65 | if method == 1: # broadcast from the main process 66 | dist.broadcast(b, 0) 67 | elif method == 2: # average 68 | dist.all_reduce(b) 69 | b /= world_size 70 | else: 71 | raise Exception('Invalid buffer broadcast code 
{}'.format(method)) 72 | 73 | 74 | inited = False 75 | 76 | 77 | def _dist_init(): 78 | ''' 79 | if guess right: 80 | ntasks: world_size (process num) 81 | proc_id: rank 82 | ''' 83 | rank = int(os.environ['RANK']) 84 | num_gpus = torch.cuda.device_count() 85 | torch.cuda.set_device(rank % num_gpus) 86 | dist.init_process_group(backend='nccl') 87 | world_size = dist.get_world_size() 88 | return rank, world_size 89 | 90 | 91 | def _get_local_ip(): 92 | try: 93 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 94 | s.connect(('8.8.8.8', 80)) 95 | ip = s.getsockname()[0] 96 | finally: 97 | s.close() 98 | return ip 99 | 100 | 101 | def dist_init(): 102 | global rank, world_size, inited 103 | try: 104 | rank, world_size = _dist_init() 105 | except RuntimeError as e: 106 | if 'public' in e.args[0]: 107 | logger.info(e) 108 | logger.info('Warning: use single process') 109 | rank, world_size = 0, 1 110 | else: 111 | raise RuntimeError(*e.args) 112 | inited = True 113 | return rank, world_size 114 | 115 | 116 | def get_rank(): 117 | if not inited: 118 | raise(Exception('dist not inited')) 119 | return rank 120 | 121 | 122 | def get_world_size(): 123 | if not inited: 124 | raise(Exception('dist not inited')) 125 | return world_size 126 | 127 | 128 | def reduce_gradients(model, _type='sum'): 129 | types = ['sum', 'avg'] 130 | assert _type in types, 'gradients method must be in "{}"'.format(types) 131 | log_once("gradients method is {}".format(_type)) 132 | if get_world_size() > 1: 133 | for param in model.parameters(): 134 | if param.requires_grad: 135 | dist.all_reduce(param.grad.data) 136 | if _type == 'avg': 137 | param.grad.data /= get_world_size() 138 | else: 139 | return None 140 | -------------------------------------------------------------------------------- /siamreppoints/utils/log_helper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import os 9 | import logging 10 | import math 11 | import sys 12 | 13 | 14 | if hasattr(sys, 'frozen'): # support for py2exe 15 | _srcfile = "logging%s__init__%s" % (os.sep, __file__[-4:]) 16 | elif __file__[-4:].lower() in ['.pyc', '.pyo']: 17 | _srcfile = __file__[:-4] + '.py' 18 | else: 19 | _srcfile = __file__ 20 | _srcfile = os.path.normcase(_srcfile) 21 | 22 | logs = set() 23 | 24 | 25 | class Filter: 26 | def __init__(self, flag): 27 | self.flag = flag 28 | 29 | def filter(self, x): 30 | return self.flag 31 | 32 | 33 | class Dummy: 34 | def __init__(self, *arg, **kwargs): 35 | pass 36 | 37 | def __getattr__(self, arg): 38 | def dummy(*args, **kwargs): pass 39 | return dummy 40 | 41 | 42 | def get_format(logger, level): 43 | if 'RANK' in os.environ: 44 | rank = int(os.environ['RANK']) 45 | 46 | if level == logging.INFO: 47 | logger.addFilter(Filter(rank == 0)) 48 | else: 49 | rank = 0 50 | format_str = '[%(asctime)s-rk{}-%(filename)s#%(lineno)3d] %(message)s'.format(rank) 51 | formatter = logging.Formatter(format_str) 52 | return formatter 53 | 54 | 55 | def get_format_custom(logger, level): 56 | if 'RANK' in os.environ: 57 | rank = int(os.environ['RANK']) 58 | if level == logging.INFO: 59 | logger.addFilter(Filter(rank == 0)) 60 | else: 61 | rank = 0 62 | format_str = '[%(asctime)s-rk{}-%(message)s'.format(rank) 63 | formatter = logging.Formatter(format_str) 64 | return formatter 65 | 66 | 67 | def init_log(name, level=logging.INFO, format_func=get_format): 68 | if (name, level) in logs: 69 | return 70 | logs.add((name, level)) 71 | logger = logging.getLogger(name) 72 | logger.setLevel(level) 73 | ch = logging.StreamHandler() 74 | ch.setLevel(level) 75 | formatter = format_func(logger, level) 76 | ch.setFormatter(formatter) 77 | logger.addHandler(ch) 78 | return logger 79 | 80 | 81 | def add_file_handler(name, log_file, level=logging.INFO): 82 | logger = logging.getLogger(name) 83 | fh = logging.FileHandler(log_file) 84 | fh.setFormatter(get_format(logger, level)) 85 | logger.addHandler(fh) 86 | 87 | 88 | init_log('global') 89 | 90 | 91 | def print_speed(i, i_time, n): 92 | """print_speed(index, index_time, total_iteration)""" 93 | logger = logging.getLogger('global') 94 | average_time = i_time 95 | remaining_time = (n - i) * average_time 96 | remaining_day = math.floor(remaining_time / 86400) 97 | remaining_hour = math.floor(remaining_time / 3600 - 98 | remaining_day * 24) 99 | remaining_min = math.floor(remaining_time / 60 - 100 | remaining_day * 1440 - 101 | remaining_hour * 60) 102 | logger.info('Progress: %d / %d [%d%%], Speed: %.3f s/iter, ETA %d:%02d:%02d (D:H:M)\n' % 103 | (i, n, i / n * 100, 104 | average_time, 105 | remaining_day, remaining_hour, remaining_min)) 106 | 107 | 108 | def find_caller(): 109 | def current_frame(): 110 | try: 111 | raise Exception 112 | except: 113 | return sys.exc_info()[2].tb_frame.f_back 114 | 115 | f = current_frame() 116 | if f is not None: 117 | f = f.f_back 118 | rv = "(unknown file)", 0, "(unknown function)" 119 | while hasattr(f, "f_code"): 120 | co = f.f_code 121 | filename = os.path.normcase(co.co_filename) 122 | rv = (co.co_filename, f.f_lineno, co.co_name) 123 | if filename == _srcfile: 124 | f = f.f_back 125 | continue 126 | break 127 | rv = list(rv) 128 | rv[0] = os.path.basename(rv[0]) 129 | return rv 130 | 131 | 132 | class LogOnce: 133 | def 
__init__(self): 134 | self.logged = set() 135 | self.logger = init_log('log_once', format_func=get_format_custom) 136 | 137 | def log(self, strings): 138 | fn, lineno, caller = find_caller() 139 | key = (fn, lineno, caller, strings) 140 | if key in self.logged: 141 | return 142 | self.logged.add(key) 143 | message = "{filename:s}<{caller}>#{lineno:3d}] {strings}".format( 144 | filename=fn, lineno=lineno, strings=strings, caller=caller) 145 | self.logger.info(message) 146 | 147 | 148 | once_logger = LogOnce() 149 | 150 | 151 | def log_once(strings): 152 | once_logger.log(strings) 153 | 154 | 155 | def main(): 156 | for i, lvl in enumerate([logging.DEBUG, logging.INFO, 157 | logging.WARNING, logging.ERROR, 158 | logging.CRITICAL]): 159 | log_name = str(lvl) 160 | init_log(log_name, lvl) 161 | logger = logging.getLogger(log_name) 162 | print('****cur lvl:{}'.format(lvl)) 163 | logger.debug('debug') 164 | logger.info('info') 165 | logger.warning('warning') 166 | logger.error('error') 167 | logger.critical('critiacal') 168 | 169 | 170 | if __name__ == '__main__': 171 | main() 172 | for i in range(10): 173 | log_once('xxx') 174 | -------------------------------------------------------------------------------- /siamreppoints/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import os 9 | 10 | from colorama import Fore, Style 11 | 12 | 13 | __all__ = ['commit', 'describe'] 14 | 15 | 16 | def _exec(cmd): 17 | f = os.popen(cmd, 'r', 1) 18 | return f.read().strip() 19 | 20 | 21 | def _bold(s): 22 | return "\033[1m%s\033[0m" % s 23 | 24 | 25 | def _color(s): 26 | return f'{Fore.RED}{s}{Style.RESET_ALL}' 27 | 28 | 29 | def _describe(model, lines=None, spaces=0): 30 | head = " " * spaces 31 | for name, p in model.named_parameters(): 32 | if '.' in name: 33 | continue 34 | if p.requires_grad: 35 | name = _color(name) 36 | line = "{head}- {name}".format(head=head, name=name) 37 | lines.append(line) 38 | 39 | for name, m in model.named_children(): 40 | space_num = len(name) + spaces + 1 41 | if m.training: 42 | name = _color(name) 43 | line = "{head}.{name} ({type})".format( 44 | head=head, 45 | name=name, 46 | type=m.__class__.__name__) 47 | lines.append(line) 48 | _describe(m, lines, space_num) 49 | 50 | 51 | def commit(): 52 | root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')) 53 | cmd = "cd {}; git log | head -n1 | awk '{{print $2}}'".format(root) 54 | commit = _exec(cmd) 55 | cmd = "cd {}; git log --oneline | head -n1".format(root) 56 | commit_log = _exec(cmd) 57 | return "commit : {}\n log : {}".format(commit, commit_log) 58 | 59 | 60 | def describe(net, name=None): 61 | num = 0 62 | lines = [] 63 | if name is not None: 64 | lines.append(name) 65 | num = len(name) 66 | _describe(net, lines, num) 67 | return "\n".join(lines) 68 | -------------------------------------------------------------------------------- /siamreppoints/utils/model_load.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import logging 9 | 10 | import torch 11 | 12 | 13 | logger = logging.getLogger('global') 14 | 15 | 16 | def check_keys(model, pretrained_state_dict): 17 | ckpt_keys = set(pretrained_state_dict.keys()) 18 | model_keys = set(model.state_dict().keys()) 19 | used_pretrained_keys = model_keys & ckpt_keys 20 | unused_pretrained_keys = ckpt_keys - model_keys 21 | missing_keys = model_keys - ckpt_keys 22 | # filter 'num_batches_tracked' 23 | missing_keys = [x for x in missing_keys 24 | if not x.endswith('num_batches_tracked')] 25 | if len(missing_keys) > 0: 26 | logger.info('[Warning] missing keys: {}'.format(missing_keys)) 27 | logger.info('missing keys:{}'.format(len(missing_keys))) 28 | if len(unused_pretrained_keys) > 0: 29 | logger.info('[Warning] unused_pretrained_keys: {}'.format( 30 | unused_pretrained_keys)) 31 | logger.info('unused checkpoint keys:{}'.format( 32 | len(unused_pretrained_keys))) 33 | logger.info('used keys:{}'.format(len(used_pretrained_keys))) 34 | assert len(used_pretrained_keys) > 0, \ 35 | 'load NONE from pretrained checkpoint' 36 | return True 37 | 38 | 39 | def remove_prefix(state_dict, prefix): 40 | ''' Old style model is stored with all names of parameters 41 | share common prefix 'module.' ''' 42 | logger.info('remove prefix \'{}\''.format(prefix)) 43 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 44 | return {f(key): value for key, value in state_dict.items()} 45 | 46 | 47 | def load_pretrain(model, pretrained_path): 48 | logger.info('load pretrained model from {}'.format(pretrained_path)) 49 | device = torch.cuda.current_device() 50 | pretrained_dict = torch.load(pretrained_path, 51 | map_location=lambda storage, loc: storage.cuda(device)) 52 | if "state_dict" in pretrained_dict.keys(): 53 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 54 | 'module.') 55 | else: 56 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 57 | 58 | try: 59 | check_keys(model, pretrained_dict) 60 | except: 61 | logger.info('[Warning]: using pretrain as features.\ 62 | Adding "features." as prefix') 63 | new_dict = {} 64 | for k, v in pretrained_dict.items(): 65 | k = 'features.' 
+ k 66 | new_dict[k] = v 67 | pretrained_dict = new_dict 68 | check_keys(model, pretrained_dict) 69 | model.load_state_dict(pretrained_dict, strict=False) 70 | return model 71 | 72 | 73 | def restore_from(model, optimizer, ckpt_path): 74 | device = torch.cuda.current_device() 75 | ckpt = torch.load(ckpt_path, 76 | map_location=lambda storage, loc: storage.cuda(device)) 77 | epoch = ckpt['epoch'] 78 | 79 | ckpt_model_dict = remove_prefix(ckpt['state_dict'], 'module.') 80 | check_keys(model, ckpt_model_dict) 81 | model.load_state_dict(ckpt_model_dict, strict=False) 82 | 83 | check_keys(optimizer, ckpt['optimizer']) 84 | optimizer.load_state_dict(ckpt['optimizer']) 85 | return model, optimizer, epoch 86 | -------------------------------------------------------------------------------- /testing_dataset/README.md: -------------------------------------------------------------------------------- 1 | # Testing dataset directory 2 | # Put your testing datasets here 3 | - [x] [VOT2016](http://www.votchallenge.net/vot2016/dataset.html) 4 | - [x] [VOT2018](http://www.votchallenge.net/vot2018/dataset.html) 5 | - [x] [VOT2018-LT](http://www.votchallenge.net/vot2018/dataset.html) 6 | - [x] [OTB100(OTB2015)](http://cvlab.hanyang.ac.kr/tracker_benchmark/datasets.html) 7 | - [x] [UAV123](https://ivul.kaust.edu.sa/Pages/Dataset-UAV123.aspx) 8 | - [x] [NFS](http://ci2cv.net/nfs/index.html) 9 | - [x] [LaSOT](https://cis.temple.edu/lasot/) 10 | - [ ] [TrackingNet (Evaluation on Server)](https://tracking-net.org) 11 | - [ ] [GOT-10k (Evaluation on Server)](http://got-10k.aitestunion.com) 12 | 13 | ## Download Dataset 14 | Download the json annotation files used by our toolkit from [baidu pan](https://pan.baidu.com/s/1js0Qhykqqur7_lNRtle1tA) 15 | 16 | 1. Put CVPR13.json, OTB100.json, OTB50.json in the OTB100 dataset directory (you need to copy Jogging to Jogging-1 and Jogging-2, and copy Skating2 to Skating2-1 and Skating2-2, or use softlinks; see the sketch at the end of this file) 17 | 18 | The directory should have the following format 19 | 20 | | -- OTB100/ 21 | 22 |   | -- Basketball 23 | 24 |   | ...... 25 | 26 |   | -- Woman 27 | 28 |   | -- OTB100.json 29 | 30 |   | -- OTB50.json 31 | 32 |   | -- CVPR13.json 33 | 34 | 2. Put all the other json files in their dataset directories, as in step 1 35 |
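For step 1, a minimal sketch using softlinks (assuming the OTB100 sequences are already extracted under `testing_dataset/OTB100/` and the json files were downloaded to `~/Downloads`; adjust both paths to your own layout):

```bash
cd testing_dataset/OTB100
# OTB100.json/OTB50.json list the two targets of these sequences as separate entries
ln -s Jogging Jogging-1
ln -s Jogging Jogging-2
ln -s Skating2 Skating2-1
ln -s Skating2 Skating2-2
# place the downloaded annotation files next to the sequence folders
cp ~/Downloads/CVPR13.json ~/Downloads/OTB100.json ~/Downloads/OTB50.json .
```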
-------------------------------------------------------------------------------- /toolkit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhanght021/RPT/9084392caaf502fe15ffdc5387b38d33da35283f/toolkit/__init__.py -------------------------------------------------------------------------------- /toolkit/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .vot import VOTDataset, VOTLTDataset 2 | from .otb import OTBDataset 3 | from .uav import UAVDataset 4 | from .lasot import LaSOTDataset 5 | from .nfs import NFSDataset 6 | from .trackingnet import TrackingNetDataset 7 | from .got10k import GOT10kDataset 8 | 9 | class DatasetFactory(object): 10 | @staticmethod 11 | def create_dataset(**kwargs): 12 | """ 13 | Args: 14 | name: dataset name 'OTB2015', 'LaSOT', 'UAV123', 'NFS240', 'NFS30', 15 | 'VOT2018', 'VOT2016', 'VOT2018-LT' 16 | dataset_root: dataset root 17 | load_img: whether to load image 18 | Return: 19 | dataset 20 | """ 21 | assert 'name' in kwargs, "should provide dataset name" 22 | name = kwargs['name'] 23 | if 'OTB' in name: 24 | dataset = OTBDataset(**kwargs) 25 | elif 'LaSOT' == name: 26 | dataset = LaSOTDataset(**kwargs) 27 | elif 'UAV' in name: 28 | dataset = UAVDataset(**kwargs) 29 | elif 'NFS' in name: 30 | dataset = NFSDataset(**kwargs) 31 | elif 'VOT2018' == name or 'VOT2016' == name or 'VOT2019' == name: 32 | dataset = VOTDataset(**kwargs) 33 | elif 'VOT2018-LT' == name: 34 | dataset = VOTLTDataset(**kwargs) 35 | elif 'TrackingNet' == name: 36 | dataset = TrackingNetDataset(**kwargs) 37 | elif 'GOT-10k' == name: 38 | dataset = GOT10kDataset(**kwargs) 39 | else: 40 | raise Exception("unknown dataset {}".format(kwargs['name'])) 41 | return dataset 42 | 43 | -------------------------------------------------------------------------------- /toolkit/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | 3 | class Dataset(object): 4 | def __init__(self, name, dataset_root): 5 | self.name = name 6 | self.dataset_root = dataset_root 7 | self.videos = None 8 | 9 | def __getitem__(self, idx): 10 | if isinstance(idx, str): 11 | return self.videos[idx] 12 | elif isinstance(idx, int): 13 | return self.videos[sorted(list(self.videos.keys()))[idx]] 14 | 15 | def __len__(self): 16 | return len(self.videos) 17 | 18 | def __iter__(self): 19 | keys = sorted(list(self.videos.keys())) 20 | for key in keys: 21 | yield self.videos[key] 22 | 23 | def set_tracker(self, path, tracker_names): 24 | """ 25 | Args: 26 | path: path to tracker results, 27 | tracker_names: list of tracker name 28 | """ 29 | self.tracker_path = path 30 | self.tracker_names = tracker_names 31 | # for video in tqdm(self.videos.values(), 32 | # desc='loading tracker result', ncols=100): 33 | # video.load_tracker(path, tracker_names) 34 | -------------------------------------------------------------------------------- /toolkit/datasets/got10k.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import os 4 | import numpy as np 5 | 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | class GOT10kVideo(Video): 13 | """ 14 | Args: 15 | name: video name 16 | root: dataset root 17 | video_dir: video directory 18 | init_rect:
init rectangle 19 | img_names: image names 20 | gt_rect: groundtruth rectangle 21 | attr: attribute of video 22 | """ 23 | def __init__(self, name, root, video_dir, init_rect, img_names, 24 | gt_rect, attr, load_img=False): 25 | super(GOT10kVideo, self).__init__(name, root, video_dir, 26 | init_rect, img_names, gt_rect, attr, load_img) 27 | 28 | # def load_tracker(self, path, tracker_names=None): 29 | # """ 30 | # Args: 31 | # path(str): path to result 32 | # tracker_name(list): name of tracker 33 | # """ 34 | # if not tracker_names: 35 | # tracker_names = [x.split('/')[-1] for x in glob(path) 36 | # if os.path.isdir(x)] 37 | # if isinstance(tracker_names, str): 38 | # tracker_names = [tracker_names] 39 | # # self.pred_trajs = {} 40 | # for name in tracker_names: 41 | # traj_file = os.path.join(path, name, self.name+'.txt') 42 | # if os.path.exists(traj_file): 43 | # with open(traj_file, 'r') as f : 44 | # self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 45 | # for x in f.readlines()] 46 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 47 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 48 | # else: 49 | 50 | # self.tracker_names = list(self.pred_trajs.keys()) 51 | 52 | class GOT10kDataset(Dataset): 53 | """ 54 | Args: 55 | name: dataset name, should be "NFS30" or "NFS240" 56 | dataset_root, dataset root dir 57 | """ 58 | def __init__(self, name, dataset_root, load_img=False): 59 | super(GOT10kDataset, self).__init__(name, dataset_root) 60 | with open(os.path.join(dataset_root, name+'_new.json'), 'r') as f: 61 | meta_data = json.load(f) 62 | 63 | # load videos 64 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 65 | self.videos = {} 66 | for video in pbar: 67 | pbar.set_postfix_str(video) 68 | self.videos[video] = GOT10kVideo(video, 69 | dataset_root, 70 | meta_data[video]['video_dir'], 71 | meta_data[video]['init_rect'], 72 | meta_data[video]['img_names'], 73 | meta_data[video]['gt_rect'], 74 | None) 75 | self.attr = {} 76 | self.attr['ALL'] = list(self.videos.keys()) 77 | -------------------------------------------------------------------------------- /toolkit/datasets/lasot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | class LaSOTVideo(Video): 12 | """ 13 | Args: 14 | name: video name 15 | root: dataset root 16 | video_dir: video directory 17 | init_rect: init rectangle 18 | img_names: image names 19 | gt_rect: groundtruth rectangle 20 | attr: attribute of video 21 | """ 22 | def __init__(self, name, root, video_dir, init_rect, img_names, 23 | gt_rect, attr, absent, load_img=False): 24 | super(LaSOTVideo, self).__init__(name, root, video_dir, 25 | init_rect, img_names, gt_rect, attr, load_img) 26 | self.absent = np.array(absent, np.int8) 27 | 28 | def load_tracker(self, path, tracker_names=None, store=True): 29 | """ 30 | Args: 31 | path(str): path to result 32 | tracker_name(list): name of tracker 33 | """ 34 | if not tracker_names: 35 | tracker_names = [x.split('/')[-1] for x in glob(path) 36 | if os.path.isdir(x)] 37 | if isinstance(tracker_names, str): 38 | tracker_names = [tracker_names] 39 | for name in tracker_names: 40 | traj_file = os.path.join(path, name, self.name+'.txt') 41 | if os.path.exists(traj_file): 42 | with open(traj_file, 'r') as f : 43 | pred_traj = [list(map(float, 
x.strip().split(','))) 44 | for x in f.readlines()] 45 | else: 46 | print("File not exists: ", traj_file) 47 | if self.name == 'monkey-17': 48 | pred_traj = pred_traj[:len(self.gt_traj)] 49 | if store: 50 | self.pred_trajs[name] = pred_traj 51 | else: 52 | return pred_traj 53 | self.tracker_names = list(self.pred_trajs.keys()) 54 | 55 | 56 | 57 | class LaSOTDataset(Dataset): 58 | """ 59 | Args: 60 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50' 61 | dataset_root: dataset root 62 | load_img: wether to load all imgs 63 | """ 64 | def __init__(self, name, dataset_root, load_img=False): 65 | super(LaSOTDataset, self).__init__(name, dataset_root) 66 | with open(os.path.join(dataset_root, name+'_new.json'), 'r') as f: 67 | meta_data = json.load(f) 68 | 69 | # load videos 70 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 71 | self.videos = {} 72 | for video in pbar: 73 | pbar.set_postfix_str(video) 74 | self.videos[video] = LaSOTVideo(video, 75 | dataset_root, 76 | meta_data[video]['video_dir'], 77 | meta_data[video]['init_rect'], 78 | meta_data[video]['img_names'], 79 | meta_data[video]['gt_rect'], 80 | meta_data[video]['attr'], 81 | meta_data[video]['absent']) 82 | 83 | # set attr 84 | attr = [] 85 | for x in self.videos.values(): 86 | attr += x.attr 87 | attr = set(attr) 88 | self.attr = {} 89 | self.attr['ALL'] = list(self.videos.keys()) 90 | for x in attr: 91 | self.attr[x] = [] 92 | for k, v in self.videos.items(): 93 | for attr_ in v.attr: 94 | self.attr[attr_].append(k) 95 | 96 | 97 | -------------------------------------------------------------------------------- /toolkit/datasets/nfs.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | 12 | class NFSVideo(Video): 13 | """ 14 | Args: 15 | name: video name 16 | root: dataset root 17 | video_dir: video directory 18 | init_rect: init rectangle 19 | img_names: image names 20 | gt_rect: groundtruth rectangle 21 | attr: attribute of video 22 | """ 23 | def __init__(self, name, root, video_dir, init_rect, img_names, 24 | gt_rect, attr, load_img=False): 25 | super(NFSVideo, self).__init__(name, root, video_dir, 26 | init_rect, img_names, gt_rect, attr, load_img) 27 | 28 | # def load_tracker(self, path, tracker_names=None): 29 | # """ 30 | # Args: 31 | # path(str): path to result 32 | # tracker_name(list): name of tracker 33 | # """ 34 | # if not tracker_names: 35 | # tracker_names = [x.split('/')[-1] for x in glob(path) 36 | # if os.path.isdir(x)] 37 | # if isinstance(tracker_names, str): 38 | # tracker_names = [tracker_names] 39 | # # self.pred_trajs = {} 40 | # for name in tracker_names: 41 | # traj_file = os.path.join(path, name, self.name+'.txt') 42 | # if os.path.exists(traj_file): 43 | # with open(traj_file, 'r') as f : 44 | # self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 45 | # for x in f.readlines()] 46 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 47 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 48 | # else: 49 | 50 | # self.tracker_names = list(self.pred_trajs.keys()) 51 | 52 | class NFSDataset(Dataset): 53 | """ 54 | Args: 55 | name: dataset name, should be "NFS30" or "NFS240" 56 | dataset_root, dataset root dir 57 | """ 58 | def __init__(self, name, dataset_root, load_img=False): 59 | super(NFSDataset, self).__init__(name, dataset_root) 
60 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 61 | meta_data = json.load(f) 62 | 63 | # load videos 64 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 65 | self.videos = {} 66 | for video in pbar: 67 | pbar.set_postfix_str(video) 68 | self.videos[video] = NFSVideo(video, 69 | dataset_root, 70 | meta_data[video]['video_dir'], 71 | meta_data[video]['init_rect'], 72 | meta_data[video]['img_names'], 73 | meta_data[video]['gt_rect'], 74 | None) 75 | 76 | self.attr = {} 77 | self.attr['ALL'] = list(self.videos.keys()) 78 | -------------------------------------------------------------------------------- /toolkit/datasets/otb.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | class OTBVideo(Video): 14 | """ 15 | Args: 16 | name: video name 17 | root: dataset root 18 | video_dir: video directory 19 | init_rect: init rectangle 20 | img_names: image names 21 | gt_rect: groundtruth rectangle 22 | attr: attribute of video 23 | """ 24 | def __init__(self, name, root, video_dir, init_rect, img_names, 25 | gt_rect, attr, load_img=False): 26 | super(OTBVideo, self).__init__(name, root, video_dir, 27 | init_rect, img_names, gt_rect, attr, load_img) 28 | 29 | def load_tracker(self, path, tracker_names=None, store=True): 30 | """ 31 | Args: 32 | path(str): path to result 33 | tracker_name(list): name of tracker 34 | """ 35 | if not tracker_names: 36 | tracker_names = [x.split('/')[-1] for x in glob(path) 37 | if os.path.isdir(x)] 38 | if isinstance(tracker_names, str): 39 | tracker_names = [tracker_names] 40 | for name in tracker_names: 41 | traj_file = os.path.join(path, name, self.name+'.txt') 42 | if not os.path.exists(traj_file): 43 | if self.name == 'FleetFace': 44 | txt_name = 'fleetface.txt' 45 | elif self.name == 'Jogging-1': 46 | txt_name = 'jogging_1.txt' 47 | elif self.name == 'Jogging-2': 48 | txt_name = 'jogging_2.txt' 49 | elif self.name == 'Skating2-1': 50 | txt_name = 'skating2_1.txt' 51 | elif self.name == 'Skating2-2': 52 | txt_name = 'skating2_2.txt' 53 | elif self.name == 'FaceOcc1': 54 | txt_name = 'faceocc1.txt' 55 | elif self.name == 'FaceOcc2': 56 | txt_name = 'faceocc2.txt' 57 | elif self.name == 'Human4-2': 58 | txt_name = 'human4_2.txt' 59 | else: 60 | txt_name = self.name[0].lower()+self.name[1:]+'.txt' 61 | traj_file = os.path.join(path, name, txt_name) 62 | if os.path.exists(traj_file): 63 | with open(traj_file, 'r') as f : 64 | pred_traj = [list(map(float, x.strip().split(','))) 65 | for x in f.readlines()] 66 | if len(pred_traj) != len(self.gt_traj): 67 | print(name, len(pred_traj), len(self.gt_traj), self.name) 68 | if store: 69 | self.pred_trajs[name] = pred_traj 70 | else: 71 | return pred_traj 72 | else: 73 | print(traj_file) 74 | self.tracker_names = list(self.pred_trajs.keys()) 75 | 76 | 77 | 78 | class OTBDataset(Dataset): 79 | """ 80 | Args: 81 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50' 82 | dataset_root: dataset root 83 | load_img: wether to load all imgs 84 | """ 85 | def __init__(self, name, dataset_root, load_img=False): 86 | super(OTBDataset, self).__init__(name, dataset_root) 87 | with open(os.path.join(dataset_root, name+'_new.json'), 'r') as f: 88 | meta_data = json.load(f) 89 | 90 | # load videos 91 | pbar = tqdm(meta_data.keys(), desc='loading '+name, 
ncols=100) 92 | self.videos = {} 93 | for video in pbar: 94 | pbar.set_postfix_str(video) 95 | self.videos[video] = OTBVideo(video, 96 | dataset_root, 97 | meta_data[video]['video_dir'], 98 | meta_data[video]['init_rect'], 99 | meta_data[video]['img_names'], 100 | meta_data[video]['gt_rect'], 101 | meta_data[video]['attr'], 102 | load_img) 103 | 104 | # set attr 105 | attr = [] 106 | for x in self.videos.values(): 107 | attr += x.attr 108 | attr = set(attr) 109 | self.attr = {} 110 | self.attr['ALL'] = list(self.videos.keys()) 111 | for x in attr: 112 | self.attr[x] = [] 113 | for k, v in self.videos.items(): 114 | for attr_ in v.attr: 115 | self.attr[attr_].append(k) 116 | -------------------------------------------------------------------------------- /toolkit/datasets/trackingnet.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | class TrackingNetVideo(Video): 12 | """ 13 | Args: 14 | name: video name 15 | root: dataset root 16 | video_dir: video directory 17 | init_rect: init rectangle 18 | img_names: image names 19 | gt_rect: groundtruth rectangle 20 | attr: attribute of video 21 | """ 22 | def __init__(self, name, root, video_dir, init_rect, img_names, 23 | gt_rect, attr, load_img=False): 24 | super(TrackingNetVideo, self).__init__(name, root, video_dir, 25 | init_rect, img_names, gt_rect, attr, load_img) 26 | 27 | # def load_tracker(self, path, tracker_names=None): 28 | # """ 29 | # Args: 30 | # path(str): path to result 31 | # tracker_name(list): name of tracker 32 | # """ 33 | # if not tracker_names: 34 | # tracker_names = [x.split('/')[-1] for x in glob(path) 35 | # if os.path.isdir(x)] 36 | # if isinstance(tracker_names, str): 37 | # tracker_names = [tracker_names] 38 | # # self.pred_trajs = {} 39 | # for name in tracker_names: 40 | # traj_file = os.path.join(path, name, self.name+'.txt') 41 | # if os.path.exists(traj_file): 42 | # with open(traj_file, 'r') as f : 43 | # self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 44 | # for x in f.readlines()] 45 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 46 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 47 | # else: 48 | 49 | # self.tracker_names = list(self.pred_trajs.keys()) 50 | 51 | class TrackingNetDataset(Dataset): 52 | """ 53 | Args: 54 | name: dataset name, should be "NFS30" or "NFS240" 55 | dataset_root, dataset root dir 56 | """ 57 | def __init__(self, name, dataset_root, load_img=False): 58 | super(TrackingNetDataset, self).__init__(name, dataset_root) 59 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 60 | meta_data = json.load(f) 61 | 62 | # load videos 63 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 64 | self.videos = {} 65 | for video in pbar: 66 | pbar.set_postfix_str(video) 67 | self.videos[video] = TrackingNetVideo(video, 68 | dataset_root, 69 | meta_data[video]['video_dir'], 70 | meta_data[video]['init_rect'], 71 | meta_data[video]['img_names'], 72 | meta_data[video]['gt_rect'], 73 | None) 74 | self.attr = {} 75 | self.attr['ALL'] = list(self.videos.keys()) 76 | -------------------------------------------------------------------------------- /toolkit/datasets/uav.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from tqdm import tqdm 5 | from 
glob import glob 6 | 7 | from .dataset import Dataset 8 | from .video import Video 9 | 10 | class UAVVideo(Video): 11 | """ 12 | Args: 13 | name: video name 14 | root: dataset root 15 | video_dir: video directory 16 | init_rect: init rectangle 17 | img_names: image names 18 | gt_rect: groundtruth rectangle 19 | attr: attribute of video 20 | """ 21 | def __init__(self, name, root, video_dir, init_rect, img_names, 22 | gt_rect, attr, load_img=False): 23 | super(UAVVideo, self).__init__(name, root, video_dir, 24 | init_rect, img_names, gt_rect, attr, load_img) 25 | 26 | 27 | class UAVDataset(Dataset): 28 | """ 29 | Args: 30 | name: dataset name, should be 'UAV123', 'UAV20L' 31 | dataset_root: dataset root 32 | load_img: wether to load all imgs 33 | """ 34 | def __init__(self, name, dataset_root, load_img=False): 35 | super(UAVDataset, self).__init__(name, dataset_root) 36 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 37 | meta_data = json.load(f) 38 | 39 | # load videos 40 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 41 | self.videos = {} 42 | for video in pbar: 43 | pbar.set_postfix_str(video) 44 | self.videos[video] = UAVVideo(video, 45 | dataset_root, 46 | meta_data[video]['video_dir'], 47 | meta_data[video]['init_rect'], 48 | meta_data[video]['img_names'], 49 | meta_data[video]['gt_rect'], 50 | meta_data[video]['attr']) 51 | 52 | # set attr 53 | attr = [] 54 | for x in self.videos.values(): 55 | attr += x.attr 56 | attr = set(attr) 57 | self.attr = {} 58 | self.attr['ALL'] = list(self.videos.keys()) 59 | for x in attr: 60 | self.attr[x] = [] 61 | for k, v in self.videos.items(): 62 | for attr_ in v.attr: 63 | self.attr[attr_].append(k) 64 | 65 | -------------------------------------------------------------------------------- /toolkit/datasets/video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import re 4 | import numpy as np 5 | import json 6 | 7 | from glob import glob 8 | 9 | class Video(object): 10 | def __init__(self, name, root, video_dir, init_rect, img_names, 11 | gt_rect, attr, load_img=False): 12 | self.name = name 13 | self.video_dir = video_dir 14 | self.init_rect = init_rect 15 | self.gt_traj = gt_rect 16 | self.attr = attr 17 | self.pred_trajs = {} 18 | self.img_names = [os.path.join(root, x) for x in img_names] 19 | self.imgs = None 20 | 21 | if load_img: 22 | self.imgs = [cv2.imread(x) for x in self.img_names] 23 | self.width = self.imgs[0].shape[1] 24 | self.height = self.imgs[0].shape[0] 25 | else: 26 | img = cv2.imread(self.img_names[0]) 27 | assert img is not None, self.img_names[0] 28 | self.width = img.shape[1] 29 | self.height = img.shape[0] 30 | 31 | def load_tracker(self, path, tracker_names=None, store=True): 32 | """ 33 | Args: 34 | path(str): path to result 35 | tracker_name(list): name of tracker 36 | """ 37 | if not tracker_names: 38 | tracker_names = [x.split('/')[-1] for x in glob(path) 39 | if os.path.isdir(x)] 40 | if isinstance(tracker_names, str): 41 | tracker_names = [tracker_names] 42 | for name in tracker_names: 43 | traj_file = os.path.join(path, name, self.name+'.txt') 44 | if os.path.exists(traj_file): 45 | with open(traj_file, 'r') as f : 46 | pred_traj = [list(map(float, x.strip().split(','))) 47 | for x in f.readlines()] 48 | if len(pred_traj) != len(self.gt_traj): 49 | print(name, len(pred_traj), len(self.gt_traj), self.name) 50 | if store: 51 | self.pred_trajs[name] = pred_traj 52 | else: 53 | return pred_traj 54 | else: 55 | 
print(traj_file) 56 | self.tracker_names = list(self.pred_trajs.keys()) 57 | 58 | def load_img(self): 59 | if self.imgs is None: 60 | self.imgs = [cv2.imread(x) for x in self.img_names] 61 | self.width = self.imgs[0].shape[1] 62 | self.height = self.imgs[0].shape[0] 63 | 64 | def free_img(self): 65 | self.imgs = None 66 | 67 | def __len__(self): 68 | return len(self.img_names) 69 | 70 | def __getitem__(self, idx): 71 | if self.imgs is None: 72 | return cv2.imread(self.img_names[idx]), self.gt_traj[idx] 73 | else: 74 | return self.imgs[idx], self.gt_traj[idx] 75 | 76 | def __iter__(self): 77 | for i in range(len(self.img_names)): 78 | if self.imgs is not None: 79 | yield self.imgs[i], self.gt_traj[i] 80 | else: 81 | yield cv2.imread(self.img_names[i]), self.gt_traj[i] 82 | 83 | def draw_box(self, roi, img, linewidth, color, name=None): 84 | """ 85 | roi: rectangle or polygon 86 | img: numpy array img 87 | linewith: line width of the bbox 88 | """ 89 | if len(roi) > 6 and len(roi) % 2 == 0: 90 | pts = np.array(roi, np.int32).reshape(-1, 1, 2) 91 | color = tuple(map(int, color)) 92 | img = cv2.polylines(img, [pts], True, color, linewidth) 93 | pt = (pts[0, 0, 0], pts[0, 0, 1]-5) 94 | if name: 95 | img = cv2.putText(img, name, pt, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 1) 96 | elif len(roi) == 4: 97 | if not np.isnan(roi[0]): 98 | roi = list(map(int, roi)) 99 | color = tuple(map(int, color)) 100 | img = cv2.rectangle(img, (roi[0], roi[1]), (roi[0]+roi[2], roi[1]+roi[3]), 101 | color, linewidth) 102 | if name: 103 | img = cv2.putText(img, name, (roi[0], roi[1]-5), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 1) 104 | return img 105 | 106 | def show(self, pred_trajs={}, linewidth=2, show_name=False): 107 | """ 108 | pred_trajs: dict of pred_traj, {'tracker_name': list of traj} 109 | pred_traj should contain polygon or rectangle(x, y, width, height) 110 | linewith: line width of the bbox 111 | """ 112 | assert self.imgs is not None 113 | video = [] 114 | cv2.namedWindow(self.name, cv2.WINDOW_NORMAL) 115 | colors = {} 116 | if len(pred_trajs) == 0 and len(self.pred_trajs) > 0: 117 | pred_trajs = self.pred_trajs 118 | for i, (roi, img) in enumerate(zip(self.gt_traj, 119 | self.imgs[self.start_frame:self.end_frame+1])): 120 | img = img.copy() 121 | if len(img.shape) == 2: 122 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 123 | else: 124 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 125 | img = self.draw_box(roi, img, linewidth, (0, 255, 0), 126 | 'gt' if show_name else None) 127 | for name, trajs in pred_trajs.items(): 128 | if name not in colors: 129 | color = tuple(np.random.randint(0, 256, 3)) 130 | colors[name] = color 131 | else: 132 | color = colors[name] 133 | img = self.draw_box(trajs[0][i], img, linewidth, color, 134 | name if show_name else None) 135 | cv2.putText(img, str(i+self.start_frame), (5, 20), 136 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 0), 2) 137 | cv2.imshow(self.name, img) 138 | cv2.waitKey(40) 139 | video.append(img.copy()) 140 | return video 141 | -------------------------------------------------------------------------------- /toolkit/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .ar_benchmark import AccuracyRobustnessBenchmark 2 | from .eao_benchmark import EAOBenchmark 3 | from .ope_benchmark import OPEBenchmark 4 | from .f1_benchmark import F1Benchmark 5 | -------------------------------------------------------------------------------- /toolkit/evaluation/ar_benchmark.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | @author 3 | """ 4 | 5 | import warnings 6 | import itertools 7 | import numpy as np 8 | 9 | from colorama import Style, Fore 10 | from ..utils import calculate_failures, calculate_accuracy 11 | 12 | class AccuracyRobustnessBenchmark: 13 | """ 14 | Args: 15 | dataset: 16 | burnin: 17 | """ 18 | def __init__(self, dataset, burnin=10): 19 | self.dataset = dataset 20 | self.burnin = burnin 21 | 22 | def eval(self, eval_trackers=None): 23 | """ 24 | Args: 25 | eval_tags: list of tag 26 | eval_trackers: list of tracker name 27 | Returns: 28 | ret: dict of results 29 | """ 30 | if eval_trackers is None: 31 | eval_trackers = self.dataset.tracker_names 32 | if isinstance(eval_trackers, str): 33 | eval_trackers = [eval_trackers] 34 | 35 | result = {} 36 | for tracker_name in eval_trackers: 37 | accuracy, failures = self._calculate_accuracy_robustness(tracker_name) 38 | result[tracker_name] = {'overlaps': accuracy, 39 | 'failures': failures} 40 | return result 41 | 42 | def show_result(self, result, eao_result=None, show_video_level=False, helight_threshold=0.5): 43 | """pretty print result 44 | Args: 45 | result: returned dict from function eval 46 | """ 47 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12) 48 | if eao_result is not None: 49 | header = "|{:^"+str(tracker_name_len)+"}|{:^10}|{:^12}|{:^13}|{:^7}|" 50 | header = header.format('Tracker Name', 51 | 'Accuracy', 'Robustness', 'Lost Number', 'EAO') 52 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^10.3f}|{:^12.3f}|{:^13.1f}|{:^7.3f}|" 53 | else: 54 | header = "|{:^"+str(tracker_name_len)+"}|{:^10}|{:^12}|{:^13}|" 55 | header = header.format('Tracker Name', 56 | 'Accuracy', 'Robustness', 'Lost Number') 57 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^10.3f}|{:^12.3f}|{:^13.1f}|" 58 | bar = '-'*len(header) 59 | print(bar) 60 | print(header) 61 | print(bar) 62 | if eao_result is not None: 63 | tracker_eao = sorted(eao_result.items(), 64 | key=lambda x:x[1]['all'], 65 | reverse=True)[:20] 66 | tracker_names = [x[0] for x in tracker_eao] 67 | else: 68 | tracker_names = list(result.keys()) 69 | for tracker_name in tracker_names: 70 | # for tracker_name, ret in result.items(): 71 | ret = result[tracker_name] 72 | overlaps = list(itertools.chain(*ret['overlaps'].values())) 73 | accuracy = np.nanmean(overlaps) 74 | length = sum([len(x) for x in ret['overlaps'].values()]) 75 | failures = list(ret['failures'].values()) 76 | lost_number = np.mean(np.sum(failures, axis=0)) 77 | robustness = np.mean(np.sum(np.array(failures), axis=0) / length) * 100 78 | if eao_result is None: 79 | print(formatter.format(tracker_name, accuracy, robustness, lost_number)) 80 | else: 81 | print(formatter.format(tracker_name, accuracy, robustness, lost_number, eao_result[tracker_name]['all'])) 82 | print(bar) 83 | 84 | if show_video_level and len(result) < 10: 85 | print('\n\n') 86 | header1 = "|{:^14}|".format("Tracker name") 87 | header2 = "|{:^14}|".format("Video name") 88 | for tracker_name in result.keys(): 89 | header1 += ("{:^17}|").format(tracker_name) 90 | header2 += "{:^8}|{:^8}|".format("Acc", "LN") 91 | print('-'*len(header1)) 92 | print(header1) 93 | print('-'*len(header1)) 94 | print(header2) 95 | print('-'*len(header1)) 96 | videos = list(result[tracker_name]['overlaps'].keys()) 97 | for video in videos: 98 | row = "|{:^14}|".format(video) 99 | for tracker_name in result.keys(): 100 | overlaps = result[tracker_name]['overlaps'][video] 101 | 
accuracy = np.nanmean(overlaps) 102 | failures = result[tracker_name]['failures'][video] 103 | lost_number = np.mean(failures) 104 | 105 | accuracy_str = "{:^8.3f}".format(accuracy) 106 | if accuracy < helight_threshold: 107 | row += f'{Fore.RED}{accuracy_str}{Style.RESET_ALL}|' 108 | else: 109 | row += accuracy_str+'|' 110 | lost_num_str = "{:^8.3f}".format(lost_number) 111 | if lost_number > 0: 112 | row += f'{Fore.RED}{lost_num_str}{Style.RESET_ALL}|' 113 | else: 114 | row += lost_num_str+'|' 115 | print(row) 116 | print('-'*len(header1)) 117 | 118 | def _calculate_accuracy_robustness(self, tracker_name): 119 | overlaps = {} 120 | failures = {} 121 | all_length = {} 122 | for i in range(len(self.dataset)): 123 | video = self.dataset[i] 124 | gt_traj = video.gt_traj 125 | if tracker_name not in video.pred_trajs: 126 | tracker_trajs = video.load_tracker(self.dataset.tracker_path, tracker_name, False) 127 | else: 128 | tracker_trajs = video.pred_trajs[tracker_name] 129 | overlaps_group = [] 130 | num_failures_group = [] 131 | for tracker_traj in tracker_trajs: 132 | num_failures = calculate_failures(tracker_traj)[0] 133 | overlaps_ = calculate_accuracy(tracker_traj, gt_traj, 134 | burnin=10, bound=(video.width, video.height))[1] 135 | overlaps_group.append(overlaps_) 136 | num_failures_group.append(num_failures) 137 | with warnings.catch_warnings(): 138 | warnings.simplefilter("ignore", category=RuntimeWarning) 139 | overlaps[video.name] = np.nanmean(overlaps_group, axis=0).tolist() 140 | failures[video.name] = num_failures_group 141 | return overlaps, failures 142 | -------------------------------------------------------------------------------- /toolkit/evaluation/f1_benchmark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from glob import glob 5 | from tqdm import tqdm 6 | from colorama import Style, Fore 7 | 8 | from ..utils import determine_thresholds, calculate_accuracy, calculate_f1 9 | 10 | class F1Benchmark: 11 | def __init__(self, dataset): 12 | """ 13 | Args: 14 | result_path: 15 | """ 16 | self.dataset = dataset 17 | 18 | def eval(self, eval_trackers=None): 19 | """ 20 | Args: 21 | eval_tags: list of tag 22 | eval_trackers: list of tracker name 23 | Returns: 24 | eao: dict of results 25 | """ 26 | if eval_trackers is None: 27 | eval_trackers = self.dataset.tracker_names 28 | if isinstance(eval_trackers, str): 29 | eval_trackers = [eval_trackers] 30 | 31 | ret = {} 32 | for tracker_name in eval_trackers: 33 | precision, recall, f1 = self._cal_precision_reall(tracker_name) 34 | ret[tracker_name] = {"precision": precision, 35 | "recall": recall, 36 | "f1": f1 37 | } 38 | return ret 39 | 40 | def _cal_precision_reall(self, tracker_name): 41 | score = [] 42 | # for i in range(len(self.dataset)): 43 | # video = self.dataset[i] 44 | for video in self.dataset: 45 | if tracker_name not in video.confidence: 46 | score += video.load_tracker(self.dataset.tracker_path, tracker_name, False)[1] 47 | else: 48 | score += video.confidence[tracker_name] 49 | score = np.array(score) 50 | thresholds = determine_thresholds(score)[::-1] 51 | 52 | precision = {} 53 | recall = {} 54 | f1 = {} 55 | for i in range(len(self.dataset)): 56 | video = self.dataset[i] 57 | gt_traj = video.gt_traj 58 | N = sum([1 for x in gt_traj if len(x) > 1]) 59 | if tracker_name not in video.pred_trajs: 60 | tracker_traj, score = video.load_tracker(self.dataset.tracker_path, tracker_name, False) 61 | else: 62 | tracker_traj = 
video.pred_trajs[tracker_name] 63 | score = video.confidence[tracker_name] 64 | overlaps = calculate_accuracy(tracker_traj, gt_traj, \ 65 | bound=(video.width,video.height))[1] 66 | f1[video.name], precision[video.name], recall[video.name] = \ 67 | calculate_f1(overlaps, score, (video.width,video.height),thresholds, N) 68 | return precision, recall, f1 69 | 70 | def show_result(self, result, show_video_level=False, helight_threshold=0.5): 71 | """pretty print result 72 | Args: 73 | result: returned dict from function eval 74 | """ 75 | # sort tracker according to f1 76 | sorted_tracker = {} 77 | for tracker_name, ret in result.items(): 78 | precision = np.mean(list(ret['precision'].values()), axis=0) 79 | recall = np.mean(list(ret['recall'].values()), axis=0) 80 | f1 = 2 * precision * recall / (precision + recall) 81 | max_idx = np.argmax(f1) 82 | sorted_tracker[tracker_name] = (precision[max_idx], recall[max_idx], 83 | f1[max_idx]) 84 | sorted_tracker_ = sorted(sorted_tracker.items(), 85 | key=lambda x:x[1][2], 86 | reverse=True)[:20] 87 | tracker_names = [x[0] for x in sorted_tracker_] 88 | 89 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12) 90 | header = "|{:^"+str(tracker_name_len)+"}|{:^11}|{:^8}|{:^7}|" 91 | header = header.format('Tracker Name', 92 | 'Precision', 'Recall', 'F1') 93 | bar = '-' * len(header) 94 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^11.3f}|{:^8.3f}|{:^7.3f}|" 95 | print(bar) 96 | print(header) 97 | print(bar) 98 | # for tracker_name, ret in result.items(): 99 | # precision = np.mean(list(ret['precision'].values()), axis=0) 100 | # recall = np.mean(list(ret['recall'].values()), axis=0) 101 | # f1 = 2 * precision * recall / (precision + recall) 102 | # max_idx = np.argmax(f1) 103 | for tracker_name in tracker_names: 104 | precision = sorted_tracker[tracker_name][0] 105 | recall = sorted_tracker[tracker_name][1] 106 | f1 = sorted_tracker[tracker_name][2] 107 | print(formatter.format(tracker_name, precision, recall, f1)) 108 | print(bar) 109 | 110 | if show_video_level and len(result) < 10: 111 | print('\n\n') 112 | header1 = "|{:^14}|".format("Tracker name") 113 | header2 = "|{:^14}|".format("Video name") 114 | for tracker_name in result.keys(): 115 | # col_len = max(20, len(tracker_name)) 116 | header1 += ("{:^28}|").format(tracker_name) 117 | header2 += "{:^11}|{:^8}|{:^7}|".format("Precision", "Recall", "F1") 118 | print('-'*len(header1)) 119 | print(header1) 120 | print('-'*len(header1)) 121 | print(header2) 122 | print('-'*len(header1)) 123 | videos = list(result[tracker_name]['precision'].keys()) 124 | for video in videos: 125 | row = "|{:^14}|".format(video) 126 | for tracker_name in result.keys(): 127 | precision = result[tracker_name]['precision'][video] 128 | recall = result[tracker_name]['recall'][video] 129 | f1 = result[tracker_name]['f1'][video] 130 | max_idx = np.argmax(f1) 131 | precision_str = "{:^11.3f}".format(precision[max_idx]) 132 | if precision[max_idx] < helight_threshold: 133 | row += f'{Fore.RED}{precision_str}{Style.RESET_ALL}|' 134 | else: 135 | row += precision_str+'|' 136 | recall_str = "{:^8.3f}".format(recall[max_idx]) 137 | if recall[max_idx] < helight_threshold: 138 | row += f'{Fore.RED}{recall_str}{Style.RESET_ALL}|' 139 | else: 140 | row += recall_str+'|' 141 | f1_str = "{:^7.3f}".format(f1[max_idx]) 142 | if f1[max_idx] < helight_threshold: 143 | row += f'{Fore.RED}{f1_str}{Style.RESET_ALL}|' 144 | else: 145 | row += f1_str+'|' 146 | print(row) 147 | print('-'*len(header1)) 148 | 
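Both benchmark classes above expose the same `eval` / `show_result` pattern and are normally driven from `tools/eval.py`. The snippet below is a minimal sketch of that wiring; the `VOTDataset` constructor arguments, the `set_tracker` helper, and the `EAOBenchmark` import are assumptions about parts of the toolkit not shown here and may need adjusting to your paths and tracker names.

```python
# Minimal sketch; dataset/result paths and tracker names are placeholders.
from toolkit.datasets import VOTDataset                      # assumed export
from toolkit.evaluation import AccuracyRobustnessBenchmark, EAOBenchmark

trackers = ['siamreppoints']
dataset = VOTDataset('VOT2018', 'testing_dataset/VOT2018')   # assumed signature
dataset.set_tracker('results/VOT2018', trackers)             # assumed helper

ar_benchmark = AccuracyRobustnessBenchmark(dataset)          # burnin defaults to 10
ar_result = ar_benchmark.eval(trackers)                      # {tracker: {'overlaps', 'failures'}}

eao_result = EAOBenchmark(dataset).eval(trackers)            # fills the optional EAO column

# Prints the |Tracker Name|Accuracy|Robustness|Lost Number|EAO| table defined above.
ar_benchmark.show_result(ar_result, eao_result, show_video_level=False)
```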
--------------------------------------------------------------------------------
/toolkit/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from . import region
2 | from .statistics import *
3 | 
--------------------------------------------------------------------------------
/toolkit/utils/c_region.pxd:
--------------------------------------------------------------------------------
1 | cdef extern from "src/region.h":
2 |     ctypedef enum region_type "RegionType":
3 |         EMPTY
4 |         SPECIAL
5 |         RECTANGLE
6 |         POLYGON
7 |         MASK
8 | 
9 |     ctypedef struct region_bounds:
10 |         float top
11 |         float bottom
12 |         float left
13 |         float right
14 | 
15 |     ctypedef struct region_rectangle:
16 |         float x
17 |         float y
18 |         float width
19 |         float height
20 | 
21 |     # ctypedef struct region_mask:
22 |     #     int x
23 |     #     int y
24 |     #     int width
25 |     #     int height
26 |     #     char *data
27 | 
28 |     ctypedef struct region_polygon:
29 |         int count
30 |         float *x
31 |         float *y
32 | 
33 |     ctypedef union region_container_data:
34 |         region_rectangle rectangle
35 |         region_polygon polygon
36 |         # region_mask mask
37 |         int special
38 | 
39 |     ctypedef struct region_container:
40 |         region_type type
41 |         region_container_data data
42 | 
43 |     # ctypedef struct region_overlap:
44 |     #     float overlap
45 |     #     float only1
46 |     #     float only2
47 | 
48 |     # region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds)
49 | 
50 |     float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds)
51 | 
--------------------------------------------------------------------------------
/toolkit/utils/misc.py:
--------------------------------------------------------------------------------
1 | """
2 | @author fangyi.zhang@vipl.ict.ac.cn
3 | """
4 | import numpy as np
5 | 
6 | def determine_thresholds(confidence, resolution=100):
7 |     """choose thresholds according to confidence scores
8 | 
9 |     Args:
10 |         confidence: list or numpy array
11 |         resolution: number of thresholds to choose
12 | 
13 |     Returns:
14 |         thresholds: numpy array
15 |     """
16 |     if isinstance(confidence, list):
17 |         confidence = np.array(confidence)
18 |     confidence = confidence.flatten()
19 |     confidence = confidence[~np.isnan(confidence)]
20 |     confidence.sort()
21 | 
22 |     assert len(confidence) > resolution and resolution > 2
23 | 
24 |     thresholds = np.ones((resolution))
25 |     thresholds[0] = - np.inf
26 |     thresholds[-1] = np.inf
27 |     delta = np.floor(len(confidence) / (resolution - 2))
28 |     idxs = np.linspace(delta, len(confidence)-delta, resolution-2, dtype=np.int32)
29 |     thresholds[1:-1] = confidence[idxs]
30 |     return thresholds
31 | 
--------------------------------------------------------------------------------
/toolkit/utils/src/buffer.h:
--------------------------------------------------------------------------------
1 | 
2 | #ifndef __STRING_BUFFER_H
3 | #define __STRING_BUFFER_H
4 | 
5 | // Enable MinGW secure API for _snprintf_s
6 | #define MINGW_HAS_SECURE_API 1
7 | 
8 | #ifdef _MSC_VER
9 | #define __INLINE __inline
10 | #else
11 | #define __INLINE inline
12 | #endif
13 | 
14 | #include <stdlib.h>
15 | #include <string.h>
16 | #include <stdarg.h>
17 | 
18 | typedef struct string_buffer {
19 |     char* buffer;
20 |     int position;
21 |     int size;
22 | } string_buffer;
23 | 
24 | typedef struct string_list {
25 |     char** buffer;
26 |     int position;
27 |     int size;
28 | } string_list;
29 | 
30 | #define BUFFER_INCREMENT_STEP 4096
31 | 
32 | static __INLINE string_buffer*
buffer_create(int L) { 33 | string_buffer* B = (string_buffer*) malloc(sizeof(string_buffer)); 34 | B->size = L; 35 | B->buffer = (char*) malloc(sizeof(char) * B->size); 36 | B->position = 0; 37 | return B; 38 | } 39 | 40 | static __INLINE void buffer_reset(string_buffer* B) { 41 | B->position = 0; 42 | } 43 | 44 | static __INLINE void buffer_destroy(string_buffer** B) { 45 | if (!(*B)) return; 46 | if ((*B)->buffer) { 47 | free((*B)->buffer); 48 | (*B)->buffer = NULL; 49 | } 50 | free((*B)); 51 | (*B) = NULL; 52 | } 53 | 54 | static __INLINE char* buffer_extract(const string_buffer* B) { 55 | char *S = (char*) malloc(sizeof(char) * (B->position + 1)); 56 | memcpy(S, B->buffer, B->position); 57 | S[B->position] = '\0'; 58 | return S; 59 | } 60 | 61 | static __INLINE int buffer_size(const string_buffer* B) { 62 | return B->position; 63 | } 64 | 65 | static __INLINE void buffer_push(string_buffer* B, char C) { 66 | int required = 1; 67 | if (required > B->size - B->position) { 68 | B->size = B->position + BUFFER_INCREMENT_STEP; 69 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 70 | } 71 | B->buffer[B->position] = C; 72 | B->position += required; 73 | } 74 | 75 | static __INLINE void buffer_append(string_buffer* B, const char *format, ...) { 76 | 77 | int required; 78 | va_list args; 79 | 80 | #if defined(__OS2__) || defined(__WINDOWS__) || defined(WIN32) || defined(_MSC_VER) 81 | 82 | va_start(args, format); 83 | required = _vscprintf(format, args) + 1; 84 | va_end(args); 85 | if (required >= B->size - B->position) { 86 | B->size = B->position + required + 1; 87 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 88 | } 89 | va_start(args, format); 90 | required = _vsnprintf_s(&(B->buffer[B->position]), B->size - B->position, _TRUNCATE, format, args); 91 | va_end(args); 92 | B->position += required; 93 | 94 | #else 95 | va_start(args, format); 96 | required = vsnprintf(&(B->buffer[B->position]), B->size - B->position, format, args); 97 | va_end(args); 98 | if (required >= B->size - B->position) { 99 | B->size = B->position + required + 1; 100 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size); 101 | va_start(args, format); 102 | required = vsnprintf(&(B->buffer[B->position]), B->size - B->position, format, args); 103 | va_end(args); 104 | } 105 | B->position += required; 106 | #endif 107 | 108 | } 109 | 110 | static __INLINE string_list* list_create(int L) { 111 | string_list* B = (string_list*) malloc(sizeof(string_list)); 112 | B->size = L; 113 | B->buffer = (char**) malloc(sizeof(char*) * B->size); 114 | memset(B->buffer, 0, sizeof(char*) * B->size); 115 | B->position = 0; 116 | return B; 117 | } 118 | 119 | static __INLINE void list_reset(string_list* B) { 120 | int i; 121 | for (i = 0; i < B->position; i++) { 122 | if (B->buffer[i]) free(B->buffer[i]); 123 | B->buffer[i] = NULL; 124 | } 125 | B->position = 0; 126 | } 127 | 128 | static __INLINE void list_destroy(string_list **B) { 129 | int i; 130 | 131 | if (!(*B)) return; 132 | 133 | for (i = 0; i < (*B)->position; i++) { 134 | if ((*B)->buffer[i]) free((*B)->buffer[i]); (*B)->buffer[i] = NULL; 135 | } 136 | 137 | if ((*B)->buffer) { 138 | free((*B)->buffer); (*B)->buffer = NULL; 139 | } 140 | 141 | free((*B)); 142 | (*B) = NULL; 143 | } 144 | 145 | static __INLINE char* list_get(const string_list *B, int I) { 146 | if (I < 0 || I >= B->position) { 147 | return NULL; 148 | } else { 149 | if (!B->buffer[I]) { 150 | return NULL; 151 | } else { 152 | char *S; 153 | int length = 
strlen(B->buffer[I]); 154 | S = (char*) malloc(sizeof(char) * (length + 1)); 155 | memcpy(S, B->buffer[I], length + 1); 156 | return S; 157 | } 158 | } 159 | } 160 | 161 | static __INLINE int list_size(const string_list *B) { 162 | return B->position; 163 | } 164 | 165 | static __INLINE void list_append(string_list *B, char* S) { 166 | int required = 1; 167 | int length = strlen(S); 168 | if (required > B->size - B->position) { 169 | B->size = B->position + 16; 170 | B->buffer = (char**) realloc(B->buffer, sizeof(char*) * B->size); 171 | } 172 | B->buffer[B->position] = (char*) malloc(sizeof(char) * (length + 1)); 173 | memcpy(B->buffer[B->position], S, length + 1); 174 | B->position += required; 175 | } 176 | 177 | // This version of the append does not copy the string but simply takes the control of its allocation 178 | static __INLINE void list_append_direct(string_list *B, char* S) { 179 | int required = 1; 180 | // int length = strlen(S); 181 | if (required > B->size - B->position) { 182 | B->size = B->position + 16; 183 | B->buffer = (char**) realloc(B->buffer, sizeof(char*) * B->size); 184 | } 185 | B->buffer[B->position] = S; 186 | B->position += required; 187 | } 188 | 189 | 190 | #endif 191 | -------------------------------------------------------------------------------- /toolkit/utils/src/region.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 4; tab-width: 4 -*- */ 2 | 3 | #ifndef _REGION_H_ 4 | #define _REGION_H_ 5 | 6 | #ifdef TRAX_STATIC_DEFINE 7 | # define __TRAX_EXPORT 8 | #else 9 | # ifndef __TRAX_EXPORT 10 | # if defined(_MSC_VER) 11 | # ifdef trax_EXPORTS 12 | /* We are building this library */ 13 | # define __TRAX_EXPORT __declspec(dllexport) 14 | # else 15 | /* We are using this library */ 16 | # define __TRAX_EXPORT __declspec(dllimport) 17 | # endif 18 | # elif defined(__GNUC__) 19 | # ifdef trax_EXPORTS 20 | /* We are building this library */ 21 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 22 | # else 23 | /* We are using this library */ 24 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 25 | # endif 26 | # endif 27 | # endif 28 | #endif 29 | 30 | #ifndef MAX 31 | #define MAX(a,b) (((a) > (b)) ? (a) : (b)) 32 | #endif 33 | 34 | #ifndef MIN 35 | #define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) 36 | #endif 37 | 38 | #define TRAX_DEFAULT_CODE 0 39 | 40 | #define REGION_LEGACY_RASTERIZATION 1 41 | 42 | #ifdef __cplusplus 43 | extern "C" { 44 | #endif 45 | 46 | typedef enum region_type {EMPTY, SPECIAL, RECTANGLE, POLYGON, MASK} region_type; 47 | 48 | typedef struct region_bounds { 49 | 50 | float top; 51 | float bottom; 52 | float left; 53 | float right; 54 | 55 | } region_bounds; 56 | 57 | typedef struct region_polygon { 58 | 59 | int count; 60 | 61 | float* x; 62 | float* y; 63 | 64 | } region_polygon; 65 | 66 | typedef struct region_mask { 67 | 68 | int x; 69 | int y; 70 | 71 | int width; 72 | int height; 73 | 74 | char* data; 75 | 76 | } region_mask; 77 | 78 | typedef struct region_rectangle { 79 | 80 | float x; 81 | float y; 82 | float width; 83 | float height; 84 | 85 | } region_rectangle; 86 | 87 | typedef struct region_container { 88 | enum region_type type; 89 | union { 90 | region_rectangle rectangle; 91 | region_polygon polygon; 92 | region_mask mask; 93 | int special; 94 | } data; 95 | } region_container; 96 | 97 | typedef struct region_overlap { 98 | 99 | float overlap; 100 | float only1; 101 | float only2; 102 | 103 | } region_overlap; 104 | 105 | extern const region_bounds region_no_bounds; 106 | 107 | __TRAX_EXPORT int region_set_flags(int mask); 108 | 109 | __TRAX_EXPORT int region_clear_flags(int mask); 110 | 111 | __TRAX_EXPORT region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds); 112 | 113 | __TRAX_EXPORT float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds); 114 | 115 | __TRAX_EXPORT region_bounds region_create_bounds(float left, float top, float right, float bottom); 116 | 117 | __TRAX_EXPORT region_bounds region_compute_bounds(const region_container* region); 118 | 119 | __TRAX_EXPORT int region_parse(const char* buffer, region_container** region); 120 | 121 | __TRAX_EXPORT char* region_string(region_container* region); 122 | 123 | __TRAX_EXPORT void region_print(FILE* out, region_container* region); 124 | 125 | __TRAX_EXPORT region_container* region_convert(const region_container* region, region_type type); 126 | 127 | __TRAX_EXPORT void region_release(region_container** region); 128 | 129 | __TRAX_EXPORT region_container* region_create_special(int code); 130 | 131 | __TRAX_EXPORT region_container* region_create_rectangle(float x, float y, float width, float height); 132 | 133 | __TRAX_EXPORT region_container* region_create_polygon(int count); 134 | 135 | __TRAX_EXPORT int region_contains_point(region_container* r, float x, float y); 136 | 137 | __TRAX_EXPORT void region_get_mask(region_container* r, char* mask, int width, int height); 138 | 139 | __TRAX_EXPORT void region_get_mask_offset(region_container* r, char* mask, int x, int y, int width, int height); 140 | 141 | #ifdef __cplusplus 142 | } 143 | #endif 144 | 145 | #endif 146 | -------------------------------------------------------------------------------- /toolkit/utils/statistics.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author fangyi.zhang@vipl.ict.ac.cn 3 | """ 4 | import numpy as np 5 | from . 
import region
6 | 
7 | def calculate_failures(trajectory):
8 |     """ Calculate number of failures
9 |     Args:
10 |         trajectory: list of bbox
11 |     Returns:
12 |         num_failures: number of failures
13 |         failures: failure points in trajectory, starting from 0
14 |     """
15 |     failures = [i for i, x in zip(range(len(trajectory)), trajectory)
16 |                 if len(x) == 1 and x[0] == 2]
17 |     num_failures = len(failures)
18 |     return num_failures, failures
19 | 
20 | def calculate_accuracy(pred_trajectory, gt_trajectory,
21 |                        burnin=0, ignore_unknown=True, bound=None):
22 |     """Calculate accuracy score as average overlap over the entire sequence
23 |     Args:
24 |         pred_trajectory: list of bbox
25 |         gt_trajectory: list of bbox
26 |         burnin: number of frames that have to be ignored after the failure
27 |         ignore_unknown: ignore frames where the overlap is unknown
28 |         bound: bounding region
29 |     Return:
30 |         acc: average overlap
31 |         overlaps: per frame overlaps
32 |     """
33 |     pred_trajectory_ = pred_trajectory
34 |     if not ignore_unknown:
35 |         unknown = [len(x)==1 and x[0] == 0 for x in pred_trajectory]
36 | 
37 |     if burnin > 0:
38 |         pred_trajectory_ = pred_trajectory[:]
39 |         mask = [len(x)==1 and x[0] == 1 for x in pred_trajectory]
40 |         for i in range(len(mask)):
41 |             if mask[i]:
42 |                 for j in range(burnin):
43 |                     if i + j < len(mask):
44 |                         pred_trajectory_[i+j] = [0]
45 |     min_len = min(len(pred_trajectory_), len(gt_trajectory))
46 |     overlaps = region.vot_overlap_traj(pred_trajectory_[:min_len],
47 |                                        gt_trajectory[:min_len], bound)
48 | 
49 |     if not ignore_unknown:
50 |         overlaps = [0 if u else o for o, u in zip(overlaps, unknown)]  # unknown frames count as zero overlap
51 | 
52 |     acc = 0
53 |     if len(overlaps) > 0:
54 |         acc = np.nanmean(overlaps)
55 |     return acc, overlaps
56 | 
57 | # def caculate_expected_overlap(pred_trajectorys, gt_trajectorys, skip_init, traj_length=None,
58 | #         weights=None, tags=['all']):
59 | #     """ Caculate expected overlap
60 | #     Args:
61 | #         pred_trajectory: list of bbox
62 | #         gt_trajectory: list of bbox
63 | #         traj_length: a list of sequence length for which the overlap should be evaluated
64 | #         weights: a list of per-sequence weights that indicate how much does each sequence
65 | #                 contribute to the estimate
66 | #         tags: set list of tags for which to perform calculation
67 | #     """
68 | #     overlaps = [calculate_accuracy(pred, gt)[1]
69 | #             for pred, gt in zip(pred_trajectorys, gt_trajectorys)]
70 | #     failures = [calculate_accuracy(pred, gt)[1]
71 | #             for pred, gt in zip(pred_trajectorys, gt_trajectorys)]
72 | #
73 | #     if traj_length is None:
74 | #         traj_length = range(1, max([len(x) for x in gt_trajectorys])+1)
75 | #     traj_length = list(set(traj_length))
76 | 
77 | def overlap_ratio(rect1, rect2):
78 |     '''Compute overlap ratio between two rects
79 |     Args:
80 |         rect1, rect2: 2d array of N x [x,y,w,h]
81 |     Return:
82 |         iou
83 |     '''
84 |     # if rect1.ndim==1:
85 |     #     rect1 = rect1[np.newaxis, :]
86 |     # if rect2.ndim==1:
87 |     #     rect2 = rect2[np.newaxis, :]
88 |     left = np.maximum(rect1[:,0], rect2[:,0])
89 |     right = np.minimum(rect1[:,0]+rect1[:,2], rect2[:,0]+rect2[:,2])
90 |     top = np.maximum(rect1[:,1], rect2[:,1])
91 |     bottom = np.minimum(rect1[:,1]+rect1[:,3], rect2[:,1]+rect2[:,3])
92 | 
93 |     intersect = np.maximum(0,right - left) * np.maximum(0,bottom - top)
94 |     union = rect1[:,2]*rect1[:,3] + rect2[:,2]*rect2[:,3] - intersect
95 |     iou = intersect / union
96 |     iou = np.maximum(np.minimum(1, iou), 0)
97 |     return iou
98 | 
99 | def success_overlap(gt_bb, result_bb, n_frame):
100 |     thresholds_overlap = np.arange(0, 1.05, 0.05)
101 |     success = np.zeros(len(thresholds_overlap))
102 |     iou =
np.ones(len(gt_bb)) * (-1) 103 | # mask = np.sum(gt_bb > 0, axis=1) == 4 #TODO check all dataset 104 | mask = np.sum(gt_bb[:, 2:] > 0, axis=1) == 2 105 | iou[mask] = overlap_ratio(gt_bb[mask], result_bb[mask]) 106 | for i in range(len(thresholds_overlap)): 107 | success[i] = np.sum(iou > thresholds_overlap[i]) / float(n_frame) 108 | return success 109 | 110 | def success_error(gt_center, result_center, thresholds, n_frame): 111 | # n_frame = len(gt_center) 112 | success = np.zeros(len(thresholds)) 113 | dist = np.ones(len(gt_center)) * (-1) 114 | mask = np.sum(gt_center > 0, axis=1) == 2 115 | dist[mask] = np.sqrt(np.sum( 116 | np.power(gt_center[mask] - result_center[mask], 2), axis=1)) 117 | for i in range(len(thresholds)): 118 | success[i] = np.sum(dist <= thresholds[i]) / float(n_frame) 119 | return success 120 | 121 | def determine_thresholds(scores, resolution=100): 122 | """ 123 | Args: 124 | scores: 1d array of score 125 | """ 126 | scores = np.sort(scores[np.logical_not(np.isnan(scores))]) 127 | delta = np.floor(len(scores) / (resolution - 2)) 128 | idxs = np.floor(np.linspace(delta-1, len(scores)-delta, resolution-2)+0.5).astype(np.int32) 129 | thresholds = np.zeros((resolution)) 130 | thresholds[0] = - np.inf 131 | thresholds[-1] = np.inf 132 | thresholds[1:-1] = scores[idxs] 133 | return thresholds 134 | 135 | def calculate_f1(overlaps, score, bound, thresholds, N): 136 | overlaps = np.array(overlaps) 137 | overlaps[np.isnan(overlaps)] = 0 138 | score = np.array(score) 139 | score[np.isnan(score)] = 0 140 | precision = np.zeros(len(thresholds)) 141 | recall = np.zeros(len(thresholds)) 142 | for i, th in enumerate(thresholds): 143 | if th == - np.inf: 144 | idx = score > 0 145 | else: 146 | idx = score >= th 147 | if np.sum(idx) == 0: 148 | precision[i] = 1 149 | recall[i] = 0 150 | else: 151 | precision[i] = np.mean(overlaps[idx]) 152 | recall[i] = np.sum(overlaps[idx]) / N 153 | f1 = 2 * precision * recall / (precision + recall) 154 | return f1, precision, recall 155 | 156 | def calculate_expected_overlap(fragments, fweights): 157 | max_len = fragments.shape[1] 158 | expected_overlaps = np.zeros((max_len), np.float32) 159 | expected_overlaps[0] = 1 160 | 161 | # TODO Speed Up 162 | for i in range(1, max_len): 163 | mask = np.logical_not(np.isnan(fragments[:, i])) 164 | if np.any(mask): 165 | fragment = fragments[mask, 1:i+1] 166 | seq_mean = np.sum(fragment, 1) / fragment.shape[1] 167 | expected_overlaps[i] = np.sum(seq_mean * 168 | fweights[mask]) / np.sum(fweights[mask]) 169 | return expected_overlaps 170 | -------------------------------------------------------------------------------- /toolkit/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .draw_f1 import draw_f1 2 | from .draw_success_precision import draw_success_precision 3 | from .draw_eao import draw_eao 4 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_eao.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import pickle 4 | 5 | from matplotlib import rc 6 | from .draw_utils import COLOR, MARKER_STYLE 7 | 8 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) 9 | rc('text', usetex=True) 10 | 11 | def draw_eao(result): 12 | fig = plt.figure() 13 | ax = fig.add_subplot(111, projection='polar') 14 | angles = np.linspace(0, 2*np.pi, 8, endpoint=True) 15 | 16 | attr2value = [] 17 | 
for i, (tracker_name, ret) in enumerate(result.items()): 18 | value = list(ret.values()) 19 | attr2value.append(value) 20 | value.append(value[0]) 21 | attr2value = np.array(attr2value) 22 | max_value = np.max(attr2value, axis=0) 23 | min_value = np.min(attr2value, axis=0) 24 | for i, (tracker_name, ret) in enumerate(result.items()): 25 | value = list(ret.values()) 26 | value.append(value[0]) 27 | value = np.array(value) 28 | value *= (1 / max_value) 29 | plt.plot(angles, value, linestyle='-', color=COLOR[i], marker=MARKER_STYLE[i], 30 | label=tracker_name, linewidth=1.5, markersize=6) 31 | 32 | attrs = ["Overall", "Camera motion", 33 | "Illumination change","Motion Change", 34 | "Size change","Occlusion", 35 | "Unassigned"] 36 | attr_value = [] 37 | for attr, maxv, minv in zip(attrs, max_value, min_value): 38 | attr_value.append(attr + "\n({:.3f},{:.3f})".format(minv, maxv)) 39 | ax.set_thetagrids(angles[:-1] * 180/np.pi, attr_value) 40 | ax.spines['polar'].set_visible(False) 41 | ax.legend(loc='upper center', bbox_to_anchor=(0.5,-0.07), frameon=False, ncol=5) 42 | ax.grid(b=False) 43 | ax.set_ylim(0, 1.18) 44 | ax.set_yticks([]) 45 | plt.show() 46 | 47 | if __name__ == '__main__': 48 | result = pickle.load(open("../../result.pkl", 'rb')) 49 | draw_eao(result) 50 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_f1.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from matplotlib import rc 5 | from .draw_utils import COLOR, LINE_STYLE 6 | 7 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) 8 | rc('text', usetex=True) 9 | 10 | def draw_f1(result, bold_name=None): 11 | # drawing f1 contour 12 | fig, ax = plt.subplots() 13 | for f1 in np.arange(0.1, 1, 0.1): 14 | recall = np.arange(f1, 1+0.01, 0.01) 15 | precision = f1 * recall / (2 * recall - f1) 16 | ax.plot(recall, precision, color=[0,1,0], linestyle='-', linewidth=0.5) 17 | ax.plot(precision, recall, color=[0,1,0], linestyle='-', linewidth=0.5) 18 | ax.grid(b=True) 19 | ax.set_aspect(1) 20 | plt.xlabel('Recall') 21 | plt.ylabel('Precision') 22 | plt.axis([0, 1, 0, 1]) 23 | plt.title(r'\textbf{VOT2018-LT Precision vs Recall}') 24 | 25 | # draw result line 26 | all_precision = {} 27 | all_recall = {} 28 | best_f1 = {} 29 | best_idx = {} 30 | for tracker_name, ret in result.items(): 31 | precision = np.mean(list(ret['precision'].values()), axis=0) 32 | recall = np.mean(list(ret['recall'].values()), axis=0) 33 | f1 = 2 * precision * recall / (precision + recall) 34 | max_idx = np.argmax(f1) 35 | all_precision[tracker_name] = precision 36 | all_recall[tracker_name] = recall 37 | best_f1[tracker_name] = f1[max_idx] 38 | best_idx[tracker_name] = max_idx 39 | 40 | for idx, (tracker_name, best_f1) in \ 41 | enumerate(sorted(best_f1.items(), key=lambda x:x[1], reverse=True)): 42 | if tracker_name == bold_name: 43 | label = r"\textbf{[%.3f] Ours}" % (best_f1) 44 | else: 45 | label = "[%.3f] " % (best_f1) + tracker_name 46 | recall = all_recall[tracker_name][:-1] 47 | precision = all_precision[tracker_name][:-1] 48 | ax.plot(recall, precision, color=COLOR[idx], linestyle='-', 49 | label=label) 50 | f1_idx = best_idx[tracker_name] 51 | ax.plot(recall[f1_idx], precision[f1_idx], color=[0,0,0], marker='o', 52 | markerfacecolor=COLOR[idx], markersize=5) 53 | ax.legend(loc='lower right', labelspacing=0.2) 54 | plt.xticks(np.arange(0, 1+0.1, 0.1)) 55 | plt.yticks(np.arange(0, 
1+0.1, 0.1)) 56 | plt.show() 57 | 58 | if __name__ == '__main__': 59 | draw_f1(None) 60 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_success_precision.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from .draw_utils import COLOR, LINE_STYLE 5 | 6 | def draw_success_precision(success_ret, name, videos, attr, precision_ret=None, 7 | norm_precision_ret=None, bold_name=None, axis=[0, 1]): 8 | # success plot 9 | fig, ax = plt.subplots() 10 | ax.grid(b=True) 11 | ax.set_aspect(1) 12 | plt.xlabel('Overlap threshold') 13 | plt.ylabel('Success rate') 14 | if attr == 'ALL': 15 | plt.title(r'\textbf{Success plots of OPE on %s}' % (name)) 16 | else: 17 | plt.title(r'\textbf{Success plots of OPE - %s}' % (attr)) 18 | plt.axis([0, 1]+axis) 19 | success = {} 20 | thresholds = np.arange(0, 1.05, 0.05) 21 | for tracker_name in success_ret.keys(): 22 | value = [v for k, v in success_ret[tracker_name].items() if k in videos] 23 | success[tracker_name] = np.mean(value) 24 | for idx, (tracker_name, auc) in \ 25 | enumerate(sorted(success.items(), key=lambda x:x[1], reverse=True)): 26 | if tracker_name == bold_name: 27 | label = r"\textbf{[%.3f] %s}" % (auc, tracker_name) 28 | else: 29 | label = "[%.3f] " % (auc) + tracker_name 30 | value = [v for k, v in success_ret[tracker_name].items() if k in videos] 31 | plt.plot(thresholds, np.mean(value, axis=0), 32 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 33 | ax.legend(loc='lower left', labelspacing=0.2) 34 | ax.autoscale(enable=True, axis='both', tight=True) 35 | xmin, xmax, ymin, ymax = plt.axis() 36 | ax.autoscale(enable=False) 37 | ymax += 0.03 38 | plt.axis([xmin, xmax, ymin, ymax]) 39 | plt.xticks(np.arange(xmin, xmax+0.01, 0.1)) 40 | plt.yticks(np.arange(ymin, ymax, 0.1)) 41 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 42 | plt.show() 43 | 44 | if precision_ret: 45 | # norm precision plot 46 | fig, ax = plt.subplots() 47 | ax.grid(b=True) 48 | ax.set_aspect(50) 49 | plt.xlabel('Location error threshold') 50 | plt.ylabel('Precision') 51 | if attr == 'ALL': 52 | plt.title(r'\textbf{Precision plots of OPE on %s}' % (name)) 53 | else: 54 | plt.title(r'\textbf{Precision plots of OPE - %s}' % (attr)) 55 | plt.axis([0, 50]+axis) 56 | precision = {} 57 | thresholds = np.arange(0, 51, 1) 58 | for tracker_name in precision_ret.keys(): 59 | value = [v for k, v in precision_ret[tracker_name].items() if k in videos] 60 | precision[tracker_name] = np.mean(value, axis=0)[20] 61 | for idx, (tracker_name, pre) in \ 62 | enumerate(sorted(precision.items(), key=lambda x:x[1], reverse=True)): 63 | if tracker_name == bold_name: 64 | label = r"\textbf{[%.3f] %s}" % (pre, tracker_name) 65 | else: 66 | label = "[%.3f] " % (pre) + tracker_name 67 | value = [v for k, v in precision_ret[tracker_name].items() if k in videos] 68 | plt.plot(thresholds, np.mean(value, axis=0), 69 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 70 | ax.legend(loc='lower right', labelspacing=0.2) 71 | ax.autoscale(enable=True, axis='both', tight=True) 72 | xmin, xmax, ymin, ymax = plt.axis() 73 | ax.autoscale(enable=False) 74 | ymax += 0.03 75 | plt.axis([xmin, xmax, ymin, ymax]) 76 | plt.xticks(np.arange(xmin, xmax+0.01, 5)) 77 | plt.yticks(np.arange(ymin, ymax, 0.1)) 78 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 79 | plt.show() 80 | 81 | # norm precision plot 82 | if norm_precision_ret: 83 
| fig, ax = plt.subplots() 84 | ax.grid(b=True) 85 | plt.xlabel('Location error threshold') 86 | plt.ylabel('Precision') 87 | if attr == 'ALL': 88 | plt.title(r'\textbf{Normalized Precision plots of OPE on %s}' % (name)) 89 | else: 90 | plt.title(r'\textbf{Normalized Precision plots of OPE - %s}' % (attr)) 91 | norm_precision = {} 92 | thresholds = np.arange(0, 51, 1) / 100 93 | for tracker_name in precision_ret.keys(): 94 | value = [v for k, v in norm_precision_ret[tracker_name].items() if k in videos] 95 | norm_precision[tracker_name] = np.mean(value, axis=0)[20] 96 | for idx, (tracker_name, pre) in \ 97 | enumerate(sorted(norm_precision.items(), key=lambda x:x[1], reverse=True)): 98 | if tracker_name == bold_name: 99 | label = r"\textbf{[%.3f] %s}" % (pre, tracker_name) 100 | else: 101 | label = "[%.3f] " % (pre) + tracker_name 102 | value = [v for k, v in norm_precision_ret[tracker_name].items() if k in videos] 103 | plt.plot(thresholds, np.mean(value, axis=0), 104 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 105 | ax.legend(loc='lower right', labelspacing=0.2) 106 | ax.autoscale(enable=True, axis='both', tight=True) 107 | xmin, xmax, ymin, ymax = plt.axis() 108 | ax.autoscale(enable=False) 109 | ymax += 0.03 110 | plt.axis([xmin, xmax, ymin, ymax]) 111 | plt.xticks(np.arange(xmin, xmax+0.01, 0.05)) 112 | plt.yticks(np.arange(ymin, ymax, 0.1)) 113 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 114 | plt.show() 115 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_utils.py: -------------------------------------------------------------------------------- 1 | 2 | COLOR = ((1, 0, 0), 3 | (0, 1, 0), 4 | (1, 0, 1), 5 | (1, 1, 0), 6 | (0 , 162/255, 232/255), 7 | (0.5, 0.5, 0.5), 8 | (0, 0, 1), 9 | (0, 1, 1), 10 | (136/255, 0 , 21/255), 11 | (255/255, 127/255, 39/255), 12 | (0, 0, 0)) 13 | 14 | LINE_STYLE = ['-', '--', ':', '-', '--', ':', '-', '--', ':', '-'] 15 | 16 | MARKER_STYLE = ['o', 'v', '<', '*', 'D', 'x', '.', 'x', '<', '.'] 17 | --------------------------------------------------------------------------------
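The plotting helpers consume the benchmark outputs directly: `draw_f1` expects the per-video `precision`/`recall`/`f1` dictionaries returned by `F1Benchmark.eval`, and `draw_eao` expects one EAO value per attribute and tracker (the seven attributes listed in the radar-plot code). A rough sketch, assuming a long-term (VOT-LT style) dataset with per-frame confidences has been built and registered as in the earlier example:

```python
# Rough sketch; `lt_dataset` and `trackers` are placeholders set up as in the earlier
# example, using a long-term dataset that provides per-frame confidence scores.
from toolkit.evaluation import F1Benchmark
from toolkit.visualization import draw_f1, draw_eao

f1_benchmark = F1Benchmark(lt_dataset)
f1_result = f1_benchmark.eval(trackers)        # {tracker: {'precision': ..., 'recall': ..., 'f1': ...}}
f1_benchmark.show_result(f1_result, show_video_level=False)
draw_f1(f1_result, bold_name='siamreppoints')  # precision-recall curves with best-F1 markers

# draw_eao wants {tracker: {attribute: eao}} covering the seven attributes in the radar plot;
# assembling that dictionary from per-tag EAO results is left to the caller here.
# draw_eao(eao_per_attribute)
```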