├── models
│ ├── __init__.py
│ ├── PreciseRoIPooling
│ │ ├── pytorch
│ │ │ ├── prroi_pool
│ │ │ │ ├── .gitignore
│ │ │ │ ├── __init__.py
│ │ │ │ ├── prroi_pool.py
│ │ │ │ ├── src
│ │ │ │ │ ├── prroi_pooling_gpu.h
│ │ │ │ │ ├── prroi_pooling_gpu_impl.cuh
│ │ │ │ │ └── prroi_pooling_gpu.c
│ │ │ │ └── functional.py
│ │ │ └── tests
│ │ │   └── test_prroi_pooling2d.py
│ │ ├── _assets
│ │ │ └── prroi_visualization.png
│ │ ├── LICENSE
│ │ ├── .gitignore
│ │ ├── src
│ │ │ └── prroi_pooling_gpu_impl.cuh
│ │ └── README.md
│ ├── cornerdet
│ │ ├── __init__.py
│ │ └── cornerdet.py
│ ├── attention
│ │ ├── __init__.py
│ │ └── attention.py
│ ├── neck
│ │ ├── __init__.py
│ │ └── neck.py
│ ├── backbone
│ │ └── __init__.py
│ └── siamese
│   ├── __init__.py
│   └── siamese.py
├── track
│ ├── __init__.py
│ └── run_CGACD.py
├── train
│ └── __init__.py
├── toolkit
│ ├── __init__.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── setup.py
│ │ ├── misc.py
│ │ ├── c_region.pxd
│ │ ├── src
│ │ │ ├── region.h
│ │ │ └── buffer.h
│ │ ├── statistics.py
│ │ └── region.pyx
│ ├── visualization
│ │ ├── __init__.py
│ │ ├── draw_utils.py
│ │ ├── draw_eao.py
│ │ ├── draw_f1.py
│ │ └── draw_success_precision.py
│ ├── evaluation
│ │ ├── __init__.py
│ │ ├── ar_benchmark.py
│ │ ├── f1_benchmark.py
│ │ └── eao_benchmark.py
│ └── datasets
│   ├── dataset.py
│   ├── __init__.py
│   ├── uav.py
│   ├── got10k.py
│   ├── nfs.py
│   ├── trackingnet.py
│   ├── lasot.py
│   ├── otb.py
│   └── video.py
├── training_dataset
│ ├── coco
│ │ ├── pycocotools
│ │ │ ├── __init__.py
│ │ │ ├── Makefile
│ │ │ ├── setup.py
│ │ │ ├── common
│ │ │ │ ├── maskApi.h
│ │ │ │ └── gason.h
│ │ │ └── mask.py
│ │ ├── visual.py
│ │ ├── gen_json_clean.py
│ │ └── par_crop.py
│ ├── vid
│ │ ├── visual.py
│ │ ├── parse_vid.py
│ │ ├── gen_json_clean.py
│ │ └── par_crop.py
│ ├── det
│ │ ├── visual.py
│ │ ├── gen_json_clean.py
│ │ └── par_crop.py
│ ├── y2b
│ │ └── gen_json_clean.py
│ └── got10k
│   ├── gen_json_clean.py
│   └── par_crop.py
├── .gitignore
├── requirement.txt
├── train_cgacd_resnet.sh
├── test.sh
├── setup.py
├── utils
│ ├── log_helper.py
│ ├── misc.py
│ ├── model_load.py
│ └── utils.py
├── experiments
│ ├── cgacd_resnet
│ │ └── cgacd_resnet.yml
│ └── cgacd_resnet_otb
│   └── cgacd_resnet_otb.yml
├── README.md
└── tools
  └── eval.py
/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/track/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/train/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/toolkit/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/training_dataset/coco/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 |
--------------------------------------------------------------------------------
/models/PreciseRoIPooling/pytorch/prroi_pool/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | /_prroi_pooling
3 |
--------------------------------------------------------------------------------
/toolkit/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from . import region
2 | from .statistics import *
3 |
--------------------------------------------------------------------------------
/models/PreciseRoIPooling/_assets/prroi_visualization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/feiaxyt/CGACD/HEAD/models/PreciseRoIPooling/_assets/prroi_visualization.png
--------------------------------------------------------------------------------
/toolkit/visualization/__init__.py:
--------------------------------------------------------------------------------
1 | from .draw_f1 import draw_f1
2 | from .draw_success_precision import draw_success_precision
3 | from .draw_eao import draw_eao
4 |
--------------------------------------------------------------------------------
/toolkit/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from .ar_benchmark import AccuracyRobustnessBenchmark
2 | from .eao_benchmark import EAOBenchmark
3 | from .ope_benchmark import OPEBenchmark
4 | from .f1_benchmark import F1Benchmark
5 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | dataset
2 | *.pyc
3 | checkpoint
4 | result
5 | hp_search_result
6 | runs
7 | *.o
8 | *.so
9 | checkpoint*
10 | *.model
11 | results
12 | reports
13 | .idea
14 | *.pth
15 | .ipynb_checkpoints
16 | raw_results
--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | torch==1.1.0
3 | torchvision==0.3.0
4 | opencv-python==3.4.3.18
5 | pyyaml
6 | yacs
7 | tqdm
8 | colorama
9 | matplotlib
10 | cython
11 | tensorboardX
12 | futures
13 | easydict
14 | numba
--------------------------------------------------------------------------------
/models/cornerdet/__init__.py:
--------------------------------------------------------------------------------
1 | from .cornerdet import SepCornerDet
2 |
3 | CORNERDET = {
4 | 'SepCornerDet': SepCornerDet,
5 | }
6 |
7 |
8 | def get_cornerdet(name, **kwargs):
9 | return CORNERDET[name](**kwargs)
10 |
--------------------------------------------------------------------------------
/models/attention/__init__.py:
--------------------------------------------------------------------------------
1 | from .attention import PixelAttention
2 |
3 | ATTENTION = {
4 | 'PixelAttention': PixelAttention,
5 | }
6 |
7 |
8 | def get_attention(name, **kwargs):
9 | return ATTENTION[name](**kwargs)
10 |
--------------------------------------------------------------------------------
/train_cgacd_resnet.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=$PWD:$PYTHONPATH
2 |
3 | #CUDA_VISIBLE_DEVICES=1
4 | python train/train.py \
5 | --config=experiments/cgacd_resnet/cgacd_resnet.yml \
6 | -b 64 \
7 | -j 16 \
8 | --save_name cgacd_resnet
9 |
--------------------------------------------------------------------------------
/toolkit/utils/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from distutils.extension import Extension
3 | from Cython.Build import cythonize
4 |
5 | setup(
6 | ext_modules = cythonize([Extension("region", ["region.pyx", "src/region.c"])]),
7 | )
8 |
9 |
--------------------------------------------------------------------------------
/models/neck/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) SenseTime. All Rights Reserved.
2 |
3 | from .neck import AdjustLayer
4 |
5 |
6 | NECKS = {
7 | 'AdjustLayer': AdjustLayer
8 | }
9 |
10 |
11 | def get_neck(name, **kwargs):
12 | return NECKS[name](**kwargs)
13 |
--------------------------------------------------------------------------------
/models/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) SenseTime. All Rights Reserved.
2 |
3 | from .resnet import resnet18, resnet50
4 |
5 |
6 | BACKBONES = {
7 | 'resnet50': resnet50,
8 | }
9 |
10 |
11 | def get_backbone(name, **kwargs):
12 | return BACKBONES[name](**kwargs)
13 |
--------------------------------------------------------------------------------
/models/siamese/__init__.py:
--------------------------------------------------------------------------------
1 | from .siamese import UPChannelSiamese, DepthwiseSiamese
2 |
3 |
4 | def get_siamese(name, **kwargs):
5 | SIAMESE = {
6 | 'UPChannelSiamese': UPChannelSiamese,
7 | 'DepthwiseSiamese': DepthwiseSiamese
8 | }
9 | return SIAMESE[name](**kwargs)
10 |
--------------------------------------------------------------------------------
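
The cornerdet, attention, neck, backbone, and siamese packages above all use the same registry-plus-factory pattern: a module-level dict maps the type string from the experiment YAML to a class, and get_*(name, **kwargs) instantiates it. A minimal sketch of calling two of these factories, using the type strings and kwargs blocks that appear in the experiment configs (it assumes the factories accept those kwargs verbatim; the other factories are called the same way):

    from models.backbone import get_backbone
    from models.neck import get_neck

    # Type strings and kwargs mirror the 'backbone' and 'adjust' sections of
    # experiments/cgacd_resnet/cgacd_resnet.yml.
    backbone = get_backbone('resnet50', used_layers=[3])
    neck = get_neck('AdjustLayer', in_channels=1024, out_channels=256)
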
/training_dataset/coco/pycocotools/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | # install pycocotools locally
3 | python setup.py build_ext --inplace
4 | rm -rf build
5 |
6 | install:
7 | # install pycocotools to the Python site-packages
8 | python setup.py build_ext install
9 | rm -rf build
10 | clean:
11 | rm _mask.c _mask.cpython-36m-x86_64-linux-gnu.so
12 |
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=$PWD:$PYTHONPATH
2 | tracker_name="CGACD_VOT"
3 | config_file="experiments/cgacd_resnet/cgacd_resnet.yml"
4 | START=11
5 | END=19
6 | for s in $(seq $START 1 $END)
7 | do
8 | python tools/test.py \
9 | --model "checkpoint/"$tracker_name"/checkpoint_epoch"$s".pth" \
10 | --config "config/"$config_file \
11 | --dataset "VOT2018" \
12 | --save_name $tracker_name"_"$s
13 | done
14 |
--------------------------------------------------------------------------------
/models/PreciseRoIPooling/pytorch/prroi_pool/__init__.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : __init__.py
4 | # Author : Jiayuan Mao, Tete Xiao
5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
6 | # Date : 07/13/2018
7 | #
8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license.
10 | # Copyright (c) 2017 Megvii Technology Limited.
11 |
12 | from .prroi_pool import *
13 |
14 |
--------------------------------------------------------------------------------
/toolkit/visualization/draw_utils.py:
--------------------------------------------------------------------------------
1 |
2 | COLOR = ((1, 0, 0),
3 | (0, 1, 0),
4 | (1, 0, 1),
5 | (1, 1, 0),
6 | (0 , 162/255, 232/255),
7 | (0.5, 0.5, 0.5),
8 | (0, 0, 1),
9 | (0, 1, 1),
10 | (136/255, 0 , 21/255),
11 | (255/255, 127/255, 39/255),
12 | (0, 0, 0))
13 |
14 | LINE_STYLE = ['-', '--', ':', '-', '--', ':', '-', '--', ':', '-']
15 |
16 | MARKER_STYLE = ['o', 'v', '<', '*', 'D', 'x', '.', 'x', '<', '.']
17 |
--------------------------------------------------------------------------------
/models/neck/neck.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) SenseTime. All Rights Reserved.
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 |
7 | class AdjustLayer(nn.Module):
8 | def __init__(self, in_channels, out_channels):
9 | super(AdjustLayer, self).__init__()
10 | self.downsample = nn.Sequential(
11 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
12 | nn.BatchNorm2d(out_channels),
13 | )
14 |
15 | def forward(self, x):
16 | x = self.downsample(x)
17 | return x
--------------------------------------------------------------------------------
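
In isolation, AdjustLayer is a 1x1 convolution followed by BatchNorm, so it changes only the channel count of the backbone feature map. A small sanity-check sketch (the input size is illustrative; the channel counts match the experiment configs):

    import torch
    from models.neck.neck import AdjustLayer

    adjust = AdjustLayer(in_channels=1024, out_channels=256)
    x = torch.rand(1, 1024, 31, 31)   # e.g. a ResNet-50 layer3 feature map
    y = adjust(x)
    print(y.shape)                    # torch.Size([1, 256, 31, 31])
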
/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from distutils.extension import Extension
3 | from Cython.Build import cythonize
4 |
5 | ext_modules = [
6 | Extension(
7 | name='toolkit.utils.region',
8 | sources=[
9 | 'toolkit/utils/region.pyx',
10 | 'toolkit/utils/src/region.c',
11 | ],
12 | include_dirs=[
13 | 'toolkit/utils/src'
14 | ]
15 | )
16 | ]
17 |
18 | setup(
19 | name='toolkit',
20 | packages=['toolkit'],
21 | ext_modules=cythonize(ext_modules)
22 | )
23 |
--------------------------------------------------------------------------------
/training_dataset/coco/pycocotools/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from Cython.Build import cythonize
3 | from distutils.extension import Extension
4 | import numpy as np
5 |
6 | # To compile and install locally run "python setup.py build_ext --inplace"
7 | # To install library to Python site-packages run "python setup.py build_ext install"
8 |
9 | ext_modules = [
10 | Extension(
11 | '_mask',
12 | sources=['common/maskApi.c', '_mask.pyx'],
13 | include_dirs = [np.get_include(), 'common'],
14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'],
15 | )
16 | ]
17 |
18 | setup(name='pycocotools',
19 | packages=['pycocotools'],
20 | package_dir = {'pycocotools': '.'},
21 | version='2.0',
22 | ext_modules=
23 | cythonize(ext_modules)
24 | )
25 |
--------------------------------------------------------------------------------
/models/PreciseRoIPooling/pytorch/prroi_pool/prroi_pool.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : prroi_pool.py
4 | # Author : Jiayuan Mao, Tete Xiao
5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
6 | # Date : 07/13/2018
7 | #
8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license.
10 | # Copyright (c) 2017 Megvii Technology Limited.
11 |
12 | import torch.nn as nn
13 |
14 | from .functional import prroi_pool2d
15 |
16 | __all__ = ['PrRoIPool2D']
17 |
18 |
19 | class PrRoIPool2D(nn.Module):
20 | def __init__(self, pooled_height, pooled_width, spatial_scale):
21 | super().__init__()
22 |
23 | self.pooled_height = int(pooled_height)
24 | self.pooled_width = int(pooled_width)
25 | self.spatial_scale = float(spatial_scale)
26 |
27 | def forward(self, features, rois):
28 | return prroi_pool2d(features, rois, self.pooled_height, self.pooled_width, self.spatial_scale)
29 |
--------------------------------------------------------------------------------
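
A minimal usage sketch of PrRoIPool2D, mirroring the unit test later in this dump: each ROI row is [batch_index, x1, y1, x2, y2] in input-image coordinates, and spatial_scale maps those coordinates onto the feature map. The underlying extension is CUDA-only and is JIT-compiled on first import (see functional.py below), so this assumes a GPU machine with the prroi_pool package directory on PYTHONPATH, as in the test:

    import torch
    from prroi_pool import PrRoIPool2D

    pool = PrRoIPool2D(7, 7, spatial_scale=0.5)        # 7x7 bins, stride-2 features
    features = torch.rand(1, 256, 32, 32).cuda()
    # One ROI on image 0, covering a 32x32 image region (16x16 on the feature map).
    rois = torch.tensor([[0, 0, 0, 32, 32]], dtype=torch.float32).cuda()
    pooled = pool(features, rois)                      # -> (1, 256, 7, 7)
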
/models/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.h:
--------------------------------------------------------------------------------
1 | /*
2 | * File : prroi_pooling_gpu.h
3 | * Author : Jiayuan Mao, Tete Xiao
4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
5 | * Date : 07/13/2018
6 | *
7 | * Distributed under terms of the MIT license.
8 | * Copyright (c) 2017 Megvii Technology Limited.
9 | */
10 |
11 | int prroi_pooling_forward_cuda(THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, int pooled_height, int pooled_width, float spatial_scale);
12 |
13 | int prroi_pooling_backward_cuda(
14 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff,
15 | int pooled_height, int pooled_width, float spatial_scale
16 | );
17 |
18 | int prroi_pooling_coor_backward_cuda(
19 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff,
20 |     int pooled_height, int pooled_width, float spatial_scale
21 | );
22 |
23 |
--------------------------------------------------------------------------------
/toolkit/utils/misc.py:
--------------------------------------------------------------------------------
1 | """
2 | @author fangyi.zhang@vipl.ict.ac.cn
3 | """
4 | import numpy as np
5 |
6 | def determine_thresholds(confidence, resolution=100):
7 | """choose threshold according to confidence
8 |
9 | Args:
10 |         confidence: list or numpy array of confidence scores
11 |         resolution: number of thresholds to choose
12 | 
13 |     Returns:
14 | threshold: numpy array
15 | """
16 | if isinstance(confidence, list):
17 | confidence = np.array(confidence)
18 | confidence = confidence.flatten()
19 | confidence = confidence[~np.isnan(confidence)]
20 | confidence.sort()
21 |
22 | assert len(confidence) > resolution and resolution > 2
23 |
24 | thresholds = np.ones((resolution))
25 | thresholds[0] = - np.inf
26 | thresholds[-1] = np.inf
27 | delta = np.floor(len(confidence) / (resolution - 2))
28 | idxs = np.linspace(delta, len(confidence)-delta, resolution-2, dtype=np.int32)
29 | thresholds[1:-1] = confidence[idxs]
30 | return thresholds
31 |
--------------------------------------------------------------------------------
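
A small usage sketch of determine_thresholds with synthetic confidences: the returned array has resolution entries, the first and last pinned to -inf/+inf and the rest sampled evenly from the sorted (NaN-free) confidence values:

    import numpy as np
    from toolkit.utils.misc import determine_thresholds

    confidence = np.random.rand(1000)                   # synthetic scores
    thresholds = determine_thresholds(confidence, resolution=100)

    assert thresholds.shape == (100,)
    assert thresholds[0] == -np.inf and thresholds[-1] == np.inf
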
/utils/log_helper.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) SenseTime. All Rights Reserved.
2 |
3 | import os
4 | import logging
5 | import math
6 | import sys
7 |
8 | logs = set()
9 |
10 | def get_format(logger, level):
11 | format_str = '[%(asctime)s-%(filename)s#%(lineno)3d] %(message)s'
12 | formatter = logging.Formatter(format_str)
13 | return formatter
14 |
15 | def init_log(name, level=logging.INFO, format_func=get_format):
16 | if (name, level) in logs:
17 | return
18 | logs.add((name, level))
19 | logger = logging.getLogger(name)
20 | logger.setLevel(level)
21 | ch = logging.StreamHandler()
22 | ch.setLevel(level)
23 | formatter = format_func(logger, level)
24 | ch.setFormatter(formatter)
25 | logger.addHandler(ch)
26 | return logger
27 |
28 | def add_file_handler(name, log_file, level=logging.INFO):
29 | logger = logging.getLogger(name)
30 | fh = logging.FileHandler(log_file)
31 | fh.setFormatter(get_format(logger, level))
32 | logger.addHandler(fh)
33 |
34 | init_log('global')
--------------------------------------------------------------------------------
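
Typical use of the logging helpers (the log-file name below is hypothetical): importing the module already runs init_log('global'), so callers usually just attach an optional file handler and fetch the configured logger:

    import logging
    from utils import log_helper   # import side effect: init_log('global')

    log_helper.add_file_handler('global', 'train.log', level=logging.INFO)
    logger = logging.getLogger('global')
    logger.info('training started')   # written to stderr and train.log
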
/models/PreciseRoIPooling/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Jiayuan Mao
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/training_dataset/coco/visual.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SiamMask
3 | # Licensed under The MIT License
4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
5 | # --------------------------------------------------------
6 | from pycocotools.coco import COCO
7 | import cv2
8 | import numpy as np
9 |
10 | color_bar = np.random.randint(0, 255, (90, 3))
11 |
12 | visual = True
13 |
14 | dataDir = '.'
15 | dataType = 'val2017'
16 | annFile = '{}/annotations/instances_{}.json'.format(dataDir,dataType)
17 | coco = COCO(annFile)
18 |
19 | for img_id in coco.imgs:
20 | img = coco.loadImgs(img_id)[0]
21 | annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
22 | anns = coco.loadAnns(annIds)
23 | im = cv2.imread('{}/{}/{}'.format(dataDir, dataType, img['file_name']))
24 | for ann in anns:
25 | rect = ann['bbox']
26 | c = ann['category_id']
27 | if visual:
28 | pt1 = (int(rect[0]), int(rect[1]))
29 | pt2 = (int(rect[0]+rect[2]-1), int(rect[1]+rect[3]-1))
30 | cv2.rectangle(im, pt1, pt2, color_bar[c-1], 3)
31 | cv2.imshow('img', im)
32 | cv2.waitKey(200)
33 | print('done')
34 |
35 |
--------------------------------------------------------------------------------
/toolkit/utils/c_region.pxd:
--------------------------------------------------------------------------------
1 | cdef extern from "src/region.h":
2 | ctypedef enum region_type "RegionType":
3 | EMTPY
4 | SPECIAL
5 | RECTANGEL
6 | POLYGON
7 | MASK
8 |
9 | ctypedef struct region_bounds:
10 | float top
11 | float bottom
12 | float left
13 | float right
14 |
15 | ctypedef struct region_rectangle:
16 | float x
17 | float y
18 | float width
19 | float height
20 |
21 | # ctypedef struct region_mask:
22 | # int x
23 | # int y
24 | # int width
25 | # int height
26 | # char *data
27 |
28 | ctypedef struct region_polygon:
29 | int count
30 | float *x
31 | float *y
32 |
33 | ctypedef union region_container_data:
34 | region_rectangle rectangle
35 | region_polygon polygon
36 | # region_mask mask
37 | int special
38 |
39 | ctypedef struct region_container:
40 | region_type type
41 | region_container_data data
42 |
43 | # ctypedef struct region_overlap:
44 | # float overlap
45 | # float only1
46 | # float only2
47 |
48 | # region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds)
49 |
50 | float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds)
51 |
--------------------------------------------------------------------------------
/toolkit/datasets/dataset.py:
--------------------------------------------------------------------------------
1 | import os
2 | from tqdm import tqdm
3 |
4 | class Dataset(object):
5 | def __init__(self, name, dataset_root):
6 | self.name = name
7 | self.dataset_root = dataset_root
8 | self.videos = None
9 |
10 | def __getitem__(self, idx):
11 | if isinstance(idx, str):
12 | return self.videos[idx]
13 | elif isinstance(idx, int):
14 | return self.videos[sorted(list(self.videos.keys()))[idx]]
15 |
16 | def __len__(self):
17 | return len(self.videos)
18 |
19 | def __iter__(self):
20 | keys = sorted(list(self.videos.keys()))
21 | for key in keys:
22 | yield self.videos[key]
23 |
24 | def set_tracker(self, path, tracker_names, ):
25 | """
26 | Args:
27 | path: path to tracker results,
28 | tracker_names: list of tracker name
29 | """
30 | self.tracker_path = path
31 | self.tracker_names = []
32 | seq_nums = len(self.videos)
33 | for tracker in tracker_names:
34 | t_path = os.path.join(path, tracker)
35 | if 'VOT' in self.name:
36 | t_path = os.path.join(path, tracker, 'baseline')
37 | seqs = os.listdir(t_path)
38 | if len(seqs) == seq_nums:
39 | self.tracker_names.append(tracker)
40 |
41 | # for video in tqdm(self.videos.values(),
42 |         #                desc='loading tracker result', ncols=100):
43 | # video.load_tracker(path, tracker_names)
44 |
--------------------------------------------------------------------------------
/utils/misc.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) SenseTime. All Rights Reserved.
2 | import os
3 |
4 | from colorama import Fore, Style
5 |
6 |
7 | __all__ = ['commit', 'describe']
8 |
9 |
10 | def _exec(cmd):
11 | f = os.popen(cmd, 'r', 1)
12 | return f.read().strip()
13 |
14 | def _em(s):
15 | return f'{s}***'
16 |
17 |
18 | def _describe(model, lines=None, spaces=0):
19 | head = " " * spaces
20 | for name, p in model.named_parameters():
21 | if '.' in name:
22 | continue
23 | if p.requires_grad:
24 | name = _em(name)
25 | line = "{head}- {name}".format(head=head, name=name)
26 | lines.append(line)
27 |
28 | for name, m in model.named_children():
29 | space_num = len(name) + spaces + 1
30 | #if m.training:
31 | # name = _em(name)
32 | line = "{head}.{name} ({type})".format(
33 | head=head,
34 | name=name,
35 | type=m.__class__.__name__)
36 | lines.append(line)
37 | _describe(m, lines, space_num)
38 |
39 |
40 | def commit():
41 | root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../'))
42 | cmd = "cd {}; git log | head -n1 | awk '{{print $2}}'".format(root)
43 | commit = _exec(cmd)
44 | cmd = "cd {}; git log --oneline | head -n1".format(root)
45 | commit_log = _exec(cmd)
46 | return "commit : {}\n log : {}".format(commit, commit_log)
47 |
48 |
49 | def describe(net, name=None):
50 | num = 0
51 | lines = []
52 | if name is not None:
53 | lines.append(name)
54 | num = len(name)
55 | _describe(net, lines, num)
56 | return "\n".join(lines)
57 |
--------------------------------------------------------------------------------
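
describe renders a model's module tree as indented text (emphasizing parameters that require gradients), and commit reports the current git commit of the checkout; a small sketch with an arbitrary model:

    import torch.nn as nn
    from utils.misc import describe, commit

    model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16))
    print(describe(model, name='demo'))   # indented per-module listing
    print(commit())                       # current commit hash and one-line log
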
/toolkit/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .vot import VOTDataset, VOTLTDataset
2 | from .otb import OTBDataset
3 | from .uav import UAVDataset
4 | from .lasot import LaSOTDataset
5 | from .nfs import NFSDataset
6 | from .trackingnet import TrackingNetDataset
7 | from .got10k import GOT10kDataset
8 |
9 | class DatasetFactory(object):
10 | @staticmethod
11 | def create_dataset(**kwargs):
12 | """
13 | Args:
14 | name: dataset name 'OTB2015', 'LaSOT', 'UAV123', 'NFS240', 'NFS30',
15 | 'VOT2018', 'VOT2016', 'VOT2018-LT'
16 | dataset_root: dataset root
17 |             load_img: whether to load images
18 | Return:
19 | dataset
20 | """
21 | assert 'name' in kwargs, "should provide dataset name"
22 | name = kwargs['name']
23 | if 'OTB' in name:
24 | dataset = OTBDataset(**kwargs)
25 | elif 'LaSOT' == name:
26 | dataset = LaSOTDataset(**kwargs)
27 | elif 'UAV' in name:
28 | dataset = UAVDataset(**kwargs)
29 | elif 'NFS' in name:
30 | dataset = NFSDataset(**kwargs)
31 | elif 'VOT2018' == name or 'VOT2016' == name or 'VOT2019' == name:
32 | dataset = VOTDataset(**kwargs)
33 | elif 'VOT2018-LT' == name:
34 | dataset = VOTLTDataset(**kwargs)
35 | elif 'TrackingNet' == name:
36 | dataset = TrackingNetDataset(**kwargs)
37 | elif 'GOT-10k' == name:
38 | dataset = GOT10kDataset(**kwargs)
39 | elif 'got10k_val' == name:
40 | dataset = GOT10kDataset(**kwargs)
41 | else:
42 |             raise Exception("unknown dataset {}".format(kwargs['name']))
43 | return dataset
44 |
45 |
--------------------------------------------------------------------------------
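
A usage sketch of DatasetFactory (the dataset root path is hypothetical): create_dataset dispatches on the name string, and the returned Dataset can be indexed by video name or iterated in sorted order:

    from toolkit.datasets import DatasetFactory

    dataset = DatasetFactory.create_dataset(name='OTB2015',
                                            dataset_root='/path/to/OTB2015',  # hypothetical
                                            load_img=False)
    for video in dataset:        # Dataset.__iter__ yields videos by sorted name
        print(video.name)
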
/models/PreciseRoIPooling/pytorch/tests/test_prroi_pooling2d.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : test_prroi_pooling2d.py
3 | # Author : Jiayuan Mao
4 | # Email : maojiayuan@gmail.com
5 | # Date : 18/02/2018
6 | #
7 | # This file is part of Jacinle.
8 |
9 | import unittest
10 |
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 | from prroi_pool import PrRoIPool2D
15 | from jactorch.utils.unittest import TorchTestCase
16 |
17 |
18 |
19 |
20 | class TestPrRoIPool2D(TorchTestCase):
21 | def test_forward(self):
22 | pool = PrRoIPool2D(7, 7, spatial_scale=0.5)
23 | features = torch.rand((4, 16, 24, 32)).cuda()
24 | rois = torch.tensor([
25 | [0, 0, 0, 14, 14],
26 | [1, 14, 14, 28, 28],
27 | ]).float().cuda()
28 |
29 | out = pool(features, rois)
30 | out_gold = F.avg_pool2d(features, kernel_size=2, stride=1)
31 |
32 | self.assertTensorClose(out, torch.stack((
33 | out_gold[0, :, :7, :7],
34 | out_gold[1, :, 7:14, 7:14],
35 | ), dim=0))
36 |
37 | def test_backward_shapeonly(self):
38 | pool = PrRoIPool2D(2, 2, spatial_scale=0.5)
39 |
40 | features = torch.rand((4, 2, 24, 32)).cuda()
41 | rois = torch.tensor([
42 | [0, 0, 0, 4, 4],
43 | [1, 14, 14, 18, 18],
44 | ]).float().cuda()
45 | features.requires_grad = rois.requires_grad = True
46 | out = pool(features, rois)
47 |
48 | loss = out.sum()
49 | loss.backward()
50 |
51 | self.assertTupleEqual(features.size(), features.grad.size())
52 | self.assertTupleEqual(rois.size(), rois.grad.size())
53 |
54 |
55 | if __name__ == '__main__':
56 | unittest.main()
57 |
--------------------------------------------------------------------------------
/models/PreciseRoIPooling/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | .vim-template*
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | .hypothesis/
50 | .pytest_cache/
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 | local_settings.py
59 | db.sqlite3
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # Jupyter Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # SageMath parsed files
84 | *.sage.py
85 |
86 | # Environments
87 | .env
88 | .venv
89 | env/
90 | venv/
91 | ENV/
92 | env.bak/
93 | venv.bak/
94 |
95 | # Spyder project settings
96 | .spyderproject
97 | .spyproject
98 |
99 | # Rope project settings
100 | .ropeproject
101 |
102 | # mkdocs documentation
103 | /site
104 |
105 | # mypy
106 | .mypy_cache/
107 |
--------------------------------------------------------------------------------
/models/PreciseRoIPooling/src/prroi_pooling_gpu_impl.cuh:
--------------------------------------------------------------------------------
1 | /*
2 | * File : prroi_pooling_gpu_impl.cuh
3 | * Author : Tete Xiao, Jiayuan Mao
4 | * Email : jasonhsiao97@gmail.com
5 | *
6 | * Distributed under terms of the MIT license.
7 | * Copyright (c) 2017 Megvii Technology Limited.
8 | */
9 |
10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH
11 | #define PRROI_POOLING_GPU_IMPL_CUH
12 |
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 |
17 | #define F_DEVPTR_IN const float *
18 | #define F_DEVPTR_OUT float *
19 |
20 | void PrRoIPoolingForwardGpu(
21 | cudaStream_t stream,
22 | F_DEVPTR_IN bottom_data,
23 | F_DEVPTR_IN bottom_rois,
24 | F_DEVPTR_OUT top_data,
25 | const int channels_, const int height_, const int width_,
26 | const int pooled_height_, const int pooled_width_,
27 | const float spatial_scale_,
28 | const int top_count);
29 |
30 | void PrRoIPoolingBackwardGpu(
31 | cudaStream_t stream,
32 | F_DEVPTR_IN bottom_data,
33 | F_DEVPTR_IN bottom_rois,
34 | F_DEVPTR_IN top_data,
35 | F_DEVPTR_IN top_diff,
36 | F_DEVPTR_OUT bottom_diff,
37 | const int channels_, const int height_, const int width_,
38 | const int pooled_height_, const int pooled_width_,
39 | const float spatial_scale_,
40 | const int top_count, const int bottom_count);
41 |
42 | void PrRoIPoolingCoorBackwardGpu(
43 | cudaStream_t stream,
44 | F_DEVPTR_IN bottom_data,
45 | F_DEVPTR_IN bottom_rois,
46 | F_DEVPTR_IN top_data,
47 | F_DEVPTR_IN top_diff,
48 | F_DEVPTR_OUT bottom_diff,
49 | const int channels_, const int height_, const int width_,
50 | const int pooled_height_, const int pooled_width_,
51 | const float spatial_scale_,
52 | const int top_count, const int bottom_count);
53 |
54 | #ifdef __cplusplus
55 | } /* !extern "C" */
56 | #endif
57 |
58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */
59 |
60 |
--------------------------------------------------------------------------------
/models/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu_impl.cuh:
--------------------------------------------------------------------------------
1 | /*
2 | * File : prroi_pooling_gpu_impl.cuh
3 | * Author : Tete Xiao, Jiayuan Mao
4 | * Email : jasonhsiao97@gmail.com
5 | *
6 | * Distributed under terms of the MIT license.
7 | * Copyright (c) 2017 Megvii Technology Limited.
8 | */
9 |
10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH
11 | #define PRROI_POOLING_GPU_IMPL_CUH
12 |
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 |
17 | #define F_DEVPTR_IN const float *
18 | #define F_DEVPTR_OUT float *
19 |
20 | void PrRoIPoolingForwardGpu(
21 | cudaStream_t stream,
22 | F_DEVPTR_IN bottom_data,
23 | F_DEVPTR_IN bottom_rois,
24 | F_DEVPTR_OUT top_data,
25 | const int channels_, const int height_, const int width_,
26 | const int pooled_height_, const int pooled_width_,
27 | const float spatial_scale_,
28 | const int top_count);
29 |
30 | void PrRoIPoolingBackwardGpu(
31 | cudaStream_t stream,
32 | F_DEVPTR_IN bottom_data,
33 | F_DEVPTR_IN bottom_rois,
34 | F_DEVPTR_IN top_data,
35 | F_DEVPTR_IN top_diff,
36 | F_DEVPTR_OUT bottom_diff,
37 | const int channels_, const int height_, const int width_,
38 | const int pooled_height_, const int pooled_width_,
39 | const float spatial_scale_,
40 | const int top_count, const int bottom_count);
41 |
42 | void PrRoIPoolingCoorBackwardGpu(
43 | cudaStream_t stream,
44 | F_DEVPTR_IN bottom_data,
45 | F_DEVPTR_IN bottom_rois,
46 | F_DEVPTR_IN top_data,
47 | F_DEVPTR_IN top_diff,
48 | F_DEVPTR_OUT bottom_diff,
49 | const int channels_, const int height_, const int width_,
50 | const int pooled_height_, const int pooled_width_,
51 | const float spatial_scale_,
52 | const int top_count, const int bottom_count);
53 |
54 | #ifdef __cplusplus
55 | } /* !extern "C" */
56 | #endif
57 |
58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */
59 |
60 |
--------------------------------------------------------------------------------
/toolkit/visualization/draw_eao.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import pickle
4 |
5 | from matplotlib import rc
6 | from .draw_utils import COLOR, MARKER_STYLE
7 |
8 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
9 | rc('text', usetex=True)
10 |
11 | def draw_eao(result):
12 | fig = plt.figure()
13 | ax = fig.add_subplot(111, projection='polar')
14 | angles = np.linspace(0, 2*np.pi, 8, endpoint=True)
15 |
16 | attr2value = []
17 | for i, (tracker_name, ret) in enumerate(result.items()):
18 | value = list(ret.values())
19 | attr2value.append(value)
20 | value.append(value[0])
21 | attr2value = np.array(attr2value)
22 | max_value = np.max(attr2value, axis=0)
23 | min_value = np.min(attr2value, axis=0)
24 | for i, (tracker_name, ret) in enumerate(result.items()):
25 | value = list(ret.values())
26 | value.append(value[0])
27 | value = np.array(value)
28 | value *= (1 / max_value)
29 | plt.plot(angles, value, linestyle='-', color=COLOR[i], marker=MARKER_STYLE[i],
30 | label=tracker_name, linewidth=1.5, markersize=6)
31 |
32 | attrs = ["Overall", "Camera motion",
33 | "Illumination change","Motion Change",
34 | "Size change","Occlusion",
35 | "Unassigned"]
36 | attr_value = []
37 | for attr, maxv, minv in zip(attrs, max_value, min_value):
38 | attr_value.append(attr + "\n({:.3f},{:.3f})".format(minv, maxv))
39 | ax.set_thetagrids(angles[:-1] * 180/np.pi, attr_value)
40 | ax.spines['polar'].set_visible(False)
41 | ax.legend(loc='upper center', bbox_to_anchor=(0.5,-0.07), frameon=False, ncol=5)
42 | ax.grid(b=False)
43 | ax.set_ylim(0, 1.18)
44 | ax.set_yticks([])
45 | plt.show()
46 |
47 | if __name__ == '__main__':
48 | result = pickle.load(open("../../result.pkl", 'rb'))
49 | draw_eao(result)
50 |
--------------------------------------------------------------------------------
/training_dataset/vid/visual.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SiamMask
3 | # Licensed under The MIT License
4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
5 | # --------------------------------------------------------
6 | from os.path import join
7 | from os import listdir
8 | import cv2
9 | import numpy as np
10 | import glob
11 | import xml.etree.ElementTree as ET
12 |
13 | visual = False
14 | color_bar = np.random.randint(0, 255, (90, 3))
15 |
16 | VID_base_path = './ILSVRC2015'
17 | ann_base_path = join(VID_base_path, 'Annotations/VID/train/')
18 | img_base_path = join(VID_base_path, 'Data/VID/train/')
19 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e'})
20 | for sub_set in sub_sets:
21 | sub_set_base_path = join(ann_base_path, sub_set)
22 | videos = sorted(listdir(sub_set_base_path))
23 | for vi, video in enumerate(videos):
24 | print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos)))
25 |
26 | video_base_path = join(sub_set_base_path, video)
27 | xmls = sorted(glob.glob(join(video_base_path, '*.xml')))
28 | for xml in xmls:
29 | f = dict()
30 | xmltree = ET.parse(xml)
31 | size = xmltree.findall('size')[0]
32 | frame_sz = [int(it.text) for it in size]
33 | objects = xmltree.findall('object')
34 | if visual:
35 | im = cv2.imread(xml.replace('xml', 'JPEG').replace('Annotations', 'Data'))
36 | for object_iter in objects:
37 | trackid = int(object_iter.find('trackid').text)
38 | bndbox = object_iter.find('bndbox')
39 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text),
40 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)]
41 | if visual:
42 | pt1 = (int(bbox[0]), int(bbox[1]))
43 | pt2 = (int(bbox[2]), int(bbox[3]))
44 | cv2.rectangle(im, pt1, pt2, color_bar[trackid], 3)
45 | if visual:
46 | cv2.imshow('img', im)
47 | cv2.waitKey(1)
48 |
49 | print('done!')
50 |
--------------------------------------------------------------------------------
/training_dataset/det/visual.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SiamMask
3 | # Licensed under The MIT License
4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
5 | # --------------------------------------------------------
6 | from os.path import join
7 | from os import listdir
8 | import cv2
9 | import numpy as np
10 | import glob
11 | import xml.etree.ElementTree as ET
12 |
13 | visual = False
14 | color_bar = np.random.randint(0, 255, (90, 3))
15 |
16 | VID_base_path = './ILSVRC2015'
17 | ann_base_path = join(VID_base_path, 'Annotations/DET/train/')
18 | img_base_path = join(VID_base_path, 'Data/DET/train/')
19 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i'})
20 | for sub_set in sub_sets:
21 | sub_set_base_path = join(ann_base_path, sub_set)
22 | class_names = sorted(listdir(sub_set_base_path))
23 | for vi, class_name in enumerate(class_names):
24 | print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(class_names)))
25 |
26 | class_base_path = join(sub_set_base_path, class_name)
27 | xmls = sorted(glob.glob(join(class_base_path, '*.xml')))
28 | for xml in xmls:
29 | f = dict()
30 | xmltree = ET.parse(xml)
31 | size = xmltree.findall('size')[0]
32 | frame_sz = [int(it.text) for it in size]
33 | objects = xmltree.findall('object')
34 | # if visual:
35 | img_path = xml.replace('xml', 'JPEG').replace('Annotations', 'Data')
36 | im = cv2.imread(img_path)
37 | for object_iter in objects:
38 | bndbox = object_iter.find('bndbox')
39 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text),
40 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)]
41 | if visual:
42 | pt1 = (int(bbox[0]), int(bbox[1]))
43 | pt2 = (int(bbox[2]), int(bbox[3]))
44 | cv2.rectangle(im, pt1, pt2, color_bar[vi], 3)
45 | if visual:
46 | cv2.imshow('img', im)
47 | cv2.waitKey(500)
48 |
49 | print('done!')
50 |
--------------------------------------------------------------------------------
/toolkit/datasets/uav.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 |
4 | from tqdm import tqdm
5 | from glob import glob
6 |
7 | from .dataset import Dataset
8 | from .video import Video
9 |
10 | class UAVVideo(Video):
11 | """
12 | Args:
13 | name: video name
14 | root: dataset root
15 | video_dir: video directory
16 | init_rect: init rectangle
17 | img_names: image names
18 | gt_rect: groundtruth rectangle
19 | attr: attribute of video
20 | """
21 | def __init__(self, name, root, video_dir, init_rect, img_names,
22 | gt_rect, attr, load_img=False):
23 | super(UAVVideo, self).__init__(name, root, video_dir,
24 | init_rect, img_names, gt_rect, attr, load_img)
25 |
26 |
27 | class UAVDataset(Dataset):
28 | """
29 | Args:
30 | name: dataset name, should be 'UAV123', 'UAV20L'
31 | dataset_root: dataset root
32 |         load_img: whether to load all images
33 | """
34 | def __init__(self, name, dataset_root, load_img=False):
35 | super(UAVDataset, self).__init__(name, dataset_root)
36 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
37 | meta_data = json.load(f)
38 |
39 | # load videos
40 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
41 | self.videos = {}
42 | for video in pbar:
43 | pbar.set_postfix_str(video)
44 | self.videos[video] = UAVVideo(video,
45 | dataset_root,
46 | meta_data[video]['video_dir'],
47 | meta_data[video]['init_rect'],
48 | meta_data[video]['img_names'],
49 | meta_data[video]['gt_rect'],
50 | meta_data[video]['attr'])
51 |
52 | # set attr
53 | attr = []
54 | for x in self.videos.values():
55 | attr += x.attr
56 | attr = set(attr)
57 | self.attr = {}
58 | self.attr['ALL'] = list(self.videos.keys())
59 | for x in attr:
60 | self.attr[x] = []
61 | for k, v in self.videos.items():
62 | for attr_ in v.attr:
63 | self.attr[attr_].append(k)
64 |
65 |
--------------------------------------------------------------------------------
/toolkit/visualization/draw_f1.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | from matplotlib import rc
5 | from .draw_utils import COLOR, LINE_STYLE
6 |
7 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
8 | rc('text', usetex=True)
9 |
10 | def draw_f1(result, bold_name=None):
11 | # drawing f1 contour
12 | fig, ax = plt.subplots()
13 | for f1 in np.arange(0.1, 1, 0.1):
14 | recall = np.arange(f1, 1+0.01, 0.01)
15 | precision = f1 * recall / (2 * recall - f1)
16 | ax.plot(recall, precision, color=[0,1,0], linestyle='-', linewidth=0.5)
17 | ax.plot(precision, recall, color=[0,1,0], linestyle='-', linewidth=0.5)
18 | ax.grid(b=True)
19 | ax.set_aspect(1)
20 | plt.xlabel('Recall')
21 | plt.ylabel('Precision')
22 | plt.axis([0, 1, 0, 1])
23 | plt.title(r'\textbf{VOT2018-LT Precision vs Recall}')
24 |
25 | # draw result line
26 | all_precision = {}
27 | all_recall = {}
28 | best_f1 = {}
29 | best_idx = {}
30 | for tracker_name, ret in result.items():
31 | precision = np.mean(list(ret['precision'].values()), axis=0)
32 | recall = np.mean(list(ret['recall'].values()), axis=0)
33 | f1 = 2 * precision * recall / (precision + recall)
34 | max_idx = np.argmax(f1)
35 | all_precision[tracker_name] = precision
36 | all_recall[tracker_name] = recall
37 | best_f1[tracker_name] = f1[max_idx]
38 | best_idx[tracker_name] = max_idx
39 |
40 | for idx, (tracker_name, best_f1) in \
41 | enumerate(sorted(best_f1.items(), key=lambda x:x[1], reverse=True)):
42 | if tracker_name == bold_name:
43 | label = r"\textbf{[%.3f] Ours}" % (best_f1)
44 | else:
45 | label = "[%.3f] " % (best_f1) + tracker_name
46 | recall = all_recall[tracker_name][:-1]
47 | precision = all_precision[tracker_name][:-1]
48 | ax.plot(recall, precision, color=COLOR[idx], linestyle='-',
49 | label=label)
50 | f1_idx = best_idx[tracker_name]
51 | ax.plot(recall[f1_idx], precision[f1_idx], color=[0,0,0], marker='o',
52 | markerfacecolor=COLOR[idx], markersize=5)
53 | ax.legend(loc='lower right', labelspacing=0.2)
54 | plt.xticks(np.arange(0, 1+0.1, 0.1))
55 | plt.yticks(np.arange(0, 1+0.1, 0.1))
56 | plt.show()
57 |
58 | if __name__ == '__main__':
59 | draw_f1(None)
60 |
--------------------------------------------------------------------------------
/training_dataset/coco/pycocotools/common/maskApi.h:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #pragma once
8 |
9 | typedef unsigned int uint;
10 | typedef unsigned long siz;
11 | typedef unsigned char byte;
12 | typedef double* BB;
13 | typedef struct { siz h, w, m; uint *cnts; } RLE;
14 |
15 | /* Initialize/destroy RLE. */
16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
17 | void rleFree( RLE *R );
18 |
19 | /* Initialize/destroy RLE array. */
20 | void rlesInit( RLE **R, siz n );
21 | void rlesFree( RLE **R, siz n );
22 |
23 | /* Encode binary masks using RLE. */
24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
25 |
26 | /* Decode binary masks encoded via RLE. */
27 | void rleDecode( const RLE *R, byte *mask, siz n );
28 |
29 | /* Compute union or intersection of encoded masks. */
30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect );
31 |
32 | /* Compute area of encoded masks. */
33 | void rleArea( const RLE *R, siz n, uint *a );
34 |
35 | /* Compute intersection over union between masks. */
36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
37 |
38 | /* Compute non-maximum suppression between bounding masks */
39 | void rleNms( RLE *dt, siz n, uint *keep, double thr );
40 |
41 | /* Compute intersection over union between bounding boxes. */
42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
43 |
44 | /* Compute non-maximum suppression between bounding boxes */
45 | void bbNms( BB dt, siz n, uint *keep, double thr );
46 |
47 | /* Get bounding boxes surrounding encoded masks. */
48 | void rleToBbox( const RLE *R, BB bb, siz n );
49 |
50 | /* Convert bounding boxes to encoded masks. */
51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
52 |
53 | /* Convert polygon to encoded mask. */
54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
55 |
56 | /* Get compressed string representation of encoded mask. */
57 | char* rleToString( const RLE *R );
58 |
59 | /* Convert from compressed string representation of encoded mask. */
60 | void rleFrString( RLE *R, char *s, siz h, siz w );
61 |
--------------------------------------------------------------------------------
/training_dataset/vid/parse_vid.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SiamMask
3 | # Licensed under The MIT License
4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
5 | # --------------------------------------------------------
6 | from os.path import join
7 | from os import listdir
8 | import json
9 | import glob
10 | import xml.etree.ElementTree as ET
11 |
12 | VID_base_path = '/ssd/feiji/Research/Data/ILSVRC2015'
13 | ann_base_path = join(VID_base_path, 'Annotations/VID/train/')
14 | img_base_path = join(VID_base_path, 'Data/VID/train/')
15 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e'})
16 |
17 | vid = []
18 | for sub_set in sub_sets:
19 | sub_set_base_path = join(ann_base_path, sub_set)
20 | videos = sorted(listdir(sub_set_base_path))
21 | s = []
22 | for vi, video in enumerate(videos):
23 | print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos)))
24 | v = dict()
25 | v['base_path'] = join(sub_set, video)
26 | v['frame'] = []
27 | video_base_path = join(sub_set_base_path, video)
28 | xmls = sorted(glob.glob(join(video_base_path, '*.xml')))
29 | for xml in xmls:
30 | f = dict()
31 | xmltree = ET.parse(xml)
32 | size = xmltree.findall('size')[0]
33 | frame_sz = [int(it.text) for it in size]
34 | objects = xmltree.findall('object')
35 | objs = []
36 | for object_iter in objects:
37 | trackid = int(object_iter.find('trackid').text)
38 | name = (object_iter.find('name')).text
39 | bndbox = object_iter.find('bndbox')
40 | occluded = int(object_iter.find('occluded').text)
41 | o = dict()
42 | o['c'] = name
43 | o['bbox'] = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text),
44 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)]
45 | o['trackid'] = trackid
46 | o['occ'] = occluded
47 | objs.append(o)
48 | f['frame_sz'] = frame_sz
49 | f['img_path'] = xml.split('/')[-1].replace('xml', 'JPEG')
50 | f['objs'] = objs
51 | v['frame'].append(f)
52 | s.append(v)
53 | vid.append(s)
54 | print('save json (raw vid info), please wait 1 min~')
55 | json.dump(vid, open('vid.json', 'w'), indent=4, sort_keys=True)
56 | print('done!')
57 |
--------------------------------------------------------------------------------
/training_dataset/y2b/gen_json_clean.py:
--------------------------------------------------------------------------------
1 | from os.path import join, isdir
2 | from os import mkdir
3 | import numpy as np
4 | import cv2
5 | import glob
6 | import json
7 |
8 | def check_neg(bbox):
9 | x1, y1, x2, y2 = bbox
10 | w, h = x2 - x1, y2 -y1
11 | if w <= 0 or h <= 0:
12 | return False
13 | return True
14 |
15 | def check_size(frame_sz, bbox):
16 | #min_ratio = 0.1
17 | max_ratio = 0.75
18 |     # only accept objects smaller than 75% of the frame (the >10% lower bound is disabled)
19 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz)))
20 | ok = (area_ratio < max_ratio) # and (area_ratio > min_ratio)
21 | return ok
22 |
23 |
24 | def check_borders(frame_sz, bbox):
25 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2
26 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \
27 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \
28 | ((frame_sz[1] - bbox[3]) > dist_from_border)
29 | return ok
30 |
31 | data_file = 'train.json'
32 | path_format = "{}.{}.{}.jpg"
33 | root = "/ssd/feiji/Research/Data/y2b_crop511"
34 | anno = json.load(open(data_file, 'r'))
35 | wh_file = 'train_wh.json'
36 | wh = json.load(open(wh_file, 'r'))
37 | out = {}
38 | n_videos = 0
39 | for video, tracks in anno.items():
40 | new_tracks = {}
41 | video_id = video.split('/')[-1]
42 | if not(video_id in wh):
43 | continue
44 | frame_sz = wh[video_id]
45 | for track, frames in tracks.items():
46 | new_frames = {}
47 | valid_num = 0
48 | for frame, bbox in frames.items():
49 | new_info = {}
50 | image_path = join(root, video, path_format.format(frame, track, 'x'))
51 | new_info['bbox'] = bbox
52 | new_info['valid'] = 0
53 | if check_neg(bbox) and check_size(frame_sz, bbox) and check_borders(frame_sz, bbox):
54 | new_info['valid'] = 1
55 | valid_num += 1
56 | new_frames[frame] = new_info
57 | #new_frames['track_category'] = video.split('/')[0]
58 |
59 | if valid_num > 0:
60 | new_tracks[track] = new_frames
61 | if len(new_tracks) > 0:
62 | out[video] = new_tracks
63 | n_videos += 1
64 | print('video: {:d}'.format(n_videos))
65 |
66 | json.dump(out, open('train_largeclean.json', 'w'), indent=4, sort_keys=True)
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
--------------------------------------------------------------------------------
/experiments/cgacd_resnet/cgacd_resnet.yml:
--------------------------------------------------------------------------------
1 | backbone:
2 | type: 'resnet50'
3 | pretrained: 'resnet50.model'
4 | unfix_layers: ['layer3', 'layer2']
5 | unfix_steps: [10, 10]
6 | unfix_lr: [0.1, 0.1]
7 | kwargs:
8 | used_layers: [3]
9 |
10 | adjust:
11 | adjust: true
12 | type: 'AdjustLayer'
13 | kwargs:
14 | in_channels: 1024
15 | out_channels: 256
16 |
17 | siamese:
18 | type: 'UPChannelSiamese'
19 |
20 | attention:
21 | attention: true
22 | type: 'PixelAttention'
23 |
24 | cornerdet:
25 | cornerdet: true
26 | type: 'SepCornerDet'
27 |
28 | train:
29 | roi_augmentation:
30 | ratio: 0.5
31 | shift: 8
32 | scale: 0.1
33 | creg_weight: 0.25
34 | epoch: 20
35 | pretrain_epoch: 1
36 | response_size: 25
37 | template_pool_size: 5
38 | search_pool_size: 7
39 | train_dataset:
40 | names:
41 | - 'youtubebb'
42 | - 'got10k'
43 | - 'vid'
44 | - 'coco'
45 | - 'det'
46 | youtubebb:
47 | num_use: 100000
48 | root: '/ssd/feiji/Research/Data/y2b_crop511'
49 | anno: '/home/feiji/Research/Data/data_preprocess/y2b/train_largeclean.json'
50 | got10k:
51 | num_use: 100000
52 | root: '/ssd/feiji/Research/Data/GOT-10k_crop511'
53 | anno: '/home/feiji/Research/Data/data_preprocess/got10k/train_largeclean.json'
54 | vid:
55 | num_use: 50000
56 | root: '/ssd/feiji/Research/Data/VID_crop511'
57 | anno: '/home/feiji/Research/Data/data_preprocess/vid/train_largeclean.json'
58 | coco:
59 | num_use: 50000
60 | root: '/ssd/feiji/Research/Data/COCO_crop511'
61 | anno: '/home/feiji/Research/Data/data_preprocess/coco/train2017_largeclean.json'
62 | det:
63 | num_use: 50000
64 | root: '/ssd/feiji/Research/Data/DET_crop511'
65 | anno: '/home/feiji/Research/Data/data_preprocess/det/train_largeclean.json'
66 |
67 | video_per_epoch: 350000
68 |
69 | augmentation:
70 | neg: 0.2
71 | gray: 0.25
72 | search:
73 | shift: 64
74 | scale: 0.18
75 | blur: 0.2
76 |
77 | lr:
78 | type: 'log'
79 | start_lr: 0.001
80 | end_lr: 0.0001
81 | pretrain:
82 | start_lr: 0.0005
83 | type: 'step'
84 | step: 1
85 | epoch: 1
86 | warmup:
87 | start_lr: 0.0006
88 | end_lr: 0.001
89 | type: 'step'
90 | step: 1
91 | epoch: 4
92 |
93 | track:
94 | response_size: 25
95 | penalty_k: 0.055
96 | window_influence: 0.42
97 | lr: 0.2
98 |
--------------------------------------------------------------------------------
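
The YAML above configures the backbone, neck, attention, corner detection, training schedule, and tracking hyper-parameters. The project pins yacs in requirement.txt, so it presumably merges this file into a CfgNode; a minimal, hypothetical sketch that just reads a few fields with plain PyYAML to show the structure:

    import yaml

    with open('experiments/cgacd_resnet/cgacd_resnet.yml') as f:
        cfg = yaml.safe_load(f)

    print(cfg['backbone']['type'])           # 'resnet50'
    print(cfg['adjust']['kwargs'])           # {'in_channels': 1024, 'out_channels': 256}
    print(cfg['track']['window_influence'])  # 0.42
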
/experiments/cgacd_resnet_otb/cgacd_resnet_otb.yml:
--------------------------------------------------------------------------------
1 | backbone:
2 | type: 'resnet50'
3 | pretrained: 'resnet50.model'
4 | unfix_layers: ['layer3', 'layer2']
5 | unfix_steps: [10, 10]
6 | unfix_lr: [0.1, 0.1]
7 | kwargs:
8 | used_layers: [3]
9 |
10 | adjust:
11 | adjust: true
12 | type: 'AdjustLayer'
13 | kwargs:
14 | in_channels: 1024
15 | out_channels: 256
16 |
17 | siamese:
18 | type: 'UPChannelSiamese'
19 |
20 | attention:
21 | attention: true
22 | type: 'PixelAttention'
23 |
24 | cornerdet:
25 | cornerdet: true
26 | type: 'SepCornerDet'
27 |
28 | train:
29 | roi_augmentation:
30 | ratio: 0.5
31 | shift: 16
32 | scale: 0.1
33 | creg_weight: 0.25
34 | epoch: 20
35 | pretrain_epoch: 1
36 | response_size: 25
37 | template_pool_size: 5
38 | search_pool_size: 7
39 | train_dataset:
40 | names:
41 | - 'youtubebb'
42 | - 'got10k'
43 | - 'vid'
44 | - 'coco'
45 | - 'det'
46 | youtubebb:
47 | num_use: 100000
48 | root: '/ssd/feiji/Research/Data/y2b_crop511'
49 | anno: '/home/feiji/Research/Data/data_preprocess/y2b/train_largeclean.json'
50 | got10k:
51 | num_use: 100000
52 | root: '/ssd/feiji/Research/Data/GOT-10k_crop511'
53 | anno: '/home/feiji/Research/Data/data_preprocess/got10k/train_largeclean.json'
54 | vid:
55 | num_use: 50000
56 | root: '/ssd/feiji/Research/Data/VID_crop511'
57 | anno: '/home/feiji/Research/Data/data_preprocess/vid/train_largeclean.json'
58 | coco:
59 | num_use: 50000
60 | root: '/ssd/feiji/Research/Data/COCO_crop511'
61 | anno: '/home/feiji/Research/Data/data_preprocess/coco/train2017_largeclean.json'
62 | det:
63 | num_use: 50000
64 | root: '/ssd/feiji/Research/Data/DET_crop511'
65 | anno: '/home/feiji/Research/Data/data_preprocess/det/train_largeclean.json'
66 |
67 | video_per_epoch: 350000
68 |
69 | augmentation:
70 | neg: 0.2
71 | gray: 0.25
72 | search:
73 | shift: 64
74 | scale: 0.18
75 | blur: 0.2
76 |
77 | lr:
78 | type: 'log'
79 | start_lr: 0.001
80 | end_lr: 0.0001
81 | pretrain:
82 | start_lr: 0.0005
83 | type: 'step'
84 | step: 1
85 | epoch: 1
86 | warmup:
87 | start_lr: 0.0006
88 | end_lr: 0.001
89 | type: 'step'
90 | step: 1
91 | epoch: 4
92 |
93 | track:
94 | response_size: 25
95 | penalty_k: 0.055
96 | window_influence: 0.42
97 | lr: 0.2
98 |
--------------------------------------------------------------------------------
/models/PreciseRoIPooling/pytorch/prroi_pool/functional.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : functional.py
4 | # Author : Jiayuan Mao, Tete Xiao
5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
6 | # Date : 07/13/2018
7 | #
8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license.
10 | # Copyright (c) 2017 Megvii Technology Limited.
11 |
12 | import torch
13 | import torch.autograd as ag
14 |
15 | try:
16 | from os.path import join as pjoin, dirname
17 | from torch.utils.cpp_extension import load as load_extension
18 | root_dir = pjoin(dirname(__file__), 'src')
19 | _prroi_pooling = load_extension(
20 | '_prroi_pooling',
21 | [pjoin(root_dir, 'prroi_pooling_gpu.c'), pjoin(root_dir, 'prroi_pooling_gpu_impl.cu')],
22 | verbose=True
23 | )
24 | except ImportError:
25 |     raise ImportError('Cannot compile Precise RoI Pooling library.')
26 |
27 | __all__ = ['prroi_pool2d']
28 |
29 |
30 | class PrRoIPool2DFunction(ag.Function):
31 | @staticmethod
32 | def forward(ctx, features, rois, pooled_height, pooled_width, spatial_scale):
33 | assert 'FloatTensor' in features.type() and 'FloatTensor' in rois.type(), \
34 | 'Precise RoI Pooling only takes float input, got {} for features and {} for rois.'.format(features.type(), rois.type())
35 |
36 | pooled_height = int(pooled_height)
37 | pooled_width = int(pooled_width)
38 | spatial_scale = float(spatial_scale)
39 |
40 | features = features.contiguous()
41 | rois = rois.contiguous()
42 | params = (pooled_height, pooled_width, spatial_scale)
43 |
44 | if features.is_cuda:
45 | output = _prroi_pooling.prroi_pooling_forward_cuda(features, rois, *params)
46 | ctx.params = params
47 | # everything here is contiguous.
48 | ctx.save_for_backward(features, rois, output)
49 | else:
50 |             raise NotImplementedError('Precise RoI Pooling only supports GPU (CUDA) implementations.')
51 |
52 | return output
53 |
54 | @staticmethod
55 | def backward(ctx, grad_output):
56 | features, rois, output = ctx.saved_tensors
57 | grad_input = grad_coor = None
58 |
59 | if features.requires_grad:
60 | grad_output = grad_output.contiguous()
61 | grad_input = _prroi_pooling.prroi_pooling_backward_cuda(features, rois, output, grad_output, *ctx.params)
62 | if rois.requires_grad:
63 | grad_output = grad_output.contiguous()
64 | grad_coor = _prroi_pooling.prroi_pooling_coor_backward_cuda(features, rois, output, grad_output, *ctx.params)
65 |
66 | return grad_input, grad_coor, None, None, None
67 |
68 |
69 | prroi_pool2d = PrRoIPool2DFunction.apply
70 |
71 |
--------------------------------------------------------------------------------
/training_dataset/coco/gen_json_clean.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SiamMask
3 | # Licensed under The MIT License
4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
5 | # --------------------------------------------------------
6 | from pycocotools.coco import COCO
7 | from os.path import join
8 | import numpy as np
9 | import json
10 |
11 | def check_neg(bbox):
12 | x1, y1, x2, y2 = bbox
13 | w, h = x2 - x1, y2 -y1
14 | if w <= 0 or h <= 0:
15 | return False
16 | return True
17 |
18 | def check_size(frame_sz, bbox):
19 | #min_ratio = 0.1
20 | max_ratio = 0.75
21 |     # only accept objects <75% of the total frame (the >10% lower bound is disabled above)
22 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz)))
23 | ok = (area_ratio < max_ratio) # and (area_ratio > min_ratio)
24 | return ok
25 |
26 |
27 | def check_borders(frame_sz, bbox):
28 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2
29 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \
30 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \
31 | ((frame_sz[1] - bbox[3]) > dist_from_border)
32 | return ok
33 |
34 |
35 | dataDir = '/home/feiji/Research/Data/COCO'
36 | for data_subset in ['val2017', 'train2017']:
37 | dataset = dict()
38 | annFile = '{}/annotations/instances_{}.json'.format(dataDir, data_subset)
39 | coco = COCO(annFile)
40 | n_imgs = len(coco.imgs)
41 | for n, img_id in enumerate(coco.imgs):
42 | print('subset: {} image id: {:04d} / {:04d}'.format(data_subset, n, n_imgs))
43 | img = coco.loadImgs(img_id)[0]
44 | annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
45 | anns = coco.loadAnns(annIds)
46 | crop_base_path = join(data_subset, img['file_name'].split('/')[-1].split('.')[0])
47 | frame_sz = [img['width'], img['height']]
48 |
49 | for track_id, ann in enumerate(anns):
50 | info = {}
51 | rect = ann['bbox']
52 | if rect[2] <= 0 or rect[3] <= 0: # lead nan error in cls.
53 | continue
54 | bbox = [rect[0], rect[1], rect[0]+rect[2]-1, rect[1]+rect[3]-1] # x1,y1,x2,y2
55 | if check_neg(bbox) and check_size(frame_sz, bbox) and check_borders(frame_sz, bbox):
56 | if crop_base_path not in dataset:
57 | dataset[crop_base_path] = dict()
58 | info['valid'] = 1
59 | info['bbox'] = bbox
60 | dataset[crop_base_path]['{:02d}'.format(track_id)] = {'000000': info}
61 | #dataset[crop_base_path]['{:02d}'.format(track_id)]['track_category'] = ann['category_id']
62 |
63 | print('save json (dataset), please wait 20 seconds~')
64 | json.dump(dataset, open('{}_largeclean.json'.format(data_subset), 'w'), indent=4, sort_keys=True)
65 | print('done!')
66 |
67 |
--------------------------------------------------------------------------------
/toolkit/datasets/got10k.py:
--------------------------------------------------------------------------------
1 |
2 | import json
3 | import os
4 |
5 | from tqdm import tqdm
6 |
7 | from .dataset import Dataset
8 | from .video import Video
9 |
10 | class GOT10kVideo(Video):
11 | """
12 | Args:
13 | name: video name
14 | root: dataset root
15 | video_dir: video directory
16 | init_rect: init rectangle
17 | img_names: image names
18 | gt_rect: groundtruth rectangle
19 | attr: attribute of video
20 | """
21 | def __init__(self, name, root, video_dir, init_rect, img_names,
22 | gt_rect, attr, load_img=False):
23 | super(GOT10kVideo, self).__init__(name, root, video_dir,
24 | init_rect, img_names, gt_rect, attr, load_img)
25 |
26 | # def load_tracker(self, path, tracker_names=None):
27 | # """
28 | # Args:
29 | # path(str): path to result
30 | # tracker_name(list): name of tracker
31 | # """
32 | # if not tracker_names:
33 | # tracker_names = [x.split('/')[-1] for x in glob(path)
34 | # if os.path.isdir(x)]
35 | # if isinstance(tracker_names, str):
36 | # tracker_names = [tracker_names]
37 | # # self.pred_trajs = {}
38 | # for name in tracker_names:
39 | # traj_file = os.path.join(path, name, self.name+'.txt')
40 | # if os.path.exists(traj_file):
41 | # with open(traj_file, 'r') as f :
42 | # self.pred_trajs[name] = [list(map(float, x.strip().split(',')))
43 | # for x in f.readlines()]
44 | # if len(self.pred_trajs[name]) != len(self.gt_traj):
45 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name)
46 | # else:
47 |
48 | # self.tracker_names = list(self.pred_trajs.keys())
49 |
50 | class GOT10kDataset(Dataset):
51 | """
52 | Args:
53 |         name: dataset name, e.g. "GOT-10k"
54 |         dataset_root: dataset root dir
55 | """
56 | def __init__(self, name, dataset_root, load_img=False):
57 | super(GOT10kDataset, self).__init__(name, dataset_root)
58 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
59 | meta_data = json.load(f)
60 |
61 | # load videos
62 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
63 | self.videos = {}
64 | for video in pbar:
65 | pbar.set_postfix_str(video)
66 | self.videos[video] = GOT10kVideo(video,
67 | dataset_root,
68 | meta_data[video]['video_dir'],
69 | meta_data[video]['init_rect'],
70 | meta_data[video]['img_names'],
71 | meta_data[video]['gt_rect'],
72 | None)
73 | self.attr = {}
74 | self.attr['ALL'] = list(self.videos.keys())
75 |
--------------------------------------------------------------------------------
/toolkit/datasets/nfs.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from tqdm import tqdm
6 | from glob import glob
7 |
8 | from .dataset import Dataset
9 | from .video import Video
10 |
11 |
12 | class NFSVideo(Video):
13 | """
14 | Args:
15 | name: video name
16 | root: dataset root
17 | video_dir: video directory
18 | init_rect: init rectangle
19 | img_names: image names
20 | gt_rect: groundtruth rectangle
21 | attr: attribute of video
22 | """
23 | def __init__(self, name, root, video_dir, init_rect, img_names,
24 | gt_rect, attr, load_img=False):
25 | super(NFSVideo, self).__init__(name, root, video_dir,
26 | init_rect, img_names, gt_rect, attr, load_img)
27 |
28 | # def load_tracker(self, path, tracker_names=None):
29 | # """
30 | # Args:
31 | # path(str): path to result
32 | # tracker_name(list): name of tracker
33 | # """
34 | # if not tracker_names:
35 | # tracker_names = [x.split('/')[-1] for x in glob(path)
36 | # if os.path.isdir(x)]
37 | # if isinstance(tracker_names, str):
38 | # tracker_names = [tracker_names]
39 | # # self.pred_trajs = {}
40 | # for name in tracker_names:
41 | # traj_file = os.path.join(path, name, self.name+'.txt')
42 | # if os.path.exists(traj_file):
43 | # with open(traj_file, 'r') as f :
44 | # self.pred_trajs[name] = [list(map(float, x.strip().split(',')))
45 | # for x in f.readlines()]
46 | # if len(self.pred_trajs[name]) != len(self.gt_traj):
47 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name)
48 | # else:
49 |
50 | # self.tracker_names = list(self.pred_trajs.keys())
51 |
52 | class NFSDataset(Dataset):
53 | """
54 | Args:
55 | name: dataset name, should be "NFS30" or "NFS240"
56 |         dataset_root: dataset root dir
57 | """
58 | def __init__(self, name, dataset_root, load_img=False):
59 | super(NFSDataset, self).__init__(name, dataset_root)
60 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
61 | meta_data = json.load(f)
62 |
63 | # load videos
64 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
65 | self.videos = {}
66 | for video in pbar:
67 | pbar.set_postfix_str(video)
68 | self.videos[video] = NFSVideo(video,
69 | dataset_root,
70 | meta_data[video]['video_dir'],
71 | meta_data[video]['init_rect'],
72 | meta_data[video]['img_names'],
73 | meta_data[video]['gt_rect'],
74 | None)
75 |
76 | self.attr = {}
77 | self.attr['ALL'] = list(self.videos.keys())
78 |
--------------------------------------------------------------------------------
/toolkit/datasets/trackingnet.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from tqdm import tqdm
6 | from glob import glob
7 |
8 | from .dataset import Dataset
9 | from .video import Video
10 |
11 | class TrackingNetVideo(Video):
12 | """
13 | Args:
14 | name: video name
15 | root: dataset root
16 | video_dir: video directory
17 | init_rect: init rectangle
18 | img_names: image names
19 | gt_rect: groundtruth rectangle
20 | attr: attribute of video
21 | """
22 | def __init__(self, name, root, video_dir, init_rect, img_names,
23 | gt_rect, attr, load_img=False):
24 | super(TrackingNetVideo, self).__init__(name, root, video_dir,
25 | init_rect, img_names, gt_rect, attr, load_img)
26 |
27 | # def load_tracker(self, path, tracker_names=None):
28 | # """
29 | # Args:
30 | # path(str): path to result
31 | # tracker_name(list): name of tracker
32 | # """
33 | # if not tracker_names:
34 | # tracker_names = [x.split('/')[-1] for x in glob(path)
35 | # if os.path.isdir(x)]
36 | # if isinstance(tracker_names, str):
37 | # tracker_names = [tracker_names]
38 | # # self.pred_trajs = {}
39 | # for name in tracker_names:
40 | # traj_file = os.path.join(path, name, self.name+'.txt')
41 | # if os.path.exists(traj_file):
42 | # with open(traj_file, 'r') as f :
43 | # self.pred_trajs[name] = [list(map(float, x.strip().split(',')))
44 | # for x in f.readlines()]
45 | # if len(self.pred_trajs[name]) != len(self.gt_traj):
46 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name)
47 | # else:
48 |
49 | # self.tracker_names = list(self.pred_trajs.keys())
50 |
51 | class TrackingNetDataset(Dataset):
52 | """
53 | Args:
54 |         name: dataset name, e.g. "TrackingNet"
55 |         dataset_root: dataset root dir
56 | """
57 | def __init__(self, name, dataset_root, load_img=False):
58 | super(TrackingNetDataset, self).__init__(name, dataset_root)
59 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
60 | meta_data = json.load(f)
61 |
62 | # load videos
63 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
64 | self.videos = {}
65 | for video in pbar:
66 | pbar.set_postfix_str(video)
67 | self.videos[video] = TrackingNetVideo(video,
68 | dataset_root,
69 | meta_data[video]['video_dir'],
70 | meta_data[video]['init_rect'],
71 | meta_data[video]['img_names'],
72 | meta_data[video]['gt_rect'],
73 | None)
74 | self.attr = {}
75 | self.attr['ALL'] = list(self.videos.keys())
76 |
--------------------------------------------------------------------------------
/training_dataset/det/gen_json_clean.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SiamMask
3 | # Licensed under The MIT License
4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
5 | # --------------------------------------------------------
6 | from os.path import join, isdir
7 | from os import mkdir
8 | import glob
9 | import numpy as np
10 | import xml.etree.ElementTree as ET
11 | import json
12 |
13 | def check_neg(bbox):
14 | x1, y1, x2, y2 = bbox
15 | w, h = x2 - x1, y2 -y1
16 | if w <= 0 or h <= 0:
17 | return False
18 | return True
19 |
20 | def check_size(frame_sz, bbox):
21 | #min_ratio = 0.1
22 | max_ratio = 0.75
23 |     # only accept objects <75% of the total frame (the >10% lower bound is disabled above)
24 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz)))
25 | ok = (area_ratio < max_ratio) # and (area_ratio > min_ratio)
26 | return ok
27 |
28 |
29 | def check_borders(frame_sz, bbox):
30 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2
31 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \
32 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \
33 | ((frame_sz[1] - bbox[3]) > dist_from_border)
34 | return ok
35 |
36 | js = {}
37 | VID_base_path = '/home/feiji/Research/Data/ILSVRC2015'
38 | ann_base_path = join(VID_base_path, 'Annotations/DET/train/')
39 | sub_sets = ('ILSVRC2013_train', 'ILSVRC2014_train_0000', 'ILSVRC2014_train_0001','ILSVRC2014_train_0002','ILSVRC2014_train_0003','ILSVRC2014_train_0004','ILSVRC2014_train_0005','ILSVRC2014_train_0006', 'val')
40 | for sub_set in sub_sets:
41 | sub_set_base_path = join(ann_base_path, sub_set)
42 |
43 | if 'ILSVRC2013_train' == sub_set:
44 | xmls = sorted(glob.glob(join(sub_set_base_path, '*', '*.xml')))
45 | else:
46 | xmls = sorted(glob.glob(join(sub_set_base_path, '*.xml')))
47 | n_imgs = len(xmls)
48 | for f, xml in enumerate(xmls):
49 | print('subset: {} frame id: {:08d} / {:08d}'.format(sub_set, f, n_imgs))
50 | xmltree = ET.parse(xml)
51 | objects = xmltree.findall('object')
52 | size = xmltree.find('size')
53 | video = join(sub_set, xml.split('/')[-1].split('.')[0])
54 |
55 | for id, object_iter in enumerate(objects):
56 | info = {}
57 | bndbox = object_iter.find('bndbox')
58 | frame_sz = [int(size.find('width').text), int(size.find('height').text)]
59 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text),
60 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)]
61 | if check_neg(bbox) and check_size(frame_sz, bbox) and check_borders(frame_sz, bbox):
62 | info['valid'] = 1
63 | info['bbox'] = bbox
64 | frame = '%06d' % (0)
65 | obj = '%02d' % (id)
66 | if video not in js:
67 | js[video] = {}
68 | if obj not in js[video]:
69 | js[video][obj] = {}
70 | js[video][obj][frame] = info
71 | #js[video][obj]['track_category'] = str(object_iter.find('name').text)
72 |
73 | json.dump(js, open('train_largeclean.json', 'w'), indent=4, sort_keys=True)
74 |
75 |
76 |
--------------------------------------------------------------------------------
/utils/model_load.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) SenseTime. All Rights Reserved.
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | from __future__ import unicode_literals
7 |
8 | import logging
9 |
10 | import torch
11 |
12 |
13 | logger = logging.getLogger('global')
14 |
15 |
16 | def check_keys(model, pretrained_state_dict):
17 | ckpt_keys = set(pretrained_state_dict.keys())
18 | model_keys = set(model.state_dict().keys())
19 | used_pretrained_keys = model_keys & ckpt_keys
20 | unused_pretrained_keys = ckpt_keys - model_keys
21 | missing_keys = model_keys - ckpt_keys
22 | # filter 'num_batches_tracked'
23 | missing_keys = [x for x in missing_keys
24 | if not x.endswith('num_batches_tracked')]
25 | if len(missing_keys) > 0:
26 | logger.info('[Warning] missing keys: {}'.format(missing_keys))
27 | logger.info('missing keys:{}'.format(len(missing_keys)))
28 | if len(unused_pretrained_keys) > 0:
29 | logger.info('[Warning] unused_pretrained_keys: {}'.format(
30 | unused_pretrained_keys))
31 | logger.info('unused checkpoint keys:{}'.format(
32 | len(unused_pretrained_keys)))
33 | logger.info('used keys:{}'.format(len(used_pretrained_keys)))
34 | assert len(used_pretrained_keys) > 0, \
35 | 'load NONE from pretrained checkpoint'
36 | return True
37 |
38 |
39 | def remove_prefix(state_dict, prefix):
40 | ''' Old style model is stored with all names of parameters
41 | share common prefix 'module.' '''
42 | logger.info('remove prefix \'{}\''.format(prefix))
43 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
44 | return {f(key): value for key, value in state_dict.items()}
45 |
46 |
47 | def load_pretrain(model, pretrained_path):
48 | logger.info('load pretrained model from {}'.format(pretrained_path))
49 | device = torch.cuda.current_device()
50 | pretrained_dict = torch.load(pretrained_path,
51 | map_location=lambda storage, loc: storage.cuda(device))
52 | if "state_dict" in pretrained_dict.keys():
53 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'],
54 | 'module.')
55 | else:
56 | pretrained_dict = remove_prefix(pretrained_dict, 'module.')
57 |
58 | try:
59 | check_keys(model, pretrained_dict)
60 |     except Exception:
61 |         logger.info('[Warning]: using pretrain as features. '
62 |                     'Adding "features." as prefix')
63 | new_dict = {}
64 | for k, v in pretrained_dict.items():
65 | k = 'features.' + k
66 | new_dict[k] = v
67 | pretrained_dict = new_dict
68 | check_keys(model, pretrained_dict)
69 | model.load_state_dict(pretrained_dict, strict=False)
70 | return model
71 |
72 |
73 | def restore_from(model, ckpt_path):
74 | device = torch.cuda.current_device()
75 | ckpt = torch.load(ckpt_path,
76 | map_location=lambda storage, loc: storage.cuda(device))
77 | epoch = ckpt['epoch']
78 |
79 | ckpt_model_dict = remove_prefix(ckpt['state_dict'], 'module.')
80 | check_keys(model, ckpt_model_dict)
81 | model.load_state_dict(ckpt_model_dict, strict=False)
82 |
83 | return model, epoch
84 |
--------------------------------------------------------------------------------
/models/attention/attention.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import numpy as np
5 | from config.config import cfg
6 |
7 | class Attention(nn.Module):
8 | def __init__(self):
9 | super(Attention, self).__init__()
10 |
11 | def forward(self, z_f, x_f):
12 | raise NotImplementedError
13 |
14 | class PixelAttention(Attention):
15 | def __init__(self, feat_in=256):
16 | super(PixelAttention, self).__init__()
17 | self.feat_in = feat_in
18 |
19 | self.spatial_pool_agl = nn.Sequential(
20 | nn.Conv2d(25, 32, 3),
21 | nn.BatchNorm2d(32),
22 | nn.ReLU(inplace=True),
23 | nn.Conv2d(32, 32, 3),
24 | nn.BatchNorm2d(32),
25 | nn.ReLU(inplace=True),
26 | nn.ConvTranspose2d(32, 32, 3),
27 | nn.BatchNorm2d(32),
28 | nn.ReLU(inplace=True),
29 | nn.ConvTranspose2d(32, 1, 3),
30 | nn.Sigmoid(),
31 | )
32 |
33 | self.spatial_pool_agr = nn.Sequential(
34 | nn.Conv2d(25, 32, 3),
35 | nn.BatchNorm2d(32),
36 | nn.ReLU(inplace=True),
37 | nn.Conv2d(32, 32, 3),
38 | nn.BatchNorm2d(32),
39 | nn.ReLU(inplace=True),
40 | nn.ConvTranspose2d(32, 32, 3),
41 | nn.BatchNorm2d(32),
42 | nn.ReLU(inplace=True),
43 | nn.ConvTranspose2d(32, 1, 3),
44 | nn.Sigmoid(),
45 | )
46 |
47 | self.channel_pool_ag = nn.Sequential(
48 | nn.Linear(feat_in, feat_in//4),
49 | nn.ReLU(inplace=True),
50 | nn.Linear(feat_in//4, feat_in),
51 | )
52 |
53 | self.channel_maxpool = nn.MaxPool2d(cfg.train.search_pool_size - cfg.train.template_pool_size + 1)
54 | self.channel_avgpool = nn.AvgPool2d(cfg.train.search_pool_size - cfg.train.template_pool_size + 1)
55 | self.channel_activation = nn.Sigmoid()
56 | for m in self.modules():
57 | if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
58 | nn.init.kaiming_normal_(
59 | m.weight.data, mode='fan_out', nonlinearity='relu')
60 | elif isinstance(m, nn.BatchNorm2d):
61 | m.weight.data.fill_(1.0)
62 | m.bias.data.zero_()
63 |
64 | def forward(self, z_f, x_f):
65 | b, c, h, w = z_f.shape
66 |         kernel = z_f.reshape(b,c,h*w).permute(0,2,1).reshape(-1, c, 1, 1)  # every template position becomes a 1x1 kernel: (b*h*w, c, 1, 1)
67 |         b, c, h, w = x_f.shape
68 |         xf_reshape = x_f.reshape(1, -1, h, w)
69 |         pixel_corr = F.conv2d(xf_reshape, kernel, groups=b).reshape(b, -1, h, w)# / c  # pixel-wise correlation: (b, h_z*w_z, h_x, w_x)
70 |         b, c, h, w = pixel_corr.shape
71 |         spatial_att_l = self.spatial_pool_agl(pixel_corr)  # spatial attention for the top-left corner branch
72 |         spatial_att_r = self.spatial_pool_agr(pixel_corr)  # spatial attention for the bottom-right corner branch
73 |         b, c, h, w = z_f.shape
74 |         kernel = z_f.reshape(b*c, 1, h, w)
75 |         b, c, h, w = x_f.shape
76 |         xf_reshape = x_f.reshape(1, b*c, h, w)
77 |         depth_corr = F.conv2d(xf_reshape, kernel, groups=b*c)  # depth-wise (per-channel) correlation
78 |         depth_corr = depth_corr.reshape(b, c, depth_corr.shape[-2], depth_corr.shape[-1])
79 |         channel_max_pool = self.channel_maxpool(depth_corr).squeeze()
80 |         channel_avg_pool = self.channel_avgpool(depth_corr).squeeze()
81 |         channel_att = self.channel_activation(self.channel_pool_ag(channel_max_pool) + self.channel_pool_ag(channel_avg_pool)).unsqueeze(-1).unsqueeze(-1)  # channel attention from max- and avg-pooled depth-wise correlation
82 |
83 |         x_f = x_f * channel_att  # reweight search features by channel attention
84 |         x_f_l = x_f * spatial_att_l  # search features for top-left corner estimation
85 |         x_f_r = x_f * spatial_att_r  # search features for bottom-right corner estimation
86 |         return x_f_l, x_f_r
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # [CGACD](https://openaccess.thecvf.com/content_CVPR_2020/html/Du_Correlation-Guided_Attention_for_Corner_Detection_Based_Visual_Tracking_CVPR_2020_paper.html)
2 |
3 | ## 1. Environment setup
4 | This code has been tested on Ubuntu 18.04, Python 3.7, PyTorch 1.1.0, and CUDA 10.0. Please install the required libraries before running this code:
5 | ```bash
6 | pip install -r requirements.txt
7 | python setup.py build_ext --inplace
8 | ```
9 | ### Add CGACD to your PYTHONPATH
10 | ```bash
11 | export PYTHONPATH=/path/to/CGACD:$PYTHONPATH
12 | ```
13 |
14 |
15 | ## 2. Test
16 | Download the pretrained models ([OTB and VOT](https://pan.baidu.com/s/11z74ZUGAPhupPLNrbGN5NQ), code: 16s0) and put them into the `checkpoint` directory.
17 |
18 | Download the testing datasets and put them into the `dataset` directory. JSON annotations of commonly used datasets can be downloaded from [BaiduYun](https://pan.baidu.com/s/1js0Qhykqqur7_lNRtle1tA#list/path=%2F) or [Google Drive](https://drive.google.com/drive/folders/1TC8obz4TvlbvTRWbS4Cn4VwwJ8tXs2sv?usp=sharing). If you want to test the tracker on a new dataset, please refer to [pysot-toolkit](https://github.com/StrangerZhang/pysot-toolkit) to set up the test dataset.
19 |
20 | ```bash
21 | python tools/test.py \
22 | --dataset VOT2018 \ # dataset_name
23 |     --model checkpoint/CGACD_VOT.pth \ # model path
24 | --save_name CGACD_VOT
25 | ```
26 |
27 | The testing results will be saved in the `results/dataset_name/tracker_name` directory.
28 |
29 | ## 3. Train
30 | ### Prepare training datasets
31 |
32 | Download the datasets:
33 | * [VID](http://image-net.org/challenges/LSVRC/2017/)
34 | * [YOUTUBEBB](https://research.google.com/youtube-bb/)
35 | * [DET](http://image-net.org/challenges/LSVRC/2017/)
36 | * [COCO](http://cocodataset.org)
37 | * [GOT-10K](http://got-10k.aitestunion.com/downloads)
38 |
39 | Scripts to prepare the training datasets are provided in the `training_dataset` directory.
40 |
41 | ### Download pretrained backbones
42 | Download the pretrained backbones from [Google Drive](https://drive.google.com/drive/folders/1DuXVWVYIeynAcvt9uxtkuleV6bs6e3T9) or [BaiduYun](https://pan.baidu.com/s/1pYe73PjkQx4Ph9cd3ePfCQ) (code: 5o1d) and put them into the `pretrained_net` directory.
43 |
44 | ### Train a model
45 | To train the CGACD model, run `tools/train.py` with the desired configs:
46 |
47 | ```bash
48 | python tools/train.py \
49 | --config=experiments/cgacd_resnet/cgacd_resnet.yml \
50 | -b 64 \
51 | -j 16 \
52 | --save_name cgacd_resnet
53 | ```
54 |
55 | We use two RTX 2080 Ti GPUs for training.
56 |
57 | ## 4. Evaluation
58 | We provide the tracking [results](https://pan.baidu.com/s/1fM36M19LUgd3hI0QFnwkdw) (code: qw69) of OTB2015, VOT2018, UAV123, and LaSOT. If you want to evaluate the tracker, please put those results into the `results` directory.
59 |
60 | ```bash
61 | python eval.py \
62 | -p ./results \ # result path
63 | -d VOT2018 \ # dataset_name
64 | -t CGACD_VOT # tracker_name
65 | ```
66 |
67 | ## 5. Acknowledgement
68 | The code is implemented based on [pysot](https://github.com/STVIR/pysot) and [PreciseRoIPooling](https://github.com/vacancy/PreciseRoIPooling). We would like to express our sincere thanks to the contributors.
69 |
70 |
71 | ## 6. Cite
72 | If you use CGACD in your work, please cite our paper:
73 | > @InProceedings{Du_2020_CVPR,
74 | >   author = {Du, Fei and Liu, Peng and Zhao, Wei and Tang, Xianglong},
75 | >   title = {Correlation-Guided Attention for Corner Detection Based Visual Tracking},
76 | >   booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
77 | >   month = {June},
78 | >   year = {2020}
79 | > }
80 |
81 |
82 |
--------------------------------------------------------------------------------
/track/run_CGACD.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch.nn.functional as F
3 | import torch
4 | from matplotlib import pyplot as plt
5 | import pdb
6 | from utils.utils import get_subwindow_tracking
7 | from config.config import cfg
8 |
9 | def tracker_eval(net, x_crop, target_pos, template_bbox, target_sz, window, scale_z):
10 | target, penalty, score, best_pscore_id = net.track(x_crop, target_sz, template_bbox, window)
11 | #pdb.set_trace()
12 | target = target / scale_z
13 | #import pdb
14 | #pdb.set_trace()
15 | target_sz = target_sz / scale_z
16 | lr = penalty[best_pscore_id] * score[best_pscore_id] * cfg.track.lr
17 |
18 | res_x = target[0] + target_pos[0]
19 | res_y = target[1] + target_pos[1]
20 |
21 | res_w = target_sz[0] * (1 - lr) + target[2] * lr
22 | res_h = target_sz[1] * (1 - lr) + target[3] * lr
23 |
24 | target_pos = np.array([res_x, res_y])
25 | target_sz = np.array([res_w, res_h])
26 | return target_pos, target_sz, score[best_pscore_id]
27 |
28 |
29 | def CGACD_init(im, target_pos, target_sz, net):
30 | state = dict()
31 | state['im_h'] = im.shape[0]
32 | state['im_w'] = im.shape[1]
33 |
34 | avg_chans = np.mean(im, axis=(0, 1))
35 |
36 | wc_z = target_sz[0] + cfg.track.contex_amount * sum(target_sz)
37 | hc_z = target_sz[1] + cfg.track.contex_amount * sum(target_sz)
38 | s_z = round(np.sqrt(wc_z * hc_z))
39 | # initialize the exemplar
40 | z_crop = get_subwindow_tracking(im, target_pos, cfg.track.template_size, s_z, avg_chans)
41 |
42 | scale_z = cfg.track.template_size / s_z
43 | w, h = target_sz[0] * scale_z, target_sz[1] * scale_z
44 | cx, cy = cfg.track.template_size//2, cfg.track.template_size//2
45 | template_bbox = [cx - w*0.5, cy - h*0.5, cx + w*0.5, cy + h*0.5]
46 |
47 | z = torch.from_numpy(np.transpose(z_crop, (2, 0, 1))).float().unsqueeze(0)
48 | net.template(z.cuda())
49 |
50 | if cfg.track.windowing == 'cosine':
51 | window = np.outer(np.hanning(cfg.track.response_size), np.hanning(cfg.track.response_size))
52 | elif cfg.track.windowing == 'uniform':
53 | window = np.ones((cfg.track.response_size, cfg.track.response_size))
54 | window = window.flatten()
55 |
56 | state['net'] = net
57 | state['avg_chans'] = avg_chans
58 | state['window'] = window
59 | state['target_pos'] = target_pos
60 | state['target_sz'] = target_sz
61 | state['template_bbox'] = template_bbox
62 | return state
63 |
64 |
65 | def CGACD_track(state, im):
66 | net = state['net']
67 | avg_chans = state['avg_chans']
68 | window = state['window']
69 | target_pos = state['target_pos']
70 | target_sz = state['target_sz']
71 | template_bbox = state['template_bbox']
72 | wc_z = target_sz[1] + cfg.track.contex_amount * sum(target_sz)
73 | hc_z = target_sz[0] + cfg.track.contex_amount * sum(target_sz)
74 | s_z = np.sqrt(wc_z * hc_z)
75 | scale_z = cfg.track.template_size / s_z
76 | s_x = s_z * (cfg.track.search_size / cfg.track.template_size)
77 |
78 | # extract scaled crops for search region x at previous target position
79 | x_crop = get_subwindow_tracking(im, target_pos, cfg.track.search_size, round(s_x), avg_chans)
80 |
81 | x_crop = torch.from_numpy(np.transpose(x_crop, (2, 0, 1))).float().unsqueeze(0)
82 |
83 | target_pos, target_sz, best_score = tracker_eval(net, x_crop.cuda(), target_pos, template_bbox, target_sz * scale_z, window, scale_z)
84 | target_pos[0] = max(0, min(state['im_w'], target_pos[0]))
85 | target_pos[1] = max(0, min(state['im_h'], target_pos[1]))
86 | target_sz[0] = max(10, min(state['im_w'], target_sz[0]))
87 | target_sz[1] = max(10, min(state['im_h'], target_sz[1]))
88 | state['target_pos'] = target_pos
89 | state['target_sz'] = target_sz
90 | state['best_score'] = best_score
91 | return state
92 |
--------------------------------------------------------------------------------
/training_dataset/got10k/gen_json_clean.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SiamMask
3 | # Licensed under The MIT License
4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
5 | # --------------------------------------------------------
6 | from os.path import join, isdir
7 | from os import listdir
8 | import json
9 | import numpy as np
10 | import glob
11 | import cv2
12 | from pathlib import Path
13 |
14 | base_path = '/home/feiji/Research/Data/GOT-10k'
15 | sub_sets= sorted({'train', 'val'})
16 |
17 | def check_neg(bbox):
18 | x1, y1, x2, y2 = bbox
19 | w, h = x2 - x1, y2 -y1
20 | if w <= 0 or h <= 0:
21 | return False
22 | return True
23 |
24 | def check_size(frame_sz, bbox):
25 | #min_ratio = 0.1
26 | max_ratio = 0.75
27 |     # only accept objects <75% of the total frame (the >10% lower bound is disabled above)
28 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz)))
29 | ok = (area_ratio < max_ratio) # and (area_ratio > min_ratio)
30 | return ok
31 |
32 |
33 | def check_borders(frame_sz, bbox):
34 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2
35 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \
36 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \
37 | ((frame_sz[1] - bbox[3]) > dist_from_border)
38 | return ok
39 |
40 | def isinvalid(name):
41 | allname = 'GOT-10k_Train_008628' + 'GOT-10k_Train_008630' + 'GOT-10k_Train_009058' + \
42 | 'GOT-10k_Train_009059' + 'GOT-10k_Train_008633' + 'GOT-10k_Train_008632' + \
43 | 'GOT-10k_Train_008625' + 'GOT-10k_Train_008623' + 'GOT-10k_Train_008637' + \
44 | 'GOT-10k_Train_008627' + 'GOT-10k_Train_008629' + 'GOT-10k_Train_008634' + \
45 | 'GOT-10k_Train_008626' + 'GOT-10k_Train_005996' + 'GOT-10k_Train_004419'
46 |
47 | if allname.find(name) != -1:
48 | return True
49 | return False
50 |
51 |
52 | snippets = dict()
53 | n_snippets = 0
54 | n_videos = 0
55 | for subset in sub_sets:
56 | sub_set_base_path = join(base_path, subset)
57 | videos = sorted(listdir(sub_set_base_path))
58 | for video in videos:
59 | if not isdir(join(sub_set_base_path, video)):
60 | continue
61 | if isinvalid(video):
62 | continue
63 | n_videos += 1
64 | ground_truth_file = join(sub_set_base_path, video, 'groundtruth.txt')
65 | full_occlusion_file = join(sub_set_base_path, video, 'absence.label')
66 | #cover = join(sub_set_base_path, video, 'cover.label')
67 | gt = np.genfromtxt(ground_truth_file, delimiter=',', dtype=float).astype(np.int)
68 | fo = np.genfromtxt(full_occlusion_file, dtype=int)
69 | subdir_paths = sorted(glob.glob(join(sub_set_base_path, video, '*.jpg')))
70 | snippets[join(subset, video)] = dict()
71 | snippet = dict()
72 | track_id = 0
73 | valid_num = 0
74 | img = cv2.imread(subdir_paths[0])
75 | frame_sz = [img.shape[1], img.shape[0]]
76 | for i, img in enumerate(subdir_paths):
77 | info = {}
78 | filename = Path(img).stem
79 | bbox = gt[i]
80 | fo_i = fo[i]
81 | bbox = [int(bbox[0]), int(bbox[1]), int(bbox[0]+bbox[2]), int(bbox[1]+bbox[3])]
82 | info['valid'] = 0
83 | if (not fo_i) and check_neg(bbox) and check_size(frame_sz, bbox) and check_borders(frame_sz, bbox):
84 | info['valid'] = 1
85 | valid_num += 1
86 | info['bbox'] = bbox
87 | snippet['{:06d}'.format(int(filename))] = info
88 | #snippet['track_category'] = 0
89 | if valid_num > 1:
90 | snippets[join(subset, video)]['{:02d}'.format(track_id)] = snippet
91 | n_snippets += 1
92 | print('video: {:d} snippets_num: {:d}'.format(n_videos, n_snippets))
93 |
94 | json.dump(snippets, open('train_largeclean.json', 'w'), indent=4, sort_keys=True)
95 | print('done!')
96 |
--------------------------------------------------------------------------------
/toolkit/datasets/lasot.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import numpy as np
4 |
5 | from tqdm import tqdm
6 | from glob import glob
7 |
8 | from .dataset import Dataset
9 | from .video import Video
10 |
11 | class LaSOTVideo(Video):
12 | """
13 | Args:
14 | name: video name
15 | root: dataset root
16 | video_dir: video directory
17 | init_rect: init rectangle
18 | img_names: image names
19 | gt_rect: groundtruth rectangle
20 | attr: attribute of video
21 | """
22 | def __init__(self, name, root, video_dir, init_rect, img_names,
23 | gt_rect, attr, absent, load_img=False):
24 | super(LaSOTVideo, self).__init__(name, root, video_dir,
25 | init_rect, img_names, gt_rect, attr, load_img)
26 | self.absent = np.array(absent, np.int8)
27 |
28 | def load_tracker(self, path, tracker_names=None, store=True):
29 | """
30 | Args:
31 | path(str): path to result
32 | tracker_name(list): name of tracker
33 | """
34 | if not tracker_names:
35 | tracker_names = [x.split('/')[-1] for x in glob(path)
36 | if os.path.isdir(x)]
37 | if isinstance(tracker_names, str):
38 | tracker_names = [tracker_names]
39 | for name in tracker_names:
40 | traj_file = os.path.join(path, name, self.name+'.txt')
41 | if os.path.exists(traj_file):
42 | with open(traj_file, 'r') as f :
43 | pred_traj = [list(map(float, x.strip().split(',')))
44 | for x in f.readlines()]
45 | else:
46 | print("File not exists: ", traj_file)
47 | if self.name == 'monkey-17':
48 | pred_traj = pred_traj[:len(self.gt_traj)]
49 | if store:
50 | self.pred_trajs[name] = pred_traj
51 | else:
52 | return pred_traj
53 | self.tracker_names = list(self.pred_trajs.keys())
54 |
55 |
56 |
57 | class LaSOTDataset(Dataset):
58 | """
59 | Args:
60 |         name: dataset name, e.g. 'LaSOT'
61 |         dataset_root: dataset root
62 |         load_img: whether to load all imgs
63 | """
64 | def __init__(self, name, dataset_root, load_img=False):
65 | super(LaSOTDataset, self).__init__(name, dataset_root)
66 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
67 | meta_data = json.load(f)
68 |
69 | # load videos
70 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
71 | self.videos = {}
72 | for video in pbar:
73 | pbar.set_postfix_str(video)
74 | for i, nm in enumerate(meta_data[video]['img_names']):
75 | for_name = nm.split('-')[0]
76 | meta_data[video]['img_names'][i] = os.path.join(for_name, nm)
77 | self.videos[video] = LaSOTVideo(video,
78 | dataset_root,
79 | meta_data[video]['video_dir'],
80 | meta_data[video]['init_rect'],
81 | meta_data[video]['img_names'],
82 | meta_data[video]['gt_rect'],
83 | meta_data[video]['attr'],
84 | meta_data[video]['absent'])
85 |
86 | # set attr
87 | attr = []
88 | for x in self.videos.values():
89 | attr += x.attr
90 | attr = set(attr)
91 | self.attr = {}
92 | self.attr['ALL'] = list(self.videos.keys())
93 | for x in attr:
94 | self.attr[x] = []
95 | for k, v in self.videos.items():
96 | for attr_ in v.attr:
97 | self.attr[attr_].append(k)
98 |
99 |
100 |
--------------------------------------------------------------------------------
/models/PreciseRoIPooling/README.md:
--------------------------------------------------------------------------------
1 | # PreciseRoIPooling
2 | This repo implements the **Precise RoI Pooling** (PrRoI Pooling), proposed in the paper **Acquisition of Localization Confidence for Accurate Object Detection** published at ECCV 2018 (Oral Presentation).
3 |
4 | **Acquisition of Localization Confidence for Accurate Object Detection**
5 |
6 | _Borui Jiang*, Ruixuan Luo*, Jiayuan Mao*, Tete Xiao, Yuning Jiang_ (* indicates equal contribution.)
7 |
8 | https://arxiv.org/abs/1807.11590
9 |
10 | ## Brief
11 |
12 | In short, Precise RoI Pooling is an integration-based (bilinear interpolation) average pooling method for RoI Pooling. It avoids any quantization and has a continuous gradient on bounding box coordinates. It is:
13 |
14 | - different from the original RoI Pooling proposed in [Fast R-CNN](https://arxiv.org/abs/1504.08083). PrRoI Pooling uses average pooling instead of max pooling for each bin and has a continuous gradient on bounding box coordinates. That is, one can take the derivatives of some loss function w.r.t the coordinates of each RoI and optimize the RoI coordinates.
15 | - different from the RoI Align proposed in [Mask R-CNN](https://arxiv.org/abs/1703.06870). PrRoI Pooling uses a full integration-based average pooling instead of sampling a constant number of points. This makes the gradient w.r.t. the coordinates continuous.
16 |
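As a rough sketch of the computation (our notation; a paraphrase, not copied verbatim from the paper): for a bin with continuous corners `(x1, y1)` and `(x2, y2)` and the bilinearly interpolated feature map `f(x, y)`, PrRoI Pooling outputs the exact average

```latex
\mathrm{PrPool}(\mathrm{bin}, \mathcal{F}) =
    \frac{1}{(x_2 - x_1)(y_2 - y_1)}
    \int_{y_1}^{y_2} \int_{x_1}^{x_2} f(x, y) \, dx \, dy
```

Because this double integral is evaluated in closed form, the pooled value is differentiable w.r.t. `x1, y1, x2, y2` as well as the feature values, which is what makes the RoI coordinates themselves optimizable.
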
17 | For a better comparison, we illustrate RoI Pooling, RoI Align and PrRoI Pooling in the following figure. More details including the gradient computation can be found in our paper.
18 |
19 | ![Comparison of RoI Pooling, RoI Align and PrRoI Pooling](_assets/prroi_visualization.png)
20 |
21 | ## Implementation
22 |
23 | PrRoI Pooling was originally implemented by [Tete Xiao](http://tetexiao.com/) based on MegBrain, an (internal) deep learning framework built by Megvii Inc. It was later adapted into open-source deep learning frameworks. Currently, we only support PyTorch. Unfortunately, we don't have any specific plan for the adaptation into other frameworks such as TensorFlow, but any contributions (pull requests) will be more than welcome.
24 |
25 | ## Usage (PyTorch 1.0)
26 |
27 | In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 1.0+ and only supports CUDA (CPU mode is not implemented).
28 | Since we use PyTorch JIT for cxx/cuda code compilation, to use the module in your code, simply do:
29 |
30 | ```
31 | from prroi_pool import PrRoIPool2D
32 |
33 | avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale)
34 | roi_features = avg_pool(features, rois)
35 |
36 | # for those who want to use the "functional"
37 |
38 | from prroi_pool.functional import prroi_pool2d
39 | roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale)
40 | ```
41 |
42 |
43 | ## Usage (PyTorch 0.4)
44 |
45 | **!!! Please first check out the branch pytorch0.4.**
46 |
47 | In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 0.4 and only supports CUDA (CPU mode is not implemented).
48 | To use the PrRoI Pooling module, first go to `pytorch/prroi_pool` and execute `./travis.sh` to compile the essential components (you may need `nvcc` for this step). To use the module in your code, simply do:
49 |
50 | ```
51 | from prroi_pool import PrRoIPool2D
52 |
53 | avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale)
54 | roi_features = avg_pool(features, rois)
55 |
56 | # for those who want to use the "functional"
57 |
58 | from prroi_pool.functional import prroi_pool2d
59 | roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale)
60 | ```
61 |
62 | Here,
63 |
64 | - RoI is an `m * 5` float tensor of format `(batch_index, x0, y0, x1, y1)`, following the convention in the original Caffe implementation of RoI Pooling, although in some frameworks the batch indices are provided by an integer tensor.
65 | - `spatial_scale` is multiplied to the RoIs. For example, if your feature maps are down-sampled by a factor of 16 (w.r.t. the input image), you should use a spatial scale of `1/16`.
66 | - The RoI coordinates follow the [L, R) convention. That is, `(0, 0, 4, 4)` denotes a box of size `4x4`.
67 |
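Putting the conventions above together, a minimal usage sketch might look like the following (the shapes and RoI values are illustrative only, not taken from any test in this repo):

```python
import torch
from prroi_pool import PrRoIPool2D

# two images with 256-channel feature maps, down-sampled by 16 w.r.t. the input
features = torch.rand(2, 256, 32, 32).cuda()

# each RoI is (batch_index, x0, y0, x1, y1) in input-image coordinates
rois = torch.tensor([
    [0,  0.0,  0.0, 64.0, 64.0],
    [1, 16.0, 16.0, 96.0, 80.0],
], dtype=torch.float32).cuda()
rois.requires_grad_()  # gradients can also flow into the RoI coordinates

avg_pool = PrRoIPool2D(7, 7, 1.0 / 16)   # window_height, window_width, spatial_scale
roi_features = avg_pool(features, rois)  # -> (2, 256, 7, 7)
```
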
--------------------------------------------------------------------------------
/toolkit/utils/src/region.h:
--------------------------------------------------------------------------------
1 | /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 4; tab-width: 4 -*- */
2 |
3 | #ifndef _REGION_H_
4 | #define _REGION_H_
5 |
6 | #ifdef TRAX_STATIC_DEFINE
7 | # define __TRAX_EXPORT
8 | #else
9 | # ifndef __TRAX_EXPORT
10 | # if defined(_MSC_VER)
11 | # ifdef trax_EXPORTS
12 | /* We are building this library */
13 | # define __TRAX_EXPORT __declspec(dllexport)
14 | # else
15 | /* We are using this library */
16 | # define __TRAX_EXPORT __declspec(dllimport)
17 | # endif
18 | # elif defined(__GNUC__)
19 | # ifdef trax_EXPORTS
20 | /* We are building this library */
21 | # define __TRAX_EXPORT __attribute__((visibility("default")))
22 | # else
23 | /* We are using this library */
24 | # define __TRAX_EXPORT __attribute__((visibility("default")))
25 | # endif
26 | # endif
27 | # endif
28 | #endif
29 |
30 | #ifndef MAX
31 | #define MAX(a,b) (((a) > (b)) ? (a) : (b))
32 | #endif
33 |
34 | #ifndef MIN
35 | #define MIN(a,b) (((a) < (b)) ? (a) : (b))
36 | #endif
37 |
38 | #define TRAX_DEFAULT_CODE 0
39 |
40 | #define REGION_LEGACY_RASTERIZATION 1
41 |
42 | #ifdef __cplusplus
43 | extern "C" {
44 | #endif
45 |
46 | typedef enum region_type {EMPTY, SPECIAL, RECTANGLE, POLYGON, MASK} region_type;
47 |
48 | typedef struct region_bounds {
49 |
50 | float top;
51 | float bottom;
52 | float left;
53 | float right;
54 |
55 | } region_bounds;
56 |
57 | typedef struct region_polygon {
58 |
59 | int count;
60 |
61 | float* x;
62 | float* y;
63 |
64 | } region_polygon;
65 |
66 | typedef struct region_mask {
67 |
68 | int x;
69 | int y;
70 |
71 | int width;
72 | int height;
73 |
74 | char* data;
75 |
76 | } region_mask;
77 |
78 | typedef struct region_rectangle {
79 |
80 | float x;
81 | float y;
82 | float width;
83 | float height;
84 |
85 | } region_rectangle;
86 |
87 | typedef struct region_container {
88 | enum region_type type;
89 | union {
90 | region_rectangle rectangle;
91 | region_polygon polygon;
92 | region_mask mask;
93 | int special;
94 | } data;
95 | } region_container;
96 |
97 | typedef struct region_overlap {
98 |
99 | float overlap;
100 | float only1;
101 | float only2;
102 |
103 | } region_overlap;
104 |
105 | extern const region_bounds region_no_bounds;
106 |
107 | __TRAX_EXPORT int region_set_flags(int mask);
108 |
109 | __TRAX_EXPORT int region_clear_flags(int mask);
110 |
111 | __TRAX_EXPORT region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds);
112 |
113 | __TRAX_EXPORT float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds);
114 |
115 | __TRAX_EXPORT region_bounds region_create_bounds(float left, float top, float right, float bottom);
116 |
117 | __TRAX_EXPORT region_bounds region_compute_bounds(const region_container* region);
118 |
119 | __TRAX_EXPORT int region_parse(const char* buffer, region_container** region);
120 |
121 | __TRAX_EXPORT char* region_string(region_container* region);
122 |
123 | __TRAX_EXPORT void region_print(FILE* out, region_container* region);
124 |
125 | __TRAX_EXPORT region_container* region_convert(const region_container* region, region_type type);
126 |
127 | __TRAX_EXPORT void region_release(region_container** region);
128 |
129 | __TRAX_EXPORT region_container* region_create_special(int code);
130 |
131 | __TRAX_EXPORT region_container* region_create_rectangle(float x, float y, float width, float height);
132 |
133 | __TRAX_EXPORT region_container* region_create_polygon(int count);
134 |
135 | __TRAX_EXPORT int region_contains_point(region_container* r, float x, float y);
136 |
137 | __TRAX_EXPORT void region_get_mask(region_container* r, char* mask, int width, int height);
138 |
139 | __TRAX_EXPORT void region_get_mask_offset(region_container* r, char* mask, int x, int y, int width, int height);
140 |
141 | #ifdef __cplusplus
142 | }
143 | #endif
144 |
145 | #endif
146 |
--------------------------------------------------------------------------------
/training_dataset/coco/pycocotools/common/gason.h:
--------------------------------------------------------------------------------
1 | // https://github.com/vivkin/gason - pulled January 10, 2016
2 | #pragma once
3 |
4 | #include <stdint.h>
5 | #include <stddef.h>
6 | #include <assert.h>
7 |
8 | enum JsonTag {
9 | JSON_NUMBER = 0,
10 | JSON_STRING,
11 | JSON_ARRAY,
12 | JSON_OBJECT,
13 | JSON_TRUE,
14 | JSON_FALSE,
15 | JSON_NULL = 0xF
16 | };
17 |
18 | struct JsonNode;
19 |
20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL
21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL
22 | #define JSON_VALUE_TAG_MASK 0xF
23 | #define JSON_VALUE_TAG_SHIFT 47
24 |
25 | union JsonValue {
26 | uint64_t ival;
27 | double fval;
28 |
29 | JsonValue(double x)
30 | : fval(x) {
31 | }
32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) {
33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK);
34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload;
35 | }
36 | bool isDouble() const {
37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK;
38 | }
39 | JsonTag getTag() const {
40 | return isDouble() ? JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK);
41 | }
42 | uint64_t getPayload() const {
43 | assert(!isDouble());
44 | return ival & JSON_VALUE_PAYLOAD_MASK;
45 | }
46 | double toNumber() const {
47 | assert(getTag() == JSON_NUMBER);
48 | return fval;
49 | }
50 | char *toString() const {
51 | assert(getTag() == JSON_STRING);
52 | return (char *)getPayload();
53 | }
54 | JsonNode *toNode() const {
55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT);
56 | return (JsonNode *)getPayload();
57 | }
58 | };
59 |
60 | struct JsonNode {
61 | JsonValue value;
62 | JsonNode *next;
63 | char *key;
64 | };
65 |
66 | struct JsonIterator {
67 | JsonNode *p;
68 |
69 | void operator++() {
70 | p = p->next;
71 | }
72 | bool operator!=(const JsonIterator &x) const {
73 | return p != x.p;
74 | }
75 | JsonNode *operator*() const {
76 | return p;
77 | }
78 | JsonNode *operator->() const {
79 | return p;
80 | }
81 | };
82 |
83 | inline JsonIterator begin(JsonValue o) {
84 | return JsonIterator{o.toNode()};
85 | }
86 | inline JsonIterator end(JsonValue) {
87 | return JsonIterator{nullptr};
88 | }
89 |
90 | #define JSON_ERRNO_MAP(XX) \
91 | XX(OK, "ok") \
92 | XX(BAD_NUMBER, "bad number") \
93 | XX(BAD_STRING, "bad string") \
94 | XX(BAD_IDENTIFIER, "bad identifier") \
95 | XX(STACK_OVERFLOW, "stack overflow") \
96 | XX(STACK_UNDERFLOW, "stack underflow") \
97 | XX(MISMATCH_BRACKET, "mismatch bracket") \
98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \
99 | XX(UNQUOTED_KEY, "unquoted key") \
100 | XX(BREAKING_BAD, "breaking bad") \
101 | XX(ALLOCATION_FAILURE, "allocation failure")
102 |
103 | enum JsonErrno {
104 | #define XX(no, str) JSON_##no,
105 | JSON_ERRNO_MAP(XX)
106 | #undef XX
107 | };
108 |
109 | const char *jsonStrError(int err);
110 |
111 | class JsonAllocator {
112 | struct Zone {
113 | Zone *next;
114 | size_t used;
115 | } *head = nullptr;
116 |
117 | public:
118 | JsonAllocator() = default;
119 | JsonAllocator(const JsonAllocator &) = delete;
120 | JsonAllocator &operator=(const JsonAllocator &) = delete;
121 | JsonAllocator(JsonAllocator &&x) : head(x.head) {
122 | x.head = nullptr;
123 | }
124 | JsonAllocator &operator=(JsonAllocator &&x) {
125 | head = x.head;
126 | x.head = nullptr;
127 | return *this;
128 | }
129 | ~JsonAllocator() {
130 | deallocate();
131 | }
132 | void *allocate(size_t size);
133 | void deallocate();
134 | };
135 |
136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator);
137 |
--------------------------------------------------------------------------------
/training_dataset/vid/gen_json_clean.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SiamMask
3 | # Licensed under The MIT License
4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
5 | # --------------------------------------------------------
6 | from os.path import join
7 | from os import listdir
8 | import json
9 | import numpy as np
10 |
11 | print('load json (raw vid info), please wait 20 seconds~')
12 | vid = json.load(open('vid.json', 'r'))
13 |
14 | def check_neg(bbox):
15 | x1, y1, x2, y2 = bbox
16 | w, h = x2 - x1, y2 -y1
17 | if w <= 0 or h <= 0:
18 | return False
19 | return True
20 |
21 | def check_size(frame_sz, bbox):
22 | #min_ratio = 0.1
23 | max_ratio = 0.75
24 |     # only accept objects <75% of the total frame (the >10% lower bound is disabled above)
25 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz)))
26 | ok = (area_ratio < max_ratio) # and (area_ratio > min_ratio)
27 | return ok
28 |
29 |
30 | def check_borders(frame_sz, bbox):
31 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2
32 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \
33 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \
34 | ((frame_sz[1] - bbox[3]) > dist_from_border)
35 | return ok
36 |
37 |
38 | snippets = dict()
39 | n_snippets = 0
40 | n_videos = 0
41 | for subset in vid:
42 | for video in subset:
43 | n_videos += 1
44 | frames = video['frame']
45 | id_set = []
46 | id_frames = [[]] * 60 # at most 60 objects
47 | for f, frame in enumerate(frames):
48 | objs = frame['objs']
49 | frame_sz = frame['frame_sz']
50 | for obj in objs:
51 | trackid = obj['trackid']
52 | occluded = obj['occ']
53 | bbox = obj['bbox']
54 | # if occluded:
55 | # continue
56 | #
57 | obj['valid'] = 0
58 | if not(occluded) and check_neg(bbox) and check_size(frame_sz, bbox) and check_borders(frame_sz, bbox):
59 | obj['valid'] = 1
60 | #
61 | # if obj['c'] in ['n01674464', 'n01726692', 'n04468005', 'n02062744']:
62 | # continue
63 |
64 | if trackid not in id_set:
65 | id_set.append(trackid)
66 | id_frames[trackid] = []
67 | id_frames[trackid].append(f)
68 | if len(id_set) > 0:
69 | snippets[video['base_path']] = dict()
70 | for selected in id_set:
71 | frame_ids = sorted(id_frames[selected])
72 | sequences = np.split(frame_ids, np.array(np.where(np.diff(frame_ids) > 1)[0]) + 1)
73 | sequences = [s for s in sequences if len(s) > 1] # remove isolated frame.
74 | for seq in sequences:
75 | snippet = dict()
76 | valid_num = 0
77 | for frame_id in seq:
78 | info = {}
79 | frame = frames[frame_id]
80 | for obj in frame['objs']:
81 | if obj['trackid'] == selected:
82 | o = obj
83 | continue
84 | info['bbox'] = o['bbox']
85 | info['valid'] = o['valid']
86 | if o['valid'] == 1:
87 | valid_num+=1
88 | snippet[frame['img_path'].split('.')[0]] = info
89 | #snippet['track_category'] = o['c']
90 | if valid_num > 0:
91 | snippets[video['base_path']]['{:02d}'.format(selected)] = snippet
92 | n_snippets += 1
93 | print('video: {:d} snippets_num: {:d}'.format(n_videos, n_snippets))
94 |
95 | train = {k:v for (k,v) in snippets.items() if 'train' in k}
96 | val = {k:v for (k,v) in snippets.items() if 'val' in k}
97 |
98 | json.dump(train, open('train_largeclean.json', 'w'), indent=4, sort_keys=True)
99 | json.dump(val, open('val_largeclean.json', 'w'), indent=4, sort_keys=True)
100 | print('done!')
101 |
--------------------------------------------------------------------------------
/models/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.c:
--------------------------------------------------------------------------------
1 | /*
2 | * File : prroi_pooling_gpu.c
3 | * Author : Jiayuan Mao, Tete Xiao
4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
5 | * Date : 07/13/2018
6 | *
7 | * Distributed under terms of the MIT license.
8 | * Copyright (c) 2017 Megvii Technology Limited.
9 | */
10 |
11 | #include <math.h>
12 | #include <torch/extension.h>
13 |
14 | #include <ATen/ATen.h>
15 | #include <ATen/cuda/CUDAContext.h>
16 |
17 | #include <THC/THC.h>
18 |
19 | #include "prroi_pooling_gpu_impl.cuh"
20 |
21 |
22 | at::Tensor prroi_pooling_forward_cuda(const at::Tensor &features, const at::Tensor &rois, int pooled_height, int pooled_width, float spatial_scale) {
23 | int nr_rois = rois.size(0);
24 | int nr_channels = features.size(1);
25 | int height = features.size(2);
26 | int width = features.size(3);
27 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width;
28 | auto output = at::zeros({nr_rois, nr_channels, pooled_height, pooled_width}, features.options());
29 |
30 | if (output.numel() == 0) {
31 | THCudaCheck(cudaGetLastError());
32 | return output;
33 | }
34 |
35 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
36 | PrRoIPoolingForwardGpu(
37 | stream, features.data(), rois.data(), output.data(),
38 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale,
39 | top_count
40 | );
41 |
42 | THCudaCheck(cudaGetLastError());
43 | return output;
44 | }
45 |
46 | at::Tensor prroi_pooling_backward_cuda(
47 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff,
48 | int pooled_height, int pooled_width, float spatial_scale) {
49 |
50 | auto features_diff = at::zeros_like(features);
51 |
52 | int nr_rois = rois.size(0);
53 | int batch_size = features.size(0);
54 | int nr_channels = features.size(1);
55 | int height = features.size(2);
56 | int width = features.size(3);
57 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width;
58 | int bottom_count = batch_size * nr_channels * height * width;
59 |
60 | if (output.numel() == 0) {
61 | THCudaCheck(cudaGetLastError());
62 | return features_diff;
63 | }
64 |
65 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
66 | PrRoIPoolingBackwardGpu(
67 | stream,
68 | features.data(), rois.data(), output.data(), output_diff.data(),
69 | features_diff.data(),
70 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale,
71 | top_count, bottom_count
72 | );
73 |
74 | THCudaCheck(cudaGetLastError());
75 | return features_diff;
76 | }
77 |
78 | at::Tensor prroi_pooling_coor_backward_cuda(
79 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff,
80 | int pooled_height, int pooled_width, float spatial_scale) {
81 |
82 | auto coor_diff = at::zeros_like(rois);
83 |
84 | int nr_rois = rois.size(0);
85 | int nr_channels = features.size(1);
86 | int height = features.size(2);
87 | int width = features.size(3);
88 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width;
89 | int bottom_count = nr_rois * 5;
90 |
91 | if (output.numel() == 0) {
92 | THCudaCheck(cudaGetLastError());
93 | return coor_diff;
94 | }
95 |
96 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
97 | PrRoIPoolingCoorBackwardGpu(
98 | stream,
99 | features.data(), rois.data(), output.data(), output_diff.data(),
100 | coor_diff.data(),
101 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale,
102 | top_count, bottom_count
103 | );
104 |
105 | THCudaCheck(cudaGetLastError());
106 | return coor_diff;
107 | }
108 |
109 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
110 | m.def("prroi_pooling_forward_cuda", &prroi_pooling_forward_cuda, "PRRoIPooling_forward");
111 | m.def("prroi_pooling_backward_cuda", &prroi_pooling_backward_cuda, "PRRoIPooling_backward");
112 | m.def("prroi_pooling_coor_backward_cuda", &prroi_pooling_coor_backward_cuda, "PRRoIPooling_backward_coor");
113 | }
114 |
--------------------------------------------------------------------------------
/models/siamese/siamese.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) SenseTime. All Rights Reserved.
2 |
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
6 |
7 |
8 | def xcorr_depthwise(x, kernel):
9 | """depthwise cross correlation
10 | """
11 | batch = kernel.size(0)
12 | channel = kernel.size(1)
13 | x = x.view(1, batch*channel, x.size(2), x.size(3))
14 | kernel = kernel.view(batch*channel, 1, kernel.size(2), kernel.size(3))
15 | out = F.conv2d(x, kernel, groups=batch*channel)
16 | out = out.view(batch, channel, out.size(2), out.size(3))
17 | return out
18 |
19 |
20 | def xcorr_up(x, kernel):
21 | batch_sz = kernel.shape[0]
22 | kernel = kernel.reshape(-1, x.shape[1],
23 | kernel.shape[2], kernel.shape[3])
24 | out = F.conv2d(
25 | x.reshape(1, -1, x.shape[2], x.shape[3]), kernel, groups=batch_sz)
26 | out = out.reshape(batch_sz, -1, out.shape[2], out.shape[3])
27 | return out
28 |
29 |
30 | class UPXCorr(nn.Module):
31 | def __init__(self, out_channels, adjust, feat_in=256, feat_out=256):
32 | super(UPXCorr, self).__init__()
33 | self.conv_kernel = nn.Conv2d(feat_in, feat_out * out_channels, 3)
34 | self.conv_search = nn.Conv2d(feat_in, feat_out, 3)
35 | if adjust:
36 | self.adjust = nn.Conv2d(out_channels, out_channels, 1)
37 | else:
38 | self.adjust = lambda x: x
39 |
40 | def forward(self, z_f, x_f):
41 | kernel = self.conv_kernel(z_f)
42 | search = self.conv_search(x_f)
43 | out = xcorr_up(search, kernel)
44 | return self.adjust(out)
45 |
46 |
47 | class DepthwiseXCorr(nn.Module):
48 | def __init__(self, feat_in=256, feat_out=256, out_channels=1, kernel_size=3):
49 | super(DepthwiseXCorr, self).__init__()
50 | self.conv_kernel = nn.Sequential(
51 | nn.Conv2d(feat_in, feat_out, kernel_size=kernel_size, bias=False),
52 | nn.BatchNorm2d(feat_out),
53 | nn.ReLU(inplace=True),
54 | )
55 | self.conv_search = nn.Sequential(
56 | nn.Conv2d(feat_in, feat_out, kernel_size=kernel_size, bias=False),
57 | nn.BatchNorm2d(feat_out),
58 | nn.ReLU(inplace=True),
59 | )
60 | self.head = nn.Sequential(
61 | nn.Conv2d(feat_out, feat_out, kernel_size=1, bias=False),
62 | nn.BatchNorm2d(feat_out),
63 | nn.ReLU(inplace=True),
64 | nn.Conv2d(feat_out, out_channels, kernel_size=1)
65 | )
66 | self.kernel = None
67 |
68 | def forward(self, kernel, search):
69 | kernel = self.conv_kernel(kernel)
70 | search = self.conv_search(search)
71 | feature = xcorr_depthwise(search, kernel)
72 | out = self.head(feature)
73 | return out
74 |
75 |
76 | class Siamese(nn.Module):
77 | def __init__(self):
78 | super(Siamese, self).__init__()
79 |
80 | def forward(self, z_f, x_f):
81 | raise NotImplementedError
82 |
83 |
84 | def normal_init(m, mean, stddev):
85 | m.weight.data.normal_(mean, stddev)
86 | m.bias.data.zero_()
87 |
88 |
89 | class UPChannelSiamese(Siamese):
90 | def __init__(self, feat_in=256, feature_out=256):
91 | super(UPChannelSiamese, self).__init__()
92 | self.cls = UPXCorr(1, False, feat_in, feature_out)
93 | self.loc = UPXCorr(4, True, feat_in, feature_out)
94 | for m in self.modules():
95 | if isinstance(m, nn.Conv2d):
96 | normal_init(m, 0, 0.001)
97 |
98 | def forward(self, z_f, x_f):
99 | loc = self.loc(z_f[:,:,4:-4,4:-4], x_f)
100 | cls = self.cls(z_f[:,:,4:-4,4:-4], x_f)
101 | return loc, cls
102 |
103 |
104 | class DepthwiseSiamese(Siamese):
105 | def __init__(self, feat_in=256, feature_out=256):
106 | super(DepthwiseSiamese, self).__init__()
107 | self.cls = DepthwiseXCorr(feat_in, feature_out, 1)
108 | self.loc = DepthwiseXCorr(feat_in, feature_out, 4)
109 |
110 | for m in self.modules():
111 | if isinstance(m, nn.Conv2d):
112 | nn.init.kaiming_normal_(
113 | m.weight.data, mode='fan_out', nonlinearity='relu')
114 | elif isinstance(m, nn.BatchNorm2d):
115 | m.weight.data.fill_(1.0)
116 | m.bias.data.zero_()
117 |
118 | def forward(self, z_f, x_f):
119 | cls = self.cls(z_f, x_f)
120 | loc = self.loc(z_f, x_f)
121 | return loc, cls
--------------------------------------------------------------------------------
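
A quick shape check for xcorr_depthwise, which folds the batch into the group dimension of conv2d so that every channel of the search feature is correlated with its own template kernel. The sizes below are illustrative only, not the training configuration.

import torch
from models.siamese.siamese import xcorr_depthwise  # import path as laid out in this repo

x = torch.randn(2, 256, 29, 29)      # search features  (B, C, Hx, Wx)
kernel = torch.randn(2, 256, 5, 5)   # template kernels (B, C, Hz, Wz)
out = xcorr_depthwise(x, kernel)
print(out.shape)                     # torch.Size([2, 256, 25, 25])

--------------------------------------------------------------------------------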
/models/cornerdet/cornerdet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import numpy as np
5 |
6 |
7 | class CornerDet(nn.Module):
8 | def __init__(self):
9 | super(CornerDet, self).__init__()
10 |
11 | def forward(self, x_f):
12 | raise NotImplementedError
13 |
14 |
15 | class SepCornerDet(CornerDet):
16 | def __init__(self, feat_in=256):
17 | super(SepCornerDet, self).__init__()
18 |
19 | self.up0_l = nn.Sequential(
20 | nn.Conv2d(feat_in, 256, 3, padding=1),
21 | nn.BatchNorm2d(256),
22 | nn.ReLU(inplace=True),
23 | nn.Conv2d(256, 64, 1),
24 | nn.BatchNorm2d(64),
25 | nn.ReLU(inplace=True),
26 | )
27 |
28 | self.up1_l = nn.Sequential(
29 | nn.Conv2d(64, 64, 3, padding=1),
30 | nn.BatchNorm2d(64),
31 | nn.ReLU(inplace=True),
32 | nn.Conv2d(64, 32, 1),
33 | nn.BatchNorm2d(32),
34 | nn.ReLU(inplace=True),
35 | )
36 |
37 | self.up2_l = nn.Sequential(
38 | nn.Conv2d(32, 32, 3, padding=1),
39 | nn.BatchNorm2d(32),
40 | nn.ReLU(inplace=True),
41 | nn.Conv2d(32, 1, 1),
42 | )
43 |
44 | self.up0_r = nn.Sequential(
45 | nn.Conv2d(feat_in, 256, 3, padding=1),
46 | nn.BatchNorm2d(256),
47 | nn.ReLU(inplace=True),
48 | nn.Conv2d(256, 64, 1),
49 | nn.BatchNorm2d(64),
50 | nn.ReLU(inplace=True),
51 | )
52 |
53 | self.up1_r = nn.Sequential(
54 | nn.Conv2d(64, 64, 3, padding=1),
55 | nn.BatchNorm2d(64),
56 | nn.ReLU(inplace=True),
57 | nn.Conv2d(64, 32, 1),
58 | nn.BatchNorm2d(32),
59 | nn.ReLU(inplace=True),
60 | )
61 |
62 | self.up2_r = nn.Sequential(
63 | nn.Conv2d(32, 32, 3, padding=1),
64 | nn.BatchNorm2d(32),
65 | nn.ReLU(inplace=True),
66 | nn.Conv2d(32, 1, 1),
67 | )
68 |
69 | for m in self.modules():
70 | if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d, nn.Linear)):
71 | nn.init.kaiming_normal_(
72 | m.weight.data, mode='fan_out', nonlinearity='relu')
73 | elif isinstance(m, nn.BatchNorm2d):
74 | m.weight.data.fill_(1.0)
75 | m.bias.data.zero_()
76 |
77 | def forward(self, x_f):
78 | x_f_l, x_f_r = x_f
79 | x_f_l = self.up0_l(x_f_l)
80 | resolution = x_f_l.shape[-1]
81 | x_f_l = self.up1_l(F.interpolate(
82 | x_f_l, size=(resolution*2+1, resolution*2+1)))
83 | resolution = x_f_l.shape[-1]
84 | heat_map_l = self.up2_l(F.interpolate(
85 | x_f_l, size=(resolution*2+1, resolution*2+1)))
86 |
87 | batch_sz = x_f_l.shape[0]
88 | left_top_map = F.softmax(heat_map_l.squeeze().reshape(batch_sz, -1), 1).reshape(
89 | batch_sz, heat_map_l.shape[-2], heat_map_l.shape[-1])
90 |
91 | x_f_r = self.up0_r(x_f_r)
92 | resolution = x_f_r.shape[-1]
93 | x_f_r = self.up1_r(F.interpolate(
94 | x_f_r, size=(resolution*2+1, resolution*2+1)))
95 | resolution = x_f_r.shape[-1]
96 | heat_map_r = self.up2_r(F.interpolate(
97 | x_f_r, size=(resolution*2+1, resolution*2+1)))
98 | batch_sz = x_f_r.shape[0]
99 | right_bottom_map = F.softmax(heat_map_r.squeeze().reshape(batch_sz, -1), 1).reshape(
100 | batch_sz, heat_map_r.shape[-2], heat_map_r.shape[-1])
101 |
102 | heatmap_size = left_top_map.shape[-1]
103 | xx, yy = np.meshgrid([dx for dx in range(int(heatmap_size))],
104 | [dy for dy in range(int(heatmap_size))])
105 | heatmap_xx = torch.from_numpy(xx).float().cuda()
106 | heatmap_yy = torch.from_numpy(yy).float().cuda()
107 |
108 | x1 = ((left_top_map * heatmap_xx).sum(-1).sum(-1) /
109 | heatmap_xx.shape[-1]).reshape(-1, 1)
110 | y1 = ((left_top_map * heatmap_yy).sum(-1).sum(-1) /
111 | heatmap_xx.shape[-2]).reshape(-1, 1)
112 | x2 = ((right_bottom_map * heatmap_xx).sum(-1).sum(-1) /
113 | heatmap_xx.shape[-1]).reshape(-1, 1)
114 | y2 = ((right_bottom_map * heatmap_yy).sum(-1).sum(-1) /
115 | heatmap_xx.shape[-2]).reshape(-1, 1)
116 |
117 | result_target = torch.cat((x1, y1, x2, y2), 1)
118 |
119 | return result_target, left_top_map.shape[-1]
--------------------------------------------------------------------------------
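
SepCornerDet.forward turns each corner heatmap into a coordinate with a soft-argmax: a spatial softmax followed by the expectation of the column/row indices, normalized by the heatmap size. The same step, isolated as a small hypothetical helper (not part of the repository):

import torch
import torch.nn.functional as F

def soft_argmax(heat_map):
    # heat_map: (B, 1, S, S) corner heatmap -> (B, 2) normalized (x, y) in [0, 1)
    b, _, s, _ = heat_map.shape
    prob = F.softmax(heat_map.reshape(b, -1), dim=1).reshape(b, s, s)
    idx = torch.arange(s, dtype=torch.float32, device=heat_map.device)
    x = (prob * idx[None, None, :]).sum(dim=(-2, -1)) / s  # expected column index
    y = (prob * idx[None, :, None]).sum(dim=(-2, -1)) / s  # expected row index
    return torch.stack([x, y], dim=1)

--------------------------------------------------------------------------------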
/toolkit/datasets/otb.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from PIL import Image
6 | from tqdm import tqdm
7 | from glob import glob
8 |
9 | from .dataset import Dataset
10 | from .video import Video
11 |
12 |
13 | class OTBVideo(Video):
14 | """
15 | Args:
16 | name: video name
17 | root: dataset root
18 | video_dir: video directory
19 | init_rect: init rectangle
20 | img_names: image names
21 | gt_rect: groundtruth rectangle
22 | attr: attribute of video
23 | """
24 | def __init__(self, name, root, video_dir, init_rect, img_names,
25 | gt_rect, attr, load_img=False):
26 | super(OTBVideo, self).__init__(name, root, video_dir,
27 | init_rect, img_names, gt_rect, attr, load_img)
28 |
29 | def load_tracker(self, path, tracker_names=None, store=True):
30 | """
31 | Args:
32 | path(str): path to result
33 |             tracker_names(list): list of tracker names
34 | """
35 | if not tracker_names:
36 | tracker_names = [x.split('/')[-1] for x in glob(path)
37 | if os.path.isdir(x)]
38 | if isinstance(tracker_names, str):
39 | tracker_names = [tracker_names]
40 | for name in tracker_names:
41 | traj_file = os.path.join(path, name, self.name+'.txt')
42 | if not os.path.exists(traj_file):
43 | if self.name == 'FleetFace':
44 | txt_name = 'fleetface.txt'
45 | elif self.name == 'Jogging-1':
46 | txt_name = 'jogging_1.txt'
47 | elif self.name == 'Jogging-2':
48 | txt_name = 'jogging_2.txt'
49 | elif self.name == 'Skating2-1':
50 | txt_name = 'skating2_1.txt'
51 | elif self.name == 'Skating2-2':
52 | txt_name = 'skating2_2.txt'
53 | elif self.name == 'FaceOcc1':
54 | txt_name = 'faceocc1.txt'
55 | elif self.name == 'FaceOcc2':
56 | txt_name = 'faceocc2.txt'
57 | elif self.name == 'Human4-2':
58 | txt_name = 'human4_2.txt'
59 | else:
60 | txt_name = self.name[0].lower()+self.name[1:]+'.txt'
61 | traj_file = os.path.join(path, name, txt_name)
62 | if os.path.exists(traj_file):
63 | with open(traj_file, 'r') as f :
64 | pred_traj = [list(map(float, x.strip().split(',')))
65 | for x in f.readlines()]
66 | if len(pred_traj) != len(self.gt_traj):
67 | print(name, len(pred_traj), len(self.gt_traj), self.name)
68 | if store:
69 | self.pred_trajs[name] = pred_traj
70 | else:
71 | return pred_traj
72 | else:
73 | print(traj_file)
74 | self.tracker_names = list(self.pred_trajs.keys())
75 |
76 |
77 |
78 | class OTBDataset(Dataset):
79 | """
80 | Args:
81 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50'
82 | dataset_root: dataset root
83 |         load_img: whether to load all imgs
84 | """
85 | def __init__(self, name, dataset_root, load_img=False):
86 | super(OTBDataset, self).__init__(name, dataset_root)
87 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
88 | meta_data = json.load(f)
89 |
90 | # load videos
91 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
92 | self.videos = {}
93 | for video in pbar:
94 | pbar.set_postfix_str(video)
95 | self.videos[video] = OTBVideo(video,
96 | dataset_root,
97 | meta_data[video]['video_dir'],
98 | meta_data[video]['init_rect'],
99 | meta_data[video]['img_names'],
100 | meta_data[video]['gt_rect'],
101 | meta_data[video]['attr'],
102 | load_img)
103 |
104 | # set attr
105 | attr = []
106 | for x in self.videos.values():
107 | attr += x.attr
108 | attr = set(attr)
109 | self.attr = {}
110 | self.attr['ALL'] = list(self.videos.keys())
111 | for x in attr:
112 | self.attr[x] = []
113 | for k, v in self.videos.items():
114 | for attr_ in v.attr:
115 | self.attr[attr_].append(k)
116 |
--------------------------------------------------------------------------------
/training_dataset/coco/pycocotools/mask.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tsungyi'
2 |
3 | #import pycocotools._mask as _mask
4 | from . import _mask
5 |
6 | # Interface for manipulating masks stored in RLE format.
7 | #
8 | # RLE is a simple yet efficient format for storing binary masks. RLE
9 | # first divides a vector (or vectorized image) into a series of piecewise
10 | # constant regions and then for each piece simply stores the length of
11 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
12 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
13 | # (note that the odd counts are always the numbers of zeros). Instead of
14 | # storing the counts directly, additional compression is achieved with a
15 | # variable bitrate representation based on a common scheme called LEB128.
16 | #
17 | # Compression is greatest given large piecewise constant regions.
18 | # Specifically, the size of the RLE is proportional to the number of
19 | # *boundaries* in M (or for an image the number of boundaries in the y
20 | # direction). Assuming fairly simple shapes, the RLE representation is
21 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
22 | # is substantially lower, especially for large simple objects (large n).
23 | #
24 | # Many common operations on masks can be computed directly using the RLE
25 | # (without need for decoding). This includes computations such as area,
26 | # union, intersection, etc. All of these operations are linear in the
27 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
28 | # of the object. Computing these operations on the original mask is O(n).
29 | # Thus, using the RLE can result in substantial computational savings.
30 | #
31 | # The following API functions are defined:
32 | # encode - Encode binary masks using RLE.
33 | # decode - Decode binary masks encoded via RLE.
34 | # merge - Compute union or intersection of encoded masks.
35 | # iou - Compute intersection over union between masks.
36 | # area - Compute area of encoded masks.
37 | # toBbox - Get bounding boxes surrounding encoded masks.
38 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
39 | #
40 | # Usage:
41 | # Rs = encode( masks )
42 | # masks = decode( Rs )
43 | # R = merge( Rs, intersect=false )
44 | # o = iou( dt, gt, iscrowd )
45 | # a = area( Rs )
46 | # bbs = toBbox( Rs )
47 | # Rs = frPyObjects( [pyObjects], h, w )
48 | #
49 | # In the API the following formats are used:
50 | # Rs - [dict] Run-length encoding of binary masks
51 | # R - dict Run-length encoding of binary mask
52 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
53 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
54 | # bbs - [nx4] Bounding box(es) stored as [x y w h]
55 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
56 | # dt,gt - May be either bounding boxes or encoded masks
57 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
58 | #
59 | # Finally, a note about the intersection over union (iou) computation.
60 | # The standard iou of a ground truth (gt) and detected (dt) object is
61 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
62 | # For "crowd" regions, we use a modified criterion. If a gt object is
63 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
64 | # Choosing gt' in the crowd gt that best matches the dt can be done using
65 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
66 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
67 | # For crowd gt regions we use this modified criterion above for the iou.
68 | #
69 | # To compile run "python setup.py build_ext --inplace"
70 | # Please do not contact us for help with compiling.
71 | #
72 | # Microsoft COCO Toolbox. version 2.0
73 | # Data, paper, and tutorials available at: http://mscoco.org/
74 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
75 | # Licensed under the Simplified BSD License [see coco/license.txt]
76 |
77 | iou = _mask.iou
78 | merge = _mask.merge
79 | frPyObjects = _mask.frPyObjects
80 |
81 | def encode(bimask):
82 | if len(bimask.shape) == 3:
83 | return _mask.encode(bimask)
84 | elif len(bimask.shape) == 2:
85 | h, w = bimask.shape
86 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
87 |
88 | def decode(rleObjs):
89 | if type(rleObjs) == list:
90 | return _mask.decode(rleObjs)
91 | else:
92 | return _mask.decode([rleObjs])[:,:,0]
93 |
94 | def area(rleObjs):
95 | if type(rleObjs) == list:
96 | return _mask.area(rleObjs)
97 | else:
98 | return _mask.area([rleObjs])[0]
99 |
100 | def toBbox(rleObjs):
101 | if type(rleObjs) == list:
102 | return _mask.toBbox(rleObjs)
103 | else:
104 | return _mask.toBbox([rleObjs])[0]
105 |
--------------------------------------------------------------------------------
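
The header comment above describes RLE counts as run lengths of alternating values, always starting with the number of zeros. A toy pure-Python illustration of just that counts format (not the pycocotools codec, and without the LEB128 compression it mentions):

def rle_counts(mask_vector):
    # run lengths of alternating values, starting with the zero run
    counts, value, run = [], 0, 0
    for v in mask_vector:
        if v == value:
            run += 1
        else:
            counts.append(run)
            value, run = v, 1
    counts.append(run)
    return counts

print(rle_counts([0, 0, 1, 1, 1, 0, 1]))  # [2, 3, 1, 1]
print(rle_counts([1, 1, 1, 1, 1, 1, 0]))  # [0, 6, 1]

--------------------------------------------------------------------------------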
/toolkit/utils/src/buffer.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef __STRING_BUFFER_H
3 | #define __STRING_BUFFER_H
4 |
5 | // Enable MinGW secure API for _snprintf_s
6 | #define MINGW_HAS_SECURE_API 1
7 |
8 | #ifdef _MSC_VER
9 | #define __INLINE __inline
10 | #else
11 | #define __INLINE inline
12 | #endif
13 |
14 | #include <stdlib.h>
15 | #include <string.h>
16 | #include <stdarg.h>
17 |
18 | typedef struct string_buffer {
19 | char* buffer;
20 | int position;
21 | int size;
22 | } string_buffer;
23 |
24 | typedef struct string_list {
25 | char** buffer;
26 | int position;
27 | int size;
28 | } string_list;
29 |
30 | #define BUFFER_INCREMENT_STEP 4096
31 |
32 | static __INLINE string_buffer* buffer_create(int L) {
33 | string_buffer* B = (string_buffer*) malloc(sizeof(string_buffer));
34 | B->size = L;
35 | B->buffer = (char*) malloc(sizeof(char) * B->size);
36 | B->position = 0;
37 | return B;
38 | }
39 |
40 | static __INLINE void buffer_reset(string_buffer* B) {
41 | B->position = 0;
42 | }
43 |
44 | static __INLINE void buffer_destroy(string_buffer** B) {
45 | if (!(*B)) return;
46 | if ((*B)->buffer) {
47 | free((*B)->buffer);
48 | (*B)->buffer = NULL;
49 | }
50 | free((*B));
51 | (*B) = NULL;
52 | }
53 |
54 | static __INLINE char* buffer_extract(const string_buffer* B) {
55 | char *S = (char*) malloc(sizeof(char) * (B->position + 1));
56 | memcpy(S, B->buffer, B->position);
57 | S[B->position] = '\0';
58 | return S;
59 | }
60 |
61 | static __INLINE int buffer_size(const string_buffer* B) {
62 | return B->position;
63 | }
64 |
65 | static __INLINE void buffer_push(string_buffer* B, char C) {
66 | int required = 1;
67 | if (required > B->size - B->position) {
68 | B->size = B->position + BUFFER_INCREMENT_STEP;
69 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size);
70 | }
71 | B->buffer[B->position] = C;
72 | B->position += required;
73 | }
74 |
75 | static __INLINE void buffer_append(string_buffer* B, const char *format, ...) {
76 |
77 | int required;
78 | va_list args;
79 |
80 | #if defined(__OS2__) || defined(__WINDOWS__) || defined(WIN32) || defined(_MSC_VER)
81 |
82 | va_start(args, format);
83 | required = _vscprintf(format, args) + 1;
84 | va_end(args);
85 | if (required >= B->size - B->position) {
86 | B->size = B->position + required + 1;
87 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size);
88 | }
89 | va_start(args, format);
90 | required = _vsnprintf_s(&(B->buffer[B->position]), B->size - B->position, _TRUNCATE, format, args);
91 | va_end(args);
92 | B->position += required;
93 |
94 | #else
95 | va_start(args, format);
96 | required = vsnprintf(&(B->buffer[B->position]), B->size - B->position, format, args);
97 | va_end(args);
98 | if (required >= B->size - B->position) {
99 | B->size = B->position + required + 1;
100 | B->buffer = (char*) realloc(B->buffer, sizeof(char) * B->size);
101 | va_start(args, format);
102 | required = vsnprintf(&(B->buffer[B->position]), B->size - B->position, format, args);
103 | va_end(args);
104 | }
105 | B->position += required;
106 | #endif
107 |
108 | }
109 |
110 | static __INLINE string_list* list_create(int L) {
111 | string_list* B = (string_list*) malloc(sizeof(string_list));
112 | B->size = L;
113 | B->buffer = (char**) malloc(sizeof(char*) * B->size);
114 | memset(B->buffer, 0, sizeof(char*) * B->size);
115 | B->position = 0;
116 | return B;
117 | }
118 |
119 | static __INLINE void list_reset(string_list* B) {
120 | int i;
121 | for (i = 0; i < B->position; i++) {
122 | if (B->buffer[i]) free(B->buffer[i]);
123 | B->buffer[i] = NULL;
124 | }
125 | B->position = 0;
126 | }
127 |
128 | static __INLINE void list_destroy(string_list **B) {
129 | int i;
130 |
131 | if (!(*B)) return;
132 |
133 | for (i = 0; i < (*B)->position; i++) {
134 | if ((*B)->buffer[i]) free((*B)->buffer[i]); (*B)->buffer[i] = NULL;
135 | }
136 |
137 | if ((*B)->buffer) {
138 | free((*B)->buffer); (*B)->buffer = NULL;
139 | }
140 |
141 | free((*B));
142 | (*B) = NULL;
143 | }
144 |
145 | static __INLINE char* list_get(const string_list *B, int I) {
146 | if (I < 0 || I >= B->position) {
147 | return NULL;
148 | } else {
149 | if (!B->buffer[I]) {
150 | return NULL;
151 | } else {
152 | char *S;
153 | int length = strlen(B->buffer[I]);
154 | S = (char*) malloc(sizeof(char) * (length + 1));
155 | memcpy(S, B->buffer[I], length + 1);
156 | return S;
157 | }
158 | }
159 | }
160 |
161 | static __INLINE int list_size(const string_list *B) {
162 | return B->position;
163 | }
164 |
165 | static __INLINE void list_append(string_list *B, char* S) {
166 | int required = 1;
167 | int length = strlen(S);
168 | if (required > B->size - B->position) {
169 | B->size = B->position + 16;
170 | B->buffer = (char**) realloc(B->buffer, sizeof(char*) * B->size);
171 | }
172 | B->buffer[B->position] = (char*) malloc(sizeof(char) * (length + 1));
173 | memcpy(B->buffer[B->position], S, length + 1);
174 | B->position += required;
175 | }
176 |
177 | // This version of the append does not copy the string but simply takes the control of its allocation
178 | static __INLINE void list_append_direct(string_list *B, char* S) {
179 | int required = 1;
180 | // int length = strlen(S);
181 | if (required > B->size - B->position) {
182 | B->size = B->position + 16;
183 | B->buffer = (char**) realloc(B->buffer, sizeof(char*) * B->size);
184 | }
185 | B->buffer[B->position] = S;
186 | B->position += required;
187 | }
188 |
189 |
190 | #endif
191 |
--------------------------------------------------------------------------------
/toolkit/visualization/draw_success_precision.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | from .draw_utils import COLOR, LINE_STYLE
5 |
6 | def draw_success_precision(success_ret, name, videos, attr, precision_ret=None,
7 | norm_precision_ret=None, bold_name=None, axis=[0, 1]):
8 | # success plot
9 | fig, ax = plt.subplots()
10 | ax.grid(b=True)
11 | ax.set_aspect(1)
12 | plt.xlabel('Overlap threshold')
13 | plt.ylabel('Success rate')
14 | if attr == 'ALL':
15 | plt.title(r'\textbf{Success plots of OPE on %s}' % (name))
16 | else:
17 | plt.title(r'\textbf{Success plots of OPE - %s}' % (attr))
18 | plt.axis([0, 1]+axis)
19 | success = {}
20 | thresholds = np.arange(0, 1.05, 0.05)
21 | for tracker_name in success_ret.keys():
22 | value = [v for k, v in success_ret[tracker_name].items() if k in videos]
23 | success[tracker_name] = np.mean(value)
24 | for idx, (tracker_name, auc) in \
25 | enumerate(sorted(success.items(), key=lambda x:x[1], reverse=True)):
26 | if tracker_name == bold_name:
27 | label = r"\textbf{[%.3f] %s}" % (auc, tracker_name)
28 | else:
29 | label = "[%.3f] " % (auc) + tracker_name
30 | value = [v for k, v in success_ret[tracker_name].items() if k in videos]
31 | plt.plot(thresholds, np.mean(value, axis=0),
32 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2)
33 | ax.legend(loc='lower left', labelspacing=0.2)
34 | ax.autoscale(enable=True, axis='both', tight=True)
35 | xmin, xmax, ymin, ymax = plt.axis()
36 | ax.autoscale(enable=False)
37 | ymax += 0.03
38 | plt.axis([xmin, xmax, ymin, ymax])
39 | plt.xticks(np.arange(xmin, xmax+0.01, 0.1))
40 | plt.yticks(np.arange(ymin, ymax, 0.1))
41 | ax.set_aspect((xmax - xmin)/(ymax-ymin))
42 | plt.show()
43 |
44 | if precision_ret:
45 |         # precision plot
46 | fig, ax = plt.subplots()
47 | ax.grid(b=True)
48 | ax.set_aspect(50)
49 | plt.xlabel('Location error threshold')
50 | plt.ylabel('Precision')
51 | if attr == 'ALL':
52 | plt.title(r'\textbf{Precision plots of OPE on %s}' % (name))
53 | else:
54 | plt.title(r'\textbf{Precision plots of OPE - %s}' % (attr))
55 | plt.axis([0, 50]+axis)
56 | precision = {}
57 | thresholds = np.arange(0, 51, 1)
58 | for tracker_name in precision_ret.keys():
59 | value = [v for k, v in precision_ret[tracker_name].items() if k in videos]
60 | precision[tracker_name] = np.mean(value, axis=0)[20]
61 | for idx, (tracker_name, pre) in \
62 | enumerate(sorted(precision.items(), key=lambda x:x[1], reverse=True)):
63 | if tracker_name == bold_name:
64 | label = r"\textbf{[%.3f] %s}" % (pre, tracker_name)
65 | else:
66 | label = "[%.3f] " % (pre) + tracker_name
67 | value = [v for k, v in precision_ret[tracker_name].items() if k in videos]
68 | plt.plot(thresholds, np.mean(value, axis=0),
69 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2)
70 | ax.legend(loc='lower right', labelspacing=0.2)
71 | ax.autoscale(enable=True, axis='both', tight=True)
72 | xmin, xmax, ymin, ymax = plt.axis()
73 | ax.autoscale(enable=False)
74 | ymax += 0.03
75 | plt.axis([xmin, xmax, ymin, ymax])
76 | plt.xticks(np.arange(xmin, xmax+0.01, 5))
77 | plt.yticks(np.arange(ymin, ymax, 0.1))
78 | ax.set_aspect((xmax - xmin)/(ymax-ymin))
79 | plt.show()
80 |
81 | # norm precision plot
82 | if norm_precision_ret:
83 | fig, ax = plt.subplots()
84 | ax.grid(b=True)
85 | plt.xlabel('Location error threshold')
86 | plt.ylabel('Precision')
87 | if attr == 'ALL':
88 | plt.title(r'\textbf{Normalized Precision plots of OPE on %s}' % (name))
89 | else:
90 | plt.title(r'\textbf{Normalized Precision plots of OPE - %s}' % (attr))
91 | norm_precision = {}
92 | thresholds = np.arange(0, 51, 1) / 100
93 |         for tracker_name in norm_precision_ret.keys():
94 | value = [v for k, v in norm_precision_ret[tracker_name].items() if k in videos]
95 | norm_precision[tracker_name] = np.mean(value, axis=0)[20]
96 | for idx, (tracker_name, pre) in \
97 | enumerate(sorted(norm_precision.items(), key=lambda x:x[1], reverse=True)):
98 | if tracker_name == bold_name:
99 | label = r"\textbf{[%.3f] %s}" % (pre, tracker_name)
100 | else:
101 | label = "[%.3f] " % (pre) + tracker_name
102 | value = [v for k, v in norm_precision_ret[tracker_name].items() if k in videos]
103 | plt.plot(thresholds, np.mean(value, axis=0),
104 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2)
105 | ax.legend(loc='lower right', labelspacing=0.2)
106 | ax.autoscale(enable=True, axis='both', tight=True)
107 | xmin, xmax, ymin, ymax = plt.axis()
108 | ax.autoscale(enable=False)
109 | ymax += 0.03
110 | plt.axis([xmin, xmax, ymin, ymax])
111 | plt.xticks(np.arange(xmin, xmax+0.01, 0.05))
112 | plt.yticks(np.arange(ymin, ymax, 0.1))
113 | ax.set_aspect((xmax - xmin)/(ymax-ymin))
114 | plt.show()
115 |
--------------------------------------------------------------------------------
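
Each entry of success_ret holds a per-video success curve: the fraction of frames whose overlap with the ground truth exceeds each threshold in np.arange(0, 1.05, 0.05); the number shown in the legend is the mean of that curve (the AUC). A small sketch of how such a curve is typically derived from per-frame IoUs (the IoU values are invented):

import numpy as np

ious = np.array([0.82, 0.61, 0.05, 0.47, 0.90])   # per-frame overlaps (made up)
thresholds = np.arange(0, 1.05, 0.05)             # same grid as the success plot
success_curve = np.array([(ious > t).mean() for t in thresholds])
auc = success_curve.mean()                        # value printed in the legend
print(success_curve.shape, round(float(auc), 3))  # (21,) ...

--------------------------------------------------------------------------------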
/toolkit/datasets/video.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cv2
3 | import re
4 | import numpy as np
5 | import json
6 |
7 | from glob import glob
8 |
9 | class Video(object):
10 | def __init__(self, name, root, video_dir, init_rect, img_names,
11 | gt_rect, attr, load_img=False):
12 | self.name = name
13 | self.video_dir = video_dir
14 | self.init_rect = init_rect
15 | self.gt_traj = gt_rect
16 | self.attr = attr
17 | self.pred_trajs = {}
18 | self.img_names = [os.path.join(root, x) for x in img_names]
19 | self.imgs = None
20 |
21 | if load_img:
22 | self.imgs = [cv2.imread(x) for x in self.img_names]
23 | self.width = self.imgs[0].shape[1]
24 | self.height = self.imgs[0].shape[0]
25 | else:
26 | img = cv2.imread(self.img_names[0])
27 | assert img is not None, self.img_names[0]
28 | self.width = img.shape[1]
29 | self.height = img.shape[0]
30 |
31 | def load_tracker(self, path, tracker_names=None, store=True):
32 | """
33 | Args:
34 | path(str): path to result
35 |             tracker_names(list): list of tracker names
36 | """
37 | if not tracker_names:
38 | tracker_names = [x.split('/')[-1] for x in glob(path)
39 | if os.path.isdir(x)]
40 | if isinstance(tracker_names, str):
41 | tracker_names = [tracker_names]
42 | for name in tracker_names:
43 | traj_file = os.path.join(path, name, self.name+'.txt')
44 | if os.path.exists(traj_file):
45 | with open(traj_file, 'r') as f :
46 | pred_traj = [list(map(float, x.strip().split(',')))
47 | for x in f.readlines()]
48 | if len(pred_traj) != len(self.gt_traj):
49 | print(name, len(pred_traj), len(self.gt_traj), self.name)
50 | if store:
51 | self.pred_trajs[name] = pred_traj
52 | else:
53 | return pred_traj
54 | else:
55 | print(traj_file)
56 | self.tracker_names = list(self.pred_trajs.keys())
57 |
58 | def load_img(self):
59 | if self.imgs is None:
60 | self.imgs = [cv2.imread(x) for x in self.img_names]
61 | self.width = self.imgs[0].shape[1]
62 | self.height = self.imgs[0].shape[0]
63 |
64 | def free_img(self):
65 | self.imgs = None
66 |
67 | def __len__(self):
68 | return len(self.img_names)
69 |
70 | def __getitem__(self, idx):
71 | if self.imgs is None:
72 | return cv2.imread(self.img_names[idx]), self.gt_traj[idx]
73 | else:
74 | return self.imgs[idx], self.gt_traj[idx]
75 |
76 | def __iter__(self):
77 | for i in range(len(self.img_names)):
78 | if self.imgs is not None:
79 | yield self.imgs[i], self.gt_traj[i]
80 | else:
81 | yield cv2.imread(self.img_names[i]), self.gt_traj[i]
82 |
83 | def draw_box(self, roi, img, linewidth, color, name=None):
84 | """
85 | roi: rectangle or polygon
86 | img: numpy array img
87 |         linewidth: line width of the bbox
88 | """
89 | if len(roi) > 6 and len(roi) % 2 == 0:
90 | pts = np.array(roi, np.int32).reshape(-1, 1, 2)
91 | color = tuple(map(int, color))
92 | img = cv2.polylines(img, [pts], True, color, linewidth)
93 | pt = (pts[0, 0, 0], pts[0, 0, 1]-5)
94 | if name:
95 | img = cv2.putText(img, name, pt, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 1)
96 | elif len(roi) == 4:
97 | if not np.isnan(roi[0]):
98 | roi = list(map(int, roi))
99 | color = tuple(map(int, color))
100 | img = cv2.rectangle(img, (roi[0], roi[1]), (roi[0]+roi[2], roi[1]+roi[3]),
101 | color, linewidth)
102 | if name:
103 | img = cv2.putText(img, name, (roi[0], roi[1]-5), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 1)
104 | return img
105 |
106 | def show(self, pred_trajs={}, linewidth=2, show_name=False):
107 | """
108 | pred_trajs: dict of pred_traj, {'tracker_name': list of traj}
109 | pred_traj should contain polygon or rectangle(x, y, width, height)
110 |         linewidth: line width of the bbox
111 | """
112 | assert self.imgs is not None
113 | video = []
114 | cv2.namedWindow(self.name, cv2.WINDOW_NORMAL)
115 | colors = {}
116 | if len(pred_trajs) == 0 and len(self.pred_trajs) > 0:
117 | pred_trajs = self.pred_trajs
118 | for i, (roi, img) in enumerate(zip(self.gt_traj,
119 | self.imgs[self.start_frame:self.end_frame+1])):
120 | img = img.copy()
121 | if len(img.shape) == 2:
122 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
123 | else:
124 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
125 | img = self.draw_box(roi, img, linewidth, (0, 255, 0),
126 | 'gt' if show_name else None)
127 | for name, trajs in pred_trajs.items():
128 | if name not in colors:
129 | color = tuple(np.random.randint(0, 256, 3))
130 | colors[name] = color
131 | else:
132 | color = colors[name]
133 | img = self.draw_box(trajs[0][i], img, linewidth, color,
134 | name if show_name else None)
135 | cv2.putText(img, str(i+self.start_frame), (5, 20),
136 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 0), 2)
137 | cv2.imshow(self.name, img)
138 | cv2.waitKey(40)
139 | video.append(img.copy())
140 | return video
141 |
--------------------------------------------------------------------------------
/training_dataset/got10k/par_crop.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SiamMask
3 | # Licensed under The MIT License
4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
5 | # --------------------------------------------------------
6 | from os.path import join, isdir
7 | from os import listdir, mkdir, makedirs
8 | import cv2
9 | import numpy as np
10 | import glob
11 | import xml.etree.ElementTree as ET
12 | from concurrent import futures
13 | from pathlib import Path
14 | import sys
15 | import time
16 |
17 | base_path = '/ssd/feiji/Research/Data/GOT-10k'
18 | sub_sets = sorted({'train', 'val'})
19 | # Print iterations progress (thanks StackOverflow)
20 | def printProgress(iteration, total, prefix='', suffix='', decimals=1, barLength=100):
21 | """
22 | Call in a loop to create terminal progress bar
23 | @params:
24 | iteration - Required : current iteration (Int)
25 | total - Required : total iterations (Int)
26 | prefix - Optional : prefix string (Str)
27 | suffix - Optional : suffix string (Str)
28 | decimals - Optional : positive number of decimals in percent complete (Int)
29 | barLength - Optional : character length of bar (Int)
30 | """
31 | formatStr = "{0:." + str(decimals) + "f}"
32 | percents = formatStr.format(100 * (iteration / float(total)))
33 | filledLength = int(round(barLength * iteration / float(total)))
34 |     bar = '█' * filledLength + '-' * (barLength - filledLength)
35 |     sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix))
36 | if iteration == total:
37 | sys.stdout.write('\x1b[2K\r')
38 | sys.stdout.flush()
39 |
40 |
41 | def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)):
42 | a = (out_sz-1) / (bbox[2]-bbox[0])
43 | b = (out_sz-1) / (bbox[3]-bbox[1])
44 | c = -a * bbox[0]
45 | d = -b * bbox[1]
46 | mapping = np.array([[a, 0, c],
47 | [0, b, d]]).astype(np.float)
48 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), borderMode=cv2.BORDER_CONSTANT, borderValue=padding)
49 | return crop
50 |
51 |
52 | def pos_s_2_bbox(pos, s):
53 | return [pos[0]-s/2, pos[1]-s/2, pos[0]+s/2, pos[1]+s/2]
54 |
55 |
56 | def crop_like_SiamFC(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)):
57 | target_pos = [(bbox[2]+bbox[0])/2., (bbox[3]+bbox[1])/2.]
58 | target_size = [bbox[2]-bbox[0], bbox[3]-bbox[1]]
59 | wc_z = target_size[1] + context_amount * sum(target_size)
60 | hc_z = target_size[0] + context_amount * sum(target_size)
61 | s_z = np.sqrt(wc_z * hc_z)
62 | scale_z = exemplar_size / s_z
63 | d_search = (instanc_size - exemplar_size) / 2
64 | pad = d_search / scale_z
65 | s_x = s_z + 2 * pad
66 |
67 | z = crop_hwc(image, pos_s_2_bbox(target_pos, s_z), exemplar_size, padding)
68 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding)
69 | return z, x
70 |
71 |
72 | def crop_like_SiamFCx(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)):
73 | target_pos = [(bbox[2]+bbox[0])/2., (bbox[3]+bbox[1])/2.]
74 | target_size = [bbox[2]-bbox[0], bbox[3]-bbox[1]]
75 | wc_z = target_size[1] + context_amount * sum(target_size)
76 | hc_z = target_size[0] + context_amount * sum(target_size)
77 | s_z = np.sqrt(wc_z * hc_z)
78 | scale_z = exemplar_size / s_z
79 | d_search = (instanc_size - exemplar_size) / 2
80 | pad = d_search / scale_z
81 | s_x = s_z + 2 * pad
82 |
83 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding)
84 | return x
85 |
86 |
87 | def crop_video(sub_set, video, crop_path, instanc_size):
88 | sub_set_base_path = join(base_path, sub_set)
89 | ground_truth_file = join(sub_set_base_path, video, 'groundtruth.txt')
90 | full_occlusion_file = join(sub_set_base_path, video, 'absence.label')
91 | cover = join(sub_set_base_path, video, 'cover.label')
92 | gt = np.genfromtxt(ground_truth_file, delimiter=',', dtype=float).astype(np.int)
93 | fo = np.genfromtxt(full_occlusion_file, dtype=int)
94 |
95 | video_crop_base_path = join(crop_path, sub_set, video)
96 | if not isdir(video_crop_base_path): makedirs(video_crop_base_path)
97 |
98 | subdir_paths = sorted(glob.glob(join(sub_set_base_path, video, '*.jpg')))
99 |
100 | trackid = 0
101 | for i, img in enumerate(subdir_paths):
102 | filename = Path(img).stem
103 | bbox = gt[i] # x,y,w,h
104 | im = cv2.imread(img)
105 | avg_chans = np.mean(im, axis=(0, 1))
106 | bbox = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]]
107 | x = crop_like_SiamFCx(im, bbox, instanc_size=instanc_size, padding=avg_chans)
108 | cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(int(filename), trackid)), x)
109 |
110 |
111 | def main(instanc_size=511, num_threads=24):
112 | crop_path = '/ssd/feiji/Research/Data/GOT-10k_crop{:d}'.format(instanc_size)
113 | if not isdir(crop_path): mkdir(crop_path)
114 |
115 | for sub_set in sub_sets:
116 | sub_set_base_path = join(base_path, sub_set)
117 | videos = sorted(listdir(sub_set_base_path))
118 | n_videos = len(videos)
119 | with futures.ProcessPoolExecutor(max_workers=num_threads) as executor:
120 | fs = [executor.submit(crop_video, sub_set, video, crop_path, instanc_size) for video in videos]
121 | for i, f in enumerate(futures.as_completed(fs)):
122 | # Write progress to error so that it can be seen
123 | printProgress(i, n_videos, prefix=sub_set, suffix='Done ', barLength=40)
124 |
125 |
126 | if __name__ == '__main__':
127 | since = time.time()
128 | main(int(sys.argv[1]), int(sys.argv[2]))
129 | time_elapsed = time.time() - since
130 | print('Total complete in {:.0f}m {:.0f}s'.format(
131 | time_elapsed // 60, time_elapsed % 60))
132 |
--------------------------------------------------------------------------------
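
crop_like_SiamFCx follows the SiamFC cropping rule: the exemplar region side is s_z = sqrt((w + 0.5(w+h)) * (h + 0.5(w+h))), and the search region is enlarged so that, once s_z is rescaled to exemplar_size pixels, the crop spans instanc_size pixels. A worked numeric example with an invented 100x50 box and the values main() uses by default (exemplar 127, search 511, context 0.5):

import numpy as np

w, h = 100.0, 50.0                            # invented target width/height
context_amount, exemplar_size, instanc_size = 0.5, 127, 511

wc_z = h + context_amount * (w + h)           # 125.0
hc_z = w + context_amount * (w + h)           # 175.0
s_z = np.sqrt(wc_z * hc_z)                    # ~147.9  exemplar crop side (px)
scale_z = exemplar_size / s_z                 # ~0.86   image px -> exemplar px
pad = (instanc_size - exemplar_size) / 2 / scale_z
s_x = s_z + 2 * pad                           # ~595.1  search crop side (px)
print(round(s_z, 1), round(s_x, 1))

--------------------------------------------------------------------------------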
/utils/utils.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 | import torch
4 | from PIL import Image
5 | from collections import OrderedDict
6 | import glob
7 | from os.path import realpath, join
8 |
9 | def smooth_l1_loss(bbox_pred, bbox_targets, sigma=3.0):
10 | sigma_2 = sigma ** 2
11 | in_box_diff = bbox_pred - bbox_targets
12 | abs_in_box_diff = torch.abs(in_box_diff)
13 | smoothL1_sign = (abs_in_box_diff < 1. / sigma_2).detach().float()
14 | out_loss_box = torch.pow(in_box_diff, 2) * (sigma_2 / 2.) * smoothL1_sign \
15 | + (abs_in_box_diff - (0.5 / sigma_2)) * (1. - smoothL1_sign)
16 | loss_box = out_loss_box.sum() / out_loss_box.shape[0]
17 | return loss_box
18 |
19 | def l1_loss(bbox_pred, bbox_targets):
20 | loss = (bbox_pred - bbox_targets).abs()
21 | return loss.sum().div(bbox_pred.shape[0])
22 |
23 | def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)):
24 | bbox = [float(x) for x in bbox]
25 | a = (out_sz-1) / (bbox[2]-bbox[0])
26 | b = (out_sz-1) / (bbox[3]-bbox[1])
27 | c = -a * bbox[0]
28 | d = -b * bbox[1]
29 | mapping = np.array([[a, 0, c],
30 | [0, b, d]]).astype(np.float)
31 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), borderMode=cv2.BORDER_CONSTANT, borderValue=padding)
32 | return crop
33 |
34 | def read_image(path):
35 | with open(path, 'rb') as fb:
36 | with Image.open(fb) as img:
37 | return img.convert('RGB')
38 |
39 | def cxy_wh_2_bbox(cxy, wh):
40 | return np.array([cxy[0] - wh[0] / 2, cxy[1] - wh[1] / 2, cxy[0] + wh[0] / 2, cxy[1] + wh[1] / 2]) # 0-index
41 |
42 |
43 | def get_subwindow_tracking(im, pos, model_sz, original_sz, avg_chans):
44 |
45 | if isinstance(pos, float):
46 | pos = [pos, pos]
47 | sz = original_sz
48 | im_sz = im.shape
49 | c = (original_sz+1) / 2
50 | context_xmin = round(pos[0] - c) # floor(pos(2) - sz(2) / 2);
51 | context_xmax = context_xmin + sz - 1
52 | context_ymin = round(pos[1] - c) # floor(pos(1) - sz(1) / 2);
53 | context_ymax = context_ymin + sz - 1
54 | left_pad = int(max(0., -context_xmin))
55 | top_pad = int(max(0., -context_ymin))
56 | right_pad = int(max(0., context_xmax - im_sz[1] + 1))
57 | bottom_pad = int(max(0., context_ymax - im_sz[0] + 1))
58 |
59 | context_xmin = context_xmin + left_pad
60 | context_xmax = context_xmax + left_pad
61 | context_ymin = context_ymin + top_pad
62 | context_ymax = context_ymax + top_pad
63 |
64 |     # zzp: a simpler, faster version
65 | r, c, k = im.shape
66 | if any([top_pad, bottom_pad, left_pad, right_pad]):
67 | te_im = np.zeros((r + top_pad + bottom_pad, c + left_pad + right_pad, k), np.uint8) # 0 is better than 1 initialization
68 | te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im
69 | if top_pad:
70 | te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans
71 | if bottom_pad:
72 | te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans
73 | if left_pad:
74 | te_im[:, 0:left_pad, :] = avg_chans
75 | if right_pad:
76 | te_im[:, c + left_pad:, :] = avg_chans
77 | im_patch_original = te_im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :]
78 | else:
79 | im_patch_original = im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :]
80 |
81 | if not np.array_equal(model_sz, original_sz):
82 | im_patch = cv2.resize(im_patch_original, (int(model_sz), int(model_sz))) # zzp: use cv to get a better speed
83 | else:
84 | im_patch = im_patch_original
85 |
86 | return im_patch
87 |
88 |
89 | def to_numpy(tensor):
90 | if torch.is_tensor(tensor):
91 | return tensor.cpu().numpy()
92 | elif type(tensor).__module__ != 'numpy':
93 | raise ValueError("Cannot convert {} to numpy array"
94 | .format(type(tensor)))
95 | return tensor
96 |
97 |
98 | def to_torch(ndarray):
99 | if type(ndarray).__module__ == 'numpy':
100 | return torch.from_numpy(ndarray)
101 | elif not torch.is_tensor(ndarray):
102 | raise ValueError("Cannot convert {} to torch tensor"
103 | .format(type(ndarray)))
104 | return ndarray
105 |
106 |
107 | def im_to_numpy(img):
108 | img = to_numpy(img)
109 | img = np.transpose(img, (1, 2, 0)) # H*W*C
110 | return img
111 |
112 |
113 | def im_to_torch(img):
114 | img = np.transpose(img, (2, 0, 1)) # C*H*W
115 | img = to_torch(img).float()
116 | return img
117 |
118 |
119 | def torch_to_img(img):
120 | img = to_numpy(torch.squeeze(img, 0))
121 | img = np.transpose(img, (1, 2, 0)) # H*W*C
122 | return img
123 |
124 |
125 | def cxy_wh_2_rect(pos, sz):
126 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) # 0-index
127 |
128 |
129 | def rect_2_cxy_wh(rect):
130 | return np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2]), np.array([rect[2], rect[3]]) # 0-index
131 |
132 |
133 | def cxy_wh_2_rect1(pos, sz):
134 | return np.array([pos[0]-sz[0]/2+1, pos[1]-sz[1]/2+1, sz[0], sz[1]]) # 1-index
135 |
136 |
137 | def rect1_2_cxy_wh(rect):
138 | return np.array([rect[0]+rect[2]/2-1, rect[1]+rect[3]/2-1]), np.array([rect[2], rect[3]]) # 0-index
139 |
140 |
141 | def get_axis_aligned_bbox(region):
142 | nv = region.size
143 | if nv == 8:
144 | cx = np.mean(region[0::2])
145 | cy = np.mean(region[1::2])
146 | x1 = min(region[0::2])
147 | x2 = max(region[0::2])
148 | y1 = min(region[1::2])
149 | y2 = max(region[1::2])
150 | A1 = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6])
151 | A2 = (x2 - x1) * (y2 - y1)
152 | s = np.sqrt(A1 / A2)
153 | w = s * (x2 - x1) + 1
154 | h = s * (y2 - y1) + 1
155 | else:
156 | x = region[0]
157 | y = region[1]
158 | w = region[2]
159 | h = region[3]
160 | cx = x+w/2
161 | cy = y+h/2
162 | return cx, cy, w, h
163 |
--------------------------------------------------------------------------------
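
smooth_l1_loss above is the standard piecewise form with the switch point at 1/sigma^2: (sigma^2/2)*d^2 for |d| below it and |d| - 0.5/sigma^2 above, summed over all elements and divided by the batch size rather than averaged element-wise. A tiny numeric check with invented values and sigma=3 (switch at |d| = 1/9):

import torch
from utils.utils import smooth_l1_loss  # import path as laid out in this repo

pred   = torch.tensor([[0.00, 1.00]])
target = torch.tensor([[0.05, 0.50]])
# |d| = 0.05 < 1/9  -> 0.5 * 9 * 0.05**2 = 0.01125
# |d| = 0.50 >= 1/9 -> 0.50 - 0.5 / 9    = 0.44444...
expected = (0.01125 + 0.5 - 0.5 / 9) / pred.shape[0]
print(float(smooth_l1_loss(pred, target, sigma=3.0)), expected)

--------------------------------------------------------------------------------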
/training_dataset/det/par_crop.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SiamMask
3 | # Licensed under The MIT License
4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
5 | # --------------------------------------------------------
6 | from os.path import join, isdir
7 | from os import mkdir, makedirs
8 | import cv2
9 | import numpy as np
10 | import glob
11 | import xml.etree.ElementTree as ET
12 | from concurrent import futures
13 | import time
14 | import sys
15 |
16 |
17 | # Print iterations progress (thanks StackOverflow)
18 | def printProgress(iteration, total, prefix='', suffix='', decimals=1, barLength=100):
19 | """
20 | Call in a loop to create terminal progress bar
21 | @params:
22 | iteration - Required : current iteration (Int)
23 | total - Required : total iterations (Int)
24 | prefix - Optional : prefix string (Str)
25 | suffix - Optional : suffix string (Str)
26 | decimals - Optional : positive number of decimals in percent complete (Int)
27 | barLength - Optional : character length of bar (Int)
28 | """
29 | formatStr = "{0:." + str(decimals) + "f}"
30 | percents = formatStr.format(100 * (iteration / float(total)))
31 | filledLength = int(round(barLength * iteration / float(total)))
32 |     bar = '█' * filledLength + '-' * (barLength - filledLength)
33 |     sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix))
34 | if iteration == total:
35 | sys.stdout.write('\x1b[2K\r')
36 | sys.stdout.flush()
37 |
38 |
39 | def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)):
40 | a = (out_sz - 1) / (bbox[2] - bbox[0])
41 | b = (out_sz - 1) / (bbox[3] - bbox[1])
42 | c = -a * bbox[0]
43 | d = -b * bbox[1]
44 | mapping = np.array([[a, 0, c],
45 | [0, b, d]]).astype(np.float)
46 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), borderMode=cv2.BORDER_CONSTANT, borderValue=padding)
47 | return crop
48 |
49 |
50 | def pos_s_2_bbox(pos, s):
51 | return [pos[0] - s / 2, pos[1] - s / 2, pos[0] + s / 2, pos[1] + s / 2]
52 |
53 |
54 | def crop_like_SiamFC(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)):
55 | target_pos = [(bbox[2] + bbox[0]) / 2., (bbox[3] + bbox[1]) / 2.]
56 | target_size = [bbox[2] - bbox[0], bbox[3] - bbox[1]]
57 | wc_z = target_size[1] + context_amount * sum(target_size)
58 | hc_z = target_size[0] + context_amount * sum(target_size)
59 | s_z = np.sqrt(wc_z * hc_z)
60 | scale_z = exemplar_size / s_z
61 | d_search = (instanc_size - exemplar_size) / 2
62 | pad = d_search / scale_z
63 | s_x = s_z + 2 * pad
64 |
65 | z = crop_hwc(image, pos_s_2_bbox(target_pos, s_z), exemplar_size, padding)
66 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding)
67 | return z, x
68 |
69 |
70 | def crop_like_SiamFCx(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)):
71 | target_pos = [(bbox[2] + bbox[0]) / 2., (bbox[3] + bbox[1]) / 2.]
72 | target_size = [bbox[2] - bbox[0], bbox[3] - bbox[1]]
73 | wc_z = target_size[1] + context_amount * sum(target_size)
74 | hc_z = target_size[0] + context_amount * sum(target_size)
75 | s_z = np.sqrt(wc_z * hc_z)
76 | scale_z = exemplar_size / s_z
77 | d_search = (instanc_size - exemplar_size) / 2
78 | pad = d_search / scale_z
79 | s_x = s_z + 2 * pad
80 |
81 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding)
82 | return x
83 |
84 |
85 | def crop_xml(xml, sub_set_crop_path, instanc_size=511):
86 | xmltree = ET.parse(xml)
87 | objects = xmltree.findall('object')
88 |
89 | frame_crop_base_path = join(sub_set_crop_path, xml.split('/')[-1].split('.')[0])
90 | if not isdir(frame_crop_base_path): makedirs(frame_crop_base_path)
91 |
92 | img_path = xml.replace('xml', 'JPEG').replace('Annotations', 'Data')
93 |
94 | im = cv2.imread(img_path)
95 | avg_chans = np.mean(im, axis=(0, 1))
96 |
97 | for id, object_iter in enumerate(objects):
98 | bndbox = object_iter.find('bndbox')
99 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text),
100 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)]
101 |
102 | # z, x = crop_like_SiamFC(im, bbox, instanc_size=instanc_size, padding=avg_chans)
103 | # x = crop_like_SiamFCx(im, bbox, instanc_size=instanc_size, padding=avg_chans)
104 | # cv2.imwrite(join(frame_crop_base_path, '{:06d}.{:02d}.z.jpg'.format(0, id)), z)
105 | x = crop_like_SiamFCx(im, bbox, instanc_size=instanc_size, padding=avg_chans)
106 | cv2.imwrite(join(frame_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(0, id)), x)
107 |
108 |
109 | def main(instanc_size=511, num_threads=24):
110 | crop_path = '/ssd/feiji/Research/Data/DET_crop{:d}'.format(instanc_size)
111 | if not isdir(crop_path): mkdir(crop_path)
112 | VID_base_path = '/ssd/feiji/Research/Data/ILSVRC2015'
113 | ann_base_path = join(VID_base_path, 'Annotations/DET/train/')
114 | sub_sets = ['ILSVRC2013_train', 'ILSVRC2014_train_0000', 'ILSVRC2014_train_0001','ILSVRC2014_train_0002','ILSVRC2014_train_0003','ILSVRC2014_train_0004','ILSVRC2014_train_0005','ILSVRC2014_train_0006', 'val']
115 | for sub_set in sub_sets:
116 | sub_set_base_path = join(ann_base_path, sub_set)
117 | if 'ILSVRC2013_train' == sub_set:
118 | xmls = sorted(glob.glob(join(sub_set_base_path, '*', '*.xml')))
119 | else:
120 | xmls = sorted(glob.glob(join(sub_set_base_path, '*.xml')))
121 |
122 | n_imgs = len(xmls)
123 | sub_set_crop_path = join(crop_path, sub_set)
124 | with futures.ProcessPoolExecutor(max_workers=num_threads) as executor:
125 | fs = [executor.submit(crop_xml, xml, sub_set_crop_path, instanc_size) for xml in xmls]
126 | for i, f in enumerate(futures.as_completed(fs)):
127 | printProgress(i, n_imgs, prefix=sub_set, suffix='Done ', barLength=80)
128 |
129 |
130 | if __name__ == '__main__':
131 | since = time.time()
132 | main(int(sys.argv[1]), int(sys.argv[2]))
133 | time_elapsed = time.time() - since
134 | print('Total complete in {:.0f}m {:.0f}s'.format(
135 | time_elapsed // 60, time_elapsed % 60))
136 |
--------------------------------------------------------------------------------
/toolkit/evaluation/ar_benchmark.py:
--------------------------------------------------------------------------------
1 | """
2 | @author
3 | """
4 |
5 | import warnings
6 | import itertools
7 | import numpy as np
8 |
9 | from colorama import Style, Fore
10 | from ..utils import calculate_failures, calculate_accuracy
11 |
12 | class AccuracyRobustnessBenchmark:
13 | """
14 | Args:
15 | dataset:
16 | burnin:
17 | """
18 | def __init__(self, dataset, burnin=10):
19 | self.dataset = dataset
20 | self.burnin = burnin
21 |
22 | def eval(self, eval_trackers=None):
23 | """
24 | Args:
25 | eval_tags: list of tag
26 | eval_trackers: list of tracker name
27 | Returns:
28 | ret: dict of results
29 | """
30 | if eval_trackers is None:
31 | eval_trackers = self.dataset.tracker_names
32 | if isinstance(eval_trackers, str):
33 | eval_trackers = [eval_trackers]
34 |
35 | result = {}
36 | for tracker_name in eval_trackers:
37 | accuracy, failures = self._calculate_accuracy_robustness(tracker_name)
38 | result[tracker_name] = {'overlaps': accuracy,
39 | 'failures': failures}
40 | return result
41 |
42 | def show_result(self, result, eao_result=None, show_video_level=False, helight_threshold=0.5):
43 | """pretty print result
44 | Args:
45 | result: returned dict from function eval
46 | """
47 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12)
48 | if eao_result is not None:
49 | header = "|{:^"+str(tracker_name_len)+"}|{:^10}|{:^12}|{:^13}|{:^7}|"
50 | header = header.format('Tracker Name',
51 | 'Accuracy', 'Robustness', 'Lost Number', 'EAO')
52 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^10.3f}|{:^12.3f}|{:^13.1f}|{:^7.3f}|"
53 | else:
54 | header = "|{:^"+str(tracker_name_len)+"}|{:^10}|{:^12}|{:^13}|"
55 | header = header.format('Tracker Name',
56 | 'Accuracy', 'Robustness', 'Lost Number')
57 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^10.3f}|{:^12.3f}|{:^13.1f}|"
58 | bar = '-'*len(header)
59 | print(bar)
60 | print(header)
61 | print(bar)
62 | if eao_result is not None:
63 | tracker_eao = sorted(eao_result.items(),
64 | key=lambda x:x[1]['all'],
65 | reverse=True)[:20]
66 | tracker_names = [x[0] for x in tracker_eao]
67 | else:
68 | tracker_names = list(result.keys())
69 | for tracker_name in tracker_names:
70 | # for tracker_name, ret in result.items():
71 | ret = result[tracker_name]
72 | overlaps = list(itertools.chain(*ret['overlaps'].values()))
73 | accuracy = np.nanmean(overlaps)
74 | length = sum([len(x) for x in ret['overlaps'].values()])
75 | failures = list(ret['failures'].values())
76 | lost_number = np.mean(np.sum(failures, axis=0))
77 | robustness = np.mean(np.sum(np.array(failures), axis=0) / length) * 100
78 | if eao_result is None:
79 | print(formatter.format(tracker_name, accuracy, robustness, lost_number))
80 | else:
81 | print(formatter.format(tracker_name, accuracy, robustness, lost_number, eao_result[tracker_name]['all']))
82 | print(bar)
83 |
84 | if show_video_level and len(result) < 10:
85 | print('\n\n')
86 | header1 = "|{:^14}|".format("Tracker name")
87 | header2 = "|{:^14}|".format("Video name")
88 | for tracker_name in result.keys():
89 | header1 += ("{:^17}|").format(tracker_name)
90 | header2 += "{:^8}|{:^8}|".format("Acc", "LN")
91 | print('-'*len(header1))
92 | print(header1)
93 | print('-'*len(header1))
94 | print(header2)
95 | print('-'*len(header1))
96 | videos = list(result[tracker_name]['overlaps'].keys())
97 | for video in videos:
98 | row = "|{:^14}|".format(video)
99 | for tracker_name in result.keys():
100 | overlaps = result[tracker_name]['overlaps'][video]
101 | accuracy = np.nanmean(overlaps)
102 | failures = result[tracker_name]['failures'][video]
103 | lost_number = np.mean(failures)
104 |
105 | accuracy_str = "{:^8.3f}".format(accuracy)
106 | if accuracy < helight_threshold:
107 | row += f'{Fore.RED}{accuracy_str}{Style.RESET_ALL}|'
108 | else:
109 | row += accuracy_str+'|'
110 | lost_num_str = "{:^8.3f}".format(lost_number)
111 | if lost_number > 0:
112 | row += f'{Fore.RED}{lost_num_str}{Style.RESET_ALL}|'
113 | else:
114 | row += lost_num_str+'|'
115 | print(row)
116 | print('-'*len(header1))
117 |
118 | def _calculate_accuracy_robustness(self, tracker_name):
119 | overlaps = {}
120 | failures = {}
121 | all_length = {}
122 | for i in range(len(self.dataset)):
123 | video = self.dataset[i]
124 | gt_traj = video.gt_traj
125 | if tracker_name not in video.pred_trajs:
126 | tracker_trajs = video.load_tracker(self.dataset.tracker_path, tracker_name, False)
127 | else:
128 | tracker_trajs = video.pred_trajs[tracker_name]
129 | overlaps_group = []
130 | num_failures_group = []
131 | for tracker_traj in tracker_trajs:
132 | num_failures = calculate_failures(tracker_traj)[0]
133 | overlaps_ = calculate_accuracy(tracker_traj, gt_traj,
134 | burnin=10, bound=(video.width, video.height))[1]
135 | overlaps_group.append(overlaps_)
136 | num_failures_group.append(num_failures)
137 | with warnings.catch_warnings():
138 | warnings.simplefilter("ignore", category=RuntimeWarning)
139 | overlaps[video.name] = np.nanmean(overlaps_group, axis=0).tolist()
140 | failures[video.name] = num_failures_group
141 | return overlaps, failures
142 |
--------------------------------------------------------------------------------
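
show_result condenses the per-video output of eval: accuracy is the mean overlap over every frame of every video, the lost number is the failure count summed over videos and averaged over repeated runs, and robustness is reported as failures per 100 frames. The same aggregation for one tracker, with invented data (two videos, a single run each):

import itertools
import numpy as np

ret = {'overlaps': {'vid_a': [0.7, 0.6, 0.5], 'vid_b': [0.8, 0.4]},
       'failures': {'vid_a': [1], 'vid_b': [0]}}

overlaps = list(itertools.chain(*ret['overlaps'].values()))
accuracy = np.nanmean(overlaps)                          # 0.6
length = sum(len(x) for x in ret['overlaps'].values())   # 5 frames in total
failures = list(ret['failures'].values())
lost_number = np.mean(np.sum(failures, axis=0))          # 1.0
robustness = np.mean(np.sum(np.array(failures), axis=0) / length) * 100  # 20.0
print(round(float(accuracy), 3), lost_number, round(float(robustness), 2))

--------------------------------------------------------------------------------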
/training_dataset/coco/par_crop.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SiamMask
3 | # Licensed under The MIT License
4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
5 | # --------------------------------------------------------
6 | from pycocotools.coco import COCO
7 | import cv2
8 | import numpy as np
9 | from os.path import join, isdir
10 | from os import mkdir, makedirs
11 | from concurrent import futures
12 | import sys
13 | import time
14 | import argparse
15 |
16 | parser = argparse.ArgumentParser(description='COCO Parallel Preprocessing for SiamMask')
17 | parser.add_argument('--exemplar_size', type=int, default=127, help='size of exemplar')
18 | parser.add_argument('--context_amount', type=float, default=0.5, help='context amount')
19 | parser.add_argument('--search_size', type=int, default=255, help='size of cropped search region')
20 | parser.add_argument('--enable_mask', action='store_false', help='whether crop mask')
21 | parser.add_argument('--num_threads', type=int, default=24, help='number of threads')
22 | args = parser.parse_args()
23 |
24 |
25 | # Print iterations progress (thanks StackOverflow)
26 | def printProgress(iteration, total, prefix='', suffix='', decimals=1, barLength=100):
27 | """
28 | Call in a loop to create terminal progress bar
29 | @params:
30 | iteration - Required : current iteration (Int)
31 | total - Required : total iterations (Int)
32 | prefix - Optional : prefix string (Str)
33 | suffix - Optional : suffix string (Str)
34 | decimals - Optional : positive number of decimals in percent complete (Int)
35 | barLength - Optional : character length of bar (Int)
36 | """
37 | formatStr = "{0:." + str(decimals) + "f}"
38 | percents = formatStr.format(100 * (iteration / float(total)))
39 | filledLength = int(round(barLength * iteration / float(total)))
40 |     bar = '█' * filledLength + '-' * (barLength - filledLength)
41 |     sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix))
42 | if iteration == total:
43 | sys.stdout.write('\x1b[2K\r')
44 | sys.stdout.flush()
45 |
46 |
47 | def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)):
48 | a = (out_sz-1) / (bbox[2]-bbox[0])
49 | b = (out_sz-1) / (bbox[3]-bbox[1])
50 | c = -a * bbox[0]
51 | d = -b * bbox[1]
52 | mapping = np.array([[a, 0, c],
53 | [0, b, d]]).astype(np.float64)
54 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz),
55 | borderMode=cv2.BORDER_CONSTANT, borderValue=padding)
56 | return crop
57 |
58 |
59 | def pos_s_2_bbox(pos, s):
60 | return [pos[0]-s/2, pos[1]-s/2, pos[0]+s/2, pos[1]+s/2]
61 |
62 |
63 | def crop_like_SiamFCx(image, bbox, exemplar_size=127, context_amount=0.5, search_size=255, padding=(0, 0, 0)):
64 | target_pos = [(bbox[2]+bbox[0])/2., (bbox[3]+bbox[1])/2.]
65 | target_size = [bbox[2]-bbox[0]+1, bbox[3]-bbox[1]+1]
66 | wc_z = target_size[1] + context_amount * sum(target_size)
67 | hc_z = target_size[0] + context_amount * sum(target_size)
68 | s_z = np.sqrt(wc_z * hc_z)
69 | scale_z = exemplar_size / s_z
70 | d_search = (search_size - exemplar_size) / 2
71 | pad = d_search / scale_z
72 | s_x = s_z + 2 * pad
73 |
74 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), search_size, padding)
75 | return x
76 |
77 |
78 | def crop_img(img, anns, set_crop_base_path, set_img_base_path,
79 | exemplar_size=127, context_amount=0.5, search_size=511, enable_mask=True):
80 | frame_crop_base_path = join(set_crop_base_path, img['file_name'].split('/')[-1].split('.')[0])
81 | if not isdir(frame_crop_base_path): makedirs(frame_crop_base_path)
82 |
83 | im = cv2.imread('{}/{}'.format(set_img_base_path, img['file_name']))
84 | avg_chans = np.mean(im, axis=(0, 1))
85 | for track_id, ann in enumerate(anns):
86 | rect = ann['bbox']
87 | if rect[2] <= 0 or rect[3] <= 0:
88 | continue
89 | bbox = [rect[0], rect[1], rect[0]+rect[2]-1, rect[1]+rect[3]-1]
90 |
91 | x = crop_like_SiamFCx(im, bbox, exemplar_size=exemplar_size, context_amount=context_amount,
92 | search_size=search_size, padding=avg_chans)
93 | cv2.imwrite(join(frame_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(0, track_id)), x)
94 |
95 | if enable_mask:
96 | im_mask = coco.annToMask(ann).astype(np.float32)
97 | x = (crop_like_SiamFCx(im_mask, bbox, exemplar_size=exemplar_size, context_amount=context_amount,
98 | search_size=search_size) > 0.5).astype(np.uint8) * 255
99 | cv2.imwrite(join(frame_crop_base_path, '{:06d}.{:02d}.m.png'.format(0, track_id)), x)
100 |
101 |
102 | def main(exemplar_size=127, context_amount=0.5, search_size=511, enable_mask=True, num_threads=24):
103 | global coco  # shared so that worker processes can generate masks
104 | data_dir = '/ssd/feiji/Research/Data/COCO'
105 | crop_path = '/ssd/feiji/Research/Data/COCO_crop{:d}'.format(search_size)
106 | if not isdir(crop_path): mkdir(crop_path)
107 |
108 | for data_subset in ['val2017', 'train2017']:
109 | set_crop_base_path = join(crop_path, data_subset)
110 | set_img_base_path = join(data_dir, data_subset)
111 |
112 | anno_file = '{}/annotations/instances_{}.json'.format(data_dir, data_subset)
113 | coco = COCO(anno_file)
114 | n_imgs = len(coco.imgs)
115 | with futures.ProcessPoolExecutor(max_workers=num_threads) as executor:
116 | fs = [executor.submit(crop_img, coco.loadImgs(id)[0],
117 | coco.loadAnns(coco.getAnnIds(imgIds=id, iscrowd=None)),
118 | set_crop_base_path, set_img_base_path,
119 | exemplar_size, context_amount, search_size,
120 | enable_mask) for id in coco.imgs]
121 | for i, f in enumerate(futures.as_completed(fs)):
122 | printProgress(i, n_imgs, prefix=data_subset, suffix='Done ', barLength=40)
123 | print('done')
124 |
125 |
126 | if __name__ == '__main__':
127 | since = time.time()
128 | main(args.exemplar_size, args.context_amount, args.search_size, False, args.num_threads)
129 | time_elapsed = time.time() - since
130 | print('Total complete in {:.0f}m {:.0f}s'.format(
131 | time_elapsed // 60, time_elapsed % 60))
132 |
--------------------------------------------------------------------------------
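As a quick sanity check of the SiamFC-style context arithmetic in `crop_like_SiamFCx` above, here is a minimal sketch (not part of the script) for an assumed 100x50 box with context_amount=0.5, exemplar_size=127 and search_size=511 (the crop_img/main defaults above):

    import numpy as np

    w, h = 100.0, 50.0                                   # assumed example box size
    context_amount, exemplar_size, search_size = 0.5, 127, 511
    wc_z = w + context_amount * (w + h)                  # width plus context
    hc_z = h + context_amount * (w + h)                  # height plus context
    s_z = np.sqrt(wc_z * hc_z)                           # exemplar crop side in image pixels (~147.9)
    scale_z = exemplar_size / s_z
    pad = (search_size - exemplar_size) / 2 / scale_z
    s_x = s_z + 2 * pad                                  # search crop side in image pixels (~595.1)
    print('s_z={:.1f}, s_x={:.1f}'.format(s_z, s_x))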
/training_dataset/vid/par_crop.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # SiamMask
3 | # Licensed under The MIT License
4 | # Written by Qiang Wang (wangqiang2015 at ia.ac.cn)
5 | # --------------------------------------------------------
6 | from os.path import join, isdir
7 | from os import listdir, mkdir, makedirs
8 | import cv2
9 | import numpy as np
10 | import glob
11 | import xml.etree.ElementTree as ET
12 | from concurrent import futures
13 | import sys
14 | import time
15 |
16 | VID_base_path = '/ssd/feiji/Research/Data/ILSVRC2015'
17 | ann_base_path = join(VID_base_path, 'Annotations/VID/train/')
18 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e'})
19 | # Print iterations progress (thanks StackOverflow)
20 | def printProgress(iteration, total, prefix='', suffix='', decimals=1, barLength=100):
21 | """
22 | Call in a loop to create terminal progress bar
23 | @params:
24 | iteration - Required : current iteration (Int)
25 | total - Required : total iterations (Int)
26 | prefix - Optional : prefix string (Str)
27 | suffix - Optional : suffix string (Str)
28 | decimals - Optional : positive number of decimals in percent complete (Int)
29 | barLength - Optional : character length of bar (Int)
30 | """
31 | formatStr = "{0:." + str(decimals) + "f}"
32 | percents = formatStr.format(100 * (iteration / float(total)))
33 | filledLength = int(round(barLength * iteration / float(total)))
34 | bar = '█' * filledLength + '-' * (barLength - filledLength)
35 | sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix))
36 | if iteration == total:
37 | sys.stdout.write('\x1b[2K\r')
38 | sys.stdout.flush()
39 |
40 |
41 | def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)):
42 | a = (out_sz-1) / (bbox[2]-bbox[0])
43 | b = (out_sz-1) / (bbox[3]-bbox[1])
44 | c = -a * bbox[0]
45 | d = -b * bbox[1]
46 | mapping = np.array([[a, 0, c],
47 | [0, b, d]]).astype(np.float64)
48 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), borderMode=cv2.BORDER_CONSTANT, borderValue=padding)
49 | return crop
50 |
51 |
52 | def pos_s_2_bbox(pos, s):
53 | return [pos[0]-s/2, pos[1]-s/2, pos[0]+s/2, pos[1]+s/2]
54 |
55 |
56 | def crop_like_SiamFC(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)):
57 | target_pos = [(bbox[2]+bbox[0])/2., (bbox[3]+bbox[1])/2.]
58 | target_size = [bbox[2]-bbox[0], bbox[3]-bbox[1]]
59 | wc_z = target_size[1] + context_amount * sum(target_size)
60 | hc_z = target_size[0] + context_amount * sum(target_size)
61 | s_z = np.sqrt(wc_z * hc_z)
62 | scale_z = exemplar_size / s_z
63 | d_search = (instanc_size - exemplar_size) / 2
64 | pad = d_search / scale_z
65 | s_x = s_z + 2 * pad
66 |
67 | z = crop_hwc(image, pos_s_2_bbox(target_pos, s_z), exemplar_size, padding)
68 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding)
69 | return z, x
70 |
71 |
72 | def crop_like_SiamFCx(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)):
73 | target_pos = [(bbox[2]+bbox[0])/2., (bbox[3]+bbox[1])/2.]
74 | target_size = [bbox[2]-bbox[0], bbox[3]-bbox[1]]
75 | wc_z = target_size[1] + context_amount * sum(target_size)
76 | hc_z = target_size[0] + context_amount * sum(target_size)
77 | s_z = np.sqrt(wc_z * hc_z)
78 | scale_z = exemplar_size / s_z
79 | d_search = (instanc_size - exemplar_size) / 2
80 | pad = d_search / scale_z
81 | s_x = s_z + 2 * pad
82 |
83 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding)
84 | return x
85 |
86 |
87 | def crop_video(sub_set, video, crop_path, instanc_size):
88 | video_crop_base_path = join(crop_path, sub_set, video)
89 | if not isdir(video_crop_base_path): makedirs(video_crop_base_path)
90 |
91 | sub_set_base_path = join(ann_base_path, sub_set)
92 | xmls = sorted(glob.glob(join(sub_set_base_path, video, '*.xml')))
93 | for xml in xmls:
94 | xmltree = ET.parse(xml)
95 | # size = xmltree.findall('size')[0]
96 | # frame_sz = [int(it.text) for it in size]
97 | objects = xmltree.findall('object')
98 | objs = []
99 | filename = xmltree.findall('filename')[0].text
100 |
101 | im = cv2.imread(xml.replace('xml', 'JPEG').replace('Annotations', 'Data'))
102 | avg_chans = np.mean(im, axis=(0, 1))
103 | for object_iter in objects:
104 | trackid = int(object_iter.find('trackid').text)
105 | # name = (object_iter.find('name')).text
106 | bndbox = object_iter.find('bndbox')
107 | # occluded = int(object_iter.find('occluded').text)
108 |
109 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text),
110 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)]
111 | # z, x = crop_like_SiamFC(im, bbox, instanc_size=instanc_size, padding=avg_chans)
112 | # cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.z.jpg'.format(int(filename), trackid)), z)
113 | # cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(int(filename), trackid)), x)
114 |
115 | x = crop_like_SiamFCx(im, bbox, instanc_size=instanc_size, padding=avg_chans)
116 | cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(int(filename), trackid)), x)
117 |
118 |
119 | def main(instanc_size=511, num_threads=24):
120 | crop_path = '/ssd/feiji/Research/Data/VID_crop{:d}'.format(instanc_size)
121 | if not isdir(crop_path): mkdir(crop_path)
122 |
123 | for sub_set in sub_sets:
124 | sub_set_base_path = join(ann_base_path, sub_set)
125 | videos = sorted(listdir(sub_set_base_path))
126 | n_videos = len(videos)
127 | with futures.ProcessPoolExecutor(max_workers=num_threads) as executor:
128 | fs = [executor.submit(crop_video, sub_set, video, crop_path, instanc_size) for video in videos]
129 | for i, f in enumerate(futures.as_completed(fs)):
130 | # report progress as each video finishes cropping
131 | printProgress(i, n_videos, prefix=sub_set, suffix='Done ', barLength=40)
132 |
133 |
134 | if __name__ == '__main__':
135 | since = time.time()
136 | main(int(sys.argv[1]), int(sys.argv[2]))
137 | time_elapsed = time.time() - since
138 | print('Total complete in {:.0f}m {:.0f}s'.format(
139 | time_elapsed // 60, time_elapsed % 60))
140 |
--------------------------------------------------------------------------------
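Unlike the COCO script above, this one takes its two arguments positionally from `sys.argv` rather than through argparse, so (assuming the hard-coded `VID_base_path` exists on the machine) the expected invocation is along the lines of:

    python par_crop.py 511 24    # instanc_size=511, num_threads=24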
/toolkit/evaluation/f1_benchmark.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 |
4 | from glob import glob
5 | from tqdm import tqdm
6 | from colorama import Style, Fore
7 |
8 | from ..utils import determine_thresholds, calculate_accuracy, calculate_f1
9 |
10 | class F1Benchmark:
11 | def __init__(self, dataset):
12 | """
13 | Args:
14 | dataset: dataset object to evaluate on
15 | """
16 | self.dataset = dataset
17 |
18 | def eval(self, eval_trackers=None):
19 | """
20 | Args:
21 | eval_trackers: list of tracker names; if None, all trackers
22 | registered on the dataset are evaluated
23 | Returns:
24 | ret: dict of precision/recall/f1 results per tracker
25 | """
26 | if eval_trackers is None:
27 | eval_trackers = self.dataset.tracker_names
28 | if isinstance(eval_trackers, str):
29 | eval_trackers = [eval_trackers]
30 |
31 | ret = {}
32 | for tracker_name in eval_trackers:
33 | precision, recall, f1 = self._cal_precision_recall(tracker_name)
34 | ret[tracker_name] = {"precision": precision,
35 | "recall": recall,
36 | "f1": f1
37 | }
38 | return ret
39 |
40 | def _cal_precision_recall(self, tracker_name):
41 | score = []
42 | # for i in range(len(self.dataset)):
43 | # video = self.dataset[i]
44 | for video in self.dataset:
45 | if tracker_name not in video.confidence:
46 | score += video.load_tracker(self.dataset.tracker_path, tracker_name, False)[1]
47 | else:
48 | score += video.confidence[tracker_name]
49 | score = np.array(score)
50 | thresholds = determine_thresholds(score)[::-1]
51 |
52 | precision = {}
53 | recall = {}
54 | f1 = {}
55 | for i in range(len(self.dataset)):
56 | video = self.dataset[i]
57 | gt_traj = video.gt_traj
58 | N = sum([1 for x in gt_traj if len(x) > 1])
59 | if tracker_name not in video.pred_trajs:
60 | tracker_traj, score = video.load_tracker(self.dataset.tracker_path, tracker_name, False)
61 | else:
62 | tracker_traj = video.pred_trajs[tracker_name]
63 | score = video.confidence[tracker_name]
64 | overlaps = calculate_accuracy(tracker_traj, gt_traj, \
65 | bound=(video.width,video.height))[1]
66 | f1[video.name], precision[video.name], recall[video.name] = \
67 | calculate_f1(overlaps, score, (video.width,video.height),thresholds, N)
68 | return precision, recall, f1
69 |
70 | def show_result(self, result, show_video_level=False, highlight_threshold=0.5):
71 | """pretty print result
72 | Args:
73 | result: returned dict from function eval
74 | """
75 | # sort tracker according to f1
76 | sorted_tracker = {}
77 | for tracker_name, ret in result.items():
78 | precision = np.mean(list(ret['precision'].values()), axis=0)
79 | recall = np.mean(list(ret['recall'].values()), axis=0)
80 | f1 = 2 * precision * recall / (precision + recall)
81 | max_idx = np.argmax(f1)
82 | sorted_tracker[tracker_name] = (precision[max_idx], recall[max_idx],
83 | f1[max_idx])
84 | sorted_tracker_ = sorted(sorted_tracker.items(),
85 | key=lambda x:x[1][2],
86 | reverse=True)[:20]
87 | tracker_names = [x[0] for x in sorted_tracker_]
88 |
89 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12)
90 | header = "|{:^"+str(tracker_name_len)+"}|{:^11}|{:^8}|{:^7}|"
91 | header = header.format('Tracker Name',
92 | 'Precision', 'Recall', 'F1')
93 | bar = '-' * len(header)
94 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^11.3f}|{:^8.3f}|{:^7.3f}|"
95 | print(bar)
96 | print(header)
97 | print(bar)
98 | # for tracker_name, ret in result.items():
99 | # precision = np.mean(list(ret['precision'].values()), axis=0)
100 | # recall = np.mean(list(ret['recall'].values()), axis=0)
101 | # f1 = 2 * precision * recall / (precision + recall)
102 | # max_idx = np.argmax(f1)
103 | for tracker_name in tracker_names:
104 | precision = sorted_tracker[tracker_name][0]
105 | recall = sorted_tracker[tracker_name][1]
106 | f1 = sorted_tracker[tracker_name][2]
107 | print(formatter.format(tracker_name, precision, recall, f1))
108 | print(bar)
109 |
110 | if show_video_level and len(result) < 10:
111 | print('\n\n')
112 | header1 = "|{:^14}|".format("Tracker name")
113 | header2 = "|{:^14}|".format("Video name")
114 | for tracker_name in result.keys():
115 | # col_len = max(20, len(tracker_name))
116 | header1 += ("{:^28}|").format(tracker_name)
117 | header2 += "{:^11}|{:^8}|{:^7}|".format("Precision", "Recall", "F1")
118 | print('-'*len(header1))
119 | print(header1)
120 | print('-'*len(header1))
121 | print(header2)
122 | print('-'*len(header1))
123 | videos = list(result[tracker_name]['precision'].keys())
124 | for video in videos:
125 | row = "|{:^14}|".format(video)
126 | for tracker_name in result.keys():
127 | precision = result[tracker_name]['precision'][video]
128 | recall = result[tracker_name]['recall'][video]
129 | f1 = result[tracker_name]['f1'][video]
130 | max_idx = np.argmax(f1)
131 | precision_str = "{:^11.3f}".format(precision[max_idx])
132 | if precision[max_idx] < highlight_threshold:
133 | row += f'{Fore.RED}{precision_str}{Style.RESET_ALL}|'
134 | else:
135 | row += precision_str+'|'
136 | recall_str = "{:^8.3f}".format(recall[max_idx])
137 | if recall[max_idx] < highlight_threshold:
138 | row += f'{Fore.RED}{recall_str}{Style.RESET_ALL}|'
139 | else:
140 | row += recall_str+'|'
141 | f1_str = "{:^7.3f}".format(f1[max_idx])
142 | if f1[max_idx] < highlight_threshold:
143 | row += f'{Fore.RED}{f1_str}{Style.RESET_ALL}|'
144 | else:
145 | row += f1_str+'|'
146 | print(row)
147 | print('-'*len(header1))
148 |
--------------------------------------------------------------------------------
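A minimal usage sketch (paths and tracker name are placeholders; it assumes the dataset annotations and tracker result files are already on disk), following how `tools/eval.py` later drives this class:

    from toolkit.datasets import VOTLTDataset
    from toolkit.evaluation import F1Benchmark

    dataset = VOTLTDataset('VOT2018-LT', '/path/to/dataset/VOT2018-LT')   # assumed location
    dataset.set_tracker('/path/to/results/VOT2018-LT', ['CGACD'])         # assumed tracker name
    benchmark = F1Benchmark(dataset)
    result = benchmark.eval('CGACD')   # {'CGACD': {'precision': ..., 'recall': ..., 'f1': ...}}
    benchmark.show_result(result, show_video_level=True)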
/toolkit/utils/statistics.py:
--------------------------------------------------------------------------------
1 | """
2 | @author fangyi.zhang@vipl.ict.ac.cn
3 | """
4 |
5 | import numpy as np
6 | from numba import jit
7 | from . import region
8 |
9 | def calculate_failures(trajectory):
10 | """ Calculate number of failures
11 | Args:
12 | trajectory: list of bbox
13 | Returns:
14 | num_failures: number of failures
15 | failures: failures point in trajectory, start with 0
16 | """
17 | failures = [i for i, x in zip(range(len(trajectory)), trajectory)
18 | if len(x) == 1 and x[0] == 2]
19 | num_failures = len(failures)
20 | return num_failures, failures
21 |
22 | def calculate_accuracy(pred_trajectory, gt_trajectory,
23 | burnin=0, ignore_unknown=True, bound=None):
24 | """Caculate accuracy socre as average overlap over the entire sequence
25 | Args:
26 | trajectory: list of bbox
27 | gt_trajectory: list of bbox
28 | burnin: number of frames that have to be ignored after the failure
29 | ignore_unknown: ignore frames where the overlap is unknown
30 | bound: bounding region
31 | Return:
32 | acc: average overlap
33 | overlaps: per frame overlaps
34 | """
35 | pred_trajectory_ = pred_trajectory
36 | if not ignore_unknown:
37 | unknown = [len(x)==1 and x[0] == 0 for x in pred_trajectory]
38 |
39 | if burnin > 0:
40 | pred_trajectory_ = pred_trajectory[:]
41 | mask = [len(x)==1 and x[0] == 1 for x in pred_trajectory]
42 | for i in range(len(mask)):
43 | if mask[i]:
44 | for j in range(burnin):
45 | if i + j < len(mask):
46 | pred_trajectory_[i+j] = [0]
47 | min_len = min(len(pred_trajectory_), len(gt_trajectory))
48 | overlaps = region.vot_overlap_traj(pred_trajectory_[:min_len],
49 | gt_trajectory[:min_len], bound)
50 |
51 | if not ignore_unknown:
52 | overlaps = [0 if u else o for o, u in zip(overlaps, unknown)]
53 |
54 | acc = 0
55 | if len(overlaps) > 0:
56 | acc = np.nanmean(overlaps)
57 | return acc, overlaps
58 |
59 | # def caculate_expected_overlap(pred_trajectorys, gt_trajectorys, skip_init, traj_length=None,
60 | # weights=None, tags=['all']):
61 | # """ Caculate expected overlap
62 | # Args:
63 | # pred_trajectory: list of bbox
64 | # gt_trajectory: list of bbox
65 | # traj_length: a list of sequence length for which the overlap should be evaluated
66 | # weights: a list of per-sequence weights that indicate how much does each sequence
67 | # contribute to the estimate
68 | # tags: set list of tags for which to perform calculation
69 | # """
70 | # overlaps = [calculate_accuracy(pred, gt)[1]
71 | # for pred, gt in zip(pred_trajectorys, gt_trajectorys)]
72 | # failures = [calculate_accuracy(pred, gt)[1]
73 | # for pred, gt in zip(pred_trajectorys, gt_trajectorys)]
74 | #
75 | # if traj_length is None:
76 | # traj_length = range(1, max([len(x) for x in gt_trajectorys])+1)
77 | # traj_length = list(set(traj_length))
78 |
79 | @jit(nopython=True)
80 | def overlap_ratio(rect1, rect2):
81 | '''Compute overlap ratio between two rects
82 | Args
83 | rect:2d array of N x [x,y,w,h]
84 | Return:
85 | iou
86 | '''
87 | # if rect1.ndim==1:
88 | # rect1 = rect1[np.newaxis, :]
89 | # if rect2.ndim==1:
90 | # rect2 = rect2[np.newaxis, :]
91 | left = np.maximum(rect1[:,0], rect2[:,0])
92 | right = np.minimum(rect1[:,0]+rect1[:,2], rect2[:,0]+rect2[:,2])
93 | top = np.maximum(rect1[:,1], rect2[:,1])
94 | bottom = np.minimum(rect1[:,1]+rect1[:,3], rect2[:,1]+rect2[:,3])
95 |
96 | intersect = np.maximum(0,right - left) * np.maximum(0,bottom - top)
97 | union = rect1[:,2]*rect1[:,3] + rect2[:,2]*rect2[:,3] - intersect
98 | iou = intersect / union
99 | iou = np.maximum(np.minimum(1, iou), 0)
100 | return iou
101 |
102 | @jit(nopython=True)
103 | def success_overlap(gt_bb, result_bb, n_frame):
104 | thresholds_overlap = np.arange(0, 1.05, 0.05)
105 | success = np.zeros(len(thresholds_overlap))
106 | iou = np.ones(len(gt_bb)) * (-1)
107 | mask = np.sum(gt_bb > 0, axis=1) == 4
108 | iou[mask] = overlap_ratio(gt_bb[mask], result_bb[mask])
109 | for i in range(len(thresholds_overlap)):
110 | success[i] = np.sum(iou > thresholds_overlap[i]) / float(n_frame)
111 | return success
112 |
113 | @jit(nopython=True)
114 | def success_error(gt_center, result_center, thresholds, n_frame):
115 | # n_frame = len(gt_center)
116 | success = np.zeros(len(thresholds))
117 | dist = np.ones(len(gt_center)) * (-1)
118 | mask = np.sum(gt_center > 0, axis=1) == 2
119 | dist[mask] = np.sqrt(np.sum(
120 | np.power(gt_center[mask] - result_center[mask], 2), axis=1))
121 | for i in range(len(thresholds)):
122 | success[i] = np.sum(dist <= thresholds[i]) / float(n_frame)
123 | return success
124 |
125 | @jit(nopython=True)
126 | def determine_thresholds(scores, resolution=100):
127 | """
128 | Args:
129 | scores: 1d array of score
130 | """
131 | scores = np.sort(scores[np.logical_not(np.isnan(scores))])
132 | delta = np.floor(len(scores) / (resolution - 2))
133 | idxs = np.floor(np.linspace(delta-1, len(scores)-delta, resolution-2)+0.5).astype(np.int32)
134 | thresholds = np.zeros((resolution))
135 | thresholds[0] = - np.inf
136 | thresholds[-1] = np.inf
137 | thresholds[1:-1] = scores[idxs]
138 | return thresholds
139 |
140 | @jit(nopython=True)
141 | def calculate_f1(overlaps, score, bound, thresholds, N):
142 | overlaps = np.array(overlaps)
143 | overlaps[np.isnan(overlaps)] = 0
144 | score = np.array(score)
145 | score[np.isnan(score)] = 0
146 | precision = np.zeros(len(thresholds))
147 | recall = np.zeros(len(thresholds))
148 | for i, th in enumerate(thresholds):
149 | if th == - np.inf:
150 | idx = score > 0
151 | else:
152 | idx = score >= th
153 | if np.sum(idx) == 0:
154 | precision[i] = 1
155 | recall[i] = 0
156 | else:
157 | precision[i] = np.mean(overlaps[idx])
158 | recall[i] = np.sum(overlaps[idx]) / N
159 | f1 = 2 * precision * recall / (precision + recall)
160 | return f1, precision, recall
161 |
162 | @jit(nopython=True)
163 | def calculate_expected_overlap(fragments, fweights):
164 | max_len = fragments.shape[1]
165 | expected_overlaps = np.zeros((max_len), np.float32)
166 | expected_overlaps[0] = 1
167 |
168 | # TODO Speed Up
169 | for i in range(1, max_len):
170 | mask = np.logical_not(np.isnan(fragments[:, i]))
171 | if np.any(mask):
172 | fragment = fragments[mask, 1:i+1]
173 | seq_mean = np.sum(fragment, 1) / fragment.shape[1]
174 | expected_overlaps[i] = np.sum(seq_mean *
175 | fweights[mask]) / np.sum(fweights[mask])
176 | return expected_overlaps
177 |
--------------------------------------------------------------------------------
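A small self-contained check (not in the repository) of the IoU helpers above; it assumes the `toolkit` package is importable, i.e. the Cython `region` extension pulled in by `toolkit/utils/__init__.py` has been built:

    import numpy as np
    from toolkit.utils import overlap_ratio, success_overlap

    # two frames of [x, y, w, h] boxes: an exact match and a partial overlap
    gt = np.array([[1.0, 1.0, 10.0, 10.0], [5.0, 5.0, 10.0, 10.0]])
    pred = np.array([[1.0, 1.0, 10.0, 10.0], [10.0, 10.0, 10.0, 10.0]])

    print(overlap_ratio(gt, pred))        # [1.0, ~0.143]
    print(success_overlap(gt, pred, 2))   # fraction of frames above each IoU threshold in 0:0.05:1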
/toolkit/evaluation/eao_benchmark.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import numpy as np
4 |
5 | from glob import glob
6 |
7 | from ..utils import calculate_failures, calculate_accuracy, calculate_expected_overlap
8 |
9 | class EAOBenchmark:
10 | """
11 | Args:
12 | dataset:
13 | """
14 | def __init__(self, dataset, skipping=5, tags=['all']):
15 | self.dataset = dataset
16 | self.skipping = skipping
17 | self.tags = tags
18 | # NOTE: we do not use a GMM to generate the low/high/peak values; they are hard-coded per dataset
19 | if dataset.name == 'VOT2019':
20 | self.low = 46
21 | self.high = 291
22 | self.peak = 128
23 | elif dataset.name == 'VOT2018' or dataset.name == 'VOT2017':
24 | self.low = 100
25 | self.high = 356
26 | self.peak = 160
27 | elif dataset.name == 'VOT2016':
28 | self.low = 108
29 | self.high = 371
30 | self.peak = 168
31 |
32 | def eval(self, eval_trackers=None):
33 | """
34 | Args:
35 | eval_trackers: list of tracker names; if None, all trackers
36 | registered on the dataset are evaluated
37 | Returns:
38 | eao: dict of results
39 | """
40 | if eval_trackers is None:
41 | eval_trackers = self.dataset.tracker_names
42 | if isinstance(eval_trackers, str):
43 | eval_trackers = [eval_trackers]
44 |
45 | ret = {}
46 | for tracker_name in eval_trackers:
47 | eao = self._calculate_eao(tracker_name, self.tags)
48 | ret[tracker_name] = eao
49 | return ret
50 |
51 | def show_result(self, result, topk=10):
52 | """pretty print result
53 | Args:
54 | result: returned dict from function eval
55 | """
56 | if len(self.tags) == 1:
57 | tracker_name_len = max((max([len(x) for x in result.keys()])+2), 12)
58 | header = ("|{:^"+str(tracker_name_len)+"}|{:^10}|").format('Tracker Name', 'EAO')
59 | bar = '-'*len(header)
60 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^10.3f}|"
61 | print(bar)
62 | print(header)
63 | print(bar)
64 | tracker_eao = sorted(result.items(),
65 | key=lambda x: x[1]['all'],
66 | reverse=True)[:topk]
67 | for tracker_name, eao in tracker_eao:
68 | # for tracker_name, ret in result.items():
69 | print(formatter.format(tracker_name, eao['all']))
70 | print(bar)
71 | else:
72 | header = "|{:^20}|".format('Tracker Name')
73 | header += "{:^7}|{:^15}|{:^14}|{:^15}|{:^13}|{:^11}|{:^7}|".format(*self.tags)
74 | bar = '-'*len(header)
75 | formatter = "{:^7.3f}|{:^15.3f}|{:^14.3f}|{:^15.3f}|{:^13.3f}|{:^11.3f}|{:^7.3f}|"
76 | print(bar)
77 | print(header)
78 | print(bar)
79 | sorted_tracker = sorted(result.items(),
80 | key=lambda x: x[1]['all'],
81 | reverse=True)[:topk]
82 | sorted_tracker = [x[0] for x in sorted_tracker]
83 | for tracker_name in sorted_tracker:
84 | # for tracker_name, ret in result.items():
85 | print("|{:^20}|".format(tracker_name)+formatter.format(
86 | *[result[tracker_name][x] for x in self.tags]))
87 | print(bar)
88 |
89 | def _calculate_eao(self, tracker_name, tags):
90 | all_overlaps = []
91 | all_failures = []
92 | video_names = []
93 | gt_traj_length = []
94 | # for i in range(len(self.dataset)):
95 | for video in self.dataset:
96 | # video = self.dataset[i]
97 | gt_traj = video.gt_traj
98 | if tracker_name not in video.pred_trajs:
99 | tracker_trajs = video.load_tracker(self.dataset.tracker_path, tracker_name, False)
100 | else:
101 | tracker_trajs = video.pred_trajs[tracker_name]
102 | for tracker_traj in tracker_trajs:
103 | gt_traj_length.append(len(gt_traj))
104 | video_names.append(video.name)
105 | overlaps = calculate_accuracy(tracker_traj, gt_traj, bound=(video.width-1, video.height-1))[1]
106 | failures = calculate_failures(tracker_traj)[1]
107 | all_overlaps.append(overlaps)
108 | all_failures.append(failures)
109 | fragment_num = sum([len(x)+1 for x in all_failures])
110 | max_len = max([len(x) for x in all_overlaps])
111 | if len(tracker_trajs) == 0:
112 | print('Warning: some seqs in {}.{} not found'.format(tracker_name, tags))
113 | seq_weight = 1 / (len(tracker_trajs) + 1e-10)  # avoid division by zero
114 |
115 | eao = {}
116 | for tag in tags:
117 | # prepare segments
118 | fweights = np.ones((fragment_num)) * np.nan
119 | fragments = np.ones((fragment_num, max_len)) * np.nan
120 | seg_counter = 0
121 | for name, traj_len, failures, overlaps in zip(video_names, gt_traj_length,
122 | all_failures, all_overlaps):
123 | if len(failures) > 0:
124 | points = [x+self.skipping for x in failures if
125 | x+self.skipping <= len(overlaps)]
126 | points.insert(0, 0)
127 | for i in range(len(points)):
128 | if i != len(points) - 1:
129 | fragment = np.array(overlaps[points[i]:points[i+1]+1])
130 | fragments[seg_counter, :] = 0
131 | else:
132 | fragment = np.array(overlaps[points[i]:])
133 | fragment[np.isnan(fragment)] = 0
134 | fragments[seg_counter, :len(fragment)] = fragment
135 | if i != len(points) - 1:
136 | # tag_value = self.dataset[name].tags[tag][points[i]:points[i+1]+1]
137 | tag_value = self.dataset[name].select_tag(tag, points[i], points[i+1]+1)
138 | w = sum(tag_value) / (points[i+1] - points[i]+1)
139 | fweights[seg_counter] = seq_weight * w
140 | else:
141 | # tag_value = self.dataset[name].tags[tag][points[i]:len(overlaps)]
142 | tag_value = self.dataset[name].select_tag(tag, points[i], len(overlaps))
143 | w = sum(tag_value) / (traj_len - points[i]+1e-16)
144 | fweights[seg_counter] = seq_weight * w  # (len(fragment) / (traj_len-points[i]))
145 | seg_counter += 1
146 | else:
147 | # no failure
148 | max_idx = min(len(overlaps), max_len)
149 | fragments[seg_counter, :max_idx] = overlaps[:max_idx]
150 | # tag_value = self.dataset[name].tags[tag][:max_idx]
151 | tag_value = self.dataset[name].select_tag(tag, 0, max_idx)
152 | w = sum(tag_value) / max_idx
153 | fweights[seg_counter] = seq_weight * w
154 | seg_counter += 1
155 |
156 | expected_overlaps = calculate_expected_overlap(fragments, fweights)
157 | # calculate eao
158 | weight = np.zeros((len(expected_overlaps)))
159 | weight[self.low-1:self.high-1+1] = 1
160 | is_valid = np.logical_not(np.isnan(expected_overlaps))
161 | eao_ = np.sum(expected_overlaps[is_valid] * weight[is_valid]) / np.sum(weight[is_valid])
162 | eao[tag] = eao_
163 | return eao
164 |
--------------------------------------------------------------------------------
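A minimal usage sketch (placeholder paths and tracker name), mirroring the VOT branch of `tools/eval.py` below:

    from toolkit.datasets import VOTDataset
    from toolkit.evaluation import EAOBenchmark

    dataset = VOTDataset('VOT2018', '/path/to/dataset/VOT2018')    # assumed location
    dataset.set_tracker('/path/to/results/VOT2018', ['CGACD'])     # assumed tracker name
    benchmark = EAOBenchmark(dataset)      # picks the VOT2018 low/high/peak constants above
    eao = benchmark.eval('CGACD')          # {'CGACD': {'all': <EAO value>}}
    benchmark.show_result(eao)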
/tools/eval.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import os
7 | import argparse
8 |
9 | from glob import glob
10 | from tqdm import tqdm
11 | from multiprocessing import Pool
12 | from toolkit.datasets import OTBDataset, UAVDataset, LaSOTDataset, \
13 | VOTDataset, NFSDataset, VOTLTDataset, GOT10kDataset
14 | from toolkit.evaluation import OPEBenchmark, AccuracyRobustnessBenchmark, \
15 | EAOBenchmark, F1Benchmark
16 |
17 | parser = argparse.ArgumentParser(description='tracking evaluation')
18 | parser.add_argument('--tracker_path', '-p', type=str,
19 | help='tracker result path')
20 | parser.add_argument('--dataset', '-d', type=str,
21 | help='dataset name')
22 | parser.add_argument('--num', '-n', default=1, type=int,
23 | help='number of processes used for evaluation')
24 | parser.add_argument('--tracker_prefix', '-t', default='',
25 | type=str, help='tracker name')
26 | parser.add_argument('--show_video_level', '-s', dest='show_video_level',
27 | action='store_true')
28 | parser.set_defaults(show_video_level=False)
29 | args = parser.parse_args()
30 |
31 |
32 | def main():
33 | tracker_dir = os.path.join(args.tracker_path, args.dataset)
34 | trackers = glob(os.path.join(args.tracker_path,
35 | args.dataset,
36 | args.tracker_prefix+'*'))
37 | trackers = [x.split('/')[-1] for x in trackers]
38 |
39 | assert len(trackers) > 0
40 | args.num = min(args.num, len(trackers))
41 |
42 | root = os.path.realpath(os.path.join(os.path.dirname(__file__),
43 | '../dataset'))
44 | root = os.path.join(root, args.dataset)
45 | if 'OTB' in args.dataset:
46 | dataset = OTBDataset(args.dataset, root)
47 | dataset.set_tracker(tracker_dir, trackers)
48 | trackers = dataset.tracker_names
49 | benchmark = OPEBenchmark(dataset)
50 | success_ret = {}
51 | with Pool(processes=args.num) as pool:
52 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success,
53 | trackers), desc='eval success', total=len(trackers), ncols=100):
54 | success_ret.update(ret)
55 | precision_ret = {}
56 | with Pool(processes=args.num) as pool:
57 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision,
58 | trackers), desc='eval precision', total=len(trackers), ncols=100):
59 | precision_ret.update(ret)
60 | benchmark.show_result(success_ret, precision_ret,
61 | show_video_level=args.show_video_level)
62 | elif 'LaSOT' == args.dataset:
63 | dataset = LaSOTDataset(args.dataset, root)
64 | dataset.set_tracker(tracker_dir, trackers)
65 | trackers = dataset.tracker_names
66 | benchmark = OPEBenchmark(dataset)
67 | success_ret = {}
68 | with Pool(processes=args.num) as pool:
69 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success,
70 | trackers), desc='eval success', total=len(trackers), ncols=100):
71 | success_ret.update(ret)
72 | precision_ret = {}
73 | with Pool(processes=args.num) as pool:
74 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision,
75 | trackers), desc='eval precision', total=len(trackers), ncols=100):
76 | precision_ret.update(ret)
77 | norm_precision_ret = {}
78 | with Pool(processes=args.num) as pool:
79 | for ret in tqdm(pool.imap_unordered(benchmark.eval_norm_precision,
80 | trackers), desc='eval norm precision', total=len(trackers), ncols=100):
81 | norm_precision_ret.update(ret)
82 | benchmark.show_result(success_ret, precision_ret, norm_precision_ret,
83 | show_video_level=args.show_video_level)
84 | elif 'UAV' in args.dataset:
85 | dataset = UAVDataset(args.dataset, root)
86 | dataset.set_tracker(tracker_dir, trackers)
87 | trackers = dataset.tracker_names
88 | benchmark = OPEBenchmark(dataset)
89 | success_ret = {}
90 | with Pool(processes=args.num) as pool:
91 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success,
92 | trackers), desc='eval success', total=len(trackers), ncols=100):
93 | success_ret.update(ret)
94 | precision_ret = {}
95 | with Pool(processes=args.num) as pool:
96 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision,
97 | trackers), desc='eval precision', total=len(trackers), ncols=100):
98 | precision_ret.update(ret)
99 | benchmark.show_result(success_ret, precision_ret,
100 | show_video_level=args.show_video_level)
101 | elif 'got10k' in args.dataset:
102 | dataset = GOT10kDataset(args.dataset, root)
103 | dataset.set_tracker(tracker_dir, trackers)
104 | trackers = dataset.tracker_names
105 | benchmark = OPEBenchmark(dataset)
106 | success_ret = {}
107 | with Pool(processes=args.num) as pool:
108 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success,
109 | trackers), desc='eval success', total=len(trackers), ncols=100):
110 | success_ret.update(ret)
111 | precision_ret = {}
112 | with Pool(processes=args.num) as pool:
113 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision,
114 | trackers), desc='eval precision', total=len(trackers), ncols=100):
115 | precision_ret.update(ret)
116 | benchmark.show_result(success_ret, precision_ret,
117 | show_video_level=args.show_video_level)
118 | elif 'NFS' in args.dataset:
119 | dataset = NFSDataset(args.dataset, root)
120 | dataset.set_tracker(tracker_dir, trackers)
121 | trackers = dataset.tracker_names
122 | benchmark = OPEBenchmark(dataset)
123 | success_ret = {}
124 | with Pool(processes=args.num) as pool:
125 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success,
126 | trackers), desc='eval success', total=len(trackers), ncols=100):
127 | success_ret.update(ret)
128 | precision_ret = {}
129 | with Pool(processes=args.num) as pool:
130 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision,
131 | trackers), desc='eval precision', total=len(trackers), ncols=100):
132 | precision_ret.update(ret)
133 | benchmark.show_result(success_ret, precision_ret,
134 | show_video_level=args.show_video_level)
135 | elif args.dataset in ['VOT2016', 'VOT2017', 'VOT2018', 'VOT2019']:
136 | dataset = VOTDataset(args.dataset, root)
137 | dataset.set_tracker(tracker_dir, trackers)
138 | trackers = dataset.tracker_names
139 | ar_benchmark = AccuracyRobustnessBenchmark(dataset)
140 | ar_result = {}
141 | with Pool(processes=args.num) as pool:
142 | for ret in tqdm(pool.imap_unordered(ar_benchmark.eval,
143 | trackers), desc='eval ar', total=len(trackers), ncols=100):
144 | ar_result.update(ret)
145 |
146 | benchmark = EAOBenchmark(dataset)
147 | eao_result = {}
148 | with Pool(processes=args.num) as pool:
149 | for ret in tqdm(pool.imap_unordered(benchmark.eval,
150 | trackers), desc='eval eao', total=len(trackers), ncols=100):
151 | eao_result.update(ret)
152 | ar_benchmark.show_result(ar_result, eao_result,
153 | show_video_level=args.show_video_level)
154 | elif 'VOT2018-LT' == args.dataset:
155 | dataset = VOTLTDataset(args.dataset, root)
156 | dataset.set_tracker(tracker_dir, trackers)
157 | trackers = dataset.tracker_names
158 | benchmark = F1Benchmark(dataset)
159 | f1_result = {}
160 | with Pool(processes=args.num) as pool:
161 | for ret in tqdm(pool.imap_unordered(benchmark.eval,
162 | trackers), desc='eval f1', total=len(trackers), ncols=100):
163 | f1_result.update(ret)
164 | benchmark.show_result(f1_result,
165 | show_video_level=args.show_video_level)
166 |
167 |
168 | if __name__ == '__main__':
169 | main()
170 |
--------------------------------------------------------------------------------
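For reference, a typical invocation (the results directory and tracker prefix are assumptions; the flags are the ones defined by the argparse block above):

    python tools/eval.py --tracker_path ./results --dataset VOT2018 --num 4 --tracker_prefix CGACD

Appending `--show_video_level` additionally prints the per-video tables.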
/toolkit/utils/region.pyx:
--------------------------------------------------------------------------------
1 | """
2 | @author fangyi.zhang@vipl.ict.ac.cn
3 | """
4 | # distutils: sources = src/region.c
5 | # distutils: include_dirs = src/
6 |
7 | from libc.stdlib cimport malloc, free
8 | from libc.stdio cimport sprintf
9 | from libc.string cimport strlen
10 |
11 | cimport c_region
12 |
13 | cpdef enum RegionType:
14 | EMTPY
15 | SPECIAL
16 | RECTANGEL
17 | POLYGON
18 | MASK
19 |
20 | cdef class RegionBounds:
21 | cdef c_region.region_bounds* _c_region_bounds
22 |
23 | def __cinit__(self):
24 | self._c_region_bounds = <c_region.region_bounds*>malloc(
25 | sizeof(c_region.region_bounds))
26 | if not self._c_region_bounds:
27 | self._c_region_bounds = NULL
28 | raise MemoryError()
29 |
30 | def __init__(self, top, bottom, left, right):
31 | self.set(top, bottom, left, right)
32 |
33 | def __dealloc__(self):
34 | if self._c_region_bounds is not NULL:
35 | free(self._c_region_bounds)
36 | self._c_region_bounds = NULL
37 |
38 | def __str__(self):
39 | return "top: {:.3f} bottom: {:.3f} left: {:.3f} reight: {:.3f}".format(
40 | self._c_region_bounds.top,
41 | self._c_region_bounds.bottom,
42 | self._c_region_bounds.left,
43 | self._c_region_bounds.right)
44 |
45 | def get(self):
46 | return (self._c_region_bounds.top,
47 | self._c_region_bounds.bottom,
48 | self._c_region_bounds.left,
49 | self._c_region_bounds.right)
50 |
51 | def set(self, top, bottom, left, right):
52 | self._c_region_bounds.top = top
53 | self._c_region_bounds.bottom = bottom
54 | self._c_region_bounds.left = left
55 | self._c_region_bounds.right = right
56 |
57 | cdef class Rectangle:
58 | cdef c_region.region_rectangle* _c_region_rectangle
59 |
60 | def __cinit__(self):
61 | self._c_region_rectangle = <c_region.region_rectangle*>malloc(
62 | sizeof(c_region.region_rectangle))
63 | if not self._c_region_rectangle:
64 | self._c_region_rectangle = NULL
65 | raise MemoryError()
66 |
67 | def __init__(self, x, y, width, height):
68 | self.set(x, y, width, height)
69 |
70 | def __dealloc__(self):
71 | if self._c_region_rectangle is not NULL:
72 | free(self._c_region_rectangle)
73 | self._c_region_rectangle = NULL
74 |
75 | def __str__(self):
76 | return "x: {:.3f} y: {:.3f} width: {:.3f} height: {:.3f}".format(
77 | self._c_region_rectangle.x,
78 | self._c_region_rectangle.y,
79 | self._c_region_rectangle.width,
80 | self._c_region_rectangle.height)
81 |
82 | def set(self, x, y, width, height):
83 | self._c_region_rectangle.x = x
84 | self._c_region_rectangle.y = y
85 | self._c_region_rectangle.width = width
86 | self._c_region_rectangle.height = height
87 |
88 | def get(self):
89 | """
90 | return:
91 | (x, y, width, height)
92 | """
93 | return (self._c_region_rectangle.x,
94 | self._c_region_rectangle.y,
95 | self._c_region_rectangle.width,
96 | self._c_region_rectangle.height)
97 |
98 | cdef class Polygon:
99 | cdef c_region.region_polygon* _c_region_polygon
100 |
101 | def __cinit__(self, points):
102 | """
103 | args:
104 | points: tuple of point
105 | points = ((1, 1), (10, 10))
106 | """
107 | num = len(points) // 2
108 | self._c_region_polygon = <c_region.region_polygon*>malloc(
109 | sizeof(c_region.region_polygon))
110 | if not self._c_region_polygon:
111 | self._c_region_polygon = NULL
112 | raise MemoryError()
113 | self._c_region_polygon.count = num
114 | self._c_region_polygon.x = <float*>malloc(sizeof(float) * num)
115 | if not self._c_region_polygon.x:
116 | raise MemoryError()
117 | self._c_region_polygon.y = <float*>malloc(sizeof(float) * num)
118 | if not self._c_region_polygon.y:
119 | raise MemoryError()
120 |
121 | for i in range(num):
122 | self._c_region_polygon.x[i] = points[i*2]
123 | self._c_region_polygon.y[i] = points[i*2+1]
124 |
125 | def __dealloc__(self):
126 | if self._c_region_polygon is not NULL:
127 | if self._c_region_polygon.x is not NULL:
128 | free(self._c_region_polygon.x)
129 | self._c_region_polygon.x = NULL
130 | if self._c_region_polygon.y is not NULL:
131 | free(self._c_region_polygon.y)
132 | self._c_region_polygon.y = NULL
133 | free(self._c_region_polygon)
134 | self._c_region_polygon = NULL
135 |
136 | def __str__(self):
137 | ret = ""
138 | for i in range(self._c_region_polygon.count-1):
139 | ret += "({:.3f} {:.3f}) ".format(self._c_region_polygon.x[i],
140 | self._c_region_polygon.y[i])
141 | ret += "({:.3f} {:.3f})".format(self._c_region_polygon.x[i],
142 | self._c_region_polygon.y[i])
143 | return ret
144 |
145 | def vot_overlap(polygon1, polygon2, bounds=None):
146 | """ computing overlap between two polygon
147 | Args:
148 | polygon1: polygon tuple of points
149 | polygon2: polygon tuple of points
150 | bounds: tuple of (left, top, right, bottom) or tuple of (width, height)
151 | Return:
152 | overlap: overlap between two polygons
153 | """
154 | if len(polygon1) == 1 or len(polygon2) == 1:
155 | return float("nan")
156 |
157 | if len(polygon1) == 4:
158 | polygon1_ = Polygon([polygon1[0], polygon1[1],
159 | polygon1[0]+polygon1[2], polygon1[1],
160 | polygon1[0]+polygon1[2], polygon1[1]+polygon1[3],
161 | polygon1[0], polygon1[1]+polygon1[3]])
162 | else:
163 | polygon1_ = Polygon(polygon1)
164 |
165 | if len(polygon2) == 4:
166 | polygon2_ = Polygon([polygon2[0], polygon2[1],
167 | polygon2[0]+polygon2[2], polygon2[1],
168 | polygon2[0]+polygon2[2], polygon2[1]+polygon2[3],
169 | polygon2[0], polygon2[1]+polygon2[3]])
170 | else:
171 | polygon2_ = Polygon(polygon2)
172 |
173 | if bounds is not None and len(bounds) == 4:
174 | pno_bounds = RegionBounds(bounds[0], bounds[1], bounds[2], bounds[3])
175 | elif bounds is not None and len(bounds) == 2:
176 | pno_bounds = RegionBounds(0, bounds[1], 0, bounds[0])
177 | else:
178 | pno_bounds = RegionBounds(-float("inf"), float("inf"),
179 | -float("inf"), float("inf"))
180 | cdef float only1 = 0
181 | cdef float only2 = 0
182 | cdef c_region.region_polygon* c_polygon1 = polygon1_._c_region_polygon
183 | cdef c_region.region_polygon* c_polygon2 = polygon2_._c_region_polygon
184 | cdef c_region.region_bounds no_bounds = pno_bounds._c_region_bounds[0]  # dereference
185 | return c_region.compute_polygon_overlap(c_polygon1,
186 | c_polygon2,
187 | &only1,
188 | &only2,
189 | no_bounds)
190 |
191 | def vot_overlap_traj(polygons1, polygons2, bounds=None):
192 | """ computing overlap between two trajectory
193 | Args:
194 | polygons1: list of polygon
195 | polygons2: list of polygon
196 | bounds: tuple of (left, top, right, bottom) or tuple of (width, height)
197 | Return:
198 | overlaps: overlaps between all pairs of polygons
199 | """
200 | assert len(polygons1) == len(polygons2)
201 | overlaps = []
202 | for i in range(len(polygons1)):
203 | overlap = vot_overlap(polygons1[i], polygons2[i], bounds=bounds)
204 | overlaps.append(overlap)
205 | return overlaps
206 |
207 |
208 | def vot_float2str(template, float value):
209 | """
210 | Args:
211 | template: a C-style format string, e.g. "%.3f"
212 | value: float value
213 | """
214 | cdef bytes ptemplate = template.encode()
215 | cdef const char* ctemplate = ptemplate
216 | cdef char* output = <char*>malloc(sizeof(char) * 100)
217 | if not output:
218 | raise MemoryError()
219 | sprintf(output, ctemplate, value)
220 | try:
221 | ret = output[:strlen(output)].decode()
222 | finally:
223 | free(output)
224 | return ret
225 |
--------------------------------------------------------------------------------
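A small sanity check of the overlap helpers (assuming the extension has been compiled, e.g. via `toolkit/utils/setup.py`):

    from toolkit.utils import region

    rect = [0, 0, 10, 10]                         # axis-aligned box as [x, y, w, h]
    poly = [0, 0, 10, 0, 10, 10, 0, 10]           # the same square as an explicit polygon
    print(region.vot_overlap(rect, poly, bounds=(100, 100)))    # ~1.0, identical regions
    print(region.vot_float2str("%.3f", 0.123456))               # '0.123'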