├── LICENSE
├── README.md
├── demo
├── Ocean_overview.jpg
├── lines.jpg
├── ocean1.gif
├── oceanplu_overview.png
├── oceanplus.gif
└── siamdw_overview.jpg
├── experiments
├── test
│ ├── DAVIS
│ │ └── OceanPlus.yaml
│ ├── GOT10K
│ │ └── Ocean.yaml
│ ├── LASOT
│ │ └── Ocean.yaml
│ ├── OTB
│ │ ├── Ocean.yaml
│ │ └── SiamDW.yaml
│ └── VOT
│ │ ├── ONLINE.yaml
│ │ ├── Ocean.yaml
│ │ ├── OceanPlus.yaml
│ │ └── SiamDW.yaml
└── train
│ ├── Ocean.yaml
│ └── SiamDW.yaml
├── lib
├── core
│ ├── config.py
│ ├── config_ocean.py
│ ├── config_oceanplus.py
│ ├── config_siamdw.py
│ ├── eval_davis.py
│ ├── eval_got10k.py
│ ├── eval_lasot.py
│ ├── eval_otb.py
│ ├── eval_visdrone.py
│ ├── extract_tune_logs.py
│ └── function.py
├── dataset
│ ├── crop
│ │ ├── DAVIS
│ │ │ ├── gen_json.py
│ │ │ ├── par_crop.py
│ │ │ └── readme.md
│ │ ├── RGBT210
│ │ │ ├── RGBT210_genjson.py
│ │ │ ├── gen_json.py
│ │ │ ├── par_crop.py
│ │ │ └── readme.md
│ │ ├── RGBT234
│ │ │ ├── RGBT234_genjson.py
│ │ │ ├── gen_json.py
│ │ │ ├── par_crop.py
│ │ │ └── readme.md
│ │ ├── coco
│ │ │ ├── gen_json.py
│ │ │ ├── par_crop.py
│ │ │ └── readme.md
│ │ ├── det
│ │ │ ├── gen_json.py
│ │ │ ├── par_crop.py
│ │ │ └── readme.md
│ │ ├── got10k
│ │ │ ├── gen_json.py
│ │ │ ├── par_crop.py
│ │ │ ├── parser_got10k.py
│ │ │ └── readme.md
│ │ ├── lasot
│ │ │ ├── gen_json.py
│ │ │ ├── par_crop.py
│ │ │ ├── parser_lasot.py
│ │ │ └── readme.md
│ │ ├── vid
│ │ │ ├── gen_json.py
│ │ │ ├── par_crop.py
│ │ │ ├── parse_vid.py
│ │ │ └── readme.md
│ │ └── visdrone
│ │ │ ├── gen_json.py
│ │ │ ├── par_crop.py
│ │ │ ├── parser_visdrone.py
│ │ │ └── readme.md
│ ├── ocean.py
│ └── siamfc.py
├── eval_toolkit
│ ├── bin
│ │ ├── _init_paths.py
│ │ └── eval.py
│ ├── davis
│ │ └── davis2017-evaluation
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── davis2017
│ │ │ ├── __init__.py
│ │ │ ├── davis.py
│ │ │ ├── davis.py.ori
│ │ │ ├── evaluation.py
│ │ │ ├── metrics.py
│ │ │ ├── results.py
│ │ │ └── utils.py
│ │ │ ├── demo.sh
│ │ │ ├── evaluation_codalab.py
│ │ │ ├── evaluation_method.py
│ │ │ ├── pytest
│ │ │ └── test_evaluation.py
│ │ │ ├── setup.cfg
│ │ │ └── setup.py
│ ├── pysot
│ │ ├── __init__.py
│ │ ├── datasets
│ │ │ ├── __init__.py
│ │ │ ├── dataset.py
│ │ │ ├── got10k.py
│ │ │ ├── lasot.py
│ │ │ ├── nfs.py
│ │ │ ├── otb.py
│ │ │ ├── trackingnet.py
│ │ │ ├── uav.py
│ │ │ ├── video.py
│ │ │ └── vot.py
│ │ ├── evaluation
│ │ │ ├── __init__.py
│ │ │ ├── ar_benchmark.py
│ │ │ ├── eao_benchmark.py
│ │ │ ├── f1_benchmark.py
│ │ │ └── ope_benchmark.py
│ │ ├── utils
│ │ │ ├── __init__.py
│ │ │ ├── build
│ │ │ │ ├── temp.linux-x86_64-3.6
│ │ │ │ │ ├── region.o
│ │ │ │ │ └── src
│ │ │ │ │ │ └── region.o
│ │ │ │ └── temp.linux-x86_64-3.7
│ │ │ │ │ ├── region.o
│ │ │ │ │ └── src
│ │ │ │ │ └── region.o
│ │ │ ├── c_region.pxd
│ │ │ ├── misc.py
│ │ │ ├── region.c
│ │ │ ├── region.cpython-36m-x86_64-linux-gnu.so
│ │ │ ├── region.cpython-37m-x86_64-linux-gnu.so
│ │ │ ├── region.pyx
│ │ │ ├── setup.py
│ │ │ ├── src
│ │ │ │ ├── buffer.h
│ │ │ │ ├── region.c
│ │ │ │ └── region.h
│ │ │ └── statistics.py
│ │ └── visualization
│ │ │ ├── __init__.py
│ │ │ ├── draw_eao.py
│ │ │ ├── draw_f1.py
│ │ │ ├── draw_success_precision.py
│ │ │ └── draw_utils.py
│ └── requirements.txt
├── models
│ ├── __init__.py
│ ├── backbones.py
│ ├── connect.py
│ ├── dcn
│ │ ├── __init__.py
│ │ ├── deform_conv.py
│ │ ├── deform_conv_cuda.cpython-36m-x86_64-linux-gnu.so
│ │ ├── deform_conv_cuda.cpython-37m-x86_64-linux-gnu.so
│ │ ├── deform_pool.py
│ │ ├── deform_pool_cuda.cpython-36m-x86_64-linux-gnu.so
│ │ ├── deform_pool_cuda.cpython-37m-x86_64-linux-gnu.so
│ │ └── src
│ │ │ ├── deform_conv_cuda.cpp
│ │ │ ├── deform_conv_cuda_kernel.cu
│ │ │ ├── deform_pool_cuda.cpp
│ │ │ └── deform_pool_cuda_kernel.cu
│ ├── mask.py
│ ├── models.py
│ ├── modules.py
│ ├── ocean.py
│ ├── oceanTRT.py
│ ├── oceanplus.py
│ ├── online
│ │ ├── __init__.py
│ │ ├── backbone
│ │ │ ├── __init__.py
│ │ │ ├── resnet.py
│ │ │ └── resnet18_vggm.py
│ │ ├── bbreg
│ │ │ ├── __init__.py
│ │ │ └── iou_net.py
│ │ ├── classifier
│ │ │ ├── __init__.py
│ │ │ ├── features.py
│ │ │ ├── initializer.py
│ │ │ ├── linear_filter.py
│ │ │ └── optimizer.py
│ │ ├── external
│ │ │ └── PreciseRoIPooling
│ │ │ │ ├── .gitignore
│ │ │ │ ├── LICENSE
│ │ │ │ ├── README.md
│ │ │ │ ├── _assets
│ │ │ │ └── prroi_visualization.png
│ │ │ │ ├── pytorch
│ │ │ │ ├── prroi_pool
│ │ │ │ │ ├── .gitignore
│ │ │ │ │ ├── __init__.py
│ │ │ │ │ ├── functional.py
│ │ │ │ │ ├── prroi_pool.py
│ │ │ │ │ └── src
│ │ │ │ │ │ ├── prroi_pooling_gpu.c
│ │ │ │ │ │ ├── prroi_pooling_gpu.h
│ │ │ │ │ │ ├── prroi_pooling_gpu_impl.cu
│ │ │ │ │ │ └── prroi_pooling_gpu_impl.cuh
│ │ │ │ └── tests
│ │ │ │ │ └── test_prroi_pooling2d.py
│ │ │ │ └── src
│ │ │ │ ├── prroi_pooling_gpu_impl.cu
│ │ │ │ └── prroi_pooling_gpu_impl.cuh
│ │ └── layers
│ │ │ ├── __init__.py
│ │ │ ├── activation.py
│ │ │ ├── blocks.py
│ │ │ ├── distance.py
│ │ │ ├── filter.py
│ │ │ ├── normalization.py
│ │ │ └── transform.py
│ └── siamfc.py
├── online
│ ├── __init__.py
│ ├── augmentation.py
│ ├── base_actor.py
│ ├── base_trainer.py
│ ├── complex.py
│ ├── dcf.py
│ ├── extractor.py
│ ├── fourier.py
│ ├── loading.py
│ ├── ltr_trainer.py
│ ├── model_constructor.py
│ ├── operation.py
│ ├── optim.py
│ ├── optimization.py
│ ├── preprocessing.py
│ ├── tensordict.py
│ ├── tensorlist.py
│ └── tracking.py
├── tracker
│ ├── ocean.py
│ ├── oceanplus.py
│ ├── online.py
│ └── siamfc.py
├── tutorial
│ ├── Ocean
│ │ └── ocean.md
│ ├── OceanPlus
│ │ └── oceanplus.md
│ ├── SiamDW
│ │ └── siamdw.md
│ ├── install.sh
│ └── install_trt.md
├── utils
│ ├── __init__.py
│ ├── cutout.py
│ ├── extract_tpejson_fc.py
│ ├── extract_tpejson_ocean.py
│ ├── extract_tpelog.py
│ ├── extract_tpelog_fc.py
│ ├── utils.py
│ └── watch_tpe.sh
└── version.py
├── setup.py
└── tracking
├── _init_paths.py
├── onekey.py
├── run_video.py
├── test_epochs.py
├── test_ocean.py
├── test_oceanplus.py
├── test_siamdw.py
├── train_ocean.py
├── train_siamdw.py
├── tune_tpe.py
├── vot.py
├── vot_wrap.py
└── vot_wrap_mms.py
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 eccv2020
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/demo/Ocean_overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/demo/Ocean_overview.jpg
--------------------------------------------------------------------------------
/demo/lines.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/demo/lines.jpg
--------------------------------------------------------------------------------
/demo/ocean1.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/demo/ocean1.gif
--------------------------------------------------------------------------------
/demo/oceanplu_overview.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/demo/oceanplu_overview.png
--------------------------------------------------------------------------------
/demo/oceanplus.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/demo/oceanplus.gif
--------------------------------------------------------------------------------
/demo/siamdw_overview.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/demo/siamdw_overview.jpg
--------------------------------------------------------------------------------
/experiments/test/DAVIS/OceanPlus.yaml:
--------------------------------------------------------------------------------
1 | TEST:
2 | DAVIS2016:
3 | penalty_k: 0.032
4 | lr: 0.98
5 | window_influence: 0.45
6 | small_sz: 255
7 | big_sz: 287
8 | seg_thr: 0.84
9 | DAVIS2017:
10 | penalty_k: 0.031
11 | lr: 1
12 | window_influence: 0.35
13 | small_sz: 255
14 | big_sz: 287
15 | seg_thr: 0.84
16 |
--------------------------------------------------------------------------------
/experiments/test/GOT10K/Ocean.yaml:
--------------------------------------------------------------------------------
1 | TEST:
2 | GOT10K:
3 | penalty_k: 0.022
4 | lr: 0.799
5 | window_influence: 0.118
6 | small_sz: 255
7 | big_sz: 255
8 |
9 |
10 |
--------------------------------------------------------------------------------
/experiments/test/LASOT/Ocean.yaml:
--------------------------------------------------------------------------------
1 | TEST:
2 | LASOT:
3 | penalty_k: 0.11
4 | lr: 0.7
5 | window_influence: 0.20
6 | small_sz: 255
7 | big_sz: 255
8 |
9 |
--------------------------------------------------------------------------------
/experiments/test/OTB/Ocean.yaml:
--------------------------------------------------------------------------------
1 | TEST:
2 | OTB2015:
3 | penalty_k: 0.087
4 | lr: 0.408
5 | window_influence: 0.366
6 | small_sz: 271
7 | big_sz: 271
8 |
9 |
--------------------------------------------------------------------------------
/experiments/test/OTB/SiamDW.yaml:
--------------------------------------------------------------------------------
1 | TEST:
2 | OTB2013:
3 | scale_step: 1.0482
4 | scale_lr: 0.3629
5 | scale_penalty: 0.9997
6 | w_influence: 0.3896
7 | OTB2015:
8 | scale_step: 1.1897
9 | scale_lr: 0.2226
10 | scale_penalty: 0.9370
11 | w_influence: 0.2897
--------------------------------------------------------------------------------
/experiments/test/VOT/ONLINE.yaml:
--------------------------------------------------------------------------------
1 | ## BASE:
2 | # Patch sampling parameters
3 | image_sample_size: 14 * 16 # Maximum image sample size
4 | search_area_scale: 4 # Scale relative to target size
5 | vot_anno_conversion_type: 'preserve_area'
6 | use_gpu: True
7 | debug: 0
8 |
9 | update_classifier: True
10 | net_opt_iter: 25
11 | net_opt_update_iter: 3
12 | net_opt_hn_iter: 3
13 |
14 |
15 | # Training parameters
16 | sample_memory_size: 250 # Memory size
17 | train_skipping: 10 # How often to run training (every n-th frame)
18 | init_samples_minimum_weight: 0.0
19 |
20 | # Windowing
21 | window_output: True # Perform windowing of output scores
22 |
23 | # Detection parameters
24 | scale_factors: torch.ones(1) # What scales to use for localization (only one scale if IoUNet is used)
25 | score_upsample_factor: 1 # How much Fourier upsampling to use
26 |
27 |
28 | # Learning parameters for each feature type
29 | learning_rate: 0.0075 # Learning rate
30 | use_augmentation: True # Whether to use augmentation for this feature
31 |
32 |
33 |
34 | ## DATA:
35 | # Init data augmentation parameters
36 | augmentation:
37 | fliplr: True
38 | rotate: [5, -5, 10, -10, 20, -20, 30, -30, 45,-45, -60, 60]
39 | blur: '[(2, 0.2), (0.2, 2), (3,1), (1, 3), (2, 2)]'
40 | relativeshift: '[(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)]'
41 | dropout: '(7, 0.2)'
42 |
43 | augmentation_expansion_factor: 2 # How much to expand sample when doing augmentation
44 | random_shift_factor: 1 / 3 # How much random shift to do on each augmented sample
45 |
46 |
47 |
48 | ##REFINE:
49 | # Advanced localization parameters
50 | use_iou_net: False
51 | advanced_localization: True # Whether to use advanced (distractor-aware) localization
52 | target_not_found_threshold: 0.0 # Absolute score threshold to detect target missing
53 | distractor_threshold: 100 # Relative threshold to find distractors
54 | hard_negative_threshold: 0.45 # Relative threshold to find hard negative samples
55 | target_neighborhood_scale: 2.2 # Target neighborhood to remove
56 | dispalcement_scale: 0.7 # Displacement to consider for distractors
57 | hard_negative_learning_rate: 0.02 # Learning rate if hard negative detected
58 | update_scale_when_uncertain: True # Update scale or not if distractor is close
59 |
60 | # IoUNet parameters
61 | iounet_augmentation: False # Use the augmented samples to compute the modulation vector
62 | iounet_use_log_scale: True
63 | iounet_k: 3 # Top-k average to estimate final box
64 | num_init_random_boxes: 9 # Num extra random boxes in addition to the classifier prediction
65 | box_jitter_pos: 0.1 # How much to jitter the translation for random boxes
66 | box_jitter_sz: 0.5 # How much to jitter the scale for random boxes
67 | maximal_aspect_ratio: 6 # Limit on the aspect ratio
68 | box_refinement_iter: 5 # Number of iterations for refining the boxes
69 | box_refinement_step_length: 1 # Gradient step length in the bounding box refinement
70 | box_refinement_step_decay: 1 # Multiplicative step length decay (1 means no decay)
71 |
--------------------------------------------------------------------------------
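Note on the config above: values such as `image_sample_size: 14 * 16`, `random_shift_factor: 1 / 3`, and `scale_factors: torch.ones(1)` are not numbers to a YAML parser; they arrive as strings and have to be evaluated by whatever loads the file. Below is a minimal sketch of reading this file with PyYAML and evaluating such entries. `load_online_cfg` is a hypothetical helper for illustration only, not TracKit's actual loader.

````python
# Minimal sketch of reading ONLINE.yaml and evaluating expression-valued entries.
# Assumes PyYAML is installed; load_online_cfg is illustrative, not part of TracKit.
import yaml
import torch  # needed so that entries like "torch.ones(1)" can be evaluated


def load_online_cfg(path):
    with open(path, 'r') as f:
        cfg = yaml.safe_load(f)
    for k, v in cfg.items():
        if isinstance(v, str):
            try:
                # entries such as "14 * 16", "1 / 3" or "torch.ones(1)" arrive as strings
                cfg[k] = eval(v)
            except (NameError, SyntaxError):
                pass  # keep genuine strings (e.g. 'preserve_area') untouched
    return cfg


cfg = load_online_cfg('experiments/test/VOT/ONLINE.yaml')
print(cfg['image_sample_size'])    # 224
print(cfg['random_shift_factor'])  # 0.333...
````

Nested blocks such as `augmentation` would need the same treatment one level deeper.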
/experiments/test/VOT/Ocean.yaml:
--------------------------------------------------------------------------------
1 | TEST:
2 | VOT2018: # 0.467
3 | penalty_k: 0.021
4 | lr: 0.730
5 | window_influence: 0.321
6 | small_sz: 255
7 | big_sz: 271
8 | ratio: 0.93
9 | VOT2019: # 0.330 (slightly higher than the paper; small fluctuations are normal)
10 | penalty_k: 0.062
11 | lr: 0.765
12 | window_influence: 0.380
13 | small_sz: 255
14 | big_sz: 271
15 | ratio: 0.94
16 | VOT2018ON: # also for NOOA version (0.438 for NOOA)
17 | penalty_k: 0.187
18 | lr: 0.800
19 | window_influence: 0.640
20 | small_sz: 271
21 | big_sz: 287
22 | online_ratio: 0.5
23 | VOT2019ON: # also for NOOA version (0.323 for NOOA)
24 | penalty_k: 0.06
25 | lr: 0.644
26 | window_influence: 0.484
27 | small_sz: 255
28 | big_sz: 287
29 | online_ratio: 0.7
30 | VOT2020: # ocean-online/oceanplus all (you may tune these params on VOT2020 to get better results)
31 | penalty_k: 0.06
32 | lr: 0.644
33 | window_influence: 0.484
34 | small_sz: 255
35 | big_sz: 287
36 | online_ratio: 0.7
37 |
--------------------------------------------------------------------------------
/experiments/test/VOT/OceanPlus.yaml:
--------------------------------------------------------------------------------
1 | TEST:
2 | VOT2020:
3 | penalty_k: 0.06
4 | lr: 0.644
5 | window_influence: 0.484
6 | small_sz: 255
7 | big_sz: 287
8 | seg_thr: 0.9
9 | online_ratio: 0.9
10 | DAVIS2016:
11 | penalty_k: 0.032
12 | lr: 0.98
13 | window_influence: 0.45
14 | small_sz: 255
15 | big_sz: 287
16 | seg_thr: 0.84
17 | DAVIS2017:
18 | penalty_k: 0.031
19 | lr: 1
20 | window_influence: 0.35
21 | small_sz: 255
22 | big_sz: 287
23 | seg_thr: 0.84
24 |
--------------------------------------------------------------------------------
/experiments/test/VOT/SiamDW.yaml:
--------------------------------------------------------------------------------
1 | TEST:
2 | VOT2015:
3 | scale_step: 1.1190
4 | scale_lr: 0.4373
5 | scale_penalty: 0.9811
6 | w_influence: 0.2569
7 | VOT2016:
8 | scale_step: 1.1535
9 | scale_lr: 0.4596
10 | scale_penalty: 0.9259
11 | w_influence: 0.3309
12 | VOT2017:
13 | scale_step: 1.1466
14 | scale_lr: 0.2061
15 | scale_penalty: 0.9994
16 | w_influence: 0.3242
--------------------------------------------------------------------------------
/experiments/train/Ocean.yaml:
--------------------------------------------------------------------------------
1 | OCEAN:
2 | GPUS: '0,1,2,3,4,5,6,7'
3 | PRINT_FREQ: 10
4 | WORKERS: 32
5 | OUTPUT_DIR: 'logs' # log file
6 | CHECKPOINT_DIR: 'snapshot' # checkpoint file
7 |
8 | TRAIN:
9 | ISTRUE: True # whether to train
10 | MODEL: "Ocean"
11 | ALIGN: True # object aware branch
12 | START_EPOCH: 0
13 | END_EPOCH: 50
14 | TEMPLATE_SIZE: 127
15 | SEARCH_SIZE: 255
16 | BATCH: 32
17 | STRIDE: 8
18 | RESUME: False
19 | PRETRAIN: 'pretrain.model'
20 | LR_POLICY: 'log'
21 |
22 | WARMUP:
23 | IFNOT: True
24 | TYPE: 'step'
25 | EPOCH: 5 # res50 5
26 | KWARGS:
27 | start_lr: 0.001 # res50 0.001 alex: 0.005
28 | end_lr: 0.005 # res50 0.005 alex: 0.01
29 | step: 1
30 | LR:
31 | TYPE: 'log'
32 | KWARGS:
33 | start_lr: 0.005 # res50 0.005 alex 0.01
34 | end_lr: 0.00001 # res50 0.0005
35 |
36 |
37 | LAYERS_LR: 0.1 # res50 0.1 alex: 1 # scale ratio for backbone
38 | BASE_LR: 0.005
39 | UNFIX_EPOCH: 10
40 | WARM_POLICY: 'step'
41 | UNFIX_POLICY: 'log'
42 | MOMENTUM: 0.9
43 | WEIGHT_DECAY: 0.0001
44 | TRAINABLE_LAYER: ['layer1', 'layer2', 'layer3'] # ['layer2', 'layer3', 'layer4']
45 | WHICH_USE: ['YTB', 'VID', 'COCO', 'DET', 'GOT10K']
46 |
47 | TEST: # TEST model is same as TRAIN.MODEL
48 | ISTRUE: False # whether to test
49 | THREADS: 16 # multi threads test
50 | DATA: 'VOT2019'
51 | START_EPOCH: 30
52 | END_EPOCH: 50
53 | RGBTSPLIT: None # None for main channel, 'RGB' and 'T' for RGBT
54 | TUNE: # TUNE model is same as TRAIN.MODEL
55 | ISTRUE: False # whether to tune
56 | DATA: 'VOT2019'
57 | METHOD: 'TPE'
58 | RGBTSPLT: None
59 | DATASET:
60 | SHIFT: 4
61 | SCALE: 0.05
62 | COLOR: 1
63 | FLIP: 0
64 | BLUR: 0
65 | ROTATION: 0
66 | LABELSMOOTH: False
67 | MIXUP: 0
68 | GRAY: 0
69 | CUTOUT: 0
70 |
71 | SHIFTs: 64
72 | SCALEs: 0.18
73 |
74 | VID:
75 | PATH: './data/vid/crop511'
76 | ANNOTATION: './data/vid/train.json'
77 | RANGE: 100
78 | USE: 110000
79 | YTB:
80 | PATH: './data/y2b/crop511'
81 | ANNOTATION: './data/y2b/train.json'
82 | RANGE: 3
83 | USE: 210000
84 | GOT10K:
85 | PATH: './data/got10k/crop511'
86 | ANNOTATION: './data/got10k/all.json'
87 | RANGE: 100
88 | USE: 160000
89 | DET:
90 | PATH: './data/det/crop511'
91 | ANNOTATION: './data/det/train.json'
92 | RANGE: 100
93 | USE: 60000
94 | COCO:
95 | PATH: "./data/coco/crop511"
96 | ANNOTATION: "./data/coco/train2017.json"
97 | RANGE: 1
98 | USE: 60000
99 |
100 |
--------------------------------------------------------------------------------
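The WARMUP and LR blocks above describe a two-stage schedule: a step warmup from 0.001 to 0.005 over the first 5 epochs, then a 'log' (geometric) decay from 0.005 down to 1e-5 over the remaining epochs. The sketch below is only a rough reconstruction of the per-epoch rates implied by those numbers; TracKit builds its actual schedule in lib/core, and the exact curve may differ.

````python
# Sketch of the warmup + log-decay learning-rate schedule implied by the config above.
# Assumption: one rate per epoch; the repo's scheduler may interpolate per iteration.
import numpy as np

def ocean_lr_schedule(warm_start=0.001, warm_end=0.005, warm_epochs=5,
                      start_lr=0.005, end_lr=1e-5, total_epochs=50):
    warmup = np.linspace(warm_start, warm_end, warm_epochs)     # 'step' warmup
    decay = np.logspace(np.log10(start_lr), np.log10(end_lr),
                        total_epochs - warm_epochs)             # 'log' policy
    return np.concatenate([warmup, decay])

lrs = ocean_lr_schedule()
print(lrs[:5])          # warmup epochs 0-4
print(lrs[5], lrs[-1])  # start of the log decay, ~1e-5 at the last epoch
````

After UNFIX_EPOCH, the backbone layers listed in TRAINABLE_LAYER are presumably trained at LAYERS_LR times these rates.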
/experiments/train/SiamDW.yaml:
--------------------------------------------------------------------------------
1 | SIAMFC:
2 | GPUS: '0,1,2,3'
3 | PRINT_FREQ: 10
4 | WORKERS: 32
5 | OUTPUT_DIR: 'logs' # log file
6 | CHECKPOINT_DIR: 'snapshot' # checkpoint file
7 |
8 | TRAIN:
9 | ISTRUE: True # whether to train
10 | MODEL: "SiamDW" # SiamFCIncep22, SiamFCNext22
11 | START_EPOCH: 0
12 | END_EPOCH: 50
13 | TEMPLATE_SIZE: 127
14 | SEARCH_SIZE: 255
15 | STRIDE: 8
16 | PAIRS: 200000
17 | PRETRAIN: 'pretrain.model'
18 | LR_POLICY: 'log'
19 | LR: 0.01
20 | LR_END: 0.00001
21 | MOMENTUM: 0.9
22 | WEIGHT_DECAY: 0.0001
23 | WHICH_USE: 'GOT10K' # VID or 'GOT10K'
24 | TEST: # TEST model is same as TRAIN.MODEL
25 | ISTRUE: True # whether to test
26 | THREADS: 16 # multi threads test
27 | DATA: 'VOT2017'
28 | START_EPOCH: 30
29 | END_EPOCH: 50
30 | TUNE: # TUNE model is same as TRAIN.MODEL
31 | ISTRUE: False # whether to tune
32 | DATA: 'VOT2017'
33 | METHOD: 'TPE'
34 | DATASET:
35 | SHIFT: 4
36 | SCALE: 0.05
37 | COLOR: 1
38 | FLIP: 0
39 | BLUR: 0
40 | ROTATION: 0
41 | GOT10K:
42 | PATH: './data/got10k/crop511'
43 | ANNOTATION: './data/got10k/train.json'
44 |
--------------------------------------------------------------------------------
/lib/core/eval_davis.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Zhipeng Zhang (zhangzhipeng2017@ia.ac.cn)
5 | # multi-gpu test for epochs
6 | # ------------------------------------------------------------------------------
7 |
8 | import os
9 | import time
10 | import argparse
11 | import numpy as np
12 | from os import listdir
13 | from os.path import join, exists
14 | from concurrent import futures
15 |
16 | parser = argparse.ArgumentParser(description='multi-gpu test all epochs')
17 | parser.add_argument('--dataset', default='DAVIS2016', type=str, help='benchmarks')
18 | parser.add_argument('--num_threads', default=16, type=int, help='number of threads')
19 | parser.add_argument('--datapath', default='dataset/DAVIS', type=str, help='benchmarks')
20 | args = parser.parse_args()
21 |
22 |
23 | def eval_davis(epoch):
24 | year = args.dataset[5:]
25 | full_path = join('result', args.dataset, epoch)
26 | os.system('python lib/eval_toolkit/davis/davis2017-evaluation/evaluation_method.py --task semi-supervised --results_path {0} --davis_path {1} --year {2}'.format(full_path, args.datapath, year))
27 |
28 |
29 | def extract_davis(epochs):
30 | # J&F-Mean,J-Mean,J-Recall,J-Decay,F-Mean,F-Recall,F-Decay
31 | results = dict()
32 | print('\t \tJ&F-Mean,J-Mean,J-Recall,J-Decay,F-Mean,F-Recall,F-Decay')
33 |
34 | JFm = []
35 | Jm = []
36 | Jr = []
37 | Jd = []
38 | Fm = []
39 | Fr = []
40 | Fd = []
41 |
42 | for e in epochs:
43 | results[e] = dict()
44 | full_path = join('result', args.dataset, e, 'global_results-val.csv')
45 | record = open(full_path, 'r').readlines()
46 | record = eval(record[1])
47 | print('{} {} {} {} {} {} {} {}'.format(e, record[0], record[1], record[2], record[3], record[4], record[5], record[6]))
48 |
49 | JFm.append(record[0])
50 | Jm.append(record[1])
51 | Jr.append(record[2])
52 | Jd.append(record[3])
53 | Fm.append(record[4])
54 | Fr.append(record[5])
55 | Fd.append(record[6])
56 | print('=========> sort with J&F: <===========')
57 | argidx = np.argmax(np.array(JFm))
58 | print('{} {} {} {} {} {} {} {}'.format(epochs[argidx], JFm[argidx], Jm[argidx], Jr[argidx], Jd[argidx], Fm[argidx], Fr[argidx], Fd[argidx]))
59 | print('=========> sort with Jm: <===========')
60 | argidx = np.argmax(np.array(Jm))
61 | print('{} {} {} {} {} {} {} {}'.format(epochs[argidx], JFm[argidx], Jm[argidx], Jr[argidx], Jd[argidx], Fm[argidx], Fr[argidx], Fd[argidx]))
62 |
63 |
64 | base_path = join('result', args.dataset)
65 | epochs = listdir(base_path)
66 | print('total {} epochs'.format(len(epochs)))
67 |
68 | # multi-process evaluation
69 | if args.dataset in ['DAVIS2016', 'DAVIS2017']:
70 | with futures.ProcessPoolExecutor(max_workers=args.num_threads) as executor:
71 | fs = [executor.submit(eval_davis, e) for e in epochs]
72 | print('done')
73 | extract_davis(epochs)
74 | else:
75 | raise ValueError('not supported data')
76 |
--------------------------------------------------------------------------------
/lib/core/extract_tune_logs.py:
--------------------------------------------------------------------------------
1 | import shutil
2 | import argparse
3 | import numpy as np
4 |
5 |
6 | parser = argparse.ArgumentParser(description='Analyze SiamFC tune results')
7 | parser.add_argument('--path', default='logs/tpe_tune_rpn.log', help='tune result path')
8 | parser.add_argument('--dataset', default='VOT2018', help='test dataset')
9 | parser.add_argument('--save_path', default='logs', help='log file save path')
10 |
11 |
12 | def collect_results(args):
13 | if not args.path.endswith('txt'):
14 | name = args.path.split('.')[0]
15 | name = name + '.txt'
16 | shutil.copy(args.path, name)
17 | args.path = name
18 | fin = open(args.path, 'r')
19 | lines = fin.readlines()
20 | penalty_k = []
21 | scale_lr = []
22 | wi = []
23 | sz = []
24 | bz = []
25 | eao = []
26 | count = 0 # total numbers
27 |
28 | for line in lines:
29 | if not line.startswith('penalty_k'):
30 | pass
31 | else:
32 | # print(line)
33 | count += 1
34 | temp0, temp1, temp2, temp3, temp4, temp5 = line.split(',')
35 | penalty_k.append(float(temp0.split(': ')[-1]))
36 | scale_lr.append(float(temp1.split(': ')[-1]))
37 | wi.append(float(temp2.split(': ')[-1]))
38 | sz.append(float(temp3.split(': ')[-1]))
39 | bz.append(float(temp4.split(': ')[-1]))
40 | eao.append(float(temp5.split(': ')[-1]))
41 |
42 | # find max
43 | eao = np.array(eao)
44 | max_idx = np.argmax(eao)
45 | max_eao = eao[max_idx]
46 | print('{} param groups have been tested'.format(count))
47 | print('penalty_k: {:.4f}, scale_lr: {:.4f}, wi: {:.4f}, small_sz: {}, big_sz: {}, eao: {}'.format(penalty_k[max_idx], scale_lr[max_idx], wi[max_idx], sz[max_idx], bz[max_idx], max_eao))
48 |
49 |
50 | if __name__ == '__main__':
51 | args = parser.parse_args()
52 | collect_results(args)
53 |
--------------------------------------------------------------------------------
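`collect_results` above expects each result line of the TPE log to start with `penalty_k` and to carry six comma-separated `key: value` fields. Below is a hedged example of a line in that format and how the same split logic recovers the numbers; the exact field labels written by the tuner may differ.

````python
# Example log line in the format collect_results() above parses (assumed labels).
line = 'penalty_k: 0.021, scale_lr: 0.730, wi: 0.321, small_sz: 255, big_sz: 271, eao: 0.467'

# same parsing as in collect_results: split on ',' then take the value after ': '
penalty_k, scale_lr, wi, small_sz, big_sz, eao = (
    float(part.split(': ')[-1]) for part in line.split(','))
print(penalty_k, eao)  # 0.021 0.467
````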
/lib/dataset/crop/DAVIS/gen_json.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # processing DAVIS train
3 | # --------------------------------------------------------
4 | from os.path import join
5 | import json
6 | import os
7 | import cv2
8 | import pdb
9 | import numpy as np
10 | import pdb
11 | from PIL import Image
12 |
13 | data_dir = '/home/zpzhang/data/testing/DAVIS-trainval'
14 | saveDir = '/home/zpzhang/data/training/DAVIS'
15 |
16 | dataset = dict()
17 | train_txt = join(data_dir, 'ImageSets/2017', 'train.txt')
18 | videos = open(train_txt, 'r').readlines()
19 | n_videos = len(videos)
20 |
21 | for iidx, video_name in enumerate(videos):
22 | video_name = video_name[:-1]
23 |
24 | print('video id: {:04d} / {:04d}'.format(iidx, n_videos))
25 | try:
26 | imgs = sorted(os.listdir(join(data_dir, 'JPEGImages/480p', video_name)))
27 | except:
28 | continue
29 | dataset[video_name] = dict()
30 |
31 | for idx, im_name in enumerate(imgs):
32 | mask_path = join(data_dir, 'Annotations/480p', video_name, im_name.replace('.jpg', '.png'))
33 | mask = np.array(Image.open(mask_path)).astype(np.uint8)
34 | objects = np.unique(mask)
35 |
36 | for track_id in range(1, len(objects)):
37 | color = objects[track_id]
38 | mask_temp = (mask == color).astype(np.uint8) * 255
39 | x, y, w, h = cv2.boundingRect(mask_temp)
40 | bbox = [x, y, x + w - 1, y + h - 1] # [x1,y1,x2,y2]
41 | if w <= 0 or h <= 0: # leads to NaN error in cls.
42 | continue
43 |
44 | if '{:02d}'.format(track_id - 1) not in dataset[video_name].keys():
45 | dataset[video_name]['{:02d}'.format(track_id - 1)] = dict()
46 | dataset[video_name]['{:02d}'.format(track_id-1)]['{:06d}'.format(int(im_name.split('.')[0]))] = bbox
47 | print('save json (dataset), please wait 20 seconds~')
48 | save_path = join(saveDir, 'davis.json')
49 | json.dump(dataset, open(save_path, 'w'), indent=4, sort_keys=True)
50 | print('done!')
51 |
52 |
--------------------------------------------------------------------------------
/lib/dataset/crop/DAVIS/readme.md:
--------------------------------------------------------------------------------
1 | # Preprocessing DAVIS
2 |
3 | ````shell
4 | python par_crop.py --enable_mask --num_threads 24
5 | python gen_json.py
6 | ````
7 |
--------------------------------------------------------------------------------
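For reference, the `gen_json.py` step above writes `davis.json` keyed by video name, then zero-padded track id, then zero-padded frame index, with `[x1, y1, x2, y2]` boxes taken from the bounding rectangle of each instance mask. An illustrative entry (values are made up, not from the dataset):

````python
# Illustrative layout of the davis.json produced by gen_json.py (made-up values).
example = {
    "bear": {                                # video name from ImageSets/2017/train.txt
        "00": {                              # track id = object color index - 1
            "000000": [230, 212, 623, 478],  # [x1, y1, x2, y2] from cv2.boundingRect
            "000001": [232, 209, 625, 480],
        }
    }
}
````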
/lib/dataset/crop/RGBT210/RGBT210_genjson.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # ! ./usr/bin/env python
3 | # __author__ = 'zzp'
4 |
5 | import json
6 | import numpy as np
7 | from os import listdir
8 | from os.path import join
9 |
10 | basepath = '/data/share/RGBT210/'
11 | save = dict()
12 |
13 |
14 | def genjson():
15 | videos = listdir(basepath)
16 |
17 | for v in videos:
18 | save[v] = dict()
19 | save[v]['name'] = v # video name
20 |
21 | # save img names
22 | v_in_path = join(basepath, v, 'infrared')
23 | v_rgb_path = join(basepath, v, 'visible')
24 | temp1 = listdir(v_in_path)
25 | temp2 = listdir(v_rgb_path)
26 | temp1.sort()
27 | temp2.sort()
28 | save[v]['infrared_imgs'] = temp1 # infrared file names
29 | save[v]['visible_imgs'] = temp2 # visible file names
30 |
31 | # read gt
32 | v_in_gt_path = join(basepath, v, 'init.txt')
33 | v_rgb_gt_path = join(basepath, v, 'init.txt')
34 | v_in_gts = np.loadtxt(v_in_gt_path, delimiter=',')
35 | v_rgb_gts = np.loadtxt(v_rgb_gt_path, delimiter=',')
36 |
37 | v_in_gts[:, 0:2] = v_in_gts[:, 0:2] - 1 # to python 0 index
38 | v_rgb_gts[:, 0:2] = v_rgb_gts[:, 0:2] - 1 # to python 0 index
39 |
40 | v_in_init = v_in_gts[0]
41 | v_rgb_init = v_rgb_gts[0]
42 |
43 | # save init and gt
44 | save[v]['infrared_init'] = v_in_init.tolist()
45 | save[v]['visible_init'] = v_rgb_init.tolist()
46 | save[v]['infrared_gt'] = v_in_gts.tolist()
47 | save[v]['visible_gt'] = v_rgb_gts.tolist()
48 |
49 | json.dump(save, open('/data/zpzhang/datasets/dataset/RGBT210.json', 'w'), indent=4, sort_keys=True)
50 |
51 |
52 | if __name__ == '__main__':
53 | genjson()
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/lib/dataset/crop/RGBT210/gen_json.py:
--------------------------------------------------------------------------------
1 | from os.path import join
2 | from os import listdir
3 | import json
4 | import cv2
5 | import numpy as np
6 | from pprint import pprint
7 |
8 | print('loading json (raw RGBT210 info), please wait 20 seconds~')
9 | RGBT210 = json.load(open('/data/zpzhang/datasets/dataset/RGBT210.json', 'r'))
10 | RGBT210_base_path = '/data/share/RGBT210'
11 |
12 | def check_size(frame_sz, bbox):
13 | min_ratio = 0.1
14 | max_ratio = 0.75
15 | # only accept objects >10% and <75% of the total frame
16 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz)))
17 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio)
18 | return ok
19 |
20 |
21 | def check_borders(frame_sz, bbox):
22 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2
23 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \
24 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \
25 | ((frame_sz[1] - bbox[3]) > dist_from_border)
26 | return ok
27 |
28 |
29 | snippets = dict()
30 |
31 | n_videos = 0
32 |
33 |
34 | for v_name in list(RGBT210.keys()):
35 | video = RGBT210[v_name]
36 | n_videos += 1
37 | in_frames = video['infrared_imgs']
38 | rgb_frames = video['visible_imgs']
39 | snippet = dict()
40 | snippets[video['name']] = dict()
41 |
42 | # read an image to get the frame size
43 | im_temp_path = join(RGBT210_base_path, video['name'], 'visible', rgb_frames[0])
44 | im_temp = cv2.imread(im_temp_path)
45 | frame_sz = [im_temp.shape[1], im_temp.shape[0]]
46 |
47 | in_gts = video['infrared_gt']
48 | rgb_gts = video['visible_gt']
49 |
50 | for f, in_frame in enumerate(in_frames):
51 | in_bbox = in_gts[f] # (x,y,w,h)
52 | rgb_bbox = rgb_gts[f] # (x,y,w,h)
53 |
54 | bboxs = [[in_bbox[0], in_bbox[1], in_bbox[0]+in_bbox[2], in_bbox[1]+in_bbox[3]],
55 | [rgb_bbox[0], rgb_bbox[1], rgb_bbox[0]+rgb_bbox[2], rgb_bbox[1]+rgb_bbox[3]]] #(xmin, ymin, xmax, ymax)
56 |
57 | imgs = [in_frames[f], rgb_frames[f]] # image names may differ between the infrared and visible folders
58 |
59 | snippet['{:06d}'.format(f)] = [imgs, bboxs]
60 |
61 | snippets[video['name']]['{:02d}'.format(0)] = snippet.copy()
62 |
63 | json.dump(snippets, open('/data/share/SMALLSIAM/RGBT210/all.json', 'w'), indent=4, sort_keys=True)
64 | print('done!')
65 |
--------------------------------------------------------------------------------
/lib/dataset/crop/RGBT210/readme.md:
--------------------------------------------------------------------------------
1 | # Preprocessing RGBT210 (train and val)
2 |
3 |
4 | ### Crop & Generate data info (20 min)
5 |
6 | ````sh
7 | python RGBT210_genjson.py
8 | python par_crop.py 511 24
9 | python gen_json.py
10 | ````
11 |
--------------------------------------------------------------------------------
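The `gen_json.py` step above writes `all.json` with a single track per video; each frame index maps to a pair of image names (infrared, visible) and the matching pair of `[xmin, ymin, xmax, ymax]` boxes. An illustrative entry (file names and values are made up):

````python
# Illustrative entry of the RGBT210 all.json written by gen_json.py (made-up names and values).
example = {
    "afterrain": {                             # video name
        "00": {                                # one track per RGBT video
            "000000": [
                ["i0001.jpg", "v0001.jpg"],    # [infrared image, visible image]
                [[46, 254, 112, 309],          # infrared box [xmin, ymin, xmax, ymax]
                 [44, 252, 110, 307]],         # visible box  [xmin, ymin, xmax, ymax]
            ],
        }
    }
}
````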
/lib/dataset/crop/RGBT234/RGBT234_genjson.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # ! ./usr/bin/env python
3 | # __author__ = 'zzp'
4 |
5 | import json
6 | import numpy as np
7 | from os import listdir
8 | from os.path import join
9 |
10 | basepath = '/data/zpzhang/datasets/dataset/RGBT234/'
11 | save = dict()
12 |
13 |
14 | def genjson():
15 | videos = listdir(basepath)
16 |
17 | for v in videos:
18 | save[v] = dict()
19 | save[v]['name'] = v # video name
20 |
21 | # save img names
22 | v_in_path = join(basepath, v, 'infrared')
23 | v_rgb_path = join(basepath, v, 'visible')
24 | temp1 = listdir(v_in_path)
25 | temp2 = listdir(v_rgb_path)
26 | temp1.sort()
27 | temp2.sort()
28 | save[v]['infrared_imgs'] = temp1 # infrared file names
29 | save[v]['visible_imgs'] = temp2 # visible file names
30 |
31 | # read gt
32 | v_in_gt_path = join(basepath, v, 'infrared.txt')
33 | v_rgb_gt_path = join(basepath, v, 'visible.txt')
34 | v_in_gts = np.loadtxt(v_in_gt_path, delimiter=',')
35 | v_rgb_gts = np.loadtxt(v_rgb_gt_path, delimiter=',')
36 |
37 | v_in_gts[:, 0:2] = v_in_gts[:, 0:2] - 1 # to python 0 index
38 | v_rgb_gts[:, 0:2] = v_rgb_gts[:, 0:2] - 1 # to python 0 index
39 |
40 | v_in_init = v_in_gts[0]
41 | v_rgb_init = v_rgb_gts[0]
42 |
43 | # save init and gt
44 | save[v]['infrared_init'] = v_in_init.tolist()
45 | save[v]['visible_init'] = v_rgb_init.tolist()
46 | save[v]['infrared_gt'] = v_in_gts.tolist()
47 | save[v]['visible_gt'] = v_rgb_gts.tolist()
48 |
49 | json.dump(save, open('/data/zpzhang/datasets/dataset/RGBT234.json', 'w'), indent=4, sort_keys=True)
50 |
51 |
52 | if __name__ == '__main__':
53 | genjson()
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/lib/dataset/crop/RGBT234/gen_json.py:
--------------------------------------------------------------------------------
1 | from os.path import join
2 | from os import listdir
3 | import json
4 | import cv2
5 | import numpy as np
6 | from pprint import pprint
7 |
8 | print('loading json (raw RGBT234 info), please wait 20 seconds~')
9 | RGBT234 = json.load(open('RGBT234.json', 'r'))
10 | RGBT234_base_path = '/data/zpzhang/datasets/dataset/RGBT234'
11 |
12 | def check_size(frame_sz, bbox):
13 | min_ratio = 0.1
14 | max_ratio = 0.75
15 | # only accept objects >10% and <75% of the total frame
16 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz)))
17 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio)
18 | return ok
19 |
20 |
21 | def check_borders(frame_sz, bbox):
22 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2
23 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \
24 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \
25 | ((frame_sz[1] - bbox[3]) > dist_from_border)
26 | return ok
27 |
28 |
29 | snippets = dict()
30 |
31 | n_videos = 0
32 |
33 |
34 | for v_name in list(RGBT234.keys()):
35 | video = RGBT234[v_name]
36 | n_videos += 1
37 | in_frames = video['infrared_imgs']
38 | rgb_frames = video['visible_imgs']
39 | snippet = dict()
40 | snippets[video['name']] = dict()
41 |
42 | # read an image to get the frame size
43 | im_temp_path = join(RGBT234_base_path, video['name'], 'visible', rgb_frames[0])
44 | im_temp = cv2.imread(im_temp_path)
45 | frame_sz = [im_temp.shape[1], im_temp.shape[0]]
46 |
47 | in_gts = video['infrared_gt']
48 | rgb_gts = video['visible_gt']
49 |
50 | for f, in_frame in enumerate(in_frames):
51 | in_bbox = in_gts[f] # (x,y,w,h)
52 | rgb_bbox = rgb_gts[f] # (x,y,w,h)
53 |
54 | bboxs = [[in_bbox[0], in_bbox[1], in_bbox[0]+in_bbox[2], in_bbox[1]+in_bbox[3]],
55 | [rgb_bbox[0], rgb_bbox[1], rgb_bbox[0]+rgb_bbox[2], rgb_bbox[1]+rgb_bbox[3]]] #(xmin, ymin, xmax, ymax)
56 |
57 | imgs = [in_frames[f], rgb_frames[f]] # image names may differ between the infrared and visible folders
58 |
59 | snippet['{:06d}'.format(f)] = [imgs, bboxs]
60 |
61 | snippets[video['name']]['{:02d}'.format(0)] = snippet.copy()
62 |
63 | json.dump(snippets, open('/data/share/SMALLSIAM/RGBT234/all.json', 'w'), indent=4, sort_keys=True)
64 | print('done!')
65 |
--------------------------------------------------------------------------------
/lib/dataset/crop/RGBT234/readme.md:
--------------------------------------------------------------------------------
1 | # Preprocessing RGBT234 (train and val)
2 |
3 |
4 | ### Crop & Generate data info (20 min)
5 |
6 | ````sh
7 | python RGBT234_genjson.py
8 | python par_crop.py 511 24
9 | python gen_json.py
10 | ````
11 |
--------------------------------------------------------------------------------
/lib/dataset/crop/coco/gen_json.py:
--------------------------------------------------------------------------------
1 | from pycocotools.coco import COCO
2 | from os.path import join
3 | import json
4 | import os
5 |
6 |
7 | dataDir = '/data/home/hopeng/msralab_IMG/Users/hopeng/data_official/coco'
8 | #'/data/share/coco'
9 | for dataType in ['val2017', 'train2017']:
10 | dataset = dict()
11 | annFile = '{}/annotations/instances_{}.json'.format(dataDir,dataType)
12 | coco = COCO(annFile)
13 | n_imgs = len(coco.imgs)
14 | for n, img_id in enumerate(coco.imgs):
15 | print('subset: {} image id: {:04d} / {:04d}'.format(dataType, n, n_imgs))
16 | img = coco.loadImgs(img_id)[0]
17 | annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
18 | anns = coco.loadAnns(annIds)
19 | video_crop_base_path = join(dataType, img['file_name'].split('/')[-1].split('.')[0])
20 |
21 | if len(anns) > 0:
22 | dataset[video_crop_base_path] = dict()
23 |
24 | for trackid, ann in enumerate(anns):
25 | rect = ann['bbox']
26 | c = ann['category_id']
27 | bbox = [rect[0], rect[1], rect[0]+rect[2], rect[1]+rect[3]]
28 | if rect[2] <= 0 or rect[3] <= 0: # leads to NaN error in cls.
29 | continue
30 | dataset[video_crop_base_path]['{:02d}'.format(trackid)] = {'000000': bbox}
31 |
32 | print('save json (dataset), please wait 20 seconds~')
33 | #json.dump(dataset, open('{}.json'.format(dataType), 'w'), indent=4, sort_keys=True)
34 | json.dump(dataset, open('{}.json'.format(os.path.join(dataDir, dataType)), 'w'), indent=4, sort_keys=True)
35 | print('done!')
36 |
37 |
--------------------------------------------------------------------------------
/lib/dataset/crop/coco/readme.md:
--------------------------------------------------------------------------------
1 | # Preprocessing COCO
2 |
3 | ### Download raw images and annotations
4 |
5 | ````shell
6 | wget http://images.cocodataset.org/zips/train2017.zip
7 | wget http://images.cocodataset.org/zips/val2017.zip
8 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
9 |
10 | unzip ./train2017.zip
11 | unzip ./val2017.zip
12 | unzip ./annotations_trainval2017.zip
13 | cd pycocotools && make && cd ..
14 | ````
15 |
16 | ### Crop & Generate data info (10 min)
17 |
18 | ````shell
19 | #python par_crop.py [data_path] [crop_size] [num_threads]
20 | python par_crop.py /data/share/coco 511 12
21 | python gen_json.py
22 | ````
23 |
24 | Code is modified from SiamMask.
25 |
--------------------------------------------------------------------------------
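`gen_json.py` above converts each COCO annotation box from `[x, y, w, h]` to `[x1, y1, x2, y2]` and keys it by the cropped image folder and a per-image track id. A quick sanity check one could run afterwards; the path is hypothetical and should follow whatever `dataDir` was used in `gen_json.py`:

````python
# Quick sanity check of the generated COCO annotation json (hypothetical path; adjust dataDir).
import json

dataset = json.load(open('/data/share/coco/train2017.json'))
video, tracks = next(iter(dataset.items()))
print(video)         # e.g. 'train2017/000000000009'
print(tracks['00'])  # {'000000': [x1, y1, x2, y2]} for the first annotated object
````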
/lib/dataset/crop/det/gen_json.py:
--------------------------------------------------------------------------------
1 | from os.path import join, isdir
2 | from os import mkdir
3 | import glob
4 | import xml.etree.ElementTree as ET
5 | import json
6 |
7 | js = {}
8 | #VID_base_path = '/data/share/ILSVRC'
9 | VID_base_path = '/data/home/hopeng/data_local/ILSVRC2015'
10 | ann_base_path = join(VID_base_path, 'Annotations/DET/train/')
11 | sub_sets = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i')
12 | for sub_set in sub_sets:
13 | sub_set_base_path = join(ann_base_path, sub_set)
14 |
15 | if 'a' == sub_set:
16 | xmls = sorted(glob.glob(join(sub_set_base_path, '*', '*.xml')))
17 | else:
18 | xmls = sorted(glob.glob(join(sub_set_base_path, '*.xml')))
19 | n_imgs = len(xmls)
20 | for f, xml in enumerate(xmls):
21 | print('subset: {} frame id: {:08d} / {:08d}'.format(sub_set, f, n_imgs))
22 | xmltree = ET.parse(xml)
23 | objects = xmltree.findall('object')
24 |
25 | video = join(sub_set, xml.split('/')[-1].split('.')[0])
26 |
27 | for id, object_iter in enumerate(objects):
28 | bndbox = object_iter.find('bndbox')
29 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text),
30 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)]
31 | frame = '%06d' % (0)
32 | obj = '%02d' % (id)
33 | if video not in js:
34 | js[video] = {}
35 | if obj not in js[video]:
36 | js[video][obj] = {}
37 | js[video][obj][frame] = bbox
38 |
39 | train = {k:v for (k,v) in js.items() if 'i/' not in k}
40 | val = {k:v for (k,v) in js.items() if 'i/' in k}
41 |
42 | #json.dump(train, open('train.json', 'w'), indent=4, sort_keys=True)
43 | #json.dump(val, open('val.json', 'w'), indent=4, sort_keys=True)
44 | json.dump(train, open('/data/home/hopeng/data_local/ILSVRC2015/DET/train.json', 'w'), indent=4, sort_keys=True)
45 | json.dump(val, open('/data/home/hopeng/data_local/ILSVRC2015/DET/val.json', 'w'), indent=4, sort_keys=True)
46 |
--------------------------------------------------------------------------------
/lib/dataset/crop/det/readme.md:
--------------------------------------------------------------------------------
1 | # Preprocessing DET (Object detection)
2 | Large Scale Visual Recognition Challenge 2015 (ILSVRC2015)
3 |
4 | ### Download dataset (49GB)
5 |
6 | ````shell
7 | wget http://image-net.org/image/ILSVRC2015/ILSVRC2015_DET.tar.gz
8 | tar -xzvf ./ILSVRC2015_DET.tar.gz
9 |
10 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2013_train ILSVRC/Annotations/DET/train/a
11 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0000 ILSVRC/Annotations/DET/train/b
12 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0001 ILSVRC/Annotations/DET/train/c
13 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0002 ILSVRC/Annotations/DET/train/d
14 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0003 ILSVRC/Annotations/DET/train/e
15 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0004 ILSVRC/Annotations/DET/train/f
16 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0005 ILSVRC/Annotations/DET/train/g
17 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0006 ILSVRC/Annotations/DET/train/h
18 | ln -sfb $PWD/ILSVRC/Annotations/DET/val ILSVRC/Annotations/DET/train/i
19 |
20 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2013_train ILSVRC/Data/DET/train/a
21 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0000 ILSVRC/Data/DET/train/b
22 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0001 ILSVRC/Data/DET/train/c
23 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0002 ILSVRC/Data/DET/train/d
24 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0003 ILSVRC/Data/DET/train/e
25 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0004 ILSVRC/Data/DET/train/f
26 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0005 ILSVRC/Data/DET/train/g
27 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0006 ILSVRC/Data/DET/train/h
28 | ln -sfb $PWD/ILSVRC/Data/DET/val ILSVRC/Data/DET/train/i
29 | ````
30 |
31 | ### Crop & Generate data info (20 min)
32 |
33 | ````shell
34 | #python par_crop.py [data_path] [crop_size] [num_threads]
35 | python par_crop.py /data/share/ILSVRC 511 12
36 | python gen_json.py
37 | ````
38 |
39 | Code is modified from SiamMask.
40 |
--------------------------------------------------------------------------------
/lib/dataset/crop/got10k/gen_json.py:
--------------------------------------------------------------------------------
1 | from os.path import join
2 | from os import listdir
3 | import json
4 | import numpy as np
5 |
6 | print('loading json (raw got10k info), please wait 20 seconds~')
7 | got10k = json.load(open('got10k.json', 'r'))
8 |
9 |
10 | def check_size(frame_sz, bbox):
11 | min_ratio = 0.1
12 | max_ratio = 0.75
13 | # only accept objects >10% and <75% of the total frame
14 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz)))
15 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio)
16 | return ok
17 |
18 |
19 | def check_borders(frame_sz, bbox):
20 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2
21 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \
22 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \
23 | ((frame_sz[1] - bbox[3]) > dist_from_border)
24 | return ok
25 |
26 |
27 | snippets = dict()
28 |
29 | n_videos = 0
30 | for subset in got10k:
31 | for video in subset:
32 | n_videos += 1
33 | frames = video['frame']
34 | snippet = dict()
35 | snippets[video['base_path']] = dict()
36 | for f, frame in enumerate(frames):
37 | frame_sz = frame['frame_sz']
38 | bbox = frame['bbox'] # (x,y,w,h)
39 |
40 | snippet['{:06d}'.format(f)] = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] #(xmin, ymin, xmax, ymax)
41 |
42 | snippets[video['base_path']]['{:02d}'.format(0)] = snippet.copy()
43 |
44 | train = {k:v for (k,v) in snippets.items() if 'train' in k}
45 | val = {k:v for (k,v) in snippets.items() if 'val' in k}
46 |
47 | # json.dump(train, open('/data2/got10k/train.json', 'w'), indent=4, sort_keys=True)
48 | json.dump(val, open('/data2/got10k/val.json', 'w'), indent=4, sort_keys=True)
49 | print('done!')
50 |
--------------------------------------------------------------------------------
/lib/dataset/crop/got10k/parser_got10k.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # ! ./usr/bin/env python
3 | # __author__ = 'zzp'
4 |
5 | import cv2
6 | import json
7 | import glob
8 | import numpy as np
9 | from os.path import join
10 | from os import listdir
11 |
12 | import argparse
13 |
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--dir',type=str, default='/data/share/GOT10K', help='your vid data dir')
16 | args = parser.parse_args()
17 |
18 | got10k_base_path = args.dir
19 | sub_sets = sorted({'train', 'val'})
20 |
21 | got10k = []
22 | for sub_set in sub_sets:
23 | sub_set_base_path = join(got10k_base_path, sub_set)
24 | videos = sorted(listdir(sub_set_base_path))
25 | s = []
26 | for vi, video in enumerate(videos):
27 | print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos)))
28 | v = dict()
29 | v['base_path'] = join(sub_set, video)
30 | v['frame'] = []
31 | video_base_path = join(sub_set_base_path, video)
32 | gts_path = join(video_base_path, 'groundtruth.txt')
33 | # gts_file = open(gts_path, 'r')
34 | # gts = gts_file.readlines()
35 | gts = np.loadtxt(open(gts_path, "rb"), delimiter=',')
36 |
37 | # get image size
38 | im_path = join(video_base_path, '00000001.jpg')
39 | im = cv2.imread(im_path)
40 | size = im.shape # height, width
41 | frame_sz = [size[1], size[0]] # width,height
42 |
43 | # get all im name
44 | jpgs = sorted(glob.glob(join(video_base_path, '*.jpg')))
45 |
46 | f = dict()
47 | for idx, img_path in enumerate(jpgs):
48 | f['frame_sz'] = frame_sz
49 | f['img_path'] = img_path.split('/')[-1]
50 |
51 | gt = gts[idx]
52 | bbox = [int(g) for g in gt] # (x,y,w,h)
53 | f['bbox'] = bbox
54 | v['frame'].append(f.copy())
55 | s.append(v)
56 | got10k.append(s)
57 | print('save json (raw got10k info), please wait 1 min~')
58 | json.dump(got10k, open('got10k.json', 'w'), indent=4, sort_keys=True)
59 | print('got10k.json has been saved in ./')
60 |
--------------------------------------------------------------------------------
/lib/dataset/crop/got10k/readme.md:
--------------------------------------------------------------------------------
1 | # Preprocessing GOT10K (train and val)
2 |
3 |
4 | ### Crop & Generate data info (20 min)
5 |
6 | ````shell
7 | rm ./train/list.txt
8 | rm ./val/list.txt
9 |
10 | python parser_got10k.py
11 | python par_crop.py 511 16
12 | python gen_json.py
13 | ````
14 |
--------------------------------------------------------------------------------
/lib/dataset/crop/lasot/gen_json.py:
--------------------------------------------------------------------------------
1 | from os.path import join
2 | from os import listdir
3 | import json
4 | import numpy as np
5 |
6 | print('loading json (raw lasot info), please wait 20 seconds~')
7 | lasot = json.load(open('lasot.json', 'r'))
8 |
9 |
10 | def check_size(frame_sz, bbox):
11 | min_ratio = 0.1
12 | max_ratio = 0.75
13 | # only accept objects >10% and <75% of the total frame
14 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz)))
15 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio)
16 | return ok
17 |
18 |
19 | def check_borders(frame_sz, bbox):
20 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2
21 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \
22 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \
23 | ((frame_sz[1] - bbox[3]) > dist_from_border)
24 | return ok
25 |
26 |
27 | snippets = dict()
28 |
29 | n_videos = 0
30 | for subset in lasot:
31 | for video in subset:
32 | n_videos += 1
33 | frames = video['frame']
34 | snippet = dict()
35 |
36 | snippets[video['base_path'].split('/')[-1]] = dict()
37 | for f, frame in enumerate(frames):
38 | frame_sz = frame['frame_sz']
39 | bbox = frame['bbox'] # (x,y,w,h)
40 |
41 | snippet['{:06d}'.format(f)] = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] #(xmin, ymin, xmax, ymax)
42 |
43 | snippets[video['base_path'].split('/')[-1]]['{:02d}'.format(0)] = snippet.copy()
44 |
45 | json.dump(snippets, open('/data/share/LASOT/train.json', 'w'), indent=4, sort_keys=True)
46 | print('done!')
47 |
--------------------------------------------------------------------------------
/lib/dataset/crop/lasot/parser_lasot.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # ! ./usr/bin/env python
3 | # __author__ = 'zzp'
4 |
5 | import cv2
6 | import json
7 | import glob
8 | import numpy as np
9 | from os.path import join
10 | from os import listdir
11 |
12 | import argparse
13 |
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--dir',type=str, default='/data/share/LaSOTBenchmark', help='your vid data dir')
16 | args = parser.parse_args()
17 |
18 | lasot_base_path = args.dir
19 | # sub_sets = sorted({'train', 'val'})
20 |
21 | lasot = []
22 |
23 | videos_fathers = sorted(listdir(lasot_base_path))
24 | s = []
25 | for _, video_f in enumerate(videos_fathers):
26 | videos_sons = sorted(listdir(join(lasot_base_path, video_f)))
27 |
28 | for vi, video in enumerate(videos_sons):
29 |
30 | print('father class: {} video id: {:04d} / {:04d}'.format(video_f, vi, len(videos_sons)))
31 | v = dict()
32 | v['base_path'] = join(video_f, video)
33 | v['frame'] = []
34 | video_base_path = join(lasot_base_path, video_f, video)
35 | gts_path = join(video_base_path, 'groundtruth.txt')
36 | # gts_file = open(gts_path, 'r')
37 | # gts = gts_file.readlines()
38 | gts = np.loadtxt(open(gts_path, "rb"), delimiter=',')
39 |
40 | # get image size
41 | im_path = join(video_base_path, 'img', '00000001.jpg')
42 | im = cv2.imread(im_path)
43 | size = im.shape # height, width
44 | frame_sz = [size[1], size[0]] # width,height
45 |
46 | # get all im name
47 | jpgs = sorted(glob.glob(join(video_base_path, 'img', '*.jpg')))
48 |
49 | f = dict()
50 | for idx, img_path in enumerate(jpgs):
51 | f['frame_sz'] = frame_sz
52 | f['img_path'] = img_path.split('/')[-1]
53 |
54 | gt = gts[idx]
55 | bbox = [int(g) for g in gt] # (x,y,w,h)
56 | f['bbox'] = bbox
57 | v['frame'].append(f.copy())
58 | s.append(v)
59 | lasot.append(s)
60 |
61 | print('save json (raw lasot info), please wait 1 min~')
62 | json.dump(lasot, open('lasot.json', 'w'), indent=4, sort_keys=True)
63 | print('lasot.json has been saved in ./')
64 |
--------------------------------------------------------------------------------
/lib/dataset/crop/lasot/readme.md:
--------------------------------------------------------------------------------
1 | # Preprocessing LASOT (train and val)
2 |
3 |
4 | ### Crop & Generate data info (20 min)
5 |
6 | ````shell
7 | rm ./train/list.txt
8 | rm ./val/list.txt
9 |
10 | python parser_lasot.py
11 | python par_crop.py 511 16
12 | python gen_json.py
13 | ````
14 |
--------------------------------------------------------------------------------
/lib/dataset/crop/vid/gen_json.py:
--------------------------------------------------------------------------------
1 | from os.path import join
2 | from os import listdir
3 | import json
4 | import numpy as np
5 |
6 | print('loading json (raw vid info), please wait 20 seconds~')
7 | vid = json.load(open('vid.json', 'r'))
8 |
9 |
10 | def check_size(frame_sz, bbox):
11 | min_ratio = 0.1
12 | max_ratio = 0.75
13 | # only accept objects >10% and <75% of the total frame
14 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz)))
15 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio)
16 | return ok
17 |
18 |
19 | def check_borders(frame_sz, bbox):
20 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2
21 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \
22 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \
23 | ((frame_sz[1] - bbox[3]) > dist_from_border)
24 | return ok
25 |
26 |
27 | snippets = dict()
28 | n_snippets = 0
29 | n_videos = 0
30 | for subset in vid:
31 | for video in subset:
32 | n_videos += 1
33 | frames = video['frame']
34 | id_set = []
35 | id_frames = [[]] * 60 # at most 60 objects
36 | for f, frame in enumerate(frames):
37 | objs = frame['objs']
38 | frame_sz = frame['frame_sz']
39 | for obj in objs:
40 | trackid = obj['trackid']
41 | occluded = obj['occ']
42 | bbox = obj['bbox']
43 | # if occluded:
44 | # continue
45 | #
46 | # if not(check_size(frame_sz, bbox) and check_borders(frame_sz, bbox)):
47 | # continue
48 | #
49 | # if obj['c'] in ['n01674464', 'n01726692', 'n04468005', 'n02062744']:
50 | # continue
51 |
52 | if trackid not in id_set:
53 | id_set.append(trackid)
54 | id_frames[trackid] = []
55 | id_frames[trackid].append(f)
56 | if len(id_set) > 0:
57 | snippets[video['base_path']] = dict()
58 | for selected in id_set:
59 | frame_ids = sorted(id_frames[selected])
60 | sequences = np.split(frame_ids, np.array(np.where(np.diff(frame_ids) > 1)[0]) + 1)
61 | sequences = [s for s in sequences if len(s) > 1] # remove isolated frame.
62 | for seq in sequences:
63 | snippet = dict()
64 | for frame_id in seq:
65 | frame = frames[frame_id]
66 | for obj in frame['objs']:
67 | if obj['trackid'] == selected:
68 | o = obj
69 | continue
70 | snippet[frame['img_path'].split('.')[0]] = o['bbox']
71 | snippets[video['base_path']]['{:02d}'.format(selected)] = snippet
72 | n_snippets += 1
73 | print('video: {:d} snippets_num: {:d}'.format(n_videos, n_snippets))
74 |
75 | train = {k:v for (k,v) in snippets.items() if 'train' in k}
76 | val = {k:v for (k,v) in snippets.items() if 'val' in k}
77 |
78 | json.dump(train, open('/data/home/hopeng/data_local/ILSVRC2015/VID/train.json', 'w'), indent=4, sort_keys=True)
79 | json.dump(val, open('/data/home/hopeng/data_local/ILSVRC2015/VID/val.json', 'w'), indent=4, sort_keys=True)
80 | print('done!')
81 |
--------------------------------------------------------------------------------
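The key step in `gen_json.py` above is splitting each track's frame indices into contiguous snippets wherever the index jumps by more than one, then dropping single-frame snippets. A tiny worked example of that expression:

````python
# Worked example of the snippet-splitting expression used in vid/gen_json.py.
import numpy as np

frame_ids = [0, 1, 2, 7, 8, 15]                   # frames in which one track id appears
cuts = np.where(np.diff(frame_ids) > 1)[0] + 1    # positions where a new snippet starts -> [3, 5]
sequences = np.split(frame_ids, cuts)             # three arrays: [0 1 2], [7 8], [15]
sequences = [s for s in sequences if len(s) > 1]  # drop isolated frames -> [[0, 1, 2], [7, 8]]
print(sequences)
````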
/lib/dataset/crop/vid/parse_vid.py:
--------------------------------------------------------------------------------
1 | from os.path import join
2 | from os import listdir
3 | import json
4 | import glob
5 | import argparse
6 | import xml.etree.ElementTree as ET
7 |
8 | parser = argparse.ArgumentParser()
9 | parser.add_argument('--dir',type=str, default='/data/share/ILSVRC2015', help='your vid data dir' )
10 | args = parser.parse_args()
11 |
12 | VID_base_path = args.dir
13 | ann_base_path = join(VID_base_path, 'Annotations/VID/train/')
14 | img_base_path = join(VID_base_path, 'Data/VID/train/')
15 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e'})
16 |
17 | vid = []
18 | for sub_set in sub_sets:
19 |     sub_set_base_path = join(ann_base_path, sub_set)
20 |     videos = sorted(listdir(sub_set_base_path))
21 |     s = []
22 |     for vi, video in enumerate(videos):
23 |         print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos)))
24 |         v = dict()
25 |         v['base_path'] = join(sub_set, video)
26 |         v['frame'] = []
27 |         video_base_path = join(sub_set_base_path, video)
28 |         xmls = sorted(glob.glob(join(video_base_path, '*.xml')))
29 |         for xml in xmls:
30 |             f = dict()
31 |             xmltree = ET.parse(xml)
32 |             size = xmltree.findall('size')[0]
33 |             frame_sz = [int(it.text) for it in size] # width,height
34 |             objects = xmltree.findall('object')
35 |             objs = []
36 |             for object_iter in objects:
37 |                 trackid = int(object_iter.find('trackid').text)
38 |                 name = (object_iter.find('name')).text
39 |                 bndbox = object_iter.find('bndbox')
40 |                 occluded = int(object_iter.find('occluded').text)
41 |                 o = dict()
42 |                 o['c'] = name
43 |                 o['bbox'] = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text),
44 |                              int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)]
45 |                 o['trackid'] = trackid
46 |                 o['occ'] = occluded
47 |                 objs.append(o)
48 |             f['frame_sz'] = frame_sz
49 |             f['img_path'] = xml.split('/')[-1].replace('xml', 'JPEG')
50 |             f['objs'] = objs
51 |             v['frame'].append(f)
52 |         s.append(v)
53 |     vid.append(s)
54 | print('save json (raw vid info), please wait 1 min~')
55 | json.dump(vid, open('vid.json', 'w'), indent=4, sort_keys=True)
56 | print('vid.json has been saved in ./')
57 |
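58 | # The resulting vid.json is a list with one entry per sub_set; each entry is a list of
59 | # video dicts shaped like the following (values illustrative):
60 | #   {"base_path": "a/ILSVRC2015_train_00000000",
61 | #    "frame": [{"frame_sz": [1280, 720], "img_path": "000000.JPEG",
62 | #               "objs": [{"c": "n02084071", "bbox": [xmin, ymin, xmax, ymax],
63 | #                         "trackid": 0, "occ": 0}]}]}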
--------------------------------------------------------------------------------
/lib/dataset/crop/vid/readme.md:
--------------------------------------------------------------------------------
1 | # Preprocessing VID (Object Detection from Video)
2 | Large Scale Visual Recognition Challenge 2015 (ILSVRC2015)
3 |
4 | ### Download dataset (86GB)
5 |
6 | ````shell
7 | wget http://bvisionweb1.cs.unc.edu/ilsvrc2015/ILSVRC2015_VID.tar.gz
8 | tar -xzvf ./ILSVRC2015_VID.tar.gz
9 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0000 ILSVRC2015/Annotations/VID/train/a
10 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0001 ILSVRC2015/Annotations/VID/train/b
11 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0002 ILSVRC2015/Annotations/VID/train/c
12 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0003 ILSVRC2015/Annotations/VID/train/d
13 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/val ILSVRC2015/Annotations/VID/train/e
14 |
15 | ln -sfb $PWD/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0000 ILSVRC2015/Data/VID/train/a
16 | ln -sfb $PWD/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0001 ILSVRC2015/Data/VID/train/b
17 | ln -sfb $PWD/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0002 ILSVRC2015/Data/VID/train/c
18 | ln -sfb $PWD/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0003 ILSVRC2015/Data/VID/train/d
19 | ln -sfb $PWD/ILSVRC2015/Data/VID/val ILSVRC2015/Data/VID/train/e
20 | ````
21 |
22 | ### Crop & Generate data info (20 min)
23 |
24 | ````shell
25 | python parse_vid.py
26 |
27 | #python par_crop.py [crop_size] [num_threads]
28 | python par_crop.py 511 12
29 | python gen_json.py
30 | ````
31 | The code is adapted from SiamMask.
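32 | 
33 | As a quick sanity check, the sketch below (assuming the output path hard-coded in `gen_json.py`; adjust it to wherever your `train.json` was written) loads the generated annotations and prints one snippet:
34 | 
35 | ````python
36 | import json
37 | 
38 | train = json.load(open('train.json'))   # path produced by gen_json.py
39 | video, tracks = next(iter(train.items()))
40 | print(video, list(tracks.keys()))       # video folder and its track ids ('00', '01', ...)
41 | ````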
--------------------------------------------------------------------------------
/lib/dataset/crop/visdrone/gen_json.py:
--------------------------------------------------------------------------------
1 | from os.path import join
2 | from os import listdir
3 | import json
4 | import numpy as np
5 |
6 | print('loading json (raw visdrone info), please wait 20 seconds~')
7 | visdrone = json.load(open('visdrone.json', 'r'))
8 |
9 |
10 | def check_size(frame_sz, bbox):
11 |     min_ratio = 0.1
12 |     max_ratio = 0.75
13 |     # only accept objects >10% and <75% of the total frame
14 |     area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz)))
15 |     ok = (area_ratio > min_ratio) and (area_ratio < max_ratio)
16 |     return ok
17 | 
18 | 
19 | def check_borders(frame_sz, bbox):
20 |     dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2
21 |     ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \
22 |          ((frame_sz[0] - bbox[2]) > dist_from_border) and \
23 |          ((frame_sz[1] - bbox[3]) > dist_from_border)
24 |     return ok
25 |
26 |
27 | snippets = dict()
28 |
29 | n_videos = 0
30 | for subset in visdrone:
31 |     for video in subset:
32 |         n_videos += 1
33 |         frames = video['frame']
34 |         snippet = dict()
35 |         bp = video['base_path']
36 |         bp = bp.split('/')
37 |         bp = join(bp[0], bp[-1])
38 | 
39 |         snippets[bp] = dict()
40 |         for f, frame in enumerate(frames):
41 |             frame_sz = frame['frame_sz']
42 |             bbox = frame['bbox'] # (x,y,w,h)
43 | 
44 |             snippet['{:06d}'.format(f)] = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] #(xmin, ymin, xmax, ymax)
45 | 
46 |         snippets[bp]['{:02d}'.format(0)] = snippet.copy()
47 |
48 | # train = {k:v for (k,v) in snippets.items() if 'train' in k}
49 | # val = {k:v for (k,v) in snippets.items() if 'val' in k}
50 |
51 | train = {k:v for (k,v) in snippets.items()}
52 |
53 | # json.dump(train, open('/data2/visdrone/train.json', 'w'), indent=4, sort_keys=True)
54 | json.dump(train, open('/data/home/v-zhipeng/dataset/training/VISDRONE/train.json', 'w'), indent=4, sort_keys=True)
55 | print('done!')
56 |
--------------------------------------------------------------------------------
/lib/dataset/crop/visdrone/parser_visdrone.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # ! ./usr/bin/env python
3 | # __author__ = 'zzp'
4 |
5 | import cv2
6 | import json
7 | import glob
8 | import numpy as np
9 | from os.path import join
10 | from os import listdir
11 |
12 | import argparse
13 |
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--dir',type=str, default='/data/home/v-zhipeng/dataset/testing/VISDRONE', help='your vid data dir')
16 | args = parser.parse_args()
17 |
18 | visdrone_base_path = args.dir
19 | sub_sets = sorted({'VisDrone2019-SOT-train', 'VisDrone2019-SOT-val'})
20 |
21 | visdrone = []
22 | for sub_set in sub_sets:
23 |     sub_set_base_path = join(visdrone_base_path, sub_set)
24 |     videos = sorted(listdir(join(sub_set_base_path, 'sequences')))
25 |     s = []
26 |     for vi, video in enumerate(videos):
27 |         print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos)))
28 |         v = dict()
29 |         v['base_path'] = join(sub_set, 'sequences', video)
30 |         v['frame'] = []
31 |         video_base_path = join(sub_set_base_path, 'sequences', video)
32 |         gts_path = join(sub_set_base_path, 'annotations', '{}.txt'.format(video))
33 |         # gts_file = open(gts_path, 'r')
34 |         # gts = gts_file.readlines()
35 |         gts = np.loadtxt(open(gts_path, "rb"), delimiter=',')
36 | 
37 |         # get image size
38 |         im_path = join(video_base_path, 'img0000001.jpg')
39 |         im = cv2.imread(im_path)
40 |         size = im.shape # height, width
41 |         frame_sz = [size[1], size[0]] # width,height
42 | 
43 |         # get all im name
44 |         jpgs = sorted(glob.glob(join(video_base_path, '*.jpg')))
45 | 
46 |         f = dict()
47 |         for idx, img_path in enumerate(jpgs):
48 |             f['frame_sz'] = frame_sz
49 |             f['img_path'] = img_path.split('/')[-1]
50 | 
51 |             gt = gts[idx]
52 |             bbox = [int(g) for g in gt] # (x,y,w,h)
53 |             f['bbox'] = bbox
54 |             v['frame'].append(f.copy())
55 |         s.append(v)
56 |     visdrone.append(s)
57 | print('save json (raw visdrone info), please wait 1 min~')
58 | json.dump(visdrone, open('visdrone.json', 'w'), indent=4, sort_keys=True)
59 | print('visdrone.json has been saved in ./')
60 |
--------------------------------------------------------------------------------
/lib/dataset/crop/visdrone/readme.md:
--------------------------------------------------------------------------------
1 | # Preprocessing VISDRONE (train and val)
2 |
3 |
4 | ### Crop & Generate data info (20 min)
5 |
6 | ````shell
7 | rm ./train/list.txt
8 | rm ./val/list.txt
9 |
10 | python parser_visdrone.py
11 | python par_crop.py 511 16
12 | python gen_json.py
13 | ````
14 |
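15 | VisDrone annotations store boxes as `x,y,w,h`; `gen_json.py` converts them to corner format before writing `train.json`. A minimal sketch of that conversion (illustrative only):
16 | 
17 | ````python
18 | def xywh_to_xyxy(bbox):
19 |     # (x, y, w, h) -> (xmin, ymin, xmax, ymax)
20 |     x, y, w, h = bbox
21 |     return [x, y, x + w, y + h]
22 | 
23 | print(xywh_to_xyxy([10, 20, 30, 40]))  # [10, 20, 40, 60]
24 | ````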
--------------------------------------------------------------------------------
/lib/eval_toolkit/bin/_init_paths.py:
--------------------------------------------------------------------------------
1 |
2 | from __future__ import absolute_import
3 | from __future__ import division
4 | from __future__ import print_function
5 |
6 | import os.path as osp
7 | import sys
8 |
9 |
10 | def add_path(path):
11 | if path not in sys.path:
12 | sys.path.insert(0, path)
13 |
14 |
15 | this_dir = osp.dirname(__file__)
16 |
17 | lib_path = osp.join(this_dir, '../..', 'eval_toolkit')
18 | add_path(lib_path)
19 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/davis/davis2017-evaluation/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | docs/site/
99 | /site
100 |
101 | # mypy
102 | .mypy_cache/
103 |
104 | # pytest
105 | .pytest_cache
106 |
107 | # Pylint
108 | .pylintrc
109 |
110 | # PyCharm
111 | .idea/
112 | .DS_Store
113 |
114 | # Generated C code
115 | _mask.c
116 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/davis/davis2017-evaluation/README.md:
--------------------------------------------------------------------------------
1 | # DAVIS 2017 Semi-supervised and Unsupervised evaluation package
2 |
3 | This package is used to evaluate semi-supervised and unsupervised video multi-object segmentation models for the DAVIS 2017 dataset.
4 |
5 | This tool is also used to evaluate the submissions on the Codalab site for the Semi-supervised DAVIS Challenge and the Unsupervised DAVIS Challenge.
6 |
7 | ### Installation
8 | ```bash
9 | # Download the code
10 | git clone https://github.com/davisvideochallenge/davis2017-evaluation.git && cd davis2017-evaluation
11 | # Install it - Python 3.6 or higher required
12 | python setup.py install
13 | ```
14 | If you don't want to specify the DAVIS path every time, you can modify the default value of the variable `default_davis_path` in `evaluation_method.py` (the following examples assume that you have set it).
15 | Otherwise, you can specify the path on every call using the flag `--davis_path /path/to/DAVIS` when calling `evaluation_method.py`.
16 |
17 | Once the evaluation has finished, two different CSV files will be generated inside the folder with the results:
18 | - `global_results-SUBSET.csv` contains the overall results for a certain `SUBSET`.
19 | - `per-sequence_results-SUBSET.csv` contains the per-sequence results for a certain `SUBSET`.
20 |
21 | If a folder that contains the previous files is evaluated again, the results will be read from the CSV files instead of recomputing them.
22 |
23 | ## Evaluate DAVIS 2017 Semi-supervised
24 | To evaluate your semi-supervised method on DAVIS 2017, execute the following command, substituting `results/semi-supervised/osvos` with the folder that contains your results:
25 | ```bash
26 | python evaluation_method.py --task semi-supervised --results_path results/semi-supervised/osvos
27 | ```
28 | The semi-supervised results have been generated using [OSVOS](https://github.com/kmaninis/OSVOS-caffe).
29 |
30 | ## Evaluate DAVIS 2017 Unsupervised
31 | To evaluate your unsupervised method on DAVIS 2017, execute the following command, substituting `results/unsupervised/rvos` with the folder that contains your results:
32 | ```bash
33 | python evaluation_method.py --task unsupervised --results_path results/unsupervised/rvos
34 | ```
35 | The unsupervised example results have been generated using [RVOS](https://github.com/imatge-upc/rvos).
36 |
37 | ## Evaluation running in Codalab
38 | If you would like to know which evaluation script runs on the Codalab servers, check the `evaluation_codalab.py` script.
39 |
40 | This package runs in the following docker image: [scaelles/codalab:anaconda3-2018.12](https://cloud.docker.com/u/scaelles/repository/docker/scaelles/codalab)
41 |
42 | ## Citation
43 |
44 | Please cite both papers in your publications if DAVIS or this code helps your research.
45 |
46 | ```latex
47 | @article{Caelles_arXiv_2019,
48 | author = {Sergi Caelles and Jordi Pont-Tuset and Federico Perazzi and Alberto Montes and Kevis-Kokitsi Maninis and Luc {Van Gool}},
49 | title = {The 2019 DAVIS Challenge on VOS: Unsupervised Multi-Object Segmentation},
50 | journal = {arXiv},
51 | year = {2019}
52 | }
53 | ```
54 |
55 | ```latex
56 | @article{Pont-Tuset_arXiv_2017,
57 | author = {Jordi Pont-Tuset and Federico Perazzi and Sergi Caelles and Pablo Arbel\'aez and Alexander Sorkine-Hornung and Luc {Van Gool}},
58 | title = {The 2017 DAVIS Challenge on Video Object Segmentation},
59 | journal = {arXiv:1704.00675},
60 | year = {2017}
61 | }
62 | ```
63 |
64 |
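65 | ## Reading the result CSVs
66 | 
67 | A minimal sketch (not part of this package; it assumes the `val` subset and the example results path used above) for loading the generated CSVs with pandas:
68 | 
69 | ```python
70 | import pandas as pd
71 | 
72 | # both files are written into the folder passed via --results_path
73 | table_g = pd.read_csv('results/semi-supervised/osvos/global_results-val.csv')
74 | table_seq = pd.read_csv('results/semi-supervised/osvos/per-sequence_results-val.csv')
75 | print(table_g['J&F-Mean'].iloc[0])  # overall J&F mean
76 | print(table_seq.head())             # per-sequence J-Mean / F-Mean
77 | ```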
--------------------------------------------------------------------------------
/lib/eval_toolkit/davis/davis2017-evaluation/davis2017/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 |
3 | __version__ = '0.1.0'
4 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/davis/davis2017-evaluation/davis2017/results.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from PIL import Image
4 | import sys
5 | import pdb
6 |
7 | class Results(object):
8 | def __init__(self, root_dir):
9 | self.root_dir = root_dir
10 |
11 | def _read_mask(self, sequence, frame_id):
12 | try:
13 | mask_path = os.path.join(self.root_dir, sequence, f'{frame_id}.png')
14 | # pdb.set_trace()
15 |
16 | return np.array(Image.open(mask_path))
17 | except IOError as err:
18 | sys.stdout.write(sequence + " frame %s not found!\n" % frame_id)
19 | sys.stdout.write("The frames have to be indexed PNG files placed inside the corespondent sequence "
20 | "folder.\nThe indexes have to match with the initial frame.\n")
21 | sys.stderr.write("IOError: " + err.strerror + "\n")
22 | sys.exit()
23 |
24 | def read_masks(self, sequence, masks_id):
25 | mask_0 = self._read_mask(sequence, masks_id[0])
26 | masks = np.zeros((len(masks_id), *mask_0.shape))
27 | for ii, m in enumerate(masks_id):
28 | masks[ii, ...] = self._read_mask(sequence, m)
29 | num_objects = int(np.max(masks))
30 | tmp = np.ones((num_objects, *masks.shape))
31 | tmp = tmp * np.arange(1, num_objects + 1)[:, None, None, None]
32 | masks = (tmp == masks[None, ...]) > 0
33 | return masks
34 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/davis/davis2017-evaluation/demo.sh:
--------------------------------------------------------------------------------
1 | python evaluation_method.py --task semi-supervised --results_path /home/zpzhang/project/ECCV2020/TrackSeg/results/DAVIS2016 --davis_path /home/zpzhang/data/testing/DAVIS-trainval --year 2016
2 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/davis/davis2017-evaluation/evaluation_codalab.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import sys
3 | import os.path
4 | from time import time
5 |
6 | import numpy as np
7 | import pandas
8 | from davis2017.evaluation import DAVISEvaluation
9 |
10 | task = 'semi-supervised'
11 | gt_set = 'test-dev'
12 |
13 | time_start = time()
14 | # as per the metadata file, input and output directories are the arguments
15 | if len(sys.argv) < 3:
16 | input_dir = "input_dir"
17 | output_dir = "output_dir"
18 | debug = True
19 | else:
20 | [_, input_dir, output_dir] = sys.argv
21 | debug = False
22 |
23 | # unzipped submission data is always in the 'res' subdirectory
24 | # https://github.com/codalab/codalab-competitions/wiki/User_Building-a-Scoring-Program-for-a-Competition#directory-structure-for-submissions
25 | submission_path = os.path.join(input_dir, 'res')
26 | if not os.path.exists(submission_path):
27 | sys.exit('Could not find submission file {0}'.format(submission_path))
28 |
29 | # unzipped reference data is always in the 'ref' subdirectory
30 | # https://github.com/codalab/codalab-competitions/wiki/User_Building-a-Scoring-Program-for-a-Competition#directory-structure-for-submissions
31 | gt_path = os.path.join(input_dir, 'ref')
32 | if not os.path.exists(gt_path):
33 | sys.exit('Could not find GT file {0}'.format(gt_path))
34 |
35 |
36 | # Create dataset
37 | dataset_eval = DAVISEvaluation(davis_root=gt_path, gt_set=gt_set, task=task, codalab=True)
38 |
39 | # Check directory structure
40 | res_subfolders = os.listdir(submission_path)
41 | if len(res_subfolders) == 1:
42 | sys.stdout.write(
43 | "Incorrect folder structure, the folders of the sequences have to be placed directly inside the "
44 | "zip.\nInside every folder of the sequences there must be an indexed PNG file for every frame.\n"
45 | "The indexes have to match with the initial frame.\n")
46 | sys.exit()
47 |
48 | # Check that all sequences are there
49 | missing = False
50 | for seq in dataset_eval.dataset.get_sequences():
51 | if seq not in res_subfolders:
52 | sys.stdout.write(seq + " sequence is missing.\n")
53 | missing = True
54 | if missing:
55 | sys.stdout.write(
56 | "Verify also the folder structure, the folders of the sequences have to be placed directly inside "
57 | "the zip.\nInside every folder of the sequences there must be an indexed PNG file for every frame.\n"
58 | "The indexes have to match with the initial frame.\n")
59 | sys.exit()
60 |
61 | metrics_res = dataset_eval.evaluate(submission_path, debug=debug)
62 | J, F = metrics_res['J'], metrics_res['F']
63 |
64 | # Generate output to the stdout
65 | seq_names = list(J['M_per_object'].keys())
66 | if gt_set == "val" or gt_set == "train" or gt_set == "test-dev":
67 | sys.stdout.write("----------------Global results in CSV---------------\n")
68 | g_measures = ['J&F-Mean', 'J-Mean', 'J-Recall', 'J-Decay', 'F-Mean', 'F-Recall', 'F-Decay']
69 | final_mean = (np.mean(J["M"]) + np.mean(F["M"])) / 2.
70 | g_res = np.array([final_mean, np.mean(J["M"]), np.mean(J["R"]), np.mean(J["D"]), np.mean(F["M"]), np.mean(F["R"]),
71 | np.mean(F["D"])])
72 | table_g = pandas.DataFrame(data=np.reshape(g_res, [1, len(g_res)]), columns=g_measures)
73 | table_g.to_csv(sys.stdout, index=False, float_format="%0.3f")
74 |
75 | sys.stdout.write("\n\n------------Per sequence results in CSV-------------\n")
76 | seq_measures = ['Sequence', 'J-Mean', 'F-Mean']
77 | J_per_object = [J['M_per_object'][x] for x in seq_names]
78 | F_per_object = [F['M_per_object'][x] for x in seq_names]
79 | table_seq = pandas.DataFrame(data=list(zip(seq_names, J_per_object, F_per_object)), columns=seq_measures)
80 | table_seq.to_csv(sys.stdout, index=False, float_format="%0.3f")
81 |
82 | # Write scores to a file named "scores.txt"
83 | with open(os.path.join(output_dir, 'scores.txt'), 'w') as output_file:
84 | final_mean = (np.mean(J["M"]) + np.mean(F["M"])) / 2.
85 | output_file.write("GlobalMean: %f\n" % final_mean)
86 | output_file.write("JMean: %f\n" % np.mean(J["M"]))
87 | output_file.write("JRecall: %f\n" % np.mean(J["R"]))
88 | output_file.write("JDecay: %f\n" % np.mean(J["D"]))
89 | output_file.write("FMean: %f\n" % np.mean(F["M"]))
90 | output_file.write("FRecall: %f\n" % np.mean(F["R"]))
91 | output_file.write("FDecay: %f\n" % np.mean(F["D"]))
92 | total_time = time() - time_start
93 | sys.stdout.write('\nTotal time:' + str(total_time))
94 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/davis/davis2017-evaluation/evaluation_method.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 | from time import time
5 | import argparse
6 |
7 | import numpy as np
8 | import pandas as pd
9 | from davis2017.evaluation import DAVISEvaluation
10 |
11 | default_davis_path = '/path/to/the/folder/DAVIS'
12 |
13 | time_start = time()
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--davis_path', type=str, help='Path to the DAVIS folder containing the JPEGImages, Annotations, '
16 | 'ImageSets, Annotations_unsupervised folders',
17 | required=False, default=default_davis_path)
18 | parser.add_argument('--set', type=str, help='Subset to evaluate the results', default='val')
19 | parser.add_argument('--task', type=str, help='Task to evaluate the results', default='unsupervised',
20 | choices=['semi-supervised', 'unsupervised'])
21 | parser.add_argument('--results_path', type=str, help='Path to the folder containing the sequences folders',
22 | required=True)
23 | parser.add_argument("--year", type=str, help="Davis dataset year (default: 2017)", default='2017',
24 | choices=['2016', '2017', '2019'])
25 |
26 | args, _ = parser.parse_known_args()
27 | csv_name_global = f'global_results-{args.set}.csv'
28 | csv_name_per_sequence = f'per-sequence_results-{args.set}.csv'
29 |
30 | # Check if the method has been evaluated before, if so read the results, otherwise compute the results
31 | csv_name_global_path = os.path.join(args.results_path, csv_name_global)
32 | csv_name_per_sequence_path = os.path.join(args.results_path, csv_name_per_sequence)
33 | if os.path.exists(csv_name_global_path) and os.path.exists(csv_name_per_sequence_path):
34 | print('Using precomputed results...')
35 | table_g = pd.read_csv(csv_name_global_path)
36 | table_seq = pd.read_csv(csv_name_per_sequence_path)
37 | else:
38 | print(f'Evaluating sequences for the {args.task} task...')
39 | # Create dataset and evaluate
40 | dataset_eval = DAVISEvaluation(davis_root=args.davis_path, task=args.task, gt_set=args.set, year=args.year)
41 | metrics_res = dataset_eval.evaluate(args.results_path)
42 | J, F = metrics_res['J'], metrics_res['F']
43 |
44 | # Generate dataframe for the general results
45 | g_measures = ['J&F-Mean', 'J-Mean', 'J-Recall', 'J-Decay', 'F-Mean', 'F-Recall', 'F-Decay']
46 | final_mean = (np.mean(J["M"]) + np.mean(F["M"])) / 2.
47 | g_res = np.array([final_mean, np.mean(J["M"]), np.mean(J["R"]), np.mean(J["D"]), np.mean(F["M"]), np.mean(F["R"]),
48 | np.mean(F["D"])])
49 | g_res = np.reshape(g_res, [1, len(g_res)])
50 | table_g = pd.DataFrame(data=g_res, columns=g_measures)
51 | with open(csv_name_global_path, 'w') as f:
52 | table_g.to_csv(f, index=False, float_format="%.3f")
53 | print(f'Global results saved in {csv_name_global_path}')
54 |
55 | # Generate a dataframe for the per sequence results
56 | seq_names = list(J['M_per_object'].keys())
57 | seq_measures = ['Sequence', 'J-Mean', 'F-Mean']
58 | J_per_object = [J['M_per_object'][x] for x in seq_names]
59 | F_per_object = [F['M_per_object'][x] for x in seq_names]
60 | table_seq = pd.DataFrame(data=list(zip(seq_names, J_per_object, F_per_object)), columns=seq_measures)
61 | with open(csv_name_per_sequence_path, 'w') as f:
62 | table_seq.to_csv(f, index=False, float_format="%.3f")
63 | print(f'Per-sequence results saved in {csv_name_per_sequence_path}')
64 |
65 | # Print the results
66 | sys.stdout.write(f"--------------------------- Global results for {args.set} ---------------------------\n")
67 | print(table_g.to_string(index=False))
68 | sys.stdout.write(f"\n---------- Per sequence results for {args.set} ----------\n")
69 | print(table_seq.to_string(index=False))
70 | total_time = time() - time_start
71 | sys.stdout.write('\nTotal time:' + str(total_time))
72 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/davis/davis2017-evaluation/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | name = davis2017
3 | version = attr: davis2017.__version__
4 | description = Evaluation Framework for DAVIS 2017 Semi-supervised and Unsupervised used in the DAVIS Challenges
5 | long_description = file: README.md
6 | long_description_content_type = text/markdown
7 | keywords = segmentation
8 | license = GPL v3
9 | author = Sergi Caelles
10 | author-email = scaelles@vision.ee.ethz.ch
11 | home-page = https://github.com/davisvideochallenge/davis2017-evaluation
12 | classifiers =
13 | Development Status :: 4 - Beta
14 | Intended Audience :: Developers
15 | Intended Audience :: Education
16 | Intended Audience :: Science/Research
17 | License :: OSI Approved :: GNU General Public License v3 (GPLv3)
18 | Programming Language :: Python :: 3.6
19 | Programming Language :: Python :: 3.7
20 | Topic :: Scientific/Engineering :: Human Machine Interfaces
21 | Topic :: Software Development :: Libraries
22 | Topic :: Software Development :: Libraries :: Python Modules
23 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/davis/davis2017-evaluation/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | import sys
3 |
4 | if sys.version_info < (3, 6):
5 | sys.exit('Sorry, only Python >= 3.6 is supported')
6 |
7 | setup(
8 | python_requires='>=3.6, <4',
9 | install_requires=[
10 | 'Pillow>=4.1.1',
11 | 'networkx>=2.0',
12 | 'numpy>=1.12.1',
13 | 'opencv-python>=4.0.0.21',
14 | 'pandas>=0.21.1',
15 | 'pathlib2;python_version<"3.5"',
16 | 'scikit-image>=0.13.1',
17 | 'scikit-learn>=0.18',
18 | 'scipy>=1.0.0',
19 | 'tqdm>=4.28.1'
20 | ])
21 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/__init__.py
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .vot import VOTDataset, VOTLTDataset
2 | from .otb import OTBDataset
3 | from .uav import UAVDataset
4 | from .lasot import LaSOTDataset
5 | from .nfs import NFSDataset
6 | from .trackingnet import TrackingNetDataset
7 | from .got10k import GOT10kDataset
8 |
9 | class DatasetFactory(object):
10 | @staticmethod
11 | def create_dataset(**kwargs):
12 | """
13 | Args:
14 | name: dataset name 'OTB2015', 'LaSOT', 'UAV123', 'NFS240', 'NFS30',
15 | 'VOT2018', 'VOT2016', 'VOT2018-LT'
16 | dataset_root: dataset root
17 |             load_img: whether to load image
18 | Return:
19 | dataset
20 | """
21 | assert 'name' in kwargs, "should provide dataset name"
22 | name = kwargs['name']
23 | if 'OTB' in name:
24 | dataset = OTBDataset(**kwargs)
25 | elif 'LaSOT' == name:
26 | dataset = LaSOTDataset(**kwargs)
27 | elif 'UAV' in name:
28 | dataset = UAVDataset(**kwargs)
29 | elif 'NFS' in name:
30 | dataset = NFSDataset(**kwargs)
31 | elif 'VOT2018' == name or 'VOT2016' == name:
32 | dataset = VOTDataset(**kwargs)
33 | elif 'VOT2018-LT' == name:
34 | dataset = VOTLTDataset(**kwargs)
35 | elif 'TrackingNet' == name:
36 | dataset = TrackingNetDataset(**kwargs)
37 | elif 'GOT-10k' == name:
38 | dataset = GOT10kDataset(**kwargs)
39 | else:
40 | raise Exception("unknow dataset {}".format(kwargs['name']))
41 | return dataset
42 |
43 |
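44 | # Minimal usage sketch (illustrative; the path is a placeholder and the matching
45 | # '<name>.json' annotation file must exist under dataset_root):
46 | #
47 | #   dataset = DatasetFactory.create_dataset(name='OTB2015',
48 | #                                           dataset_root='/path/to/OTB2015',
49 | #                                           load_img=False)
50 | #   for video in dataset:
51 | #       print(video.name, len(video.gt_traj))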
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/datasets/dataset.py:
--------------------------------------------------------------------------------
1 | from tqdm import tqdm
2 |
3 | class Dataset(object):
4 | def __init__(self, name, dataset_root):
5 | self.name = name
6 | self.dataset_root = dataset_root
7 | self.videos = None
8 |
9 | def __getitem__(self, idx):
10 | if isinstance(idx, str):
11 | return self.videos[idx]
12 | elif isinstance(idx, int):
13 | return self.videos[sorted(list(self.videos.keys()))[idx]]
14 |
15 | def __len__(self):
16 | return len(self.videos)
17 |
18 | def __iter__(self):
19 | keys = sorted(list(self.videos.keys()))
20 | for key in keys:
21 | yield self.videos[key]
22 |
23 | def set_tracker(self, path, tracker_names):
24 | """
25 | Args:
26 |             path: path to tracker results
27 |             tracker_names: list of tracker names
28 | """
29 | self.tracker_path = path
30 | self.tracker_names = tracker_names
31 | # for video in tqdm(self.videos.values(),
32 | # desc='loading tacker result', ncols=100):
33 | # video.load_tracker(path, tracker_names)
34 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/datasets/got10k.py:
--------------------------------------------------------------------------------
1 |
2 | import json
3 | import os
4 | import numpy as np
5 |
6 | from tqdm import tqdm
7 | from glob import glob
8 |
9 | from .dataset import Dataset
10 | from .video import Video
11 |
12 | class GOT10kVideo(Video):
13 | """
14 | Args:
15 | name: video name
16 | root: dataset root
17 | video_dir: video directory
18 | init_rect: init rectangle
19 | img_names: image names
20 | gt_rect: groundtruth rectangle
21 | attr: attribute of video
22 | """
23 | def __init__(self, name, root, video_dir, init_rect, img_names,
24 | gt_rect, attr, load_img=False):
25 | super(GOT10kVideo, self).__init__(name, root, video_dir,
26 | init_rect, img_names, gt_rect, attr, load_img)
27 |
28 | # def load_tracker(self, path, tracker_names=None):
29 | # """
30 | # Args:
31 | # path(str): path to result
32 | # tracker_name(list): name of tracker
33 | # """
34 | # if not tracker_names:
35 | # tracker_names = [x.split('/')[-1] for x in glob(path)
36 | # if os.path.isdir(x)]
37 | # if isinstance(tracker_names, str):
38 | # tracker_names = [tracker_names]
39 | # # self.pred_trajs = {}
40 | # for name in tracker_names:
41 | # traj_file = os.path.join(path, name, self.name+'.txt')
42 | # if os.path.exists(traj_file):
43 | # with open(traj_file, 'r') as f :
44 | # self.pred_trajs[name] = [list(map(float, x.strip().split(',')))
45 | # for x in f.readlines()]
46 | # if len(self.pred_trajs[name]) != len(self.gt_traj):
47 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name)
48 | # else:
49 |
50 | # self.tracker_names = list(self.pred_trajs.keys())
51 |
52 | class GOT10kDataset(Dataset):
53 | """
54 | Args:
55 |         name: dataset name, should be 'GOT-10k'
56 |         dataset_root: dataset root dir
57 | """
58 | def __init__(self, name, dataset_root, load_img=False):
59 | super(GOT10kDataset, self).__init__(name, dataset_root)
60 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
61 | meta_data = json.load(f)
62 |
63 | # load videos
64 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
65 | self.videos = {}
66 | for video in pbar:
67 | pbar.set_postfix_str(video)
68 | self.videos[video] = GOT10kVideo(video,
69 | dataset_root,
70 | meta_data[video]['video_dir'],
71 | meta_data[video]['init_rect'],
72 | meta_data[video]['img_names'],
73 | meta_data[video]['gt_rect'],
74 | None)
75 | self.attr = {}
76 | self.attr['ALL'] = list(self.videos.keys())
77 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/datasets/lasot.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import numpy as np
4 |
5 | from tqdm import tqdm
6 | from glob import glob
7 |
8 | from .dataset import Dataset
9 | from .video import Video
10 |
11 | class LaSOTVideo(Video):
12 | """
13 | Args:
14 | name: video name
15 | root: dataset root
16 | video_dir: video directory
17 | init_rect: init rectangle
18 | img_names: image names
19 | gt_rect: groundtruth rectangle
20 | attr: attribute of video
21 | """
22 | def __init__(self, name, root, video_dir, init_rect, img_names,
23 | gt_rect, attr, absent, load_img=False):
24 | super(LaSOTVideo, self).__init__(name, root, video_dir,
25 | init_rect, img_names, gt_rect, attr, load_img)
26 | self.absent = np.array(absent, np.int8)
27 |
28 | def load_tracker(self, path, tracker_names=None, store=True):
29 | """
30 | Args:
31 | path(str): path to result
32 | tracker_name(list): name of tracker
33 | """
34 | if not tracker_names:
35 | tracker_names = [x.split('/')[-1] for x in glob(path)
36 | if os.path.isdir(x)]
37 | if isinstance(tracker_names, str):
38 | tracker_names = [tracker_names]
39 | for name in tracker_names:
40 | traj_file = os.path.join(path, name, self.name+'.txt')
41 | if os.path.exists(traj_file):
42 | with open(traj_file, 'r') as f :
43 | pred_traj = [list(map(float, x.strip().split(',')))
44 | for x in f.readlines()]
45 | else:
46 | print("File not exists: ", traj_file)
47 | if self.name == 'monkey-17':
48 | pred_traj = pred_traj[:len(self.gt_traj)]
49 | if store:
50 | self.pred_trajs[name] = pred_traj
51 | else:
52 | return pred_traj
53 | self.tracker_names = list(self.pred_trajs.keys())
54 |
55 |
56 |
57 | class LaSOTDataset(Dataset):
58 | """
59 | Args:
60 |         name: dataset name, should be 'LaSOT'
61 |         dataset_root: dataset root
62 |         load_img: whether to load all imgs
63 | """
64 | def __init__(self, name, dataset_root, load_img=False):
65 | super(LaSOTDataset, self).__init__(name, dataset_root)
66 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
67 | meta_data = json.load(f)
68 |
69 | # load videos
70 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
71 | self.videos = {}
72 | for video in pbar:
73 | pbar.set_postfix_str(video)
74 | self.videos[video] = LaSOTVideo(video,
75 | dataset_root,
76 | meta_data[video]['video_dir'],
77 | meta_data[video]['init_rect'],
78 | meta_data[video]['img_names'],
79 | meta_data[video]['gt_rect'],
80 | meta_data[video]['attr'],
81 | meta_data[video]['absent'])
82 |
83 | # set attr
84 | attr = []
85 | for x in self.videos.values():
86 | attr += x.attr
87 | attr = set(attr)
88 | self.attr = {}
89 | self.attr['ALL'] = list(self.videos.keys())
90 | for x in attr:
91 | self.attr[x] = []
92 | for k, v in self.videos.items():
93 | for attr_ in v.attr:
94 | self.attr[attr_].append(k)
95 |
96 |
97 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/datasets/nfs.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from tqdm import tqdm
6 | from glob import glob
7 |
8 | from .dataset import Dataset
9 | from .video import Video
10 |
11 |
12 | class NFSVideo(Video):
13 | """
14 | Args:
15 | name: video name
16 | root: dataset root
17 | video_dir: video directory
18 | init_rect: init rectangle
19 | img_names: image names
20 | gt_rect: groundtruth rectangle
21 | attr: attribute of video
22 | """
23 | def __init__(self, name, root, video_dir, init_rect, img_names,
24 | gt_rect, attr, load_img=False):
25 | super(NFSVideo, self).__init__(name, root, video_dir,
26 | init_rect, img_names, gt_rect, attr, load_img)
27 |
28 | # def load_tracker(self, path, tracker_names=None):
29 | # """
30 | # Args:
31 | # path(str): path to result
32 | # tracker_name(list): name of tracker
33 | # """
34 | # if not tracker_names:
35 | # tracker_names = [x.split('/')[-1] for x in glob(path)
36 | # if os.path.isdir(x)]
37 | # if isinstance(tracker_names, str):
38 | # tracker_names = [tracker_names]
39 | # # self.pred_trajs = {}
40 | # for name in tracker_names:
41 | # traj_file = os.path.join(path, name, self.name+'.txt')
42 | # if os.path.exists(traj_file):
43 | # with open(traj_file, 'r') as f :
44 | # self.pred_trajs[name] = [list(map(float, x.strip().split(',')))
45 | # for x in f.readlines()]
46 | # if len(self.pred_trajs[name]) != len(self.gt_traj):
47 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name)
48 | # else:
49 |
50 | # self.tracker_names = list(self.pred_trajs.keys())
51 |
52 | class NFSDataset(Dataset):
53 | """
54 | Args:
55 | name: dataset name, should be "NFS30" or "NFS240"
56 |         dataset_root: dataset root dir
57 | """
58 | def __init__(self, name, dataset_root, load_img=False):
59 | super(NFSDataset, self).__init__(name, dataset_root)
60 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
61 | meta_data = json.load(f)
62 |
63 | # load videos
64 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
65 | self.videos = {}
66 | for video in pbar:
67 | pbar.set_postfix_str(video)
68 | self.videos[video] = NFSVideo(video,
69 | dataset_root,
70 | meta_data[video]['video_dir'],
71 | meta_data[video]['init_rect'],
72 | meta_data[video]['img_names'],
73 | meta_data[video]['gt_rect'],
74 | None)
75 |
76 | self.attr = {}
77 | self.attr['ALL'] = list(self.videos.keys())
78 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/datasets/trackingnet.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from tqdm import tqdm
6 | from glob import glob
7 |
8 | from .dataset import Dataset
9 | from .video import Video
10 |
11 | class TrackingNetVideo(Video):
12 | """
13 | Args:
14 | name: video name
15 | root: dataset root
16 | video_dir: video directory
17 | init_rect: init rectangle
18 | img_names: image names
19 | gt_rect: groundtruth rectangle
20 | attr: attribute of video
21 | """
22 | def __init__(self, name, root, video_dir, init_rect, img_names,
23 | gt_rect, attr, load_img=False):
24 | super(TrackingNetVideo, self).__init__(name, root, video_dir,
25 | init_rect, img_names, gt_rect, attr, load_img)
26 |
27 | # def load_tracker(self, path, tracker_names=None):
28 | # """
29 | # Args:
30 | # path(str): path to result
31 | # tracker_name(list): name of tracker
32 | # """
33 | # if not tracker_names:
34 | # tracker_names = [x.split('/')[-1] for x in glob(path)
35 | # if os.path.isdir(x)]
36 | # if isinstance(tracker_names, str):
37 | # tracker_names = [tracker_names]
38 | # # self.pred_trajs = {}
39 | # for name in tracker_names:
40 | # traj_file = os.path.join(path, name, self.name+'.txt')
41 | # if os.path.exists(traj_file):
42 | # with open(traj_file, 'r') as f :
43 | # self.pred_trajs[name] = [list(map(float, x.strip().split(',')))
44 | # for x in f.readlines()]
45 | # if len(self.pred_trajs[name]) != len(self.gt_traj):
46 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name)
47 | # else:
48 |
49 | # self.tracker_names = list(self.pred_trajs.keys())
50 |
51 | class TrackingNetDataset(Dataset):
52 | """
53 | Args:
54 |         name: dataset name, should be 'TrackingNet'
55 |         dataset_root: dataset root dir
56 | """
57 | def __init__(self, name, dataset_root, load_img=False):
58 | super(TrackingNetDataset, self).__init__(name, dataset_root)
59 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
60 | meta_data = json.load(f)
61 |
62 | # load videos
63 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
64 | self.videos = {}
65 | for video in pbar:
66 | pbar.set_postfix_str(video)
67 | self.videos[video] = TrackingNetVideo(video,
68 | dataset_root,
69 | meta_data[video]['video_dir'],
70 | meta_data[video]['init_rect'],
71 | meta_data[video]['img_names'],
72 | meta_data[video]['gt_rect'],
73 | None)
74 | self.attr = {}
75 | self.attr['ALL'] = list(self.videos.keys())
76 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/datasets/uav.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 |
4 | from tqdm import tqdm
5 | from glob import glob
6 |
7 | from .dataset import Dataset
8 | from .video import Video
9 |
10 | class UAVVideo(Video):
11 | """
12 | Args:
13 | name: video name
14 | root: dataset root
15 | video_dir: video directory
16 | init_rect: init rectangle
17 | img_names: image names
18 | gt_rect: groundtruth rectangle
19 | attr: attribute of video
20 | """
21 | def __init__(self, name, root, video_dir, init_rect, img_names,
22 | gt_rect, attr, load_img=False):
23 | super(UAVVideo, self).__init__(name, root, video_dir,
24 | init_rect, img_names, gt_rect, attr, load_img)
25 |
26 |
27 | class UAVDataset(Dataset):
28 | """
29 | Args:
30 | name: dataset name, should be 'UAV123', 'UAV20L'
31 | dataset_root: dataset root
32 |         load_img: whether to load all imgs
33 | """
34 | def __init__(self, name, dataset_root, load_img=False):
35 | super(UAVDataset, self).__init__(name, dataset_root)
36 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
37 | meta_data = json.load(f)
38 |
39 | # load videos
40 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
41 | self.videos = {}
42 | for video in pbar:
43 | pbar.set_postfix_str(video)
44 | self.videos[video] = UAVVideo(video,
45 | dataset_root,
46 | meta_data[video]['video_dir'],
47 | meta_data[video]['init_rect'],
48 | meta_data[video]['img_names'],
49 | meta_data[video]['gt_rect'],
50 | meta_data[video]['attr'])
51 |
52 | # set attr
53 | attr = []
54 | for x in self.videos.values():
55 | attr += x.attr
56 | attr = set(attr)
57 | self.attr = {}
58 | self.attr['ALL'] = list(self.videos.keys())
59 | for x in attr:
60 | self.attr[x] = []
61 | for k, v in self.videos.items():
62 | for attr_ in v.attr:
63 | self.attr[attr_].append(k)
64 |
65 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from .ar_benchmark import AccuracyRobustnessBenchmark
2 | from .eao_benchmark import EAOBenchmark
3 | from .ope_benchmark import OPEBenchmark
4 | from .f1_benchmark import F1Benchmark
5 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from . import region
2 | from .statistics import *
3 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.6/region.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.6/region.o
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.6/src/region.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.6/src/region.o
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.7/region.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.7/region.o
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.7/src/region.o:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.7/src/region.o
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/utils/c_region.pxd:
--------------------------------------------------------------------------------
1 | cdef extern from "src/region.h":
2 | ctypedef enum region_type "RegionType":
3 | EMTPY
4 | SPECIAL
5 | RECTANGEL
6 | POLYGON
7 | MASK
8 |
9 | ctypedef struct region_bounds:
10 | float top
11 | float bottom
12 | float left
13 | float right
14 |
15 | ctypedef struct region_rectangle:
16 | float x
17 | float y
18 | float width
19 | float height
20 |
21 | # ctypedef struct region_mask:
22 | # int x
23 | # int y
24 | # int width
25 | # int height
26 | # char *data
27 |
28 | ctypedef struct region_polygon:
29 | int count
30 | float *x
31 | float *y
32 |
33 | ctypedef union region_container_data:
34 | region_rectangle rectangle
35 | region_polygon polygon
36 | # region_mask mask
37 | int special
38 |
39 | ctypedef struct region_container:
40 | region_type type
41 | region_container_data data
42 |
43 | # ctypedef struct region_overlap:
44 | # float overlap
45 | # float only1
46 | # float only2
47 |
48 | # region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds)
49 |
50 | float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds)
51 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/utils/misc.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 |
4 | def determine_thresholds(confidence, resolution=100):
5 | """choose threshold according to confidence
6 |
7 | Args:
8 |         confidence: list or numpy array
9 |         resolution: number of thresholds to choose
10 | 
11 |     Returns:
12 | threshold: numpy array
13 | """
14 | if isinstance(confidence, list):
15 | confidence = np.array(confidence)
16 | confidence = confidence.flatten()
17 | confidence = confidence[~np.isnan(confidence)]
18 | confidence.sort()
19 |
20 | assert len(confidence) > resolution and resolution > 2
21 |
22 | thresholds = np.ones((resolution))
23 | thresholds[0] = - np.inf
24 | thresholds[-1] = np.inf
25 | delta = np.floor(len(confidence) / (resolution - 2))
26 | idxs = np.linspace(delta, len(confidence)-delta, resolution-2, dtype=np.int32)
27 | thresholds[1:-1] = confidence[idxs]
28 | return thresholds
29 |
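30 | 
31 | # Usage sketch (illustrative values):
32 | #   confs = np.random.rand(1000)
33 | #   th = determine_thresholds(confs, resolution=100)
34 | #   # th[0] is -inf, th[-1] is +inf, the rest are sampled from the sorted confidences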
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/utils/region.cpython-36m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/utils/region.cpython-36m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/utils/region.cpython-37m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/utils/region.cpython-37m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/utils/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from distutils.extension import Extension
3 | from Cython.Build import cythonize
4 |
5 | setup(
6 | ext_modules = cythonize([Extension("region", ["region.pyx", "src/region.c"])]),
7 | )
8 |
9 |
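10 | # Build sketch (assumes Cython and a C compiler are available):
11 | #   python setup.py build_ext --inplace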
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/utils/src/region.h:
--------------------------------------------------------------------------------
1 | /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 4; tab-width: 4 -*- */
2 |
3 | #ifndef _REGION_H_
4 | #define _REGION_H_
5 |
6 | #ifdef TRAX_STATIC_DEFINE
7 | # define __TRAX_EXPORT
8 | #else
9 | # ifndef __TRAX_EXPORT
10 | # if defined(_MSC_VER)
11 | # ifdef trax_EXPORTS
12 | /* We are building this library */
13 | # define __TRAX_EXPORT __declspec(dllexport)
14 | # else
15 | /* We are using this library */
16 | # define __TRAX_EXPORT __declspec(dllimport)
17 | # endif
18 | # elif defined(__GNUC__)
19 | # ifdef trax_EXPORTS
20 | /* We are building this library */
21 | # define __TRAX_EXPORT __attribute__((visibility("default")))
22 | # else
23 | /* We are using this library */
24 | # define __TRAX_EXPORT __attribute__((visibility("default")))
25 | # endif
26 | # endif
27 | # endif
28 | #endif
29 |
30 | #ifndef MAX
31 | #define MAX(a,b) (((a) > (b)) ? (a) : (b))
32 | #endif
33 |
34 | #ifndef MIN
35 | #define MIN(a,b) (((a) < (b)) ? (a) : (b))
36 | #endif
37 |
38 | #define TRAX_DEFAULT_CODE 0
39 |
40 | #define REGION_LEGACY_RASTERIZATION 1
41 |
42 | #ifdef __cplusplus
43 | extern "C" {
44 | #endif
45 |
46 | typedef enum region_type {EMPTY, SPECIAL, RECTANGLE, POLYGON, MASK} region_type;
47 |
48 | typedef struct region_bounds {
49 |
50 | float top;
51 | float bottom;
52 | float left;
53 | float right;
54 |
55 | } region_bounds;
56 |
57 | typedef struct region_polygon {
58 |
59 | int count;
60 |
61 | float* x;
62 | float* y;
63 |
64 | } region_polygon;
65 |
66 | typedef struct region_mask {
67 |
68 | int x;
69 | int y;
70 |
71 | int width;
72 | int height;
73 |
74 | char* data;
75 |
76 | } region_mask;
77 |
78 | typedef struct region_rectangle {
79 |
80 | float x;
81 | float y;
82 | float width;
83 | float height;
84 |
85 | } region_rectangle;
86 |
87 | typedef struct region_container {
88 | enum region_type type;
89 | union {
90 | region_rectangle rectangle;
91 | region_polygon polygon;
92 | region_mask mask;
93 | int special;
94 | } data;
95 | } region_container;
96 |
97 | typedef struct region_overlap {
98 |
99 | float overlap;
100 | float only1;
101 | float only2;
102 |
103 | } region_overlap;
104 |
105 | extern const region_bounds region_no_bounds;
106 |
107 | __TRAX_EXPORT int region_set_flags(int mask);
108 |
109 | __TRAX_EXPORT int region_clear_flags(int mask);
110 |
111 | __TRAX_EXPORT region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds);
112 |
113 | __TRAX_EXPORT float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds);
114 |
115 | __TRAX_EXPORT region_bounds region_create_bounds(float left, float top, float right, float bottom);
116 |
117 | __TRAX_EXPORT region_bounds region_compute_bounds(const region_container* region);
118 |
119 | __TRAX_EXPORT int region_parse(const char* buffer, region_container** region);
120 |
121 | __TRAX_EXPORT char* region_string(region_container* region);
122 |
123 | __TRAX_EXPORT void region_print(FILE* out, region_container* region);
124 |
125 | __TRAX_EXPORT region_container* region_convert(const region_container* region, region_type type);
126 |
127 | __TRAX_EXPORT void region_release(region_container** region);
128 |
129 | __TRAX_EXPORT region_container* region_create_special(int code);
130 |
131 | __TRAX_EXPORT region_container* region_create_rectangle(float x, float y, float width, float height);
132 |
133 | __TRAX_EXPORT region_container* region_create_polygon(int count);
134 |
135 | __TRAX_EXPORT int region_contains_point(region_container* r, float x, float y);
136 |
137 | __TRAX_EXPORT void region_get_mask(region_container* r, char* mask, int width, int height);
138 |
139 | __TRAX_EXPORT void region_get_mask_offset(region_container* r, char* mask, int x, int y, int width, int height);
140 |
141 | #ifdef __cplusplus
142 | }
143 | #endif
144 |
145 | #endif
146 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/visualization/__init__.py:
--------------------------------------------------------------------------------
1 | from .draw_f1 import draw_f1
2 | from .draw_success_precision import draw_success_precision
3 | from .draw_eao import draw_eao
4 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/visualization/draw_eao.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import pickle
4 |
5 | from matplotlib import rc
6 | from .draw_utils import COLOR, MARKER_STYLE
7 |
8 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
9 | rc('text', usetex=True)
10 |
11 | def draw_eao(result):
12 | fig = plt.figure()
13 | ax = fig.add_subplot(111, projection='polar')
14 | angles = np.linspace(0, 2*np.pi, 8, endpoint=True)
15 |
16 | attr2value = []
17 | for i, (tracker_name, ret) in enumerate(result.items()):
18 | value = list(ret.values())
19 | attr2value.append(value)
20 | value.append(value[0])
21 | attr2value = np.array(attr2value)
22 | max_value = np.max(attr2value, axis=0)
23 | min_value = np.min(attr2value, axis=0)
24 | for i, (tracker_name, ret) in enumerate(result.items()):
25 | value = list(ret.values())
26 | value.append(value[0])
27 | value = np.array(value)
28 | value *= (1 / max_value)
29 | plt.plot(angles, value, linestyle='-', color=COLOR[i], marker=MARKER_STYLE[i],
30 | label=tracker_name, linewidth=1.5, markersize=6)
31 |
32 | attrs = ["Overall", "Camera motion",
33 | "Illumination change","Motion Change",
34 | "Size change","Occlusion",
35 | "Unassigned"]
36 | attr_value = []
37 | for attr, maxv, minv in zip(attrs, max_value, min_value):
38 | attr_value.append(attr + "\n({:.3f},{:.3f})".format(minv, maxv))
39 | ax.set_thetagrids(angles[:-1] * 180/np.pi, attr_value)
40 | ax.spines['polar'].set_visible(False)
41 | ax.legend(loc='upper center', bbox_to_anchor=(0.5,-0.07), frameon=False, ncol=5)
42 | ax.grid(b=False)
43 | ax.set_ylim(0, 1.18)
44 | ax.set_yticks([])
45 | plt.show()
46 |
47 | if __name__ == '__main__':
48 | result = pickle.load(open("../../result.pkl", 'rb'))
49 | draw_eao(result)
50 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/visualization/draw_f1.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | from matplotlib import rc
5 | from .draw_utils import COLOR, LINE_STYLE
6 |
7 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
8 | rc('text', usetex=True)
9 |
10 | def draw_f1(result, bold_name=None):
11 | # drawing f1 contour
12 | fig, ax = plt.subplots()
13 | for f1 in np.arange(0.1, 1, 0.1):
14 | recall = np.arange(f1, 1+0.01, 0.01)
15 | precision = f1 * recall / (2 * recall - f1)
16 | ax.plot(recall, precision, color=[0,1,0], linestyle='-', linewidth=0.5)
17 | ax.plot(precision, recall, color=[0,1,0], linestyle='-', linewidth=0.5)
18 | ax.grid(b=True)
19 | ax.set_aspect(1)
20 | plt.xlabel('Recall')
21 | plt.ylabel('Precision')
22 | plt.axis([0, 1, 0, 1])
23 | plt.title(r'\textbf{VOT2018-LT Precision vs Recall}')
24 |
25 | # draw result line
26 | all_precision = {}
27 | all_recall = {}
28 | best_f1 = {}
29 | best_idx = {}
30 | for tracker_name, ret in result.items():
31 | precision = np.mean(list(ret['precision'].values()), axis=0)
32 | recall = np.mean(list(ret['recall'].values()), axis=0)
33 | f1 = 2 * precision * recall / (precision + recall)
34 | max_idx = np.argmax(f1)
35 | all_precision[tracker_name] = precision
36 | all_recall[tracker_name] = recall
37 | best_f1[tracker_name] = f1[max_idx]
38 | best_idx[tracker_name] = max_idx
39 |
40 | for idx, (tracker_name, best_f1) in \
41 | enumerate(sorted(best_f1.items(), key=lambda x:x[1], reverse=True)):
42 | if tracker_name == bold_name:
43 | label = r"\textbf{[%.3f] Ours}" % (best_f1)
44 | else:
45 | label = "[%.3f] " % (best_f1) + tracker_name
46 | recall = all_recall[tracker_name][:-1]
47 | precision = all_precision[tracker_name][:-1]
48 | ax.plot(recall, precision, color=COLOR[idx], linestyle='-',
49 | label=label)
50 | f1_idx = best_idx[tracker_name]
51 | ax.plot(recall[f1_idx], precision[f1_idx], color=[0,0,0], marker='o',
52 | markerfacecolor=COLOR[idx], markersize=5)
53 | ax.legend(loc='lower right', labelspacing=0.2)
54 | plt.xticks(np.arange(0, 1+0.1, 0.1))
55 | plt.yticks(np.arange(0, 1+0.1, 0.1))
56 | plt.show()
57 |
58 | if __name__ == '__main__':
59 | draw_f1(None)
60 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/pysot/visualization/draw_utils.py:
--------------------------------------------------------------------------------
1 |
2 | COLOR = ((1, 0, 0),
3 | (0, 1, 0),
4 | (1, 0, 1),
5 | (1, 1, 0),
6 | (0 , 162/255, 232/255),
7 | (0.5, 0.5, 0.5),
8 | (0, 0, 1),
9 | (0, 1, 1),
10 | (136/255, 0 , 21/255),
11 | (255/255, 127/255, 39/255),
12 | (0, 0, 0))
13 |
14 | LINE_STYLE = ['-', '--', ':', '-', '--', ':', '-', '--', ':', '-']
15 |
16 | MARKER_STYLE = ['o', 'v', '<', '*', 'D', 'x', '.', 'x', '<', '.']
17 |
--------------------------------------------------------------------------------
/lib/eval_toolkit/requirements.txt:
--------------------------------------------------------------------------------
1 | tqdm
2 | numpy
3 | glob
4 | opencv-python
5 | colorama
6 | numba
7 |
--------------------------------------------------------------------------------
/lib/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/__init__.py
--------------------------------------------------------------------------------
/lib/models/backbones.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Zhipeng Zhang (zhangzhipeng2017@ia.ac.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | import torch
8 | import torch.nn as nn
9 | from .modules import Bottleneck, ResNet_plus2, Bottleneck_BIG_CI, ResNet
10 |
11 | eps = 1e-5
12 | # ---------------------
13 | # For Ocean and Ocean+
14 | # ---------------------
15 | class ResNet50(nn.Module):
16 | def __init__(self, used_layers=[2, 3, 4], online=False):
17 | super(ResNet50, self).__init__()
18 | self.features = ResNet_plus2(Bottleneck, [3, 4, 6, 3], used_layers=used_layers, online=online)
19 |
20 | def forward(self, x, online=False):
21 | if not online:
22 | x_stages, x = self.features(x, online=online)
23 | return x_stages, x
24 | else:
25 | x = self.features(x, online=online)
26 | return x
27 |
28 | # ---------------------
29 | # For SiamDW
30 | # ---------------------
31 | class ResNet22W(nn.Module):
32 | """
33 | ResNet22W: doubles the channels of the 3*3 layers (only) in the residual block
34 | """
35 | def __init__(self):
36 | super(ResNet22W, self).__init__()
37 | self.features = ResNet(Bottleneck_BIG_CI, [3, 4], [True, False], [False, True], firstchannels=64, channels=[64, 128])
38 | self.feature_size = 512
39 |
40 | def forward(self, x):
41 | x = self.features(x)
42 |
43 | return x
44 |
45 |
46 | if __name__ == '__main__':
47 | import torch
48 | net = ResNet50().cuda()
49 | print(net)
50 |
51 | params = list(net.parameters())
52 | k = 0
53 | for i in params:
54 | l = 1
55 | for j in i.size():
56 | l *= j
57 | k = k + l
58 | print("total params" + str(k/1e6) + "M")
59 |
60 | search = torch.rand(1, 3, 255, 255)
61 | search = torch.Tensor(search).cuda()
62 | out = net(search)
63 | print(out.size())
64 |
65 | print()
66 |
--------------------------------------------------------------------------------
/lib/models/dcn/__init__.py:
--------------------------------------------------------------------------------
1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv,
2 | ModulatedDeformConvPack, deform_conv,
3 | modulated_deform_conv)
4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack,
5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling)
6 |
7 | __all__ = [
8 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv',
9 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack',
10 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv',
11 | 'deform_roi_pooling'
12 | ]
13 |
--------------------------------------------------------------------------------
/lib/models/dcn/deform_conv_cuda.cpython-36m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/dcn/deform_conv_cuda.cpython-36m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/lib/models/dcn/deform_conv_cuda.cpython-37m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/dcn/deform_conv_cuda.cpython-37m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/lib/models/dcn/deform_pool_cuda.cpython-36m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/dcn/deform_pool_cuda.cpython-36m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/lib/models/dcn/deform_pool_cuda.cpython-37m-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/dcn/deform_pool_cuda.cpython-37m-x86_64-linux-gnu.so
--------------------------------------------------------------------------------
/lib/models/dcn/src/deform_pool_cuda.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include <torch/extension.h>
3 |
4 | #include <cmath>
5 | #include <vector>
6 |
7 | void DeformablePSROIPoolForward(
8 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans,
9 | at::Tensor out, at::Tensor top_count, const int batch, const int channels,
10 | const int height, const int width, const int num_bbox,
11 | const int channels_trans, const int no_trans, const float spatial_scale,
12 | const int output_dim, const int group_size, const int pooled_size,
13 | const int part_size, const int sample_per_part, const float trans_std);
14 |
15 | void DeformablePSROIPoolBackwardAcc(
16 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox,
17 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad,
18 | at::Tensor trans_grad, const int batch, const int channels,
19 | const int height, const int width, const int num_bbox,
20 | const int channels_trans, const int no_trans, const float spatial_scale,
21 | const int output_dim, const int group_size, const int pooled_size,
22 | const int part_size, const int sample_per_part, const float trans_std);
23 |
24 | void deform_psroi_pooling_cuda_forward(
25 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
26 | at::Tensor top_count, const int no_trans, const float spatial_scale,
27 | const int output_dim, const int group_size, const int pooled_size,
28 | const int part_size, const int sample_per_part, const float trans_std) {
29 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
30 |
31 | const int batch = input.size(0);
32 | const int channels = input.size(1);
33 | const int height = input.size(2);
34 | const int width = input.size(3);
35 | const int channels_trans = no_trans ? 2 : trans.size(1);
36 |
37 | const int num_bbox = bbox.size(0);
38 | if (num_bbox != out.size(0))
39 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).",
40 | out.size(0), num_bbox);
41 |
42 | DeformablePSROIPoolForward(
43 | input, bbox, trans, out, top_count, batch, channels, height, width,
44 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size,
45 | pooled_size, part_size, sample_per_part, trans_std);
46 | }
47 |
48 | void deform_psroi_pooling_cuda_backward(
49 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
50 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
51 | const int no_trans, const float spatial_scale, const int output_dim,
52 | const int group_size, const int pooled_size, const int part_size,
53 | const int sample_per_part, const float trans_std) {
54 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
55 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
56 |
57 | const int batch = input.size(0);
58 | const int channels = input.size(1);
59 | const int height = input.size(2);
60 | const int width = input.size(3);
61 | const int channels_trans = no_trans ? 2 : trans.size(1);
62 |
63 | const int num_bbox = bbox.size(0);
64 | if (num_bbox != out_grad.size(0))
65 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).",
66 | out_grad.size(0), num_bbox);
67 |
68 | DeformablePSROIPoolBackwardAcc(
69 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch,
70 | channels, height, width, num_bbox, channels_trans, no_trans,
71 | spatial_scale, output_dim, group_size, pooled_size, part_size,
72 | sample_per_part, trans_std);
73 | }
74 |
75 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
76 | m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward,
77 | "deform psroi pooling forward(CUDA)");
78 | m.def("deform_psroi_pooling_cuda_backward",
79 | &deform_psroi_pooling_cuda_backward,
80 | "deform psroi pooling backward(CUDA)");
81 | }
--------------------------------------------------------------------------------
/lib/models/oceanTRT.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Zhipeng Zhang (zhangzhipeng2017@ia.ac.cn)
5 | # ------------------------------------------------------------------------------
6 |
7 | import torch
8 | import torch.nn as nn
9 | import numpy as np
10 |
11 | class OceanTRT_(nn.Module):
12 | def __init__(self):
13 | super(OceanTRT_, self).__init__()
14 | self.features = None
15 | self.connect_model = None
16 | self.zf = None # for online tracking
17 | self.neck = None
18 | self.search_size = 255
19 | self.score_size = 25
20 |
21 |
22 | def tensorrt_init(self, trt_net, corr=None):
23 | """
24 | TensorRT init
25 | """
26 | self.t_backbone255, self.s_backbone_siam255, self.s_backbone_siam287, self.s_backbone_online, self.t_neck255, \
27 | self.s_neck255, self.s_neck287, self.multiDiCorr255, self.multiDiCorr287, self.boxtower255, self.boxtower287 = trt_net
28 |
29 | if corr:
30 | self.multiDiCorr255, self.multiDiCorr287 = corr
31 |
32 | def extract_for_online(self, x):
33 | xf = self.s_backbone_online(x, torch.Tensor([1]).cuda())
34 | return xf
35 |
36 | def template(self, z):
37 | _, _, _, self.zf = self.t_backbone255(z, torch.Tensor([]).cuda())
38 | self.zf_ori = self.t_neck255(self.zf)
39 | self.zf = self.zf_ori[:, :, 4:-4, 4:-4].contiguous()
40 |
41 | def track(self, x):
42 | """
43 | Please see OceanOnlinePT for the PyTorch version (cleaner).
44 | """
45 | b1, b2, b3, xf = self.s_backbone_siam255(x, torch.Tensor([]).cuda())
46 | xf = self.s_neck255(xf) # b4
47 |
48 | # backbone encode (something is wrong with connect model)
49 | cls_z0, cls_z1, cls_z2, cls_x0, cls_x1, cls_x2, reg_z0, reg_z1, reg_z2, reg_x0, reg_x1, reg_x2 = self.multiDiCorr255(xf, self.zf)
50 |
51 | # correlation
52 | cls_z = [cls_z0, cls_z1, cls_z2]
53 | cls_x = [cls_x0, cls_x1, cls_x2]
54 | reg_z = [reg_z0, reg_z1, reg_z2]
55 | reg_x = [reg_x0, reg_x1, reg_x2]
56 |
57 | cls_dw, reg_dw = self.connect_model2(cls_z, cls_x, reg_z, reg_x)
58 | # cls and reg
59 | bbox_pred, cls_pred = self.boxtower255(cls_dw, reg_dw)
60 |
61 | return cls_pred, bbox_pred.squeeze(0)
62 |
--------------------------------------------------------------------------------
/lib/models/oceanplus.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | import torch.nn.functional as F
5 |
6 | class OceanPlus_(nn.Module):
7 | def __init__(self):
8 | super(OceanPlus_, self).__init__()
9 | self.features = None
10 | self.connect_model = None
11 | self.mask_model = None
12 | self.zf = None
13 | self.criterion = nn.BCEWithLogitsLoss()
14 | self.neck = None
15 | self.search_size = 255
16 | self.score_size = 25
17 | self.batch = 32 if self.training else 1
18 | self.lambda_u = 0.1
19 | self.lambda_s = 0.2
20 |
21 | # self.grids()
22 |
23 | def feature_extractor(self, x, online=False):
24 | return self.features(x, online=online)
25 |
26 | def extract_for_online(self, x):
27 | xf = self.feature_extractor(x, online=True)
28 | return xf
29 |
30 | def connector(self, template_feature, search_feature):
31 | pred_score = self.connect_model(template_feature, search_feature)
32 | return pred_score
33 |
34 | def update_roi_template(self, target_pos, target_sz, score):
35 | """
36 | :param target_pos: target position in the search region (not the original image)
37 | :param target_sz: target size (w, h)
38 | :param score:
39 | :return:
40 | """
41 |
42 | lambda_u = self.lambda_u * float(score)
43 | lambda_s = self.lambda_s
44 | N, C, H, W = self.search_size
45 | stride = 8
46 | assert N == 1, "not supported"
47 | l = W // 2
48 | x = range(-l, l + 1)
49 | y = range(-l, l + 1)
50 |
51 | hc_z = (target_sz[1] + 0.3 * sum(target_sz)) / stride
52 | wc_z = (target_sz[0] + 0.3 * sum(target_sz)) / stride
53 | grid_x = np.linspace(- wc_z / 2, wc_z / 2, 17)
54 | grid_y = np.linspace(- hc_z / 2, hc_z / 2, 17)
55 | grid_x = grid_x[5:-5] + target_pos[0] / stride
56 | grid_y = grid_y[5:-5] + target_pos[1] / stride
57 | x_offset = grid_x / l
58 | y_offset = grid_y / l
59 |
60 | grid = np.reshape(np.transpose([np.tile(x_offset, len(y_offset)), np.repeat(y_offset, len(x_offset))]), (len(grid_y), len(grid_x), 2))
61 | grid = torch.from_numpy(grid).unsqueeze(0).cuda()
62 |
63 | zmap = nn.functional.grid_sample(self.xf.double(), grid).float()
64 | # cls_kernel = self.rpn.cls.make_kernel(zmap)
65 | self.MA_kernel = (1 - lambda_u) * self.MA_kernel + lambda_u * zmap
66 | self.zf_update = self.zf * lambda_s + self.MA_kernel * (1.0 - lambda_s)
67 |
68 | def template(self, z, template_mask = None):
69 | _, self.zf = self.feature_extractor(z)
70 |
71 | if self.neck is not None:
72 | self.zf_ori, self.zf = self.neck(self.zf, crop=True)
73 |
74 | self.template_mask = template_mask.float()
75 | self.MA_kernel = self.zf.detach()
76 | self.zf_update = None
77 |
78 |
79 | def track(self, x):
80 |
81 | features_stages, xf = self.feature_extractor(x)
82 |
83 | if self.neck is not None:
84 | xf = self.neck(xf, crop=False)
85 |
86 | features_stages.append(xf)
87 | bbox_pred, cls_pred, cls_feature, reg_feature = self.connect_model(xf, self.zf, update=self.zf_update)
88 |
89 | features_stages.append(cls_feature)
90 | pred_mask = self.mask_model(features_stages, input_size=x.size()[2:], zf_ori=self.zf_ori, template_mask=self.template_mask)
91 | self.search_size = xf.size()
92 | self.xf = xf.detach()
93 |
94 | return cls_pred, bbox_pred, pred_mask
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
--------------------------------------------------------------------------------
/lib/models/online/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/online/__init__.py
--------------------------------------------------------------------------------
/lib/models/online/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | from .resnet import *
2 | from .resnet18_vggm import *
3 |
--------------------------------------------------------------------------------
/lib/models/online/bbreg/__init__.py:
--------------------------------------------------------------------------------
1 | from .iou_net import IoUNet
2 |
--------------------------------------------------------------------------------
/lib/models/online/classifier/__init__.py:
--------------------------------------------------------------------------------
1 | from .linear_filter import LinearFilter
2 |
--------------------------------------------------------------------------------
/lib/models/online/classifier/features.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | import torch.nn.functional as F
4 | from torchvision.models.resnet import BasicBlock, Bottleneck
5 | from models.online.layers.normalization import InstanceL2Norm
6 | from models.online.layers.transform import InterpCat
7 |
8 |
9 | def residual_basic_block(feature_dim=256, num_blocks=1, l2norm=True, final_conv=False, norm_scale=1.0, out_dim=None,
10 | interp_cat=False):
11 | """Construct a network block based on the BasicBlock used in ResNet 18 and 34."""
12 | if out_dim is None:
13 | out_dim = feature_dim
14 | feat_layers = []
15 | if interp_cat:
16 | feat_layers.append(InterpCat())
17 | for i in range(num_blocks):
18 | odim = feature_dim if i < num_blocks - 1 + int(final_conv) else out_dim
19 | feat_layers.append(BasicBlock(feature_dim, odim))
20 | if final_conv:
21 | feat_layers.append(nn.Conv2d(feature_dim, out_dim, kernel_size=3, padding=1, bias=False))
22 | if l2norm:
23 | feat_layers.append(InstanceL2Norm(scale=norm_scale))
24 | return nn.Sequential(*feat_layers)
25 |
26 |
27 | def residual_basic_block_pool(feature_dim=256, num_blocks=1, l2norm=True, final_conv=False, norm_scale=1.0, out_dim=None,
28 | pool=True):
29 | """Construct a network block based on the BasicBlock used in ResNet."""
30 | if out_dim is None:
31 | out_dim = feature_dim
32 | feat_layers = []
33 | for i in range(num_blocks):
34 | odim = feature_dim if i < num_blocks - 1 + int(final_conv) else out_dim
35 | feat_layers.append(BasicBlock(feature_dim, odim))
36 | if final_conv:
37 | feat_layers.append(nn.Conv2d(feature_dim, out_dim, kernel_size=3, padding=1, bias=False))
38 | if pool:
39 | feat_layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
40 | if l2norm:
41 | feat_layers.append(InstanceL2Norm(scale=norm_scale))
42 |
43 | return nn.Sequential(*feat_layers)
44 |
45 |
46 | def residual_bottleneck(feature_dim=256, num_blocks=1, l2norm=True, final_conv=False, norm_scale=1.0, out_dim=None,
47 | interp_cat=False):
48 | """Construct a network block based on the Bottleneck block used in ResNet."""
49 | if out_dim is None:
50 | out_dim = feature_dim
51 | feat_layers = []
52 | if interp_cat:
53 | feat_layers.append(InterpCat())
54 | for i in range(num_blocks):
55 | planes = feature_dim if i < num_blocks - 1 + int(final_conv) else out_dim // 4
56 | feat_layers.append(Bottleneck(4*feature_dim, planes))
57 | if final_conv:
58 | feat_layers.append(nn.Conv2d(4*feature_dim, out_dim, kernel_size=3, padding=1, bias=False))
59 | if l2norm:
60 | feat_layers.append(InstanceL2Norm(scale=norm_scale))
61 | return nn.Sequential(*feat_layers)
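
A minimal shape-level usage sketch of `residual_basic_block` (assuming torchvision is available and this module is importable as `models.online.classifier.features`): with `final_conv=True` the BasicBlocks keep `feature_dim` channels and the trailing 3x3 convolution maps to `out_dim`.

```python
import torch
from models.online.classifier.features import residual_basic_block

# One BasicBlock at 256 channels, then a 3x3 conv to 512 channels and instance L2 norm.
block = residual_basic_block(feature_dim=256, num_blocks=1, l2norm=True,
                             final_conv=True, norm_scale=1.0, out_dim=512)
x = torch.rand(1, 256, 18, 18)
print(block(x).shape)  # torch.Size([1, 512, 18, 18])
```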
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | .vim-template*
7 |
8 | # C extensions
9 | *.so
10 |
11 | # Distribution / packaging
12 | .Python
13 | build/
14 | develop-eggs/
15 | dist/
16 | downloads/
17 | eggs/
18 | .eggs/
19 | lib/
20 | lib64/
21 | parts/
22 | sdist/
23 | var/
24 | wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | .hypothesis/
50 | .pytest_cache/
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 | local_settings.py
59 | db.sqlite3
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # Jupyter Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # SageMath parsed files
84 | *.sage.py
85 |
86 | # Environments
87 | .env
88 | .venv
89 | env/
90 | venv/
91 | ENV/
92 | env.bak/
93 | venv.bak/
94 |
95 | # Spyder project settings
96 | .spyderproject
97 | .spyproject
98 |
99 | # Rope project settings
100 | .ropeproject
101 |
102 | # mkdocs documentation
103 | /site
104 |
105 | # mypy
106 | .mypy_cache/
107 |
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2018 Jiayuan Mao
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/README.md:
--------------------------------------------------------------------------------
1 | # PreciseRoIPooling
2 | This repo implements the **Precise RoI Pooling** (PrRoI Pooling), proposed in the paper **Acquisition of Localization Confidence for Accurate Object Detection** published at ECCV 2018 (Oral Presentation).
3 |
4 | **Acquisition of Localization Confidence for Accurate Object Detection**
5 |
6 | _Borui Jiang*, Ruixuan Luo*, Jiayuan Mao*, Tete Xiao, Yuning Jiang_ (* indicates equal contribution.)
7 |
8 | https://arxiv.org/abs/1807.11590
9 |
10 | ## Brief
11 |
12 | In short, Precise RoI Pooling is an integration-based (bilinear interpolation) average pooling method for RoI Pooling. It avoids any quantization and has a continuous gradient on bounding box coordinates. It is:
13 |
14 | - different from the original RoI Pooling proposed in [Fast R-CNN](https://arxiv.org/abs/1504.08083). PrRoI Pooling uses average pooling instead of max pooling for each bin and has a continuous gradient on bounding box coordinates. That is, one can take the derivatives of some loss function w.r.t the coordinates of each RoI and optimize the RoI coordinates.
15 | - different from the RoI Align proposed in [Mask R-CNN](https://arxiv.org/abs/1703.06870). PrRoI Pooling uses a full integration-based average pooling instead of sampling a constant number of points. This makes the gradient w.r.t. the coordinates continuous.
16 |
17 | For a better comparison, we illustrate RoI Pooling, RoI Align and PrRoI Pooling in the following figure. More details including the gradient computation can be found in our paper.
18 |
19 | ![RoI Pooling, RoI Align and PrRoI Pooling](./_assets/prroi_visualization.png)
20 |
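As a rough illustration of what "integration-based average pooling" means, the sketch below is a NumPy approximation (not the shipped CUDA kernel, which evaluates the integral in closed form): it densely samples the bilinearly interpolated feature surface over one bin and averages it.

```
import numpy as np

def bilinear(feat, y, x):
    """Bilinearly interpolate the 2-D map `feat` at a continuous (y, x) location."""
    h, w = feat.shape
    y0, x0 = int(np.floor(y)), int(np.floor(x))
    y1, x1 = min(y0 + 1, h - 1), min(x0 + 1, w - 1)
    dy, dx = y - y0, x - x0
    return (feat[y0, x0] * (1 - dy) * (1 - dx) + feat[y0, x1] * (1 - dy) * dx
            + feat[y1, x0] * dy * (1 - dx) + feat[y1, x1] * dy * dx)

def prroi_bin_approx(feat, x0, y0, x1, y1, n=64):
    """Approximate one PrRoI bin: the mean of the interpolated surface over
    [x0, x1] x [y0, y1]. The real kernel computes this double integral exactly,
    so the bin coordinates are never rounded and their gradient stays continuous."""
    xs = np.linspace(x0, x1, n)
    ys = np.linspace(y0, y1, n)
    return float(np.mean([bilinear(feat, y, x) for y in ys for x in xs]))

feat = np.arange(16, dtype=np.float32).reshape(4, 4)
print(prroi_bin_approx(feat, 0.5, 0.5, 2.5, 2.5))  # average of the surface over the box
```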
21 | ## Implementation
22 |
23 | PrRoI Pooling was originally implemented by [Tete Xiao](http://tetexiao.com/) based on MegBrain, an (internal) deep learning framework built by Megvii Inc. It was later adapted into open-source deep learning frameworks. Currently, we only support PyTorch. Unfortunately, we don't have any specific plan for the adaptation into other frameworks such as TensorFlow, but any contributions (pull requests) will be more than welcome.
24 |
25 | ## Usage (PyTorch 1.0)
26 |
27 | In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 1.0+ and only supports CUDA (CPU mode is not implemented).
28 | Since we use PyTorch JIT for cxx/cuda code compilation, to use the module in your code, simply do:
29 |
30 | ```
31 | from prroi_pool import PrRoIPool2D
32 |
33 | avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale)
34 | roi_features = avg_pool(features, rois)
35 |
36 | # for those who want to use the "functional"
37 |
38 | from prroi_pool.functional import prroi_pool2d
39 | roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale)
40 | ```
41 |
42 |
43 | ## Usage (PyTorch 0.4)
44 |
45 | **!!! Please first checkout to the branch pytorch0.4.**
46 |
47 | In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 0.4 and only supports CUDA (CPU mode is not implemented).
48 | To use the PrRoI Pooling module, first go to `pytorch/prroi_pool` and execute `./travis.sh` to compile the essential components (you may need `nvcc` for this step). To use the module in your code, simply do:
49 |
50 | ```
51 | from prroi_pool import PrRoIPool2D
52 |
53 | avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale)
54 | roi_features = avg_pool(features, rois)
55 |
56 | # for those who want to use the "functional"
57 |
58 | from prroi_pool.functional import prroi_pool2d
59 | roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale)
60 | ```
61 |
62 | Here,
63 |
64 | - RoI is an `m * 5` float tensor of format `(batch_index, x0, y0, x1, y1)`, following the convention in the original Caffe implementation of RoI Pooling, although in some frameworks the batch indices are provided by an integer tensor.
65 | - `spatial_scale` is multiplied with the RoIs. For example, if your feature maps are down-sampled by a factor of 16 (w.r.t. the input image), you should use a spatial scale of `1/16`.
66 | - The coordinates for RoI follow the [L, R) convention. That is, `(0, 0, 4, 4)` denotes a box of size `4x4` (see the short sketch below).
67 |
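To make the conventions above concrete, here is a minimal sketch (assuming the extension compiles and a CUDA device is available) that pools two RoIs given in image coordinates from feature maps down-sampled by 16x:

```
import torch
from prroi_pool import PrRoIPool2D

pool = PrRoIPool2D(7, 7, spatial_scale=1.0 / 16)   # feature map is 1/16 of the image

features = torch.rand(2, 256, 32, 32).cuda()        # two images in the batch
# Each row is (batch_index, x0, y0, x1, y1) in image coordinates.
rois = torch.tensor([
    [0,   0.0,  0.0,  64.0,  64.0],
    [1, 128.0, 96.0, 256.0, 224.0],
], dtype=torch.float32).cuda()

roi_features = pool(features, rois)                  # -> (2, 256, 7, 7)
```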
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/_assets/prroi_visualization.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/online/external/PreciseRoIPooling/_assets/prroi_visualization.png
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | /_prroi_pooling
3 |
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/__init__.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : __init__.py
4 | # Author : Jiayuan Mao, Tete Xiao
5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
6 | # Date : 07/13/2018
7 | #
8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license.
10 | # Copyright (c) 2017 Megvii Technology Limited.
11 |
12 | from .prroi_pool import *
13 |
14 |
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/functional.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : functional.py
4 | # Author : Jiayuan Mao, Tete Xiao
5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
6 | # Date : 07/13/2018
7 | #
8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license.
10 | # Copyright (c) 2017 Megvii Technology Limited.
11 |
12 | import torch
13 | import torch.autograd as ag
14 |
15 | __all__ = ['prroi_pool2d']
16 |
17 |
18 | _prroi_pooling = None
19 |
20 |
21 | def _import_prroi_pooling():
22 | global _prroi_pooling
23 |
24 | if _prroi_pooling is None:
25 | try:
26 | from os.path import join as pjoin, dirname
27 | from torch.utils.cpp_extension import load as load_extension
28 | root_dir = pjoin(dirname(__file__), 'src')
29 |
30 | _prroi_pooling = load_extension(
31 | '_prroi_pooling',
32 | [pjoin(root_dir, 'prroi_pooling_gpu.c'), pjoin(root_dir, 'prroi_pooling_gpu_impl.cu')],
33 | verbose=True
34 | )
35 | except ImportError:
36 | raise ImportError('Can not compile Precise RoI Pooling library.')
37 |
38 | return _prroi_pooling
39 |
40 |
41 | class PrRoIPool2DFunction(ag.Function):
42 | @staticmethod
43 | def forward(ctx, features, rois, pooled_height, pooled_width, spatial_scale):
44 | _prroi_pooling = _import_prroi_pooling()
45 |
46 | assert 'FloatTensor' in features.type() and 'FloatTensor' in rois.type(), \
47 | 'Precise RoI Pooling only takes float input, got {} for features and {} for rois.'.format(features.type(), rois.type())
48 |
49 | pooled_height = int(pooled_height)
50 | pooled_width = int(pooled_width)
51 | spatial_scale = float(spatial_scale)
52 |
53 | features = features.contiguous()
54 | rois = rois.contiguous()
55 | params = (pooled_height, pooled_width, spatial_scale)
56 |
57 | if features.is_cuda:
58 | output = _prroi_pooling.prroi_pooling_forward_cuda(features, rois, *params)
59 | ctx.params = params
60 | # everything here is contiguous.
61 | ctx.save_for_backward(features, rois, output)
62 | else:
63 | raise NotImplementedError('Precise RoI Pooling only supports GPU (cuda) implementations.')
64 |
65 | return output
66 |
67 | @staticmethod
68 | def backward(ctx, grad_output):
69 | _prroi_pooling = _import_prroi_pooling()
70 |
71 | features, rois, output = ctx.saved_tensors
72 | grad_input = grad_coor = None
73 |
74 | if features.requires_grad:
75 | grad_output = grad_output.contiguous()
76 | grad_input = _prroi_pooling.prroi_pooling_backward_cuda(features, rois, output, grad_output, *ctx.params)
77 | if rois.requires_grad:
78 | grad_output = grad_output.contiguous()
79 | grad_coor = _prroi_pooling.prroi_pooling_coor_backward_cuda(features, rois, output, grad_output, *ctx.params)
80 |
81 | return grad_input, grad_coor, None, None, None
82 |
83 |
84 | prroi_pool2d = PrRoIPool2DFunction.apply
85 |
86 |
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/prroi_pool.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python3
2 | # -*- coding: utf-8 -*-
3 | # File : prroi_pool.py
4 | # Author : Jiayuan Mao, Tete Xiao
5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
6 | # Date : 07/13/2018
7 | #
8 | # This file is part of PreciseRoIPooling.
9 | # Distributed under terms of the MIT license.
10 | # Copyright (c) 2017 Megvii Technology Limited.
11 |
12 | import torch.nn as nn
13 |
14 | from .functional import prroi_pool2d
15 |
16 | __all__ = ['PrRoIPool2D']
17 |
18 |
19 | class PrRoIPool2D(nn.Module):
20 | def __init__(self, pooled_height, pooled_width, spatial_scale):
21 | super().__init__()
22 |
23 | self.pooled_height = int(pooled_height)
24 | self.pooled_width = int(pooled_width)
25 | self.spatial_scale = float(spatial_scale)
26 |
27 | def forward(self, features, rois):
28 | return prroi_pool2d(features, rois, self.pooled_height, self.pooled_width, self.spatial_scale)
29 |
30 | def extra_repr(self):
31 | return 'kernel_size=({pooled_height}, {pooled_width}), spatial_scale={spatial_scale}'.format(**self.__dict__)
32 |
33 |
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.c:
--------------------------------------------------------------------------------
1 | /*
2 | * File : prroi_pooling_gpu.c
3 | * Author : Jiayuan Mao, Tete Xiao
4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
5 | * Date : 07/13/2018
6 | *
7 | * Distributed under terms of the MIT license.
8 | * Copyright (c) 2017 Megvii Technology Limited.
9 | */
10 |
11 | #include <math.h>
12 | #include <torch/extension.h>
13 |
14 | #include <THC/THC.h>
15 | #include <THC/THCDeviceUtils.cuh>
16 |
17 | #include <ATen/cuda/CUDAContext.h>
18 |
19 | #include "prroi_pooling_gpu_impl.cuh"
20 |
21 |
22 | at::Tensor prroi_pooling_forward_cuda(const at::Tensor &features, const at::Tensor &rois, int pooled_height, int pooled_width, float spatial_scale) {
23 | int nr_rois = rois.size(0);
24 | int nr_channels = features.size(1);
25 | int height = features.size(2);
26 | int width = features.size(3);
27 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width;
28 | auto output = at::zeros({nr_rois, nr_channels, pooled_height, pooled_width}, features.options());
29 |
30 | if (output.numel() == 0) {
31 | THCudaCheck(cudaGetLastError());
32 | return output;
33 | }
34 |
35 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
36 | PrRoIPoolingForwardGpu(
37 | stream, features.data(), rois.data(), output.data(),
38 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale,
39 | top_count
40 | );
41 |
42 | THCudaCheck(cudaGetLastError());
43 | return output;
44 | }
45 |
46 | at::Tensor prroi_pooling_backward_cuda(
47 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff,
48 | int pooled_height, int pooled_width, float spatial_scale) {
49 |
50 | auto features_diff = at::zeros_like(features);
51 |
52 | int nr_rois = rois.size(0);
53 | int batch_size = features.size(0);
54 | int nr_channels = features.size(1);
55 | int height = features.size(2);
56 | int width = features.size(3);
57 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width;
58 | int bottom_count = batch_size * nr_channels * height * width;
59 |
60 | if (output.numel() == 0) {
61 | THCudaCheck(cudaGetLastError());
62 | return features_diff;
63 | }
64 |
65 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
66 | PrRoIPoolingBackwardGpu(
67 | stream,
68 | features.data(), rois.data(), output.data(), output_diff.data(),
69 | features_diff.data(),
70 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale,
71 | top_count, bottom_count
72 | );
73 |
74 | THCudaCheck(cudaGetLastError());
75 | return features_diff;
76 | }
77 |
78 | at::Tensor prroi_pooling_coor_backward_cuda(
79 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff,
80 | int pooled_height, int pooled_width, float spatial_scale) {
81 |
82 | auto coor_diff = at::zeros_like(rois);
83 |
84 | int nr_rois = rois.size(0);
85 | int nr_channels = features.size(1);
86 | int height = features.size(2);
87 | int width = features.size(3);
88 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width;
89 | int bottom_count = nr_rois * 5;
90 |
91 | if (output.numel() == 0) {
92 | THCudaCheck(cudaGetLastError());
93 | return coor_diff;
94 | }
95 |
96 | cudaStream_t stream = at::cuda::getCurrentCUDAStream();
97 | PrRoIPoolingCoorBackwardGpu(
98 | stream,
99 | features.data(), rois.data(), output.data(), output_diff.data(),
100 | coor_diff.data(),
101 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale,
102 | top_count, bottom_count
103 | );
104 |
105 | THCudaCheck(cudaGetLastError());
106 | return coor_diff;
107 | }
108 |
109 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
110 | m.def("prroi_pooling_forward_cuda", &prroi_pooling_forward_cuda, "PRRoIPooling_forward");
111 | m.def("prroi_pooling_backward_cuda", &prroi_pooling_backward_cuda, "PRRoIPooling_backward");
112 | m.def("prroi_pooling_coor_backward_cuda", &prroi_pooling_coor_backward_cuda, "PRRoIPooling_backward_coor");
113 | }
114 |
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.h:
--------------------------------------------------------------------------------
1 | /*
2 | * File : prroi_pooling_gpu.h
3 | * Author : Jiayuan Mao, Tete Xiao
4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com
5 | * Date : 07/13/2018
6 | *
7 | * Distributed under terms of the MIT license.
8 | * Copyright (c) 2017 Megvii Technology Limited.
9 | */
10 |
11 | int prroi_pooling_forward_cuda(THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, int pooled_height, int pooled_width, float spatial_scale);
12 |
13 | int prroi_pooling_backward_cuda(
14 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff,
15 | int pooled_height, int pooled_width, float spatial_scale
16 | );
17 |
18 | int prroi_pooling_coor_backward_cuda(
19 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff,
20 | int pooled_height, int pooled_width, float spatial_scale
21 | );
22 |
23 |
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu_impl.cu:
--------------------------------------------------------------------------------
1 | ../../../src/prroi_pooling_gpu_impl.cu
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu_impl.cuh:
--------------------------------------------------------------------------------
1 | ../../../src/prroi_pooling_gpu_impl.cuh
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/pytorch/tests/test_prroi_pooling2d.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # File : test_prroi_pooling2d.py
3 | # Author : Jiayuan Mao
4 | # Email : maojiayuan@gmail.com
5 | # Date : 18/02/2018
6 | #
7 | # This file is part of Jacinle.
8 |
9 | import unittest
10 |
11 | import torch
12 | import torch.nn as nn
13 | import torch.nn.functional as F
14 |
15 | from jactorch.utils.unittest import TorchTestCase
16 |
17 | from prroi_pool import PrRoIPool2D
18 |
19 |
20 | class TestPrRoIPool2D(TorchTestCase):
21 | def test_forward(self):
22 | pool = PrRoIPool2D(7, 7, spatial_scale=0.5)
23 | features = torch.rand((4, 16, 24, 32)).cuda()
24 | rois = torch.tensor([
25 | [0, 0, 0, 14, 14],
26 | [1, 14, 14, 28, 28],
27 | ]).float().cuda()
28 |
29 | out = pool(features, rois)
30 | out_gold = F.avg_pool2d(features, kernel_size=2, stride=1)
31 |
32 | self.assertTensorClose(out, torch.stack((
33 | out_gold[0, :, :7, :7],
34 | out_gold[1, :, 7:14, 7:14],
35 | ), dim=0))
36 |
37 | def test_backward_shapeonly(self):
38 | pool = PrRoIPool2D(2, 2, spatial_scale=0.5)
39 |
40 | features = torch.rand((4, 2, 24, 32)).cuda()
41 | rois = torch.tensor([
42 | [0, 0, 0, 4, 4],
43 | [1, 14, 14, 18, 18],
44 | ]).float().cuda()
45 | features.requires_grad = rois.requires_grad = True
46 | out = pool(features, rois)
47 |
48 | loss = out.sum()
49 | loss.backward()
50 |
51 | self.assertTupleEqual(features.size(), features.grad.size())
52 | self.assertTupleEqual(rois.size(), rois.grad.size())
53 |
54 |
55 | if __name__ == '__main__':
56 | unittest.main()
57 |
--------------------------------------------------------------------------------
/lib/models/online/external/PreciseRoIPooling/src/prroi_pooling_gpu_impl.cuh:
--------------------------------------------------------------------------------
1 | /*
2 | * File : prroi_pooling_gpu_impl.cuh
3 | * Author : Tete Xiao, Jiayuan Mao
4 | * Email : jasonhsiao97@gmail.com
5 | *
6 | * Distributed under terms of the MIT license.
7 | * Copyright (c) 2017 Megvii Technology Limited.
8 | */
9 |
10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH
11 | #define PRROI_POOLING_GPU_IMPL_CUH
12 |
13 | #ifdef __cplusplus
14 | extern "C" {
15 | #endif
16 |
17 | #define F_DEVPTR_IN const float *
18 | #define F_DEVPTR_OUT float *
19 |
20 | void PrRoIPoolingForwardGpu(
21 | cudaStream_t stream,
22 | F_DEVPTR_IN bottom_data,
23 | F_DEVPTR_IN bottom_rois,
24 | F_DEVPTR_OUT top_data,
25 | const int channels_, const int height_, const int width_,
26 | const int pooled_height_, const int pooled_width_,
27 | const float spatial_scale_,
28 | const int top_count);
29 |
30 | void PrRoIPoolingBackwardGpu(
31 | cudaStream_t stream,
32 | F_DEVPTR_IN bottom_data,
33 | F_DEVPTR_IN bottom_rois,
34 | F_DEVPTR_IN top_data,
35 | F_DEVPTR_IN top_diff,
36 | F_DEVPTR_OUT bottom_diff,
37 | const int channels_, const int height_, const int width_,
38 | const int pooled_height_, const int pooled_width_,
39 | const float spatial_scale_,
40 | const int top_count, const int bottom_count);
41 |
42 | void PrRoIPoolingCoorBackwardGpu(
43 | cudaStream_t stream,
44 | F_DEVPTR_IN bottom_data,
45 | F_DEVPTR_IN bottom_rois,
46 | F_DEVPTR_IN top_data,
47 | F_DEVPTR_IN top_diff,
48 | F_DEVPTR_OUT bottom_diff,
49 | const int channels_, const int height_, const int width_,
50 | const int pooled_height_, const int pooled_width_,
51 | const float spatial_scale_,
52 | const int top_count, const int bottom_count);
53 |
54 | #ifdef __cplusplus
55 | } /* !extern "C" */
56 | #endif
57 |
58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */
59 |
60 |
--------------------------------------------------------------------------------
/lib/models/online/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/online/layers/__init__.py
--------------------------------------------------------------------------------
/lib/models/online/layers/activation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class MLU(nn.Module):
7 | r"""MLU activation
8 | """
9 | def __init__(self, min_val, inplace=False):
10 | super().__init__()
11 | self.min_val = min_val
12 | self.inplace = inplace
13 |
14 | def forward(self, input):
15 | return F.elu(F.leaky_relu(input, 1/self.min_val, inplace=self.inplace), self.min_val, inplace=self.inplace)
16 |
17 |
18 | class LeakyReluPar(nn.Module):
19 | r"""LeakyRelu parametric activation
20 | """
21 |
22 | def forward(self, x, a):
23 | return (1.0 - a)/2.0 * torch.abs(x) + (1.0 + a)/2.0 * x
24 |
25 | class LeakyReluParDeriv(nn.Module):
26 | r"""Derivative of the LeakyRelu parametric activation, wrt x.
27 | """
28 |
29 | def forward(self, x, a):
30 | return (1.0 - a)/2.0 * torch.sign(x.detach()) + (1.0 + a)/2.0
31 |
32 |
33 | class BentIdentPar(nn.Module):
34 | r"""BentIdent parametric activation
35 | """
36 | def __init__(self, b=1.0):
37 | super().__init__()
38 | self.b = b
39 |
40 | def forward(self, x, a):
41 | return (1.0 - a)/2.0 * (torch.sqrt(x*x + 4.0*self.b*self.b) - 2.0*self.b) + (1.0 + a)/2.0 * x
42 |
43 |
44 | class BentIdentParDeriv(nn.Module):
45 | r"""BentIdent parametric activation deriv
46 | """
47 | def __init__(self, b=1.0):
48 | super().__init__()
49 | self.b = b
50 |
51 | def forward(self, x, a):
52 | return (1.0 - a)/2.0 * (x / torch.sqrt(x*x + 4.0*self.b*self.b)) + (1.0 + a)/2.0
53 |
54 |
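
For intuition, a short check of `LeakyReluPar` (assuming this module is importable as `models.online.layers.activation`): with the formula above, `a = 0` reduces to ReLU, `a = 1` to the identity, and intermediate values act as a leaky ReLU with negative slope `a`.

```python
import torch
from models.online.layers.activation import LeakyReluPar

act = LeakyReluPar()
x = torch.tensor([-2.0, -0.5, 0.0, 1.0])
print(act(x, 0.0))  # a = 0   -> ReLU(x):   [0.0, 0.0, 0.0, 1.0]
print(act(x, 1.0))  # a = 1   -> identity:  [-2.0, -0.5, 0.0, 1.0]
print(act(x, 0.1))  # a = 0.1 -> leaky ReLU with negative slope 0.1
```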
--------------------------------------------------------------------------------
/lib/models/online/layers/blocks.py:
--------------------------------------------------------------------------------
1 | from torch import nn
2 |
3 |
4 | def conv_block(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1, bias=True,
5 | batch_norm=True, relu=True):
6 | layers = [nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride,
7 | padding=padding, dilation=dilation, bias=bias)]
8 | if batch_norm:
9 | layers.append(nn.BatchNorm2d(out_planes))
10 | if relu:
11 | layers.append(nn.ReLU(inplace=True))
12 | return nn.Sequential(*layers)
13 |
14 |
15 | class LinearBlock(nn.Module):
16 | def __init__(self, in_planes, out_planes, input_sz, bias=True, batch_norm=True, relu=True):
17 | super().__init__()
18 | self.linear = nn.Linear(in_planes*input_sz*input_sz, out_planes, bias=bias)
19 | self.bn = nn.BatchNorm2d(out_planes) if batch_norm else None
20 | self.relu = nn.ReLU(inplace=True) if relu else None
21 |
22 | def forward(self, x):
23 | x = self.linear(x.view(x.shape[0], -1))
24 | if self.bn is not None:
25 | x = self.bn(x.view(x.shape[0], x.shape[1], 1, 1))
26 | if self.relu is not None:
27 | x = self.relu(x)
28 | return x.view(x.shape[0], -1)
--------------------------------------------------------------------------------
/lib/models/online/layers/distance.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class DistanceMap(nn.Module):
7 | """Generate a distance map from a origin center location.
8 | args:
9 | num_bins: Number of bins in the map.
10 | bin_displacement: Displacement of the bins.
11 | """
12 | def __init__(self, num_bins, bin_displacement=1.0):
13 | super().__init__()
14 | self.num_bins = num_bins
15 | self.bin_displacement = bin_displacement
16 |
17 | def forward(self, center, output_sz):
18 | """Create the distance map.
19 | args:
20 | center: Torch tensor with (y,x) center position. Dims (batch, 2)
21 | output_sz: Size of output distance map. 2-dimensional tuple."""
22 |
23 | center = center.view(-1,2)
24 |
25 | bin_centers = torch.arange(self.num_bins, dtype=torch.float32, device=center.device).view(1, -1, 1, 1)
26 |
27 | k0 = torch.arange(output_sz[0], dtype=torch.float32, device=center.device).view(1,1,-1,1)
28 | k1 = torch.arange(output_sz[1], dtype=torch.float32, device=center.device).view(1,1,1,-1)
29 |
30 | d0 = k0 - center[:,0].view(-1,1,1,1)
31 | d1 = k1 - center[:,1].view(-1,1,1,1)
32 |
33 | dist = torch.sqrt(d0*d0 + d1*d1)
34 | bin_diff = dist / self.bin_displacement - bin_centers
35 |
36 | bin_val = torch.cat((F.relu(1.0 - torch.abs(bin_diff[:,:-1,:,:]), inplace=True),
37 | (1.0 + bin_diff[:,-1:,:,:]).clamp(0, 1)), dim=1)
38 |
39 | return bin_val
40 |
41 |
42 |
--------------------------------------------------------------------------------
/lib/models/online/layers/normalization.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class InstanceL2Norm(nn.Module):
7 | """Instance L2 normalization.
8 | """
9 | def __init__(self, size_average=True, eps=1e-5, scale=1.0):
10 | super().__init__()
11 | self.size_average = size_average
12 | self.eps = eps
13 | self.scale = scale
14 |
15 | def forward(self, input):
16 | if self.size_average:
17 | return input * (self.scale * ((input.shape[1] * input.shape[2] * input.shape[3]) / (
18 | torch.sum((input * input).view(input.shape[0], 1, 1, -1), dim=3, keepdim=True) + self.eps)).sqrt())
19 | else:
20 | return input * (self.scale / (torch.sum((input * input).view(input.shape[0], 1, 1, -1), dim=3, keepdim=True) + self.eps).sqrt())
21 |
22 |
--------------------------------------------------------------------------------
/lib/models/online/layers/transform.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from collections import OrderedDict
5 |
6 |
7 | def interpolate(x, sz):
8 | """Interpolate 4D tensor x to size sz."""
9 | sz = sz.tolist() if torch.is_tensor(sz) else sz
10 | return F.interpolate(x, sz, mode='bilinear', align_corners=False) if x.shape[-2:] != sz else x
11 |
12 |
13 | class InterpCat(nn.Module):
14 | """Interpolate and concatenate features of different resolutions."""
15 |
16 | def forward(self, input):
17 | if isinstance(input, (dict, OrderedDict)):
18 | input = list(input.values())
19 |
20 | output_shape = None
21 | for x in input:
22 | if output_shape is None or output_shape[0] > x.shape[-2]:
23 | output_shape = x.shape[-2:]
24 |
25 | return torch.cat([interpolate(x, output_shape) for x in input], dim=-3)
26 |
--------------------------------------------------------------------------------
/lib/models/siamfc.py:
--------------------------------------------------------------------------------
1 | # ------------------------------------------------------------------------------
2 | # Copyright (c) Microsoft
3 | # Licensed under the MIT License.
4 | # Written by Houwen Peng and Zhipeng Zhang
5 | # Email: houwen.peng@microsoft.com
6 | # Main Results: see readme.md
7 | # ------------------------------------------------------------------------------
8 |
9 | import torch
10 | import torch.nn as nn
11 | from torch.autograd import Variable
12 |
13 |
14 | class SiamFC_(nn.Module):
15 | def __init__(self):
16 | super(SiamFC_, self).__init__()
17 | self.features = None
18 | self.connect_model = None
19 | self.zf = None # for online tracking
20 | self.criterion = nn.BCEWithLogitsLoss()
21 |
22 | def feature_extractor(self, x):
23 | return self.features(x)
24 |
25 | def connector(self, template_feature, search_feature):
26 | pred_score = self.connect_model(template_feature, search_feature)
27 | return pred_score
28 |
29 | def _cls_loss(self, pred, label, select):
30 | if len(select.size()) == 0: return 0
31 | pred = torch.index_select(pred, 0, select)
32 | label = torch.index_select(label, 0, select)
33 | return self.criterion(pred, label) # the same as tf version
34 |
35 | def _weighted_BCE(self, pred, label):
36 | pred = pred.view(-1)
37 | label = label.view(-1)
38 | pos = Variable(label.data.eq(1).nonzero().squeeze()).cuda()
39 | neg = Variable(label.data.eq(0).nonzero().squeeze()).cuda()
40 |
41 | loss_pos = self._cls_loss(pred, label, pos)
42 | loss_neg = self._cls_loss(pred, label, neg)
43 | return loss_pos * 0.5 + loss_neg * 0.5
44 |
45 | def template(self, z):
46 | self.zf = self.feature_extractor(z)
47 |
48 | def track(self, x):
49 | xf = self.feature_extractor(x)
50 | score = self.connector(self.zf, xf)
51 | return score
52 |
53 | def forward(self, template, search, label=None):
54 | zf = self.feature_extractor(template)
55 | xf = self.feature_extractor(search)
56 | score = self.connector(zf, xf)
57 | if self.training:
58 | return self._weighted_BCE(score, label)
59 | else:
60 | raise ValueError('forward is only used for training.')
61 |
62 |
63 |
64 |
65 |
66 |
--------------------------------------------------------------------------------
/lib/online/__init__.py:
--------------------------------------------------------------------------------
1 | from .tensorlist import TensorList
2 | from .tensordict import TensorDict
3 | from .loading import load_network
4 |
--------------------------------------------------------------------------------
/lib/online/base_actor.py:
--------------------------------------------------------------------------------
1 | from online import TensorDict
2 | import torch.nn as nn
3 |
4 |
5 | class BaseActor:
6 | """ Base class for actor. The actor class handles the passing of the data through the network
7 | and calculating the loss"""
8 | def __init__(self, net, objective):
9 | """
10 | args:
11 | net - The network to train
12 | objective - The loss function
13 | """
14 | self.net = net
15 | self.objective = objective
16 |
17 | def __call__(self, data: TensorDict):
18 | """ Called in each training iteration. Should pass in input data through the network, calculate the loss, and
19 | return the training stats for the input data
20 | args:
21 | data - A TensorDict containing all the necessary data blocks.
22 |
23 | returns:
24 | loss - loss for the input data
25 | stats - a dict containing detailed losses
26 | """
27 | raise NotImplementedError
28 |
29 | def to(self, device):
30 | """ Move the network to device
31 | args:
32 | device - device to use. 'cpu' or 'cuda'
33 | """
34 | self.net.to(device)
35 |
36 | def train(self, mode=True):
37 | """ Set whether the network is in train mode.
38 | args:
39 | mode (True) - Bool specifying whether in training mode.
40 | """
41 | self.net.train(mode)
42 |
43 |
44 | # fix backbone again
45 | # fix the first three blocks
46 | print('======> fix backbone again <=======')
47 | for param in self.net.feature_extractor.parameters():
48 | param.requires_grad = False
49 | for m in self.net.feature_extractor.modules():
50 | if isinstance(m, nn.BatchNorm2d):
51 | m.eval()
52 |
53 | for layer in ['layeronline']:
54 | for param in getattr(self.net.feature_extractor.features.features, layer).parameters():
55 | param.requires_grad = True
56 | for m in getattr(self.net.feature_extractor.features.features, layer).modules():
57 | if isinstance(m, nn.BatchNorm2d):
58 | m.train()
59 |
60 | print('double check trainable')
61 | self.check_trainable(self.net)
62 |
63 |
64 |
65 | def eval(self):
66 | """ Set network to eval mode"""
67 | self.train(False)
68 |
69 | def check_trainable(self, model):
70 | """
71 | print trainable params info
72 | """
73 | trainable_params = [p for p in model.parameters() if p.requires_grad]
74 | print('trainable params:')
75 | for name, param in model.named_parameters():
76 | if param.requires_grad:
77 | print(name)
78 |
79 | assert len(trainable_params) > 0, 'no trainable parameters'
80 |
--------------------------------------------------------------------------------
/lib/online/loading.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import os
3 | import sys
4 | from pathlib import Path
5 | import importlib
6 | from online.model_constructor import NetConstructor
7 |
8 | def check_keys(model, pretrained_state_dict):
9 | ckpt_keys = set(pretrained_state_dict.keys())
10 | model_keys = set(model.state_dict().keys())
11 | used_pretrained_keys = model_keys & ckpt_keys
12 | unused_pretrained_keys = ckpt_keys - model_keys
13 | missing_keys = model_keys - ckpt_keys
14 |
15 | print('missing keys:{}'.format(missing_keys))
16 |
17 | print('=========================================')
18 | # clean it to no batch_tracked key words
19 | unused_pretrained_keys = [k for k in unused_pretrained_keys if 'num_batches_tracked' not in k]
20 |
21 | print('unused checkpoint keys:{}'.format(unused_pretrained_keys))
22 | # print('used keys:{}'.format(used_pretrained_keys))
23 | assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
24 | return True
25 |
26 | def load_pretrain(model, pretrained_dict):
27 |
28 | device = torch.cuda.current_device()
29 |
30 | check_keys(model, pretrained_dict)
31 | model.load_state_dict(pretrained_dict, strict=False)
32 | return model
33 |
34 |
35 | def load_network(ckpt_path=None, constructor_fun_name='online_resnet18', constructor_module='lib.models.online.bbreg.online'):
36 |
37 | # Load network
38 | checkpoint_dict = torch.load(ckpt_path) # key: net
39 |
40 | # get model structure from constructor
41 | net_constr = NetConstructor(fun_name=constructor_fun_name, fun_module=constructor_module)
42 | # Legacy networks before refactoring
43 |
44 | net = net_constr.get()
45 |
46 | net = load_pretrain(net, checkpoint_dict['net'])
47 |
48 | return net
49 |
50 |
51 | def load_weights(net, path, strict=True):
52 | checkpoint_dict = torch.load(path)
53 | weight_dict = checkpoint_dict['net']
54 | net.load_state_dict(weight_dict, strict=strict)
55 | return net
56 |
57 |
58 | def torch_load_legacy(path):
59 | """Load network with legacy environment."""
60 |
61 | # Setup legacy env (for older networks)
62 | _setup_legacy_env()
63 |
64 | # Load network
65 | checkpoint_dict = torch.load(path)
66 |
67 | # Cleanup legacy
68 | _cleanup_legacy_env()
69 |
70 | return checkpoint_dict
71 |
72 |
73 | def _setup_legacy_env():
74 | importlib.import_module('ltr')
75 | sys.modules['dlframework'] = sys.modules['ltr']
76 | sys.modules['dlframework.common'] = sys.modules['ltr']
77 | for m in ('model_constructor', 'stats', 'settings', 'local'):
78 | importlib.import_module('ltr.admin.'+m)
79 | sys.modules['dlframework.common.utils.'+m] = sys.modules['ltr.admin.'+m]
80 |
81 |
82 | def _cleanup_legacy_env():
83 | del_modules = []
84 | for m in sys.modules.keys():
85 | if m.startswith('dlframework'):
86 | del_modules.append(m)
87 | for m in del_modules:
88 | del sys.modules[m]
89 |
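A minimal usage sketch of the loading helpers above. The checkpoint path is hypothetical, and `lib/` is assumed to be on `sys.path` (as set up by `tracking/_init_paths.py`):

```python
from online.loading import load_network, load_weights

# rebuild the online network from its constructor entry and load the 'net' weights
net = load_network(ckpt_path='snapshot/online_model.pth',
                   constructor_fun_name='online_resnet18',
                   constructor_module='lib.models.online.bbreg.online')
net = net.cuda().eval()

# or load weights into an already-constructed network
net = load_weights(net, 'snapshot/online_model.pth', strict=False)
```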
--------------------------------------------------------------------------------
/lib/online/model_constructor.py:
--------------------------------------------------------------------------------
1 | from functools import wraps
2 | import importlib
3 |
4 |
5 | def model_constructor(f):
6 | """ Wraps the function 'f' which returns the network. An extra field 'constructor' is added to the network returned
7 | by 'f'. This field contains an instance of the 'NetConstructor' class, which contains the information needed to
8 | re-construct the network, such as the name of the function 'f', the function arguments etc. Thus, the network can
9 | be easily constructed from a saved checkpoint by calling NetConstructor.get() function.
10 | """
11 | @wraps(f)
12 | def f_wrapper(*args, **kwds):
13 | net_constr = NetConstructor(f.__name__, f.__module__, args, kwds)
14 | output = f(*args, **kwds)
15 | if isinstance(output, (tuple, list)):
16 | # Assume first argument is the network
17 | output[0].constructor = net_constr
18 | else:
19 | output.constructor = net_constr
20 | return output
21 | return f_wrapper
22 |
23 |
24 | class NetConstructor:
25 | """ Class to construct networks. Takes as input the function name (e.g. atom_resnet18), the name of the module
26 | which contains the network function (e.g. ltr.models.bbreg.atom) and the arguments for the network
27 | function. The class object can then be stored along with the network weights to re-construct the network."""
28 |     def __init__(self, fun_name, fun_module, args=None, kwds=None):
29 | """
30 | args:
31 | fun_name - The function which returns the network
32 | fun_module - the module which contains the network function
33 | args - arguments which are passed to the network function
34 | kwds - arguments which are passed to the network function
35 | """
36 | self.fun_name = fun_name
37 | self.fun_module = fun_module
38 | #self.args = args
39 | #self.kwds = kwds
40 |
41 | def get(self):
42 | """ Rebuild the network by calling the network function with the correct arguments. """
43 | net_module = importlib.import_module(self.fun_module)
44 | net_fun = getattr(net_module, self.fun_name)
45 | return net_fun()
46 |
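A minimal sketch of rebuilding a network from a stored `NetConstructor`. The `(fun_name, fun_module)` pair below is the default used by `online/loading.py`; `lib/` is assumed to be on `sys.path`:

```python
from online.model_constructor import NetConstructor

# the constructor entry is typically pickled into the checkpoint alongside the weights
constr = NetConstructor(fun_name='online_resnet18',
                        fun_module='lib.models.online.bbreg.online')
net = constr.get()   # imports the module and calls online_resnet18()
```

Functions decorated with `@model_constructor` get such an object attached as `net.constructor`, so it can be saved with the checkpoint and later used to rebuild the same architecture.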
--------------------------------------------------------------------------------
/lib/online/operation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn.functional as F
3 | from online.tensorlist import tensor_operation, TensorList
4 |
5 |
6 | @tensor_operation
7 | def conv2d(input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor = None, stride=1, padding=0, dilation=1, groups=1, mode=None):
8 | """Standard conv2d. Returns the input if weight=None."""
9 |
10 | if weight is None:
11 | return input
12 |
13 | ind = None
14 | if mode is not None:
15 | if padding != 0:
16 | raise ValueError('Cannot input both padding and mode.')
17 | if mode == 'same':
18 | padding = (weight.shape[2]//2, weight.shape[3]//2)
19 | if weight.shape[2] % 2 == 0 or weight.shape[3] % 2 == 0:
20 | ind = (slice(-1) if weight.shape[2] % 2 == 0 else slice(None),
21 | slice(-1) if weight.shape[3] % 2 == 0 else slice(None))
22 | elif mode == 'valid':
23 | padding = (0, 0)
24 | elif mode == 'full':
25 | padding = (weight.shape[2]-1, weight.shape[3]-1)
26 | else:
27 | raise ValueError('Unknown mode for padding.')
28 |
29 | out = F.conv2d(input, weight, bias=bias, stride=stride, padding=padding, dilation=dilation, groups=groups)
30 | if ind is None:
31 | return out
32 | return out[:,:,ind[0],ind[1]]
33 |
34 |
35 | @tensor_operation
36 | def conv1x1(input: torch.Tensor, weight: torch.Tensor):
37 | """Do a convolution with a 1x1 kernel weights. Implemented with matmul, which can be faster than using conv."""
38 |
39 | if weight is None:
40 | return input
41 |
42 | return torch.matmul(weight.view(weight.shape[0], weight.shape[1]),
43 | input.view(input.shape[0], input.shape[1], -1)).view(input.shape[0], weight.shape[0], input.shape[2], input.shape[3])
44 |
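A small sketch of the padding modes; plain tensors are used here (the `tensor_operation` wrapper also maps the call over `TensorList` inputs), and the shapes are only illustrative:

```python
import torch
from online import operation

x = torch.rand(1, 16, 31, 31)              # a single feature map
w = torch.rand(16, 16, 5, 5)               # an odd-sized filter
y = operation.conv2d(x, w, mode='same')    # padding=(2, 2), so the 31x31 size is kept
assert y.shape == x.shape

p = torch.rand(8, 16, 1, 1)                # a 1x1 projection matrix
z = operation.conv1x1(x, p)                # matmul-based 1x1 convolution
assert z.shape == (1, 8, 31, 31)
```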
--------------------------------------------------------------------------------
/lib/online/optim.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import sys
3 | from online import optimization, TensorList, operation
4 | import math
5 |
6 |
7 | class FactorizedConvProblem(optimization.L2Problem):
8 | def __init__(self, training_samples: TensorList, y: TensorList, filter_reg: torch.Tensor, projection_reg, params, sample_weights: TensorList,
9 | projection_activation, response_activation):
10 | self.training_samples = training_samples
11 | self.y = y
12 | self.filter_reg = filter_reg
13 | self.sample_weights = sample_weights
14 | self.params = params
15 | self.projection_reg = projection_reg
16 | self.projection_activation = projection_activation
17 | self.response_activation = response_activation
18 |
19 | self.diag_M = self.filter_reg.concat(projection_reg)
20 |
21 | def __call__(self, x: TensorList):
22 | """
23 | Compute residuals
24 | :param x: [filters, projection_matrices]
25 | :return: [data_terms, filter_regularizations, proj_mat_regularizations]
26 | """
27 | filter = x[:len(x)//2] # w2 in paper
28 | P = x[len(x)//2:] # w1 in paper
29 |
30 | # Do first convolution
31 | compressed_samples = operation.conv1x1(self.training_samples, P).apply(self.projection_activation)
32 |
33 | # Do second convolution
34 | residuals = operation.conv2d(compressed_samples, filter, mode='same').apply(self.response_activation)
35 |
36 | # Compute data residuals
37 | residuals = residuals - self.y
38 |
39 | residuals = self.sample_weights.sqrt().view(-1, 1, 1, 1) * residuals
40 |
41 |         # Add regularization for the filter
42 | residuals.extend(self.filter_reg.apply(math.sqrt) * filter)
43 |
44 | # Add regularization for projection matrix
45 | residuals.extend(self.projection_reg.apply(math.sqrt) * P)
46 |
47 | return residuals
48 |
49 |
50 | def ip_input(self, a: TensorList, b: TensorList):
51 | num = len(a) // 2 # Number of filters
52 | a_filter = a[:num]
53 | b_filter = b[:num]
54 | a_P = a[num:]
55 | b_P = b[num:]
56 |
57 | # Filter inner product
58 | # ip_out = a_filter.reshape(-1) @ b_filter.reshape(-1)
59 | ip_out = operation.conv2d(a_filter, b_filter).view(-1)
60 |
61 | # Add projection matrix part
62 | # ip_out += a_P.reshape(-1) @ b_P.reshape(-1)
63 | ip_out += operation.conv2d(a_P.view(1,-1,1,1), b_P.view(1,-1,1,1)).view(-1)
64 |
65 | # Have independent inner products for each filter
66 | return ip_out.concat(ip_out.clone())
67 |
68 | def M1(self, x: TensorList):
69 | return x / self.diag_M
70 |
71 |
72 | class ConvProblem(optimization.L2Problem):
73 | def __init__(self, training_samples: TensorList, y: TensorList, filter_reg: torch.Tensor, sample_weights: TensorList, response_activation):
74 | self.training_samples = training_samples
75 | self.y = y
76 | self.filter_reg = filter_reg
77 | self.sample_weights = sample_weights
78 | self.response_activation = response_activation
79 |
80 | def __call__(self, x: TensorList):
81 | """
82 | Compute residuals
83 | :param x: [filters]
84 | :return: [data_terms, filter_regularizations]
85 | """
86 | # Do convolution and compute residuals
87 | residuals = operation.conv2d(self.training_samples, x, mode='same').apply(self.response_activation)
88 | residuals = residuals - self.y
89 |
90 | residuals = self.sample_weights.sqrt().view(-1, 1, 1, 1) * residuals
91 |
92 |         # Add regularization for the filter
93 | residuals.extend(self.filter_reg.apply(math.sqrt) * x)
94 |
95 | return residuals
96 |
97 | def ip_input(self, a: TensorList, b: TensorList):
98 | # return a.reshape(-1) @ b.reshape(-1)
99 | # return (a * b).sum()
100 | return operation.conv2d(a, b).view(-1)
101 |
--------------------------------------------------------------------------------
/lib/online/tensordict.py:
--------------------------------------------------------------------------------
1 | from collections import OrderedDict
2 | import torch
3 |
4 |
5 | class TensorDict(OrderedDict):
6 | """Container mainly used for dicts of torch tensors. Extends OrderedDict with pytorch functionality."""
7 |
8 | def concat(self, other):
9 | """Concatenates two dicts without copying internal data."""
10 | return TensorDict(self, **other)
11 |
12 | def copy(self):
13 | return TensorDict(super(TensorDict, self).copy())
14 |
15 | def __getattr__(self, name):
16 | if not hasattr(torch.Tensor, name):
17 |             raise AttributeError('\'TensorDict\' object has no attribute \'{}\''.format(name))
18 |
19 | def apply_attr(*args, **kwargs):
20 | return TensorDict({n: getattr(e, name)(*args, **kwargs) if hasattr(e, name) else e for n, e in self.items()})
21 | return apply_attr
22 |
23 | def attribute(self, attr: str, *args):
24 | return TensorDict({n: getattr(e, attr, *args) for n, e in self.items()})
25 |
26 | def apply(self, fn, *args, **kwargs):
27 | return TensorDict({n: fn(e, *args, **kwargs) for n, e in self.items()})
28 |
29 | @staticmethod
30 | def _iterable(a):
31 | return isinstance(a, (TensorDict, list))
32 |
33 |
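A short sketch of the attribute forwarding: tensor methods called on the dict are applied to every value via `__getattr__`:

```python
import torch
from online.tensordict import TensorDict

d = TensorDict({'template': torch.rand(1, 3, 127, 127),
                'search': torch.rand(1, 3, 255, 255)})

d_half = d.half()                        # .half() is forwarded to every tensor
shapes = d.apply(lambda t: t.shape)      # explicit element-wise apply
merged = d.concat(TensorDict({'label': torch.zeros(1)}))
```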
--------------------------------------------------------------------------------
/lib/online/tracking.py:
--------------------------------------------------------------------------------
1 | from .base_actor import BaseActor
2 |
3 |
4 | class ONLINEActor(BaseActor):
5 | """Actor for training the ONLINE network."""
6 | def __init__(self, net, objective, loss_weight=None):
7 | super().__init__(net, objective)
8 | if loss_weight is None:
9 | loss_weight = {'iou': 1.0, 'test_clf': 1.0}
10 | self.loss_weight = loss_weight
11 |
12 | def __call__(self, data):
13 | """
14 | args:
15 | data - The input data, should contain the fields 'train_images', 'test_images', 'train_anno',
16 | 'test_proposals', 'proposal_iou' and 'test_label'.
17 |
18 | returns:
19 | loss - the training loss
20 | stats - dict containing detailed losses
21 | """
22 | # Run network
23 | target_scores = self.net(train_imgs=data['train_images'],
24 | test_imgs=data['test_images'],
25 | train_bb=data['train_anno'],
26 | test_proposals=data['test_proposals'])
27 |
28 | # Classification losses for the different optimization iterations
29 | clf_losses_test = [self.objective['test_clf'](s, data['test_label'], data['test_anno']) for s in target_scores]
30 |
31 | # Loss of the final filter
32 | clf_loss_test = clf_losses_test[-1]
33 | loss_target_classifier = self.loss_weight['test_clf'] * clf_loss_test
34 |
35 | # Loss for the initial filter iteration
36 | loss_test_init_clf = 0
37 | if 'test_init_clf' in self.loss_weight.keys():
38 | loss_test_init_clf = self.loss_weight['test_init_clf'] * clf_losses_test[0]
39 |
40 | # Loss for the intermediate filter iterations
41 | loss_test_iter_clf = 0
42 | if 'test_iter_clf' in self.loss_weight.keys():
43 | test_iter_weights = self.loss_weight['test_iter_clf']
44 | if isinstance(test_iter_weights, list):
45 | loss_test_iter_clf = sum([a*b for a, b in zip(test_iter_weights, clf_losses_test[1:-1])])
46 | else:
47 | loss_test_iter_clf = (test_iter_weights / (len(clf_losses_test) - 2)) * sum(clf_losses_test[1:-1])
48 |
49 | # Total loss
50 | # loss = loss_iou + loss_target_classifier + loss_test_init_clf + loss_test_iter_clf
51 | loss = loss_target_classifier + loss_test_init_clf + loss_test_iter_clf
52 |
53 | # Log stats
54 | stats = {'Loss/total': loss.item(),
55 | # 'Loss/iou': loss_iou.item(),
56 | 'Loss/iou': 0,
57 | 'Loss/target_clf': loss_target_classifier.item()}
58 | if 'test_init_clf' in self.loss_weight.keys():
59 | stats['Loss/test_init_clf'] = loss_test_init_clf.item()
60 | if 'test_iter_clf' in self.loss_weight.keys():
61 | stats['Loss/test_iter_clf'] = loss_test_iter_clf.item()
62 | stats['ClfTrain/test_loss'] = clf_loss_test.item()
63 | if len(clf_losses_test) > 0:
64 | stats['ClfTrain/test_init_loss'] = clf_losses_test[0].item()
65 | if len(clf_losses_test) > 2:
66 | stats['ClfTrain/test_iter_loss'] = sum(clf_losses_test[1:-1]).item() / (len(clf_losses_test) - 2)
67 |
68 | return loss, stats
69 |
--------------------------------------------------------------------------------
/lib/tutorial/Ocean/ocean.md:
--------------------------------------------------------------------------------
1 | # Ocean tutorial
2 | ## Testing
3 |
4 | We assume the root path is $TracKit, e.g. `/home/zpzhang/TracKit`
5 | ### Set up environment
6 |
7 | ```
8 | cd $TracKit/lib/tutorial
9 | bash install.sh $conda_path TracKit
10 | cd $TracKit
11 | conda activate TracKit
12 | python setup.py develop
13 | ```
14 | `$conda_path` denotes your anaconda path, e.g. `/home/zpzhang/anaconda3`
15 |
16 | - **[Optional]** Install TensorRT according to the [tutorial](../install_trt.md).
17 |
18 | **Note:** we perform TensorRT evaluation on an RTX 2080 Ti with CUDA 10.0. If you fail to install it, please use the PyTorch version.
19 |
20 |
21 |
22 | ### Prepare data and models
23 | 1. Download the pretrained [PyTorch model](https://drive.google.com/drive/folders/1XU5wmyC7MsI6C_9Lv-UH1mwDIh57FFf8?usp=sharing) and [TensorRT model](https://github.com/researchmm/TracKit/releases/tag/tensorrt) to `$TracKit/snapshot`.
24 | 2. Download [json](https://drive.google.com/drive/folders/1kYX_c8rw7HMW0e5V400vaLy9huiYvDHE?usp=sharing) files of testing data and put them in `$TracKit/dataset`.
25 | 3. Download testing data, e.g. VOT2019, and put it in `$TracKit/dataset`. Please download each dataset from its official website; the directories should be named like `VOT2019`, `OTB2015`, `GOT10K`, `LASOT`.
26 |
27 | ### Testing
28 | In root path `$TracKit`,
29 |
30 | ```
31 | python tracking/test_ocean.py --arch Ocean --resume snapshot/OceanV.pth --dataset VOT2019
32 | ```
33 | ### Evaluation
34 | ```
35 | python lib/eval_toolkit/bin/eval.py --dataset_dir dataset --dataset VOT2019 --tracker_result_dir result/VOT2019 --trackers Ocean
36 | ```
37 | You may test other datasets with our code. Please make sure the pre-trained model passed to `--resume` matches the dataset passed to `--dataset`. See [ocean_model.txt](https://drive.google.com/file/d/1T2QjyxN4movpFtpzCH8xHHX5_Dz7G5Y6/view?usp=sharing) for their correspondences.
38 |
39 |
40 | ### TensorRT toy
41 | Testing video: `twinnings` in OTB2015 (472 frames)
42 | Testing GPU: `RTX2080Ti`
43 |
44 | - TensorRT (**149fps**)
45 | ```
46 | python tracking/test_ocean.py --arch OceanTRT --resume snapshot/OceanV.pth --dataset OTB2015 --video twinnings
47 | ```
48 |
49 | - Pytorch (**68fps**)
50 | ```
51 | python tracking/test_ocean.py --arch Ocean --resume snapshot/OceanV.pth --dataset OTB2015 --video twinnings
52 | ```
53 |
54 | **Note:**
55 | - The TensorRT version of Ocean only supports an input size of 255.
56 | - The current TensorRT release does not support some operations well. We will keep updating our code as official TensorRT evolves. If you want to test on a benchmark, please use the PyTorch version.
57 | - If you want to use our code in a real-world product, our TensorRT code may help you.
58 |
59 |
60 |
61 | :cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud:
62 | ## Training
63 | #### prepare data
64 | - Please download training data from [GoogleDrive](https://drive.google.com/drive/folders/1ehjVhg6ewdWSWt709zd1TkjWF7UJlQlq?usp=sharing) or [BaiduDrive(urxq)](https://pan.baidu.com/s/1jGPEJieir5OWqCmibV3yrQ), and then put them in `$TracKit/data`
65 | - You could also refer to scripts in `$TracKit/lib/dataset/crop` to process your custom data.
66 | - For split archives on BaiduDrive, please use `cat got10k.tar.* | tar -zxv` to merge and unzip them.
67 |
68 |
69 | #### prepare pretrained model
70 | Please download the pretrained model on ImageNet [here](https://drive.google.com/drive/folders/1ctoxaPiS9qinhmN_bl5z3VNhYnrhl99t?usp=sharing), and then put it in `$TracKit/pretrain`.
71 |
72 | #### modify settings
73 | Please modify the training settings in `$TracKit/experiments/train/Ocean.yaml`. The default number of GPUs and the batch size used in the paper are 8 and 32, respectively.
74 |
75 | #### run
76 | In root path $TracKit,
77 | ```
78 | python tracking/onekey.py
79 | ```
80 | This script integrates **train**, **epoch test** and **tune**. If you are not familiar with our whole framework, it is suggested to run them one by one (toggle the `ISTRUE` keys in `$TracKit/experiments/train/Ocean.yaml`). Once you know the framework well, simply run this one-key script. VOT2018 is much more sensitive than other datasets, so I would suggest tuning 4000-5000 parameter groups for it. For other datasets like VOT2019/OTB, 1500-2000 groups may be enough. For a truly large dataset like LASOT, I would suggest tuning with grid search (in my experience, selecting the epoch and tuning `window_influence` is enough for LASOT).
81 |
--------------------------------------------------------------------------------
/lib/tutorial/OceanPlus/oceanplus.md:
--------------------------------------------------------------------------------
1 | # OceanPlus tutorial
2 | ## Testing
3 |
4 | We assume the root path is $TracKit, e.g. `/home/zpzhang/TracKit`
5 |
6 | ### Set up environment
7 |
8 | ```
9 | cd $TracKit/lib/tutorial
10 | bash install.sh $conda_path TracKit
11 | cd $TracKit
12 | conda activate TracKit
13 | python setup.py develop
14 | ```
15 | `$conda_path` denotes your anaconda path, e.g. `/home/zpzhang/anaconda3`
16 |
17 |
18 | **Note:** all VOT2020 results in the paper (including those of other methods) were obtained with `vot-toolkit=0.2.0`. Please use the same environment to reproduce our results.
19 |
20 |
21 | ### Prepare data and models
22 |
23 | 1. Follow the official [guidelines](https://www.votchallenge.net/howto/tutorial_python.html) to set up the VOT workspace.
24 |
25 | 2. Download the models from [GoogleDrive](https://drive.google.com/drive/folders/1_uagYRFpQmYoWAc0oeiAY49gHwQxztrN?usp=sharing) and put them in `$TracKit/snapshot`.
26 |
27 |
28 | ### Testing
29 |
30 | #### For VOT2020
31 |
32 | - **Note: the results are 0.444/0.451 for the single-stage (MSS) and multi-stage (MMS) models, respectively. This is a bit higher than reported.**
33 |
34 | 1. Modify scripts
35 |
36 |    - Set the model path in line 81 of `$TracKit/tracking/vot_wrap.py` or `$TracKit/tracking/vot_wrap_mms.py`.
37 |
38 |    - for the model without the MMS network (faster):
39 |    ```
40 |    set the running script in the VOT2020 workspace (i.e. `trackers.ini`) to `vot_wrap.py`
41 |    ```
42 |    - for the model with the MMS network (slower):
43 |    ```
44 |    set the running script in the VOT2020 workspace (i.e. `trackers.ini`) to `vot_wrap_mms.py`
45 |    ```
46 |    - Note: We provide a reference `trackers.ini` in `$TracKit/trackers.ini`. Please find more running guidelines on the official VOT [website](https://www.votchallenge.net/howto/tutorial_python.html).
47 |
48 | 2. run
49 | ```
50 | CUDA_VISIBLE_DEVICES=0 vot evaluate --workspace $workspace_path OceanPlus
51 | ```
52 |    - Note: If you only want to run the "baseline" experiment in VOT to save time, please remove lines 10-21 in `$root/anaconda3/envs/TracKit/lib/python3.7/site-packages/vot/stack/vot2020.yaml`.
53 |
54 |
55 | 3. evaluate
56 | ```
57 | vot analysis --workspace $workspace_path OceanPlus --output json
58 | ```
59 |
60 |
61 | We also provide the trackers submitted to the VOT2020 challenge, i.e. [[OceanPlus]](https://drive.google.com/file/d/1DNDZshPed_fcl1DB2lKiOU1bjYC_dxtp/view?usp=sharing), [[OceanPlus-Online]](https://drive.google.com/file/d/1UahJTVPfV0gcqKlBEFc6nwIaqNhyjKQQ/view?usp=sharing), [[OceanPlus-Online-TRT]](https://drive.google.com/file/d/1pdrgyx6XKzN4b3Cyplnr5bcB4TilRS1y/view?usp=sharing).
62 |
63 | #### For VOS
64 | 1. prepare data
65 | Download the DAVIS dataset, and then
66 | ```
67 | ln -sfb $path_to_DAVIS-trainval $TracKit/dataset/DAVIS
68 | ```
69 |
70 | 2. run
71 | ```
72 | CUDA_VISIBLE_DEVICES=0 python tracking/test_oceanplus.py --arch OceanPlus --mms True --dataset DAVIS2016 --resume snapshot/OceanPlusMMS.pth
73 | ```
74 |
75 | 3. evaluate
76 | ```
77 | python lib/core/eval_davis.py --dataset DAVIS2016 --num_threads 1 --datapath dataset/DAVIS
78 | ```
79 |
80 | :cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud:
81 |
82 | The training code will be released after the paper is accepted. Thanks for your interest!
83 |
84 |
85 |
--------------------------------------------------------------------------------
/lib/tutorial/SiamDW/siamdw.md:
--------------------------------------------------------------------------------
1 | # SiamDW tutorial
2 | ## Testing
3 |
4 | We assume the root path is $TracKit, e.g. `/home/zpzhang/TracKit`
5 | ### Set up environment
6 | Please follow [readme of Ocean](../Ocean/ocean.md) to install the environment.
7 |
8 | ### Prepare data and models
9 | 1. Download the pretrained [PyTorch model](https://drive.google.com/file/d/1SzIql02jJ6Id1k0M6f-zjUA3RgAm6E5U/view?usp=sharing) to `$TracKit/snapshot`.
10 | 2. Download [json](https://drive.google.com/open?id=1S-RkzyMVRFWueWW91NmZldUJuDyhGdp1) files of testing data and put them in `$TracKit/dataset`.
11 | 3. Download testing data, e.g. VOT2017, and put it in `$TracKit/dataset`.
12 |
13 | ### Testing
14 | In root path `$TracKit`,
15 | ```
16 | python tracking/test_siamdw.py --arch SiamDW --resume snapshot/siamdw_res22w.pth --dataset VOT2017
17 | ```
18 |
19 |
20 | ### Training
21 | In root path `$TracKit`,
22 | 1. Download the pretrained model from [here](https://drive.google.com/file/d/1wXyW82idctCd4FkqKxvuWsL707joEIeI/view?usp=sharing) and put it in `pretrain` (named `pretrain.model`).
23 |
24 | 2. Modify `experiments/train/SiamDW.yaml` according to your needs (in my opinion, please use GOT10K with 200k pairs per epoch).
25 | ```
26 | python tracking/train_siamdw.py
27 | ```
28 |
29 | Then, please follow the `epoch testing` and `tuning` steps as in Ocean.
30 |
--------------------------------------------------------------------------------
/lib/tutorial/install.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | if [ "$#" -ne 2 ]; then
4 | echo "ERROR! Illegal number of parameters. Usage: bash install.sh conda_install_path environment_name"
5 |     exit 1
6 | fi
7 |
8 | conda_install_path=$1
9 | conda_env_name=$2
10 |
11 | source $conda_install_path/etc/profile.d/conda.sh
12 | echo "****************** Creating conda environment ${conda_env_name} python=3.7 ******************"
13 | conda create -y -n $conda_env_name python=3.7
14 |
15 | echo ""
16 | echo ""
17 | echo "****************** Activating conda environment ${conda_env_name} ******************"
18 | conda activate $conda_env_name
19 |
20 | echo ""
21 | echo ""
22 | echo "****************** Installing pytorch with cuda10 ******************"
23 | conda install -y pytorch==1.1.0 torchvision==0.3.0 cudatoolkit=10.0 -c pytorch
24 |
25 | echo ""
26 | echo ""
27 | echo "****************** Installing matplotlib 2.2.2 ******************"
28 | conda install -y matplotlib=2.2.2
29 |
30 | echo ""
31 | echo ""
32 | echo "****************** Installing pandas ******************"
33 | conda install -y pandas
34 |
35 | echo ""
36 | echo ""
37 | echo "****************** Installing opencv ******************"
38 | pip install opencv-python
39 |
40 | echo ""
41 | echo ""
42 | echo "****************** Installing tensorboardX ******************"
43 | pip install tensorboardX
44 |
45 | echo ""
46 | echo ""
47 | echo "****************** Installing cython ******************"
48 | conda install -y cython
49 |
50 |
51 | echo ""
52 | echo ""
53 | echo "****************** Installing skimage ******************"
54 | pip install scikit-image
55 |
56 |
57 |
58 | echo ""
59 | echo ""
60 | echo "****************** Installing pillow ******************"
61 | pip install 'pillow<7.0.0'
62 |
63 | echo ""
64 | echo ""
65 | echo "****************** Installing scipy ******************"
66 | pip install scipy
67 |
68 | echo ""
69 | echo ""
70 | echo "****************** Installing shapely ******************"
71 | pip install shapely
72 |
73 | echo ""
74 | echo ""
75 | echo "****************** Installing easydict ******************"
76 | pip install easydict
77 |
78 | echo ""
79 | echo ""
80 | echo "****************** Installing jpeg4py python wrapper ******************"
81 | pip install jpeg4py
82 | pip install mpi4py
83 | pip install ray==0.8.7
84 | pip install hyperopt
85 |
86 |
87 | echo ""
88 | echo ""
89 | echo "****************** Installing vot python toolkit ******************"
90 | pip install git+https://github.com/votchallenge/vot-toolkit-python@7a1b807df3d64ea310c554e9f487f1e5f53bf249
91 |
92 | echo "****************** Installation complete! ******************"
93 |
--------------------------------------------------------------------------------
/lib/tutorial/install_trt.md:
--------------------------------------------------------------------------------
1 | # Install TensorRT
2 | We install TensorRT on an RTX 2080 Ti with CUDA 10.0. If you fail to install it, please use the PyTorch version.
3 |
4 | 1) install pycuda
5 | ```
6 | export C_INCLUDE_PATH=/usr/local/cuda-10.0/include/:${C_INCLUDE_PATH}
7 | export CPLUS_INCLUDE_PATH=/usr/local/cuda-10.0/include/:${CPLUS_INCLUDE_PATH}
8 | pip install pycuda
9 | ```
10 | 2) download tensorrt
11 | - Go to [NVIDIA-TENSORRT](https://developer.nvidia.com/tensorrt) and then click `Download Now`.
12 | - Log in and download TensorRT 7 (please select the version that suits your platform). We use the [TensorRT 7.0.0.11 for Ubuntu 18.04 and CUDA 10.0 tar package](https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/7.0/7.0.0.11/tars/TensorRT-7.0.0.11.Ubuntu-18.04.x86_64-gnu.cuda-10.0.cudnn7.6.tar.gz) in our experiments.
13 |
14 | 3) install
15 | ```bash
16 | tar -zxf TensorRT-7.0.0.11.Ubuntu-18.04.x86_64-gnu.cuda-10.0.cudnn7.6.tar.gz
17 | vim ~/.bashrc
18 |
19 | # Add the following line to your ~/.bashrc
20 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:
21 | # for example
22 | # export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/zpzhang/TensorRT-7.0.0.11/lib
23 |
24 | source ~/.bashrc
25 | conda activate OceanOnlineTRT
26 | cd TensorRT-7.0.0.11/python
27 | # Remember to pick the wheel matching your Python version (3.7 here)
28 | pip install tensorrt-7.0.0.11-cp37-none-linux_x86_64.whl
29 | cd TensorRT-7.0.0.11/graphsurgeon
30 | pip install graphsurgeon-0.4.1-py2.py3-none-any.whl
31 | ```
32 |
33 | 4) Verify the installation
34 | ```
35 | python
36 | import tensorrt
37 | ```
38 |
39 | 5) Install Torch2trt
40 | ```
41 | conda activate OceanOnlineTRT
42 | git clone https://github.com/NVIDIA-AI-IOT/torch2trt
43 | cd torch2trt
44 | python setup.py install
45 | ```
46 | Verify the installation
47 | ```
48 | python
49 | import torch2trt
50 | ```
51 |
52 |
53 |
54 |
55 | ### Note
56 | - If you meet the error `ImportError: libcudart.so.10.0: cannot open shared object file: No such file or directory`, please run `sudo cp /usr/local/cuda-10.0/lib64/libcudart.so.10.0 /usr/local/lib/libcudart.so.10.0 && sudo ldconfig`.
57 |
58 | - If you meet the error `PermissionError: [Errno 13] Permission denied: '/tmp/torch_extensions/_prroi_pooling/lock'`, please remove `/tmp/torch_extensions/_prroi_pooling` and rerun the tracker. This may happen if another user on your machine has compiled prroi_pooling before. Besides, if you have compiled prroi_pooling before yourself, please remove `/tmp/torch_extensions/`; otherwise, you may fail to compile it in the new conda environment.
59 |
60 |
--------------------------------------------------------------------------------
/lib/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/utils/__init__.py
--------------------------------------------------------------------------------
/lib/utils/cutout.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 |
4 |
5 | class Cutout(object):
6 | """Randomly mask out one or more patches from an image.
7 |
8 | Args:
9 | n_holes (int): Number of patches to cut out of each image.
10 | length (int): The length (in pixels) of each square patch.
11 | """
12 | def __init__(self, n_holes, length):
13 | self.n_holes = n_holes
14 | self.length = length
15 |
16 | def __call__(self, img):
17 | """
18 | Args:
19 | img (Tensor): Tensor image of size (C, H, W).
20 | Returns:
21 | Tensor: Image with n_holes of dimension length x length cut out of it.
22 | """
23 | h = img.size(1)
24 | w = img.size(2)
25 |
26 | mask = np.ones((h, w), np.float32)
27 |
28 | for n in range(self.n_holes):
29 | y = np.random.randint(h//4, h-h//4)
30 | x = np.random.randint(w//4, w-w//4)
31 |
32 | y1 = np.clip(y - self.length // 2, 0, h)
33 | y2 = np.clip(y + self.length // 2, 0, h)
34 | x1 = np.clip(x - self.length // 2, 0, w)
35 | x2 = np.clip(x + self.length // 2, 0, w)
36 |
37 | mask[y1: y2, x1: x2] = 0.
38 |
39 | mask = torch.from_numpy(mask)
40 | mask = mask.expand_as(img)
41 | img = img * mask
42 |
43 | return img
44 |
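A quick usage sketch (the patch size is illustrative):

```python
import torch
from utils.cutout import Cutout

aug = Cutout(n_holes=1, length=32)
img = torch.rand(3, 255, 255)    # C x H x W search patch
img_aug = aug(img)               # one 32x32 square is zeroed at a random position
```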
--------------------------------------------------------------------------------
/lib/utils/extract_tpejson_fc.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # ! ./usr/bin/env python
3 |
4 | import os
5 | import json
6 | import shutil
7 | import argparse
8 | import numpy as np
9 | import pdb
10 |
11 |
12 | parser = argparse.ArgumentParser(description='Analyze SiamFC tune results')
13 | parser.add_argument('--path', default='./TPE_results/zp_tune', help='tune result path')
14 | parser.add_argument('--dataset', default='VOT2018', help='test dataset')
15 | parser.add_argument('--save_path', default='logs', help='log file save path')
16 |
17 |
18 | def collect_results(args):
19 | dirs = os.listdir(args.path)
20 | print('[*] ===== total {} files in TPE dir'.format(len(dirs)))
21 |
22 | count = 0
23 | scale_penalty = []
24 | scale_lr = []
25 | wi = []
26 | scale_step = []
27 | eao = []
28 | count = 0 # total numbers
29 |
30 | for d in dirs:
31 | param_path = os.path.join(args.path, d)
32 | json_path = os.path.join(param_path, 'result.json')
33 |
34 | if not os.path.exists(json_path):
35 | continue
36 |
37 | # pdb.set_trace()
38 | try:
39 | js = json.load(open(json_path, 'r'))
40 | except:
41 | continue
42 |
43 | if not "EAO" in list(js.keys()):
44 | continue
45 | else:
46 | count += 1
47 | eao.append(js['EAO'])
48 | temp = js['config']
49 | scale_lr.append(temp["scale_lr"])
50 | wi.append(temp["w_influence"])
51 | scale_step.append(temp["scale_step"])
52 | scale_penalty.append(temp["scale_penalty"])
53 |
54 |
55 | # find max
56 | print('{} params group have been tested'.format(count))
57 | eao = np.array(eao)
58 | max_idx = np.argmax(eao)
59 | max_eao = eao[max_idx]
60 | print('scale_penalty: {:.4f}, scale_lr: {:.4f}, wi: {:.4f}, scale_step: {}, eao: {}'.format(scale_penalty[max_idx], scale_lr[max_idx], wi[max_idx], scale_step[max_idx], max_eao))
61 |
62 |
63 | if __name__ == '__main__':
64 | args = parser.parse_args()
65 | collect_results(args)
--------------------------------------------------------------------------------
/lib/utils/extract_tpejson_ocean.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # ! ./usr/bin/env python
3 |
4 | import os
5 | import json
6 | import shutil
7 | import argparse
8 | import numpy as np
9 | import pdb
10 |
11 |
12 | parser = argparse.ArgumentParser(description='Analyze Ocean tune results')
13 | parser.add_argument('--path', default='./TPE_results/zp_tune', help='tune result path')
14 | parser.add_argument('--dataset', default='VOT2019', help='test dataset')
15 | parser.add_argument('--save_path', default='logs', help='log file save path')
16 |
17 |
18 | def collect_results(args):
19 | dirs = os.listdir(args.path)
20 | print('[*] ===== total {} files in TPE dir'.format(len(dirs)))
21 |
22 | count = 0
23 | penalty_k = []
24 | scale_lr = []
25 | wi = []
26 | big_sz = []
27 | small_sz = []
28 | ratio = []
29 | eao = []
30 | count = 0 # total numbers
31 |
32 | for d in dirs:
33 | param_path = os.path.join(args.path, d)
34 | json_path = os.path.join(param_path, 'result.json')
35 |
36 | if not os.path.exists(json_path):
37 | continue
38 |
39 | # pdb.set_trace()
40 | try:
41 | js = json.load(open(json_path, 'r'))
42 | except:
43 | continue
44 |
45 | if not "EAO" in list(js.keys()):
46 | continue
47 | else:
48 | count += 1
49 | # pdb.set_trace()
50 | eao.append(js['EAO'])
51 | temp = js['config']
52 | scale_lr.append(temp["scale_lr"])
53 | wi.append(temp["window_influence"])
54 | penalty_k.append(temp["penalty_k"])
55 | ratio.append(temp["ratio"])
56 | small_sz.append(temp["small_sz"])
57 | big_sz.append(temp["big_sz"])
58 |
59 |
60 | # find max
61 | print('{} params group have been tested'.format(count))
62 | eao = np.array(eao)
63 | max_idx = np.argmax(eao)
64 | max_eao = eao[max_idx]
65 | print('penalty_k: {:.4f}, scale_lr: {:.4f}, wi: {:.4f}, ratio: {:.4f}, small_sz: {}, big_sz: {:.4f}, eao: {}'.format(penalty_k[max_idx], scale_lr[max_idx], wi[max_idx], ratio[max_idx], small_sz[max_idx], big_sz[max_idx], max_eao))
66 |
67 |
68 | if __name__ == '__main__':
69 | args = parser.parse_args()
70 | collect_results(args)
--------------------------------------------------------------------------------
/lib/utils/extract_tpelog.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # ! ./usr/bin/env python
3 |
4 | import shutil
5 | import argparse
6 | import numpy as np
7 |
8 |
9 | parser = argparse.ArgumentParser(description='Analyze tune results')
10 | parser.add_argument('--path', default='logs/gene_adjust_rpn.log', help='tune result path')
11 | parser.add_argument('--dataset', default='VOT2018', help='test dataset')
12 | parser.add_argument('--save_path', default='logs', help='log file save path')
13 |
14 |
15 | def collect_results(args):
16 | if not args.path.endswith('txt'):
17 | name = args.path.split('.')[0]
18 | name = name + '.txt'
19 | shutil.copy(args.path, name)
20 | args.path = name
21 | fin = open(args.path, 'r')
22 | lines = fin.readlines()
23 | penalty_k = []
24 | scale_lr = []
25 | wi = []
26 | wi = []
27 | ratio = []
28 | sz = []
29 | bz = []
30 | eao = []
31 | ratio = []
32 | count = 0 # total numbers
33 |
34 | for line in lines:
35 | if not 'penalty_k:' in line:
36 | pass
37 | else:
38 | count += 1
39 | temp0, temp1, temp2, temp3, temp4, temp5, temp6 = line.split(',')
40 | #print(temp6.split(': ')[-1])
41 | #exit()
42 | penalty_k.append(float(temp0.split(': ')[-1]))
43 | scale_lr.append(float(temp1.split(': ')[-1]))
44 | wi.append(float(temp2.split(': ')[-1]))
45 | sz.append(float(temp3.split(': ')[-1]))
46 | bz.append(float(temp4.split(': ')[-1]))
47 | ratio.append(float(temp5.split(': ')[-1]))
48 | try:
49 | eao.append(float(temp6.split(': ')[-1].split('==')[0]))
50 | except:
51 | eao.append(float(temp6.split(': ')[-1].split('Result')[0]))
52 | #print(line)
53 | #print(temp6.split(': ')[-1])
54 | #exit()
55 |
56 | # find max
57 | eao = np.array(eao)
58 | max_idx = np.argmax(eao)
59 | max_eao = eao[max_idx]
60 | print('{} params group have been tested'.format(count))
61 | print('penalty_k: {:.4f}, scale_lr: {:.4f}, wi: {:.4f}, ratio: {:.4f}, small_sz: {}, big_sz: {}, auc: {}'.format(penalty_k[max_idx], scale_lr[max_idx], wi[max_idx], ratio[max_idx], sz[max_idx], bz[max_idx], max_eao))
62 |
63 |
64 | if __name__ == '__main__':
65 | args = parser.parse_args()
66 | collect_results(args)
67 |
--------------------------------------------------------------------------------
/lib/utils/extract_tpelog_fc.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 | # ! ./usr/bin/env python
3 |
4 |
5 | import shutil
6 | import argparse
7 | import numpy as np
8 |
9 |
10 | parser = argparse.ArgumentParser(description='Analyze SiamFC tune results')
11 | parser.add_argument('--path', default='logs/gene_adjust_rpn.log', help='tune result path')
12 | parser.add_argument('--dataset', default='VOT2018', help='test dataset')
13 | parser.add_argument('--save_path', default='logs', help='log file save path')
14 |
15 |
16 | def collect_results(args):
17 | if not args.path.endswith('txt'):
18 | name = args.path.split('.')[0]
19 | name = name + '.txt'
20 | shutil.copy(args.path, name)
21 | args.path = name
22 | fin = open(args.path, 'r')
23 | lines = fin.readlines()
24 | scale_step = []
25 | scale_lr = []
26 | scale_penalty = []
27 | wi = []
28 | eao = []
29 | count = 0 # total numbers
30 |
31 | for line in lines:
32 | if not line.startswith('scale_step'):
33 | pass
34 | else:
35 | # print(line)
36 | count += 1
37 |             # print(line.split(','))
38 |             # exit()  # debug exit removed so that all lines are parsed
39 | temp0, temp1, temp2, temp3, temp4, temp5 = line.split(',')
40 | scale_step.append(float(temp0.split(': ')[-1]))
41 | scale_lr.append(float(temp1.split(': ')[-1]))
42 | scale_penalty.append(float(temp2.split(': ')[-1]))
43 | wi.append(float(temp3.split(': ')[-1]))
44 | eao.append(float(temp4.split(': ')[-1]))
45 |
46 | # find max
47 | eao = np.array(eao)
48 | max_idx = np.argmax(eao)
49 | max_eao = eao[max_idx]
50 | print('{} params group have been tested'.format(count))
51 | print('scale_step: {:.4f}, scale_lr: {:.4f}, scale_penalty: {:.4f}, win_influence: {}, eao: {}'.format(scale_step[max_idx], scale_lr[max_idx], scale_penalty[max_idx], wi[max_idx], max_eao))
52 |
53 |
54 | if __name__ == '__main__':
55 | args = parser.parse_args()
56 | collect_results(args)
57 |
--------------------------------------------------------------------------------
/lib/utils/watch_tpe.sh:
--------------------------------------------------------------------------------
1 | watch -n 1 python lib/utils/extract_tpelog.py --path logs/tpe_tune.log
2 |
--------------------------------------------------------------------------------
/lib/version.py:
--------------------------------------------------------------------------------
1 | # GENERATED VERSION FILE
2 | # TIME: Sun May 24 21:24:18 2020
3 |
4 | __version__ = '1.0.rc0'
5 | short_version = '1.0.rc0'
6 |
--------------------------------------------------------------------------------
/tracking/_init_paths.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import os.path as osp
6 | import sys
7 |
8 |
9 | def add_path(path):
10 | if path not in sys.path:
11 | sys.path.insert(0, path)
12 |
13 |
14 | this_dir = osp.dirname(__file__)
15 |
16 | lib_path = osp.join(this_dir, '..', 'lib')
17 | add_path(lib_path)
18 |
19 | # sys.path.insert(1, osp.join(this_dir, '..', 'lib/eval_toolkit/bin'))
20 |
--------------------------------------------------------------------------------
/tracking/onekey.py:
--------------------------------------------------------------------------------
1 |
2 | import _init_paths
3 | import os
4 | import yaml
5 | import argparse
6 | from os.path import exists
7 | from utils.utils import load_yaml, extract_logs
8 |
9 | def parse_args():
10 | """
11 | args for onekey.
12 | """
13 | parser = argparse.ArgumentParser(description='Train SiamFC with onekey')
14 | # for train
15 | parser.add_argument('--cfg', type=str, default='experiments/train/Ocean.yaml', help='yaml configure file name')
16 |
17 | # for
18 |
19 | args = parser.parse_args()
20 |
21 | return args
22 |
23 |
24 | def main():
25 | args = parse_args()
26 |
27 | # train - test - tune information
28 | info = yaml.load(open(args.cfg, 'r').read())
29 | info = info['OCEAN']
30 | trainINFO = info['TRAIN']
31 | testINFO = info['TEST']
32 | tuneINFO = info['TUNE']
33 | dataINFO = info['DATASET']
34 |
35 | # epoch training -- train 50 or more epochs
36 | if trainINFO['ISTRUE']:
37 | print('==> train phase')
38 | print('python ./tracking/train_ocean.py --cfg {0} --gpus {1} --workers {2} 2>&1 | tee logs/ocean_train.log'
39 | .format(args.cfg, info['GPUS'], info['WORKERS']))
40 |
41 | if not exists('logs'):
42 | os.makedirs('logs')
43 |
44 |         os.system('python ./tracking/train_ocean.py --cfg {0} --gpus {1} --workers {2} 2>&1 | tee logs/ocean_train.log'
45 | .format(args.cfg, info['GPUS'], info['WORKERS']))
46 |
47 | # epoch testing -- test 30-50 epochs (or more)
48 | if testINFO['ISTRUE']:
49 | print('==> test phase')
50 | print('mpiexec -n {0} python ./tracking/test_epochs.py --arch {1} --start_epoch {2} --end_epoch {3} --gpu_nums={4} \
51 | --threads {0} --dataset {5} --align {6} 2>&1 | tee logs/ocean_epoch_test.log'
52 | .format(testINFO['THREADS'], trainINFO['MODEL'], testINFO['START_EPOCH'], testINFO['END_EPOCH'],
53 | (len(info['GPUS']) + 1) // 2, testINFO['DATA'], trainINFO['ALIGN']))
54 |
55 | if not exists('logs'):
56 | os.makedirs('logs')
57 |
58 | os.system('mpiexec -n {0} python ./tracking/test_epochs.py --arch {1} --start_epoch {2} --end_epoch {3} --gpu_nums={4} \
59 | --threads {0} --dataset {5} --align {6} 2>&1 | tee logs/ocean_epoch_test.log'
60 | .format(testINFO['THREADS'], trainINFO['MODEL'], testINFO['START_EPOCH'], testINFO['END_EPOCH'],
61 | (len(info['GPUS']) + 1) // 2, testINFO['DATA'], trainINFO['ALIGN']))
62 |
63 | # test on vot or otb benchmark
64 | print('====> use new testing toolkit')
65 | trackers = os.listdir(os.path.join('./result', testINFO['DATA']))
66 | trackers = " ".join(trackers)
67 | if 'VOT' in testINFO['DATA']:
68 | print('python lib/eval_toolkit/bin/eval.py --dataset_dir dataset --dataset {0} --tracker_result_dir result/{0} --trackers {1}'.format(testINFO['DATA'], trackers))
69 | os.system('python lib/eval_toolkit/bin/eval.py --dataset_dir dataset --dataset {0} --tracker_result_dir result/{0} --trackers {1} 2>&1 | tee logs/ocean_eval_epochs.log'.format(testINFO['DATA'], trackers))
70 | else:
71 | raise ValueError('not supported now, please add new dataset')
72 |
73 | # tuning -- with TPE
74 | if tuneINFO['ISTRUE']:
75 |
76 | if 'VOT' in testINFO['DATA']: # for vot real-time and baseline
77 | resume = extract_logs('logs/ocean_eval_epochs.log', 'VOT')
78 | else:
79 | raise ValueError('not supported now')
80 |
81 | print('==> tune phase')
82 | print('python -u ./tracking/tune_tpe.py --arch {0} --resume {1} --dataset {2} --gpu_nums {3} --align {4}\
83 | 2>&1 | tee logs/tpe_tune.log'.format(trainINFO['MODEL'], 'snapshot/'+ resume, tuneINFO['DATA'], (len(info['GPUS']) + 1) // 2, trainINFO['ALIGN']))
84 |
85 | if not exists('logs'):
86 | os.makedirs('logs')
87 | os.system('python -u ./tracking/tune_tpe.py --arch {0} --resume {1} --dataset {2} --gpu_nums {3} --align {4}\
88 | 2>&1 | tee logs/tpe_tune.log'.format(trainINFO['MODEL'], 'snapshot/'+ resume, tuneINFO['DATA'], (len(info['GPUS']) + 1) // 2, trainINFO['ALIGN']))
89 |
90 |
91 | if __name__ == '__main__':
92 | main()
93 |
--------------------------------------------------------------------------------
/tracking/test_epochs.py:
--------------------------------------------------------------------------------
1 | import os
2 | import time
3 | import argparse
4 | from mpi4py import MPI
5 |
6 |
7 | parser = argparse.ArgumentParser(description='multi-gpu test all epochs')
8 | parser.add_argument('--arch', dest='arch', default='SiamFCIncep22',
9 | help='architecture of model')
10 | parser.add_argument('--start_epoch', default=30, type=int, required=True, help='test start epoch')
11 | parser.add_argument('--end_epoch', default=50, type=int, required=True,
12 | help='test end epoch')
13 | parser.add_argument('--gpu_nums', default=4, type=int, required=True, help='number of gpus to use')
14 | parser.add_argument('--anchor_nums', default=5, type=int, help='anchor numbers')
15 | parser.add_argument('--threads', default=16, type=int, required=True)
16 | parser.add_argument('--dataset', default='VOT2019', type=str, help='benchmark to test')
17 | parser.add_argument('--align', default='False', type=str, help='align')
18 | args = parser.parse_args()
19 |
20 | # init gpu and epochs
21 | comm = MPI.COMM_WORLD
22 | size = comm.Get_size()
23 | rank = comm.Get_rank()
24 | GPU_ID = rank % args.gpu_nums
25 | node_name = MPI.Get_processor_name() # get the name of the node
26 | os.environ['CUDA_VISIBLE_DEVICES'] = str(GPU_ID)
27 | print("node name: {}, GPU_ID: {}".format(node_name, GPU_ID))
28 | time.sleep(rank * 5)
29 |
30 | # run test scripts -- two epoch for each thread
31 | for i in range(2):
32 | arch = args.arch
33 | dataset = args.dataset
34 | try:
35 | epoch_ID += args.threads # for 16 queue
36 | except:
37 | epoch_ID = rank % (args.end_epoch - args.start_epoch + 1) + args.start_epoch
38 |
39 | if epoch_ID > args.end_epoch:
40 | continue
41 |
42 | resume = 'snapshot/checkpoint_e{}.pth'.format(epoch_ID)
43 | print('==> test {}th epoch'.format(epoch_ID))
44 | os.system('python ./tracking/test_ocean.py --arch {0} --resume {1} --dataset {2} --align {3} --epoch_test True'.format(arch, resume, dataset, args.align))
45 |
--------------------------------------------------------------------------------
/tracking/vot.py:
--------------------------------------------------------------------------------
1 | """
2 | \file vot.py
3 |
4 | @brief Python utility functions for VOT integration
5 |
6 | @author Luka Cehovin, Alessio Dore
7 |
8 | @date 2016
9 |
10 | """
11 |
12 | import sys
13 | import copy
14 | import collections
15 | import numpy as np
16 |
17 | try:
18 | import trax
19 | except ImportError:
20 | raise Exception('TraX support not found. Please add trax module to Python path.')
21 |
22 | Rectangle = collections.namedtuple('Rectangle', ['x', 'y', 'width', 'height'])
23 | Point = collections.namedtuple('Point', ['x', 'y'])
24 | Polygon = collections.namedtuple('Polygon', ['points'])
25 |
26 | class VOT(object):
27 | """ Base class for Python VOT integration """
28 | def __init__(self, region_format, channels=None):
29 | """ Constructor
30 |
31 | Args:
32 | region_format: Region format options
33 | """
34 | assert(region_format in [trax.Region.RECTANGLE, trax.Region.POLYGON, trax.Region.MASK])
35 |
36 | if channels is None:
37 | channels = ['color']
38 | elif channels == 'rgbd':
39 | channels = ['color', 'depth']
40 | elif channels == 'rgbt':
41 | channels = ['color', 'ir']
42 | elif channels == 'ir':
43 | channels = ['ir']
44 | else:
45 | raise Exception('Illegal configuration {}.'.format(channels))
46 |
47 | self._trax = trax.Server([region_format], [trax.Image.PATH], channels, customMetadata=dict(vot="python"))
48 |
49 | request = self._trax.wait()
50 | assert(request.type == 'initialize')
51 | if isinstance(request.region, trax.Polygon):
52 | self._region = Polygon([Point(x[0], x[1]) for x in request.region])
53 |         elif isinstance(request.region, trax.Mask):
54 | self._region = request.region.array(True)
55 | else:
56 | self._region = Rectangle(*request.region.bounds())
57 | self._image = [x.path() for k, x in request.image.items()]
58 | if len(self._image) == 1:
59 | self._image = self._image[0]
60 |
61 | self._trax.status(request.region)
62 |
63 | def region(self):
64 | """
65 | Send configuration message to the client and receive the initialization
66 | region and the path of the first image
67 |
68 | Returns:
69 | initialization region
70 | """
71 |
72 | return self._region
73 |
74 | def report(self, region, confidence = None):
75 | """
76 | Report the tracking results to the client
77 |
78 | Arguments:
79 | region: region for the frame
80 | """
81 | assert(isinstance(region, (Rectangle, Polygon, np.ndarray)))
82 | if isinstance(region, Polygon):
83 | tregion = trax.Polygon.create([(x.x, x.y) for x in region.points])
84 |         elif isinstance(region, np.ndarray):
85 | tregion = trax.Mask.create(region)
86 | else:
87 | tregion = trax.Rectangle.create(region.x, region.y, region.width, region.height)
88 | properties = {}
89 | if not confidence is None:
90 | properties['confidence'] = confidence
91 | self._trax.status(tregion, properties)
92 |
93 | def frame(self):
94 | """
95 | Get a frame (image path) from client
96 |
97 | Returns:
98 | absolute path of the image
99 | """
100 | if hasattr(self, "_image"):
101 | image = self._image
102 | del self._image
103 | return image
104 |
105 | request = self._trax.wait()
106 |
107 | if request.type == 'frame':
108 | image = [x.path() for k, x in request.image.items()]
109 | if len(image) == 1:
110 | return image[0]
111 | return image
112 | else:
113 | return None
114 |
115 |
116 | def quit(self):
117 | if hasattr(self, '_trax'):
118 | self._trax.quit()
119 |
120 | def __del__(self):
121 | self.quit()
122 |
123 |
--------------------------------------------------------------------------------
/tracking/vot_wrap.py:
--------------------------------------------------------------------------------
1 | import _init_paths
2 | import vot
3 | import os
4 | import cv2
5 | import sys
6 | import random
7 | import argparse
8 | import numpy as np
9 | import torch
10 | import models.models as models
11 |
12 | from os.path import exists, join, dirname, realpath
13 | from tracker.oceanplus import OceanPlus
14 | from easydict import EasyDict as edict
15 | from utils.utils import load_pretrain, cxy_wh_2_rect, get_axis_aligned_bbox, load_dataset, poly_iou
16 |
17 | from vot import Rectangle,Polygon, Point
18 |
19 |
20 | def make_full_size(x, output_sz):
21 | '''
22 | zero-pad input x (right and down) to match output_sz
23 | x: numpy array e.g., binary mask
24 | output_sz: size of the output [width, height]
25 | '''
26 | if x.shape[0] == output_sz[1] and x.shape[1] == output_sz[0]:
27 | return x
28 | pad_x = output_sz[0] - x.shape[1]
29 | if pad_x < 0:
30 | x = x[:, :x.shape[1] + pad_x]
31 | # padding has to be set to zero, otherwise pad function fails
32 | pad_x = 0
33 | pad_y = output_sz[1] - x.shape[0]
34 | if pad_y < 0:
35 | x = x[:x.shape[0] + pad_y, :]
36 | # padding has to be set to zero, otherwise pad function fails
37 | pad_y = 0
38 | return np.pad(x, ((0, pad_y), (0, pad_x)), 'constant', constant_values=0)
39 |
40 | def rect_from_mask(mask):
41 | '''
42 | create an axis-aligned rectangle from a given binary mask
43 | mask in created as a minimal rectangle containing all non-zero pixels
44 | '''
45 | x_ = np.sum(mask, axis=0)
46 | y_ = np.sum(mask, axis=1)
47 | x0 = np.min(np.nonzero(x_))
48 | x1 = np.max(np.nonzero(x_))
49 | y0 = np.min(np.nonzero(y_))
50 | y1 = np.max(np.nonzero(y_))
51 |
52 | w = x1 - x0 + 1
53 | h = y1 - y0 + 1
54 | # return [x0, y0, x1 - x0 + 1, y1 - y0 + 1]
55 | return [x0 + w/2 , y0 + h/2, w, h]
56 |
57 | def mask_from_rect(rect, output_sz):
58 | '''
59 | create a binary mask from a given rectangle
60 | rect: axis-aligned rectangle [x0, y0, width, height]
61 | output_sz: size of the output [width, height]
62 | '''
63 | mask = np.zeros((output_sz[1], output_sz[0]), dtype=np.uint8)
64 | x0 = max(int(round(rect[0])), 0)
65 | y0 = max(int(round(rect[1])), 0)
66 | x1 = min(int(round(rect[0] + rect[2])), output_sz[0])
67 | y1 = min(int(round(rect[1] + rect[3])), output_sz[1])
68 | mask[y0:y1, x0:x1] = 1
69 | return mask
70 |
71 | # define tracker
72 | info = edict()
73 | info.arch = "OceanPlus"
74 | info.dataset = "VOT2020"
75 | info.epoch_test = False
76 | info.align = False
77 | info.online = False
78 | mask_vot = True
79 |
80 | net = models.__dict__[info.arch](online=info.online, mms=False)
81 | net = load_pretrain(net, "$tracker_path/snapshot/OceanPlusMSS.pth")
82 | net.eval()
83 | net = net.cuda()
84 |
85 | # warm up
86 | print('==== warm up ====')
87 | for i in range(10):
88 | net.template(torch.rand(1, 3, 127, 127).cuda(), torch.rand(1, 127, 127).cuda())
89 | net.track(torch.rand(1, 3, 255, 255).cuda())
90 |
91 | tracker = OceanPlus(info)
92 |
93 | # vot2020 settings
94 |
95 | if mask_vot:
96 | handle = vot.VOT("mask")
97 | else:
98 | handle = vot.VOT("rectangle")
99 |
100 | image_file = handle.frame()
101 |
102 | if not image_file:
103 | sys.exit(0)
104 |
105 | im = cv2.imread(image_file) # HxWxC
106 |
107 | if mask_vot:
108 | print('the input is a binary mask')
109 | selection = handle.region()
110 | mask = make_full_size(selection, (im.shape[1], im.shape[0]))
111 | bbox = rect_from_mask(mask) # [cx,cy,w,h] TODO: use cv.minmaxRect here
112 | cx, cy, w, h = bbox
113 | else:
114 | print('the input is a rect box')
115 | selection = handle.region() # selection in ncc_mask
116 | lx, ly, w, h = selection.x, selection.y, selection.width, selection.height
117 | cx, cy = lx + w/2, ly + h/2
118 |
119 | target_pos = np.array([cx, cy])
120 | target_sz = np.array([w, h])
121 | state = tracker.init(im, target_pos, target_sz, net, mask=mask)
122 |
123 |
124 | count = 0
125 | while True:
126 | image_file = handle.frame()
127 | if not image_file:
128 | break
129 | im = cv2.imread(image_file) # HxWxC
130 | state = tracker.track(state, im)
131 | mask = state['mask']
132 | if mask is None or mask.sum() < 10:
133 | rect = cxy_wh_2_rect(state['target_pos'], state['target_sz'])
134 | mask = mask_from_rect(rect, (im.shape[1], im.shape[0]))
135 | handle.report(mask, state['cls_score'])
136 | count += 1
137 |
--------------------------------------------------------------------------------