├── LICENSE ├── README.md ├── demo ├── Ocean_overview.jpg ├── lines.jpg ├── ocean1.gif ├── oceanplu_overview.png ├── oceanplus.gif └── siamdw_overview.jpg ├── experiments ├── test │ ├── DAVIS │ │ └── OceanPlus.yaml │ ├── GOT10K │ │ └── Ocean.yaml │ ├── LASOT │ │ └── Ocean.yaml │ ├── OTB │ │ ├── Ocean.yaml │ │ └── SiamDW.yaml │ └── VOT │ │ ├── ONLINE.yaml │ │ ├── Ocean.yaml │ │ ├── OceanPlus.yaml │ │ └── SiamDW.yaml └── train │ ├── Ocean.yaml │ └── SiamDW.yaml ├── lib ├── core │ ├── config.py │ ├── config_ocean.py │ ├── config_oceanplus.py │ ├── config_siamdw.py │ ├── eval_davis.py │ ├── eval_got10k.py │ ├── eval_lasot.py │ ├── eval_otb.py │ ├── eval_visdrone.py │ ├── extract_tune_logs.py │ └── function.py ├── dataset │ ├── crop │ │ ├── DAVIS │ │ │ ├── gen_json.py │ │ │ ├── par_crop.py │ │ │ └── readme.md │ │ ├── RGBT210 │ │ │ ├── RGBT210_genjson.py │ │ │ ├── gen_json.py │ │ │ ├── par_crop.py │ │ │ └── readme.md │ │ ├── RGBT234 │ │ │ ├── RGBT234_genjson.py │ │ │ ├── gen_json.py │ │ │ ├── par_crop.py │ │ │ └── readme.md │ │ ├── coco │ │ │ ├── gen_json.py │ │ │ ├── par_crop.py │ │ │ └── readme.md │ │ ├── det │ │ │ ├── gen_json.py │ │ │ ├── par_crop.py │ │ │ └── readme.md │ │ ├── got10k │ │ │ ├── gen_json.py │ │ │ ├── par_crop.py │ │ │ ├── parser_got10k.py │ │ │ └── readme.md │ │ ├── lasot │ │ │ ├── gen_json.py │ │ │ ├── par_crop.py │ │ │ ├── parser_lasot.py │ │ │ └── readme.md │ │ ├── vid │ │ │ ├── gen_json.py │ │ │ ├── par_crop.py │ │ │ ├── parse_vid.py │ │ │ └── readme.md │ │ └── visdrone │ │ │ ├── gen_json.py │ │ │ ├── par_crop.py │ │ │ ├── parser_visdrone.py │ │ │ └── readme.md │ ├── ocean.py │ └── siamfc.py ├── eval_toolkit │ ├── bin │ │ ├── _init_paths.py │ │ └── eval.py │ ├── davis │ │ └── davis2017-evaluation │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── davis2017 │ │ │ ├── __init__.py │ │ │ ├── davis.py │ │ │ ├── davis.py.ori │ │ │ ├── evaluation.py │ │ │ ├── metrics.py │ │ │ ├── results.py │ │ │ └── utils.py │ │ │ ├── demo.sh │ │ │ ├── evaluation_codalab.py │ │ │ ├── evaluation_method.py │ │ │ ├── pytest │ │ │ └── test_evaluation.py │ │ │ ├── setup.cfg │ │ │ └── setup.py │ ├── pysot │ │ ├── __init__.py │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── dataset.py │ │ │ ├── got10k.py │ │ │ ├── lasot.py │ │ │ ├── nfs.py │ │ │ ├── otb.py │ │ │ ├── trackingnet.py │ │ │ ├── uav.py │ │ │ ├── video.py │ │ │ └── vot.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── ar_benchmark.py │ │ │ ├── eao_benchmark.py │ │ │ ├── f1_benchmark.py │ │ │ └── ope_benchmark.py │ │ ├── utils │ │ │ ├── __init__.py │ │ │ ├── build │ │ │ │ ├── temp.linux-x86_64-3.6 │ │ │ │ │ ├── region.o │ │ │ │ │ └── src │ │ │ │ │ │ └── region.o │ │ │ │ └── temp.linux-x86_64-3.7 │ │ │ │ │ ├── region.o │ │ │ │ │ └── src │ │ │ │ │ └── region.o │ │ │ ├── c_region.pxd │ │ │ ├── misc.py │ │ │ ├── region.c │ │ │ ├── region.cpython-36m-x86_64-linux-gnu.so │ │ │ ├── region.cpython-37m-x86_64-linux-gnu.so │ │ │ ├── region.pyx │ │ │ ├── setup.py │ │ │ ├── src │ │ │ │ ├── buffer.h │ │ │ │ ├── region.c │ │ │ │ └── region.h │ │ │ └── statistics.py │ │ └── visualization │ │ │ ├── __init__.py │ │ │ ├── draw_eao.py │ │ │ ├── draw_f1.py │ │ │ ├── draw_success_precision.py │ │ │ └── draw_utils.py │ └── requirements.txt ├── models │ ├── __init__.py │ ├── backbones.py │ ├── connect.py │ ├── dcn │ │ ├── __init__.py │ │ ├── deform_conv.py │ │ ├── deform_conv_cuda.cpython-36m-x86_64-linux-gnu.so │ │ ├── deform_conv_cuda.cpython-37m-x86_64-linux-gnu.so │ │ ├── deform_pool.py │ │ ├── deform_pool_cuda.cpython-36m-x86_64-linux-gnu.so │ │ ├── 
deform_pool_cuda.cpython-37m-x86_64-linux-gnu.so │ │ └── src │ │ │ ├── deform_conv_cuda.cpp │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ ├── deform_pool_cuda.cpp │ │ │ └── deform_pool_cuda_kernel.cu │ ├── mask.py │ ├── models.py │ ├── modules.py │ ├── ocean.py │ ├── oceanTRT.py │ ├── oceanplus.py │ ├── online │ │ ├── __init__.py │ │ ├── backbone │ │ │ ├── __init__.py │ │ │ ├── resnet.py │ │ │ └── resnet18_vggm.py │ │ ├── bbreg │ │ │ ├── __init__.py │ │ │ └── iou_net.py │ │ ├── classifier │ │ │ ├── __init__.py │ │ │ ├── features.py │ │ │ ├── initializer.py │ │ │ ├── linear_filter.py │ │ │ └── optimizer.py │ │ ├── external │ │ │ └── PreciseRoIPooling │ │ │ │ ├── .gitignore │ │ │ │ ├── LICENSE │ │ │ │ ├── README.md │ │ │ │ ├── _assets │ │ │ │ └── prroi_visualization.png │ │ │ │ ├── pytorch │ │ │ │ ├── prroi_pool │ │ │ │ │ ├── .gitignore │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── functional.py │ │ │ │ │ ├── prroi_pool.py │ │ │ │ │ └── src │ │ │ │ │ │ ├── prroi_pooling_gpu.c │ │ │ │ │ │ ├── prroi_pooling_gpu.h │ │ │ │ │ │ ├── prroi_pooling_gpu_impl.cu │ │ │ │ │ │ └── prroi_pooling_gpu_impl.cuh │ │ │ │ └── tests │ │ │ │ │ └── test_prroi_pooling2d.py │ │ │ │ └── src │ │ │ │ ├── prroi_pooling_gpu_impl.cu │ │ │ │ └── prroi_pooling_gpu_impl.cuh │ │ └── layers │ │ │ ├── __init__.py │ │ │ ├── activation.py │ │ │ ├── blocks.py │ │ │ ├── distance.py │ │ │ ├── filter.py │ │ │ ├── normalization.py │ │ │ └── transform.py │ └── siamfc.py ├── online │ ├── __init__.py │ ├── augmentation.py │ ├── base_actor.py │ ├── base_trainer.py │ ├── complex.py │ ├── dcf.py │ ├── extractor.py │ ├── fourier.py │ ├── loading.py │ ├── ltr_trainer.py │ ├── model_constructor.py │ ├── operation.py │ ├── optim.py │ ├── optimization.py │ ├── preprocessing.py │ ├── tensordict.py │ ├── tensorlist.py │ └── tracking.py ├── tracker │ ├── ocean.py │ ├── oceanplus.py │ ├── online.py │ └── siamfc.py ├── tutorial │ ├── Ocean │ │ └── ocean.md │ ├── OceanPlus │ │ └── oceanplus.md │ ├── SiamDW │ │ └── siamdw.md │ ├── install.sh │ └── install_trt.md ├── utils │ ├── __init__.py │ ├── cutout.py │ ├── extract_tpejson_fc.py │ ├── extract_tpejson_ocean.py │ ├── extract_tpelog.py │ ├── extract_tpelog_fc.py │ ├── utils.py │ └── watch_tpe.sh └── version.py ├── setup.py └── tracking ├── _init_paths.py ├── onekey.py ├── run_video.py ├── test_epochs.py ├── test_ocean.py ├── test_oceanplus.py ├── test_siamdw.py ├── train_ocean.py ├── train_siamdw.py ├── tune_tpe.py ├── vot.py ├── vot_wrap.py └── vot_wrap_mms.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 eccv2020 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /demo/Ocean_overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/demo/Ocean_overview.jpg -------------------------------------------------------------------------------- /demo/lines.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/demo/lines.jpg -------------------------------------------------------------------------------- /demo/ocean1.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/demo/ocean1.gif -------------------------------------------------------------------------------- /demo/oceanplu_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/demo/oceanplu_overview.png -------------------------------------------------------------------------------- /demo/oceanplus.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/demo/oceanplus.gif -------------------------------------------------------------------------------- /demo/siamdw_overview.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/demo/siamdw_overview.jpg -------------------------------------------------------------------------------- /experiments/test/DAVIS/OceanPlus.yaml: -------------------------------------------------------------------------------- 1 | TEST: 2 | DAVIS2016: 3 | penalty_k: 0.032 4 | lr: 0.98 5 | window_influence: 0.45 6 | small_sz: 255 7 | big_sz: 287 8 | seg_thr: 0.84 9 | DAVIS2017: 10 | penalty_k: 0.031 11 | lr: 1 12 | window_influence: 0.35 13 | small_sz: 255 14 | big_sz: 287 15 | seg_thr: 0.84 16 | -------------------------------------------------------------------------------- /experiments/test/GOT10K/Ocean.yaml: -------------------------------------------------------------------------------- 1 | TEST: 2 | GOT10K: 3 | penalty_k: 0.022 4 | lr: 0.799 5 | window_influence: 0.118 6 | small_sz: 255 7 | big_sz: 255 8 | 9 | 10 | -------------------------------------------------------------------------------- /experiments/test/LASOT/Ocean.yaml: -------------------------------------------------------------------------------- 1 | TEST: 2 | LASOT: 3 | penalty_k: 0.11 4 | lr: 0.7 5 | window_influence: 0.20 6 | small_sz: 255 7 | big_sz: 255 8 | 9 | -------------------------------------------------------------------------------- /experiments/test/OTB/Ocean.yaml: -------------------------------------------------------------------------------- 1 | TEST: 2 | OTB2015: 3 | penalty_k: 0.087 4 | lr: 0.408 5 | window_influence: 0.366 6 | small_sz: 271 7 | big_sz: 271 8 | 9 | 
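Note: the TEST blocks in these experiments/test/*.yaml files only store per-benchmark, hand-tuned tracking hyper-parameters (penalty_k, lr, window_influence, small_sz/big_sz, seg_thr); they are presumably read at test time by the trackers under lib/tracker/. As a minimal, hypothetical sketch of how Siamese trackers of this family typically consume such values (function and variable names below are illustrative, not this repository's actual API):

````python
# Minimal sketch, not TracKit's actual code: load one benchmark block from an
# experiments/test/*.yaml file and apply the usual Siamese-tracker
# post-processing (scale/ratio-change penalty + cosine-window re-weighting).
import numpy as np
import yaml


def load_test_params(path, benchmark):
    """Return the hyper-parameter dict for one benchmark, e.g. 'OTB2015'."""
    with open(path) as f:
        cfg = yaml.safe_load(f)
    return cfg['TEST'][benchmark]


def rank_candidates(score, scale_change, ratio_change, window, hp):
    """Re-weight raw scores with a size-change penalty and a cosine window."""
    penalty = np.exp(-(scale_change * ratio_change - 1.0) * hp['penalty_k'])
    pscore = penalty * score
    pscore = pscore * (1 - hp['window_influence']) + window * hp['window_influence']
    best = int(np.argmax(pscore))
    size_lr = penalty[best] * score[best] * hp['lr']  # smoothing factor for the size update
    return best, size_lr


if __name__ == '__main__':
    hp = load_test_params('experiments/test/OTB/Ocean.yaml', 'OTB2015')
    n = 25 * 25  # e.g. one score per response-map location
    best, size_lr = rank_candidates(
        score=np.random.rand(n),
        scale_change=np.full(n, 1.05),
        ratio_change=np.full(n, 1.02),
        window=np.outer(np.hanning(25), np.hanning(25)).flatten(),
        hp=hp)
    print('best candidate:', best, 'size lr:', size_lr)
````

small_sz and big_sz most likely select between two search-region resolutions, and seg_thr (DAVIS/VOT2020) thresholds the predicted mask; treat the sketch above only as an illustration of where penalty_k, window_influence and lr enter the pipeline.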
-------------------------------------------------------------------------------- /experiments/test/OTB/SiamDW.yaml: -------------------------------------------------------------------------------- 1 | TEST: 2 | OTB2013: 3 | scale_step: 1.0482 4 | scale_lr: 0.3629 5 | scale_penalty: 0.9997 6 | w_influence: 0.3896 7 | OTB2015: 8 | scale_step: 1.1897 9 | scale_lr: 0.2226 10 | scale_penalty: 0.9370 11 | w_influence: 0.2897 -------------------------------------------------------------------------------- /experiments/test/VOT/ONLINE.yaml: -------------------------------------------------------------------------------- 1 | ## BASE: 2 | # Patch sampling parameters 3 | image_sample_size: 14 * 16 # Maximum image sample size 4 | search_area_scale: 4 # Scale relative to target size 5 | vot_anno_conversion_type: 'preserve_area' 6 | use_gpu: True 7 | debug: 0 8 | 9 | update_classifier: True 10 | net_opt_iter: 25 11 | net_opt_update_iter: 3 12 | net_opt_hn_iter: 3 13 | 14 | 15 | # Training parameters 16 | sample_memory_size: 250 # Memory size 17 | train_skipping: 10 # How often to run training (every n-th frame) 18 | init_samples_minimum_weight: 0.0 19 | 20 | # Windowing 21 | window_output: True # Perform windowing of output scores 22 | 23 | # Detection parameters 24 | scale_factors: torch.ones(1) # What scales to use for localization (only one scale if IoUNet is used) 25 | score_upsample_factor: 1 # How much Fourier upsampling to use 26 | 27 | 28 | # Learning parameters for each feature type 29 | learning_rate: 0.0075 # Learning rate 30 | use_augmentation: True # Whether to use augmentation for this feature 31 | 32 | 33 | 34 | ## DATA: 35 | # Init data augmentation parameters 36 | augmentation: 37 | fliplr: True 38 | rotate: [5, -5, 10, -10, 20, -20, 30, -30, 45,-45, -60, 60] 39 | blur: '[(2, 0.2), (0.2, 2), (3,1), (1, 3), (2, 2)]' 40 | relativeshift: '[(0.6, 0.6), (-0.6, 0.6), (0.6, -0.6), (-0.6,-0.6)]' 41 | dropout: '(7, 0.2)' 42 | 43 | augmentation_expansion_factor: 2 # How much to expand sample when doing augmentation 44 | random_shift_factor: 1 / 3 # How much random shift to do on each augmented sample 45 | 46 | 47 | 48 | ##REFINE: 49 | # Advanced localization parameters 50 | use_iou_net: False 51 | advanced_localization: True # Use this or not 52 | target_not_found_threshold: 0.0 # Absolute score threshold to detect target missing 53 | distractor_threshold: 100 # Relative threshold to find distractors 54 | hard_negative_threshold: 0.45 # Relative threshold to find hard negative samples 55 | target_neighborhood_scale: 2.2 # Target neighborhood to remove 56 | dispalcement_scale: 0.7 # Dispacement to consider for distractors 57 | hard_negative_learning_rate: 0.02 # Learning rate if hard negative detected 58 | update_scale_when_uncertain: True # Update scale or not if distractor is close 59 | 60 | # IoUNet parameters 61 | iounet_augmentation: False # Use the augmented samples to compute the modulation vector 62 | iounet_use_log_scale: True 63 | iounet_k: 3 # Top-k average to estimate final box 64 | num_init_random_boxes: 9 # Num extra random boxes in addition to the classifier prediction 65 | box_jitter_pos: 0.1 # How much to jitter the translation for random boxes 66 | box_jitter_sz: 0.5 # How much to jitter the scale for random boxes 67 | maximal_aspect_ratio: 6 # Limit on the aspect ratio 68 | box_refinement_iter: 5 # Number of iterations for refining the boxes 69 | box_refinement_step_length: 1 # Gradient step length in the bounding box refinement 70 | box_refinement_step_decay: 1 # Multiplicative 
step length decay (1 means no decay) 71 | -------------------------------------------------------------------------------- /experiments/test/VOT/Ocean.yaml: -------------------------------------------------------------------------------- 1 | TEST: 2 | VOT2018: # 0.467 3 | penalty_k: 0.021 4 | lr: 0.730 5 | window_influence: 0.321 6 | small_sz: 255 7 | big_sz: 271 8 | ratio: 0.93 9 | VOT2019: # 0.330 (slight higher than paper. small fluctuation is noramal) 10 | penalty_k: 0.062 11 | lr: 0.765 12 | window_influence: 0.380 13 | small_sz: 255 14 | big_sz: 271 15 | ratio: 0.94 16 | VOT2018ON: # also for NOOA version (0.438 for NOOA) 17 | penalty_k: 0.187 18 | lr: 0.800 19 | window_influence: 0.640 20 | small_sz: 271 21 | big_sz: 287 22 | online_ratio: 0.5 23 | VOT2019ON: # also for NOOA version (0.323 for NOOA) 24 | penalty_k: 0.06 25 | lr: 0.644 26 | window_influence: 0.484 27 | small_sz: 255 28 | big_sz: 287 29 | online_ratio: 0.7 30 | VOT2020: # ocean-online/oceanplus all (you may tune these params on VOT2020 to get better results) 31 | penalty_k: 0.06 32 | lr: 0.644 33 | window_influence: 0.484 34 | small_sz: 255 35 | big_sz: 287 36 | online_ratio: 0.7 37 | -------------------------------------------------------------------------------- /experiments/test/VOT/OceanPlus.yaml: -------------------------------------------------------------------------------- 1 | TEST: 2 | VOT2020: 3 | penalty_k: 0.06 4 | lr: 0.644 5 | window_influence: 0.484 6 | small_sz: 255 7 | big_sz: 287 8 | seg_thr: 0.9 9 | online_ratio: 0.9 10 | DAVIS2016: 11 | penalty_k: 0.032 12 | lr: 0.98 13 | window_influence: 0.45 14 | small_sz: 255 15 | big_sz: 287 16 | seg_thr: 0.84 17 | DAVIS2017: 18 | penalty_k: 0.031 19 | lr: 1 20 | window_influence: 0.35 21 | small_sz: 255 22 | big_sz: 287 23 | seg_thr: 0.84 24 | -------------------------------------------------------------------------------- /experiments/test/VOT/SiamDW.yaml: -------------------------------------------------------------------------------- 1 | TEST: 2 | VOT2015: 3 | scale_step: 1.1190 4 | scale_lr: 0.4373 5 | scale_penalty: 0.9811 6 | w_influence: 0.2569 7 | VOT2016: 8 | scale_step: 1.1535 9 | scale_lr: 0.4596 10 | scale_penalty: 0.9259 11 | w_influence: 0.3309 12 | VOT2017: 13 | scale_step: 1.1466 14 | scale_lr: 0.2061 15 | scale_penalty: 0.9994 16 | w_influence: 0.3242 -------------------------------------------------------------------------------- /experiments/train/Ocean.yaml: -------------------------------------------------------------------------------- 1 | OCEAN: 2 | GPUS: '0,1,2,3,4,5,6,7' 3 | PRINT_FREQ: 10 4 | WORKERS: 32 5 | OUTPUT_DIR: 'logs' # log file 6 | CHECKPOINT_DIR: 'snapshot' # checkpoint file 7 | 8 | TRAIN: 9 | ISTRUE: True # whether to test 10 | MODEL: "Ocean" 11 | ALIGN: True # object aware branch 12 | START_EPOCH: 0 13 | END_EPOCH: 50 14 | TEMPLATE_SIZE: 127 15 | SEARCH_SIZE: 255 16 | BATCH: 32 17 | STRIDE: 8 18 | RESUME: False 19 | PRETRAIN: 'pretrain.model' 20 | LR_POLICY: 'log' 21 | 22 | WARMUP: 23 | IFNOT: True 24 | TYPE: 'step' 25 | EPOCH: 5 # res50 5 26 | KWARGS: 27 | start_lr: 0.001 # res50 0.001 alex: 0.005 28 | end_lr: 0.005 # res50 0.005 alex: 0.01 29 | step: 1 30 | LR: 31 | TYPE: 'log' 32 | KWARGS: 33 | start_lr: 0.005 # res50 0.005 alex 0.01 34 | end_lr: 0.00001 # res50 0.0005 35 | 36 | 37 | LAYERS_LR: 0.1 # res50 0.1 alex: 1 # scale ration for backbone 38 | BASE_LR: 0.005 39 | UNFIX_EPOCH: 10 40 | WARM_POLICY: 'step' 41 | UNFIX_POLICY: 'log' 42 | MOMENTUM: 0.9 43 | WEIGHT_DECAY: 0.0001 44 | TRAINABLE_LAYER: ['layer1', 
'layer2', 'layer3'] # ['layer2', 'layer3', 'layer4'] 45 | WHICH_USE: ['YTB', 'VID', 'COCO', 'DET', 'GOT10K'] 46 | 47 | TEST: # TEST model is same as TRAIN.MODEL 48 | ISTRUE: False # whether to test 49 | THREADS: 16 # multi threads test 50 | DATA: 'VOT2019' 51 | START_EPOCH: 30 52 | END_EPOCH: 50 53 | RGBTSPLIT: None # None for main channel, 'RGB' and 'T' for RGBT 54 | TUNE: # TUNE model is same as TRAIN.MODEL 55 | ISTRUE: False # whether to tune 56 | DATA: 'VOT2019' 57 | METHOD: 'TPE' 58 | RGBTSPLT: None 59 | DATASET: 60 | SHIFT: 4 61 | SCALE: 0.05 62 | COLOR: 1 63 | FLIP: 0 64 | BLUR: 0 65 | ROTATION: 0 66 | LABELSMOOTH: False 67 | MIXUP: 0 68 | GRAY: 0 69 | CUTOUT: 0 70 | 71 | SHIFTs: 64 72 | SCALEs: 0.18 73 | 74 | VID: 75 | PATH: './data/vid/crop511' 76 | ANNOTATION: './data/vid/train.json' 77 | RANGE: 100 78 | USE: 110000 79 | YTB: 80 | PATH: './data/y2b/crop511' 81 | ANNOTATION: './data/y2b/train.json' 82 | RANGE: 3 83 | USE: 210000 84 | GOT10K: 85 | PATH: './data/got10k/crop511' 86 | ANNOTATION: './data/got10k/all.json' 87 | RANGE: 100 88 | USE: 160000 89 | DET: 90 | PATH: './data/det/crop511' 91 | ANNOTATION: './data/det/train.json' 92 | RANGE: 100 93 | USE: 60000 94 | COCO: 95 | PATH: "./data/coco/crop511" 96 | ANNOTATION: "./data/coco/train2017.json" 97 | RANGE: 1 98 | USE: 60000 99 | 100 | -------------------------------------------------------------------------------- /experiments/train/SiamDW.yaml: -------------------------------------------------------------------------------- 1 | SIAMFC: 2 | GPUS: '0,1,2,3' 3 | PRINT_FREQ: 10 4 | WORKERS: 32 5 | OUTPUT_DIR: 'logs' # log file 6 | CHECKPOINT_DIR: 'snapshot' # checkpoint file 7 | 8 | TRAIN: 9 | ISTRUE: True # whether to train 10 | MODEL: "SiamDW" # SiamFCIncep22, SiamFCNext22 11 | START_EPOCH: 0 12 | END_EPOCH: 50 13 | TEMPLATE_SIZE: 127 14 | SEARCH_SIZE: 255 15 | STRIDE: 8 16 | PAIRS: 200000 17 | PRETRAIN: 'pretrain.model' 18 | LR_POLICY: 'log' 19 | LR: 0.01 20 | LR_END: 0.00001 21 | MOMENTUM: 0.9 22 | WEIGHT_DECAY: 0.0001 23 | WHICH_USE: 'GOT10K' # VID or 'GOT10K' 24 | TEST: # TEST model is same as TRAIN.MODEL 25 | ISTRUE: True # whether to test 26 | THREADS: 16 # multi threads test 27 | DATA: 'VOT2017' 28 | START_EPOCH: 30 29 | END_EPOCH: 50 30 | TUNE: # TUNE model is same as TRAIN.MODEL 31 | ISTRUE: False # whether to tune 32 | DATA: 'VOT2017' 33 | METHOD: 'TPE' 34 | DATASET: 35 | SHIFT: 4 36 | SCALE: 0.05 37 | COLOR: 1 38 | FLIP: 0 39 | BLUR: 0 40 | ROTATION: 0 41 | GOT10K: 42 | PATH: './data/got10k/crop511' 43 | ANNOTATION: './data/got10k/train.json' 44 | -------------------------------------------------------------------------------- /lib/core/eval_davis.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Zhipeng Zhang (zhangzhipeng2017@ia.ac.cn) 5 | # multi-gpu test for epochs 6 | # ------------------------------------------------------------------------------ 7 | 8 | import os 9 | import time 10 | import argparse 11 | import numpy as np 12 | from os import listdir 13 | from os.path import join, exists 14 | from concurrent import futures 15 | 16 | parser = argparse.ArgumentParser(description='multi-gpu test all epochs') 17 | parser.add_argument('--dataset', default='DAVIS2016', type=str, help='benchmarks') 18 | parser.add_argument('--num_threads', default=16, type=int, help='number of threads') 19 | parser.add_argument('--datapath', default='dataset/DAVIS', type=str, help='benchmarks') 20 | args = parser.parse_args() 21 | 22 | 23 | def eval_davis(epoch): 24 | year = args.dataset[5:] 25 | full_path = join('result', args.dataset, epoch) 26 | os.system('python lib/eval_toolkit/davis/davis2017-evaluation/evaluation_method.py --task semi-supervised --results_path {0} --davis_path {1} --year {2}'.format(full_path, args.datapath, year)) 27 | 28 | 29 | def extract_davis(epochs): 30 | # J&F-Mean,J-Mean,J-Recall,J-Decay,F-Mean,F-Recall,F-Decay 31 | results = dict() 32 | print('\t \tJ&F-Mean,J-Mean,J-Recall,J-Decay,F-Mean,F-Recall,F-Decay') 33 | 34 | JFm = [] 35 | Jm = [] 36 | Jr = [] 37 | Jd = [] 38 | Fm = [] 39 | Fr = [] 40 | Fd = [] 41 | 42 | for e in epochs: 43 | results[e] = dict() 44 | full_path = join('result', args.dataset, e, 'global_results-val.csv') 45 | record = open(full_path, 'r').readlines() 46 | record = eval(record[1]) 47 | print('{} {} {} {} {} {} {} {}'.format(e, record[0], record[1], record[2], record[3], record[4], record[5], record[6])) 48 | 49 | JFm.append(record[0]) 50 | Jm.append(record[1]) 51 | Jr.append(record[2]) 52 | Jd.append(record[3]) 53 | Fm.append(record[4]) 54 | Fr.append(record[5]) 55 | Fd.append(record[6]) 56 | print('=========> sort with J&F: <===========') 57 | argidx = np.argmax(np.array(JFm)) 58 | print('{} {} {} {} {} {} {} {}'.format(epochs[argidx], JFm[argidx], Jm[argidx], Jr[argidx], Jd[argidx], Fm[argidx], Fr[argidx], Fd[argidx])) 59 | print('=========> sort with Jm: <===========') 60 | argidx = np.argmax(np.array(Jm)) 61 | print('{} {} {} {} {} {} {} {}'.format(epochs[argidx], JFm[argidx], Jm[argidx], Jr[argidx], Jd[argidx], Fm[argidx], Fr[argidx], Fd[argidx])) 62 | 63 | 64 | base_path = join('result', args.dataset) 65 | epochs = listdir(base_path) 66 | print('total {} epochs'.format(len(epochs))) 67 | 68 | # multi-process evaluation 69 | if args.dataset in ['DAVIS2016', 'DAVIS2017']: 70 | with futures.ProcessPoolExecutor(max_workers=args.num_threads) as executor: 71 | fs = [executor.submit(eval_davis, e) for e in epochs] 72 | print('done') 73 | extract_davis(epochs) 74 | else: 75 | raise ValueError('not supported data') 76 | -------------------------------------------------------------------------------- /lib/core/extract_tune_logs.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import argparse 3 | import numpy as np 4 | 5 | 6 | parser = argparse.ArgumentParser(description='Analysis siamfc tune results') 7 | parser.add_argument('--path', default='logs/tpe_tune_rpn.log', help='tune result path') 8 | parser.add_argument('--dataset', default='VOT2018', help='test dataset') 9 | parser.add_argument('--save_path', default='logs', help='log file save path') 10 | 11 | 12 | def collect_results(args): 13 | if not args.path.endswith('txt'): 14 | name = args.path.split('.')[0] 15 | name = 
name + '.txt' 16 | shutil.copy(args.path, name) 17 | args.path = name 18 | fin = open(args.path, 'r') 19 | lines = fin.readlines() 20 | penalty_k = [] 21 | scale_lr = [] 22 | wi = [] 23 | sz = [] 24 | bz = [] 25 | eao = [] 26 | count = 0 # total numbers 27 | 28 | for line in lines: 29 | if not line.startswith('penalty_k'): 30 | pass 31 | else: 32 | # print(line) 33 | count += 1 34 | temp0, temp1, temp2, temp3, temp4, temp5 = line.split(',') 35 | penalty_k.append(float(temp0.split(': ')[-1])) 36 | scale_lr.append(float(temp1.split(': ')[-1])) 37 | wi.append(float(temp2.split(': ')[-1])) 38 | sz.append(float(temp3.split(': ')[-1])) 39 | bz.append(float(temp4.split(': ')[-1])) 40 | eao.append(float(temp5.split(': ')[-1])) 41 | 42 | # find max 43 | eao = np.array(eao) 44 | max_idx = np.argmax(eao) 45 | max_eao = eao[max_idx] 46 | print('{} params group have been tested'.format(count)) 47 | print('penalty_k: {:.4f}, scale_lr: {:.4f}, wi: {:.4f}, small_sz: {}, big_sz: {}, auc: {}'.format(penalty_k[max_idx], scale_lr[max_idx], wi[max_idx], sz[max_idx], bz[max_idx], max_eao)) 48 | 49 | 50 | if __name__ == '__main__': 51 | args = parser.parse_args() 52 | collect_results(args) 53 | -------------------------------------------------------------------------------- /lib/dataset/crop/DAVIS/gen_json.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # processing DAVIS train 3 | # -------------------------------------------------------- 4 | from os.path import join 5 | import json 6 | import os 7 | import cv2 8 | import pdb 9 | import numpy as np 10 | import pdb 11 | from PIL import Image 12 | 13 | data_dir = '/home/zpzhang/data/testing/DAVIS-trainval' 14 | saveDir = '/home/zpzhang/data/training/DAVIS' 15 | 16 | dataset = dict() 17 | train_txt = join(data_dir, 'ImageSets/2017', 'train.txt') 18 | videos = open(train_txt, 'r').readlines() 19 | n_videos = len(videos) 20 | 21 | for iidx, video_name in enumerate(videos): 22 | video_name = video_name[:-1] 23 | 24 | print('video id: {:04d} / {:04d}'.format(iidx, n_videos)) 25 | try: 26 | imgs = sorted(os.listdir(join(data_dir, 'JPEGImages/480p', video_name))) 27 | except: 28 | continue 29 | dataset[video_name] = dict() 30 | 31 | for idx, im_name in enumerate(imgs): 32 | mask_path = join(data_dir, 'Annotations/480p', video_name, im_name.replace('.jpg', '.png')) 33 | mask = np.array(Image.open(mask_path)).astype(np.uint8) 34 | objects = np.unique(mask) 35 | 36 | for track_id in range(1, len(objects)): 37 | color = objects[track_id] 38 | mask_temp = (mask == color).astype(np.uint8) * 255 39 | x, y, w, h = cv2.boundingRect(mask_temp) 40 | bbox = [x, y, x + w - 1, y + h - 1] # [x1,y1,x2,y2] 41 | if w <= 0 or h <= 0: # lead nan error in cls. 
42 | continue 43 | 44 | if '{:02d}'.format(track_id - 1) not in dataset[video_name].keys(): 45 | dataset[video_name]['{:02d}'.format(track_id - 1)] = dict() 46 | dataset[video_name]['{:02d}'.format(track_id-1)]['{:06d}'.format(int(im_name.split('.')[0]))] = bbox 47 | print('save json (dataset), please wait 20 seconds~') 48 | save_path = join(saveDir, 'davis.json') 49 | json.dump(dataset, open(save_path, 'w'), indent=4, sort_keys=True) 50 | print('done!') 51 | 52 | -------------------------------------------------------------------------------- /lib/dataset/crop/DAVIS/readme.md: -------------------------------------------------------------------------------- 1 | # Preprocessing DAVIS 2 | 3 | ````shell 4 | python par_crop.py --enable_mask --num_threads 24 5 | python gen_json.py 6 | ```` 7 | -------------------------------------------------------------------------------- /lib/dataset/crop/RGBT210/RGBT210_genjson.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # ! ./usr/bin/env python 3 | # __author__ = 'zzp' 4 | 5 | import json 6 | import numpy as np 7 | from os import listdir 8 | from os.path import join 9 | 10 | basepath = '/data/share/RGBT210/' 11 | save = dict() 12 | 13 | 14 | def genjson(): 15 | videos = listdir(basepath) 16 | 17 | for v in videos: 18 | save[v] = dict() 19 | save[v]['name'] = v # video name 20 | 21 | # save img names 22 | v_in_path = join(basepath, v, 'infrared') 23 | v_rgb_path = join(basepath, v, 'visible') 24 | temp1 = listdir(v_in_path) 25 | temp2 = listdir(v_rgb_path) 26 | temp1.sort() 27 | temp2.sort() 28 | save[v]['infrared_imgs'] = temp1 # infrared file names 29 | save[v]['visible_imgs'] = temp2 # infrared file names 30 | 31 | # read gt 32 | v_in_gt_path = join(basepath, v, 'init.txt') 33 | v_rgb_gt_path = join(basepath, v, 'init.txt') 34 | v_in_gts = np.loadtxt(v_in_gt_path, delimiter=',') 35 | v_rgb_gts = np.loadtxt(v_rgb_gt_path, delimiter=',') 36 | 37 | v_in_gts[:, 0:2] = v_in_gts[:, 0:2] - 1 # to python 0 index 38 | v_rgb_gts[:, 0:2] = v_rgb_gts[:, 0:2] - 1 # to python 0 index 39 | 40 | v_in_init = v_in_gts[0] 41 | v_rgb_init = v_rgb_gts[0] 42 | 43 | # save int and gt 44 | save[v]['infrared_init'] = v_in_init.tolist() 45 | save[v]['visible_init'] = v_rgb_init.tolist() 46 | save[v]['infrared_gt'] = v_in_gts.tolist() 47 | save[v]['visible_gt'] = v_rgb_gts.tolist() 48 | 49 | json.dump(save, open('/data/zpzhang/datasets/dataset/RGBT210.json', 'w'), indent=4, sort_keys=True) 50 | 51 | 52 | if __name__ == '__main__': 53 | genjson() 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /lib/dataset/crop/RGBT210/gen_json.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | from os import listdir 3 | import json 4 | import cv2 5 | import numpy as np 6 | from pprint import pprint 7 | 8 | print('loading json (raw RGBT234 info), please wait 20 seconds~') 9 | RGBT210 = json.load(open('/data/zpzhang/datasets/dataset/RGBT210.json', 'r')) 10 | RGBT210_base_path = '/data/share/RGBT210' 11 | 12 | def check_size(frame_sz, bbox): 13 | min_ratio = 0.1 14 | max_ratio = 0.75 15 | # only accept objects >10% and <75% of the total frame 16 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz))) 17 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio) 18 | return ok 19 | 20 | 21 | def check_borders(frame_sz, bbox): 22 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + 
bbox[3] - bbox[1])/2 23 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \ 24 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \ 25 | ((frame_sz[1] - bbox[3]) > dist_from_border) 26 | return ok 27 | 28 | 29 | snippets = dict() 30 | 31 | n_videos = 0 32 | 33 | 34 | for v_name in list(RGBT210.keys()): 35 | video = RGBT210[v_name] 36 | n_videos += 1 37 | in_frames = video['infrared_imgs'] 38 | rgb_frames = video['visible_imgs'] 39 | snippet = dict() 40 | snippets[video['name']] = dict() 41 | 42 | # read a image to get im size 43 | im_temp_path = join(RGBT210_base_path, video['name'], 'visible', rgb_frames[0]) 44 | im_temp = cv2.imread(im_temp_path) 45 | frame_sz = [im_temp.shape[1], im_temp.shape[0]] 46 | 47 | in_gts = video['infrared_gt'] 48 | rgb_gts = video['visible_gt'] 49 | 50 | for f, in_frame in enumerate(in_frames): 51 | in_bbox = in_gts[f] # (x,y,w,h) 52 | rgb_bbox = rgb_gts[f] # (x,y,w,h) 53 | 54 | bboxs = [[in_bbox[0], in_bbox[1], in_bbox[0]+in_bbox[2], in_bbox[1]+in_bbox[3]], 55 | [rgb_bbox[0], rgb_bbox[1], rgb_bbox[0]+rgb_bbox[2], rgb_bbox[1]+rgb_bbox[3]]] #(xmin, ymin, xmax, ymax) 56 | 57 | imgs = [in_frames[f], rgb_frames[f]] # image name may be different in visible and rgb imgs 58 | 59 | snippet['{:06d}'.format(f)] = [imgs, bboxs] 60 | 61 | snippets[video['name']]['{:02d}'.format(0)] = snippet.copy() 62 | 63 | json.dump(snippets, open('/data/share/SMALLSIAM/RGBT210/all.json', 'w'), indent=4, sort_keys=True) 64 | print('done!') 65 | -------------------------------------------------------------------------------- /lib/dataset/crop/RGBT210/readme.md: -------------------------------------------------------------------------------- 1 | # Preprocessing RGBT234 (train and val) 2 | 3 | 4 | ### Crop & Generate data info (20 min) 5 | 6 | ````sh 7 | python RGBT234_genjson.py 8 | python par_crop.py 511 24 9 | python gen_json.py 10 | ```` 11 | -------------------------------------------------------------------------------- /lib/dataset/crop/RGBT234/RGBT234_genjson.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # ! 
./usr/bin/env python 3 | # __author__ = 'zzp' 4 | 5 | import json 6 | import numpy as np 7 | from os import listdir 8 | from os.path import join 9 | 10 | basepath = '/data/zpzhang/datasets/dataset/RGBT234/' 11 | save = dict() 12 | 13 | 14 | def genjson(): 15 | videos = listdir(basepath) 16 | 17 | for v in videos: 18 | save[v] = dict() 19 | save[v]['name'] = v # video name 20 | 21 | # save img names 22 | v_in_path = join(basepath, v, 'infrared') 23 | v_rgb_path = join(basepath, v, 'visible') 24 | temp1 = listdir(v_in_path) 25 | temp2 = listdir(v_rgb_path) 26 | temp1.sort() 27 | temp2.sort() 28 | save[v]['infrared_imgs'] = temp1 # infrared file names 29 | save[v]['visible_imgs'] = temp2 # infrared file names 30 | 31 | # read gt 32 | v_in_gt_path = join(basepath, v, 'infrared.txt') 33 | v_rgb_gt_path = join(basepath, v, 'visible.txt') 34 | v_in_gts = np.loadtxt(v_in_gt_path, delimiter=',') 35 | v_rgb_gts = np.loadtxt(v_rgb_gt_path, delimiter=',') 36 | 37 | v_in_gts[:, 0:2] = v_in_gts[:, 0:2] - 1 # to python 0 index 38 | v_rgb_gts[:, 0:2] = v_rgb_gts[:, 0:2] - 1 # to python 0 index 39 | 40 | v_in_init = v_in_gts[0] 41 | v_rgb_init = v_rgb_gts[0] 42 | 43 | # save int and gt 44 | save[v]['infrared_init'] = v_in_init.tolist() 45 | save[v]['visible_init'] = v_rgb_init.tolist() 46 | save[v]['infrared_gt'] = v_in_gts.tolist() 47 | save[v]['visible_gt'] = v_rgb_gts.tolist() 48 | 49 | json.dump(save, open('/data/zpzhang/datasets/dataset/RGBT234.json', 'w'), indent=4, sort_keys=True) 50 | 51 | 52 | if __name__ == '__main__': 53 | genjson() 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /lib/dataset/crop/RGBT234/gen_json.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | from os import listdir 3 | import json 4 | import cv2 5 | import numpy as np 6 | from pprint import pprint 7 | 8 | print('loading json (raw RGBT234 info), please wait 20 seconds~') 9 | RGBT234 = json.load(open('RGBT234.json', 'r')) 10 | RGBT234_base_path = '/data/zpzhang/datasets/dataset/RGBT234' 11 | 12 | def check_size(frame_sz, bbox): 13 | min_ratio = 0.1 14 | max_ratio = 0.75 15 | # only accept objects >10% and <75% of the total frame 16 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz))) 17 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio) 18 | return ok 19 | 20 | 21 | def check_borders(frame_sz, bbox): 22 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2 23 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \ 24 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \ 25 | ((frame_sz[1] - bbox[3]) > dist_from_border) 26 | return ok 27 | 28 | 29 | snippets = dict() 30 | 31 | n_videos = 0 32 | 33 | 34 | for v_name in list(RGBT234.keys()): 35 | video = RGBT234[v_name] 36 | n_videos += 1 37 | in_frames = video['infrared_imgs'] 38 | rgb_frames = video['visible_imgs'] 39 | snippet = dict() 40 | snippets[video['name']] = dict() 41 | 42 | # read a image to get im size 43 | im_temp_path = join(RGBT234_base_path, video['name'], 'visible', rgb_frames[0]) 44 | im_temp = cv2.imread(im_temp_path) 45 | frame_sz = [im_temp.shape[1], im_temp.shape[0]] 46 | 47 | in_gts = video['infrared_gt'] 48 | rgb_gts = video['visible_gt'] 49 | 50 | for f, in_frame in enumerate(in_frames): 51 | in_bbox = in_gts[f] # (x,y,w,h) 52 | rgb_bbox = rgb_gts[f] # (x,y,w,h) 53 | 54 | bboxs = [[in_bbox[0], in_bbox[1], in_bbox[0]+in_bbox[2], in_bbox[1]+in_bbox[3]], 55 | 
[rgb_bbox[0], rgb_bbox[1], rgb_bbox[0]+rgb_bbox[2], rgb_bbox[1]+rgb_bbox[3]]] #(xmin, ymin, xmax, ymax) 56 | 57 | imgs = [in_frames[f], rgb_frames[f]] # image name may be different in visible and rgb imgs 58 | 59 | snippet['{:06d}'.format(f)] = [imgs, bboxs] 60 | 61 | snippets[video['name']]['{:02d}'.format(0)] = snippet.copy() 62 | 63 | json.dump(snippets, open('/data/share/SMALLSIAM/RGBT234/all.json', 'w'), indent=4, sort_keys=True) 64 | print('done!') 65 | -------------------------------------------------------------------------------- /lib/dataset/crop/RGBT234/readme.md: -------------------------------------------------------------------------------- 1 | # Preprocessing RGBT234 (train and val) 2 | 3 | 4 | ### Crop & Generate data info (20 min) 5 | 6 | ````sh 7 | python RGBT234_genjson.py 8 | python par_crop.py 511 24 9 | python gen_json.py 10 | ```` 11 | -------------------------------------------------------------------------------- /lib/dataset/crop/coco/gen_json.py: -------------------------------------------------------------------------------- 1 | from pycocotools.coco import COCO 2 | from os.path import join 3 | import json 4 | import os 5 | 6 | 7 | dataDir = '/data/home/hopeng/msralab_IMG/Users/hopeng/data_official/coco' 8 | #'/data/share/coco' 9 | for dataType in ['val2017', 'train2017']: 10 | dataset = dict() 11 | annFile = '{}/annotations/instances_{}.json'.format(dataDir,dataType) 12 | coco = COCO(annFile) 13 | n_imgs = len(coco.imgs) 14 | for n, img_id in enumerate(coco.imgs): 15 | print('subset: {} image id: {:04d} / {:04d}'.format(dataType, n, n_imgs)) 16 | img = coco.loadImgs(img_id)[0] 17 | annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None) 18 | anns = coco.loadAnns(annIds) 19 | video_crop_base_path = join(dataType, img['file_name'].split('/')[-1].split('.')[0]) 20 | 21 | if len(anns) > 0: 22 | dataset[video_crop_base_path] = dict() 23 | 24 | for trackid, ann in enumerate(anns): 25 | rect = ann['bbox'] 26 | c = ann['category_id'] 27 | bbox = [rect[0], rect[1], rect[0]+rect[2], rect[1]+rect[3]] 28 | if rect[2] <= 0 or rect[3] <= 0: # lead nan error in cls. 29 | continue 30 | dataset[video_crop_base_path]['{:02d}'.format(trackid)] = {'000000': bbox} 31 | 32 | print('save json (dataset), please wait 20 seconds~') 33 | #json.dump(dataset, open('{}.json'.format(dataType), 'w'), indent=4, sort_keys=True) 34 | json.dump(dataset, open('{}.json'.format(os.path.join(dataDir, dataType)), 'w'), indent=4, sort_keys=True) 35 | print('done!') 36 | 37 | -------------------------------------------------------------------------------- /lib/dataset/crop/coco/readme.md: -------------------------------------------------------------------------------- 1 | # Preprocessing COCO 2 | 3 | ### Download raw images and annotations 4 | 5 | ````shell 6 | wget http://images.cocodataset.org/zips/train2017.zip 7 | wget http://images.cocodataset.org/zips/val2017.zip 8 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip 9 | 10 | unzip ./train2017.zip 11 | unzip ./val2017.zip 12 | unzip ./annotations_trainval2017.zip 13 | cd pycocotools && make && cd .. 14 | ```` 15 | 16 | ### Crop & Generate data info (10 min) 17 | 18 | ````shell 19 | #python par_crop.py [data_path] [crop_size] [num_threads] 20 | python par_crop.py /data/share/coco 511 12 21 | python gen_json.py 22 | ```` 23 | 24 | Code are modified from SiamMask. 
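For orientation: the single-modality gen_json.py scripts in lib/dataset/crop/ all emit the same nested layout, {video: {track_id: {frame_id: [x1, y1, x2, y2]}}} with 0-indexed corner coordinates (the RGBT variants additionally store per-modality image names alongside the boxes). A small, hypothetical sketch of walking that structure, e.g. to sample template/search pairs — names and pairing logic are illustrative, not the repository's dataset loader:

````python
# Hypothetical sketch: iterate a train.json produced by the gen_json.py
# scripts above. Layout: {video: {track_id: {frame_id: [x1, y1, x2, y2]}}}.
# The pairing logic is illustrative only, not lib/dataset/ocean.py.
import json
import random


def load_annotations(path):
    with open(path) as f:
        return json.load(f)


def sample_pair(annotations, max_gap=100):
    """Pick one track and two frames at most `max_gap` indices apart."""
    video = random.choice(list(annotations))
    track_id = random.choice(list(annotations[video]))
    frames = sorted(annotations[video][track_id])   # zero-padded keys sort numerically
    template = random.choice(frames)
    lo = max(0, frames.index(template) - max_gap)
    hi = min(len(frames), frames.index(template) + max_gap + 1)
    search = random.choice(frames[lo:hi])
    return (video, track_id,
            template, annotations[video][track_id][template],
            search, annotations[video][track_id][search])


if __name__ == '__main__':
    anns = load_annotations('./data/coco/train2017.json')
    print(sample_pair(anns, max_gap=1))  # COCO videos hold a single frame
````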
25 | -------------------------------------------------------------------------------- /lib/dataset/crop/det/gen_json.py: -------------------------------------------------------------------------------- 1 | from os.path import join, isdir 2 | from os import mkdir 3 | import glob 4 | import xml.etree.ElementTree as ET 5 | import json 6 | 7 | js = {} 8 | #VID_base_path = '/data/share/ILSVRC' 9 | VID_base_path = '/data/home/hopeng/data_local/ILSVRC2015' 10 | ann_base_path = join(VID_base_path, 'Annotations/DET/train/') 11 | sub_sets = ('a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i') 12 | for sub_set in sub_sets: 13 | sub_set_base_path = join(ann_base_path, sub_set) 14 | 15 | if 'a' == sub_set: 16 | xmls = sorted(glob.glob(join(sub_set_base_path, '*', '*.xml'))) 17 | else: 18 | xmls = sorted(glob.glob(join(sub_set_base_path, '*.xml'))) 19 | n_imgs = len(xmls) 20 | for f, xml in enumerate(xmls): 21 | print('subset: {} frame id: {:08d} / {:08d}'.format(sub_set, f, n_imgs)) 22 | xmltree = ET.parse(xml) 23 | objects = xmltree.findall('object') 24 | 25 | video = join(sub_set, xml.split('/')[-1].split('.')[0]) 26 | 27 | for id, object_iter in enumerate(objects): 28 | bndbox = object_iter.find('bndbox') 29 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text), 30 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)] 31 | frame = '%06d' % (0) 32 | obj = '%02d' % (id) 33 | if video not in js: 34 | js[video] = {} 35 | if obj not in js[video]: 36 | js[video][obj] = {} 37 | js[video][obj][frame] = bbox 38 | 39 | train = {k:v for (k,v) in js.items() if 'i/' not in k} 40 | val = {k:v for (k,v) in js.items() if 'i/' in k} 41 | 42 | #json.dump(train, open('train.json', 'w'), indent=4, sort_keys=True) 43 | #json.dump(val, open('val.json', 'w'), indent=4, sort_keys=True) 44 | json.dump(train, open('/data/home/hopeng/data_local/ILSVRC2015/DET/train.json', 'w'), indent=4, sort_keys=True) 45 | json.dump(val, open('/data/home/hopeng/data_local/ILSVRC2015/DET/val.json', 'w'), indent=4, sort_keys=True) 46 | -------------------------------------------------------------------------------- /lib/dataset/crop/det/readme.md: -------------------------------------------------------------------------------- 1 | # Preprocessing DET(Object detection) 2 | Large Scale Visual Recognition Challenge 2015 (ILSVRC2015) 3 | 4 | ### Download dataset (49GB) 5 | 6 | ````shell 7 | wget http://image-net.org/image/ILSVRC2015/ILSVRC2015_DET.tar.gz 8 | tar -xzvf ./ILSVRC2015_DET.tar.gz 9 | 10 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2013_train ILSVRC/Annotations/DET/train/a 11 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0000 ILSVRC/Annotations/DET/train/b 12 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0001 ILSVRC/Annotations/DET/train/c 13 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0002 ILSVRC/Annotations/DET/train/d 14 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0003 ILSVRC/Annotations/DET/train/e 15 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0004 ILSVRC/Annotations/DET/train/f 16 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0005 ILSVRC/Annotations/DET/train/g 17 | ln -sfb $PWD/ILSVRC/Annotations/DET/train/ILSVRC2014_train_0006 ILSVRC/Annotations/DET/train/h 18 | ln -sfb $PWD/ILSVRC/Annotations/DET/val ILSVRC/Annotations/DET/train/i 19 | 20 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2013_train ILSVRC/Data/DET/train/a 21 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0000 ILSVRC/Data/DET/train/b 22 | ln 
-sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0001 ILSVRC/Data/DET/train/c 23 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0002 ILSVRC/Data/DET/train/d 24 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0003 ILSVRC/Data/DET/train/e 25 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0004 ILSVRC/Data/DET/train/f 26 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0005 ILSVRC/Data/DET/train/g 27 | ln -sfb $PWD/ILSVRC/Data/DET/train/ILSVRC2014_train_0006 ILSVRC/Data/DET/train/h 28 | ln -sfb $PWD/ILSVRC/Data/DET/val ILSVRC/Data/DET/train/i 29 | ```` 30 | 31 | ### Crop & Generate data info (20 min) 32 | 33 | ````shell 34 | #python par_crop.py [crop_size] [num_threads] 35 | python par_crop.py /data/share/ILSVRC 511 12 36 | python gen_json.py 37 | ```` 38 | 39 | Codes are modified from SiamMask. 40 | -------------------------------------------------------------------------------- /lib/dataset/crop/got10k/gen_json.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | from os import listdir 3 | import json 4 | import numpy as np 5 | 6 | print('loading json (raw got10k info), please wait 20 seconds~') 7 | got10k = json.load(open('got10k.json', 'r')) 8 | 9 | 10 | def check_size(frame_sz, bbox): 11 | min_ratio = 0.1 12 | max_ratio = 0.75 13 | # only accept objects >10% and <75% of the total frame 14 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz))) 15 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio) 16 | return ok 17 | 18 | 19 | def check_borders(frame_sz, bbox): 20 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2 21 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \ 22 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \ 23 | ((frame_sz[1] - bbox[3]) > dist_from_border) 24 | return ok 25 | 26 | 27 | snippets = dict() 28 | 29 | n_videos = 0 30 | for subset in got10k: 31 | for video in subset: 32 | n_videos += 1 33 | frames = video['frame'] 34 | snippet = dict() 35 | snippets[video['base_path']] = dict() 36 | for f, frame in enumerate(frames): 37 | frame_sz = frame['frame_sz'] 38 | bbox = frame['bbox'] # (x,y,w,h) 39 | 40 | snippet['{:06d}'.format(f)] = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] #(xmin, ymin, xmax, ymax) 41 | 42 | snippets[video['base_path']]['{:02d}'.format(0)] = snippet.copy() 43 | 44 | train = {k:v for (k,v) in snippets.items() if 'train' in k} 45 | val = {k:v for (k,v) in snippets.items() if 'val' in k} 46 | 47 | # json.dump(train, open('/data2/got10k/train.json', 'w'), indent=4, sort_keys=True) 48 | json.dump(val, open('/data2/got10k/val.json', 'w'), indent=4, sort_keys=True) 49 | print('done!') 50 | -------------------------------------------------------------------------------- /lib/dataset/crop/got10k/parser_got10k.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # ! 
./usr/bin/env python 3 | # __author__ = 'zzp' 4 | 5 | import cv2 6 | import json 7 | import glob 8 | import numpy as np 9 | from os.path import join 10 | from os import listdir 11 | 12 | import argparse 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--dir',type=str, default='/data/share/GOT10K', help='your vid data dir') 16 | args = parser.parse_args() 17 | 18 | got10k_base_path = args.dir 19 | sub_sets = sorted({'train', 'val'}) 20 | 21 | got10k = [] 22 | for sub_set in sub_sets: 23 | sub_set_base_path = join(got10k_base_path, sub_set) 24 | videos = sorted(listdir(sub_set_base_path)) 25 | s = [] 26 | for vi, video in enumerate(videos): 27 | print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos))) 28 | v = dict() 29 | v['base_path'] = join(sub_set, video) 30 | v['frame'] = [] 31 | video_base_path = join(sub_set_base_path, video) 32 | gts_path = join(video_base_path, 'groundtruth.txt') 33 | # gts_file = open(gts_path, 'r') 34 | # gts = gts_file.readlines() 35 | gts = np.loadtxt(open(gts_path, "rb"), delimiter=',') 36 | 37 | # get image size 38 | im_path = join(video_base_path, '00000001.jpg') 39 | im = cv2.imread(im_path) 40 | size = im.shape # height, width 41 | frame_sz = [size[1], size[0]] # width,height 42 | 43 | # get all im name 44 | jpgs = sorted(glob.glob(join(video_base_path, '*.jpg'))) 45 | 46 | f = dict() 47 | for idx, img_path in enumerate(jpgs): 48 | f['frame_sz'] = frame_sz 49 | f['img_path'] = img_path.split('/')[-1] 50 | 51 | gt = gts[idx] 52 | bbox = [int(g) for g in gt] # (x,y,w,h) 53 | f['bbox'] = bbox 54 | v['frame'].append(f.copy()) 55 | s.append(v) 56 | got10k.append(s) 57 | print('save json (raw got10k info), please wait 1 min~') 58 | json.dump(got10k, open('got10k.json', 'w'), indent=4, sort_keys=True) 59 | print('got10k.json has been saved in ./') 60 | -------------------------------------------------------------------------------- /lib/dataset/crop/got10k/readme.md: -------------------------------------------------------------------------------- 1 | # Preprocessing GOT10K (train and val) 2 | 3 | 4 | ### Crop & Generate data info (20 min) 5 | 6 | ````shell 7 | rm ./train/list.txt 8 | rm ./val/list.txt 9 | 10 | python parse_got10k.py 11 | python par_crop.py 511 16 12 | python gen_json.py 13 | ```` 14 | -------------------------------------------------------------------------------- /lib/dataset/crop/lasot/gen_json.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | from os import listdir 3 | import json 4 | import numpy as np 5 | 6 | print('loading json (raw lasot info), please wait 20 seconds~') 7 | lasot = json.load(open('lasot.json', 'r')) 8 | 9 | 10 | def check_size(frame_sz, bbox): 11 | min_ratio = 0.1 12 | max_ratio = 0.75 13 | # only accept objects >10% and <75% of the total frame 14 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz))) 15 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio) 16 | return ok 17 | 18 | 19 | def check_borders(frame_sz, bbox): 20 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2 21 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \ 22 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \ 23 | ((frame_sz[1] - bbox[3]) > dist_from_border) 24 | return ok 25 | 26 | 27 | snippets = dict() 28 | 29 | n_videos = 0 30 | for subset in lasot: 31 | for video in subset: 32 | n_videos += 1 33 | frames = video['frame'] 34 | snippet = dict() 35 | 36 | 
snippets[video['base_path'].split('/')[-1]] = dict() 37 | for f, frame in enumerate(frames): 38 | frame_sz = frame['frame_sz'] 39 | bbox = frame['bbox'] # (x,y,w,h) 40 | 41 | snippet['{:06d}'.format(f)] = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] #(xmin, ymin, xmax, ymax) 42 | 43 | snippets[video['base_path'].split('/')[-1]]['{:02d}'.format(0)] = snippet.copy() 44 | 45 | json.dump(snippets, open('/data/share/LASOT/train.json', 'w'), indent=4, sort_keys=True) 46 | print('done!') 47 | -------------------------------------------------------------------------------- /lib/dataset/crop/lasot/parser_lasot.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # ! ./usr/bin/env python 3 | # __author__ = 'zzp' 4 | 5 | import cv2 6 | import json 7 | import glob 8 | import numpy as np 9 | from os.path import join 10 | from os import listdir 11 | 12 | import argparse 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--dir',type=str, default='/data/share/LaSOTBenchmark', help='your vid data dir') 16 | args = parser.parse_args() 17 | 18 | lasot_base_path = args.dir 19 | # sub_sets = sorted({'train', 'val'}) 20 | 21 | lasot = [] 22 | 23 | videos_fathers = sorted(listdir(lasot_base_path)) 24 | s = [] 25 | for _, video_f in enumerate(videos_fathers): 26 | videos_sons = sorted(listdir(join(lasot_base_path, video_f))) 27 | 28 | for vi, video in enumerate(videos_sons): 29 | 30 | print('father class: {} video id: {:04d} / {:04d}'.format(video_f, vi, len(videos_sons))) 31 | v = dict() 32 | v['base_path'] = join(video_f, video) 33 | v['frame'] = [] 34 | video_base_path = join(lasot_base_path, video_f, video) 35 | gts_path = join(video_base_path, 'groundtruth.txt') 36 | # gts_file = open(gts_path, 'r') 37 | # gts = gts_file.readlines() 38 | gts = np.loadtxt(open(gts_path, "rb"), delimiter=',') 39 | 40 | # get image size 41 | im_path = join(video_base_path, 'img', '00000001.jpg') 42 | im = cv2.imread(im_path) 43 | size = im.shape # height, width 44 | frame_sz = [size[1], size[0]] # width,height 45 | 46 | # get all im name 47 | jpgs = sorted(glob.glob(join(video_base_path, 'img', '*.jpg'))) 48 | 49 | f = dict() 50 | for idx, img_path in enumerate(jpgs): 51 | f['frame_sz'] = frame_sz 52 | f['img_path'] = img_path.split('/')[-1] 53 | 54 | gt = gts[idx] 55 | bbox = [int(g) for g in gt] # (x,y,w,h) 56 | f['bbox'] = bbox 57 | v['frame'].append(f.copy()) 58 | s.append(v) 59 | lasot.append(s) 60 | 61 | print('save json (raw lasot info), please wait 1 min~') 62 | json.dump(lasot, open('lasot.json', 'w'), indent=4, sort_keys=True) 63 | print('lasot.json has been saved in ./') 64 | -------------------------------------------------------------------------------- /lib/dataset/crop/lasot/readme.md: -------------------------------------------------------------------------------- 1 | # Preprocessing LASOT (train and val) 2 | 3 | 4 | ### Crop & Generate data info (20 min) 5 | 6 | ````shell 7 | rm ./train/list.txt 8 | rm ./val/list.txt 9 | 10 | python parse_lasot.py 11 | python par_crop.py 511 16 12 | python gen_json.py 13 | ```` 14 | -------------------------------------------------------------------------------- /lib/dataset/crop/vid/gen_json.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | from os import listdir 3 | import json 4 | import numpy as np 5 | 6 | print('loading json (raw vid info), please wait 20 seconds~') 7 | vid = json.load(open('vid.json', 'r')) 8 | 9 | 
10 | def check_size(frame_sz, bbox): 11 | min_ratio = 0.1 12 | max_ratio = 0.75 13 | # only accept objects >10% and <75% of the total frame 14 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz))) 15 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio) 16 | return ok 17 | 18 | 19 | def check_borders(frame_sz, bbox): 20 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2 21 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \ 22 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \ 23 | ((frame_sz[1] - bbox[3]) > dist_from_border) 24 | return ok 25 | 26 | 27 | snippets = dict() 28 | n_snippets = 0 29 | n_videos = 0 30 | for subset in vid: 31 | for video in subset: 32 | n_videos += 1 33 | frames = video['frame'] 34 | id_set = [] 35 | id_frames = [[]] * 60 # at most 60 objects 36 | for f, frame in enumerate(frames): 37 | objs = frame['objs'] 38 | frame_sz = frame['frame_sz'] 39 | for obj in objs: 40 | trackid = obj['trackid'] 41 | occluded = obj['occ'] 42 | bbox = obj['bbox'] 43 | # if occluded: 44 | # continue 45 | # 46 | # if not(check_size(frame_sz, bbox) and check_borders(frame_sz, bbox)): 47 | # continue 48 | # 49 | # if obj['c'] in ['n01674464', 'n01726692', 'n04468005', 'n02062744']: 50 | # continue 51 | 52 | if trackid not in id_set: 53 | id_set.append(trackid) 54 | id_frames[trackid] = [] 55 | id_frames[trackid].append(f) 56 | if len(id_set) > 0: 57 | snippets[video['base_path']] = dict() 58 | for selected in id_set: 59 | frame_ids = sorted(id_frames[selected]) 60 | sequences = np.split(frame_ids, np.array(np.where(np.diff(frame_ids) > 1)[0]) + 1) 61 | sequences = [s for s in sequences if len(s) > 1] # remove isolated frame. 62 | for seq in sequences: 63 | snippet = dict() 64 | for frame_id in seq: 65 | frame = frames[frame_id] 66 | for obj in frame['objs']: 67 | if obj['trackid'] == selected: 68 | o = obj 69 | continue 70 | snippet[frame['img_path'].split('.')[0]] = o['bbox'] 71 | snippets[video['base_path']]['{:02d}'.format(selected)] = snippet 72 | n_snippets += 1 73 | print('video: {:d} snippets_num: {:d}'.format(n_videos, n_snippets)) 74 | 75 | train = {k:v for (k,v) in snippets.items() if 'train' in k} 76 | val = {k:v for (k,v) in snippets.items() if 'val' in k} 77 | 78 | json.dump(train, open('/data/home/hopeng/data_local/ILSVRC2015/VID/train.json', 'w'), indent=4, sort_keys=True) 79 | json.dump(val, open('/data/home/hopeng/data_local/ILSVRC2015/VID/val.json', 'w'), indent=4, sort_keys=True) 80 | print('done!') 81 | -------------------------------------------------------------------------------- /lib/dataset/crop/vid/parse_vid.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | from os import listdir 3 | import json 4 | import glob 5 | import argparse 6 | import xml.etree.ElementTree as ET 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument('--dir',type=str, default='/data/share/ILSVRC2015', help='your vid data dir' ) 10 | args = parser.parse_args() 11 | 12 | VID_base_path = args.dir 13 | ann_base_path = join(VID_base_path, 'Annotations/VID/train/') 14 | img_base_path = join(VID_base_path, 'Data/VID/train/') 15 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e'}) 16 | 17 | vid = [] 18 | for sub_set in sub_sets: 19 | sub_set_base_path = join(ann_base_path, sub_set) 20 | videos = sorted(listdir(sub_set_base_path)) 21 | s = [] 22 | for vi, video in enumerate(videos): 23 | print('subset: {} video id: {:04d} / 
{:04d}'.format(sub_set, vi, len(videos))) 24 | v = dict() 25 | v['base_path'] = join(sub_set, video) 26 | v['frame'] = [] 27 | video_base_path = join(sub_set_base_path, video) 28 | xmls = sorted(glob.glob(join(video_base_path, '*.xml'))) 29 | for xml in xmls: 30 | f = dict() 31 | xmltree = ET.parse(xml) 32 | size = xmltree.findall('size')[0] 33 | frame_sz = [int(it.text) for it in size] # width,height 34 | objects = xmltree.findall('object') 35 | objs = [] 36 | for object_iter in objects: 37 | trackid = int(object_iter.find('trackid').text) 38 | name = (object_iter.find('name')).text 39 | bndbox = object_iter.find('bndbox') 40 | occluded = int(object_iter.find('occluded').text) 41 | o = dict() 42 | o['c'] = name 43 | o['bbox'] = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text), 44 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)] 45 | o['trackid'] = trackid 46 | o['occ'] = occluded 47 | objs.append(o) 48 | f['frame_sz'] = frame_sz 49 | f['img_path'] = xml.split('/')[-1].replace('xml', 'JPEG') 50 | f['objs'] = objs 51 | v['frame'].append(f) 52 | s.append(v) 53 | vid.append(s) 54 | print('save json (raw vid info), please wait 1 min~') 55 | json.dump(vid, open('vid.json', 'w'), indent=4, sort_keys=True) 56 | print('val.json has been saved in ./') 57 | -------------------------------------------------------------------------------- /lib/dataset/crop/vid/readme.md: -------------------------------------------------------------------------------- 1 | # Preprocessing VID(Object detection from video) 2 | Large Scale Visual Recognition Challenge 2015 (ILSVRC2015) 3 | 4 | ### Download dataset (86GB) 5 | 6 | ````shell 7 | wget http://bvisionweb1.cs.unc.edu/ilsvrc2015/ILSVRC2015_VID.tar.gz 8 | tar -xzvf ./ILSVRC2015_VID.tar.gz 9 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0000 ILSVRC2015/Annotations/VID/train/a 10 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0001 ILSVRC2015/Annotations/VID/train/b 11 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0002 ILSVRC2015/Annotations/VID/train/c 12 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0003 ILSVRC2015/Annotations/VID/train/d 13 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/val ILSVRC2015/Annotations/VID/train/e 14 | 15 | ln -sfb $PWD/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0000 ILSVRC2015/Data/VID/train/a 16 | ln -sfb $PWD/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0001 ILSVRC2015/Data/VID/train/b 17 | ln -sfb $PWD/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0002 ILSVRC2015/Data/VID/train/c 18 | ln -sfb $PWD/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0003 ILSVRC2015/Data/VID/train/d 19 | ln -sfb $PWD/ILSVRC2015/Data/VID/val ILSVRC2015/Data/VID/train/e 20 | ```` 21 | 22 | ### Crop & Generate data info (20 min) 23 | 24 | ````shell 25 | python parse_vid.py 26 | 27 | #python par_crop.py [crop_size] [num_threads] 28 | python par_crop.py 511 12 29 | python gen_json.py 30 | ```` 31 | Codes are modified from SiamMask. 
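The par_crop.py scripts referenced throughout these readmes are not reproduced in this section; the 511 argument and the crop511 folder names suggest the usual SiamFC-style, target-centred crop. A rough sketch of that geometry follows — the constants (exemplar size 127, context amount 0.5), helper name and example path are assumptions, not the exact script:

````python
# Hypothetical sketch of the SiamFC-style geometry behind the crop511 folders
# (par_crop.py itself is not shown here; sizes and names are assumptions).
import cv2
import numpy as np


def crop_like_siamfc(image, bbox, exemplar_size=127, context_amount=0.5, out_size=511):
    """Crop a target-centred square whose scale maps target+context to
    `exemplar_size`, pad out-of-image regions with the channel mean, and
    resize the result to `out_size` (511)."""
    x1, y1, x2, y2 = bbox                        # (xmin, ymin, xmax, ymax)
    cx, cy = (x1 + x2) / 2.0, (y1 + y2) / 2.0
    w, h = x2 - x1, y2 - y1
    pad = context_amount * (w + h)
    s_z = np.sqrt((w + pad) * (h + pad))         # side of the exemplar region
    s_x = s_z * out_size / float(exemplar_size)  # enlarged side for the 511 crop

    # square source window around the target centre (may exceed the image)
    xs1, ys1 = int(round(cx - s_x / 2.0)), int(round(cy - s_x / 2.0))
    xs2, ys2 = int(round(cx + s_x / 2.0)), int(round(cy + s_x / 2.0))
    top, left = max(0, -ys1), max(0, -xs1)
    bottom, right = max(0, ys2 - image.shape[0]), max(0, xs2 - image.shape[1])
    avg = tuple(float(v) for v in image.mean(axis=(0, 1)))  # channel-mean padding
    padded = cv2.copyMakeBorder(image, top, bottom, left, right,
                                cv2.BORDER_CONSTANT, value=avg)
    patch = padded[ys1 + top:ys2 + top, xs1 + left:xs2 + left]
    return cv2.resize(patch, (out_size, out_size))


if __name__ == '__main__':
    # example path only; adjust to your local data layout
    frame = cv2.imread('ILSVRC2015/Data/VID/train/a/some_video/000000.JPEG')
    print(crop_like_siamfc(frame, (50, 60, 180, 200)).shape)  # -> (511, 511, 3)
````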
-------------------------------------------------------------------------------- /lib/dataset/crop/visdrone/gen_json.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | from os import listdir 3 | import json 4 | import numpy as np 5 | 6 | print('loading json (raw visdrone info), please wait 20 seconds~') 7 | visdrone = json.load(open('visdrone.json', 'r')) 8 | 9 | 10 | def check_size(frame_sz, bbox): 11 | min_ratio = 0.1 12 | max_ratio = 0.75 13 | # only accept objects >10% and <75% of the total frame 14 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz))) 15 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio) 16 | return ok 17 | 18 | 19 | def check_borders(frame_sz, bbox): 20 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2 21 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \ 22 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \ 23 | ((frame_sz[1] - bbox[3]) > dist_from_border) 24 | return ok 25 | 26 | 27 | snippets = dict() 28 | 29 | n_videos = 0 30 | for subset in visdrone: 31 | for video in subset: 32 | n_videos += 1 33 | frames = video['frame'] 34 | snippet = dict() 35 | bp = video['base_path'] 36 | bp = bp.split('/') 37 | bp = join(bp[0], bp[-1]) 38 | 39 | snippets[bp] = dict() 40 | for f, frame in enumerate(frames): 41 | frame_sz = frame['frame_sz'] 42 | bbox = frame['bbox'] # (x,y,w,h) 43 | 44 | snippet['{:06d}'.format(f)] = [bbox[0], bbox[1], bbox[0]+bbox[2], bbox[1]+bbox[3]] #(xmin, ymin, xmax, ymax) 45 | 46 | snippets[bp]['{:02d}'.format(0)] = snippet.copy() 47 | 48 | # train = {k:v for (k,v) in snippets.items() if 'train' in k} 49 | # val = {k:v for (k,v) in snippets.items() if 'val' in k} 50 | 51 | train = {k:v for (k,v) in snippets.items()} 52 | 53 | # json.dump(train, open('/data2/visdrone/train.json', 'w'), indent=4, sort_keys=True) 54 | json.dump(train, open('/data/home/v-zhipeng/dataset/training/VISDRONE/train.json', 'w'), indent=4, sort_keys=True) 55 | print('done!') 56 | -------------------------------------------------------------------------------- /lib/dataset/crop/visdrone/parser_visdrone.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # ! 
./usr/bin/env python 3 | # __author__ = 'zzp' 4 | 5 | import cv2 6 | import json 7 | import glob 8 | import numpy as np 9 | from os.path import join 10 | from os import listdir 11 | 12 | import argparse 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--dir',type=str, default='/data/home/v-zhipeng/dataset/testing/VISDRONE', help='your vid data dir') 16 | args = parser.parse_args() 17 | 18 | visdrone_base_path = args.dir 19 | sub_sets = sorted({'VisDrone2019-SOT-train', 'VisDrone2019-SOT-val'}) 20 | 21 | visdrone = [] 22 | for sub_set in sub_sets: 23 | sub_set_base_path = join(visdrone_base_path, sub_set) 24 | videos = sorted(listdir(join(sub_set_base_path, 'sequences'))) 25 | s = [] 26 | for vi, video in enumerate(videos): 27 | print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos))) 28 | v = dict() 29 | v['base_path'] = join(sub_set, 'sequences', video) 30 | v['frame'] = [] 31 | video_base_path = join(sub_set_base_path, 'sequences', video) 32 | gts_path = join(sub_set_base_path, 'annotations', '{}.txt'.format(video)) 33 | # gts_file = open(gts_path, 'r') 34 | # gts = gts_file.readlines() 35 | gts = np.loadtxt(open(gts_path, "rb"), delimiter=',') 36 | 37 | # get image size 38 | im_path = join(video_base_path, 'img0000001.jpg') 39 | im = cv2.imread(im_path) 40 | size = im.shape # height, width 41 | frame_sz = [size[1], size[0]] # width,height 42 | 43 | # get all im name 44 | jpgs = sorted(glob.glob(join(video_base_path, '*.jpg'))) 45 | 46 | f = dict() 47 | for idx, img_path in enumerate(jpgs): 48 | f['frame_sz'] = frame_sz 49 | f['img_path'] = img_path.split('/')[-1] 50 | 51 | gt = gts[idx] 52 | bbox = [int(g) for g in gt] # (x,y,w,h) 53 | f['bbox'] = bbox 54 | v['frame'].append(f.copy()) 55 | s.append(v) 56 | visdrone.append(s) 57 | print('save json (raw visdrone info), please wait 1 min~') 58 | json.dump(visdrone, open('visdrone.json', 'w'), indent=4, sort_keys=True) 59 | print('visdrone.json has been saved in ./') 60 | -------------------------------------------------------------------------------- /lib/dataset/crop/visdrone/readme.md: -------------------------------------------------------------------------------- 1 | # Preprocessing VISDRONE (train and val) 2 | 3 | 4 | ### Crop & Generate data info (20 min) 5 | 6 | ````shell 7 | rm ./train/list.txt 8 | rm ./val/list.txt 9 | 10 | python parse_visdrone.py 11 | python par_crop.py 511 16 12 | python gen_json.py 13 | ```` 14 | -------------------------------------------------------------------------------- /lib/eval_toolkit/bin/_init_paths.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | import os.path as osp 7 | import sys 8 | 9 | 10 | def add_path(path): 11 | if path not in sys.path: 12 | sys.path.insert(0, path) 13 | 14 | 15 | this_dir = osp.dirname(__file__) 16 | 17 | lib_path = osp.join(this_dir, '../..', 'eval_toolkit') 18 | add_path(lib_path) 19 | -------------------------------------------------------------------------------- /lib/eval_toolkit/davis/davis2017-evaluation/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | 
lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | docs/site/ 99 | /site 100 | 101 | # mypy 102 | .mypy_cache/ 103 | 104 | # pytest 105 | .pytest_cache 106 | 107 | # Pylint 108 | .pylintrc 109 | 110 | # PyCharm 111 | .idea/ 112 | .DS_Store 113 | 114 | # Generated C code 115 | _mask.c 116 | -------------------------------------------------------------------------------- /lib/eval_toolkit/davis/davis2017-evaluation/README.md: -------------------------------------------------------------------------------- 1 | # DAVIS 2017 Semi-supervised and Unsupervised evaluation package 2 | 3 | This package is used to evaluate semi-supervised and unsupervised video multi-object segmentation models for the DAVIS 2017 dataset. 4 | 5 | This tool is also used to evaluate the submissions in the Codalab site for the Semi-supervised DAVIS Challenge and the Unsupervised DAVIS Challenge 6 | 7 | ### Installation 8 | ```bash 9 | # Download the code 10 | git clone https://github.com/davisvideochallenge/davis2017-evaluation.git && cd davis2017-evaluation 11 | # Install it - Python 3.6 or higher required 12 | python setup.py install 13 | ``` 14 | If you don't want to specify the DAVIS path every time, you can modify the default value in the variable `default_davis_path` in `evaluation_method.py`(the following examples assume that you have set it). 15 | Otherwise, you can specify the path in every call using using the flag `--davis_path /path/to/DAVIS` when calling `evaluation_method.py`. 16 | 17 | Once the evaluation has finished, two different CSV files will be generated inside the folder with the results: 18 | - `global_results-SUBSET.csv` contains the overall results for a certain `SUBSET`. 19 | - `per-sequence_results-SUBSET.csv` contain the per sequence results for a certain `SUBSET`. 20 | 21 | If a folder that contains the previous files is evaluated again, the results will be read from the CSV files instead of recomputing them. 
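If you want to consume those CSV files programmatically (for example, to compare several runs), here is a minimal sketch, assuming the default `val` subset and the example results folder used in the next section:

```python
import pandas as pd

# file names follow global_results-<SUBSET>.csv / per-sequence_results-<SUBSET>.csv
table_g = pd.read_csv('results/semi-supervised/osvos/global_results-val.csv')
table_seq = pd.read_csv('results/semi-supervised/osvos/per-sequence_results-val.csv')

print(table_g[['J&F-Mean', 'J-Mean', 'F-Mean']])   # overall metrics
print(table_seq.sort_values('J-Mean').head())      # weakest sequences first
```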
22 | 23 | ## Evaluate DAVIS 2017 Semi-supervised 24 | In order to evaluate your semi-supervised method in DAVIS 2017, execute the following command substituting `results/semi-supervised/osvos` by the folder path that contains your results: 25 | ```bash 26 | python evaluation_method.py --task semi-supervised --results_path results/semi-supervised/osvos 27 | ``` 28 | The semi-supervised results have been generated using [OSVOS](https://github.com/kmaninis/OSVOS-caffe). 29 | 30 | ## Evaluate DAVIS 2017 Unsupervised 31 | In order to evaluate your unsupervised method in DAVIS 2017, execute the following command substituting `results/unsupervised/rvos` by the folder path that contains your results: 32 | ```bash 33 | python evaluation_method.py --task unsupervised --results_path results/unsupervised/rvos 34 | ``` 35 | The unsupervised results example have been generated using [RVOS](https://github.com/imatge-upc/rvos). 36 | 37 | ## Evaluation running in Codalab 38 | In case you would like to know which is the evaluation script that is running in the Codalab servers, check the `evaluation_codalab.py` script. 39 | 40 | This package runs in the following docker image: [scaelles/codalab:anaconda3-2018.12](https://cloud.docker.com/u/scaelles/repository/docker/scaelles/codalab) 41 | 42 | ## Citation 43 | 44 | Please cite both papers in your publications if DAVIS or this code helps your research. 45 | 46 | ```latex 47 | @article{Caelles_arXiv_2019, 48 | author = {Sergi Caelles and Jordi Pont-Tuset and Federico Perazzi and Alberto Montes and Kevis-Kokitsi Maninis and Luc {Van Gool}}, 49 | title = {The 2019 DAVIS Challenge on VOS: Unsupervised Multi-Object Segmentation}, 50 | journal = {arXiv}, 51 | year = {2019} 52 | } 53 | ``` 54 | 55 | ```latex 56 | @article{Pont-Tuset_arXiv_2017, 57 | author = {Jordi Pont-Tuset and Federico Perazzi and Sergi Caelles and Pablo Arbel\'aez and Alexander Sorkine-Hornung and Luc {Van Gool}}, 58 | title = {The 2017 DAVIS Challenge on Video Object Segmentation}, 59 | journal = {arXiv:1704.00675}, 60 | year = {2017} 61 | } 62 | ``` 63 | 64 | -------------------------------------------------------------------------------- /lib/eval_toolkit/davis/davis2017-evaluation/davis2017/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | __version__ = '0.1.0' 4 | -------------------------------------------------------------------------------- /lib/eval_toolkit/davis/davis2017-evaluation/davis2017/results.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from PIL import Image 4 | import sys 5 | import pdb 6 | 7 | class Results(object): 8 | def __init__(self, root_dir): 9 | self.root_dir = root_dir 10 | 11 | def _read_mask(self, sequence, frame_id): 12 | try: 13 | mask_path = os.path.join(self.root_dir, sequence, f'{frame_id}.png') 14 | # pdb.set_trace() 15 | 16 | return np.array(Image.open(mask_path)) 17 | except IOError as err: 18 | sys.stdout.write(sequence + " frame %s not found!\n" % frame_id) 19 | sys.stdout.write("The frames have to be indexed PNG files placed inside the corespondent sequence " 20 | "folder.\nThe indexes have to match with the initial frame.\n") 21 | sys.stderr.write("IOError: " + err.strerror + "\n") 22 | sys.exit() 23 | 24 | def read_masks(self, sequence, masks_id): 25 | mask_0 = self._read_mask(sequence, masks_id[0]) 26 | masks = np.zeros((len(masks_id), *mask_0.shape)) 27 | for ii, m in 
enumerate(masks_id): 28 | masks[ii, ...] = self._read_mask(sequence, m) 29 | num_objects = int(np.max(masks)) 30 | tmp = np.ones((num_objects, *masks.shape)) 31 | tmp = tmp * np.arange(1, num_objects + 1)[:, None, None, None] 32 | masks = (tmp == masks[None, ...]) > 0 33 | return masks 34 | -------------------------------------------------------------------------------- /lib/eval_toolkit/davis/davis2017-evaluation/demo.sh: -------------------------------------------------------------------------------- 1 | python evaluation_method.py --task semi-supervised --results_path /home/zpzhang/project/ECCV2020/TrackSeg/results/DAVIS2016 --davis_path /home/zpzhang/data/testing/DAVIS-trainval --year 2016 2 | -------------------------------------------------------------------------------- /lib/eval_toolkit/davis/davis2017-evaluation/evaluation_codalab.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | import os.path 4 | from time import time 5 | 6 | import numpy as np 7 | import pandas 8 | from davis2017.evaluation import DAVISEvaluation 9 | 10 | task = 'semi-supervised' 11 | gt_set = 'test-dev' 12 | 13 | time_start = time() 14 | # as per the metadata file, input and output directories are the arguments 15 | if len(sys.argv) < 3: 16 | input_dir = "input_dir" 17 | output_dir = "output_dir" 18 | debug = True 19 | else: 20 | [_, input_dir, output_dir] = sys.argv 21 | debug = False 22 | 23 | # unzipped submission data is always in the 'res' subdirectory 24 | # https://github.com/codalab/codalab-competitions/wiki/User_Building-a-Scoring-Program-for-a-Competition#directory-structure-for-submissions 25 | submission_path = os.path.join(input_dir, 'res') 26 | if not os.path.exists(submission_path): 27 | sys.exit('Could not find submission file {0}'.format(submission_path)) 28 | 29 | # unzipped reference data is always in the 'ref' subdirectory 30 | # https://github.com/codalab/codalab-competitions/wiki/User_Building-a-Scoring-Program-for-a-Competition#directory-structure-for-submissions 31 | gt_path = os.path.join(input_dir, 'ref') 32 | if not os.path.exists(gt_path): 33 | sys.exit('Could not find GT file {0}'.format(gt_path)) 34 | 35 | 36 | # Create dataset 37 | dataset_eval = DAVISEvaluation(davis_root=gt_path, gt_set=gt_set, task=task, codalab=True) 38 | 39 | # Check directory structure 40 | res_subfolders = os.listdir(submission_path) 41 | if len(res_subfolders) == 1: 42 | sys.stdout.write( 43 | "Incorrect folder structure, the folders of the sequences have to be placed directly inside the " 44 | "zip.\nInside every folder of the sequences there must be an indexed PNG file for every frame.\n" 45 | "The indexes have to match with the initial frame.\n") 46 | sys.exit() 47 | 48 | # Check that all sequences are there 49 | missing = False 50 | for seq in dataset_eval.dataset.get_sequences(): 51 | if seq not in res_subfolders: 52 | sys.stdout.write(seq + " sequence is missing.\n") 53 | missing = True 54 | if missing: 55 | sys.stdout.write( 56 | "Verify also the folder structure, the folders of the sequences have to be placed directly inside " 57 | "the zip.\nInside every folder of the sequences there must be an indexed PNG file for every frame.\n" 58 | "The indexes have to match with the initial frame.\n") 59 | sys.exit() 60 | 61 | metrics_res = dataset_eval.evaluate(submission_path, debug=debug) 62 | J, F = metrics_res['J'], metrics_res['F'] 63 | 64 | # Generate output to the stdout 65 | seq_names = list(J['M_per_object'].keys()) 66 
| if gt_set == "val" or gt_set == "train" or gt_set == "test-dev": 67 | sys.stdout.write("----------------Global results in CSV---------------\n") 68 | g_measures = ['J&F-Mean', 'J-Mean', 'J-Recall', 'J-Decay', 'F-Mean', 'F-Recall', 'F-Decay'] 69 | final_mean = (np.mean(J["M"]) + np.mean(F["M"])) / 2. 70 | g_res = np.array([final_mean, np.mean(J["M"]), np.mean(J["R"]), np.mean(J["D"]), np.mean(F["M"]), np.mean(F["R"]), 71 | np.mean(F["D"])]) 72 | table_g = pandas.DataFrame(data=np.reshape(g_res, [1, len(g_res)]), columns=g_measures) 73 | table_g.to_csv(sys.stdout, index=False, float_format="%0.3f") 74 | 75 | sys.stdout.write("\n\n------------Per sequence results in CSV-------------\n") 76 | seq_measures = ['Sequence', 'J-Mean', 'F-Mean'] 77 | J_per_object = [J['M_per_object'][x] for x in seq_names] 78 | F_per_object = [F['M_per_object'][x] for x in seq_names] 79 | table_seq = pandas.DataFrame(data=list(zip(seq_names, J_per_object, F_per_object)), columns=seq_measures) 80 | table_seq.to_csv(sys.stdout, index=False, float_format="%0.3f") 81 | 82 | # Write scores to a file named "scores.txt" 83 | with open(os.path.join(output_dir, 'scores.txt'), 'w') as output_file: 84 | final_mean = (np.mean(J["M"]) + np.mean(F["M"])) / 2. 85 | output_file.write("GlobalMean: %f\n" % final_mean) 86 | output_file.write("JMean: %f\n" % np.mean(J["M"])) 87 | output_file.write("JRecall: %f\n" % np.mean(J["R"])) 88 | output_file.write("JDecay: %f\n" % np.mean(J["D"])) 89 | output_file.write("FMean: %f\n" % np.mean(F["M"])) 90 | output_file.write("FRecall: %f\n" % np.mean(F["R"])) 91 | output_file.write("FDecay: %f\n" % np.mean(F["D"])) 92 | total_time = time() - time_start 93 | sys.stdout.write('\nTotal time:' + str(total_time)) 94 | -------------------------------------------------------------------------------- /lib/eval_toolkit/davis/davis2017-evaluation/evaluation_method.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | from time import time 5 | import argparse 6 | 7 | import numpy as np 8 | import pandas as pd 9 | from davis2017.evaluation import DAVISEvaluation 10 | 11 | default_davis_path = '/path/to/the/folder/DAVIS' 12 | 13 | time_start = time() 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('--davis_path', type=str, help='Path to the DAVIS folder containing the JPEGImages, Annotations, ' 16 | 'ImageSets, Annotations_unsupervised folders', 17 | required=False, default=default_davis_path) 18 | parser.add_argument('--set', type=str, help='Subset to evaluate the results', default='val') 19 | parser.add_argument('--task', type=str, help='Task to evaluate the results', default='unsupervised', 20 | choices=['semi-supervised', 'unsupervised']) 21 | parser.add_argument('--results_path', type=str, help='Path to the folder containing the sequences folders', 22 | required=True) 23 | parser.add_argument("--year", type=str, help="Davis dataset year (default: 2017)", default='2017', 24 | choices=['2016', '2017', '2019']) 25 | 26 | args, _ = parser.parse_known_args() 27 | csv_name_global = f'global_results-{args.set}.csv' 28 | csv_name_per_sequence = f'per-sequence_results-{args.set}.csv' 29 | 30 | # Check if the method has been evaluated before, if so read the results, otherwise compute the results 31 | csv_name_global_path = os.path.join(args.results_path, csv_name_global) 32 | csv_name_per_sequence_path = os.path.join(args.results_path, csv_name_per_sequence) 33 | if os.path.exists(csv_name_global_path) and 
os.path.exists(csv_name_per_sequence_path): 34 | print('Using precomputed results...') 35 | table_g = pd.read_csv(csv_name_global_path) 36 | table_seq = pd.read_csv(csv_name_per_sequence_path) 37 | else: 38 | print(f'Evaluating sequences for the {args.task} task...') 39 | # Create dataset and evaluate 40 | dataset_eval = DAVISEvaluation(davis_root=args.davis_path, task=args.task, gt_set=args.set, year=args.year) 41 | metrics_res = dataset_eval.evaluate(args.results_path) 42 | J, F = metrics_res['J'], metrics_res['F'] 43 | 44 | # Generate dataframe for the general results 45 | g_measures = ['J&F-Mean', 'J-Mean', 'J-Recall', 'J-Decay', 'F-Mean', 'F-Recall', 'F-Decay'] 46 | final_mean = (np.mean(J["M"]) + np.mean(F["M"])) / 2. 47 | g_res = np.array([final_mean, np.mean(J["M"]), np.mean(J["R"]), np.mean(J["D"]), np.mean(F["M"]), np.mean(F["R"]), 48 | np.mean(F["D"])]) 49 | g_res = np.reshape(g_res, [1, len(g_res)]) 50 | table_g = pd.DataFrame(data=g_res, columns=g_measures) 51 | with open(csv_name_global_path, 'w') as f: 52 | table_g.to_csv(f, index=False, float_format="%.3f") 53 | print(f'Global results saved in {csv_name_global_path}') 54 | 55 | # Generate a dataframe for the per sequence results 56 | seq_names = list(J['M_per_object'].keys()) 57 | seq_measures = ['Sequence', 'J-Mean', 'F-Mean'] 58 | J_per_object = [J['M_per_object'][x] for x in seq_names] 59 | F_per_object = [F['M_per_object'][x] for x in seq_names] 60 | table_seq = pd.DataFrame(data=list(zip(seq_names, J_per_object, F_per_object)), columns=seq_measures) 61 | with open(csv_name_per_sequence_path, 'w') as f: 62 | table_seq.to_csv(f, index=False, float_format="%.3f") 63 | print(f'Per-sequence results saved in {csv_name_per_sequence_path}') 64 | 65 | # Print the results 66 | sys.stdout.write(f"--------------------------- Global results for {args.set} ---------------------------\n") 67 | print(table_g.to_string(index=False)) 68 | sys.stdout.write(f"\n---------- Per sequence results for {args.set} ----------\n") 69 | print(table_seq.to_string(index=False)) 70 | total_time = time() - time_start 71 | sys.stdout.write('\nTotal time:' + str(total_time)) 72 | -------------------------------------------------------------------------------- /lib/eval_toolkit/davis/davis2017-evaluation/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = davis2017 3 | version = attr: davis2017.__version__ 4 | description = Evaluation Framework for DAVIS 2017 Semi-supervised and Unsupervised used in the DAVIS Challenges 5 | long_description = file: README.md 6 | long_description_content_type = text/markdown 7 | keywords = segmentation 8 | license = GPL v3 9 | author = Sergi Caelles 10 | author-email = scaelles@vision.ee.ethz.ch 11 | home-page = https://github.com/davisvideochallenge/davis2017-evaluation 12 | classifiers = 13 | Development Status :: 4 - Beta 14 | Intended Audience :: Developers 15 | Intended Audience :: Education 16 | Intended Audience :: Science/Research 17 | License :: OSI Approved :: GNU General Public License v3 (GPLv3) 18 | Programming Language :: Python :: 3.6 19 | Programming Language :: Python :: 3.7 20 | Topic :: Scientific/Engineering :: Human Machine Interfaces 21 | Topic :: Software Development :: Libraries 22 | Topic :: Software Development :: Libraries :: Python Modules 23 | -------------------------------------------------------------------------------- /lib/eval_toolkit/davis/davis2017-evaluation/setup.py: 
-------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | import sys 3 | 4 | if sys.version_info < (3, 6): 5 | sys.exit('Sorry, only Python >= 3.6 is supported') 6 | 7 | setup( 8 | python_requires='>=3.6, <4', 9 | install_requires=[ 10 | 'Pillow>=4.1.1', 11 | 'networkx>=2.0', 12 | 'numpy>=1.12.1', 13 | 'opencv-python>=4.0.0.21', 14 | 'pandas>=0.21.1', 15 | 'pathlib2;python_version<"3.5"', 16 | 'scikit-image>=0.13.1', 17 | 'scikit-learn>=0.18', 18 | 'scipy>=1.0.0', 19 | 'tqdm>=4.28.1' 20 | ]) 21 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/__init__.py -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .vot import VOTDataset, VOTLTDataset 2 | from .otb import OTBDataset 3 | from .uav import UAVDataset 4 | from .lasot import LaSOTDataset 5 | from .nfs import NFSDataset 6 | from .trackingnet import TrackingNetDataset 7 | from .got10k import GOT10kDataset 8 | 9 | class DatasetFactory(object): 10 | @staticmethod 11 | def create_dataset(**kwargs): 12 | """ 13 | Args: 14 | name: dataset name 'OTB2015', 'LaSOT', 'UAV123', 'NFS240', 'NFS30', 15 | 'VOT2018', 'VOT2016', 'VOT2018-LT' 16 | dataset_root: dataset root 17 | load_img: wether to load image 18 | Return: 19 | dataset 20 | """ 21 | assert 'name' in kwargs, "should provide dataset name" 22 | name = kwargs['name'] 23 | if 'OTB' in name: 24 | dataset = OTBDataset(**kwargs) 25 | elif 'LaSOT' == name: 26 | dataset = LaSOTDataset(**kwargs) 27 | elif 'UAV' in name: 28 | dataset = UAVDataset(**kwargs) 29 | elif 'NFS' in name: 30 | dataset = NFSDataset(**kwargs) 31 | elif 'VOT2018' == name or 'VOT2016' == name: 32 | dataset = VOTDataset(**kwargs) 33 | elif 'VOT2018-LT' == name: 34 | dataset = VOTLTDataset(**kwargs) 35 | elif 'TrackingNet' == name: 36 | dataset = TrackingNetDataset(**kwargs) 37 | elif 'GOT-10k' == name: 38 | dataset = GOT10kDataset(**kwargs) 39 | else: 40 | raise Exception("unknow dataset {}".format(kwargs['name'])) 41 | return dataset 42 | 43 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | 3 | class Dataset(object): 4 | def __init__(self, name, dataset_root): 5 | self.name = name 6 | self.dataset_root = dataset_root 7 | self.videos = None 8 | 9 | def __getitem__(self, idx): 10 | if isinstance(idx, str): 11 | return self.videos[idx] 12 | elif isinstance(idx, int): 13 | return self.videos[sorted(list(self.videos.keys()))[idx]] 14 | 15 | def __len__(self): 16 | return len(self.videos) 17 | 18 | def __iter__(self): 19 | keys = sorted(list(self.videos.keys())) 20 | for key in keys: 21 | yield self.videos[key] 22 | 23 | def set_tracker(self, path, tracker_names): 24 | """ 25 | Args: 26 | path: path to tracker results, 27 | tracker_names: list of tracker name 28 | """ 29 | self.tracker_path = path 30 | self.tracker_names = tracker_names 31 | # for video in tqdm(self.videos.values(), 32 | # desc='loading tacker result', ncols=100): 33 | # video.load_tracker(path, 
tracker_names) 34 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/datasets/got10k.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import os 4 | import numpy as np 5 | 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | class GOT10kVideo(Video): 13 | """ 14 | Args: 15 | name: video name 16 | root: dataset root 17 | video_dir: video directory 18 | init_rect: init rectangle 19 | img_names: image names 20 | gt_rect: groundtruth rectangle 21 | attr: attribute of video 22 | """ 23 | def __init__(self, name, root, video_dir, init_rect, img_names, 24 | gt_rect, attr, load_img=False): 25 | super(GOT10kVideo, self).__init__(name, root, video_dir, 26 | init_rect, img_names, gt_rect, attr, load_img) 27 | 28 | # def load_tracker(self, path, tracker_names=None): 29 | # """ 30 | # Args: 31 | # path(str): path to result 32 | # tracker_name(list): name of tracker 33 | # """ 34 | # if not tracker_names: 35 | # tracker_names = [x.split('/')[-1] for x in glob(path) 36 | # if os.path.isdir(x)] 37 | # if isinstance(tracker_names, str): 38 | # tracker_names = [tracker_names] 39 | # # self.pred_trajs = {} 40 | # for name in tracker_names: 41 | # traj_file = os.path.join(path, name, self.name+'.txt') 42 | # if os.path.exists(traj_file): 43 | # with open(traj_file, 'r') as f : 44 | # self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 45 | # for x in f.readlines()] 46 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 47 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 48 | # else: 49 | 50 | # self.tracker_names = list(self.pred_trajs.keys()) 51 | 52 | class GOT10kDataset(Dataset): 53 | """ 54 | Args: 55 | name: dataset name, should be "NFS30" or "NFS240" 56 | dataset_root, dataset root dir 57 | """ 58 | def __init__(self, name, dataset_root, load_img=False): 59 | super(GOT10kDataset, self).__init__(name, dataset_root) 60 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 61 | meta_data = json.load(f) 62 | 63 | # load videos 64 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 65 | self.videos = {} 66 | for video in pbar: 67 | pbar.set_postfix_str(video) 68 | self.videos[video] = GOT10kVideo(video, 69 | dataset_root, 70 | meta_data[video]['video_dir'], 71 | meta_data[video]['init_rect'], 72 | meta_data[video]['img_names'], 73 | meta_data[video]['gt_rect'], 74 | None) 75 | self.attr = {} 76 | self.attr['ALL'] = list(self.videos.keys()) 77 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/datasets/lasot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | class LaSOTVideo(Video): 12 | """ 13 | Args: 14 | name: video name 15 | root: dataset root 16 | video_dir: video directory 17 | init_rect: init rectangle 18 | img_names: image names 19 | gt_rect: groundtruth rectangle 20 | attr: attribute of video 21 | """ 22 | def __init__(self, name, root, video_dir, init_rect, img_names, 23 | gt_rect, attr, absent, load_img=False): 24 | super(LaSOTVideo, self).__init__(name, root, video_dir, 25 | init_rect, img_names, gt_rect, attr, load_img) 26 | self.absent = np.array(absent, np.int8) 27 | 28 | 
def load_tracker(self, path, tracker_names=None, store=True): 29 | """ 30 | Args: 31 | path(str): path to result 32 | tracker_name(list): name of tracker 33 | """ 34 | if not tracker_names: 35 | tracker_names = [x.split('/')[-1] for x in glob(path) 36 | if os.path.isdir(x)] 37 | if isinstance(tracker_names, str): 38 | tracker_names = [tracker_names] 39 | for name in tracker_names: 40 | traj_file = os.path.join(path, name, self.name+'.txt') 41 | if os.path.exists(traj_file): 42 | with open(traj_file, 'r') as f : 43 | pred_traj = [list(map(float, x.strip().split(','))) 44 | for x in f.readlines()] 45 | else: 46 | print("File not exists: ", traj_file) 47 | if self.name == 'monkey-17': 48 | pred_traj = pred_traj[:len(self.gt_traj)] 49 | if store: 50 | self.pred_trajs[name] = pred_traj 51 | else: 52 | return pred_traj 53 | self.tracker_names = list(self.pred_trajs.keys()) 54 | 55 | 56 | 57 | class LaSOTDataset(Dataset): 58 | """ 59 | Args: 60 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50' 61 | dataset_root: dataset root 62 | load_img: wether to load all imgs 63 | """ 64 | def __init__(self, name, dataset_root, load_img=False): 65 | super(LaSOTDataset, self).__init__(name, dataset_root) 66 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 67 | meta_data = json.load(f) 68 | 69 | # load videos 70 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 71 | self.videos = {} 72 | for video in pbar: 73 | pbar.set_postfix_str(video) 74 | self.videos[video] = LaSOTVideo(video, 75 | dataset_root, 76 | meta_data[video]['video_dir'], 77 | meta_data[video]['init_rect'], 78 | meta_data[video]['img_names'], 79 | meta_data[video]['gt_rect'], 80 | meta_data[video]['attr'], 81 | meta_data[video]['absent']) 82 | 83 | # set attr 84 | attr = [] 85 | for x in self.videos.values(): 86 | attr += x.attr 87 | attr = set(attr) 88 | self.attr = {} 89 | self.attr['ALL'] = list(self.videos.keys()) 90 | for x in attr: 91 | self.attr[x] = [] 92 | for k, v in self.videos.items(): 93 | for attr_ in v.attr: 94 | self.attr[attr_].append(k) 95 | 96 | 97 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/datasets/nfs.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | 12 | class NFSVideo(Video): 13 | """ 14 | Args: 15 | name: video name 16 | root: dataset root 17 | video_dir: video directory 18 | init_rect: init rectangle 19 | img_names: image names 20 | gt_rect: groundtruth rectangle 21 | attr: attribute of video 22 | """ 23 | def __init__(self, name, root, video_dir, init_rect, img_names, 24 | gt_rect, attr, load_img=False): 25 | super(NFSVideo, self).__init__(name, root, video_dir, 26 | init_rect, img_names, gt_rect, attr, load_img) 27 | 28 | # def load_tracker(self, path, tracker_names=None): 29 | # """ 30 | # Args: 31 | # path(str): path to result 32 | # tracker_name(list): name of tracker 33 | # """ 34 | # if not tracker_names: 35 | # tracker_names = [x.split('/')[-1] for x in glob(path) 36 | # if os.path.isdir(x)] 37 | # if isinstance(tracker_names, str): 38 | # tracker_names = [tracker_names] 39 | # # self.pred_trajs = {} 40 | # for name in tracker_names: 41 | # traj_file = os.path.join(path, name, self.name+'.txt') 42 | # if os.path.exists(traj_file): 43 | # with open(traj_file, 'r') as f : 44 | # 
self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 45 | # for x in f.readlines()] 46 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 47 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 48 | # else: 49 | 50 | # self.tracker_names = list(self.pred_trajs.keys()) 51 | 52 | class NFSDataset(Dataset): 53 | """ 54 | Args: 55 | name: dataset name, should be "NFS30" or "NFS240" 56 | dataset_root, dataset root dir 57 | """ 58 | def __init__(self, name, dataset_root, load_img=False): 59 | super(NFSDataset, self).__init__(name, dataset_root) 60 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 61 | meta_data = json.load(f) 62 | 63 | # load videos 64 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 65 | self.videos = {} 66 | for video in pbar: 67 | pbar.set_postfix_str(video) 68 | self.videos[video] = NFSVideo(video, 69 | dataset_root, 70 | meta_data[video]['video_dir'], 71 | meta_data[video]['init_rect'], 72 | meta_data[video]['img_names'], 73 | meta_data[video]['gt_rect'], 74 | None) 75 | 76 | self.attr = {} 77 | self.attr['ALL'] = list(self.videos.keys()) 78 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/datasets/trackingnet.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | class TrackingNetVideo(Video): 12 | """ 13 | Args: 14 | name: video name 15 | root: dataset root 16 | video_dir: video directory 17 | init_rect: init rectangle 18 | img_names: image names 19 | gt_rect: groundtruth rectangle 20 | attr: attribute of video 21 | """ 22 | def __init__(self, name, root, video_dir, init_rect, img_names, 23 | gt_rect, attr, load_img=False): 24 | super(TrackingNetVideo, self).__init__(name, root, video_dir, 25 | init_rect, img_names, gt_rect, attr, load_img) 26 | 27 | # def load_tracker(self, path, tracker_names=None): 28 | # """ 29 | # Args: 30 | # path(str): path to result 31 | # tracker_name(list): name of tracker 32 | # """ 33 | # if not tracker_names: 34 | # tracker_names = [x.split('/')[-1] for x in glob(path) 35 | # if os.path.isdir(x)] 36 | # if isinstance(tracker_names, str): 37 | # tracker_names = [tracker_names] 38 | # # self.pred_trajs = {} 39 | # for name in tracker_names: 40 | # traj_file = os.path.join(path, name, self.name+'.txt') 41 | # if os.path.exists(traj_file): 42 | # with open(traj_file, 'r') as f : 43 | # self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 44 | # for x in f.readlines()] 45 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 46 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 47 | # else: 48 | 49 | # self.tracker_names = list(self.pred_trajs.keys()) 50 | 51 | class TrackingNetDataset(Dataset): 52 | """ 53 | Args: 54 | name: dataset name, should be "NFS30" or "NFS240" 55 | dataset_root, dataset root dir 56 | """ 57 | def __init__(self, name, dataset_root, load_img=False): 58 | super(TrackingNetDataset, self).__init__(name, dataset_root) 59 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 60 | meta_data = json.load(f) 61 | 62 | # load videos 63 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 64 | self.videos = {} 65 | for video in pbar: 66 | pbar.set_postfix_str(video) 67 | self.videos[video] = TrackingNetVideo(video, 68 | dataset_root, 69 | 
meta_data[video]['video_dir'], 70 | meta_data[video]['init_rect'], 71 | meta_data[video]['img_names'], 72 | meta_data[video]['gt_rect'], 73 | None) 74 | self.attr = {} 75 | self.attr['ALL'] = list(self.videos.keys()) 76 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/datasets/uav.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from tqdm import tqdm 5 | from glob import glob 6 | 7 | from .dataset import Dataset 8 | from .video import Video 9 | 10 | class UAVVideo(Video): 11 | """ 12 | Args: 13 | name: video name 14 | root: dataset root 15 | video_dir: video directory 16 | init_rect: init rectangle 17 | img_names: image names 18 | gt_rect: groundtruth rectangle 19 | attr: attribute of video 20 | """ 21 | def __init__(self, name, root, video_dir, init_rect, img_names, 22 | gt_rect, attr, load_img=False): 23 | super(UAVVideo, self).__init__(name, root, video_dir, 24 | init_rect, img_names, gt_rect, attr, load_img) 25 | 26 | 27 | class UAVDataset(Dataset): 28 | """ 29 | Args: 30 | name: dataset name, should be 'UAV123', 'UAV20L' 31 | dataset_root: dataset root 32 | load_img: wether to load all imgs 33 | """ 34 | def __init__(self, name, dataset_root, load_img=False): 35 | super(UAVDataset, self).__init__(name, dataset_root) 36 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 37 | meta_data = json.load(f) 38 | 39 | # load videos 40 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 41 | self.videos = {} 42 | for video in pbar: 43 | pbar.set_postfix_str(video) 44 | self.videos[video] = UAVVideo(video, 45 | dataset_root, 46 | meta_data[video]['video_dir'], 47 | meta_data[video]['init_rect'], 48 | meta_data[video]['img_names'], 49 | meta_data[video]['gt_rect'], 50 | meta_data[video]['attr']) 51 | 52 | # set attr 53 | attr = [] 54 | for x in self.videos.values(): 55 | attr += x.attr 56 | attr = set(attr) 57 | self.attr = {} 58 | self.attr['ALL'] = list(self.videos.keys()) 59 | for x in attr: 60 | self.attr[x] = [] 61 | for k, v in self.videos.items(): 62 | for attr_ in v.attr: 63 | self.attr[attr_].append(k) 64 | 65 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .ar_benchmark import AccuracyRobustnessBenchmark 2 | from .eao_benchmark import EAOBenchmark 3 | from .ope_benchmark import OPEBenchmark 4 | from .f1_benchmark import F1Benchmark 5 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import region 2 | from .statistics import * 3 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.6/region.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.6/region.o -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.6/src/region.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.6/src/region.o -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.7/region.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.7/region.o -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.7/src/region.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/utils/build/temp.linux-x86_64-3.7/src/region.o -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/utils/c_region.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "src/region.h": 2 | ctypedef enum region_type "RegionType": 3 | EMTPY 4 | SPECIAL 5 | RECTANGEL 6 | POLYGON 7 | MASK 8 | 9 | ctypedef struct region_bounds: 10 | float top 11 | float bottom 12 | float left 13 | float right 14 | 15 | ctypedef struct region_rectangle: 16 | float x 17 | float y 18 | float width 19 | float height 20 | 21 | # ctypedef struct region_mask: 22 | # int x 23 | # int y 24 | # int width 25 | # int height 26 | # char *data 27 | 28 | ctypedef struct region_polygon: 29 | int count 30 | float *x 31 | float *y 32 | 33 | ctypedef union region_container_data: 34 | region_rectangle rectangle 35 | region_polygon polygon 36 | # region_mask mask 37 | int special 38 | 39 | ctypedef struct region_container: 40 | region_type type 41 | region_container_data data 42 | 43 | # ctypedef struct region_overlap: 44 | # float overlap 45 | # float only1 46 | # float only2 47 | 48 | # region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds) 49 | 50 | float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds) 51 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/utils/misc.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | def determine_thresholds(confidence, resolution=100): 5 | """choose threshold according to confidence 6 | 7 | Args: 8 | confidence: list or numpy array or numpy array 9 | reolution: number of threshold to choose 10 | 11 | Restures: 12 | threshold: numpy array 13 | """ 14 | if isinstance(confidence, list): 15 
| confidence = np.array(confidence) 16 | confidence = confidence.flatten() 17 | confidence = confidence[~np.isnan(confidence)] 18 | confidence.sort() 19 | 20 | assert len(confidence) > resolution and resolution > 2 21 | 22 | thresholds = np.ones((resolution)) 23 | thresholds[0] = - np.inf 24 | thresholds[-1] = np.inf 25 | delta = np.floor(len(confidence) / (resolution - 2)) 26 | idxs = np.linspace(delta, len(confidence)-delta, resolution-2, dtype=np.int32) 27 | thresholds[1:-1] = confidence[idxs] 28 | return thresholds 29 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/utils/region.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/utils/region.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/utils/region.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/eval_toolkit/pysot/utils/region.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/utils/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Build import cythonize 4 | 5 | setup( 6 | ext_modules = cythonize([Extension("region", ["region.pyx", "src/region.c"])]), 7 | ) 8 | 9 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/utils/src/region.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 4; tab-width: 4 -*- */ 2 | 3 | #ifndef _REGION_H_ 4 | #define _REGION_H_ 5 | 6 | #ifdef TRAX_STATIC_DEFINE 7 | # define __TRAX_EXPORT 8 | #else 9 | # ifndef __TRAX_EXPORT 10 | # if defined(_MSC_VER) 11 | # ifdef trax_EXPORTS 12 | /* We are building this library */ 13 | # define __TRAX_EXPORT __declspec(dllexport) 14 | # else 15 | /* We are using this library */ 16 | # define __TRAX_EXPORT __declspec(dllimport) 17 | # endif 18 | # elif defined(__GNUC__) 19 | # ifdef trax_EXPORTS 20 | /* We are building this library */ 21 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 22 | # else 23 | /* We are using this library */ 24 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 25 | # endif 26 | # endif 27 | # endif 28 | #endif 29 | 30 | #ifndef MAX 31 | #define MAX(a,b) (((a) > (b)) ? (a) : (b)) 32 | #endif 33 | 34 | #ifndef MIN 35 | #define MIN(a,b) (((a) < (b)) ? 
(a) : (b)) 36 | #endif 37 | 38 | #define TRAX_DEFAULT_CODE 0 39 | 40 | #define REGION_LEGACY_RASTERIZATION 1 41 | 42 | #ifdef __cplusplus 43 | extern "C" { 44 | #endif 45 | 46 | typedef enum region_type {EMPTY, SPECIAL, RECTANGLE, POLYGON, MASK} region_type; 47 | 48 | typedef struct region_bounds { 49 | 50 | float top; 51 | float bottom; 52 | float left; 53 | float right; 54 | 55 | } region_bounds; 56 | 57 | typedef struct region_polygon { 58 | 59 | int count; 60 | 61 | float* x; 62 | float* y; 63 | 64 | } region_polygon; 65 | 66 | typedef struct region_mask { 67 | 68 | int x; 69 | int y; 70 | 71 | int width; 72 | int height; 73 | 74 | char* data; 75 | 76 | } region_mask; 77 | 78 | typedef struct region_rectangle { 79 | 80 | float x; 81 | float y; 82 | float width; 83 | float height; 84 | 85 | } region_rectangle; 86 | 87 | typedef struct region_container { 88 | enum region_type type; 89 | union { 90 | region_rectangle rectangle; 91 | region_polygon polygon; 92 | region_mask mask; 93 | int special; 94 | } data; 95 | } region_container; 96 | 97 | typedef struct region_overlap { 98 | 99 | float overlap; 100 | float only1; 101 | float only2; 102 | 103 | } region_overlap; 104 | 105 | extern const region_bounds region_no_bounds; 106 | 107 | __TRAX_EXPORT int region_set_flags(int mask); 108 | 109 | __TRAX_EXPORT int region_clear_flags(int mask); 110 | 111 | __TRAX_EXPORT region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds); 112 | 113 | __TRAX_EXPORT float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds); 114 | 115 | __TRAX_EXPORT region_bounds region_create_bounds(float left, float top, float right, float bottom); 116 | 117 | __TRAX_EXPORT region_bounds region_compute_bounds(const region_container* region); 118 | 119 | __TRAX_EXPORT int region_parse(const char* buffer, region_container** region); 120 | 121 | __TRAX_EXPORT char* region_string(region_container* region); 122 | 123 | __TRAX_EXPORT void region_print(FILE* out, region_container* region); 124 | 125 | __TRAX_EXPORT region_container* region_convert(const region_container* region, region_type type); 126 | 127 | __TRAX_EXPORT void region_release(region_container** region); 128 | 129 | __TRAX_EXPORT region_container* region_create_special(int code); 130 | 131 | __TRAX_EXPORT region_container* region_create_rectangle(float x, float y, float width, float height); 132 | 133 | __TRAX_EXPORT region_container* region_create_polygon(int count); 134 | 135 | __TRAX_EXPORT int region_contains_point(region_container* r, float x, float y); 136 | 137 | __TRAX_EXPORT void region_get_mask(region_container* r, char* mask, int width, int height); 138 | 139 | __TRAX_EXPORT void region_get_mask_offset(region_container* r, char* mask, int x, int y, int width, int height); 140 | 141 | #ifdef __cplusplus 142 | } 143 | #endif 144 | 145 | #endif 146 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .draw_f1 import draw_f1 2 | from .draw_success_precision import draw_success_precision 3 | from .draw_eao import draw_eao 4 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/visualization/draw_eao.py: -------------------------------------------------------------------------------- 1 | import 
matplotlib.pyplot as plt 2 | import numpy as np 3 | import pickle 4 | 5 | from matplotlib import rc 6 | from .draw_utils import COLOR, MARKER_STYLE 7 | 8 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) 9 | rc('text', usetex=True) 10 | 11 | def draw_eao(result): 12 | fig = plt.figure() 13 | ax = fig.add_subplot(111, projection='polar') 14 | angles = np.linspace(0, 2*np.pi, 8, endpoint=True) 15 | 16 | attr2value = [] 17 | for i, (tracker_name, ret) in enumerate(result.items()): 18 | value = list(ret.values()) 19 | attr2value.append(value) 20 | value.append(value[0]) 21 | attr2value = np.array(attr2value) 22 | max_value = np.max(attr2value, axis=0) 23 | min_value = np.min(attr2value, axis=0) 24 | for i, (tracker_name, ret) in enumerate(result.items()): 25 | value = list(ret.values()) 26 | value.append(value[0]) 27 | value = np.array(value) 28 | value *= (1 / max_value) 29 | plt.plot(angles, value, linestyle='-', color=COLOR[i], marker=MARKER_STYLE[i], 30 | label=tracker_name, linewidth=1.5, markersize=6) 31 | 32 | attrs = ["Overall", "Camera motion", 33 | "Illumination change","Motion Change", 34 | "Size change","Occlusion", 35 | "Unassigned"] 36 | attr_value = [] 37 | for attr, maxv, minv in zip(attrs, max_value, min_value): 38 | attr_value.append(attr + "\n({:.3f},{:.3f})".format(minv, maxv)) 39 | ax.set_thetagrids(angles[:-1] * 180/np.pi, attr_value) 40 | ax.spines['polar'].set_visible(False) 41 | ax.legend(loc='upper center', bbox_to_anchor=(0.5,-0.07), frameon=False, ncol=5) 42 | ax.grid(b=False) 43 | ax.set_ylim(0, 1.18) 44 | ax.set_yticks([]) 45 | plt.show() 46 | 47 | if __name__ == '__main__': 48 | result = pickle.load(open("../../result.pkl", 'rb')) 49 | draw_eao(result) 50 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/visualization/draw_f1.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from matplotlib import rc 5 | from .draw_utils import COLOR, LINE_STYLE 6 | 7 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) 8 | rc('text', usetex=True) 9 | 10 | def draw_f1(result, bold_name=None): 11 | # drawing f1 contour 12 | fig, ax = plt.subplots() 13 | for f1 in np.arange(0.1, 1, 0.1): 14 | recall = np.arange(f1, 1+0.01, 0.01) 15 | precision = f1 * recall / (2 * recall - f1) 16 | ax.plot(recall, precision, color=[0,1,0], linestyle='-', linewidth=0.5) 17 | ax.plot(precision, recall, color=[0,1,0], linestyle='-', linewidth=0.5) 18 | ax.grid(b=True) 19 | ax.set_aspect(1) 20 | plt.xlabel('Recall') 21 | plt.ylabel('Precision') 22 | plt.axis([0, 1, 0, 1]) 23 | plt.title(r'\textbf{VOT2018-LT Precision vs Recall}') 24 | 25 | # draw result line 26 | all_precision = {} 27 | all_recall = {} 28 | best_f1 = {} 29 | best_idx = {} 30 | for tracker_name, ret in result.items(): 31 | precision = np.mean(list(ret['precision'].values()), axis=0) 32 | recall = np.mean(list(ret['recall'].values()), axis=0) 33 | f1 = 2 * precision * recall / (precision + recall) 34 | max_idx = np.argmax(f1) 35 | all_precision[tracker_name] = precision 36 | all_recall[tracker_name] = recall 37 | best_f1[tracker_name] = f1[max_idx] 38 | best_idx[tracker_name] = max_idx 39 | 40 | for idx, (tracker_name, best_f1) in \ 41 | enumerate(sorted(best_f1.items(), key=lambda x:x[1], reverse=True)): 42 | if tracker_name == bold_name: 43 | label = r"\textbf{[%.3f] Ours}" % (best_f1) 44 | else: 45 | label = "[%.3f] " % (best_f1) + tracker_name 46 | 
recall = all_recall[tracker_name][:-1] 47 | precision = all_precision[tracker_name][:-1] 48 | ax.plot(recall, precision, color=COLOR[idx], linestyle='-', 49 | label=label) 50 | f1_idx = best_idx[tracker_name] 51 | ax.plot(recall[f1_idx], precision[f1_idx], color=[0,0,0], marker='o', 52 | markerfacecolor=COLOR[idx], markersize=5) 53 | ax.legend(loc='lower right', labelspacing=0.2) 54 | plt.xticks(np.arange(0, 1+0.1, 0.1)) 55 | plt.yticks(np.arange(0, 1+0.1, 0.1)) 56 | plt.show() 57 | 58 | if __name__ == '__main__': 59 | draw_f1(None) 60 | -------------------------------------------------------------------------------- /lib/eval_toolkit/pysot/visualization/draw_utils.py: -------------------------------------------------------------------------------- 1 | 2 | COLOR = ((1, 0, 0), 3 | (0, 1, 0), 4 | (1, 0, 1), 5 | (1, 1, 0), 6 | (0 , 162/255, 232/255), 7 | (0.5, 0.5, 0.5), 8 | (0, 0, 1), 9 | (0, 1, 1), 10 | (136/255, 0 , 21/255), 11 | (255/255, 127/255, 39/255), 12 | (0, 0, 0)) 13 | 14 | LINE_STYLE = ['-', '--', ':', '-', '--', ':', '-', '--', ':', '-'] 15 | 16 | MARKER_STYLE = ['o', 'v', '<', '*', 'D', 'x', '.', 'x', '<', '.'] 17 | -------------------------------------------------------------------------------- /lib/eval_toolkit/requirements.txt: -------------------------------------------------------------------------------- 1 | tqdm 2 | numpy 3 | glob 4 | opencv-python 5 | colorama 6 | numba 7 | -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/__init__.py -------------------------------------------------------------------------------- /lib/models/backbones.py: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------------------------------------- 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Zhipeng Zhang (zhangzhipeng2017@ia.ac.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import torch 8 | import torch.nn as nn 9 | from .modules import Bottleneck, ResNet_plus2, Bottleneck_BIG_CI, ResNet 10 | 11 | eps = 1e-5 12 | # --------------------- 13 | # For Ocean and Ocean+ 14 | # --------------------- 15 | class ResNet50(nn.Module): 16 | def __init__(self, used_layers=[2, 3, 4], online=False): 17 | super(ResNet50, self).__init__() 18 | self.features = ResNet_plus2(Bottleneck, [3, 4, 6, 3], used_layers=used_layers, online=online) 19 | 20 | def forward(self, x, online=False): 21 | if not online: 22 | x_stages, x = self.features(x, online=online) 23 | return x_stages, x 24 | else: 25 | x = self.features(x, online=online) 26 | return x 27 | 28 | # --------------------- 29 | # For SiamDW 30 | # --------------------- 31 | class ResNet22W(nn.Module): 32 | """ 33 | ResNet22W: double 3*3 layer (only) channels in residual blob 34 | """ 35 | def __init__(self): 36 | super(ResNet22W, self).__init__() 37 | self.features = ResNet(Bottleneck_BIG_CI, [3, 4], [True, False], [False, True], firstchannels=64, channels=[64, 128]) 38 | self.feature_size = 512 39 | 40 | def forward(self, x): 41 | x = self.features(x) 42 | 43 | return x 44 | 45 | 46 | if __name__ == '__main__': 47 | import torch 48 | net = ResNet50().cuda() 49 | print(net) 50 | 51 | params = list(net.parameters()) 52 | k = 0 53 | for i in params: 54 | l = 1 55 | for j in i.size(): 56 | l *= j 57 | k = k + l 58 | print("total params" + str(k/1e6) + "M") 59 | 60 | search = torch.rand(1, 3, 255, 255) 61 | search = torch.Tensor(search).cuda() 62 | out = net(search) 63 | print(out.size()) 64 | 65 | print() 66 | -------------------------------------------------------------------------------- /lib/models/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | 7 | __all__ = [ 8 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 9 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 11 | 'deform_roi_pooling' 12 | ] 13 | -------------------------------------------------------------------------------- /lib/models/dcn/deform_conv_cuda.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/dcn/deform_conv_cuda.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/models/dcn/deform_conv_cuda.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/dcn/deform_conv_cuda.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/models/dcn/deform_pool_cuda.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/dcn/deform_pool_cuda.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/models/dcn/deform_pool_cuda.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/dcn/deform_pool_cuda.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/models/dcn/src/deform_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #include 5 | #include 6 | 7 | void DeformablePSROIPoolForward( 8 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 9 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 10 | const int height, const int width, const int num_bbox, 11 | const int channels_trans, const int no_trans, const float spatial_scale, 12 | const int output_dim, const int group_size, const int pooled_size, 13 | const int part_size, const int sample_per_part, const float trans_std); 14 | 15 | void DeformablePSROIPoolBackwardAcc( 16 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 17 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 18 | at::Tensor trans_grad, const int batch, const int channels, 19 | const int height, const int width, const int num_bbox, 20 | const int channels_trans, const int no_trans, const float spatial_scale, 21 | const int output_dim, const int group_size, const int pooled_size, 22 | const int part_size, const int sample_per_part, const float trans_std); 23 | 24 | void deform_psroi_pooling_cuda_forward( 25 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 26 | at::Tensor top_count, const int no_trans, const float spatial_scale, 27 | const int output_dim, const int group_size, const int pooled_size, 28 | const int part_size, const int sample_per_part, const float trans_std) { 29 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 30 | 31 | const int batch = input.size(0); 32 | const int channels = input.size(1); 33 | const int height = input.size(2); 34 | const int width = input.size(3); 35 | const int channels_trans = no_trans ? 
2 : trans.size(1); 36 | 37 | const int num_bbox = bbox.size(0); 38 | if (num_bbox != out.size(0)) 39 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 40 | out.size(0), num_bbox); 41 | 42 | DeformablePSROIPoolForward( 43 | input, bbox, trans, out, top_count, batch, channels, height, width, 44 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 45 | pooled_size, part_size, sample_per_part, trans_std); 46 | } 47 | 48 | void deform_psroi_pooling_cuda_backward( 49 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 50 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 51 | const int no_trans, const float spatial_scale, const int output_dim, 52 | const int group_size, const int pooled_size, const int part_size, 53 | const int sample_per_part, const float trans_std) { 54 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 55 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 56 | 57 | const int batch = input.size(0); 58 | const int channels = input.size(1); 59 | const int height = input.size(2); 60 | const int width = input.size(3); 61 | const int channels_trans = no_trans ? 2 : trans.size(1); 62 | 63 | const int num_bbox = bbox.size(0); 64 | if (num_bbox != out_grad.size(0)) 65 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 66 | out_grad.size(0), num_bbox); 67 | 68 | DeformablePSROIPoolBackwardAcc( 69 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 70 | channels, height, width, num_bbox, channels_trans, no_trans, 71 | spatial_scale, output_dim, group_size, pooled_size, part_size, 72 | sample_per_part, trans_std); 73 | } 74 | 75 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 76 | m.def("deform_psroi_pooling_cuda_forward", &deform_psroi_pooling_cuda_forward, 77 | "deform psroi pooling forward(CUDA)"); 78 | m.def("deform_psroi_pooling_cuda_backward", 79 | &deform_psroi_pooling_cuda_backward, 80 | "deform psroi pooling backward(CUDA)"); 81 | } -------------------------------------------------------------------------------- /lib/models/oceanTRT.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Zhipeng Zhang (zhangzhipeng2017@ia.ac.cn) 5 | # ------------------------------------------------------------------------------ 6 | 7 | import torch 8 | import torch.nn as nn 9 | import numpy as np 10 | 11 | class OceanTRT_(nn.Module): 12 | def __init__(self): 13 | super(OceanTRT_, self).__init__() 14 | self.features = None 15 | self.connect_model = None 16 | self.zf = None # for online tracking 17 | self.neck = None 18 | self.search_size = 255 19 | self.score_size = 25 20 | 21 | 22 | def tensorrt_init(self, trt_net, corr=None): 23 | """ 24 | TensorRT init 25 | """ 26 | self.t_backbone255, self.s_backbone_siam255, self.s_backbone_siam287, self.s_backbone_online, self.t_neck255, \ 27 | self.s_neck255, self.s_neck287, self.multiDiCorr255, self.multiDiCorr287, self.boxtower255, self.boxtower287 = trt_net 28 | 29 | if corr: 30 | self.multiDiCorr255, self.multiDiCorr287 = corr 31 | 32 | def extract_for_online(self, x): 33 | xf = self.s_backbone_online(x, torch.Tensor([1]).cuda()) 34 | return xf 35 | 36 | def template(self, z): 37 | _, _, _, self.zf = self.t_backbone255(z, torch.Tensor([]).cuda()) 38 | self.zf_ori = self.t_neck255(self.zf) 39 | self.zf = self.zf_ori[:, :, 4:-4, 4:-4].contiguous() 40 | 41 | def track(self, x): 42 | """ 43 | Please see OceanOnlinePT for pytorch version (more clean) 44 | """ 45 | b1, b2, b3, xf = self.s_backbone_siam255(x, torch.Tensor([]).cuda()) 46 | xf = self.s_neck255(xf) # b4 47 | 48 | # backbone encode (something is wrong with connect model) 49 | cls_z0, cls_z1, cls_z2, cls_x0, cls_x1, cls_x2, reg_z0, reg_z1, reg_z2, reg_x0, reg_x1, reg_x2 = self.multiDiCorr255(xf, self.zf) 50 | 51 | # correlation 52 | cls_z = [cls_z0, cls_z1, cls_z2] 53 | cls_x = [cls_x0, cls_x1, cls_x2] 54 | reg_z = [reg_z0, reg_z1, reg_z2] 55 | reg_x = [reg_x0, reg_x1, reg_x2] 56 | 57 | cls_dw, reg_dw = self.connect_model2(cls_z, cls_x, reg_z, reg_x) 58 | # cls and reg 59 | bbox_pred, cls_pred = self.boxtower255(cls_dw, reg_dw) 60 | 61 | return cls_pred, bbox_pred.squeeze(0) 62 | -------------------------------------------------------------------------------- /lib/models/oceanplus.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | import torch.nn.functional as F 5 | 6 | class OceanPlus_(nn.Module): 7 | def __init__(self): 8 | super(OceanPlus_, self).__init__() 9 | self.features = None 10 | self.connect_model = None 11 | self.mask_model = None 12 | self.zf = None 13 | self.criterion = nn.BCEWithLogitsLoss() 14 | self.neck = None 15 | self.search_size = 255 16 | self.score_size = 25 17 | self.batch = 32 if self.training else 1 18 | self.lambda_u = 0.1 19 | self.lambda_s = 0.2 20 | 21 | # self.grids() 22 | 23 | def feature_extractor(self, x, online=False): 24 | return self.features(x, online=online) 25 | 26 | def extract_for_online(self, x): 27 | xf = self.feature_extractor(x, online=True) 28 | return xf 29 | 30 | def connector(self, template_feature, search_feature): 31 | pred_score = self.connect_model(template_feature, search_feature) 32 | return pred_score 33 | 34 | def update_roi_template(self, target_pos, target_sz, score): 35 | """ 36 | :param target_pos: pos in search (not the original) 37 | :param target_sz: size in target size 38 | :param score: 39 | :return: 40 | """ 41 | 42 | lambda_u = self.lambda_u * float(score) 43 | lambda_s = self.lambda_s 44 | N, C, H, W = self.search_size 45 | stride = 8 46 | assert N == 1, "not supported" 47 | l = W // 2 48 | x = 
range(-l, l + 1) 49 | y = range(-l, l + 1) 50 | 51 | hc_z = (target_sz[1] + 0.3 * sum(target_sz)) / stride 52 | wc_z = (target_sz[0] + 0.3 * sum(target_sz)) / stride 53 | grid_x = np.linspace(- wc_z / 2, wc_z / 2, 17) 54 | grid_y = np.linspace(- hc_z / 2, hc_z / 2, 17) 55 | grid_x = grid_x[5:-5] + target_pos[0] / stride 56 | grid_y = grid_y[5:-5] + target_pos[1] / stride 57 | x_offset = grid_x / l 58 | y_offset = grid_y / l 59 | 60 | grid = np.reshape(np.transpose([np.tile(x_offset, len(y_offset)), np.repeat(y_offset, len(x_offset))]), (len(grid_y), len(grid_x), 2)) 61 | grid = torch.from_numpy(grid).unsqueeze(0).cuda() 62 | 63 | zmap = nn.functional.grid_sample(self.xf.double(), grid).float() 64 | # cls_kernel = self.rpn.cls.make_kernel(zmap) 65 | self.MA_kernel = (1 - lambda_u) * self.MA_kernel + lambda_u * zmap 66 | self.zf_update = self.zf * lambda_s + self.MA_kernel * (1.0 - lambda_s) 67 | 68 | def template(self, z, template_mask = None): 69 | _, self.zf = self.feature_extractor(z) 70 | 71 | if self.neck is not None: 72 | self.zf_ori, self.zf = self.neck(self.zf, crop=True) 73 | 74 | self.template_mask = template_mask.float() 75 | self.MA_kernel = self.zf.detach() 76 | self.zf_update = None 77 | 78 | 79 | def track(self, x): 80 | 81 | features_stages, xf = self.feature_extractor(x) 82 | 83 | if self.neck is not None: 84 | xf = self.neck(xf, crop=False) 85 | 86 | features_stages.append(xf) 87 | bbox_pred, cls_pred, cls_feature, reg_feature = self.connect_model(xf, self.zf, update=self.zf_update) 88 | 89 | features_stages.append(cls_feature) 90 | pred_mask = self.mask_model(features_stages, input_size=x.size()[2:], zf_ori=self.zf_ori, template_mask=self.template_mask) 91 | self.search_size = xf.size() 92 | self.xf = xf.detach() 93 | 94 | return cls_pred, bbox_pred, pred_mask 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /lib/models/online/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/online/__init__.py -------------------------------------------------------------------------------- /lib/models/online/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .resnet18_vggm import * 3 | -------------------------------------------------------------------------------- /lib/models/online/bbreg/__init__.py: -------------------------------------------------------------------------------- 1 | from .iou_net import IoUNet 2 | -------------------------------------------------------------------------------- /lib/models/online/classifier/__init__.py: -------------------------------------------------------------------------------- 1 | from .linear_filter import LinearFilter 2 | -------------------------------------------------------------------------------- /lib/models/online/classifier/features.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from torchvision.models.resnet import BasicBlock, Bottleneck 5 | from models.online.layers.normalization import InstanceL2Norm 6 | from models.online.layers.transform import InterpCat 7 | 8 | 9 | def residual_basic_block(feature_dim=256, num_blocks=1, l2norm=True, final_conv=False, norm_scale=1.0, out_dim=None, 
10 | interp_cat=False): 11 | """Construct a network block based on the BasicBlock used in ResNet 18 and 34.""" 12 | if out_dim is None: 13 | out_dim = feature_dim 14 | feat_layers = [] 15 | if interp_cat: 16 | feat_layers.append(InterpCat()) 17 | for i in range(num_blocks): 18 | odim = feature_dim if i < num_blocks - 1 + int(final_conv) else out_dim 19 | feat_layers.append(BasicBlock(feature_dim, odim)) 20 | if final_conv: 21 | feat_layers.append(nn.Conv2d(feature_dim, out_dim, kernel_size=3, padding=1, bias=False)) 22 | if l2norm: 23 | feat_layers.append(InstanceL2Norm(scale=norm_scale)) 24 | return nn.Sequential(*feat_layers) 25 | 26 | 27 | def residual_basic_block_pool(feature_dim=256, num_blocks=1, l2norm=True, final_conv=False, norm_scale=1.0, out_dim=None, 28 | pool=True): 29 | """Construct a network block based on the BasicBlock used in ResNet.""" 30 | if out_dim is None: 31 | out_dim = feature_dim 32 | feat_layers = [] 33 | for i in range(num_blocks): 34 | odim = feature_dim if i < num_blocks - 1 + int(final_conv) else out_dim 35 | feat_layers.append(BasicBlock(feature_dim, odim)) 36 | if final_conv: 37 | feat_layers.append(nn.Conv2d(feature_dim, out_dim, kernel_size=3, padding=1, bias=False)) 38 | if pool: 39 | feat_layers.append(nn.MaxPool2d(kernel_size=3, stride=2, padding=1)) 40 | if l2norm: 41 | feat_layers.append(InstanceL2Norm(scale=norm_scale)) 42 | 43 | return nn.Sequential(*feat_layers) 44 | 45 | 46 | def residual_bottleneck(feature_dim=256, num_blocks=1, l2norm=True, final_conv=False, norm_scale=1.0, out_dim=None, 47 | interp_cat=False): 48 | """Construct a network block based on the Bottleneck block used in ResNet.""" 49 | if out_dim is None: 50 | out_dim = feature_dim 51 | feat_layers = [] 52 | if interp_cat: 53 | feat_layers.append(InterpCat()) 54 | for i in range(num_blocks): 55 | planes = feature_dim if i < num_blocks - 1 + int(final_conv) else out_dim // 4 56 | feat_layers.append(Bottleneck(4*feature_dim, planes)) 57 | if final_conv: 58 | feat_layers.append(nn.Conv2d(4*feature_dim, out_dim, kernel_size=3, padding=1, bias=False)) 59 | if l2norm: 60 | feat_layers.append(InstanceL2Norm(scale=norm_scale)) 61 | return nn.Sequential(*feat_layers) -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | .vim-template* 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jiayuan Mao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/README.md: -------------------------------------------------------------------------------- 1 | # PreciseRoIPooling 2 | This repo implements the **Precise RoI Pooling** (PrRoI Pooling), proposed in the paper **Acquisition of Localization Confidence for Accurate Object Detection** published at ECCV 2018 (Oral Presentation). 3 | 4 | **Acquisition of Localization Confidence for Accurate Object Detection** 5 | 6 | _Borui Jiang*, Ruixuan Luo*, Jiayuan Mao*, Tete Xiao, Yuning Jiang_ (* indicates equal contribution.) 7 | 8 | https://arxiv.org/abs/1807.11590 9 | 10 | ## Brief 11 | 12 | In short, Precise RoI Pooling is an integration-based (bilinear interpolation) average pooling method for RoI Pooling. It avoids any quantization and has a continuous gradient on bounding box coordinates. 
It is: 13 | 14 | - different from the original RoI Pooling proposed in [Fast R-CNN](https://arxiv.org/abs/1504.08083). PrRoI Pooling uses average pooling instead of max pooling for each bin and has a continuous gradient on bounding box coordinates. That is, one can take the derivatives of some loss function w.r.t. the coordinates of each RoI and optimize the RoI coordinates. 15 | - different from the RoI Align proposed in [Mask R-CNN](https://arxiv.org/abs/1703.06870). PrRoI Pooling uses a full integration-based average pooling instead of sampling a constant number of points. This makes the gradient w.r.t. the coordinates continuous. 16 | 17 | For a better illustration, we show RoI Pooling, RoI Align and PrRoI Pooling in the following figure. More details including the gradient computation can be found in our paper. 18 | 19 |
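For intuition, the bin-wise average that PrRoI Pooling computes can be approximated numerically. The sketch below is not part of the library; it assumes a single-channel feature map, uses plain `torch.nn.functional.grid_sample`, and simply averages densely sampled bilinear interpolations inside one continuous bin. The CUDA kernels in this repository evaluate the same integral in closed form:

```
import torch
import torch.nn.functional as F

def approx_prroi_bin(feat, x0, y0, x1, y1, n=64):
    # feat: (H, W) feature map; (x0, y0, x1, y1): one continuous bin in pixel coordinates
    h, w = feat.shape
    xs = torch.linspace(x0, x1, n)
    ys = torch.linspace(y0, y1, n)
    # normalize pixel coordinates to [-1, 1] as expected by grid_sample with align_corners=True
    gx = (2.0 * xs / (w - 1) - 1.0).expand(n, n)               # x varies along columns
    gy = (2.0 * ys / (h - 1) - 1.0).unsqueeze(1).expand(n, n)  # y varies along rows
    grid = torch.stack([gx, gy], dim=-1)                       # (n, n, 2) in (x, y) order
    samples = F.grid_sample(feat[None, None], grid[None], align_corners=True)
    return samples.mean()  # dense approximation of the exact PrRoI bin average

feat = torch.arange(16.0).view(4, 4)
print(approx_prroi_bin(feat, 0.5, 0.5, 2.5, 2.5))  # ~7.5 for this linear ramp
```

Unlike RoI Align, which averages a fixed small number of sample points, letting `n` grow here converges to the exact PrRoI Pooling value for that bin.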
20 | 21 | ## Implementation 22 | 23 | PrRoI Pooling was originally implemented by [Tete Xiao](http://tetexiao.com/) based on MegBrain, an (internal) deep learning framework built by Megvii Inc. It was later adapted into open-source deep learning frameworks. Currently, we only support PyTorch. Unfortunately, we don't have any specific plan for the adaptation into other frameworks such as TensorFlow, but any contributions (pull requests) will be more than welcome. 24 | 25 | ## Usage (PyTorch 1.0) 26 | 27 | In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 1.0+ and only supports CUDA (CPU mode is not implemented). 28 | Since we use PyTorch JIT for cxx/cuda code compilation, to use the module in your code, simply do: 29 | 30 | ``` 31 | from prroi_pool import PrRoIPool2D 32 | 33 | avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale) 34 | roi_features = avg_pool(features, rois) 35 | 36 | # for those who want to use the "functional" 37 | 38 | from prroi_pool.functional import prroi_pool2d 39 | roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale) 40 | ``` 41 | 42 | 43 | ## Usage (PyTorch 0.4) 44 | 45 | **!!! Please first check out the branch pytorch0.4.** 46 | 47 | In the directory `pytorch/`, we provide a PyTorch-based implementation of PrRoI Pooling. It requires PyTorch 0.4 and only supports CUDA (CPU mode is not implemented). 48 | To use the PrRoI Pooling module, first go to `pytorch/prroi_pool` and execute `./travis.sh` to compile the essential components (you may need `nvcc` for this step). To use the module in your code, simply do: 49 | 50 | ``` 51 | from prroi_pool import PrRoIPool2D 52 | 53 | avg_pool = PrRoIPool2D(window_height, window_width, spatial_scale) 54 | roi_features = avg_pool(features, rois) 55 | 56 | # for those who want to use the "functional" 57 | 58 | from prroi_pool.functional import prroi_pool2d 59 | roi_features = prroi_pool2d(features, rois, window_height, window_width, spatial_scale) 60 | ``` 61 | 62 | Here, 63 | 64 | - RoI is an `m * 5` float tensor of format `(batch_index, x0, y0, x1, y1)`, following the convention in the original Caffe implementation of RoI Pooling, although in some frameworks the batch indices are provided by an integer tensor. 65 | - `spatial_scale` is multiplied with the RoIs. For example, if your feature maps are down-sampled by a factor of 16 (w.r.t. the input image), you should use a spatial scale of `1/16`. 66 | - The RoI coordinates follow the [L, R) convention. That is, `(0, 0, 4, 4)` denotes a box of size `4x4`. 67 | -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/_assets/prroi_visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/online/external/PreciseRoIPooling/_assets/prroi_visualization.png -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | /_prroi_pooling 3 | -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | #!
/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : __init__.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited. 11 | 12 | from .prroi_pool import * 13 | 14 | -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/functional.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : functional.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited. 11 | 12 | import torch 13 | import torch.autograd as ag 14 | 15 | __all__ = ['prroi_pool2d'] 16 | 17 | 18 | _prroi_pooling = None 19 | 20 | 21 | def _import_prroi_pooling(): 22 | global _prroi_pooling 23 | 24 | if _prroi_pooling is None: 25 | try: 26 | from os.path import join as pjoin, dirname 27 | from torch.utils.cpp_extension import load as load_extension 28 | root_dir = pjoin(dirname(__file__), 'src') 29 | 30 | _prroi_pooling = load_extension( 31 | '_prroi_pooling', 32 | [pjoin(root_dir, 'prroi_pooling_gpu.c'), pjoin(root_dir, 'prroi_pooling_gpu_impl.cu')], 33 | verbose=True 34 | ) 35 | except ImportError: 36 | raise ImportError('Can not compile Precise RoI Pooling library.') 37 | 38 | return _prroi_pooling 39 | 40 | 41 | class PrRoIPool2DFunction(ag.Function): 42 | @staticmethod 43 | def forward(ctx, features, rois, pooled_height, pooled_width, spatial_scale): 44 | _prroi_pooling = _import_prroi_pooling() 45 | 46 | assert 'FloatTensor' in features.type() and 'FloatTensor' in rois.type(), \ 47 | 'Precise RoI Pooling only takes float input, got {} for features and {} for rois.'.format(features.type(), rois.type()) 48 | 49 | pooled_height = int(pooled_height) 50 | pooled_width = int(pooled_width) 51 | spatial_scale = float(spatial_scale) 52 | 53 | features = features.contiguous() 54 | rois = rois.contiguous() 55 | params = (pooled_height, pooled_width, spatial_scale) 56 | 57 | if features.is_cuda: 58 | output = _prroi_pooling.prroi_pooling_forward_cuda(features, rois, *params) 59 | ctx.params = params 60 | # everything here is contiguous. 
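# (note) features, rois and the pooled output are all saved here because the CUDA
# backward kernels for both the feature gradient and the RoI-coordinate gradient
# take all three tensors as inputs (see backward below).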
61 | ctx.save_for_backward(features, rois, output) 62 | else: 63 | raise NotImplementedError('Precise RoI Pooling only supports GPU (cuda) implementations.') 64 | 65 | return output 66 | 67 | @staticmethod 68 | def backward(ctx, grad_output): 69 | _prroi_pooling = _import_prroi_pooling() 70 | 71 | features, rois, output = ctx.saved_tensors 72 | grad_input = grad_coor = None 73 | 74 | if features.requires_grad: 75 | grad_output = grad_output.contiguous() 76 | grad_input = _prroi_pooling.prroi_pooling_backward_cuda(features, rois, output, grad_output, *ctx.params) 77 | if rois.requires_grad: 78 | grad_output = grad_output.contiguous() 79 | grad_coor = _prroi_pooling.prroi_pooling_coor_backward_cuda(features, rois, output, grad_output, *ctx.params) 80 | 81 | return grad_input, grad_coor, None, None, None 82 | 83 | 84 | prroi_pool2d = PrRoIPool2DFunction.apply 85 | 86 | -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/prroi_pool.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : prroi_pool.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited. 11 | 12 | import torch.nn as nn 13 | 14 | from .functional import prroi_pool2d 15 | 16 | __all__ = ['PrRoIPool2D'] 17 | 18 | 19 | class PrRoIPool2D(nn.Module): 20 | def __init__(self, pooled_height, pooled_width, spatial_scale): 21 | super().__init__() 22 | 23 | self.pooled_height = int(pooled_height) 24 | self.pooled_width = int(pooled_width) 25 | self.spatial_scale = float(spatial_scale) 26 | 27 | def forward(self, features, rois): 28 | return prroi_pool2d(features, rois, self.pooled_height, self.pooled_width, self.spatial_scale) 29 | 30 | def extra_repr(self): 31 | return 'kernel_size=({pooled_height}, {pooled_width}), spatial_scale={spatial_scale}'.format(**self.__dict__) 32 | 33 | -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu.c 3 | * Author : Jiayuan Mao, Tete Xiao 4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 5 | * Date : 07/13/2018 6 | * 7 | * Distributed under terms of the MIT license. 8 | * Copyright (c) 2017 Megvii Technology Limited.
9 | */ 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | 17 | #include 18 | 19 | #include "prroi_pooling_gpu_impl.cuh" 20 | 21 | 22 | at::Tensor prroi_pooling_forward_cuda(const at::Tensor &features, const at::Tensor &rois, int pooled_height, int pooled_width, float spatial_scale) { 23 | int nr_rois = rois.size(0); 24 | int nr_channels = features.size(1); 25 | int height = features.size(2); 26 | int width = features.size(3); 27 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 28 | auto output = at::zeros({nr_rois, nr_channels, pooled_height, pooled_width}, features.options()); 29 | 30 | if (output.numel() == 0) { 31 | THCudaCheck(cudaGetLastError()); 32 | return output; 33 | } 34 | 35 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 36 | PrRoIPoolingForwardGpu( 37 | stream, features.data(), rois.data(), output.data(), 38 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 39 | top_count 40 | ); 41 | 42 | THCudaCheck(cudaGetLastError()); 43 | return output; 44 | } 45 | 46 | at::Tensor prroi_pooling_backward_cuda( 47 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff, 48 | int pooled_height, int pooled_width, float spatial_scale) { 49 | 50 | auto features_diff = at::zeros_like(features); 51 | 52 | int nr_rois = rois.size(0); 53 | int batch_size = features.size(0); 54 | int nr_channels = features.size(1); 55 | int height = features.size(2); 56 | int width = features.size(3); 57 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 58 | int bottom_count = batch_size * nr_channels * height * width; 59 | 60 | if (output.numel() == 0) { 61 | THCudaCheck(cudaGetLastError()); 62 | return features_diff; 63 | } 64 | 65 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 66 | PrRoIPoolingBackwardGpu( 67 | stream, 68 | features.data(), rois.data(), output.data(), output_diff.data(), 69 | features_diff.data(), 70 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 71 | top_count, bottom_count 72 | ); 73 | 74 | THCudaCheck(cudaGetLastError()); 75 | return features_diff; 76 | } 77 | 78 | at::Tensor prroi_pooling_coor_backward_cuda( 79 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff, 80 | int pooled_height, int pooled_width, float spatial_scale) { 81 | 82 | auto coor_diff = at::zeros_like(rois); 83 | 84 | int nr_rois = rois.size(0); 85 | int nr_channels = features.size(1); 86 | int height = features.size(2); 87 | int width = features.size(3); 88 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 89 | int bottom_count = nr_rois * 5; 90 | 91 | if (output.numel() == 0) { 92 | THCudaCheck(cudaGetLastError()); 93 | return coor_diff; 94 | } 95 | 96 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 97 | PrRoIPoolingCoorBackwardGpu( 98 | stream, 99 | features.data(), rois.data(), output.data(), output_diff.data(), 100 | coor_diff.data(), 101 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 102 | top_count, bottom_count 103 | ); 104 | 105 | THCudaCheck(cudaGetLastError()); 106 | return coor_diff; 107 | } 108 | 109 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 110 | m.def("prroi_pooling_forward_cuda", &prroi_pooling_forward_cuda, "PRRoIPooling_forward"); 111 | m.def("prroi_pooling_backward_cuda", &prroi_pooling_backward_cuda, "PRRoIPooling_backward"); 112 | m.def("prroi_pooling_coor_backward_cuda", 
&prroi_pooling_coor_backward_cuda, "PRRoIPooling_backward_coor"); 113 | } 114 | -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu.h 3 | * Author : Jiayuan Mao, Tete Xiao 4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 5 | * Date : 07/13/2018 6 | * 7 | * Distributed under terms of the MIT license. 8 | * Copyright (c) 2017 Megvii Technology Limited. 9 | */ 10 | 11 | int prroi_pooling_forward_cuda(THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, int pooled_height, int pooled_width, float spatial_scale); 12 | 13 | int prroi_pooling_backward_cuda( 14 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff, 15 | int pooled_height, int pooled_width, float spatial_scale 16 | ); 17 | 18 | int prroi_pooling_coor_backward_cuda( 19 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff, 20 | int pooled_height, int pooled_width, float spatial_scal 21 | ); 22 | 23 | -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu_impl.cu: -------------------------------------------------------------------------------- 1 | ../../../src/prroi_pooling_gpu_impl.cu -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu_impl.cuh: -------------------------------------------------------------------------------- 1 | ../../../src/prroi_pooling_gpu_impl.cuh -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/pytorch/tests/test_prroi_pooling2d.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : test_prroi_pooling2d.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 18/02/2018 6 | # 7 | # This file is part of Jacinle. 
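# Note: these tests assume a CUDA-capable device and that the `jactorch` test
# utilities (TorchTestCase) are importable.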
8 | 9 | import unittest 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | from jactorch.utils.unittest import TorchTestCase 16 | 17 | from prroi_pool import PrRoIPool2D 18 | 19 | 20 | class TestPrRoIPool2D(TorchTestCase): 21 | def test_forward(self): 22 | pool = PrRoIPool2D(7, 7, spatial_scale=0.5) 23 | features = torch.rand((4, 16, 24, 32)).cuda() 24 | rois = torch.tensor([ 25 | [0, 0, 0, 14, 14], 26 | [1, 14, 14, 28, 28], 27 | ]).float().cuda() 28 | 29 | out = pool(features, rois) 30 | out_gold = F.avg_pool2d(features, kernel_size=2, stride=1) 31 | 32 | self.assertTensorClose(out, torch.stack(( 33 | out_gold[0, :, :7, :7], 34 | out_gold[1, :, 7:14, 7:14], 35 | ), dim=0)) 36 | 37 | def test_backward_shapeonly(self): 38 | pool = PrRoIPool2D(2, 2, spatial_scale=0.5) 39 | 40 | features = torch.rand((4, 2, 24, 32)).cuda() 41 | rois = torch.tensor([ 42 | [0, 0, 0, 4, 4], 43 | [1, 14, 14, 18, 18], 44 | ]).float().cuda() 45 | features.requires_grad = rois.requires_grad = True 46 | out = pool(features, rois) 47 | 48 | loss = out.sum() 49 | loss.backward() 50 | 51 | self.assertTupleEqual(features.size(), features.grad.size()) 52 | self.assertTupleEqual(rois.size(), rois.grad.size()) 53 | 54 | 55 | if __name__ == '__main__': 56 | unittest.main() 57 | -------------------------------------------------------------------------------- /lib/models/online/external/PreciseRoIPooling/src/prroi_pooling_gpu_impl.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu_impl.cuh 3 | * Author : Tete Xiao, Jiayuan Mao 4 | * Email : jasonhsiao97@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | * Copyright (c) 2017 Megvii Technology Limited. 
8 | */ 9 | 10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH 11 | #define PRROI_POOLING_GPU_IMPL_CUH 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #define F_DEVPTR_IN const float * 18 | #define F_DEVPTR_OUT float * 19 | 20 | void PrRoIPoolingForwardGpu( 21 | cudaStream_t stream, 22 | F_DEVPTR_IN bottom_data, 23 | F_DEVPTR_IN bottom_rois, 24 | F_DEVPTR_OUT top_data, 25 | const int channels_, const int height_, const int width_, 26 | const int pooled_height_, const int pooled_width_, 27 | const float spatial_scale_, 28 | const int top_count); 29 | 30 | void PrRoIPoolingBackwardGpu( 31 | cudaStream_t stream, 32 | F_DEVPTR_IN bottom_data, 33 | F_DEVPTR_IN bottom_rois, 34 | F_DEVPTR_IN top_data, 35 | F_DEVPTR_IN top_diff, 36 | F_DEVPTR_OUT bottom_diff, 37 | const int channels_, const int height_, const int width_, 38 | const int pooled_height_, const int pooled_width_, 39 | const float spatial_scale_, 40 | const int top_count, const int bottom_count); 41 | 42 | void PrRoIPoolingCoorBackwardGpu( 43 | cudaStream_t stream, 44 | F_DEVPTR_IN bottom_data, 45 | F_DEVPTR_IN bottom_rois, 46 | F_DEVPTR_IN top_data, 47 | F_DEVPTR_IN top_diff, 48 | F_DEVPTR_OUT bottom_diff, 49 | const int channels_, const int height_, const int width_, 50 | const int pooled_height_, const int pooled_width_, 51 | const float spatial_scale_, 52 | const int top_count, const int bottom_count); 53 | 54 | #ifdef __cplusplus 55 | } /* !extern "C" */ 56 | #endif 57 | 58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */ 59 | 60 | -------------------------------------------------------------------------------- /lib/models/online/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/models/online/layers/__init__.py -------------------------------------------------------------------------------- /lib/models/online/layers/activation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class MLU(nn.Module): 7 | r"""MLU activation 8 | """ 9 | def __init__(self, min_val, inplace=False): 10 | super().__init__() 11 | self.min_val = min_val 12 | self.inplace = inplace 13 | 14 | def forward(self, input): 15 | return F.elu(F.leaky_relu(input, 1/self.min_val, inplace=self.inplace), self.min_val, inplace=self.inplace) 16 | 17 | 18 | class LeakyReluPar(nn.Module): 19 | r"""LeakyRelu parametric activation 20 | """ 21 | 22 | def forward(self, x, a): 23 | return (1.0 - a)/2.0 * torch.abs(x) + (1.0 + a)/2.0 * x 24 | 25 | class LeakyReluParDeriv(nn.Module): 26 | r"""Derivative of the LeakyRelu parametric activation, wrt x. 
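Since LeakyReluPar computes (1 - a)/2 * |x| + (1 + a)/2 * x, its derivative w.r.t. x is (1 - a)/2 * sign(x) + (1 + a)/2, which is exactly what forward() returns.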
27 | """ 28 | 29 | def forward(self, x, a): 30 | return (1.0 - a)/2.0 * torch.sign(x.detach()) + (1.0 + a)/2.0 31 | 32 | 33 | class BentIdentPar(nn.Module): 34 | r"""BentIdent parametric activation 35 | """ 36 | def __init__(self, b=1.0): 37 | super().__init__() 38 | self.b = b 39 | 40 | def forward(self, x, a): 41 | return (1.0 - a)/2.0 * (torch.sqrt(x*x + 4.0*self.b*self.b) - 2.0*self.b) + (1.0 + a)/2.0 * x 42 | 43 | 44 | class BentIdentParDeriv(nn.Module): 45 | r"""BentIdent parametric activation deriv 46 | """ 47 | def __init__(self, b=1.0): 48 | super().__init__() 49 | self.b = b 50 | 51 | def forward(self, x, a): 52 | return (1.0 - a)/2.0 * (x / torch.sqrt(x*x + 4.0*self.b*self.b)) + (1.0 + a)/2.0 53 | 54 | -------------------------------------------------------------------------------- /lib/models/online/layers/blocks.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | def conv_block(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1, bias=True, 5 | batch_norm=True, relu=True): 6 | layers = [nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, 7 | padding=padding, dilation=dilation, bias=bias)] 8 | if batch_norm: 9 | layers.append(nn.BatchNorm2d(out_planes)) 10 | if relu: 11 | layers.append(nn.ReLU(inplace=True)) 12 | return nn.Sequential(*layers) 13 | 14 | 15 | class LinearBlock(nn.Module): 16 | def __init__(self, in_planes, out_planes, input_sz, bias=True, batch_norm=True, relu=True): 17 | super().__init__() 18 | self.linear = nn.Linear(in_planes*input_sz*input_sz, out_planes, bias=bias) 19 | self.bn = nn.BatchNorm2d(out_planes) if batch_norm else None 20 | self.relu = nn.ReLU(inplace=True) if relu else None 21 | 22 | def forward(self, x): 23 | x = self.linear(x.view(x.shape[0], -1)) 24 | if self.bn is not None: 25 | x = self.bn(x.view(x.shape[0], x.shape[1], 1, 1)) 26 | if self.relu is not None: 27 | x = self.relu(x) 28 | return x.view(x.shape[0], -1) -------------------------------------------------------------------------------- /lib/models/online/layers/distance.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class DistanceMap(nn.Module): 7 | """Generate a distance map from a origin center location. 8 | args: 9 | num_bins: Number of bins in the map. 10 | bin_displacement: Displacement of the bins. 11 | """ 12 | def __init__(self, num_bins, bin_displacement=1.0): 13 | super().__init__() 14 | self.num_bins = num_bins 15 | self.bin_displacement = bin_displacement 16 | 17 | def forward(self, center, output_sz): 18 | """Create the distance map. 19 | args: 20 | center: Torch tensor with (y,x) center position. Dims (batch, 2) 21 | output_sz: Size of output distance map. 
2-dimensional tuple.""" 22 | 23 | center = center.view(-1,2) 24 | 25 | bin_centers = torch.arange(self.num_bins, dtype=torch.float32, device=center.device).view(1, -1, 1, 1) 26 | 27 | k0 = torch.arange(output_sz[0], dtype=torch.float32, device=center.device).view(1,1,-1,1) 28 | k1 = torch.arange(output_sz[1], dtype=torch.float32, device=center.device).view(1,1,1,-1) 29 | 30 | d0 = k0 - center[:,0].view(-1,1,1,1) 31 | d1 = k1 - center[:,1].view(-1,1,1,1) 32 | 33 | dist = torch.sqrt(d0*d0 + d1*d1) 34 | bin_diff = dist / self.bin_displacement - bin_centers 35 | 36 | bin_val = torch.cat((F.relu(1.0 - torch.abs(bin_diff[:,:-1,:,:]), inplace=True), 37 | (1.0 + bin_diff[:,-1:,:,:]).clamp(0, 1)), dim=1) 38 | 39 | return bin_val 40 | 41 | 42 | -------------------------------------------------------------------------------- /lib/models/online/layers/normalization.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class InstanceL2Norm(nn.Module): 7 | """Instance L2 normalization. 8 | """ 9 | def __init__(self, size_average=True, eps=1e-5, scale=1.0): 10 | super().__init__() 11 | self.size_average = size_average 12 | self.eps = eps 13 | self.scale = scale 14 | 15 | def forward(self, input): 16 | if self.size_average: 17 | return input * (self.scale * ((input.shape[1] * input.shape[2] * input.shape[3]) / ( 18 | torch.sum((input * input).view(input.shape[0], 1, 1, -1), dim=3, keepdim=True) + self.eps)).sqrt()) 19 | else: 20 | return input * (self.scale / (torch.sum((input * input).view(input.shape[0], 1, 1, -1), dim=3, keepdim=True) + self.eps).sqrt()) 21 | 22 | -------------------------------------------------------------------------------- /lib/models/online/layers/transform.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from collections import OrderedDict 5 | 6 | 7 | def interpolate(x, sz): 8 | """Interpolate 4D tensor x to size sz.""" 9 | sz = sz.tolist() if torch.is_tensor(sz) else sz 10 | return F.interpolate(x, sz, mode='bilinear', align_corners=False) if x.shape[-2:] != sz else x 11 | 12 | 13 | class InterpCat(nn.Module): 14 | """Interpolate and concatenate features of different resolutions.""" 15 | 16 | def forward(self, input): 17 | if isinstance(input, (dict, OrderedDict)): 18 | input = list(input.values()) 19 | 20 | output_shape = None 21 | for x in input: 22 | if output_shape is None or output_shape[0] > x.shape[-2]: 23 | output_shape = x.shape[-2:] 24 | 25 | return torch.cat([interpolate(x, output_shape) for x in input], dim=-3) 26 | -------------------------------------------------------------------------------- /lib/models/siamfc.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Houwen Peng and Zhipeng Zhang 5 | # Email: houwen.peng@microsoft.com 6 | # Main Results: see readme.md 7 | # ------------------------------------------------------------------------------ 8 | 9 | import torch 10 | import torch.nn as nn 11 | from torch.autograd import Variable 12 | 13 | 14 | class SiamFC_(nn.Module): 15 | def __init__(self): 16 | super(SiamFC_, self).__init__() 17 | self.features = None 18 | self.connect_model = None 19 | self.zf = None # for online tracking 20 | self.criterion = nn.BCEWithLogitsLoss() 21 | 22 | def feature_extractor(self, x): 23 | return self.features(x) 24 | 25 | def connector(self, template_feature, search_feature): 26 | pred_score = self.connect_model(template_feature, search_feature) 27 | return pred_score 28 | 29 | def _cls_loss(self, pred, label, select): 30 | if len(select.size()) == 0: return 0 31 | pred = torch.index_select(pred, 0, select) 32 | label = torch.index_select(label, 0, select) 33 | return self.criterion(pred, label) # the same as tf version 34 | 35 | def _weighted_BCE(self, pred, label): 36 | pred = pred.view(-1) 37 | label = label.view(-1) 38 | pos = Variable(label.data.eq(1).nonzero().squeeze()).cuda() 39 | neg = Variable(label.data.eq(0).nonzero().squeeze()).cuda() 40 | 41 | loss_pos = self._cls_loss(pred, label, pos) 42 | loss_neg = self._cls_loss(pred, label, neg) 43 | return loss_pos * 0.5 + loss_neg * 0.5 44 | 45 | def template(self, z): 46 | self.zf = self.feature_extractor(z) 47 | 48 | def track(self, x): 49 | xf = self.feature_extractor(x) 50 | score = self.connector(self.zf, xf) 51 | return score 52 | 53 | def forward(self, template, search, label=None): 54 | zf = self.feature_extractor(template) 55 | xf = self.feature_extractor(search) 56 | score = self.connector(zf, xf) 57 | if self.training: 58 | return self._weighted_BCE(score, label) 59 | else: 60 | raise ValueError('forward is only used for training.') 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /lib/online/__init__.py: -------------------------------------------------------------------------------- 1 | from .tensorlist import TensorList 2 | from .tensordict import TensorDict 3 | from .loading import load_network 4 | -------------------------------------------------------------------------------- /lib/online/base_actor.py: -------------------------------------------------------------------------------- 1 | from online import TensorDict 2 | import torch.nn as nn 3 | 4 | 5 | class BaseActor: 6 | """ Base class for actor. The actor class handles the passing of the data through the network 7 | and calculation the loss""" 8 | def __init__(self, net, objective): 9 | """ 10 | args: 11 | net - The network to train 12 | objective - The loss function 13 | """ 14 | self.net = net 15 | self.objective = objective 16 | 17 | def __call__(self, data: TensorDict): 18 | """ Called in each training iteration. Should pass in input data through the network, calculate the loss, and 19 | return the training stats for the input data 20 | args: 21 | data - A TensorDict containing all the necessary data blocks. 22 | 23 | returns: 24 | loss - loss for the input data 25 | stats - a dict containing detailed losses 26 | """ 27 | raise NotImplementedError 28 | 29 | def to(self, device): 30 | """ Move the network to device 31 | args: 32 | device - device to use. 'cpu' or 'cuda' 33 | """ 34 | self.net.to(device) 35 | 36 | def train(self, mode=True): 37 | """ Set whether the network is in train mode. 
38 | args: 39 | mode (True) - Bool specifying whether in training mode. 40 | """ 41 | self.net.train(mode) 42 | 43 | 44 | # fix backbone again 45 | # fix the first three blocks 46 | print('======> fix backbone again <=======') 47 | for param in self.net.feature_extractor.parameters(): 48 | param.requires_grad = False 49 | for m in self.net.feature_extractor.modules(): 50 | if isinstance(m, nn.BatchNorm2d): 51 | m.eval() 52 | 53 | for layer in ['layeronline']: 54 | for param in getattr(self.net.feature_extractor.features.features, layer).parameters(): 55 | param.requires_grad = True 56 | for m in getattr(self.net.feature_extractor.features.features, layer).modules(): 57 | if isinstance(m, nn.BatchNorm2d): 58 | m.train() 59 | 60 | print('double check trainable') 61 | self.check_trainable(self.net) 62 | 63 | 64 | 65 | def eval(self): 66 | """ Set network to eval mode""" 67 | self.train(False) 68 | 69 | def check_trainable(self, model): 70 | """ 71 | print trainable params info 72 | """ 73 | trainable_params = [p for p in model.parameters() if p.requires_grad] 74 | print('trainable params:') 75 | for name, param in model.named_parameters(): 76 | if param.requires_grad: 77 | print(name) 78 | 79 | assert len(trainable_params) > 0, 'no trainable parameters' 80 | -------------------------------------------------------------------------------- /lib/online/loading.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | import sys 4 | from pathlib import Path 5 | import importlib 6 | from online.model_constructor import NetConstructor 7 | 8 | def check_keys(model, pretrained_state_dict): 9 | ckpt_keys = set(pretrained_state_dict.keys()) 10 | model_keys = set(model.state_dict().keys()) 11 | used_pretrained_keys = model_keys & ckpt_keys 12 | unused_pretrained_keys = ckpt_keys - model_keys 13 | missing_keys = model_keys - ckpt_keys 14 | 15 | print('missing keys:{}'.format(missing_keys)) 16 | 17 | print('=========================================') 18 | # clean it to no batch_tracked key words 19 | unused_pretrained_keys = [k for k in unused_pretrained_keys if 'num_batches_tracked' not in k] 20 | 21 | print('unused checkpoint keys:{}'.format(unused_pretrained_keys)) 22 | # print('used keys:{}'.format(used_pretrained_keys)) 23 | assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' 24 | return True 25 | 26 | def load_pretrain(model, pretrained_dict): 27 | 28 | device = torch.cuda.current_device() 29 | 30 | check_keys(model, pretrained_dict) 31 | model.load_state_dict(pretrained_dict, strict=False) 32 | return model 33 | 34 | 35 | def load_network(ckpt_path=None, constructor_fun_name='online_resnet18', constructor_module='lib.models.online.bbreg.online'): 36 | 37 | # Load network 38 | checkpoint_dict = torch.load(ckpt_path) # key: net 39 | 40 | # get model structure from constructor 41 | net_constr = NetConstructor(fun_name=constructor_fun_name, fun_module=constructor_module) 42 | # Legacy networks before refactoring 43 | 44 | net = net_constr.get() 45 | 46 | net = load_pretrain(net, checkpoint_dict['net']) 47 | 48 | return net 49 | 50 | 51 | def load_weights(net, path, strict=True): 52 | checkpoint_dict = torch.load(path) 53 | weight_dict = checkpoint_dict['net'] 54 | net.load_state_dict(weight_dict, strict=strict) 55 | return net 56 | 57 | 58 | def torch_load_legacy(path): 59 | """Load network with legacy environment.""" 60 | 61 | # Setup legacy env (for older networks) 62 | _setup_legacy_env() 63 | 64 | # Load network 
65 | checkpoint_dict = torch.load(path) 66 | 67 | # Cleanup legacy 68 | _cleanup_legacy_env() 69 | 70 | return checkpoint_dict 71 | 72 | 73 | def _setup_legacy_env(): 74 | importlib.import_module('ltr') 75 | sys.modules['dlframework'] = sys.modules['ltr'] 76 | sys.modules['dlframework.common'] = sys.modules['ltr'] 77 | for m in ('model_constructor', 'stats', 'settings', 'local'): 78 | importlib.import_module('ltr.admin.'+m) 79 | sys.modules['dlframework.common.utils.'+m] = sys.modules['ltr.admin.'+m] 80 | 81 | 82 | def _cleanup_legacy_env(): 83 | del_modules = [] 84 | for m in sys.modules.keys(): 85 | if m.startswith('dlframework'): 86 | del_modules.append(m) 87 | for m in del_modules: 88 | del sys.modules[m] 89 | -------------------------------------------------------------------------------- /lib/online/model_constructor.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | import importlib 3 | 4 | 5 | def model_constructor(f): 6 | """ Wraps the function 'f' which returns the network. An extra field 'constructor' is added to the network returned 7 | by 'f'. This field contains an instance of the 'NetConstructor' class, which contains the information needed to 8 | re-construct the network, such as the name of the function 'f', the function arguments etc. Thus, the network can 9 | be easily constructed from a saved checkpoint by calling NetConstructor.get() function. 10 | """ 11 | @wraps(f) 12 | def f_wrapper(*args, **kwds): 13 | net_constr = NetConstructor(f.__name__, f.__module__, args, kwds) 14 | output = f(*args, **kwds) 15 | if isinstance(output, (tuple, list)): 16 | # Assume first argument is the network 17 | output[0].constructor = net_constr 18 | else: 19 | output.constructor = net_constr 20 | return output 21 | return f_wrapper 22 | 23 | 24 | class NetConstructor: 25 | """ Class to construct networks. Takes as input the function name (e.g. atom_resnet18), the name of the module 26 | which contains the network function (e.g. ltr.models.bbreg.atom) and the arguments for the network 27 | function. The class object can then be stored along with the network weights to re-construct the network.""" 28 | def __init__(self, fun_name, fun_module): 29 | """ 30 | args: 31 | fun_name - The function which returns the network 32 | fun_module - the module which contains the network function 33 | args - arguments which are passed to the network function 34 | kwds - arguments which are passed to the network function 35 | """ 36 | self.fun_name = fun_name 37 | self.fun_module = fun_module 38 | #self.args = args 39 | #self.kwds = kwds 40 | 41 | def get(self): 42 | """ Rebuild the network by calling the network function with the correct arguments. """ 43 | net_module = importlib.import_module(self.fun_module) 44 | net_fun = getattr(net_module, self.fun_name) 45 | return net_fun() 46 | -------------------------------------------------------------------------------- /lib/online/operation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from online.tensorlist import tensor_operation, TensorList 4 | 5 | 6 | @tensor_operation 7 | def conv2d(input: torch.Tensor, weight: torch.Tensor, bias: torch.Tensor = None, stride=1, padding=0, dilation=1, groups=1, mode=None): 8 | """Standard conv2d. 
Returns the input if weight=None.""" 9 | 10 | if weight is None: 11 | return input 12 | 13 | ind = None 14 | if mode is not None: 15 | if padding != 0: 16 | raise ValueError('Cannot input both padding and mode.') 17 | if mode == 'same': 18 | padding = (weight.shape[2]//2, weight.shape[3]//2) 19 | if weight.shape[2] % 2 == 0 or weight.shape[3] % 2 == 0: 20 | ind = (slice(-1) if weight.shape[2] % 2 == 0 else slice(None), 21 | slice(-1) if weight.shape[3] % 2 == 0 else slice(None)) 22 | elif mode == 'valid': 23 | padding = (0, 0) 24 | elif mode == 'full': 25 | padding = (weight.shape[2]-1, weight.shape[3]-1) 26 | else: 27 | raise ValueError('Unknown mode for padding.') 28 | 29 | out = F.conv2d(input, weight, bias=bias, stride=stride, padding=padding, dilation=dilation, groups=groups) 30 | if ind is None: 31 | return out 32 | return out[:,:,ind[0],ind[1]] 33 | 34 | 35 | @tensor_operation 36 | def conv1x1(input: torch.Tensor, weight: torch.Tensor): 37 | """Do a convolution with a 1x1 kernel weights. Implemented with matmul, which can be faster than using conv.""" 38 | 39 | if weight is None: 40 | return input 41 | 42 | return torch.matmul(weight.view(weight.shape[0], weight.shape[1]), 43 | input.view(input.shape[0], input.shape[1], -1)).view(input.shape[0], weight.shape[0], input.shape[2], input.shape[3]) 44 | -------------------------------------------------------------------------------- /lib/online/optim.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import sys 3 | from online import optimization, TensorList, operation 4 | import math 5 | 6 | 7 | class FactorizedConvProblem(optimization.L2Problem): 8 | def __init__(self, training_samples: TensorList, y: TensorList, filter_reg: torch.Tensor, projection_reg, params, sample_weights: TensorList, 9 | projection_activation, response_activation): 10 | self.training_samples = training_samples 11 | self.y = y 12 | self.filter_reg = filter_reg 13 | self.sample_weights = sample_weights 14 | self.params = params 15 | self.projection_reg = projection_reg 16 | self.projection_activation = projection_activation 17 | self.response_activation = response_activation 18 | 19 | self.diag_M = self.filter_reg.concat(projection_reg) 20 | 21 | def __call__(self, x: TensorList): 22 | """ 23 | Compute residuals 24 | :param x: [filters, projection_matrices] 25 | :return: [data_terms, filter_regularizations, proj_mat_regularizations] 26 | """ 27 | filter = x[:len(x)//2] # w2 in paper 28 | P = x[len(x)//2:] # w1 in paper 29 | 30 | # Do first convolution 31 | compressed_samples = operation.conv1x1(self.training_samples, P).apply(self.projection_activation) 32 | 33 | # Do second convolution 34 | residuals = operation.conv2d(compressed_samples, filter, mode='same').apply(self.response_activation) 35 | 36 | # Compute data residuals 37 | residuals = residuals - self.y 38 | 39 | residuals = self.sample_weights.sqrt().view(-1, 1, 1, 1) * residuals 40 | 41 | # Add regularization for projection matrix 42 | residuals.extend(self.filter_reg.apply(math.sqrt) * filter) 43 | 44 | # Add regularization for projection matrix 45 | residuals.extend(self.projection_reg.apply(math.sqrt) * P) 46 | 47 | return residuals 48 | 49 | 50 | def ip_input(self, a: TensorList, b: TensorList): 51 | num = len(a) // 2 # Number of filters 52 | a_filter = a[:num] 53 | b_filter = b[:num] 54 | a_P = a[num:] 55 | b_P = b[num:] 56 | 57 | # Filter inner product 58 | # ip_out = a_filter.reshape(-1) @ b_filter.reshape(-1) 59 | ip_out = 
operation.conv2d(a_filter, b_filter).view(-1) 60 | 61 | # Add projection matrix part 62 | # ip_out += a_P.reshape(-1) @ b_P.reshape(-1) 63 | ip_out += operation.conv2d(a_P.view(1,-1,1,1), b_P.view(1,-1,1,1)).view(-1) 64 | 65 | # Have independent inner products for each filter 66 | return ip_out.concat(ip_out.clone()) 67 | 68 | def M1(self, x: TensorList): 69 | return x / self.diag_M 70 | 71 | 72 | class ConvProblem(optimization.L2Problem): 73 | def __init__(self, training_samples: TensorList, y: TensorList, filter_reg: torch.Tensor, sample_weights: TensorList, response_activation): 74 | self.training_samples = training_samples 75 | self.y = y 76 | self.filter_reg = filter_reg 77 | self.sample_weights = sample_weights 78 | self.response_activation = response_activation 79 | 80 | def __call__(self, x: TensorList): 81 | """ 82 | Compute residuals 83 | :param x: [filters] 84 | :return: [data_terms, filter_regularizations] 85 | """ 86 | # Do convolution and compute residuals 87 | residuals = operation.conv2d(self.training_samples, x, mode='same').apply(self.response_activation) 88 | residuals = residuals - self.y 89 | 90 | residuals = self.sample_weights.sqrt().view(-1, 1, 1, 1) * residuals 91 | 92 | # Add regularization for projection matrix 93 | residuals.extend(self.filter_reg.apply(math.sqrt) * x) 94 | 95 | return residuals 96 | 97 | def ip_input(self, a: TensorList, b: TensorList): 98 | # return a.reshape(-1) @ b.reshape(-1) 99 | # return (a * b).sum() 100 | return operation.conv2d(a, b).view(-1) 101 | -------------------------------------------------------------------------------- /lib/online/tensordict.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch 3 | 4 | 5 | class TensorDict(OrderedDict): 6 | """Container mainly used for dicts of torch tensors. 
Extends OrderedDict with pytorch functionality.""" 7 | 8 | def concat(self, other): 9 | """Concatenates two dicts without copying internal data.""" 10 | return TensorDict(self, **other) 11 | 12 | def copy(self): 13 | return TensorDict(super(TensorDict, self).copy()) 14 | 15 | def __getattr__(self, name): 16 | if not hasattr(torch.Tensor, name): 17 | raise AttributeError('\'TensorDict\' object has not attribute \'{}\''.format(name)) 18 | 19 | def apply_attr(*args, **kwargs): 20 | return TensorDict({n: getattr(e, name)(*args, **kwargs) if hasattr(e, name) else e for n, e in self.items()}) 21 | return apply_attr 22 | 23 | def attribute(self, attr: str, *args): 24 | return TensorDict({n: getattr(e, attr, *args) for n, e in self.items()}) 25 | 26 | def apply(self, fn, *args, **kwargs): 27 | return TensorDict({n: fn(e, *args, **kwargs) for n, e in self.items()}) 28 | 29 | @staticmethod 30 | def _iterable(a): 31 | return isinstance(a, (TensorDict, list)) 32 | 33 | -------------------------------------------------------------------------------- /lib/online/tracking.py: -------------------------------------------------------------------------------- 1 | from .base_actor import BaseActor 2 | 3 | 4 | class ONLINEActor(BaseActor): 5 | """Actor for training the ONLINE network.""" 6 | def __init__(self, net, objective, loss_weight=None): 7 | super().__init__(net, objective) 8 | if loss_weight is None: 9 | loss_weight = {'iou': 1.0, 'test_clf': 1.0} 10 | self.loss_weight = loss_weight 11 | 12 | def __call__(self, data): 13 | """ 14 | args: 15 | data - The input data, should contain the fields 'train_images', 'test_images', 'train_anno', 16 | 'test_proposals', 'proposal_iou' and 'test_label'. 17 | 18 | returns: 19 | loss - the training loss 20 | stats - dict containing detailed losses 21 | """ 22 | # Run network 23 | target_scores = self.net(train_imgs=data['train_images'], 24 | test_imgs=data['test_images'], 25 | train_bb=data['train_anno'], 26 | test_proposals=data['test_proposals']) 27 | 28 | # Classification losses for the different optimization iterations 29 | clf_losses_test = [self.objective['test_clf'](s, data['test_label'], data['test_anno']) for s in target_scores] 30 | 31 | # Loss of the final filter 32 | clf_loss_test = clf_losses_test[-1] 33 | loss_target_classifier = self.loss_weight['test_clf'] * clf_loss_test 34 | 35 | # Loss for the initial filter iteration 36 | loss_test_init_clf = 0 37 | if 'test_init_clf' in self.loss_weight.keys(): 38 | loss_test_init_clf = self.loss_weight['test_init_clf'] * clf_losses_test[0] 39 | 40 | # Loss for the intermediate filter iterations 41 | loss_test_iter_clf = 0 42 | if 'test_iter_clf' in self.loss_weight.keys(): 43 | test_iter_weights = self.loss_weight['test_iter_clf'] 44 | if isinstance(test_iter_weights, list): 45 | loss_test_iter_clf = sum([a*b for a, b in zip(test_iter_weights, clf_losses_test[1:-1])]) 46 | else: 47 | loss_test_iter_clf = (test_iter_weights / (len(clf_losses_test) - 2)) * sum(clf_losses_test[1:-1]) 48 | 49 | # Total loss 50 | # loss = loss_iou + loss_target_classifier + loss_test_init_clf + loss_test_iter_clf 51 | loss = loss_target_classifier + loss_test_init_clf + loss_test_iter_clf 52 | 53 | # Log stats 54 | stats = {'Loss/total': loss.item(), 55 | # 'Loss/iou': loss_iou.item(), 56 | 'Loss/iou': 0, 57 | 'Loss/target_clf': loss_target_classifier.item()} 58 | if 'test_init_clf' in self.loss_weight.keys(): 59 | stats['Loss/test_init_clf'] = loss_test_init_clf.item() 60 | if 'test_iter_clf' in self.loss_weight.keys(): 61 | 
stats['Loss/test_iter_clf'] = loss_test_iter_clf.item() 62 | stats['ClfTrain/test_loss'] = clf_loss_test.item() 63 | if len(clf_losses_test) > 0: 64 | stats['ClfTrain/test_init_loss'] = clf_losses_test[0].item() 65 | if len(clf_losses_test) > 2: 66 | stats['ClfTrain/test_iter_loss'] = sum(clf_losses_test[1:-1]).item() / (len(clf_losses_test) - 2) 67 | 68 | return loss, stats 69 | -------------------------------------------------------------------------------- /lib/tutorial/Ocean/ocean.md: -------------------------------------------------------------------------------- 1 | # Ocean tutorial 2 | ## Testing 3 | 4 | We assume the root path is $TracKit, e.g. `/home/zpzhang/TracKit` 5 | ### Set up environment 6 | 7 | ``` 8 | cd $TracKit/lib/tutorial 9 | bash install.sh $conda_path TracKit 10 | cd $TracKit 11 | conda activate TracKit 12 | python setup.py develop 13 | ``` 14 | `$conda_path` denotes your anaconda path, e.g. `/home/zpzhang/anaconda3` 15 | 16 | - **[Optional]** Install TensorRT according to the [tutorial](../install_trt.md). 17 | 18 | **Note:** we perform TensorRT evaluation on an RTX 2080 Ti with CUDA 10.0. If you fail to install it, please use the PyTorch version. 19 | 20 | 21 | 22 | ### Prepare data and models 23 | 1. Download the pretrained [PyTorch model](https://drive.google.com/drive/folders/1XU5wmyC7MsI6C_9Lv-UH1mwDIh57FFf8?usp=sharing) and [TensorRT model](https://github.com/researchmm/TracKit/releases/tag/tensorrt) to `$TracKit/snapshot`. 24 | 2. Download [json](https://drive.google.com/drive/folders/1kYX_c8rw7HMW0e5V400vaLy9huiYvDHE?usp=sharing) files of testing data and put them in `$TracKit/dataset`. 25 | 3. Download testing data, e.g. VOT2019, and put it in `$TracKit/dataset`. Please download each dataset from its official website; the directories should be named like `VOT2019`, `OTB2015`, `GOT10K`, `LASOT`. 26 | 27 | ### Testing 28 | In root path `$TracKit`, 29 | 30 | ``` 31 | python tracking/test_ocean.py --arch Ocean --resume snapshot/OceanV.pth --dataset VOT2019 32 | ``` 33 | ### Evaluation 34 | ``` 35 | python lib/eval_toolkit/bin/eval.py --dataset_dir dataset --dataset VOT2019 --tracker_result_dir result/VOT2019 --trackers Ocean 36 | ``` 37 | You may test other datasets with our code. Please make sure the provided pre-trained model (`--resume`) corresponds to the dataset (`--dataset`). See [ocean_model.txt](https://drive.google.com/file/d/1T2QjyxN4movpFtpzCH8xHHX5_Dz7G5Y6/view?usp=sharing) for their correspondences. 38 | 39 | 40 | ### TensorRT toy 41 | Testing video: `twinnings` in OTB2015 (472 frames) 42 | Testing GPU: `RTX2080Ti` 43 | 44 | - TensorRT (**149fps**) 45 | ``` 46 | python tracking/test_ocean.py --arch OceanTRT --resume snapshot/OceanV.pth --dataset OTB2015 --video twinnings 47 | ``` 48 | 49 | - PyTorch (**68fps**) 50 | ``` 51 | python tracking/test_ocean.py --arch Ocean --resume snapshot/OceanV.pth --dataset OTB2015 --video twinnings 52 | ``` 53 | 54 | **Note:** 55 | - The TensorRT version of Ocean only supports an input size of 255. 56 | - Current TensorRT does not support some operations well. We will keep updating our code as official TensorRT evolves. If you want to test on the benchmarks, please use the PyTorch version. 57 | - If you want to use our code in a real product, our TensorRT code may help you.
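As a concrete example of matching the checkpoint to the benchmark (a sketch only: the checkpoint filename below is illustrative, please check `ocean_model.txt` for the actual model-dataset correspondence), testing and evaluating on VOT2018 reuses the same two commands shown in the Testing and Evaluation sections above, changing only the dataset name and checkpoint:

```
python tracking/test_ocean.py --arch Ocean --resume snapshot/OceanV2018.pth --dataset VOT2018
python lib/eval_toolkit/bin/eval.py --dataset_dir dataset --dataset VOT2018 --tracker_result_dir result/VOT2018 --trackers Ocean
```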
58 | 59 | 60 | 61 | :cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud: 62 | ## Training 63 | #### prepare data 64 | - Please download training data from [GoogleDrive](https://drive.google.com/drive/folders/1ehjVhg6ewdWSWt709zd1TkjWF7UJlQlq?usp=sharing) or [BaiduDrive(urxq)](https://pan.baidu.com/s/1jGPEJieir5OWqCmibV3yrQ), and then put them in `$TracKit/data` 65 | - You could also refer to scripts in `$TracKit/lib/dataset/crop` to process your custom data. 66 | - For the split files on BaiduDrive, please use `cat got10k.tar.* | tar -zxv` to merge and unzip them. 67 | 68 | 69 | #### prepare pretrained model 70 | Please download the model pretrained on ImageNet [here](https://drive.google.com/drive/folders/1ctoxaPiS9qinhmN_bl5z3VNhYnrhl99t?usp=sharing), and then put it in `$TracKit/pretrain`. 71 | 72 | #### modify settings 73 | Please modify the training settings in `$TracKit/experiments/train/Ocean.yaml`. The default number of GPUs and batch size in the paper are 8 and 32, respectively. 74 | 75 | #### run 76 | In root path $TracKit, 77 | ``` 78 | python tracking/onekey.py 79 | ``` 80 | This script integrates **train**, **epoch test** and **tune**. It is suggested to run them one by one when you are not familiar with our whole framework (modify the key `ISTRUE` in `$TracKit/experiments/train/Ocean.yaml`). When you know this framework well, simply run this one-key script. VOT2018 is much more sensitive than other datasets, so I would suggest tuning 4000-5000 parameter groups for it. For other datasets like VOT2019/OTB, 1500-2000 may be enough. For truly large datasets like LASOT, I would suggest tuning with grid search (only selecting the epoch and tuning `window_influence` is enough for LASOT in my experience). 81 | -------------------------------------------------------------------------------- /lib/tutorial/OceanPlus/oceanplus.md: -------------------------------------------------------------------------------- 1 | # OceanPlus tutorial 2 | ## Testing 3 | 4 | We assume the root path is $TracKit, e.g. `/home/zpzhang/TracKit` 5 | 6 | ### Set up environment 7 | 8 | ``` 9 | cd $TracKit/lib/tutorial 10 | bash install.sh $conda_path TracKit 11 | cd $TracKit 12 | conda activate TracKit 13 | python setup.py develop 14 | ``` 15 | `$conda_path` denotes your anaconda path, e.g. `/home/zpzhang/anaconda3` 16 | 17 | 18 | **Note:** all the results for VOT2020 in the paper (including other methods) are obtained with `vot-toolkit=0.2.0`. Please use the same environment to reproduce our results. 19 | 20 | 21 | ### Prepare data and models 22 | 23 | 1. Follow the official [guidelines](https://www.votchallenge.net/howto/tutorial_python.html) to set up the VOT workspace. 24 | 25 | 2. Download the models from [GoogleDrive](https://drive.google.com/drive/folders/1_uagYRFpQmYoWAc0oeiAY49gHwQxztrN?usp=sharing) and put them in `$TracKit/snapshot` 26 | 27 | 28 | ### Testing 29 | 30 | #### For VOT2020 31 | 32 | - **Note: the results are 0.444/0.451 for the single-stage (MSS) and multi-stage (MMS) models, respectively. This is a bit higher than reported.** 33 | 34 | 1. Modify scripts 35 | 36 | - Set the model path in line 81 of `$TracKit/tracking/vot_wrap.py` or `$TracKit/tracking/vot_wrap_mms.py`. 37 | 38 | - for the model without the MMS network (faster): 39 | ``` 40 | set running script in vot2020 workspace (i.e.
trackers.ini) to `vot_wrap.py` 41 | ``` 42 | - for the model with the MMS network (slower): 43 | ``` 44 | set running script in vot2020 workspace (i.e. trackers.ini) to `vot_wrap_mms.py` 45 | ``` 46 | - Note: We provide a reference `trackers.ini` in `$TracKit/trackers.ini`. Please find more running guidelines on the official VOT [website](https://www.votchallenge.net/howto/tutorial_python.html). 47 | 48 | 2. run 49 | ``` 50 | CUDA_VISIBLE_DEVICES=0 vot evaluate --workspace $workspace_path OceanPlus 51 | ``` 52 | - Note: If you only want to test the "baseline" track in VOT to save time, please remove lines 10-21 in `$root/anaconda3/envs/TracKit/lib/python3.7/site-packages/vot/stack/vot2020.yaml`. 53 | 54 | 55 | 3. evaluate 56 | ``` 57 | vot analysis --workspace $workspace_path OceanPlus --output json 58 | ``` 59 | 60 | 61 | We also provide the trackers submitted to the VOT2020 challenge, i.e. [[OceanPlus]](https://drive.google.com/file/d/1DNDZshPed_fcl1DB2lKiOU1bjYC_dxtp/view?usp=sharing), [[OceanPlus-Online]](https://drive.google.com/file/d/1UahJTVPfV0gcqKlBEFc6nwIaqNhyjKQQ/view?usp=sharing), [[OceanPlus-Online-TRT]](https://drive.google.com/file/d/1pdrgyx6XKzN4b3Cyplnr5bcB4TilRS1y/view?usp=sharing). 62 | 63 | #### For VOS 64 | 1. prepare data 65 | Download the dataset from DAVIS, and then 66 | ``` 67 | ln -sfb $path_to_DAVIS-trainval $TracKit/dataset/DAVIS 68 | ``` 69 | 70 | 2. run 71 | ``` 72 | CUDA_VISIBLE_DEVICES=0 python tracking/test_oceanplus.py --arch OceanPlus --mms True --dataset DAVIS2016 --resume snapshot/OceanPlusMMS.pth 73 | ``` 74 | 75 | 3. evaluate 76 | ``` 77 | python lib/core/eval_davis.py --dataset DAVIS2016 --num_threads 1 --datapath dataset/DAVIS 78 | ``` 79 | 80 | :cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud::cloud: 81 | 82 | The training code will be released after acceptance. Thanks for your interest! 83 | 84 | 85 | -------------------------------------------------------------------------------- /lib/tutorial/SiamDW/siamdw.md: -------------------------------------------------------------------------------- 1 | # SiamDW tutorial 2 | ## Testing 3 | 4 | We assume the root path is $TracKit, e.g. `/home/zpzhang/TracKit` 5 | ### Set up environment 6 | Please follow [readme of Ocean](../Ocean/ocean.md) to install the environment. 7 | 8 | ### Prepare data and models 9 | 1. Download the pretrained [PyTorch model](https://drive.google.com/file/d/1SzIql02jJ6Id1k0M6f-zjUA3RgAm6E5U/view?usp=sharing) to `$TracKit/snapshot`. 10 | 2. Download [json](https://drive.google.com/open?id=1S-RkzyMVRFWueWW91NmZldUJuDyhGdp1) files of testing data and put them in `$TracKit/dataset`. 11 | 3. Download testing data, e.g. VOT2017, and put it in `$TracKit/dataset`. 12 | 13 | ### Testing 14 | In root path `$TracKit`, 15 | ``` 16 | python tracking/test_siamdw.py --arch SiamDW --resume snapshot/siamdw_res22w.pth --dataset VOT2017 17 | ``` 18 | 19 | 20 | ### Training 21 | In root path `$TracKit`, 22 | 1. Download the pretrained model from [here](https://drive.google.com/file/d/1wXyW82idctCd4FkqKxvuWsL707joEIeI/view?usp=sharing) and put it in `pretrain` (named `pretrain.model`). 23 | 24 | 2. Modify `experiments/train/SiamDW.yaml` according to your needs (we suggest using GOT10K with 200k pairs per epoch). 25 | ``` 26 | python tracking/train_siamdw.py 27 | ``` 28 | 29 | Then, please follow the `epoch testing` and `tuning` steps as in Ocean, e.g. the evaluation call shown below.
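A minimal sketch of the evaluation step, assuming SiamDW results are written to `result/VOT2017/SiamDW` in the same layout the Ocean tutorial uses (the tracker name and result directory below are illustrative, adjust them to where your results are actually saved):

```
python lib/eval_toolkit/bin/eval.py --dataset_dir dataset --dataset VOT2017 --tracker_result_dir result/VOT2017 --trackers SiamDW
```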
30 | -------------------------------------------------------------------------------- /lib/tutorial/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$#" -ne 2 ]; then 4 | echo "ERROR! Illegal number of parameters. Usage: bash install.sh conda_install_path environment_name" 5 | exit 0 6 | fi 7 | 8 | conda_install_path=$1 9 | conda_env_name=$2 10 | 11 | source $conda_install_path/etc/profile.d/conda.sh 12 | echo "****************** Creating conda environment ${conda_env_name} python=3.7 ******************" 13 | conda create -y -n $conda_env_name python=3.7 14 | 15 | echo "" 16 | echo "" 17 | echo "****************** Activating conda environment ${conda_env_name} ******************" 18 | conda activate $conda_env_name 19 | 20 | echo "" 21 | echo "" 22 | echo "****************** Installing pytorch with cuda10 ******************" 23 | conda install -y pytorch==1.1.0 torchvision==0.3.0 cudatoolkit=10.0 -c pytorch 24 | 25 | echo "" 26 | echo "" 27 | echo "****************** Installing matplotlib 2.2.2 ******************" 28 | conda install -y matplotlib=2.2.2 29 | 30 | echo "" 31 | echo "" 32 | echo "****************** Installing pandas ******************" 33 | conda install -y pandas 34 | 35 | echo "" 36 | echo "" 37 | echo "****************** Installing opencv ******************" 38 | pip install opencv-python 39 | 40 | echo "" 41 | echo "" 42 | echo "****************** Installing tensorboardX ******************" 43 | pip install tensorboardX 44 | 45 | echo "" 46 | echo "" 47 | echo "****************** Installing cython ******************" 48 | conda install -y cython 49 | 50 | 51 | echo "" 52 | echo "" 53 | echo "****************** Installing skimage ******************" 54 | pip install scikit-image 55 | 56 | 57 | 58 | echo "" 59 | echo "" 60 | echo "****************** Installing pillow ******************" 61 | pip install 'pillow<7.0.0' 62 | 63 | echo "" 64 | echo "" 65 | echo "****************** Installing scipy ******************" 66 | pip install scipy 67 | 68 | echo "" 69 | echo "" 70 | echo "****************** Installing shapely ******************" 71 | pip install shapely 72 | 73 | echo "" 74 | echo "" 75 | echo "****************** Installing easydict ******************" 76 | pip install easydict 77 | 78 | echo "" 79 | echo "" 80 | echo "****************** Installing jpeg4py python wrapper ******************" 81 | pip install jpeg4py 82 | pip install mpi4py 83 | pip install ray==0.8.7 84 | pip install hyperopt 85 | 86 | 87 | echo "" 88 | echo "" 89 | echo "****************** Installing vot python toolkit ******************" 90 | pip install git+https://github.com/votchallenge/vot-toolkit-python@7a1b807df3d64ea310c554e9f487f1e5f53bf249 91 | 92 | echo "****************** Installation complete! ******************" 93 | -------------------------------------------------------------------------------- /lib/tutorial/install_trt.md: -------------------------------------------------------------------------------- 1 | # Install TensorRT 2 | We install TensorRT on RTX2080Ti with CUDA10.0. If you fail to install it, please use pytorch version. 3 | 4 | 1) install pycuda 5 | ``` 6 | export C_INCLUDE_PATH=/usr/local/cuda-10.0/include/:${C_INCLUDE_PATH} 7 | export CPLUS_INCLUDE_PATH=/usr/local/cuda-10.0/include/:${CPLUS_INCLUDE_PATH} 8 | pip install pycuda 9 | ``` 10 | 2) download tensorrt 11 | - Go to [NVIDIA-TENSORRT](https://developer.nvidia.com/tensorrt) and then click `Download Now`. 
12 | - Login and download TensorRT7 (please select the version that suits your platform). We use the [TensorRT 7.0.0.11 for Ubuntu 18.04 and CUDA 10.0 tar package](https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/7.0/7.0.0.11/tars/TensorRT-7.0.0.11.Ubuntu-18.04.x86_64-gnu.cuda-10.0.cudnn7.6.tar.gz) in our experiments. 13 | 14 | 3) install 15 | ```bash 16 | tar -zxf TensorRT-7.0.0.11.Ubuntu-18.04.x86_64-gnu.cuda-10.0.cudnn7.6.tar.gz 17 | vim ~/.bashrc 18 | 19 | # Add the following line to your ~/.bashrc 20 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH: 21 | # for example 22 | # export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/data/zpzhang/TensorRT-7.0.0.11/lib 23 | 24 | source ~/.bashrc 25 | conda activate OceanOnlineTRT 26 | cd TensorRT-7.0.0.11/python 27 | # Remember: the wheel below is for python version 3.7 28 | pip install tensorrt-7.0.0.11-cp37-none-linux_x86_64.whl 29 | cd TensorRT-7.0.0.11/graphsurgeon 30 | pip install graphsurgeon-0.4.1-py2.py3-none-any.whl 31 | ``` 32 | 33 | 4) Verify the installation 34 | ``` 35 | python 36 | import tensorrt 37 | ``` 38 | 39 | 5) Install Torch2trt 40 | ``` 41 | conda activate OceanOnlineTRT 42 | git clone https://github.com/NVIDIA-AI-IOT/torch2trt 43 | cd torch2trt 44 | python setup.py install 45 | ``` 46 | Verify the installation 47 | ``` 48 | python 49 | import torch2trt 50 | ``` 51 | 52 | 53 | 54 | 55 | ### Note 56 | - If you meet the error `ImportError: libcudart.so.10.0: cannot open shared object file: No such file or directory`, please run `sudo cp /usr/local/cuda-10.0/lib64/libcudart.so.10.0 /usr/local/lib/libcudart.so.10.0 && sudo ldconfig`. 57 | 58 | - If you meet the error `PermissionError: [Errno 13] Permission denied: '/tmp/torch_extensions/_prroi_pooling/lock'`, please remove `/tmp/torch_extensions/_prroi_pooling` and rerun the tracker. This may happen if other users on your machine have compiled prroi pooling before. Besides, if you have compiled prroi_pooling before, please remove `/tmp/torch_extensions/`. Otherwise, you may fail to compile in the new conda environment. 59 | 60 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/researchmm/TracKit/e351e5bff8071aa14f333d5975a8f408a3e264c6/lib/utils/__init__.py -------------------------------------------------------------------------------- /lib/utils/cutout.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | 5 | class Cutout(object): 6 | """Randomly mask out one or more patches from an image. 7 | 8 | Args: 9 | n_holes (int): Number of patches to cut out of each image. 10 | length (int): The length (in pixels) of each square patch. 11 | """ 12 | def __init__(self, n_holes, length): 13 | self.n_holes = n_holes 14 | self.length = length 15 | 16 | def __call__(self, img): 17 | """ 18 | Args: 19 | img (Tensor): Tensor image of size (C, H, W). 20 | Returns: 21 | Tensor: Image with n_holes of dimension length x length cut out of it.
22 | """ 23 | h = img.size(1) 24 | w = img.size(2) 25 | 26 | mask = np.ones((h, w), np.float32) 27 | 28 | for n in range(self.n_holes): 29 | y = np.random.randint(h//4, h-h//4) 30 | x = np.random.randint(w//4, w-w//4) 31 | 32 | y1 = np.clip(y - self.length // 2, 0, h) 33 | y2 = np.clip(y + self.length // 2, 0, h) 34 | x1 = np.clip(x - self.length // 2, 0, w) 35 | x2 = np.clip(x + self.length // 2, 0, w) 36 | 37 | mask[y1: y2, x1: x2] = 0. 38 | 39 | mask = torch.from_numpy(mask) 40 | mask = mask.expand_as(img) 41 | img = img * mask 42 | 43 | return img 44 | -------------------------------------------------------------------------------- /lib/utils/extract_tpejson_fc.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # ! ./usr/bin/env python 3 | 4 | import os 5 | import json 6 | import shutil 7 | import argparse 8 | import numpy as np 9 | import pdb 10 | 11 | 12 | parser = argparse.ArgumentParser(description='Analysis siamfc tune results') 13 | parser.add_argument('--path', default='./TPE_results/zp_tune', help='tune result path') 14 | parser.add_argument('--dataset', default='VOT2018', help='test dataset') 15 | parser.add_argument('--save_path', default='logs', help='log file save path') 16 | 17 | 18 | def collect_results(args): 19 | dirs = os.listdir(args.path) 20 | print('[*] ===== total {} files in TPE dir'.format(len(dirs))) 21 | 22 | count = 0 23 | scale_penalty = [] 24 | scale_lr = [] 25 | wi = [] 26 | scale_step = [] 27 | eao = [] 28 | count = 0 # total numbers 29 | 30 | for d in dirs: 31 | param_path = os.path.join(args.path, d) 32 | json_path = os.path.join(param_path, 'result.json') 33 | 34 | if not os.path.exists(json_path): 35 | continue 36 | 37 | # pdb.set_trace() 38 | try: 39 | js = json.load(open(json_path, 'r')) 40 | except: 41 | continue 42 | 43 | if not "EAO" in list(js.keys()): 44 | continue 45 | else: 46 | count += 1 47 | eao.append(js['EAO']) 48 | temp = js['config'] 49 | scale_lr.append(temp["scale_lr"]) 50 | wi.append(temp["w_influence"]) 51 | scale_step.append(temp["scale_step"]) 52 | scale_penalty.append(temp["scale_penalty"]) 53 | 54 | 55 | # find max 56 | print('{} params group have been tested'.format(count)) 57 | eao = np.array(eao) 58 | max_idx = np.argmax(eao) 59 | max_eao = eao[max_idx] 60 | print('scale_penalty: {:.4f}, scale_lr: {:.4f}, wi: {:.4f}, scale_step: {}, eao: {}'.format(scale_penalty[max_idx], scale_lr[max_idx], wi[max_idx], scale_step[max_idx], max_eao)) 61 | 62 | 63 | if __name__ == '__main__': 64 | args = parser.parse_args() 65 | collect_results(args) -------------------------------------------------------------------------------- /lib/utils/extract_tpejson_ocean.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # ! 
./usr/bin/env python 3 | 4 | import os 5 | import json 6 | import shutil 7 | import argparse 8 | import numpy as np 9 | import pdb 10 | 11 | 12 | parser = argparse.ArgumentParser(description='Analysis siamfc tune results') 13 | parser.add_argument('--path', default='./TPE_results/zp_tune', help='tune result path') 14 | parser.add_argument('--dataset', default='VOT2019', help='test dataset') 15 | parser.add_argument('--save_path', default='logs', help='log file save path') 16 | 17 | 18 | def collect_results(args): 19 | dirs = os.listdir(args.path) 20 | print('[*] ===== total {} files in TPE dir'.format(len(dirs))) 21 | 22 | count = 0 23 | penalty_k = [] 24 | scale_lr = [] 25 | wi = [] 26 | big_sz = [] 27 | small_sz = [] 28 | ratio = [] 29 | eao = [] 30 | count = 0 # total numbers 31 | 32 | for d in dirs: 33 | param_path = os.path.join(args.path, d) 34 | json_path = os.path.join(param_path, 'result.json') 35 | 36 | if not os.path.exists(json_path): 37 | continue 38 | 39 | # pdb.set_trace() 40 | try: 41 | js = json.load(open(json_path, 'r')) 42 | except: 43 | continue 44 | 45 | if not "EAO" in list(js.keys()): 46 | continue 47 | else: 48 | count += 1 49 | # pdb.set_trace() 50 | eao.append(js['EAO']) 51 | temp = js['config'] 52 | scale_lr.append(temp["scale_lr"]) 53 | wi.append(temp["window_influence"]) 54 | penalty_k.append(temp["penalty_k"]) 55 | ratio.append(temp["ratio"]) 56 | small_sz.append(temp["small_sz"]) 57 | big_sz.append(temp["big_sz"]) 58 | 59 | 60 | # find max 61 | print('{} params group have been tested'.format(count)) 62 | eao = np.array(eao) 63 | max_idx = np.argmax(eao) 64 | max_eao = eao[max_idx] 65 | print('penalty_k: {:.4f}, scale_lr: {:.4f}, wi: {:.4f}, ratio: {:.4f}, small_sz: {}, big_sz: {:.4f}, eao: {}'.format(penalty_k[max_idx], scale_lr[max_idx], wi[max_idx], ratio[max_idx], small_sz[max_idx], big_sz[max_idx], max_eao)) 66 | 67 | 68 | if __name__ == '__main__': 69 | args = parser.parse_args() 70 | collect_results(args) -------------------------------------------------------------------------------- /lib/utils/extract_tpelog.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # ! 
./usr/bin/env python 3 | 4 | import shutil 5 | import argparse 6 | import numpy as np 7 | 8 | 9 | parser = argparse.ArgumentParser(description='Analysis siamfc tune results') 10 | parser.add_argument('--path', default='logs/gene_adjust_rpn.log', help='tune result path') 11 | parser.add_argument('--dataset', default='VOT2018', help='test dataset') 12 | parser.add_argument('--save_path', default='logs', help='log file save path') 13 | 14 | 15 | def collect_results(args): 16 | if not args.path.endswith('txt'): 17 | name = args.path.split('.')[0] 18 | name = name + '.txt' 19 | shutil.copy(args.path, name) 20 | args.path = name 21 | fin = open(args.path, 'r') 22 | lines = fin.readlines() 23 | penalty_k = [] 24 | scale_lr = [] 25 | wi = [] 26 | wi = [] 27 | ratio = [] 28 | sz = [] 29 | bz = [] 30 | eao = [] 31 | ratio = [] 32 | count = 0 # total numbers 33 | 34 | for line in lines: 35 | if not 'penalty_k:' in line: 36 | pass 37 | else: 38 | count += 1 39 | temp0, temp1, temp2, temp3, temp4, temp5, temp6 = line.split(',') 40 | #print(temp6.split(': ')[-1]) 41 | #exit() 42 | penalty_k.append(float(temp0.split(': ')[-1])) 43 | scale_lr.append(float(temp1.split(': ')[-1])) 44 | wi.append(float(temp2.split(': ')[-1])) 45 | sz.append(float(temp3.split(': ')[-1])) 46 | bz.append(float(temp4.split(': ')[-1])) 47 | ratio.append(float(temp5.split(': ')[-1])) 48 | try: 49 | eao.append(float(temp6.split(': ')[-1].split('==')[0])) 50 | except: 51 | eao.append(float(temp6.split(': ')[-1].split('Result')[0])) 52 | #print(line) 53 | #print(temp6.split(': ')[-1]) 54 | #exit() 55 | 56 | # find max 57 | eao = np.array(eao) 58 | max_idx = np.argmax(eao) 59 | max_eao = eao[max_idx] 60 | print('{} params group have been tested'.format(count)) 61 | print('penalty_k: {:.4f}, scale_lr: {:.4f}, wi: {:.4f}, ratio: {:.4f}, small_sz: {}, big_sz: {}, auc: {}'.format(penalty_k[max_idx], scale_lr[max_idx], wi[max_idx], ratio[max_idx], sz[max_idx], bz[max_idx], max_eao)) 62 | 63 | 64 | if __name__ == '__main__': 65 | args = parser.parse_args() 66 | collect_results(args) 67 | -------------------------------------------------------------------------------- /lib/utils/extract_tpelog_fc.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | # ! 
./usr/bin/env python 3 | 4 | 5 | import shutil 6 | import argparse 7 | import numpy as np 8 | 9 | 10 | parser = argparse.ArgumentParser(description='Analysis siamfc tune results') 11 | parser.add_argument('--path', default='logs/gene_adjust_rpn.log', help='tune result path') 12 | parser.add_argument('--dataset', default='VOT2018', help='test dataset') 13 | parser.add_argument('--save_path', default='logs', help='log file save path') 14 | 15 | 16 | def collect_results(args): 17 | if not args.path.endswith('txt'): 18 | name = args.path.split('.')[0] 19 | name = name + '.txt' 20 | shutil.copy(args.path, name) 21 | args.path = name 22 | fin = open(args.path, 'r') 23 | lines = fin.readlines() 24 | scale_step = [] 25 | scale_lr = [] 26 | scale_penalty = [] 27 | wi = [] 28 | eao = [] 29 | count = 0 # total numbers 30 | 31 | for line in lines: 32 | if not line.startswith('scale_step'): 33 | pass 34 | else: 35 | # print(line) 36 | count += 1 37 | print(line.split(',')) 38 | exit() 39 | temp0, temp1, temp2, temp3, temp4, temp5 = line.split(',') 40 | scale_step.append(float(temp0.split(': ')[-1])) 41 | scale_lr.append(float(temp1.split(': ')[-1])) 42 | scale_penalty.append(float(temp2.split(': ')[-1])) 43 | wi.append(float(temp3.split(': ')[-1])) 44 | eao.append(float(temp4.split(': ')[-1])) 45 | 46 | # find max 47 | eao = np.array(eao) 48 | max_idx = np.argmax(eao) 49 | max_eao = eao[max_idx] 50 | print('{} params group have been tested'.format(count)) 51 | print('scale_step: {:.4f}, scale_lr: {:.4f}, scale_penalty: {:.4f}, win_influence: {}, eao: {}'.format(scale_step[max_idx], scale_lr[max_idx], scale_penalty[max_idx], wi[max_idx], max_eao)) 52 | 53 | 54 | if __name__ == '__main__': 55 | args = parser.parse_args() 56 | collect_results(args) 57 | -------------------------------------------------------------------------------- /lib/utils/watch_tpe.sh: -------------------------------------------------------------------------------- 1 | watch -n 1 python lib/utils/extract_tpelog.py --path logs/tpe_tune.log 2 | -------------------------------------------------------------------------------- /lib/version.py: -------------------------------------------------------------------------------- 1 | # GENERATED VERSION FILE 2 | # TIME: Sun May 24 21:24:18 2020 3 | 4 | __version__ = '1.0.rc0' 5 | short_version = '1.0.rc0' 6 | -------------------------------------------------------------------------------- /tracking/_init_paths.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os.path as osp 6 | import sys 7 | 8 | 9 | def add_path(path): 10 | if path not in sys.path: 11 | sys.path.insert(0, path) 12 | 13 | 14 | this_dir = osp.dirname(__file__) 15 | 16 | lib_path = osp.join(this_dir, '..', 'lib') 17 | add_path(lib_path) 18 | 19 | # sys.path.insert(1, osp.join(this_dir, '..', 'lib/eval_toolkit/bin')) 20 | -------------------------------------------------------------------------------- /tracking/onekey.py: -------------------------------------------------------------------------------- 1 | 2 | import _init_paths 3 | import os 4 | import yaml 5 | import argparse 6 | from os.path import exists 7 | from utils.utils import load_yaml, extract_logs 8 | 9 | def parse_args(): 10 | """ 11 | args for onekey. 
12 | """ 13 | parser = argparse.ArgumentParser(description='Train SiamFC with onekey') 14 | # for train 15 | parser.add_argument('--cfg', type=str, default='experiments/train/Ocean.yaml', help='yaml configure file name') 16 | 17 | # for 18 | 19 | args = parser.parse_args() 20 | 21 | return args 22 | 23 | 24 | def main(): 25 | args = parse_args() 26 | 27 | # train - test - tune information 28 | info = yaml.load(open(args.cfg, 'r').read()) 29 | info = info['OCEAN'] 30 | trainINFO = info['TRAIN'] 31 | testINFO = info['TEST'] 32 | tuneINFO = info['TUNE'] 33 | dataINFO = info['DATASET'] 34 | 35 | # epoch training -- train 50 or more epochs 36 | if trainINFO['ISTRUE']: 37 | print('==> train phase') 38 | print('python ./tracking/train_ocean.py --cfg {0} --gpus {1} --workers {2} 2>&1 | tee logs/ocean_train.log' 39 | .format(args.cfg, info['GPUS'], info['WORKERS'])) 40 | 41 | if not exists('logs'): 42 | os.makedirs('logs') 43 | 44 | os.system('python ./tracking/train_ocean.py --cfg {0} --gpus {1} --workers {2} 2>&1 | tee logs/siamrpn_train.log' 45 | .format(args.cfg, info['GPUS'], info['WORKERS'])) 46 | 47 | # epoch testing -- test 30-50 epochs (or more) 48 | if testINFO['ISTRUE']: 49 | print('==> test phase') 50 | print('mpiexec -n {0} python ./tracking/test_epochs.py --arch {1} --start_epoch {2} --end_epoch {3} --gpu_nums={4} \ 51 | --threads {0} --dataset {5} --align {6} 2>&1 | tee logs/ocean_epoch_test.log' 52 | .format(testINFO['THREADS'], trainINFO['MODEL'], testINFO['START_EPOCH'], testINFO['END_EPOCH'], 53 | (len(info['GPUS']) + 1) // 2, testINFO['DATA'], trainINFO['ALIGN'])) 54 | 55 | if not exists('logs'): 56 | os.makedirs('logs') 57 | 58 | os.system('mpiexec -n {0} python ./tracking/test_epochs.py --arch {1} --start_epoch {2} --end_epoch {3} --gpu_nums={4} \ 59 | --threads {0} --dataset {5} --align {6} 2>&1 | tee logs/ocean_epoch_test.log' 60 | .format(testINFO['THREADS'], trainINFO['MODEL'], testINFO['START_EPOCH'], testINFO['END_EPOCH'], 61 | (len(info['GPUS']) + 1) // 2, testINFO['DATA'], trainINFO['ALIGN'])) 62 | 63 | # test on vot or otb benchmark 64 | print('====> use new testing toolkit') 65 | trackers = os.listdir(os.path.join('./result', testINFO['DATA'])) 66 | trackers = " ".join(trackers) 67 | if 'VOT' in testINFO['DATA']: 68 | print('python lib/eval_toolkit/bin/eval.py --dataset_dir dataset --dataset {0} --tracker_result_dir result/{0} --trackers {1}'.format(testINFO['DATA'], trackers)) 69 | os.system('python lib/eval_toolkit/bin/eval.py --dataset_dir dataset --dataset {0} --tracker_result_dir result/{0} --trackers {1} 2>&1 | tee logs/ocean_eval_epochs.log'.format(testINFO['DATA'], trackers)) 70 | else: 71 | raise ValueError('not supported now, please add new dataset') 72 | 73 | # tuning -- with TPE 74 | if tuneINFO['ISTRUE']: 75 | 76 | if 'VOT' in testINFO['DATA']: # for vot real-time and baseline 77 | resume = extract_logs('logs/ocean_eval_epochs.log', 'VOT') 78 | else: 79 | raise ValueError('not supported now') 80 | 81 | print('==> tune phase') 82 | print('python -u ./tracking/tune_tpe.py --arch {0} --resume {1} --dataset {2} --gpu_nums {3} --align {4}\ 83 | 2>&1 | tee logs/tpe_tune.log'.format(trainINFO['MODEL'], 'snapshot/'+ resume, tuneINFO['DATA'], (len(info['GPUS']) + 1) // 2, trainINFO['ALIGN'])) 84 | 85 | if not exists('logs'): 86 | os.makedirs('logs') 87 | os.system('python -u ./tracking/tune_tpe.py --arch {0} --resume {1} --dataset {2} --gpu_nums {3} --align {4}\ 88 | 2>&1 | tee logs/tpe_tune.log'.format(trainINFO['MODEL'], 'snapshot/'+ resume, tuneINFO['DATA'], 
(len(info['GPUS']) + 1) // 2, trainINFO['ALIGN'])) 89 | 90 | 91 | if __name__ == '__main__': 92 | main() 93 | -------------------------------------------------------------------------------- /tracking/test_epochs.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import argparse 4 | from mpi4py import MPI 5 | 6 | 7 | parser = argparse.ArgumentParser(description='multi-gpu test all epochs') 8 | parser.add_argument('--arch', dest='arch', default='SiamFCIncep22', 9 | help='architecture of model') 10 | parser.add_argument('--start_epoch', default=30, type=int, required=True, help='test end epoch') 11 | parser.add_argument('--end_epoch', default=50, type=int, required=True, 12 | help='test end epoch') 13 | parser.add_argument('--gpu_nums', default=4, type=int, required=True, help='test start epoch') 14 | parser.add_argument('--anchor_nums', default=5, type=int, help='anchor numbers') 15 | parser.add_argument('--threads', default=16, type=int, required=True) 16 | parser.add_argument('--dataset', default='VOT0219', type=str, help='benchmark to test') 17 | parser.add_argument('--align', default='False', type=str, help='align') 18 | args = parser.parse_args() 19 | 20 | # init gpu and epochs 21 | comm = MPI.COMM_WORLD 22 | size = comm.Get_size() 23 | rank = comm.Get_rank() 24 | GPU_ID = rank % args.gpu_nums 25 | node_name = MPI.Get_processor_name() # get the name of the node 26 | os.environ['CUDA_VISIBLE_DEVICES'] = str(GPU_ID) 27 | print("node name: {}, GPU_ID: {}".format(node_name, GPU_ID)) 28 | time.sleep(rank * 5) 29 | 30 | # run test scripts -- two epoch for each thread 31 | for i in range(2): 32 | arch = args.arch 33 | dataset = args.dataset 34 | try: 35 | epoch_ID += args.threads # for 16 queue 36 | except: 37 | epoch_ID = rank % (args.end_epoch - args.start_epoch + 1) + args.start_epoch 38 | 39 | if epoch_ID > args.end_epoch: 40 | continue 41 | 42 | resume = 'snapshot/checkpoint_e{}.pth'.format(epoch_ID) 43 | print('==> test {}th epoch'.format(epoch_ID)) 44 | os.system('python ./tracking/test_ocean.py --arch {0} --resume {1} --dataset {2} --align {3} --epoch_test True'.format(arch, resume, dataset, args.align)) 45 | -------------------------------------------------------------------------------- /tracking/vot.py: -------------------------------------------------------------------------------- 1 | """ 2 | \file vot.py 3 | 4 | @brief Python utility functions for VOT integration 5 | 6 | @author Luka Cehovin, Alessio Dore 7 | 8 | @date 2016 9 | 10 | """ 11 | 12 | import sys 13 | import copy 14 | import collections 15 | import numpy as np 16 | 17 | try: 18 | import trax 19 | except ImportError: 20 | raise Exception('TraX support not found. 
Please add trax module to Python path.') 21 | 22 | Rectangle = collections.namedtuple('Rectangle', ['x', 'y', 'width', 'height']) 23 | Point = collections.namedtuple('Point', ['x', 'y']) 24 | Polygon = collections.namedtuple('Polygon', ['points']) 25 | 26 | class VOT(object): 27 | """ Base class for Python VOT integration """ 28 | def __init__(self, region_format, channels=None): 29 | """ Constructor 30 | 31 | Args: 32 | region_format: Region format options 33 | """ 34 | assert(region_format in [trax.Region.RECTANGLE, trax.Region.POLYGON, trax.Region.MASK]) 35 | 36 | if channels is None: 37 | channels = ['color'] 38 | elif channels == 'rgbd': 39 | channels = ['color', 'depth'] 40 | elif channels == 'rgbt': 41 | channels = ['color', 'ir'] 42 | elif channels == 'ir': 43 | channels = ['ir'] 44 | else: 45 | raise Exception('Illegal configuration {}.'.format(channels)) 46 | 47 | self._trax = trax.Server([region_format], [trax.Image.PATH], channels, customMetadata=dict(vot="python")) 48 | 49 | request = self._trax.wait() 50 | assert(request.type == 'initialize') 51 | if isinstance(request.region, trax.Polygon): 52 | self._region = Polygon([Point(x[0], x[1]) for x in request.region]) 53 | if isinstance(request.region, trax.Mask): 54 | self._region = request.region.array(True) 55 | else: 56 | self._region = Rectangle(*request.region.bounds()) 57 | self._image = [x.path() for k, x in request.image.items()] 58 | if len(self._image) == 1: 59 | self._image = self._image[0] 60 | 61 | self._trax.status(request.region) 62 | 63 | def region(self): 64 | """ 65 | Send configuration message to the client and receive the initialization 66 | region and the path of the first image 67 | 68 | Returns: 69 | initialization region 70 | """ 71 | 72 | return self._region 73 | 74 | def report(self, region, confidence = None): 75 | """ 76 | Report the tracking results to the client 77 | 78 | Arguments: 79 | region: region for the frame 80 | """ 81 | assert(isinstance(region, (Rectangle, Polygon, np.ndarray))) 82 | if isinstance(region, Polygon): 83 | tregion = trax.Polygon.create([(x.x, x.y) for x in region.points]) 84 | if isinstance(region, np.ndarray): 85 | tregion = trax.Mask.create(region) 86 | else: 87 | tregion = trax.Rectangle.create(region.x, region.y, region.width, region.height) 88 | properties = {} 89 | if not confidence is None: 90 | properties['confidence'] = confidence 91 | self._trax.status(tregion, properties) 92 | 93 | def frame(self): 94 | """ 95 | Get a frame (image path) from client 96 | 97 | Returns: 98 | absolute path of the image 99 | """ 100 | if hasattr(self, "_image"): 101 | image = self._image 102 | del self._image 103 | return image 104 | 105 | request = self._trax.wait() 106 | 107 | if request.type == 'frame': 108 | image = [x.path() for k, x in request.image.items()] 109 | if len(image) == 1: 110 | return image[0] 111 | return image 112 | else: 113 | return None 114 | 115 | 116 | def quit(self): 117 | if hasattr(self, '_trax'): 118 | self._trax.quit() 119 | 120 | def __del__(self): 121 | self.quit() 122 | 123 | -------------------------------------------------------------------------------- /tracking/vot_wrap.py: -------------------------------------------------------------------------------- 1 | import _init_paths 2 | import vot 3 | import os 4 | import cv2 5 | import sys 6 | import random 7 | import argparse 8 | import numpy as np 9 | import torch 10 | import models.models as models 11 | 12 | from os.path import exists, join, dirname, realpath 13 | from tracker.oceanplus import OceanPlus 
14 | from easydict import EasyDict as edict 15 | from utils.utils import load_pretrain, cxy_wh_2_rect, get_axis_aligned_bbox, load_dataset, poly_iou 16 | 17 | from vot import Rectangle,Polygon, Point 18 | 19 | 20 | def make_full_size(x, output_sz): 21 | ''' 22 | zero-pad input x (right and down) to match output_sz 23 | x: numpy array e.g., binary mask 24 | output_sz: size of the output [width, height] 25 | ''' 26 | if x.shape[0] == output_sz[1] and x.shape[1] == output_sz[0]: 27 | return x 28 | pad_x = output_sz[0] - x.shape[1] 29 | if pad_x < 0: 30 | x = x[:, :x.shape[1] + pad_x] 31 | # padding has to be set to zero, otherwise pad function fails 32 | pad_x = 0 33 | pad_y = output_sz[1] - x.shape[0] 34 | if pad_y < 0: 35 | x = x[:x.shape[0] + pad_y, :] 36 | # padding has to be set to zero, otherwise pad function fails 37 | pad_y = 0 38 | return np.pad(x, ((0, pad_y), (0, pad_x)), 'constant', constant_values=0) 39 | 40 | def rect_from_mask(mask): 41 | ''' 42 | create an axis-aligned rectangle from a given binary mask 43 | mask in created as a minimal rectangle containing all non-zero pixels 44 | ''' 45 | x_ = np.sum(mask, axis=0) 46 | y_ = np.sum(mask, axis=1) 47 | x0 = np.min(np.nonzero(x_)) 48 | x1 = np.max(np.nonzero(x_)) 49 | y0 = np.min(np.nonzero(y_)) 50 | y1 = np.max(np.nonzero(y_)) 51 | 52 | w = x1 - x0 + 1 53 | h = y1 - y0 + 1 54 | # return [x0, y0, x1 - x0 + 1, y1 - y0 + 1] 55 | return [x0 + w/2 , y0 + h/2, w, h] 56 | 57 | def mask_from_rect(rect, output_sz): 58 | ''' 59 | create a binary mask from a given rectangle 60 | rect: axis-aligned rectangle [x0, y0, width, height] 61 | output_sz: size of the output [width, height] 62 | ''' 63 | mask = np.zeros((output_sz[1], output_sz[0]), dtype=np.uint8) 64 | x0 = max(int(round(rect[0])), 0) 65 | y0 = max(int(round(rect[1])), 0) 66 | x1 = min(int(round(rect[0] + rect[2])), output_sz[0]) 67 | y1 = min(int(round(rect[1] + rect[3])), output_sz[1]) 68 | mask[y0:y1, x0:x1] = 1 69 | return mask 70 | 71 | # define tracker 72 | info = edict() 73 | info.arch = "OceanPlus" 74 | info.dataset = "VOT2020" 75 | info.epoch_test = False 76 | info.align = False 77 | info.online = False 78 | mask_vot = True 79 | 80 | net = models.__dict__[info.arch](online=info.online, mms=False) 81 | net = load_pretrain(net, "$tracker_path/snapshot/OceanPlusMSS.pth") 82 | net.eval() 83 | net = net.cuda() 84 | 85 | # warm up 86 | print('==== warm up ====') 87 | for i in range(10): 88 | net.template(torch.rand(1, 3, 127, 127).cuda(), torch.rand(1, 127, 127).cuda()) 89 | net.track(torch.rand(1, 3, 255, 255).cuda()) 90 | 91 | tracker = OceanPlus(info) 92 | 93 | # vot2020 settings 94 | 95 | if mask_vot: 96 | handle = vot.VOT("mask") 97 | else: 98 | handle = vot.VOT("rectangle") 99 | 100 | image_file = handle.frame() 101 | 102 | if not image_file: 103 | sys.exit(0) 104 | 105 | im = cv2.imread(image_file) # HxWxC 106 | 107 | if mask_vot: 108 | print('the input is a binary mask') 109 | selection = handle.region() 110 | mask = make_full_size(selection, (im.shape[1], im.shape[0])) 111 | bbox = rect_from_mask(mask) # [cx,cy,w,h] TODO: use cv.minmaxRect here 112 | cx, cy, w, h = bbox 113 | else: 114 | print('the input is a rect box') 115 | selection = handle.region() # selection in ncc_mask 116 | lx, ly, w, h = selection.x, selection.y, selection.width, selection.height 117 | cx, cy = lx + w/2, ly + h/2 118 | 119 | target_pos = np.array([cx, cy]) 120 | target_sz = np.array([w, h]) 121 | state = tracker.init(im, target_pos, target_sz, net, mask=mask) 122 | 123 | 124 | count = 0 125 | 
while True: 126 | image_file = handle.frame() 127 | if not image_file: 128 | break 129 | im = cv2.imread(image_file) # HxWxC 130 | state = tracker.track(state, im) 131 | mask = state['mask'] 132 | if mask is None or mask.sum() < 10: 133 | rect = cxy_wh_2_rect(state['target_pos'], state['target_sz']) 134 | mask = mask_from_rect(rect, (im.shape[1], im.shape[0])) 135 | handle.report(mask, state['cls_score']) 136 | count += 1 137 | --------------------------------------------------------------------------------
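A minimal, self-contained sketch (numpy only) of the mask-to-rectangle round trip used by `rect_from_mask` and `mask_from_rect` in `tracking/vot_wrap.py` above; the mask size and object location are made-up example values, and clipping/rounding are omitted for brevity.

```python
# Illustrates the conversions in vot_wrap.py: a binary mask -> center-based
# rectangle [cx, cy, w, h], and a corner-based [x0, y0, w, h] box -> mask.
import numpy as np

output_sz = (64, 48)                        # [width, height], as in vot_wrap.py
mask = np.zeros((48, 64), dtype=np.uint8)   # (height, width)
mask[3:13, 5:25] = 1                        # a 20x10 object at x0=5, y0=3

# rect_from_mask logic: bounding box of non-zero pixels, center-based output
x_, y_ = mask.sum(axis=0), mask.sum(axis=1)
x0, x1 = np.min(np.nonzero(x_)), np.max(np.nonzero(x_))
y0, y1 = np.min(np.nonzero(y_)), np.max(np.nonzero(y_))
w, h = x1 - x0 + 1, y1 - y0 + 1
print([float(x0 + w / 2), float(y0 + h / 2), int(w), int(h)])  # [15.0, 8.0, 20, 10]

# mask_from_rect logic (simplified, no clipping): rasterise the box back to a mask
back = np.zeros((output_sz[1], output_sz[0]), dtype=np.uint8)
back[int(y0):int(y0 + h), int(x0):int(x0 + w)] = 1
print(np.array_equal(back, mask))           # True
```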