├── .gitignore ├── MODEL_ZOO.md ├── README.md ├── convert.sh ├── convert_new.sh ├── convert_psc.sh ├── experiments ├── siammask_r50_l3 │ ├── config.yaml │ ├── pre_kf_config.yaml │ ├── pre_lb_config.yaml │ ├── pre_lbv_config.yaml │ └── pre_mv_config.yaml ├── siamrpn_mobilev2_l234_dwxcorr │ ├── config.yaml │ ├── pre_kf_config.yaml │ ├── pre_lb_config.yaml │ ├── pre_lbv_config.yaml │ └── pre_mv_config.yaml └── siamrpn_r50_l234_dwxcorr │ ├── config.yaml │ ├── pre_kf_config.yaml │ ├── pre_lb_config.yaml │ ├── pre_lbv_config.yaml │ └── pre_mv_config.yaml ├── onboard ├── test_rt.py └── test_rt_f.py ├── pysot ├── __init__.py ├── __pycache__ │ └── __init__.cpython-38.pyc ├── core │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── config.cpython-38.pyc │ │ └── xcorr.cpython-38.pyc │ ├── config.py │ └── xcorr.py ├── datasets │ ├── __init__.py │ ├── anchor_target.py │ ├── augmentation.py │ ├── dataset.py │ └── la_dataset.py ├── models │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── alexnet.py │ │ ├── mobile_v2.py │ │ └── resnet_atrous.py │ ├── centernet │ │ ├── ddd_utils.py │ │ ├── decode.py │ │ ├── image.py │ │ ├── losses.py │ │ ├── post_process.py │ │ └── utils.py │ ├── head │ │ ├── __init__.py │ │ ├── mask.py │ │ └── rpn.py │ ├── init_weight.py │ ├── loss.py │ ├── model_builder.py │ ├── neck │ │ ├── __init__.py │ │ └── neck.py │ ├── pred_model_builder.py │ └── predictor │ │ ├── __init__.py │ │ ├── base_predictor.py │ │ ├── kf.py │ │ ├── lb_5.py │ │ ├── lbv_5.py │ │ └── mv_v16.py ├── tracker │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── base_tracker.cpython-38.pyc │ │ ├── siammask_tracker.cpython-38.pyc │ │ ├── siammask_tracker_f.cpython-38.pyc │ │ ├── siamrpn_tracker.cpython-38.pyc │ │ ├── siamrpn_tracker_f.cpython-38.pyc │ │ ├── siamrpnlt_tracker.cpython-38.pyc │ │ └── tracker_builder.cpython-38.pyc │ ├── base_tracker.py │ ├── siammask_tracker.py │ ├── siammask_tracker_f.py │ ├── siamrpn_tracker.py │ ├── siamrpn_tracker_f.py │ ├── siamrpn_tracker_ntr.py │ ├── siamrpnlt_tracker.py │ └── tracker_builder.py └── utils │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-38.pyc │ ├── anchor.cpython-38.pyc │ ├── bbox.cpython-38.pyc │ └── model_load.cpython-38.pyc │ ├── anchor.py │ ├── average_meter.py │ ├── bbox.py │ ├── distributed.py │ ├── image.py │ ├── img_crop.py │ ├── log_helper.py │ ├── lr_scheduler.py │ ├── misc.py │ └── model_load.py ├── test_agx_mob.sh ├── test_sim_mob.sh ├── toolkit ├── __init__.py ├── __pycache__ │ └── __init__.cpython-38.pyc ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ ├── dataset.cpython-38.pyc │ │ ├── dtb.cpython-38.pyc │ │ ├── got10k.cpython-38.pyc │ │ ├── lasot.cpython-38.pyc │ │ ├── otb.cpython-38.pyc │ │ ├── realworld.cpython-38.pyc │ │ ├── uav10fps.cpython-38.pyc │ │ ├── uav123.cpython-38.pyc │ │ ├── uav20l.cpython-38.pyc │ │ ├── uavdark.cpython-38.pyc │ │ ├── uavdt.cpython-38.pyc │ │ ├── video.cpython-38.pyc │ │ └── visdrone.cpython-38.pyc │ ├── dataset.py │ ├── dtb.py │ ├── got10k.py │ ├── lasot.py │ ├── nfs.py │ ├── otb.py │ ├── realworld.py │ ├── trackingnet.py │ ├── uav.py │ ├── uav10fps.py │ ├── uav123.py │ ├── uav20l.py │ ├── uavdark.py │ ├── uavdt.py │ ├── video.py │ ├── visdrone.py │ ├── visdrone1.py │ └── vot.py ├── evaluation │ ├── __init__.py │ ├── ar_benchmark.py │ ├── eao_benchmark.py │ ├── f1_benchmark.py │ └── ope_benchmark.py ├── utils │ ├── __init__.py │ ├── c_region.pxd │ ├── misc.py │ ├── region.pyx │ ├── src │ │ ├── buffer.h │ │ ├── 
region.c │ │ └── region.h │ └── statistics.py └── visualization │ ├── __init__.py │ ├── draw_eao.py │ ├── draw_f1.py │ ├── draw_success_precision.py │ └── draw_utils.py ├── tools ├── demo.py ├── eval.py ├── gen_sim_info.py ├── hp_search.py ├── rt_eva.py ├── rt_eva_new.py ├── rt_eva_pre.py ├── test.py ├── test_flop.py ├── test_rt.py ├── test_rt_f.py ├── test_rt_f_ntr.py ├── test_rt_f_sim.py ├── test_rt_sim.py └── train.py ├── train.sh ├── training_dataset ├── got10k │ └── gen_json.py ├── lasot │ ├── gen_json.py │ └── gen_txt.py └── vid │ ├── gen_json.py │ └── parse_vid.py └── vot_iter ├── __init__.py ├── tracker_SiamRPNpp.m ├── vot.py └── vot_iter.py /.gitignore: -------------------------------------------------------------------------------- 1 | testing_dataset/ 2 | Raw_Results_RPN_Mob/ 3 | training_dataset/ 4 | .vscode/ 5 | *.zip 6 | results_rt/ 7 | results_rt_raw/ 8 | results_eLAE/ 9 | Raw/ 10 | models/ 11 | __pycache__/ 12 | *.py[cod] -------------------------------------------------------------------------------- /MODEL_ZOO.md: -------------------------------------------------------------------------------- 1 | # PVT++ Model Zoo 2 | 3 | ## Introduction 4 | 5 | This file documents a collection of baselines trained with PVT++. All configurations for these baselines are located in the [`experiments`](experiments) directory. The tables below provide results about inference. Links to the trained models as well as their output are provided. All the results are obtained on the same Nvidia Jetson AGX Xavier platform. 6 | 7 | ## *Online* Visual Tracking 8 | 9 | | Model | DTB70
(mAUC/mDP)
| UAVDT
(mAUC/mDP)
| UAV20L
(mAUC/mDP)
| UAV123
(mAUC/mDP)
| URL | 10 | | :-----------------------: | :-------------------------------: | :-------------------------------: | :--------------------------------: | :--------------------------------: | :----------------------------------------------------------: | 11 | | RPN_mob | 0.298\|0.392 | 0.494\|0.719 | 0.448\|0.619 | 0.472\|0.678 | [RPN_Mob](https://mega.nz/file/8VlQXBIQ#ZbEBQnpMbQLJPQ0KqpALeHCZvxvOzW6QjTxX3hfnXS0) | 12 | | RPN_mob+Motion | 0.385\|0.523 | 0.529\|0.745 | 0.481\|0.647 | 0.537\|0.737 | [RPN_Mob_M](https://mega.nz/file/hFVklIpZ#0M1VJ7C1zmz4NrfwqWVuVMKRVjyEHedqaAVco2UkYX8) | 13 | | RPN_mob+Visual | 0.352\|0.472 | 0.564\|0.799 | 0.488\|0.675 | 0.504\|0.703 | [RPN_Mob_V](https://mega.nz/file/NRdlTTDS#TAcQwgEJmHLghFxFmDCTOv0gu5z57Eo3iiCaw-dRREw) | 14 | | RPN_mob+MV | 0.399\|0.536 | 0.576\|0.807 | 0.508\|0.697 | 0.537\|0.741 | [RPN_Mob_MV](https://mega.nz/file/EVFxSSYB#4TFSJoVELbztvhJX8xkDlqwldmJT6XucHBEy9nINdlM) | 15 | 16 | We also provide the [Raw_results](https://mega.nz/file/tFd02RxC#98PDk3XDhcXo9sZ-seKP5aklT0xC8rvbcUm77xu1Cmo). 17 | These files can also be found at [Google Drive](https://drive.google.com/file/d/1oZjoHGGXqKSC43yKTwn2zwxFQprDXp7L/view?usp=sharing). 18 | -------------------------------------------------------------------------------- /convert.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=/path/to/PVT++:$PYTHONPATH 2 | # \sigma=0 3 | python tools/rt_eva_pre.py --raw_root './results_rt_raw/DTB70/' --tar_root './results_rt/DTB70' --gtroot 'testing_dataset/DTB70' 4 | python tools/rt_eva_pre.py --raw_root './results_rt_raw/UAVDT/' --tar_root './results_rt/UAVDT' --gtroot 'testing_dataset/UAVDT/anno' 5 | python tools/rt_eva_pre.py --raw_root './results_rt_raw/UAV20L/' --tar_root './results_rt/UAV20L' --gtroot 'testing_dataset/UAV20L/anno' 6 | python tools/rt_eva_pre.py --raw_root './results_rt_raw/UAV123/' --tar_root './results_rt/UAV123' --gtroot 'testing_dataset/UAV123/anno' -------------------------------------------------------------------------------- /convert_new.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=/path/to/PVT++:$PYTHONPATH 2 | # DTB70 3 | python tools/rt_eva_new.py --raw_root Raw/DTB70 --tar_root results_eLAE/DTB70 --gtroot testing_dataset/DTB70 4 | # UAVDT 5 | python tools/rt_eva_new.py --raw_root Raw/UAVDT --tar_root results_eLAE/UAVDT --gtroot testing_dataset/UAVDT/anno 6 | # UAV20L 7 | python tools/rt_eva_new.py --raw_root Raw/UAV20L --tar_root results_eLAE/UAV20L --gtroot testing_dataset/UAV20L/anno 8 | # UAV123 9 | python tools/rt_eva_new.py --raw_root Raw/UAV123 --tar_root results_eLAE/UAV123 --gtroot testing_dataset/UAV123/anno -------------------------------------------------------------------------------- /convert_psc.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=/path/to/PVT++:$PYTHONPATH 2 | # PVT_pp2 3 | RAW_ROOT='/ocean/projects/cis220061p/bli5/CVPR23/code/PVT_pp2/output_rt/test/tracking_results' 4 | TAR_ROOT='/ocean/projects/cis220061p/bli5/CVPR23/code/PVT_pp2/rt_eva' 5 | GT_ROOT='/ocean/projects/cis220061p/bli5/CVPR23/data' 6 | # PVT_pp 7 | RAW_ROOT='results_rt_raw' 8 | TAR_ROOT='results_rt' 9 | GT_ROOT='testing_dataset' 10 | # \sigma=0 11 | # python3 tools/rt_eva.py --raw_root "${RAW_ROOT}/DTB/" --tar_root "${TAR_ROOT}/DTB70/" --gtroot "${GT_ROOT}/DTB70" 12 | python3 tools/rt_eva_pre.py --raw_root "${RAW_ROOT}/RealWorld/" --tar_root "${TAR_ROOT}/RealWorld/" 
--gtroot "${GT_ROOT}/real_world/anno" 13 | # python tools/rt_eva_pre.py --raw_root './results_rt_raw/UAV20L/' --tar_root './results_rt/UAV20L' --gtroot 'testing_dataset/UAV20L/anno' 14 | # python tools/rt_eva_pre.py --raw_root './results_rt_raw/UAV123/' --tar_root './results_rt/UAV123' --gtroot 'testing_dataset/UAV123/anno' -------------------------------------------------------------------------------- /experiments/siammask_r50_l3/config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 5 | KWARGS: 6 | used_layers: [0, 1, 2, 3] 7 | 8 | ADJUST: 9 | ADJUST: true 10 | TYPE: "AdjustAllLayer" 11 | KWARGS: 12 | in_channels: [1024] 13 | out_channels: [256] 14 | 15 | RPN: 16 | TYPE: 'DepthwiseRPN' 17 | KWARGS: 18 | anchor_num: 5 19 | in_channels: 256 20 | out_channels: 256 21 | 22 | MASK: 23 | MASK: True 24 | TYPE: 'MaskCorr' 25 | KWARGS: 26 | in_channels: 256 27 | hidden: 256 28 | out_channels: 3969 29 | 30 | REFINE: 31 | REFINE: True 32 | TYPE: 'Refine' 33 | 34 | ANCHOR: 35 | STRIDE: 8 36 | RATIOS: [0.33, 0.5, 1, 2, 3] 37 | SCALES: [8] 38 | ANCHOR_NUM: 5 39 | 40 | TRACK: 41 | TYPE: 'SiamMaskTracker' 42 | PENALTY_K: 0.10 43 | WINDOW_INFLUENCE: 0.41 44 | LR: 0.32 45 | EXEMPLAR_SIZE: 127 46 | INSTANCE_SIZE: 255 47 | BASE_SIZE: 8 48 | CONTEXT_AMOUNT: 0.5 49 | MASK_THERSHOLD: 0.15 50 | -------------------------------------------------------------------------------- /experiments/siammask_r50_l3/pre_kf_config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 5 | KWARGS: 6 | used_layers: [0, 1, 2, 3] 7 | 8 | ADJUST: 9 | ADJUST: true 10 | TYPE: "AdjustAllLayer" 11 | KWARGS: 12 | in_channels: [1024] 13 | out_channels: [256] 14 | 15 | RPN: 16 | TYPE: 'DepthwiseRPN' 17 | KWARGS: 18 | anchor_num: 5 19 | in_channels: 256 20 | out_channels: 256 21 | 22 | MASK: 23 | MASK: True 24 | TYPE: 'MaskCorr' 25 | KWARGS: 26 | in_channels: 256 27 | hidden: 256 28 | out_channels: 3969 29 | 30 | REFINE: 31 | REFINE: True 32 | TYPE: 'Refine' 33 | 34 | ANCHOR: 35 | STRIDE: 8 36 | RATIOS: [0.33, 0.5, 1, 2, 3] 37 | SCALES: [8] 38 | ANCHOR_NUM: 5 39 | 40 | TRACK: 41 | TYPE: 'SiamMaskTracker' 42 | PENALTY_K: 0.10 43 | WINDOW_INFLUENCE: 0.41 44 | LR: 0.32 45 | EXEMPLAR_SIZE: 127 46 | INSTANCE_SIZE: 255 47 | BASE_SIZE: 8 48 | CONTEXT_AMOUNT: 0.5 49 | MASK_THERSHOLD: 0.15 50 | 51 | # Predictive fine-tuning settings 52 | DATASET: 53 | NAMES: ('VID',) 54 | TRAIN: 55 | LATENCY: 2 # number of frames that will be skipped 56 | RESUME: '../pretrained/Mask_R50.pth' # Original trained tracking model 57 | NUM_FRAME: 3 58 | BATCH_SIZE: 4 59 | PRED: 60 | TYPE: 'KF' 61 | 62 | 63 | -------------------------------------------------------------------------------- /experiments/siammask_r50_l3/pre_lb_config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 5 | KWARGS: 6 | used_layers: [0, 1, 2, 3] 7 | 8 | ADJUST: 9 | ADJUST: true 10 | TYPE: "AdjustAllLayer" 11 | KWARGS: 12 | in_channels: [1024] 13 | out_channels: [256] 14 | 15 | RPN: 16 | TYPE: 'DepthwiseRPN' 17 | KWARGS: 18 | anchor_num: 5 19 | in_channels: 256 20 | out_channels: 256 21 | 22 | MASK: 23 | MASK: True 24 | TYPE: 'MaskCorr' 25 | KWARGS: 26 | in_channels: 256 27 | hidden: 256 28 | out_channels: 3969 29 | 30 | # REFINE: 31 | # REFINE: True 32 
| # TYPE: 'Refine' 33 | 34 | ANCHOR: 35 | STRIDE: 8 36 | RATIOS: [0.33, 0.5, 1, 2, 3] 37 | SCALES: [8] 38 | ANCHOR_NUM: 5 39 | 40 | TRACK: 41 | TYPE: 'SiamMaskTracker' 42 | PENALTY_K: 0.10 43 | WINDOW_INFLUENCE: 0.41 44 | LR: 0.32 45 | EXEMPLAR_SIZE: 127 46 | INSTANCE_SIZE: 255 47 | BASE_SIZE: 8 48 | CONTEXT_AMOUNT: 0.5 49 | MASK_THERSHOLD: 0.15 50 | 51 | # Predictive fine-tuning settings 52 | DATASET: 53 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT') 54 | VIDEOS_PER_EPOCH: 10000 # 10000 55 | USE_IMG: False 56 | 57 | TRAIN: 58 | EPOCH: 100 59 | LATENCY: 3 # number of frames that will be skipped 60 | LOG_DIR: './logs/Mask_LB5' 61 | SNAPSHOT_DIR: './snapshot/Mask_LB5' 62 | JITTER: 2 # jitter for input latency 63 | PRE_TARGET: 6 # target of prediction 64 | RESUME: 'pretrained/Mask_R50.pth' # Original trained tracking model 65 | # RESUME: './snapshot/checkpoint_e45_l6_vid.pth' 66 | NUM_FRAME: 3 67 | BATCH_SIZE: 128 68 | LR: # for learning rate scheduler 69 | PRED_LR: 0.03 #0.1 70 | TYPE: 'multi-step' 71 | KWARGS: 72 | start_lr: 0.03 73 | steps: [15, 40, 30, 50, 80] 74 | mult: 0.5 75 | epochs: 100 76 | LR_WARMUP: 77 | WARMUP: False 78 | NUM_WORKERS: 24 79 | # TRAIN.LR_WARMUP.WARMUP = True 80 | # Predictor setting 81 | PRED: 82 | MODE: 'A+B' 83 | TRAIN: True 84 | TYPE: 'LB_v5' 85 | INPUT_RATIO: 0.0 86 | KWARGS: 87 | hidden_1: 64 88 | hidden_2: 32 89 | hidden_3: 32 90 | num_input: 3 91 | num_output: 6 92 | -------------------------------------------------------------------------------- /experiments/siammask_r50_l3/pre_lbv_config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 5 | KWARGS: 6 | used_layers: [0, 1, 2, 3] 7 | 8 | ADJUST: 9 | ADJUST: true 10 | TYPE: "AdjustAllLayer" 11 | KWARGS: 12 | in_channels: [1024] 13 | out_channels: [256] 14 | 15 | RPN: 16 | TYPE: 'DepthwiseRPN' 17 | KWARGS: 18 | anchor_num: 5 19 | in_channels: 256 20 | out_channels: 256 21 | 22 | MASK: 23 | MASK: True 24 | TYPE: 'MaskCorr' 25 | KWARGS: 26 | in_channels: 256 27 | hidden: 256 28 | out_channels: 3969 29 | 30 | # REFINE: 31 | # REFINE: True 32 | # TYPE: 'Refine' 33 | 34 | ANCHOR: 35 | STRIDE: 8 36 | RATIOS: [0.33, 0.5, 1, 2, 3] 37 | SCALES: [8] 38 | ANCHOR_NUM: 5 39 | 40 | TRACK: 41 | TYPE: 'SiamMaskTracker' 42 | PENALTY_K: 0.10 43 | WINDOW_INFLUENCE: 0.41 44 | LR: 0.32 45 | EXEMPLAR_SIZE: 127 46 | INSTANCE_SIZE: 255 47 | BASE_SIZE: 8 48 | CONTEXT_AMOUNT: 0.5 49 | MASK_THERSHOLD: 0.15 50 | 51 | # Predictive fine-tuning settings 52 | DATASET: 53 | NEG: 0.0 54 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT') 55 | USE_IMG: True 56 | SEARCH: 57 | SHIFT: 0 58 | SCALE: 0.0 59 | VIDEOS_PER_EPOCH: 10000 # 10000 60 | 61 | TRAIN: 62 | EPOCH: 240 63 | TRACKER_EPOCH: 20 64 | LOG_DIR: './logs/Mask_lbv5' 65 | SNAPSHOT_DIR: './snapshot/Mask_lbv5' 66 | LATENCY: 3 # number of frames that will be skipped 67 | JITTER: 2 # jitter for input latency 68 | PRE_TARGET: 6 # target of prediction 69 | RESUME: 'pretrained/Mask_R50.pth' # Original trained tracking model 70 | NUM_FRAME: 3 71 | BATCH_SIZE: 128 72 | FIX_T: False 73 | BASE_LR: 0.00001 74 | LR: # for learning rate scheduler 75 | PRED_LR: 0.01 # 0.1 76 | BACKBONE_LR: 0 # Times of 0.1*BASE_LR 77 | NECK_LR: 10 # Times of BASE_LR 78 | TYPE: 'multi-step' 79 | KWARGS: 80 | start_lr: 0.01 81 | steps: [200] 82 | mult: 0.1 83 | epochs: 240 84 | LR_WARMUP: 85 | WARMUP: False 86 | NUM_WORKERS: 24 87 | # TRAIN.LR_WARMUP.WARMUP = True 88 | # Predictor setting 89 | PRED: 90 | 
MODE: 'AB' 91 | TRAIN: True 92 | TYPE: 'LBv_v5' 93 | M_WEIGHT: 0.1 94 | V_WEIGHT: 0.1 95 | MV_WEIGHT: 1.0 96 | KWARGS: 97 | dwconv_k: 3 98 | dwconv_id: 256 # last layer channel 99 | dwconv_hd: 64 100 | hidden_2: 32 101 | hidden_3: 32 102 | num_input: 3 103 | num_output: 6 104 | -------------------------------------------------------------------------------- /experiments/siammask_r50_l3/pre_mv_config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 5 | KWARGS: 6 | used_layers: [0, 1, 2, 3] 7 | 8 | ADJUST: 9 | ADJUST: true 10 | TYPE: "AdjustAllLayer" 11 | KWARGS: 12 | in_channels: [1024] 13 | out_channels: [256] 14 | 15 | RPN: 16 | TYPE: 'DepthwiseRPN' 17 | KWARGS: 18 | anchor_num: 5 19 | in_channels: 256 20 | out_channels: 256 21 | 22 | MASK: 23 | MASK: True 24 | TYPE: 'MaskCorr' 25 | KWARGS: 26 | in_channels: 256 27 | hidden: 256 28 | out_channels: 3969 29 | 30 | # REFINE: 31 | # REFINE: True 32 | # TYPE: 'Refine' 33 | 34 | ANCHOR: 35 | STRIDE: 8 36 | RATIOS: [0.33, 0.5, 1, 2, 3] 37 | SCALES: [8] 38 | ANCHOR_NUM: 5 39 | 40 | TRACK: 41 | TYPE: 'SiamMaskTracker' 42 | PENALTY_K: 0.10 43 | WINDOW_INFLUENCE: 0.41 44 | LR: 0.32 45 | EXEMPLAR_SIZE: 127 46 | INSTANCE_SIZE: 255 47 | BASE_SIZE: 8 48 | CONTEXT_AMOUNT: 0.5 49 | MASK_THERSHOLD: 0.15 50 | 51 | # Predictive fine-tuning settings 52 | DATASET: 53 | NEG: 0.0 54 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT') 55 | USE_IMG: True 56 | SEARCH: 57 | SHIFT: 0 58 | SCALE: 0.0 59 | VIDEOS_PER_EPOCH: 10000 # 10000 60 | 61 | TRAIN: 62 | EPOCH: 300 63 | TRACKER_EPOCH: 10 64 | LOG_DIR: './logs/Mask_mv16' 65 | SNAPSHOT_DIR: './snapshot/Mask_mv16' 66 | LATENCY: 3 # number of frames that will be skipped 67 | JITTER: 2 # jitter for input latency 68 | PRE_TARGET: 6 # target of prediction 69 | RESUME: 'pretrained/Mask_R50.pth' # Original trained tracking model 70 | NUM_FRAME: 3 71 | BATCH_SIZE: 128 72 | FIX_T: False 73 | BASE_LR: 0.00001 74 | LR: # for learning rate scheduler 75 | PRED_LR: 0.004 # 0.1 76 | BACKBONE_LR: 0 # Times of 0.1*BASE_LR 77 | NECK_LR: 10 # Times of BASE_LR 78 | TYPE: 'multi-step' 79 | KWARGS: 80 | start_lr: 0.004 81 | steps: [200] 82 | mult: 0.1 83 | epochs: 300 84 | LR_WARMUP: 85 | WARMUP: False 86 | NUM_WORKERS: 24 87 | # TRAIN.LR_WARMUP.WARMUP = True 88 | # Predictor setting 89 | PRED: 90 | MODE: 'AB' 91 | TRAIN: True 92 | TYPE: 'MV_v16' 93 | M_WEIGHT: 0.1 94 | V_WEIGHT: 0.1 95 | MV_WEIGHT: 1.0 96 | KWARGS: 97 | dwconv_k: 3 98 | dwconv_id: 256 # last layer channel 99 | dwconv_hd: 64 100 | hidden_1: 64 101 | hidden_2: 32 102 | hidden_3: 32 103 | num_input: 3 104 | num_output: 6 105 | -------------------------------------------------------------------------------- /experiments/siamrpn_mobilev2_l234_dwxcorr/config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_mobilev2_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "mobilenetv2" 5 | KWARGS: 6 | used_layers: [3, 5, 7] 7 | width_mult: 1.4 8 | 9 | ADJUST: 10 | ADJUST: true 11 | TYPE: "AdjustAllLayer" 12 | KWARGS: 13 | in_channels: [44, 134, 448] 14 | out_channels: [256, 256, 256] 15 | 16 | RPN: 17 | TYPE: 'MultiRPN' 18 | KWARGS: 19 | anchor_num: 5 20 | in_channels: [256, 256, 256] 21 | weighted: False 22 | 23 | MASK: 24 | MASK: False 25 | 26 | ANCHOR: 27 | STRIDE: 8 28 | RATIOS: [0.33, 0.5, 1, 2, 3] 29 | SCALES: [8] 30 | ANCHOR_NUM: 5 31 | 32 | TRACK: 33 | TYPE: 'SiamRPNTracker' 34 | PENALTY_K: 0.04 35 | 
WINDOW_INFLUENCE: 0.4 36 | LR: 0.5 37 | EXEMPLAR_SIZE: 127 38 | INSTANCE_SIZE: 255 39 | BASE_SIZE: 8 40 | CONTEXT_AMOUNT: 0.5 41 | -------------------------------------------------------------------------------- /experiments/siamrpn_mobilev2_l234_dwxcorr/pre_kf_config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_mobilev2_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "mobilenetv2" 5 | KWARGS: 6 | used_layers: [3, 5, 7] 7 | width_mult: 1.4 8 | 9 | ADJUST: 10 | ADJUST: true 11 | TYPE: "AdjustAllLayer" 12 | KWARGS: 13 | in_channels: [44, 134, 448] 14 | out_channels: [256, 256, 256] 15 | 16 | RPN: 17 | TYPE: 'MultiRPN' 18 | KWARGS: 19 | anchor_num: 5 20 | in_channels: [256, 256, 256] 21 | weighted: False 22 | 23 | MASK: 24 | MASK: False 25 | 26 | ANCHOR: 27 | STRIDE: 8 28 | RATIOS: [0.33, 0.5, 1, 2, 3] 29 | SCALES: [8] 30 | ANCHOR_NUM: 5 31 | 32 | TRACK: 33 | TYPE: 'SiamRPNTracker' 34 | PENALTY_K: 0.04 35 | WINDOW_INFLUENCE: 0.4 36 | LR: 0.5 37 | EXEMPLAR_SIZE: 127 38 | INSTANCE_SIZE: 255 39 | BASE_SIZE: 8 40 | CONTEXT_AMOUNT: 0.5 41 | 42 | # Predictive fine-tuning settings 43 | DATASET: 44 | NAMES: ('VID',) 45 | TRAIN: 46 | LATENCY: 2 # number of frames that will be skipped 47 | RESUME: '../pretrained/RPN_Mob.pth' # Original trained tracking model 48 | NUM_FRAME: 3 49 | BATCH_SIZE: 4 50 | PRED: 51 | TYPE: 'KF' 52 | MODE: 'A+B' 53 | 54 | 55 | -------------------------------------------------------------------------------- /experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_mobilev2_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "mobilenetv2" 5 | KWARGS: 6 | used_layers: [3, 5, 7] 7 | width_mult: 1.4 8 | 9 | ADJUST: 10 | ADJUST: true 11 | TYPE: "AdjustAllLayer" 12 | KWARGS: 13 | in_channels: [44, 134, 448] 14 | out_channels: [256, 256, 256] 15 | 16 | RPN: 17 | TYPE: 'MultiRPN' 18 | KWARGS: 19 | anchor_num: 5 20 | in_channels: [256, 256, 256] 21 | weighted: False 22 | 23 | MASK: 24 | MASK: false 25 | 26 | ANCHOR: 27 | STRIDE: 8 28 | RATIOS: [0.33, 0.5, 1, 2, 3] 29 | SCALES: [8] 30 | ANCHOR_NUM: 5 31 | 32 | TRACK: 33 | TYPE: 'SiamRPNTracker' 34 | PENALTY_K: 0.04 35 | WINDOW_INFLUENCE: 0.4 36 | LR: 0.5 37 | EXEMPLAR_SIZE: 127 38 | INSTANCE_SIZE: 255 39 | BASE_SIZE: 8 40 | CONTEXT_AMOUNT: 0.5 41 | 42 | # Predictive fine-tuning settings 43 | DATASET: 44 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT') 45 | VIDEOS_PER_EPOCH: 10000 # 10000 46 | USE_IMG: False 47 | 48 | TRAIN: 49 | EPOCH: 100 50 | LOG_DIR: './logs/RPN_Mob_LB5' 51 | SNAPSHOT_DIR: './snapshot/RPN_Mob_LB5' 52 | LATENCY: 3 # number of frames that will be skipped 53 | JITTER: 1 # jitter for input latency 54 | PRE_TARGET: 3 # target of prediction 55 | RESUME: 'pretrained/RPN_Mob.pth' # Original trained tracking model 56 | # RESUME: './snapshot/checkpoint_e45_l6_vid.pth' 57 | NUM_FRAME: 3 58 | BATCH_SIZE: 128 59 | LR: # for learning rate scheduler 60 | PRED_LR: 0.01 #0.1 61 | TYPE: 'multi-step' 62 | KWARGS: 63 | start_lr: 0.01 64 | steps: [15, 40, 30, 50, 80] 65 | mult: 0.1 66 | epochs: 100 67 | LR_WARMUP: 68 | WARMUP: False 69 | NUM_WORKERS: 24 70 | # TRAIN.LR_WARMUP.WARMUP = True 71 | # Predictor setting 72 | PRED: 73 | MODE: 'A+B' 74 | TRAIN: True 75 | TYPE: 'LB_v5' 76 | INPUT_RATIO: 0.0 77 | KWARGS: 78 | hidden_1: 64 79 | hidden_2: 32 80 | hidden_3: 32 81 | num_input: 3 82 | num_output: 3 83 | 
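For orientation: per pysot/models/__init__.py later in this dump, PRED.MODE 'A+B' pairs the frozen tracker with a standalone predictor (ModelBuilder), while 'AB' trains a joint predictive tracker (PredModelBuilder). The PRED.KWARGS in the config above describe a small motion predictor that reads the last num_input frames of target motion and regresses num_output future steps (matching PRE_TARGET: 3). The real LB_v5 module lives in pysot/models/predictor/lb_5.py, which this section does not include; the sketch below is only an assumed shape with these hyper-parameters, where treating each motion sample as a 4-D offset (dx, dy, dw, dh) is an assumption:

import torch
import torch.nn as nn

class ToyMotionPredictor(nn.Module):
    """Sketch only: maps the last `num_input` box offsets to `num_output` future ones."""
    def __init__(self, hidden_1=64, hidden_2=32, hidden_3=32,
                 num_input=3, num_output=3):
        super().__init__()
        self.num_output = num_output
        # three hidden layers mirroring hidden_1/hidden_2/hidden_3 above
        self.mlp = nn.Sequential(
            nn.Linear(num_input * 4, hidden_1), nn.ReLU(inplace=True),
            nn.Linear(hidden_1, hidden_2), nn.ReLU(inplace=True),
            nn.Linear(hidden_2, hidden_3), nn.ReLU(inplace=True),
            nn.Linear(hidden_3, num_output * 4),
        )

    def forward(self, past_motion):             # past_motion: (B, num_input, 4)
        out = self.mlp(past_motion.flatten(1))  # (B, num_output * 4)
        return out.view(-1, self.num_output, 4)

pred = ToyMotionPredictor()
future = pred(torch.randn(2, 3, 4))  # -> (2, 3, 4): three future (dx, dy, dw, dh) steps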
-------------------------------------------------------------------------------- /experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_mobilev2_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "mobilenetv2" 5 | KWARGS: 6 | used_layers: [3, 5, 7] 7 | width_mult: 1.4 8 | 9 | ADJUST: 10 | ADJUST: true 11 | TYPE: "AdjustAllLayer" 12 | KWARGS: 13 | in_channels: [44, 134, 448] 14 | out_channels: [256, 256, 256] 15 | 16 | RPN: 17 | TYPE: 'MultiRPN' 18 | KWARGS: 19 | anchor_num: 5 20 | in_channels: [256, 256, 256] 21 | weighted: False 22 | 23 | MASK: 24 | MASK: false 25 | 26 | ANCHOR: 27 | STRIDE: 8 28 | RATIOS: [0.33, 0.5, 1, 2, 3] 29 | SCALES: [8] 30 | ANCHOR_NUM: 5 31 | 32 | TRACK: 33 | TYPE: 'SiamRPNTracker' 34 | PENALTY_K: 0.04 35 | WINDOW_INFLUENCE: 0.4 36 | LR: 0.5 37 | EXEMPLAR_SIZE: 127 38 | INSTANCE_SIZE: 255 39 | BASE_SIZE: 8 40 | CONTEXT_AMOUNT: 0.5 41 | 42 | # Predictive fine-tuning settings 43 | DATASET: 44 | NEG: 0.0 45 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT') 46 | USE_IMG: True 47 | SEARCH: 48 | SHIFT: 0 49 | SCALE: 0.0 50 | VIDEOS_PER_EPOCH: 10000 # 10000 51 | 52 | TRAIN: 53 | EPOCH: 210 54 | TRACKER_EPOCH: 20 55 | LOG_DIR: './logs/RPN_Mob_lbv5' 56 | SNAPSHOT_DIR: './snapshot/RPN_Mob_lbv5' 57 | LATENCY: 3 # number of frames that will be skipped 58 | JITTER: 1 # jitter for input latency 59 | PRE_TARGET: 3 # target of prediction 60 | RESUME: 'pretrained/RPN_Mob.pth' # Original trained tracking model 61 | NUM_FRAME: 3 62 | BATCH_SIZE: 128 63 | FIX_T: False 64 | BASE_LR: 0.00001 65 | LR: # for learning rate scheduler 66 | PRED_LR: 0.0035 # 0.1 67 | BACKBONE_LR: 0 # Times of 0.1*BASE_LR 68 | NECK_LR: 10 # Times of BASE_LR 69 | TYPE: 'multi-step' 70 | KWARGS: 71 | start_lr: 0.0035 72 | steps: [200] 73 | mult: 0.1 74 | epochs: 210 75 | LR_WARMUP: 76 | WARMUP: False 77 | NUM_WORKERS: 24 78 | # TRAIN.LR_WARMUP.WARMUP = True 79 | # Predictor setting 80 | PRED: 81 | MODE: 'AB' 82 | TRAIN: True 83 | TYPE: 'LBv_v5' 84 | M_WEIGHT: 0.0 85 | V_WEIGHT: 1.0 86 | MV_WEIGHT: 0.0 87 | KWARGS: 88 | dwconv_k: 3 89 | dwconv_id: 256 # last layer channel 90 | dwconv_hd: 64 91 | hidden_2: 32 92 | hidden_3: 32 93 | num_input: 3 94 | num_output: 3 95 | -------------------------------------------------------------------------------- /experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_mobilev2_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "mobilenetv2" 5 | KWARGS: 6 | used_layers: [3, 5, 7] 7 | width_mult: 1.4 8 | 9 | ADJUST: 10 | ADJUST: true 11 | TYPE: "AdjustAllLayer" 12 | KWARGS: 13 | in_channels: [44, 134, 448] 14 | out_channels: [256, 256, 256] 15 | 16 | RPN: 17 | TYPE: 'MultiRPN' 18 | KWARGS: 19 | anchor_num: 5 20 | in_channels: [256, 256, 256] 21 | weighted: False 22 | 23 | MASK: 24 | MASK: false 25 | 26 | ANCHOR: 27 | STRIDE: 8 28 | RATIOS: [0.33, 0.5, 1, 2, 3] 29 | SCALES: [8] 30 | ANCHOR_NUM: 5 31 | 32 | TRACK: 33 | TYPE: 'SiamRPNTracker' 34 | PENALTY_K: 0.04 35 | WINDOW_INFLUENCE: 0.4 36 | LR: 0.5 37 | EXEMPLAR_SIZE: 127 38 | INSTANCE_SIZE: 255 39 | BASE_SIZE: 8 40 | CONTEXT_AMOUNT: 0.5 41 | 42 | # Predictive fine-tuning settings 43 | DATASET: 44 | NEG: 0.0 45 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT') 46 | USE_IMG: True 47 | SEARCH: 48 | SHIFT: 0 49 | SCALE: 0.0 50 | VIDEOS_PER_EPOCH: 10000 # 10000 51 | 52 | TRAIN: 53 | EPOCH: 280 54 | TRACKER_EPOCH: 20 55 | LOG_DIR: 
'./logs/RPN_Mob_mv16' 56 | SNAPSHOT_DIR: './snapshot/RPN_Mob_mv16' 57 | LATENCY: 3 # number of frames that will be skipped 58 | JITTER: 1 # jitter for input latency 59 | PRE_TARGET: 3 # target of prediction 60 | RESUME: 'pretrained/RPN_Mob.pth' # Original trained tracking model 61 | NUM_FRAME: 3 62 | BATCH_SIZE: 128 63 | FIX_T: False 64 | BASE_LR: 0.00001 65 | LR: # for learning rate scheduler 66 | PRED_LR: 0.01 # 0.1 67 | BACKBONE_LR: 0 # Times of 0.1*BASE_LR 68 | NECK_LR: 10 # Times of BASE_LR 69 | TYPE: 'multi-step' 70 | KWARGS: 71 | start_lr: 0.01 72 | steps: [200] 73 | mult: 0.1 74 | epochs: 280 75 | LR_WARMUP: 76 | WARMUP: False 77 | NUM_WORKERS: 24 78 | # TRAIN.LR_WARMUP.WARMUP = True 79 | # Predictor setting 80 | PRED: 81 | MODE: 'AB' 82 | TRAIN: True 83 | TYPE: 'MV_v16' 84 | M_WEIGHT: 0.1 85 | V_WEIGHT: 0.1 86 | MV_WEIGHT: 1.0 87 | KWARGS: 88 | dwconv_k: 3 89 | dwconv_id: 256 # last layer channel 90 | dwconv_hd: 64 91 | hidden_1: 64 92 | hidden_2: 32 93 | hidden_3: 32 94 | num_input: 3 95 | num_output: 3 96 | -------------------------------------------------------------------------------- /experiments/siamrpn_r50_l234_dwxcorr/config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 5 | KWARGS: 6 | used_layers: [2, 3, 4] 7 | 8 | ADJUST: 9 | ADJUST: true 10 | TYPE: "AdjustAllLayer" 11 | KWARGS: 12 | in_channels: [512, 1024, 2048] 13 | out_channels: [256, 256, 256] 14 | 15 | RPN: 16 | TYPE: 'MultiRPN' 17 | KWARGS: 18 | anchor_num: 5 19 | in_channels: [256, 256, 256] 20 | weighted: true 21 | 22 | MASK: 23 | MASK: false 24 | 25 | ANCHOR: 26 | STRIDE: 8 27 | RATIOS: [0.33, 0.5, 1, 2, 3] 28 | SCALES: [8] 29 | ANCHOR_NUM: 5 30 | 31 | TRACK: 32 | TYPE: 'SiamRPNTracker' 33 | PENALTY_K: 0.05 34 | WINDOW_INFLUENCE: 0.42 35 | LR: 0.38 36 | EXEMPLAR_SIZE: 127 37 | INSTANCE_SIZE: 255 38 | BASE_SIZE: 8 39 | CONTEXT_AMOUNT: 0.5 40 | 41 | TRAIN: 42 | BATCH_SIZE: 4 43 | -------------------------------------------------------------------------------- /experiments/siamrpn_r50_l234_dwxcorr/pre_kf_config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 5 | KWARGS: 6 | used_layers: [2, 3, 4] 7 | 8 | ADJUST: 9 | ADJUST: true 10 | TYPE: "AdjustAllLayer" 11 | KWARGS: 12 | in_channels: [512, 1024, 2048] 13 | out_channels: [256, 256, 256] 14 | 15 | RPN: 16 | TYPE: 'MultiRPN' 17 | KWARGS: 18 | anchor_num: 5 19 | in_channels: [256, 256, 256] 20 | weighted: true 21 | 22 | MASK: 23 | MASK: false 24 | 25 | ANCHOR: 26 | STRIDE: 8 27 | RATIOS: [0.33, 0.5, 1, 2, 3] 28 | SCALES: [8] 29 | ANCHOR_NUM: 5 30 | 31 | TRACK: 32 | TYPE: 'SiamRPNTracker' 33 | PENALTY_K: 0.05 34 | WINDOW_INFLUENCE: 0.42 35 | LR: 0.38 36 | EXEMPLAR_SIZE: 127 37 | INSTANCE_SIZE: 255 38 | BASE_SIZE: 8 39 | CONTEXT_AMOUNT: 0.5 40 | 41 | # Predictive fine-tuning settings 42 | DATASET: 43 | NAMES: ('VID',) 44 | TRAIN: 45 | LATENCY: 2 # number of frames that will be skipped 46 | RESUME: '../pretrained/RPN_R50.model' # Original trained tracking model 47 | NUM_FRAME: 3 48 | BATCH_SIZE: 4 49 | PRED: 50 | TYPE: 'KF' 51 | 52 | 53 | -------------------------------------------------------------------------------- /experiments/siamrpn_r50_l234_dwxcorr/pre_lb_config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 
5 | KWARGS: 6 | used_layers: [2, 3, 4] 7 | 8 | ADJUST: 9 | ADJUST: true 10 | TYPE: "AdjustAllLayer" 11 | KWARGS: 12 | in_channels: [512, 1024, 2048] 13 | out_channels: [256, 256, 256] 14 | 15 | RPN: 16 | TYPE: 'MultiRPN' 17 | KWARGS: 18 | anchor_num: 5 19 | in_channels: [256, 256, 256] 20 | weighted: true 21 | 22 | MASK: 23 | MASK: false 24 | 25 | ANCHOR: 26 | STRIDE: 8 27 | RATIOS: [0.33, 0.5, 1, 2, 3] 28 | SCALES: [8] 29 | ANCHOR_NUM: 5 30 | 31 | TRACK: 32 | TYPE: 'SiamRPNTracker' 33 | PENALTY_K: 0.05 34 | WINDOW_INFLUENCE: 0.42 35 | LR: 0.38 36 | EXEMPLAR_SIZE: 127 37 | INSTANCE_SIZE: 255 38 | BASE_SIZE: 8 39 | CONTEXT_AMOUNT: 0.5 40 | 41 | # Predictive fine-tuning settings 42 | DATASET: 43 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT') 44 | VIDEOS_PER_EPOCH: 10000 # 10000 45 | USE_IMG: False 46 | 47 | TRAIN: 48 | EPOCH: 50 49 | LOG_DIR: './logs/RPN_Res_lb5' 50 | SNAPSHOT_DIR: './snapshot/RPN_Res_lb5' 51 | LATENCY: 6 # number of frames that will be skipped 52 | JITTER: 2 # jitter for input latency 53 | PRE_TARGET: 12 # target of prediction 54 | RESUME: 'pretrained/RPN_R50.model' # Original trained tracking model 55 | NUM_FRAME: 3 56 | BATCH_SIZE: 128 57 | FIX_T: True 58 | BASE_LR: 0.0 59 | LR: # for learning rate scheduler 60 | PRED_LR: 0.03 #0.1 61 | TYPE: 'multi-step' 62 | KWARGS: 63 | start_lr: 0.03 64 | steps: [20, 40, 50] 65 | mult: 0.1 66 | epochs: 60 67 | LR_WARMUP: 68 | WARMUP: False 69 | NUM_WORKERS: 24 70 | # TRAIN.LR_WARMUP.WARMUP = True 71 | # Predictor setting 72 | PRED: 73 | MODE: 'A+B' 74 | TRAIN: True 75 | TYPE: 'LB_v5' 76 | INPUT_RATIO: 0.0 77 | KWARGS: 78 | hidden_1: 64 79 | hidden_2: 32 80 | hidden_3: 32 81 | num_input: 3 82 | num_output: 12 83 | -------------------------------------------------------------------------------- /experiments/siamrpn_r50_l234_dwxcorr/pre_lbv_config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 5 | KWARGS: 6 | used_layers: [2, 3, 4] 7 | 8 | ADJUST: 9 | ADJUST: true 10 | TYPE: "AdjustAllLayer" 11 | KWARGS: 12 | in_channels: [512, 1024, 2048] 13 | out_channels: [256, 256, 256] 14 | 15 | RPN: 16 | TYPE: 'MultiRPN' 17 | KWARGS: 18 | anchor_num: 5 19 | in_channels: [256, 256, 256] 20 | weighted: true 21 | 22 | MASK: 23 | MASK: false 24 | 25 | ANCHOR: 26 | STRIDE: 8 27 | RATIOS: [0.33, 0.5, 1, 2, 3] 28 | SCALES: [8] 29 | ANCHOR_NUM: 5 30 | 31 | TRACK: 32 | TYPE: 'SiamRPNTracker' 33 | PENALTY_K: 0.05 34 | WINDOW_INFLUENCE: 0.42 35 | LR: 0.38 36 | EXEMPLAR_SIZE: 127 37 | INSTANCE_SIZE: 255 38 | BASE_SIZE: 8 39 | CONTEXT_AMOUNT: 0.5 40 | 41 | # Predictive fine-tuning settings 42 | DATASET: 43 | NEG: 0.0 44 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT') 45 | USE_IMG: True 46 | SEARCH: 47 | SHIFT: 0 48 | SCALE: 0.0 49 | VIDEOS_PER_EPOCH: 10000 # 10000 50 | 51 | TRAIN: 52 | EPOCH: 150 53 | TRACKER_EPOCH: 20 54 | LOG_DIR: './logs/RPN_Res_lbv5' 55 | SNAPSHOT_DIR: './snapshot/RPN_Res_lbv5' 56 | LATENCY: 6 # number of frames that will be skipped 57 | JITTER: 2 # jitter for input latency 58 | PRE_TARGET: 12 # target of prediction 59 | RESUME: 'pretrained/RPN_R50.model' # Original trained tracking model 60 | NUM_FRAME: 3 61 | BATCH_SIZE: 64 62 | FIX_T: False 63 | BASE_LR: 0.00001 64 | LR: # for learning rate scheduler 65 | PRED_LR: 0.003 # 0.1 66 | BACKBONE_LR: 0 # Times of 0.1*BASE_LR 67 | NECK_LR: 10 # Times of BASE_LR 68 | TYPE: 'multi-step' 69 | KWARGS: 70 | start_lr: 0.003 71 | steps: [100] 72 | mult: 0.1 73 | epochs: 150 74 | 
LR_WARMUP: 75 | WARMUP: False 76 | NUM_WORKERS: 24 77 | # TRAIN.LR_WARMUP.WARMUP = True 78 | # Predictor setting 79 | PRED: 80 | MODE: 'AB' 81 | TRAIN: True 82 | TYPE: 'LBv_v5' 83 | M_WEIGHT: 0.0 84 | V_WEIGHT: 1.0 85 | MV_WEIGHT: 0.0 86 | KWARGS: 87 | dwconv_k: 3 88 | dwconv_id: 256 # last layer channel 89 | dwconv_hd: 64 90 | hidden_2: 32 91 | hidden_3: 32 92 | num_input: 3 93 | num_output: 12 94 | -------------------------------------------------------------------------------- /experiments/siamrpn_r50_l234_dwxcorr/pre_mv_config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "siamrpn_r50_l234_dwxcorr" 2 | 3 | BACKBONE: 4 | TYPE: "resnet50" 5 | KWARGS: 6 | used_layers: [2, 3, 4] 7 | 8 | ADJUST: 9 | ADJUST: true 10 | TYPE: "AdjustAllLayer" 11 | KWARGS: 12 | in_channels: [512, 1024, 2048] 13 | out_channels: [256, 256, 256] 14 | 15 | RPN: 16 | TYPE: 'MultiRPN' 17 | KWARGS: 18 | anchor_num: 5 19 | in_channels: [256, 256, 256] 20 | weighted: true 21 | 22 | MASK: 23 | MASK: false 24 | 25 | ANCHOR: 26 | STRIDE: 8 27 | RATIOS: [0.33, 0.5, 1, 2, 3] 28 | SCALES: [8] 29 | ANCHOR_NUM: 5 30 | 31 | TRACK: 32 | TYPE: 'SiamRPNTracker' 33 | PENALTY_K: 0.05 34 | WINDOW_INFLUENCE: 0.42 35 | LR: 0.38 36 | EXEMPLAR_SIZE: 127 37 | INSTANCE_SIZE: 255 38 | BASE_SIZE: 8 39 | CONTEXT_AMOUNT: 0.5 40 | 41 | # Predictive fine-tuning settings 42 | DATASET: 43 | NEG: 0.0 44 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT') 45 | USE_IMG: True 46 | SEARCH: 47 | SHIFT: 0 48 | SCALE: 0.0 49 | VIDEOS_PER_EPOCH: 10000 # 10000 50 | 51 | TRAIN: 52 | EPOCH: 150 53 | TRACKER_EPOCH: 20 54 | START_EPOCH: 220 55 | LOG_DIR: './logs/RPN_Res_mv16' 56 | SNAPSHOT_DIR: './snapshot/RPN_Res_mv16' 57 | LATENCY: 6 # number of frames that will be skipped 58 | JITTER: 2 # jitter for input latency 59 | PRE_TARGET: 12 # target of prediction 60 | RESUME: 'pretrained/RPN_R50.model' # Original trained tracking model 61 | NUM_FRAME: 3 62 | BATCH_SIZE: 64 63 | FIX_T: False 64 | BASE_LR: 0.00001 65 | LR: # for learning rate scheduler 66 | PRED_LR: 0.003 # 0.1 67 | BACKBONE_LR: 0 # Times of 0.1*BASE_LR 68 | NECK_LR: 10 # Times of BASE_LR 69 | TYPE: 'multi-step' 70 | KWARGS: 71 | start_lr: 0.003 72 | steps: [100] 73 | mult: 0.1 74 | epochs: 150 75 | LR_WARMUP: 76 | WARMUP: False 77 | NUM_WORKERS: 24 78 | # TRAIN.LR_WARMUP.WARMUP = True 79 | # Predictor setting 80 | PRED: 81 | MODE: 'AB' 82 | TRAIN: True 83 | TYPE: 'MV_v16' 84 | M_WEIGHT: 0.1 85 | V_WEIGHT: 0.1 86 | MV_WEIGHT: 1.0 87 | KWARGS: 88 | dwconv_k: 3 89 | dwconv_id: 256 # last layer channel 90 | dwconv_hd: 64 91 | hidden_1: 64 92 | hidden_2: 32 93 | hidden_3: 32 94 | num_input: 3 95 | num_output: 12 96 | -------------------------------------------------------------------------------- /pysot/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/__init__.py -------------------------------------------------------------------------------- /pysot/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/core/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/core/__init__.py -------------------------------------------------------------------------------- /pysot/core/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/core/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/core/__pycache__/config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/core/__pycache__/config.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/core/__pycache__/xcorr.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/core/__pycache__/xcorr.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/core/xcorr.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | from __future__ import unicode_literals 6 | 7 | import torch 8 | import torch.nn.functional as F 9 | 10 | 11 | def xcorr_slow(x, kernel): 12 | """for loop to calculate cross correlation, slow version 13 | """ 14 | batch = x.size()[0] 15 | out = [] 16 | for i in range(batch): 17 | px = x[i] 18 | pk = kernel[i] 19 | px = px.view(1, -1, px.size()[1], px.size()[2]) 20 | pk = pk.view(1, -1, pk.size()[1], pk.size()[2]) 21 | po = F.conv2d(px, pk) 22 | out.append(po) 23 | out = torch.cat(out, 0) 24 | return out 25 | 26 | 27 | def xcorr_fast(x, kernel): 28 | """group conv2d to calculate cross correlation, fast version 29 | """ 30 | batch = kernel.size()[0] 31 | pk = kernel.view(-1, x.size()[1], kernel.size()[2], kernel.size()[3]) 32 | px = x.view(1, -1, x.size()[2], x.size()[3]) 33 | po = F.conv2d(px, pk, groups=batch) 34 | po = po.view(batch, -1, po.size()[2], po.size()[3]) 35 | return po 36 | 37 | 38 | def xcorr_depthwise(x, kernel): 39 | """depthwise cross correlation 40 | """ 41 | batch = kernel.size(0) 42 | channel = kernel.size(1) 43 | x = x.view(1, batch*channel, x.size(2), x.size(3)) 44 | kernel = kernel.view(batch*channel, 1, kernel.size(2), kernel.size(3)) 45 | out = F.conv2d(x, kernel, groups=batch*channel) 46 | out = out.view(batch, channel, out.size(2), out.size(3)) 47 | return out 48 | -------------------------------------------------------------------------------- /pysot/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/datasets/__init__.py -------------------------------------------------------------------------------- /pysot/datasets/anchor_target.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import numpy as np 7 | 8 | from pysot.core.config import cfg 9 | from 
pysot.utils.bbox import IoU, corner2center 10 | from pysot.utils.anchor import Anchors 11 | 12 | 13 | class AnchorTarget: 14 | def __init__(self,): 15 | self.anchors = Anchors(cfg.ANCHOR.STRIDE, 16 | cfg.ANCHOR.RATIOS, 17 | cfg.ANCHOR.SCALES) 18 | 19 | self.anchors.generate_all_anchors(im_c=cfg.TRAIN.SEARCH_SIZE//2, 20 | size=cfg.TRAIN.OUTPUT_SIZE) 21 | 22 | def __call__(self, target, size, neg=False): 23 | anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES) 24 | 25 | # -1 ignore 0 negative 1 positive 26 | cls = -1 * np.ones((anchor_num, size, size), dtype=np.int64) 27 | delta = np.zeros((4, anchor_num, size, size), dtype=np.float32) 28 | delta_weight = np.zeros((anchor_num, size, size), dtype=np.float32) 29 | 30 | def select(position, keep_num=16): 31 | num = position[0].shape[0] 32 | if num <= keep_num: 33 | return position, num 34 | slt = np.arange(num) 35 | np.random.shuffle(slt) 36 | slt = slt[:keep_num] 37 | return tuple(p[slt] for p in position), keep_num 38 | 39 | tcx, tcy, tw, th = corner2center(target) 40 | 41 | if neg: 42 | # l = size // 2 - 3 43 | # r = size // 2 + 3 + 1 44 | # cls[:, l:r, l:r] = 0 45 | 46 | cx = size // 2 47 | cy = size // 2 48 | cx += int(np.ceil((tcx - cfg.TRAIN.SEARCH_SIZE // 2) / 49 | cfg.ANCHOR.STRIDE + 0.5)) 50 | cy += int(np.ceil((tcy - cfg.TRAIN.SEARCH_SIZE // 2) / 51 | cfg.ANCHOR.STRIDE + 0.5)) 52 | l = max(0, cx - 3) 53 | r = min(size, cx + 4) 54 | u = max(0, cy - 3) 55 | d = min(size, cy + 4) 56 | cls[:, u:d, l:r] = 0 57 | 58 | neg, neg_num = select(np.where(cls == 0), cfg.TRAIN.NEG_NUM) 59 | cls[:] = -1 60 | cls[neg] = 0 61 | 62 | overlap = np.zeros((anchor_num, size, size), dtype=np.float32) 63 | return cls, delta, delta_weight, overlap 64 | 65 | anchor_box = self.anchors.all_anchors[0] 66 | anchor_center = self.anchors.all_anchors[1] 67 | x1, y1, x2, y2 = anchor_box[0], anchor_box[1], \ 68 | anchor_box[2], anchor_box[3] 69 | cx, cy, w, h = anchor_center[0], anchor_center[1], \ 70 | anchor_center[2], anchor_center[3] 71 | 72 | delta[0] = (tcx - cx) / w 73 | delta[1] = (tcy - cy) / h 74 | delta[2] = np.log(tw / w) 75 | delta[3] = np.log(th / h) 76 | 77 | overlap = IoU([x1, y1, x2, y2], target) 78 | 79 | pos = np.where(overlap > cfg.TRAIN.THR_HIGH) 80 | neg = np.where(overlap < cfg.TRAIN.THR_LOW) 81 | 82 | pos, pos_num = select(pos, cfg.TRAIN.POS_NUM) 83 | neg, neg_num = select(neg, cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM) 84 | 85 | cls[pos] = 1 86 | delta_weight[pos] = 1. 
/ (pos_num + 1e-6) 87 | 88 | cls[neg] = 0 89 | return cls, delta, delta_weight, overlap 90 | -------------------------------------------------------------------------------- /pysot/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from pysot.models.model_builder import ModelBuilder #tracker + predictor 7 | from pysot.models.pred_model_builder import PredModelBuilder # predictive tracker 8 | 9 | 10 | Builders = { 11 | 'A+B': ModelBuilder, 12 | 'AB': PredModelBuilder, 13 | } 14 | 15 | 16 | def get_modelbuilder(name, **kwargs): 17 | return Builders[name](**kwargs) 18 | 19 | -------------------------------------------------------------------------------- /pysot/models/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from pysot.models.backbone.alexnet import alexnetlegacy, alexnet 7 | from pysot.models.backbone.mobile_v2 import mobilenetv2 8 | from pysot.models.backbone.resnet_atrous import resnet18, resnet34, resnet50 9 | 10 | BACKBONES = { 11 | 'alexnetlegacy': alexnetlegacy, 12 | 'mobilenetv2': mobilenetv2, 13 | 'resnet18': resnet18, 14 | 'resnet34': resnet34, 15 | 'resnet50': resnet50, 16 | 'alexnet': alexnet, 17 | } 18 | 19 | 20 | def get_backbone(name, **kwargs): 21 | return BACKBONES[name](**kwargs) 22 | -------------------------------------------------------------------------------- /pysot/models/backbone/alexnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class AlexNetLegacy(nn.Module): 10 | configs = [3, 96, 256, 384, 384, 256] 11 | 12 | def __init__(self, width_mult=1): 13 | configs = list(map(lambda x: 3 if x == 3 else 14 | int(x*width_mult), AlexNet.configs)) 15 | super(AlexNetLegacy, self).__init__() 16 | self.features = nn.Sequential( 17 | nn.Conv2d(configs[0], configs[1], kernel_size=11, stride=2), 18 | nn.BatchNorm2d(configs[1]), 19 | nn.MaxPool2d(kernel_size=3, stride=2), 20 | nn.ReLU(inplace=True), 21 | nn.Conv2d(configs[1], configs[2], kernel_size=5), 22 | nn.BatchNorm2d(configs[2]), 23 | nn.MaxPool2d(kernel_size=3, stride=2), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(configs[2], configs[3], kernel_size=3), 26 | nn.BatchNorm2d(configs[3]), 27 | nn.ReLU(inplace=True), 28 | nn.Conv2d(configs[3], configs[4], kernel_size=3), 29 | nn.BatchNorm2d(configs[4]), 30 | nn.ReLU(inplace=True), 31 | nn.Conv2d(configs[4], configs[5], kernel_size=3), 32 | nn.BatchNorm2d(configs[5]), 33 | ) 34 | self.feature_size = configs[5] 35 | 36 | def forward(self, x): 37 | x = self.features(x) 38 | return x 39 | 40 | 41 | class AlexNet(nn.Module): 42 | configs = [3, 96, 256, 384, 384, 256] 43 | 44 | def __init__(self, width_mult=1): 45 | configs = list(map(lambda x: 3 if x == 3 else 46 | int(x*width_mult), AlexNet.configs)) 47 | super(AlexNet, self).__init__() 48 | self.layer1 = nn.Sequential( 49 | nn.Conv2d(configs[0], configs[1], kernel_size=11, stride=2), 50 | nn.BatchNorm2d(configs[1]), 51 | 
nn.MaxPool2d(kernel_size=3, stride=2), 52 | nn.ReLU(inplace=True), 53 | ) 54 | self.layer2 = nn.Sequential( 55 | nn.Conv2d(configs[1], configs[2], kernel_size=5), 56 | nn.BatchNorm2d(configs[2]), 57 | nn.MaxPool2d(kernel_size=3, stride=2), 58 | nn.ReLU(inplace=True), 59 | ) 60 | self.layer3 = nn.Sequential( 61 | nn.Conv2d(configs[2], configs[3], kernel_size=3), 62 | nn.BatchNorm2d(configs[3]), 63 | nn.ReLU(inplace=True), 64 | ) 65 | self.layer4 = nn.Sequential( 66 | nn.Conv2d(configs[3], configs[4], kernel_size=3), 67 | nn.BatchNorm2d(configs[4]), 68 | nn.ReLU(inplace=True), 69 | ) 70 | 71 | self.layer5 = nn.Sequential( 72 | nn.Conv2d(configs[4], configs[5], kernel_size=3), 73 | nn.BatchNorm2d(configs[5]), 74 | ) 75 | self.feature_size = configs[5] 76 | 77 | def forward(self, x): 78 | x = self.layer1(x) 79 | x = self.layer2(x) 80 | x = self.layer3(x) 81 | x = self.layer4(x) 82 | x = self.layer5(x) 83 | return x 84 | 85 | 86 | def alexnetlegacy(**kwargs): 87 | return AlexNetLegacy(**kwargs) 88 | 89 | 90 | def alexnet(**kwargs): 91 | return AlexNet(**kwargs) 92 | -------------------------------------------------------------------------------- /pysot/models/backbone/mobile_v2.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | def conv_bn(inp, oup, stride, padding=1): 11 | return nn.Sequential( 12 | nn.Conv2d(inp, oup, 3, stride, padding, bias=False), 13 | nn.BatchNorm2d(oup), 14 | nn.ReLU6(inplace=True) 15 | ) 16 | 17 | 18 | def conv_1x1_bn(inp, oup): 19 | return nn.Sequential( 20 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 21 | nn.BatchNorm2d(oup), 22 | nn.ReLU6(inplace=True) 23 | ) 24 | 25 | 26 | class InvertedResidual(nn.Module): 27 | def __init__(self, inp, oup, stride, expand_ratio, dilation=1): 28 | super(InvertedResidual, self).__init__() 29 | self.stride = stride 30 | 31 | self.use_res_connect = self.stride == 1 and inp == oup 32 | 33 | padding = 2 - stride 34 | if dilation > 1: 35 | padding = dilation 36 | 37 | self.conv = nn.Sequential( 38 | # pw 39 | nn.Conv2d(inp, inp * expand_ratio, 1, 1, 0, bias=False), 40 | nn.BatchNorm2d(inp * expand_ratio), 41 | nn.ReLU6(inplace=True), 42 | # dw 43 | nn.Conv2d(inp * expand_ratio, inp * expand_ratio, 3, 44 | stride, padding, dilation=dilation, 45 | groups=inp * expand_ratio, bias=False), 46 | nn.BatchNorm2d(inp * expand_ratio), 47 | nn.ReLU6(inplace=True), 48 | # pw-linear 49 | nn.Conv2d(inp * expand_ratio, oup, 1, 1, 0, bias=False), 50 | nn.BatchNorm2d(oup), 51 | ) 52 | 53 | def forward(self, x): 54 | if self.use_res_connect: 55 | return x + self.conv(x) 56 | else: 57 | return self.conv(x) 58 | 59 | 60 | class MobileNetV2(nn.Sequential): 61 | def __init__(self, width_mult=1.0, used_layers=[3, 5, 7]): 62 | super(MobileNetV2, self).__init__() 63 | 64 | self.interverted_residual_setting = [ 65 | # t, c, n, s 66 | [1, 16, 1, 1, 1], 67 | [6, 24, 2, 2, 1], 68 | [6, 32, 3, 2, 1], 69 | [6, 64, 4, 2, 1], 70 | [6, 96, 3, 1, 1], 71 | [6, 160, 3, 2, 1], 72 | [6, 320, 1, 1, 1], 73 | ] 74 | # 0,2,3,4,6 75 | 76 | self.interverted_residual_setting = [ 77 | # t, c, n, s 78 | [1, 16, 1, 1, 1], 79 | [6, 24, 2, 2, 1], 80 | [6, 32, 3, 2, 1], 81 | [6, 64, 4, 1, 2], 82 | [6, 96, 3, 1, 2], 83 | [6, 160, 3, 1, 4], 84 | [6, 320, 1, 1, 4], 85 | ] 86 | 87 | self.channels = [24, 32, 96, 320] 88 | self.channels = 
[int(c * width_mult) for c in self.channels] 89 | 90 | input_channel = int(32 * width_mult) 91 | self.last_channel = int(1280 * width_mult) \ 92 | if width_mult > 1.0 else 1280 93 | 94 | self.add_module('layer0', conv_bn(3, input_channel, 2, 0)) 95 | 96 | last_dilation = 1 97 | 98 | self.used_layers = used_layers 99 | 100 | for idx, (t, c, n, s, d) in \ 101 | enumerate(self.interverted_residual_setting, start=1): 102 | output_channel = int(c * width_mult) 103 | 104 | layers = [] 105 | 106 | for i in range(n): 107 | if i == 0: 108 | if d == last_dilation: 109 | dd = d 110 | else: 111 | dd = max(d // 2, 1) 112 | layers.append(InvertedResidual(input_channel, 113 | output_channel, s, t, dd)) 114 | else: 115 | layers.append(InvertedResidual(input_channel, 116 | output_channel, 1, t, d)) 117 | input_channel = output_channel 118 | 119 | last_dilation = d 120 | 121 | self.add_module('layer%d' % (idx), nn.Sequential(*layers)) 122 | 123 | def forward(self, x): 124 | outputs = [] 125 | for idx in range(8): 126 | name = "layer%d" % idx 127 | x = getattr(self, name)(x) 128 | outputs.append(x) 129 | p0, p1, p2, p3, p4 = [outputs[i] for i in [1, 2, 3, 5, 7]] 130 | out = [outputs[i] for i in self.used_layers] 131 | if len(out) == 1: 132 | return out[0] 133 | return out 134 | 135 | 136 | def mobilenetv2(**kwargs): 137 | model = MobileNetV2(**kwargs) 138 | return model 139 | 140 | 141 | if __name__ == '__main__': 142 | net = mobilenetv2() 143 | 144 | print(net) 145 | 146 | from torch.autograd import Variable 147 | tensor = Variable(torch.Tensor(1, 3, 255, 255)).cuda() 148 | 149 | net = net.cuda() 150 | 151 | out = net(tensor) 152 | 153 | for i, p in enumerate(out): 154 | print(i, p.size()) 155 | -------------------------------------------------------------------------------- /pysot/models/centernet/post_process.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | from .image import transform_preds 7 | from .ddd_utils import ddd2locrot 8 | 9 | 10 | def get_pred_depth(depth): 11 | return depth 12 | 13 | def get_alpha(rot): 14 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos, 15 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos] 16 | # return rot[:, 0] 17 | idx = rot[:, 1] > rot[:, 5] 18 | alpha1 = np.arctan2(rot[:, 2], rot[:, 3]) + (-0.5 * np.pi) 19 | alpha2 = np.arctan2(rot[:, 6], rot[:, 7]) + ( 0.5 * np.pi) 20 | return alpha1 * idx + alpha2 * (1 - idx) 21 | 22 | 23 | def ddd_post_process_2d(dets, c, s, opt): 24 | # dets: batch x max_dets x dim 25 | # return 1-based class det list 26 | ret = [] 27 | include_wh = dets.shape[2] > 16 28 | for i in range(dets.shape[0]): 29 | top_preds = {} 30 | dets[i, :, :2] = transform_preds( 31 | dets[i, :, 0:2], c[i], s[i], (opt.output_w, opt.output_h)) 32 | classes = dets[i, :, -1] 33 | for j in range(opt.num_classes): 34 | inds = (classes == j) 35 | top_preds[j + 1] = np.concatenate([ 36 | dets[i, inds, :3].astype(np.float32), 37 | get_alpha(dets[i, inds, 3:11])[:, np.newaxis].astype(np.float32), 38 | get_pred_depth(dets[i, inds, 11:12]).astype(np.float32), 39 | dets[i, inds, 12:15].astype(np.float32)], axis=1) 40 | if include_wh: 41 | top_preds[j + 1] = np.concatenate([ 42 | top_preds[j + 1], 43 | transform_preds( 44 | dets[i, inds, 15:17], c[i], s[i], (opt.output_w, opt.output_h)) 45 | .astype(np.float32)], axis=1) 46 | ret.append(top_preds) 47 | return ret 48 | 49 | def 
ddd_post_process_3d(dets, calibs): 50 | # dets: batch x max_dets x dim 51 | # return 1-based class det list 52 | ret = [] 53 | for i in range(len(dets)): 54 | preds = {} 55 | for cls_ind in dets[i].keys(): 56 | preds[cls_ind] = [] 57 | for j in range(len(dets[i][cls_ind])): 58 | center = dets[i][cls_ind][j][:2] 59 | score = dets[i][cls_ind][j][2] 60 | alpha = dets[i][cls_ind][j][3] 61 | depth = dets[i][cls_ind][j][4] 62 | dimensions = dets[i][cls_ind][j][5:8] 63 | wh = dets[i][cls_ind][j][8:10] 64 | locations, rotation_y = ddd2locrot( 65 | center, alpha, dimensions, depth, calibs[0]) 66 | bbox = [center[0] - wh[0] / 2, center[1] - wh[1] / 2, 67 | center[0] + wh[0] / 2, center[1] + wh[1] / 2] 68 | pred = [alpha] + bbox + dimensions.tolist() + \ 69 | locations.tolist() + [rotation_y, score] 70 | preds[cls_ind].append(pred) 71 | preds[cls_ind] = np.array(preds[cls_ind], dtype=np.float32) 72 | ret.append(preds) 73 | return ret 74 | 75 | def ddd_post_process(dets, c, s, calibs, opt): 76 | # dets: batch x max_dets x dim 77 | # return 1-based class det list 78 | dets = ddd_post_process_2d(dets, c, s, opt) 79 | dets = ddd_post_process_3d(dets, calibs) 80 | return dets 81 | 82 | 83 | def ctdet_post_process(dets, c, s, h, w, num_classes): 84 | # dets: batch x max_dets x dim 85 | # return 1-based class det dict 86 | for i in range(dets.shape[0]): 87 | dets[i, :, :2] = transform_preds( 88 | dets[i, :, 0:2], c, s, (w, h)) 89 | dets[i, :, 2:4] = transform_preds( 90 | dets[i, :, 2:4], c, s, (w, h)) 91 | # classes = dets[i, :, -1] 92 | # for j in range(num_classes): 93 | # inds = (classes == j) 94 | # top_preds[j + 1] = np.concatenate([ 95 | # dets[i, inds, :4].astype(np.float32), 96 | # dets[i, inds, 4:5].astype(np.float32)], axis=1).tolist() 97 | # ret.append(top_preds) 98 | return dets 99 | 100 | 101 | def multi_pose_post_process(dets, c, s, h, w): 102 | # dets: batch x max_dets x 40 103 | # return list of 39 in image coord 104 | ret = [] 105 | for i in range(dets.shape[0]): 106 | bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h)) 107 | pts = transform_preds(dets[i, :, 5:39].reshape(-1, 2), c[i], s[i], (w, h)) 108 | top_preds = np.concatenate( 109 | [bbox.reshape(-1, 4), dets[i, :, 4:5], 110 | pts.reshape(-1, 34)], axis=1).astype(np.float32).tolist() 111 | ret.append({np.ones(1, dtype=np.int32)[0]: top_preds}) 112 | return ret 113 | -------------------------------------------------------------------------------- /pysot/models/centernet/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import torch 6 | 7 | class AverageMeter(object): 8 | """Computes and stores the average and current value""" 9 | def __init__(self): 10 | self.reset() 11 | 12 | def reset(self): 13 | self.val = 0 14 | self.avg = 0 15 | self.sum = 0 16 | self.count = 0 17 | 18 | def update(self, val, n=1): 19 | self.val = val 20 | self.sum += val * n 21 | self.count += n 22 | if self.count > 0: 23 | self.avg = self.sum / self.count -------------------------------------------------------------------------------- /pysot/models/head/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from pysot.models.head.mask import MaskCorr, Refine 7 | 
from pysot.models.head.rpn import UPChannelRPN, DepthwiseRPN, MultiRPN 8 | 9 | RPNS = { 10 | 'UPChannelRPN': UPChannelRPN, 11 | 'DepthwiseRPN': DepthwiseRPN, 12 | 'MultiRPN': MultiRPN 13 | } 14 | 15 | MASKS = { 16 | 'MaskCorr': MaskCorr, 17 | } 18 | 19 | REFINE = { 20 | 'Refine': Refine, 21 | } 22 | 23 | 24 | def get_rpn_head(name, **kwargs): 25 | return RPNS[name](**kwargs) 26 | 27 | 28 | def get_mask_head(name, **kwargs): 29 | return MASKS[name](**kwargs) 30 | 31 | 32 | def get_refine_head(name): 33 | return REFINE[name]() 34 | -------------------------------------------------------------------------------- /pysot/models/head/mask.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | 9 | from pysot.models.head.rpn import DepthwiseXCorr 10 | from pysot.core.xcorr import xcorr_depthwise 11 | 12 | 13 | class MaskCorr(DepthwiseXCorr): 14 | def __init__(self, in_channels, hidden, out_channels, 15 | kernel_size=3, hidden_kernel_size=5): 16 | super(MaskCorr, self).__init__(in_channels, hidden, 17 | out_channels, kernel_size, 18 | hidden_kernel_size) 19 | 20 | def forward(self, kernel, search): 21 | kernel = self.conv_kernel(kernel) 22 | search = self.conv_search(search) 23 | feature = xcorr_depthwise(search, kernel) 24 | out = self.head(feature) 25 | return out, feature 26 | 27 | 28 | class Refine(nn.Module): 29 | def __init__(self): 30 | super(Refine, self).__init__() 31 | self.v0 = nn.Sequential( 32 | nn.Conv2d(64, 16, 3, padding=1), 33 | nn.ReLU(inplace=True), 34 | nn.Conv2d(16, 4, 3, padding=1), 35 | nn.ReLU(inplace=True), 36 | ) 37 | self.v1 = nn.Sequential( 38 | nn.Conv2d(256, 64, 3, padding=1), 39 | nn.ReLU(inplace=True), 40 | nn.Conv2d(64, 16, 3, padding=1), 41 | nn.ReLU(inplace=True), 42 | ) 43 | self.v2 = nn.Sequential( 44 | nn.Conv2d(512, 128, 3, padding=1), 45 | nn.ReLU(inplace=True), 46 | nn.Conv2d(128, 32, 3, padding=1), 47 | nn.ReLU(inplace=True), 48 | ) 49 | self.h2 = nn.Sequential( 50 | nn.Conv2d(32, 32, 3, padding=1), 51 | nn.ReLU(inplace=True), 52 | nn.Conv2d(32, 32, 3, padding=1), 53 | nn.ReLU(inplace=True), 54 | ) 55 | self.h1 = nn.Sequential( 56 | nn.Conv2d(16, 16, 3, padding=1), 57 | nn.ReLU(inplace=True), 58 | nn.Conv2d(16, 16, 3, padding=1), 59 | nn.ReLU(inplace=True), 60 | ) 61 | self.h0 = nn.Sequential( 62 | nn.Conv2d(4, 4, 3, padding=1), 63 | nn.ReLU(inplace=True), 64 | nn.Conv2d(4, 4, 3, padding=1), 65 | nn.ReLU(inplace=True), 66 | ) 67 | 68 | self.deconv = nn.ConvTranspose2d(256, 32, 15, 15) 69 | self.post0 = nn.Conv2d(32, 16, 3, padding=1) 70 | self.post1 = nn.Conv2d(16, 4, 3, padding=1) 71 | self.post2 = nn.Conv2d(4, 1, 3, padding=1) 72 | 73 | def forward(self, f, corr_feature, pos): 74 | p0 = F.pad(f[0], [16, 16, 16, 16])[:, :, 4*pos[0]:4*pos[0]+61, 4*pos[1]:4*pos[1]+61] 75 | p1 = F.pad(f[1], [8, 8, 8, 8])[:, :, 2*pos[0]:2*pos[0]+31, 2*pos[1]:2*pos[1]+31] 76 | p2 = F.pad(f[2], [4, 4, 4, 4])[:, :, pos[0]:pos[0]+15, pos[1]:pos[1]+15] 77 | 78 | p3 = corr_feature[:, :, pos[0], pos[1]].view(-1, 256, 1, 1) 79 | 80 | out = self.deconv(p3) 81 | out = self.post0(F.upsample(self.h2(out) + self.v2(p2), size=(31, 31))) 82 | out = self.post1(F.upsample(self.h1(out) + self.v1(p1), size=(61, 61))) 83 | out = self.post2(F.upsample(self.h0(out) + self.v0(p0), size=(127, 127))) 84 | out = out.view(-1, 127*127) 85 | 
return out 86 | -------------------------------------------------------------------------------- /pysot/models/head/rpn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | from pysot.core.xcorr import xcorr_fast, xcorr_depthwise 11 | from pysot.models.init_weight import init_weights 12 | 13 | class RPN(nn.Module): 14 | def __init__(self): 15 | super(RPN, self).__init__() 16 | 17 | def forward(self, z_f, x_f): 18 | raise NotImplementedError 19 | 20 | class UPChannelRPN(RPN): 21 | def __init__(self, anchor_num=5, feature_in=256): 22 | super(UPChannelRPN, self).__init__() 23 | 24 | cls_output = 2 * anchor_num 25 | loc_output = 4 * anchor_num 26 | 27 | self.template_cls_conv = nn.Conv2d(feature_in, 28 | feature_in * cls_output, kernel_size=3) 29 | self.template_loc_conv = nn.Conv2d(feature_in, 30 | feature_in * loc_output, kernel_size=3) 31 | 32 | self.search_cls_conv = nn.Conv2d(feature_in, 33 | feature_in, kernel_size=3) 34 | self.search_loc_conv = nn.Conv2d(feature_in, 35 | feature_in, kernel_size=3) 36 | 37 | self.loc_adjust = nn.Conv2d(loc_output, loc_output, kernel_size=1) 38 | 39 | 40 | def forward(self, z_f, x_f): 41 | cls_kernel = self.template_cls_conv(z_f) 42 | loc_kernel = self.template_loc_conv(z_f) 43 | 44 | cls_feature = self.search_cls_conv(x_f) 45 | loc_feature = self.search_loc_conv(x_f) 46 | 47 | cls = xcorr_fast(cls_feature, cls_kernel) 48 | loc = self.loc_adjust(xcorr_fast(loc_feature, loc_kernel)) 49 | return cls, loc 50 | 51 | 52 | class DepthwiseXCorr(nn.Module): 53 | def __init__(self, in_channels, hidden, out_channels, kernel_size=3, hidden_kernel_size=5): 54 | super(DepthwiseXCorr, self).__init__() 55 | self.conv_kernel = nn.Sequential( 56 | nn.Conv2d(in_channels, hidden, kernel_size=kernel_size, bias=False), 57 | nn.BatchNorm2d(hidden), 58 | nn.ReLU(inplace=True), 59 | ) 60 | self.conv_search = nn.Sequential( 61 | nn.Conv2d(in_channels, hidden, kernel_size=kernel_size, bias=False), 62 | nn.BatchNorm2d(hidden), 63 | nn.ReLU(inplace=True), 64 | ) 65 | self.head = nn.Sequential( 66 | nn.Conv2d(hidden, hidden, kernel_size=1, bias=False), 67 | nn.BatchNorm2d(hidden), 68 | nn.ReLU(inplace=True), 69 | nn.Conv2d(hidden, out_channels, kernel_size=1) 70 | ) 71 | 72 | 73 | def forward(self, kernel, search): 74 | kernel = self.conv_kernel(kernel) 75 | search = self.conv_search(search) 76 | feature = xcorr_depthwise(search, kernel) 77 | out = self.head(feature) 78 | return out 79 | 80 | 81 | class DepthwiseRPN(RPN): 82 | def __init__(self, anchor_num=5, in_channels=256, out_channels=256): 83 | super(DepthwiseRPN, self).__init__() 84 | self.cls = DepthwiseXCorr(in_channels, out_channels, 2 * anchor_num) 85 | self.loc = DepthwiseXCorr(in_channels, out_channels, 4 * anchor_num) 86 | 87 | def forward(self, z_f, x_f): 88 | cls = self.cls(z_f, x_f) 89 | loc = self.loc(z_f, x_f) 90 | return cls, loc 91 | 92 | 93 | class MultiRPN(RPN): 94 | def __init__(self, anchor_num, in_channels, weighted=False): 95 | super(MultiRPN, self).__init__() 96 | self.weighted = weighted 97 | for i in range(len(in_channels)): 98 | self.add_module('rpn'+str(i+2), 99 | DepthwiseRPN(anchor_num, in_channels[i], in_channels[i])) 100 | if self.weighted: 101 | self.cls_weight = nn.Parameter(torch.ones(len(in_channels))) 102 | 
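        # one learnable fusion scalar per backbone level; forward() below
        # softmax-normalizes these before averaging the per-level outputs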
self.loc_weight = nn.Parameter(torch.ones(len(in_channels))) 103 | 104 | def forward(self, z_fs, x_fs): 105 | cls = [] 106 | loc = [] 107 | for idx, (z_f, x_f) in enumerate(zip(z_fs, x_fs), start=2): 108 | rpn = getattr(self, 'rpn'+str(idx)) 109 | c, l = rpn(z_f, x_f) 110 | cls.append(c) 111 | loc.append(l) 112 | 113 | if self.weighted: 114 | cls_weight = F.softmax(self.cls_weight, 0) 115 | loc_weight = F.softmax(self.loc_weight, 0) 116 | 117 | def avg(lst): 118 | return sum(lst) / len(lst) 119 | 120 | def weighted_avg(lst, weight): 121 | s = 0 122 | for i in range(len(weight)): 123 | s += lst[i] * weight[i] 124 | return s 125 | 126 | if self.weighted: 127 | return weighted_avg(cls, cls_weight), weighted_avg(loc, loc_weight) 128 | else: 129 | return avg(cls), avg(loc) 130 | -------------------------------------------------------------------------------- /pysot/models/init_weight.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import math 4 | import warnings 5 | 6 | from torch.nn.init import _calculate_fan_in_and_fan_out 7 | 8 | def init_weights(model): 9 | for m in model.modules(): 10 | if isinstance(m, nn.Conv2d): 11 | nn.init.kaiming_normal_(m.weight.data, 12 | mode='fan_out', 13 | nonlinearity='relu') 14 | elif isinstance(m, nn.BatchNorm2d): 15 | m.weight.data.fill_(1) 16 | m.bias.data.zero_() 17 | 18 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 19 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 20 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 21 | def norm_cdf(x): 22 | # Computes standard normal cumulative distribution function 23 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 24 | 25 | if (mean < a - 2 * std) or (mean > b + 2 * std): 26 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 27 | "The distribution of values may be incorrect.", 28 | stacklevel=2) 29 | 30 | with torch.no_grad(): 31 | # Values are generated by using a truncated uniform distribution and 32 | # then using the inverse CDF for the normal distribution. 33 | # Get upper and lower cdf values 34 | l = norm_cdf((a - mean) / std) 35 | u = norm_cdf((b - mean) / std) 36 | 37 | # Uniformly fill tensor with values from [l, u], then translate to 38 | # [2l-1, 2u-1]. 39 | tensor.uniform_(2 * l - 1, 2 * u - 1) 40 | 41 | # Use inverse cdf transform for normal distribution to get truncated 42 | # standard normal 43 | tensor.erfinv_() 44 | 45 | # Transform to proper mean, std 46 | tensor.mul_(std * math.sqrt(2.)) 47 | tensor.add_(mean) 48 | 49 | # Clamp to ensure it's in the proper range 50 | tensor.clamp_(min=a, max=b) 51 | return tensor 52 | 53 | 54 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 55 | # type: (Tensor, float, float, float, float) -> Tensor 56 | r"""Fills the input Tensor with values drawn from a truncated 57 | normal distribution. The values are effectively drawn from the 58 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 59 | with values outside :math:`[a, b]` redrawn until they are within 60 | the bounds. The method used for generating the random values works 61 | best when :math:`a \leq \text{mean} \leq b`. 
62 | Args: 63 | tensor: an n-dimensional `torch.Tensor` 64 | mean: the mean of the normal distribution 65 | std: the standard deviation of the normal distribution 66 | a: the minimum cutoff value 67 | b: the maximum cutoff value 68 | Examples: 69 | >>> w = torch.empty(3, 5) 70 | >>> nn.init.trunc_normal_(w) 71 | """ 72 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 73 | 74 | 75 | def variance_scaling_(tensor, scale=1.0, mode='fan_in', distribution='normal'): 76 | fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) 77 | if mode == 'fan_in': 78 | denom = fan_in 79 | elif mode == 'fan_out': 80 | denom = fan_out 81 | elif mode == 'fan_avg': 82 | denom = (fan_in + fan_out) / 2 83 | 84 | variance = scale / denom 85 | 86 | if distribution == "truncated_normal": 87 | # constant is stddev of standard normal truncated to (-2, 2) 88 | trunc_normal_(tensor, std=math.sqrt(variance) / .87962566103423978) 89 | elif distribution == "normal": 90 | tensor.normal_(std=math.sqrt(variance)) 91 | elif distribution == "uniform": 92 | bound = math.sqrt(3 * variance) 93 | tensor.uniform_(-bound, bound) 94 | else: 95 | raise ValueError(f"invalid distribution {distribution}") 96 | 97 | 98 | def lecun_normal_(tensor): 99 | variance_scaling_(tensor, mode='fan_in', distribution='truncated_normal') 100 | -------------------------------------------------------------------------------- /pysot/models/loss.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch 7 | import torch.nn.functional as F 8 | from .centernet.losses import FocalLoss, RegL1Loss 9 | 10 | def _sigmoid(x): 11 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4) 12 | return y 13 | 14 | def get_cls_loss(pred, label, select): 15 | if len(select.size()) == 0 or \ 16 | select.size() == torch.Size([0]): 17 | return 0 18 | pred = torch.index_select(pred, 0, select) 19 | label = torch.index_select(label, 0, select) 20 | return F.nll_loss(pred, label) 21 | 22 | 23 | def select_cross_entropy_loss(pred, label): 24 | pred = pred.view(-1, 2) 25 | label = label.view(-1) 26 | pos = label.data.eq(1).nonzero().squeeze().cuda() 27 | neg = label.data.eq(0).nonzero().squeeze().cuda() 28 | loss_pos = get_cls_loss(pred, label, pos) 29 | loss_neg = get_cls_loss(pred, label, neg) 30 | return loss_pos * 0.5 + loss_neg * 0.5 31 | 32 | 33 | def weight_l1_loss(pred_loc, label_loc, loss_weight): 34 | b, _, sh, sw = pred_loc.size() 35 | pred_loc = pred_loc.view(b, 4, -1, sh, sw) 36 | diff = (pred_loc - label_loc).abs() 37 | diff = diff.sum(dim=1).view(b, -1, sh, sw) 38 | loss = diff * loss_weight 39 | return loss.sum().div(b) 40 | 41 | def l1_loss(pred_loc, label_loc): 42 | b = pred_loc.shape[0] 43 | # pred_loc = pred_loc.view(b, 4, -1, sh, sw) 44 | diff = (pred_loc - label_loc).abs() 45 | loss = diff.sum() 46 | return loss.sum().div(b) 47 | 48 | def trend_l1_loss(pred_loc, label_loc): 49 | b, _, _ = pred_loc.size() 50 | d = torch.FloatTensor([0.0476, 0.0476, 0.1429, 0.381, 0.381]).cuda() 51 | diff = (pred_loc - label_loc).abs() 52 | loss = diff.sum(dim=[0,2])*d*5 53 | return loss.sum().div(b) 54 | 55 | def norm_l1_loss(pred_loc, label_loc, norm_wc, norm_ws): 56 | b, _, _ = pred_loc.size() 57 | # pred_loc = pred_loc.view(b, 4, -1, sh, sw) 58 | norm_loc = torch.zeros_like(label_loc) 59 | # x y use norm 60 | norm_loc[:,:,0:2] = 
label_loc[:,:,0:2].div(norm_wc.unsqueeze(-1).unsqueeze(-1)) 61 | # w h use origin 62 | norm_loc[:,:,2:4] = label_loc[:,:,2:4].div(norm_ws.unsqueeze(-1).unsqueeze(-1)) 63 | diff = (pred_loc - norm_loc).abs() 64 | # diff = diff.div(norm_w.unsqueeze(-1).unsqueeze(-1)) 65 | loss = diff.sum() 66 | return loss.sum().div(b) 67 | 68 | class CtdetLoss(torch.nn.Module): 69 | def __init__(self, cfg): 70 | super(CtdetLoss, self).__init__() 71 | self.crit = torch.nn.MSELoss() if cfg.PRED.MSE_LOSS else FocalLoss() 72 | self.crit_reg = l1_loss 73 | self.crit_wh = l1_loss 74 | self.cfg = cfg 75 | 76 | def forward(self, output, batch): 77 | cfg = self.cfg 78 | # hm_loss, wh_loss, off_loss = 0, 0, 0 79 | if not cfg.PRED.MSE_LOSS: 80 | output['hm'] = _sigmoid(output['hm']) 81 | hm_loss = self.crit(output['hm'], batch['pred_hm'].cuda()) 82 | wh_loss = self.crit_wh(output['wh'], batch['pred_wh'].cuda()) 83 | off_loss = self.crit_reg(output['reg'], batch['reg'].cuda()) 84 | # for s in range(cfg.TRAIN.PRE_TARGET): 85 | # hm_loss += self.crit(output['hm'][:,s:(s+1)], batch['pred_hm'][:,s:(s+1)].cuda()) 86 | # wh_loss += self.crit_wh(output['wh'][:,2*s:2*(s+1)], batch['reg_mask'][:,:,s].cuda(), batch['ind'][:,:,s].cuda(), batch['pred_wh'][:,:,s].cuda()) 87 | # off_loss += self.crit_reg(output['reg'][:,2*s:2*(s+1)], batch['reg_mask'][:,:,s].cuda(), batch['ind'][:,:,s].cuda(), batch['reg'][:,:,s].cuda()) 88 | 89 | loss = cfg.PRED.HM_W * hm_loss + cfg.PRED.WH_W * wh_loss + \ 90 | cfg.PRED.REG_W * off_loss 91 | loss_stats = {'pred_loss': loss, 'hm_loss': hm_loss, 92 | 'wh_loss': wh_loss, 'off_loss': off_loss} 93 | return loss, loss_stats 94 | -------------------------------------------------------------------------------- /pysot/models/neck/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | 10 | from pysot.models.neck.neck import AdjustLayer, AdjustAllLayer 11 | 12 | NECKS = { 13 | 'AdjustLayer': AdjustLayer, 14 | 'AdjustAllLayer': AdjustAllLayer 15 | } 16 | 17 | def get_neck(name, **kwargs): 18 | return NECKS[name](**kwargs) 19 | -------------------------------------------------------------------------------- /pysot/models/neck/neck.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class AdjustLayer(nn.Module): 10 | def __init__(self, in_channels, out_channels, center_size=7): 11 | super(AdjustLayer, self).__init__() 12 | self.downsample = nn.Sequential( 13 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False), 14 | nn.BatchNorm2d(out_channels), 15 | ) 16 | self.center_size = center_size 17 | 18 | def forward(self, x): 19 | x = self.downsample(x) 20 | if x.size(3) < 20: 21 | l = (x.size(3) - self.center_size) // 2 22 | r = l + self.center_size 23 | x = x[:, :, l:r, l:r] 24 | return x 25 | 26 | 27 | class AdjustAllLayer(nn.Module): 28 | def __init__(self, in_channels, out_channels, center_size=7): 29 | super(AdjustAllLayer, self).__init__() 30 | self.num = len(out_channels) 31 | if self.num == 1: 32 | self.downsample = AdjustLayer(in_channels[0], 33 | out_channels[0], 34 | 
center_size)
35 |         else:
36 |             for i in range(self.num):
37 |                 self.add_module('downsample'+str(i+2),
38 |                                 AdjustLayer(in_channels[i],
39 |                                             out_channels[i],
40 |                                             center_size))
41 | 
42 |     def forward(self, features):
43 |         if self.num == 1:
44 |             return self.downsample(features)
45 |         else:
46 |             out = []
47 |             for i in range(self.num):
48 |                 adj_layer = getattr(self, 'downsample'+str(i+2))
49 |                 out.append(adj_layer(features[i]))
50 |             return out
51 | 
--------------------------------------------------------------------------------
/pysot/models/predictor/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 | 
6 | from pysot.models.predictor.base_predictor import BasePredictor
7 | # motion predictor
8 | from pysot.models.predictor.kf import KalmanF
9 | from pysot.models.predictor.lb_5 import LearnBaseV5
10 | # visual predictor
11 | from pysot.models.predictor.lbv_5 import VisualBaseV5
12 | # joint predictor
13 | from pysot.models.predictor.mv_v16 import MVV16
14 | 
15 | Predictors = {
16 |     'KF': KalmanF,
17 |     'LB_v5': LearnBaseV5,
18 |     'LBv_v5': VisualBaseV5,
19 |     'MV_v16': MVV16,
20 | }
21 | 
22 | 
23 | def get_predictor(name, **kwargs):
24 |     return Predictors[name](**kwargs)
25 | 
26 | 
--------------------------------------------------------------------------------
/pysot/models/predictor/base_predictor.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 | 
6 | import cv2
7 | import numpy as np
8 | import torch
9 | import torch.nn as nn
10 | 
11 | from pysot.core.config import cfg
12 | 
13 | 
14 | 
15 | class BasePredictor(nn.Module):
16 |     """ Base class for box predictors (motion, visual, and joint)
17 |     """
18 |     def init(self, box_init, img_0):
19 |         """
20 |         args:
21 |             box_init(np.ndarray): [l, t, w, h]
22 |             img_0(np.ndarray): BGR image
23 |         """
24 |         raise NotImplementedError
25 | 
26 |     def predict(self, curr_fid, data, delta_t):
27 |         """
28 |         args:
29 |             curr_fid(int): latest processed frame (base frame)
30 |             data(dict): output of tracker
31 |             delta_t(list/ndarray): target delta_t for prediction (target frame)
32 |         return:
33 |             bbox(list/ndarray): predicted boxes [[cx, cy, w, h]_1, [cx, cy, w, h]_2, ...]
34 |             pre_fidx(list/ndarray): future frame id for predicted boxes [fidx_1, fidx_2, ...]
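        Example (illustrative sketch; 'MyPredictor' stands for any concrete
        subclass implementing init() and predict()):
            >>> pred = MyPredictor()
            >>> pred.init(np.array([10., 20., 50., 80.]), img_0)  # [l, t, w, h]
            >>> boxes, pre_fidx = pred.predict(curr_fid=3, data=out, delta_t=[1, 2])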
35 | """ 36 | raise NotImplementedError -------------------------------------------------------------------------------- /pysot/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__init__.py -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/base_tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/base_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/siammask_tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/siammask_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/siammask_tracker_f.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/siammask_tracker_f.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/siamrpn_tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/siamrpn_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/siamrpn_tracker_f.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/siamrpn_tracker_f.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/siamrpnlt_tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/siamrpnlt_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/tracker_builder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/tracker_builder.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/base_tracker.py: -------------------------------------------------------------------------------- 1 | from __future__ import 
absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 | 
6 | import cv2
7 | import numpy as np
8 | import torch
9 | 
10 | from pysot.core.config import cfg
11 | 
12 | 
13 | class BaseTracker(object):
14 |     """ Base tracker for single object tracking
15 |     """
16 |     def init(self, img, bbox):
17 |         """
18 |         args:
19 |             img(np.ndarray): BGR image
20 |             bbox(list): [x, y, width, height]
21 |                 x, y need to be 0-based
22 |         """
23 |         raise NotImplementedError
24 | 
25 |     def track(self, img):
26 |         """
27 |         args:
28 |             img(np.ndarray): BGR image
29 |         return:
30 |             bbox(list): [x, y, width, height]
31 |         """
32 |         raise NotImplementedError
33 | 
34 | 
35 | class SiameseTracker(BaseTracker):
36 |     def get_subwindow(self, im, pos, model_sz, original_sz, avg_chans):
37 |         """
38 |         args:
39 |             im: BGR image
40 |             pos: center position
41 |             model_sz: exemplar size
42 |             original_sz: original (context) size
43 |             avg_chans: channel average
44 |         """
45 |         if isinstance(pos, float):
46 |             pos = [pos, pos]
47 |         sz = original_sz
48 |         im_sz = im.shape
49 |         c = (original_sz + 1) / 2
50 |         # context_xmin = round(pos[0] - c)  # py2 and py3 round
51 |         context_xmin = np.floor(pos[0] - c + 0.5)
52 |         context_xmax = context_xmin + sz - 1
53 |         # context_ymin = round(pos[1] - c)
54 |         context_ymin = np.floor(pos[1] - c + 0.5)
55 |         context_ymax = context_ymin + sz - 1
56 |         left_pad = int(max(0., -context_xmin))
57 |         top_pad = int(max(0., -context_ymin))
58 |         right_pad = int(max(0., context_xmax - im_sz[1] + 1))
59 |         bottom_pad = int(max(0., context_ymax - im_sz[0] + 1))
60 | 
61 |         context_xmin = context_xmin + left_pad
62 |         context_xmax = context_xmax + left_pad
63 |         context_ymin = context_ymin + top_pad
64 |         context_ymax = context_ymax + top_pad
65 | 
66 |         r, c, k = im.shape
67 |         if any([top_pad, bottom_pad, left_pad, right_pad]):
68 |             size = (r + top_pad + bottom_pad, c + left_pad + right_pad, k)
69 |             te_im = np.zeros(size, np.uint8)
70 |             te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im
71 |             if top_pad:
72 |                 te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans
73 |             if bottom_pad:
74 |                 te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans
75 |             if left_pad:
76 |                 te_im[:, 0:left_pad, :] = avg_chans
77 |             if right_pad:
78 |                 te_im[:, c + left_pad:, :] = avg_chans
79 |             im_patch = te_im[int(context_ymin):int(context_ymax + 1),
80 |                              int(context_xmin):int(context_xmax + 1), :]
81 |         else:
82 |             im_patch = im[int(context_ymin):int(context_ymax + 1),
83 |                           int(context_xmin):int(context_xmax + 1), :]
84 | 
85 |         if not np.array_equal(model_sz, original_sz):
86 |             im_patch = cv2.resize(im_patch, (model_sz, model_sz))
87 |         im_patch = im_patch.transpose(2, 0, 1)
88 |         im_patch = im_patch[np.newaxis, :, :, :]
89 |         im_patch = im_patch.astype(np.float32)
90 |         im_patch = torch.from_numpy(im_patch)
91 |         if cfg.CUDA:
92 |             im_patch = im_patch.cuda()
93 |         return im_patch
94 | 
--------------------------------------------------------------------------------
/pysot/tracker/siamrpnlt_tracker.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 | 
6 | import numpy as np
7 | 
8 | from pysot.core.config import cfg
9 | from pysot.tracker.siamrpn_tracker import SiamRPNTracker
10 | 
11 | 
12 | class SiamRPNLTTracker(SiamRPNTracker):
13 |     def __init__(self, model):
14 |         super(SiamRPNLTTracker, self).__init__(model)
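        # re-detection flag: track() switches the search region to
        # TRACK.LOST_INSTANCE_SIZE once best_score drops below
        # TRACK.CONFIDENCE_LOW, and back once it exceeds TRACK.CONFIDENCE_HIGH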
15 | self.longterm_state = False 16 | 17 | def track(self, img): 18 | """ 19 | args: 20 | img(np.ndarray): BGR image 21 | return: 22 | bbox(list):[x, y, width, height] 23 | """ 24 | w_z = self.size[0] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size) 25 | h_z = self.size[1] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size) 26 | s_z = np.sqrt(w_z * h_z) 27 | scale_z = cfg.TRACK.EXEMPLAR_SIZE / s_z 28 | 29 | if self.longterm_state: 30 | instance_size = cfg.TRACK.LOST_INSTANCE_SIZE 31 | else: 32 | instance_size = cfg.TRACK.INSTANCE_SIZE 33 | 34 | score_size = (instance_size - cfg.TRACK.EXEMPLAR_SIZE) // \ 35 | cfg.ANCHOR.STRIDE + 1 + cfg.TRACK.BASE_SIZE 36 | hanning = np.hanning(score_size) 37 | window = np.outer(hanning, hanning) 38 | window = np.tile(window.flatten(), self.anchor_num) 39 | anchors = self.generate_anchor(score_size) 40 | 41 | s_x = s_z * (instance_size / cfg.TRACK.EXEMPLAR_SIZE) 42 | 43 | x_crop = self.get_subwindow(img, self.center_pos, instance_size, 44 | round(s_x), self.channel_average) 45 | outputs = self.model.track(x_crop) 46 | score = self._convert_score(outputs['cls']) 47 | pred_bbox = self._convert_bbox(outputs['loc'], anchors) 48 | 49 | def change(r): 50 | return np.maximum(r, 1. / r) 51 | 52 | def sz(w, h): 53 | pad = (w + h) * 0.5 54 | return np.sqrt((w + pad) * (h + pad)) 55 | 56 | # scale penalty 57 | s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) / 58 | (sz(self.size[0] * scale_z, self.size[1] * scale_z))) 59 | # ratio penalty 60 | r_c = change((self.size[0] / self.size[1]) / 61 | (pred_bbox[2, :] / pred_bbox[3, :])) 62 | penalty = np.exp(-(r_c * s_c - 1) * cfg.TRACK.PENALTY_K) 63 | pscore = penalty * score 64 | 65 | # window 66 | if not self.longterm_state: 67 | pscore = pscore * (1 - cfg.TRACK.WINDOW_INFLUENCE) + \ 68 | window * cfg.TRACK.WINDOW_INFLUENCE 69 | else: 70 | pscore = pscore * (1 - 0.001) + window * 0.001 71 | best_idx = np.argmax(pscore) 72 | 73 | bbox = pred_bbox[:, best_idx] / scale_z 74 | lr = penalty[best_idx] * score[best_idx] * cfg.TRACK.LR 75 | 76 | best_score = score[best_idx] 77 | if best_score >= cfg.TRACK.CONFIDENCE_LOW: 78 | cx = bbox[0] + self.center_pos[0] 79 | cy = bbox[1] + self.center_pos[1] 80 | 81 | width = self.size[0] * (1 - lr) + bbox[2] * lr 82 | height = self.size[1] * (1 - lr) + bbox[3] * lr 83 | else: 84 | cx = self.center_pos[0] 85 | cy = self.center_pos[1] 86 | 87 | width = self.size[0] 88 | height = self.size[1] 89 | 90 | self.center_pos = np.array([cx, cy]) 91 | self.size = np.array([width, height]) 92 | 93 | cx, cy, width, height = self._bbox_clip(cx, cy, width, 94 | height, img.shape[:2]) 95 | bbox = [cx - width / 2, 96 | cy - height / 2, 97 | width, 98 | height] 99 | 100 | if best_score < cfg.TRACK.CONFIDENCE_LOW: 101 | self.longterm_state = True 102 | elif best_score > cfg.TRACK.CONFIDENCE_HIGH: 103 | self.longterm_state = False 104 | 105 | return { 106 | 'bbox': bbox, 107 | 'best_score': best_score 108 | } 109 | -------------------------------------------------------------------------------- /pysot/tracker/tracker_builder.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from pysot.core.config import cfg 7 | from pysot.tracker.siamrpn_tracker import SiamRPNTracker 8 | from pysot.tracker.siamrpn_tracker_f import SiamRPNTracker_f 9 | from pysot.tracker.siammask_tracker import SiamMaskTracker 10 | from 
pysot.tracker.siammask_tracker_f import SiamMaskTracker_f
11 | from pysot.tracker.siamrpn_tracker_ntr import SiamRPNTracker_ntr
12 | from pysot.tracker.siamrpnlt_tracker import SiamRPNLTTracker
13 | 
14 | TRACKS = {
15 |           'SiamRPNTracker': SiamRPNTracker,
16 |           'SiamMaskTracker': SiamMaskTracker,
17 |           'SiamRPNLTTracker': SiamRPNLTTracker
18 |          }
19 | 
20 | TRACKSF = {
21 |           'SiamRPNTracker': SiamRPNTracker_f,
22 |           'SiamMaskTracker': SiamMaskTracker_f,
23 |           'SiamRPNTracker_ntr': SiamRPNTracker_ntr,
24 |          }
25 | 
26 | 
27 | def build_tracker(model):
28 |     return TRACKS[cfg.TRACK.TYPE](model)
29 | 
30 | def build_tracker_f(model):
31 |     return TRACKSF[cfg.TRACK.TYPE](model)
32 | 
--------------------------------------------------------------------------------
/pysot/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/utils/__init__.py
--------------------------------------------------------------------------------
/pysot/utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/utils/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/utils/__pycache__/anchor.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/utils/__pycache__/anchor.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/utils/__pycache__/bbox.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/utils/__pycache__/bbox.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/utils/__pycache__/model_load.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/utils/__pycache__/model_load.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/utils/anchor.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 | 
6 | import math
7 | 
8 | import numpy as np
9 | 
10 | from pysot.utils.bbox import corner2center, center2corner
11 | 
12 | 
13 | class Anchors:
14 |     """
15 |     This class generates anchors.
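    Example (sketch using the stride/ratio/scale values typical of the
    SiamRPN++ configs under experiments/):
        >>> anc = Anchors(stride=8, ratios=[0.33, 0.5, 1, 2, 3], scales=[8])
        >>> anc.anchor_num
        5
        >>> anc.generate_all_anchors(im_c=127, size=25)  # first call returns True
        True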
16 | """ 17 | def __init__(self, stride, ratios, scales, image_center=0, size=0): 18 | self.stride = stride 19 | self.ratios = ratios 20 | self.scales = scales 21 | self.image_center = image_center 22 | self.size = size 23 | 24 | self.anchor_num = len(self.scales) * len(self.ratios) 25 | 26 | self.anchors = None 27 | 28 | self.generate_anchors() 29 | 30 | def generate_anchors(self): 31 | """ 32 | generate anchors based on predefined configuration 33 | """ 34 | self.anchors = np.zeros((self.anchor_num, 4), dtype=np.float32) 35 | size = self.stride * self.stride 36 | count = 0 37 | for r in self.ratios: 38 | ws = int(math.sqrt(size*1. / r)) 39 | hs = int(ws * r) 40 | 41 | for s in self.scales: 42 | w = ws * s 43 | h = hs * s 44 | self.anchors[count][:] = [-w*0.5, -h*0.5, w*0.5, h*0.5][:] 45 | count += 1 46 | 47 | def generate_all_anchors(self, im_c, size): 48 | """ 49 | im_c: image center 50 | size: image size 51 | """ 52 | if self.image_center == im_c and self.size == size: 53 | return False 54 | self.image_center = im_c 55 | self.size = size 56 | 57 | a0x = im_c - size // 2 * self.stride 58 | ori = np.array([a0x] * 4, dtype=np.float32) 59 | zero_anchors = self.anchors + ori 60 | 61 | x1 = zero_anchors[:, 0] 62 | y1 = zero_anchors[:, 1] 63 | x2 = zero_anchors[:, 2] 64 | y2 = zero_anchors[:, 3] 65 | 66 | x1, y1, x2, y2 = map(lambda x: x.reshape(self.anchor_num, 1, 1), 67 | [x1, y1, x2, y2]) 68 | cx, cy, w, h = corner2center([x1, y1, x2, y2]) 69 | 70 | disp_x = np.arange(0, size).reshape(1, 1, -1) * self.stride 71 | disp_y = np.arange(0, size).reshape(1, -1, 1) * self.stride 72 | 73 | cx = cx + disp_x 74 | cy = cy + disp_y 75 | 76 | # broadcast 77 | zero = np.zeros((self.anchor_num, size, size), dtype=np.float32) 78 | cx, cy, w, h = map(lambda x: x + zero, [cx, cy, w, h]) 79 | x1, y1, x2, y2 = center2corner([cx, cy, w, h]) 80 | 81 | self.all_anchors = (np.stack([x1, y1, x2, y2]).astype(np.float32), 82 | np.stack([cx, cy, w, h]).astype(np.float32)) 83 | return True 84 | -------------------------------------------------------------------------------- /pysot/utils/average_meter.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | 7 | class Meter(object): 8 | def __init__(self, name, val, avg): 9 | self.name = name 10 | self.val = val 11 | self.avg = avg 12 | 13 | def __repr__(self): 14 | return "{name}: {val:.6f} ({avg:.6f})".format( 15 | name=self.name, val=self.val, avg=self.avg 16 | ) 17 | 18 | def __format__(self, *tuples, **kwargs): 19 | return self.__repr__() 20 | 21 | 22 | class AverageMeter: 23 | """Computes and stores the average and current value""" 24 | def __init__(self, num=100): 25 | self.num = num 26 | self.reset() 27 | 28 | def reset(self): 29 | self.val = {} 30 | self.sum = {} 31 | self.count = {} 32 | self.history = {} 33 | 34 | def update(self, batch=1, **kwargs): 35 | val = {} 36 | for k in kwargs: 37 | val[k] = kwargs[k] / float(batch) 38 | self.val.update(val) 39 | for k in kwargs: 40 | if k not in self.sum: 41 | self.sum[k] = 0 42 | self.count[k] = 0 43 | self.history[k] = [] 44 | self.sum[k] += kwargs[k] 45 | self.count[k] += batch 46 | for _ in range(batch): 47 | self.history[k].append(val[k]) 48 | 49 | if self.num <= 0: 50 | # < 0, average all 51 | self.history[k] = [] 52 | 53 | # == 0: no average 54 | if self.num == 0: 55 | self.sum[k] = self.val[k] 56 | self.count[k] 
= 1
57 | 
58 |             elif len(self.history[k]) > self.num:
59 |                 pop_num = len(self.history[k]) - self.num
60 |                 for _ in range(pop_num):
61 |                     self.sum[k] -= self.history[k][0]
62 |                     del self.history[k][0]
63 |                     self.count[k] -= 1
64 | 
65 |     def __repr__(self):
66 |         s = ''
67 |         for k in self.sum:
68 |             s += self.format_str(k)
69 |         return s
70 | 
71 |     def format_str(self, attr):
72 |         return "{name}: {val:.6f} ({avg:.6f}) ".format(
73 |                     name=attr,
74 |                     val=float(self.val[attr]),
75 |                     avg=float(self.sum[attr]) / self.count[attr])
76 | 
77 |     def __getattr__(self, attr):
78 |         if attr in self.__dict__:
79 |             return super(AverageMeter, self).__getattr__(attr)
80 |         if attr not in self.sum:
81 |             print("invalid key '{}'".format(attr))
82 |             return Meter(attr, 0, 0)
83 |         return Meter(attr, self.val[attr], self.avg(attr))
84 | 
85 |     def avg(self, attr):
86 |         return float(self.sum[attr]) / self.count[attr]
87 | 
88 | 
89 | if __name__ == '__main__':
90 |     avg1 = AverageMeter(10)
91 |     avg2 = AverageMeter(0)
92 |     avg3 = AverageMeter(-1)
93 | 
94 |     for i in range(20):
95 |         avg1.update(s=i)
96 |         avg2.update(s=i)
97 |         avg3.update(s=i)
98 | 
99 |         print('iter {}'.format(i))
100 |         print(avg1.s)
101 |         print(avg2.s)
102 |         print(avg3.s)
103 | 
--------------------------------------------------------------------------------
/pysot/utils/bbox.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 | 
6 | from collections import namedtuple
7 | 
8 | import numpy as np
9 | 
10 | 
11 | Corner = namedtuple('Corner', 'x1 y1 x2 y2')
12 | # alias
13 | BBox = Corner
14 | Center = namedtuple('Center', 'x y w h')
15 | 
16 | 
17 | def corner2center(corner):
18 |     """ convert (x1, y1, x2, y2) to (cx, cy, w, h)
19 |     Args:
20 |         corner: Corner or np.array (4*N)
21 |     Return:
22 |         Center or np.array (4 * N)
23 |     """
24 |     if isinstance(corner, Corner):
25 |         x1, y1, x2, y2 = corner
26 |         return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, (x2 - x1), (y2 - y1))
27 |     else:
28 |         x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3]
29 |         x = (x1 + x2) * 0.5
30 |         y = (y1 + y2) * 0.5
31 |         w = x2 - x1
32 |         h = y2 - y1
33 |         return x, y, w, h
34 | 
35 | 
36 | def center2corner(center):
37 |     """ convert (cx, cy, w, h) to (x1, y1, x2, y2)
38 |     Args:
39 |         center: Center or np.array (4 * N)
40 |     Return:
41 |         Corner or np.array (4 * N)
42 |     """
43 |     if isinstance(center, Center):
44 |         x, y, w, h = center
45 |         return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5)
46 |     else:
47 |         x, y, w, h = center[0], center[1], center[2], center[3]
48 |         x1 = x - w * 0.5
49 |         y1 = y - h * 0.5
50 |         x2 = x + w * 0.5
51 |         y2 = y + h * 0.5
52 |         return x1, y1, x2, y2
53 | 
54 | 
55 | def IoU(rect1, rect2):
56 |     """ calculate intersection over union
57 |     Args:
58 |         rect1: (x1, y1, x2, y2)
59 |         rect2: (x1, y1, x2, y2)
60 |     Returns:
61 |         iou
62 |     """
63 |     # overlap
64 |     x1, y1, x2, y2 = rect1[0], rect1[1], rect1[2], rect1[3]
65 |     tx1, ty1, tx2, ty2 = rect2[0], rect2[1], rect2[2], rect2[3]
66 | 
67 |     xx1 = np.maximum(tx1, x1)
68 |     yy1 = np.maximum(ty1, y1)
69 |     xx2 = np.minimum(tx2, x2)
70 |     yy2 = np.minimum(ty2, y2)
71 | 
72 |     ww = np.maximum(0, xx2 - xx1)
73 |     hh = np.maximum(0, yy2 - yy1)
74 | 
75 |     area = (x2-x1) * (y2-y1)
76 |     target_a = (tx2-tx1) * (ty2 - ty1)
77 |     inter = ww * hh
78 |     iou = inter / (area + target_a - inter)
79 |     return iou
80 | 
81 | 
82 | def cxy_wh_2_rect(pos, sz):
83 |     """ convert (cx, cy, w, h) to (x1, y1, w, h), 0-index
84 |     """
85 |     return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]])
86 | 
87 | 
88 | def rect_2_cxy_wh(rect):
89 |     """ convert (x1, y1, w, h) to (cx, cy, w, h), 0-index
90 |     """
91 |     return np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2, rect[2], rect[3]])
92 | 
93 | 
94 | def cxy_wh_2_rect1(pos, sz):
95 |     """ convert (cx, cy, w, h) to (x1, y1, w, h), 1-index
96 |     """
97 |     return np.array([pos[0]-sz[0]/2+1, pos[1]-sz[1]/2+1, sz[0], sz[1]])
98 | 
99 | 
100 | def rect1_2_cxy_wh(rect):
101 |     """ convert (x1, y1, w, h) to (cx, cy, w, h), 1-index
102 |     """
103 |     return np.array([rect[0]+rect[2]/2-1, rect[1]+rect[3]/2-1]), \
104 |            np.array([rect[2], rect[3]])
105 | 
106 | 
107 | def get_axis_aligned_bbox(region):
108 |     """ convert region to (cx, cy, w, h) represented by an axis-aligned box
109 |     """
110 |     nv = region.size
111 |     if nv == 8:
112 |         cx = np.mean(region[0::2])
113 |         cy = np.mean(region[1::2])
114 |         x1 = min(region[0::2])
115 |         x2 = max(region[0::2])
116 |         y1 = min(region[1::2])
117 |         y2 = max(region[1::2])
118 |         A1 = np.linalg.norm(region[0:2] - region[2:4]) * \
119 |             np.linalg.norm(region[2:4] - region[4:6])
120 |         A2 = (x2 - x1) * (y2 - y1)
121 |         s = np.sqrt(A1 / A2)
122 |         w = s * (x2 - x1) + 1
123 |         h = s * (y2 - y1) + 1
124 |     else:
125 |         x = region[0]
126 |         y = region[1]
127 |         w = region[2]
128 |         h = region[3]
129 |         cx = x+w/2
130 |         cy = y+h/2
131 |     return cx, cy, w, h
132 | 
133 | 
134 | def get_min_max_bbox(region):
135 |     """ convert region to (cx, cy, w, h) represented by a min-max box
136 |     """
137 |     nv = region.size
138 |     if nv == 8:
139 |         cx = np.mean(region[0::2])
140 |         cy = np.mean(region[1::2])
141 |         x1 = min(region[0::2])
142 |         x2 = max(region[0::2])
143 |         y1 = min(region[1::2])
144 |         y2 = max(region[1::2])
145 |         w = x2 - x1
146 |         h = y2 - y1
147 |     else:
148 |         x = region[0]
149 |         y = region[1]
150 |         w = region[2]
151 |         h = region[3]
152 |         cx = x+w/2
153 |         cy = y+h/2
154 |     return cx, cy, w, h
155 | 
--------------------------------------------------------------------------------
/pysot/utils/distributed.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 | 
6 | import os
7 | import socket
8 | import logging
9 | 
10 | import torch
11 | import torch.nn as nn
12 | import torch.distributed as dist
13 | 
14 | from pysot.utils.log_helper import log_once
15 | 
16 | logger = logging.getLogger('global')
17 | 
18 | 
19 | def average_reduce(v):
20 |     if get_world_size() == 1:
21 |         return v
22 |     tensor = torch.cuda.FloatTensor(1)
23 |     tensor[0] = v
24 |     dist.all_reduce(tensor)
25 |     v = tensor[0] / get_world_size()
26 |     return v
27 | 
28 | 
29 | class DistModule(nn.Module):
30 |     def __init__(self, module, bn_method=0):
31 |         super(DistModule, self).__init__()
32 |         self.module = module
33 |         self.bn_method = bn_method
34 |         if get_world_size() > 1:
35 |             broadcast_params(self.module)
36 |         else:
37 |             self.bn_method = 0  # single process
38 | 
39 |     def forward(self, *args, **kwargs):
40 |         broadcast_buffers(self.module, self.bn_method)
41 |         return self.module(*args, **kwargs)
42 | 
43 |     def train(self, mode=True):
44 |         super(DistModule, self).train(mode)
45 |         self.module.train(mode)
46 |         return self
47 | 
48 | 
49 | def broadcast_params(model):
50 |     """ broadcast model parameters """
51 |     for p in model.state_dict().values():
52 |         dist.broadcast(p, 0)
53 | 
54 | 
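# Usage sketch (illustrative; assumes a multi-process NCCL launch, and note
# that dist_init() further below is currently stubbed to a single process):
#
#   rank, world_size = dist_init()
#   model = DistModule(net.cuda(), bn_method=2)  # 2 = average BN buffers
#   output = model(batch)                        # buffers synced each forward
#   loss.backward()
#   reduce_gradients(model, 'avg')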
55 | def broadcast_buffers(model, method=0):
56 |     """ broadcast model buffers """
57 |     if method == 0:
58 |         return
59 | 
60 |     world_size = get_world_size()
61 | 
62 |     for b in model._all_buffers():
63 |         if method == 1:  # broadcast from main process
64 |             dist.broadcast(b, 0)
65 |         elif method == 2:  # average
66 |             dist.all_reduce(b)
67 |             b /= world_size
68 |         else:
69 |             raise Exception('Invalid buffer broadcast code {}'.format(method))
70 | 
71 | 
72 | inited = False
73 | 
74 | 
75 | def _dist_init():
76 |     '''
77 |     Assumed launcher environment (e.g. SLURM):
78 |         ntasks: world_size (process num)
79 |         proc_id: rank
80 |     '''
81 |     # rank = int(os.environ['RANK'])
82 |     rank = 0
83 |     num_gpus = torch.cuda.device_count()
84 |     torch.cuda.set_device(rank % num_gpus)
85 |     dist.init_process_group(backend='nccl')
86 |     world_size = dist.get_world_size()
87 |     return rank, world_size
88 | 
89 | 
90 | def _get_local_ip():
91 |     try:
92 |         s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
93 |         s.connect(('8.8.8.8', 80))
94 |         ip = s.getsockname()[0]
95 |     finally:
96 |         s.close()
97 |     return ip
98 | 
99 | 
100 | def dist_init():
101 |     global rank, world_size, inited
102 |     # try:
103 |     #     rank, world_size = _dist_init()
104 |     # except RuntimeError as e:
105 |     #     if 'public' in e.args[0]:
106 |     #         logger.info(e)
107 |     #         logger.info('Warning: use single process')
108 |     #         rank, world_size = 0, 1
109 |     #     else:
110 |     #         raise RuntimeError(*e.args)
111 |     rank, world_size = 0, 1
112 |     inited = True
113 |     return rank, world_size
114 | 
115 | 
116 | def get_rank():
117 |     if not inited:
118 |         raise(Exception('dist not inited'))
119 |     return rank
120 | 
121 | 
122 | def get_world_size():
123 |     if not inited:
124 |         raise(Exception('dist not inited'))
125 |     return world_size
126 | 
127 | 
128 | def reduce_gradients(model, _type='sum'):
129 |     types = ['sum', 'avg']
130 |     assert _type in types, 'gradients method must be in "{}"'.format(types)
131 |     log_once("gradients method is {}".format(_type))
132 |     if get_world_size() > 1:
133 |         for param in model.parameters():
134 |             if param.requires_grad:
135 |                 dist.all_reduce(param.grad.data)
136 |                 if _type == 'avg':
137 |                     param.grad.data /= get_world_size()
138 |     else:
139 |         return None
140 | 
--------------------------------------------------------------------------------
/pysot/utils/misc.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 | 
6 | import os
7 | 
8 | from colorama import Fore, Style
9 | 
10 | 
11 | __all__ = ['commit', 'describe']
12 | 
13 | 
14 | def _exec(cmd):
15 |     f = os.popen(cmd, 'r', 1)
16 |     return f.read().strip()
17 | 
18 | 
19 | def _bold(s):
20 |     return "\033[1m%s\033[0m" % s
21 | 
22 | 
23 | def _color(s):
24 |     return f'{Fore.RED}{s}{Style.RESET_ALL}'
25 | 
26 | 
27 | def _describe(model, lines=None, spaces=0):
28 |     head = " " * spaces
29 |     for name, p in model.named_parameters():
30 |         if '.' in name:
31 |             continue
32 |         if p.requires_grad:
33 |             name = _color(name)
34 |         line = "{head}- {name}".format(head=head, name=name)
35 |         lines.append(line)
36 | 
37 |     for name, m in model.named_children():
38 |         space_num = len(name) + spaces + 1
39 |         if m.training:
40 |             name = _color(name)
41 |         line = "{head}.{name} ({type})".format(
42 |             head=head,
43 |             name=name,
44 |             type=m.__class__.__name__)
45 |         lines.append(line)
46 |         _describe(m, lines, space_num)
47 | 
48 | 
49 | def commit():
50 |     root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))
51 |     cmd = "cd {}; git log | head -n1 | awk '{{print $2}}'".format(root)
52 |     commit = _exec(cmd)
53 |     cmd = "cd {}; git log --oneline | head -n1".format(root)
54 |     commit_log = _exec(cmd)
55 |     return "commit : {}\n log : {}".format(commit, commit_log)
56 | 
57 | 
58 | def describe(net, name=None):
59 |     num = 0
60 |     lines = []
61 |     if name is not None:
62 |         lines.append(name)
63 |         num = len(name)
64 |     _describe(net, lines, num)
65 |     return "\n".join(lines)
66 | 
--------------------------------------------------------------------------------
/pysot/utils/model_load.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 | 
6 | import logging
7 | 
8 | import torch
9 | 
10 | 
11 | logger = logging.getLogger('global')
12 | 
13 | 
14 | def check_keys(model, pretrained_state_dict):
15 |     ckpt_keys = set(pretrained_state_dict.keys())
16 |     model_keys = set(model.state_dict().keys())
17 |     used_pretrained_keys = model_keys & ckpt_keys
18 |     unused_pretrained_keys = ckpt_keys - model_keys
19 |     missing_keys = model_keys - ckpt_keys
20 |     # filter 'num_batches_tracked'
21 |     missing_keys = [x for x in missing_keys
22 |                     if not x.endswith('num_batches_tracked')]
23 |     if len(missing_keys) > 0:
24 |         logger.info('[Warning] missing keys: {}'.format(missing_keys))
25 |         logger.info('missing keys:{}'.format(len(missing_keys)))
26 |     if len(unused_pretrained_keys) > 0:
27 |         logger.info('[Warning] unused_pretrained_keys: {}'.format(
28 |             unused_pretrained_keys))
29 |         logger.info('unused checkpoint keys:{}'.format(
30 |             len(unused_pretrained_keys)))
31 |     logger.info('used keys:{}'.format(len(used_pretrained_keys)))
32 |     assert len(used_pretrained_keys) > 0, \
33 |         'load NONE from pretrained checkpoint'
34 |     return True
35 | 
36 | 
37 | def remove_prefix(state_dict, prefix):
38 |     ''' Old-style models store all parameter names with the
39 |     common prefix 'module.' '''
40 |     logger.info('remove prefix \'{}\''.format(prefix))
41 |     f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
42 |     return {f(key): value for key, value in state_dict.items()}
43 | 
44 | 
45 | def load_pretrain(model, pretrained_path):
46 |     logger.info('load pretrained model from {}'.format(pretrained_path))
47 |     device = torch.cuda.current_device()
48 |     pretrained_dict = torch.load(pretrained_path,
49 |         map_location=lambda storage, loc: storage.cuda(device))
50 |     if "state_dict" in pretrained_dict.keys():
51 |         pretrained_dict = remove_prefix(pretrained_dict['state_dict'],
52 |                                         'module.')
53 |     else:
54 |         pretrained_dict = remove_prefix(pretrained_dict, 'module.')
55 | 
56 |     try:
57 |         check_keys(model, pretrained_dict)
58 |     except AssertionError:
59 |         logger.info('[Warning]: using pretrain as features.\
60 |             Adding "features." 
as prefix') 61 | new_dict = {} 62 | for k, v in pretrained_dict.items(): 63 | k = 'features.' + k 64 | new_dict[k] = v 65 | pretrained_dict = new_dict 66 | check_keys(model, pretrained_dict) 67 | model.load_state_dict(pretrained_dict, strict=False) 68 | return model 69 | 70 | 71 | def restore_from(model, optimizer, ckpt_path): 72 | device = torch.cuda.current_device() 73 | ckpt = torch.load(ckpt_path, 74 | map_location=lambda storage, loc: storage.cuda(device)) 75 | if 'epoch' not in ckpt: 76 | epoch = 0 77 | else: 78 | epoch = ckpt['epoch'] 79 | 80 | # ckpt_model_dict = remove_prefix(ckpt['state_dict'], 'module.') 81 | if 'state_dict' in ckpt: 82 | model_cp = ckpt['state_dict'] 83 | else: 84 | model_cp = ckpt 85 | check_keys(model, model_cp) 86 | model.load_state_dict(model_cp, strict=False) 87 | 88 | if 'optimizer' in ckpt: 89 | check_keys(optimizer, ckpt['optimizer']) 90 | optimizer.load_state_dict(ckpt['optimizer']) 91 | 92 | return model, optimizer, epoch 93 | -------------------------------------------------------------------------------- /test_agx_mob.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=/path/to/PVT++:$PYTHONPATH 2 | # Mob 3 | # DTB70 4 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \ 5 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'DTB70' --datasetroot 'testing_dataset/DTB70' 6 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \ 7 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'DTB70' --datasetroot 'testing_dataset/DTB70' 8 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \ 9 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'DTB70' --datasetroot 'testing_dataset/DTB70' 10 | # # UAVDT 11 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \ 12 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'UAVDT' --datasetroot 'testing_dataset/UAVDT' 13 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \ 14 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'UAVDT' --datasetroot 'testing_dataset/UAVDT' 15 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \ 16 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'UAVDT' --datasetroot 'testing_dataset/UAVDT' 17 | # # UAV20 18 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \ 19 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'UAV20' --datasetroot 'testing_dataset/UAV20L' 20 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \ 21 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'UAV20' --datasetroot 'testing_dataset/UAV20L' 22 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \ 23 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'UAV20' --datasetroot 'testing_dataset/UAV20L' 24 | # # UAV123 25 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \ 26 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'UAV123' --datasetroot 'testing_dataset/UAV123' 27 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \ 28 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'UAV123' --datasetroot 'testing_dataset/UAV123' 29 | python 
onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \ 30 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'UAV123' --datasetroot 'testing_dataset/UAV123' -------------------------------------------------------------------------------- /test_sim_mob.sh: -------------------------------------------------------------------------------- 1 | export PYTHONPATH=/path/to/PVT++:$PYTHONPATH 2 | # Mob 3 | # DTB70 4 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \ 5 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'DTB70' --datasetroot 'testing_dataset/DTB70' --sim_info 'testing_dataset/sim_info/DTB70_SiamRPN++_Mob_sim.pkl' 6 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \ 7 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'DTB70' --datasetroot 'testing_dataset/DTB70' --sim_info 'testing_dataset/sim_info/DTB70_SiamRPN++_Mob_sim.pkl' 8 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \ 9 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'DTB70' --datasetroot 'testing_dataset/DTB70' --sim_info 'testing_dataset/sim_info/DTB70_SiamRPN++_Mob_sim.pkl' 10 | # # UAVDT 11 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \ 12 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'UAVDT' --datasetroot 'testing_dataset/UAVDT' --sim_info 'testing_dataset/sim_info/UAVDT_SiamRPN++_Mob_sim.pkl' 13 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \ 14 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'UAVDT' --datasetroot 'testing_dataset/UAVDT' --sim_info 'testing_dataset/sim_info/UAVDT_SiamRPN++_Mob_sim.pkl' 15 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \ 16 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'UAVDT' --datasetroot 'testing_dataset/UAVDT' --sim_info 'testing_dataset/sim_info/UAVDT_SiamRPN++_Mob_sim.pkl' 17 | # # UAV20 18 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \ 19 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'UAV20' --datasetroot 'testing_dataset/UAV20L' --sim_info 'testing_dataset/sim_info/UAV20_SiamRPN++_Mob_sim.pkl' 20 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \ 21 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'UAV20' --datasetroot 'testing_dataset/UAV20L' --sim_info 'testing_dataset/sim_info/UAV20_SiamRPN++_Mob_sim.pkl' 22 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \ 23 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'UAV20' --datasetroot 'testing_dataset/UAV20L' --sim_info 'testing_dataset/sim_info/UAV20_SiamRPN++_Mob_sim.pkl' 24 | # # UAV123 25 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \ 26 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'UAV123' --datasetroot 'testing_dataset/UAV123' --sim_info 'testing_dataset/sim_info/UAV123_SiamRPN++_Mob_sim.pkl' 27 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \ 28 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'UAV123' --datasetroot 'testing_dataset/UAV123' --sim_info 'testing_dataset/sim_info/UAV123_SiamRPN++_Mob_sim.pkl' 29 | python tools/test_rt_f_sim.py --config 
'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \ 30 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'UAV123' --datasetroot 'testing_dataset/UAV123' --sim_info 'testing_dataset/sim_info/UAV123_SiamRPN++_Mob_sim.pkl' -------------------------------------------------------------------------------- /toolkit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/__init__.py -------------------------------------------------------------------------------- /toolkit/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .otb import OTBDataset 2 | from .lasot import LaSOTDataset 3 | from .got10k import GOT10kDataset 4 | from .uav10fps import UAV10Dataset 5 | from .uavdark import UAVDARKDataset 6 | from .uavdt import UAVDTDataset 7 | from .dtb import DTB70Dataset 8 | from .uav20l import UAV20Dataset 9 | from .uav123 import UAV123Dataset 10 | from .realworld import RealWorldDataset 11 | from .visdrone import VISDRONEDataset 12 | class DatasetFactory(object): 13 | @staticmethod 14 | def create_dataset(**kwargs): 15 | """ 16 | Args: 17 | name: dataset name, e.g. 'OTB100', 'LaSOT', 'DTB70', 'UAV10', 'UAV20', 18 | 'UAV123', 'UAVDT', 'UAVDARK', 'RealWorld', 'VISDRONE', 'GOT-10k' 19 | dataset_root: dataset root 20 | load_img: whether to load images 21 | Return: 22 | dataset 23 | """ 24 | assert 'name' in kwargs, "should provide dataset name" 25 | name = kwargs['name'] 26 | if 'OTB' in name: 27 | dataset = OTBDataset(**kwargs) 28 | elif 'DTB70' in name: 29 | dataset = DTB70Dataset(**kwargs) 30 | elif 'UAV10' in name: 31 | dataset = UAV10Dataset(**kwargs) 32 | elif 'UAV20' in name: 33 | dataset = UAV20Dataset(**kwargs) 34 | elif "RealWorld" in name: 35 | dataset = RealWorldDataset(**kwargs) 36 | elif 'VISDRONE' in name: 37 | dataset = VISDRONEDataset(**kwargs) 38 | elif 'UAVDT' in name: 39 | dataset = UAVDTDataset(**kwargs) 40 | elif 'LaSOT' == name: 41 | dataset = LaSOTDataset(**kwargs) 42 | elif 'UAVDARK' in name: 43 | dataset = UAVDARKDataset(**kwargs) 44 | elif 'UAV123' in name: 45 | dataset = UAV123Dataset(**kwargs) 46 | elif 'GOT-10k' == name: 47 | dataset = GOT10kDataset(**kwargs) 48 | else: 49 | raise Exception("unknown dataset {}".format(kwargs['name'])) 50 | return dataset 51 | 52 | -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/dataset.cpython-38.pyc
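A minimal usage sketch of the factory above (illustrative only; it assumes the repo root is on `PYTHONPATH` and the datasets are laid out as in the `test_*.sh` scripts):

```python
# Illustrative sketch -- 'DTB70' routes to DTB70Dataset, which scans
# ./testing_dataset/DTB70/ internally, so run this from the repo root.
from toolkit.datasets import DatasetFactory

dataset = DatasetFactory.create_dataset(name='DTB70',
                                        dataset_root='testing_dataset/DTB70',
                                        load_img=False)
print(len(dataset))          # number of videos
for video in dataset:        # Dataset.__iter__ yields videos sorted by name
    print(video.name, len(video.gt_traj))
```

Note that routing is by substring, so e.g. a name like 'UAV20L' still selects `UAV20Dataset`.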
-------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/dtb.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/dtb.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/got10k.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/got10k.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/lasot.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/lasot.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/otb.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/otb.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/realworld.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/realworld.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/uav10fps.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/uav10fps.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/uav123.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/uav123.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/uav20l.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/uav20l.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/uavdark.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/uavdark.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/uavdt.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/uavdt.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/video.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/video.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/visdrone.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/datasets/__pycache__/visdrone.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | 3 | class Dataset(object): 4 | def __init__(self, name, dataset_root): 5 | self.name = name 6 | self.dataset_root = dataset_root 7 | self.videos = None 8 | 9 | def __getitem__(self, idx): 10 | if isinstance(idx, str): 11 | return self.videos[idx] 12 | elif isinstance(idx, int): 13 | return self.videos[sorted(list(self.videos.keys()))[idx]] 14 | 15 | def __len__(self): 16 | return len(self.videos) 17 | 18 | def __iter__(self): 19 | keys = sorted(list(self.videos.keys())) 20 | for key in keys: 21 | yield self.videos[key] 22 | 23 | def set_tracker(self, path, tracker_names): 24 | """ 25 | Args: 26 | path: path to tracker results, 27 | tracker_names: list of tracker name 28 | """ 29 | self.tracker_path = path 30 | self.tracker_names = tracker_names 31 | # for video in tqdm(self.videos.values(), 32 | # desc='loading tracker result', ncols=100): 33 | # video.load_tracker(path, tracker_names) 34 | -------------------------------------------------------------------------------- /toolkit/datasets/dtb.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | def ca(): 14 | 15 | path='./testing_dataset/DTB70/' 16 | 17 | name_list=os.listdir(path) 18 | name_list.sort() 19 | 20 | b=[] 21 | for i in range(len(name_list)): 22 | b.append(name_list[i]) 23 | c=[] 24 | 25 | for jj in range(len(name_list)): 26 | imgs=path+str(name_list[jj])+'/img/' 27 | txt=path+str(name_list[jj])+'/groundtruth_rect.txt' 28 | bbox=[] 29 | f = open(txt) # returns a file object 30 | file= f.readlines() 31 | li=os.listdir(imgs) 32 | li.sort() 33 | for ii in range(len(file)): 34 | li[ii]=name_list[jj]+'/img/'+li[ii] 35 | 36 | line = file[ii].strip('\n').split(',') 37 | 38 | try: 39 | line[0]=int(line[0]) 40 | except: 41 | line[0]=float(line[0]) 42 | try: 43 | line[1]=int(line[1]) 44 | except: 45 | line[1]=float(line[1]) 46 | try: 47 | line[2]=int(line[2]) 48 | except: 49 | line[2]=float(line[2]) 50 | try: 51 | line[3]=int(line[3]) 52 | except: 53 | line[3]=float(line[3]) 54 | bbox.append(line) 55 | 56 | if len(bbox)!=len(li): 57 | print (jj) 58 | f.close() 59 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 60 | 61 | d=dict(zip(b,c)) 62 | 63 | return d 64 | 
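# ca() returns {video_name: {'attr', 'gt_rect', 'img_names', 'init_rect', 'video_dir'}}:
# the sorted frames under <video>/img/ are paired with one "x,y,w,h" row per frame of
# groundtruth_rect.txt, keeping each coordinate as int where possible, float otherwise.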
| """ 67 | Args: 68 | name: video name 69 | root: dataset root 70 | video_dir: video directory 71 | init_rect: init rectangle 72 | img_names: image names 73 | gt_rect: groundtruth rectangle 74 | attr: attribute of video 75 | """ 76 | def __init__(self, name, root, video_dir, init_rect, img_names, 77 | gt_rect, attr, load_img=False): 78 | super(UAVVideo, self).__init__(name, root, video_dir, 79 | init_rect, img_names, gt_rect, attr, load_img) 80 | 81 | 82 | class DTB70Dataset(Dataset): 83 | """ 84 | Args: 85 | name: dataset name, should be 'UAV123', 'UAV20L' 86 | dataset_root: dataset root 87 | load_img: wether to load all imgs 88 | """ 89 | def __init__(self, name, dataset_root, load_img=False): 90 | super(DTB70Dataset, self).__init__(name, dataset_root) 91 | meta_data = ca() 92 | 93 | # load videos 94 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 95 | self.videos = {} 96 | for video in pbar: 97 | pbar.set_postfix_str(video) 98 | self.videos[video] = UAVVideo(video, 99 | dataset_root, 100 | meta_data[video]['video_dir'], 101 | meta_data[video]['init_rect'], 102 | meta_data[video]['img_names'], 103 | meta_data[video]['gt_rect'], 104 | meta_data[video]['attr']) 105 | 106 | 107 | -------------------------------------------------------------------------------- /toolkit/datasets/got10k.py: -------------------------------------------------------------------------------- 1 | 2 | import json 3 | import os 4 | 5 | from tqdm import tqdm 6 | 7 | from .dataset import Dataset 8 | from .video import Video 9 | 10 | class GOT10kVideo(Video): 11 | """ 12 | Args: 13 | name: video name 14 | root: dataset root 15 | video_dir: video directory 16 | init_rect: init rectangle 17 | img_names: image names 18 | gt_rect: groundtruth rectangle 19 | attr: attribute of video 20 | """ 21 | def __init__(self, name, root, video_dir, init_rect, img_names, 22 | gt_rect, attr, load_img=False): 23 | super(GOT10kVideo, self).__init__(name, root, video_dir, 24 | init_rect, img_names, gt_rect, attr, load_img) 25 | 26 | # def load_tracker(self, path, tracker_names=None): 27 | # """ 28 | # Args: 29 | # path(str): path to result 30 | # tracker_name(list): name of tracker 31 | # """ 32 | # if not tracker_names: 33 | # tracker_names = [x.split('/')[-1] for x in glob(path) 34 | # if os.path.isdir(x)] 35 | # if isinstance(tracker_names, str): 36 | # tracker_names = [tracker_names] 37 | # # self.pred_trajs = {} 38 | # for name in tracker_names: 39 | # traj_file = os.path.join(path, name, self.name+'.txt') 40 | # if os.path.exists(traj_file): 41 | # with open(traj_file, 'r') as f : 42 | # self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 43 | # for x in f.readlines()] 44 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 45 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 46 | # else: 47 | 48 | # self.tracker_names = list(self.pred_trajs.keys()) 49 | 50 | class GOT10kDataset(Dataset): 51 | """ 52 | Args: 53 | name: dataset name, should be "NFS30" or "NFS240" 54 | dataset_root, dataset root dir 55 | """ 56 | def __init__(self, name, dataset_root, load_img=False): 57 | super(GOT10kDataset, self).__init__(name, dataset_root) 58 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 59 | meta_data = json.load(f) 60 | 61 | # load videos 62 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 63 | self.videos = {} 64 | for video in pbar: 65 | pbar.set_postfix_str(video) 66 | self.videos[video] = GOT10kVideo(video, 67 | dataset_root, 68 | 
meta_data[video]['video_dir'], 69 | meta_data[video]['init_rect'], 70 | meta_data[video]['img_names'], 71 | meta_data[video]['gt_rect'], 72 | None) 73 | self.attr = {} 74 | self.attr['ALL'] = list(self.videos.keys()) 75 | -------------------------------------------------------------------------------- /toolkit/datasets/lasot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | class LaSOTVideo(Video): 12 | """ 13 | Args: 14 | name: video name 15 | root: dataset root 16 | video_dir: video directory 17 | init_rect: init rectangle 18 | img_names: image names 19 | gt_rect: groundtruth rectangle 20 | attr: attribute of video 21 | """ 22 | def __init__(self, name, root, video_dir, init_rect, img_names, 23 | gt_rect, attr, absent, load_img=False): 24 | super(LaSOTVideo, self).__init__(name, root, video_dir, 25 | init_rect, img_names, gt_rect, attr, load_img) 26 | self.absent = np.array(absent, np.int8) 27 | 28 | def load_tracker(self, path, tracker_names=None, store=True): 29 | """ 30 | Args: 31 | path(str): path to result 32 | tracker_name(list): name of tracker 33 | """ 34 | if not tracker_names: 35 | tracker_names = [x.split('/')[-1] for x in glob(path) 36 | if os.path.isdir(x)] 37 | if isinstance(tracker_names, str): 38 | tracker_names = [tracker_names] 39 | for name in tracker_names: 40 | traj_file = os.path.join(path, name, self.name+'.txt') 41 | if os.path.exists(traj_file): 42 | with open(traj_file, 'r') as f : 43 | pred_traj = [list(map(float, x.strip().split(','))) 44 | for x in f.readlines()] 45 | else: 46 | print("File does not exist: ", traj_file) 47 | if self.name == 'monkey-17': 48 | pred_traj = pred_traj[:len(self.gt_traj)] 49 | if store: 50 | self.pred_trajs[name] = pred_traj 51 | else: 52 | return pred_traj 53 | self.tracker_names = list(self.pred_trajs.keys()) 54 | 55 | 56 | 57 | class LaSOTDataset(Dataset): 58 | """ 59 | Args: 60 | name: dataset name, should be 'LaSOT' 61 | dataset_root: dataset root 62 | load_img: whether to load all images 63 | """ 64 | def __init__(self, name, dataset_root, load_img=False): 65 | super(LaSOTDataset, self).__init__(name, dataset_root) 66 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 67 | meta_data = json.load(f) 68 | 69 | # load videos 70 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 71 | self.videos = {} 72 | for video in pbar: 73 | pbar.set_postfix_str(video) 74 | self.videos[video] = LaSOTVideo(video, 75 | dataset_root, 76 | meta_data[video]['video_dir'], 77 | meta_data[video]['init_rect'], 78 | meta_data[video]['img_names'], 79 | meta_data[video]['gt_rect'], 80 | meta_data[video]['attr'], 81 | meta_data[video]['absent']) 82 | 83 | # set attr 84 | attr = [] 85 | for x in self.videos.values(): 86 | attr += x.attr 87 | attr = set(attr) 88 | self.attr = {} 89 | self.attr['ALL'] = list(self.videos.keys()) 90 | for x in attr: 91 | self.attr[x] = [] 92 | for k, v in self.videos.items(): 93 | for attr_ in v.attr: 94 | self.attr[attr_].append(k) 95 | 96 | 97 | 
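The `load_tracker` helpers in these dataset classes all resolve raw results as `<path>/<tracker_name>/<video_name>.txt`, one comma-separated `x,y,w,h` row per frame. An illustrative layout (tracker and sequence names hypothetical):

```
results/LaSOT/MyTracker/
├── airplane-1.txt    # each line: "x,y,w,h" for one frame
├── airplane-2.txt
└── ...
```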
-------------------------------------------------------------------------------- /toolkit/datasets/nfs.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | 12 | class NFSVideo(Video): 13 | """ 14 | Args: 15 | name: video name 16 | root: dataset root 17 | video_dir: video directory 18 | init_rect: init rectangle 19 | img_names: image names 20 | gt_rect: groundtruth rectangle 21 | attr: attribute of video 22 | """ 23 | def __init__(self, name, root, video_dir, init_rect, img_names, 24 | gt_rect, attr, load_img=False): 25 | super(NFSVideo, self).__init__(name, root, video_dir, 26 | init_rect, img_names, gt_rect, attr, load_img) 27 | 28 | # def load_tracker(self, path, tracker_names=None): 29 | # """ 30 | # Args: 31 | # path(str): path to result 32 | # tracker_name(list): name of tracker 33 | # """ 34 | # if not tracker_names: 35 | # tracker_names = [x.split('/')[-1] for x in glob(path) 36 | # if os.path.isdir(x)] 37 | # if isinstance(tracker_names, str): 38 | # tracker_names = [tracker_names] 39 | # # self.pred_trajs = {} 40 | # for name in tracker_names: 41 | # traj_file = os.path.join(path, name, self.name+'.txt') 42 | # if os.path.exists(traj_file): 43 | # with open(traj_file, 'r') as f : 44 | # self.pred_trajs[name] = [list(map(float, x.strip().split(','))) 45 | # for x in f.readlines()] 46 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 47 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 48 | # else: 49 | 50 | # self.tracker_names = list(self.pred_trajs.keys()) 51 | 52 | class NFSDataset(Dataset): 53 | """ 54 | Args: 55 | name: dataset name, should be "NFS30" or "NFS240" 56 | dataset_root: dataset root dir 57 | """ 58 | def __init__(self, name, dataset_root, load_img=False): 59 | super(NFSDataset, self).__init__(name, dataset_root) 60 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 61 | meta_data = json.load(f) 62 | 63 | # load videos 64 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 65 | self.videos = {} 66 | for video in pbar: 67 | pbar.set_postfix_str(video) 68 | self.videos[video] = NFSVideo(video, 69 | dataset_root, 70 | meta_data[video]['video_dir'], 71 | meta_data[video]['init_rect'], 72 | meta_data[video]['img_names'], 73 | meta_data[video]['gt_rect'], 74 | None) 75 | 76 | self.attr = {} 77 | self.attr['ALL'] = list(self.videos.keys()) 78 | -------------------------------------------------------------------------------- /toolkit/datasets/otb.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | class OTBVideo(Video): 14 | """ 15 | Args: 16 | name: video name 17 | root: dataset root 18 | video_dir: video directory 19 | init_rect: init rectangle 20 | img_names: image names 21 | gt_rect: groundtruth rectangle 22 | attr: attribute of video 23 | """ 24 | def __init__(self, name, root, video_dir, init_rect, img_names, 25 | gt_rect, attr, load_img=False): 26 | super(OTBVideo, self).__init__(name, root, video_dir, 27 | init_rect, img_names, gt_rect, attr, load_img) 28 | 29 | def load_tracker(self, path, tracker_names=None, store=True): 30 | """ 31 | Args: 32 | path(str): path to result 33 | tracker_name(list): name of tracker 34 | """ 35 | if not tracker_names: 36 | tracker_names = [x.split('/')[-1] for x in glob(path) 37 | if os.path.isdir(x)] 38 | if isinstance(tracker_names, str): 39 | tracker_names = [tracker_names] 40 | for name in tracker_names: 41 | traj_file = 
os.path.join(path, name, self.name+'.txt') 42 | if not os.path.exists(traj_file): 43 | if self.name == 'FleetFace': 44 | txt_name = 'fleetface.txt' 45 | elif self.name == 'Jogging-1': 46 | txt_name = 'jogging_1.txt' 47 | elif self.name == 'Jogging-2': 48 | txt_name = 'jogging_2.txt' 49 | elif self.name == 'Skating2-1': 50 | txt_name = 'skating2_1.txt' 51 | elif self.name == 'Skating2-2': 52 | txt_name = 'skating2_2.txt' 53 | elif self.name == 'FaceOcc1': 54 | txt_name = 'faceocc1.txt' 55 | elif self.name == 'FaceOcc2': 56 | txt_name = 'faceocc2.txt' 57 | elif self.name == 'Human4-2': 58 | txt_name = 'human4_2.txt' 59 | else: 60 | txt_name = self.name[0].lower()+self.name[1:]+'.txt' 61 | traj_file = os.path.join(path, name, txt_name) 62 | if os.path.exists(traj_file): 63 | with open(traj_file, 'r') as f : 64 | pred_traj = [list(map(float, x.strip().split(','))) 65 | for x in f.readlines()] 66 | if len(pred_traj) != len(self.gt_traj): 67 | print(name, len(pred_traj), len(self.gt_traj), self.name) 68 | if store: 69 | self.pred_trajs[name] = pred_traj 70 | else: 71 | return pred_traj 72 | else: 73 | print(traj_file) 74 | self.tracker_names = list(self.pred_trajs.keys()) 75 | 76 | 77 | 78 | class OTBDataset(Dataset): 79 | """ 80 | Args: 81 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50' 82 | dataset_root: dataset root 83 | load_img: whether to load all images 84 | """ 85 | def __init__(self, name, dataset_root, load_img=False): 86 | super(OTBDataset, self).__init__(name, dataset_root) 87 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 88 | meta_data = json.load(f) 89 | 90 | # load videos 91 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 92 | self.videos = {} 93 | for video in pbar: 94 | pbar.set_postfix_str(video) 95 | self.videos[video] = OTBVideo(video, 96 | dataset_root, 97 | meta_data[video]['video_dir'], 98 | meta_data[video]['init_rect'], 99 | meta_data[video]['img_names'], 100 | meta_data[video]['gt_rect'], 101 | meta_data[video]['attr'], 102 | load_img) 103 | 104 | # set attr 105 | attr = [] 106 | for x in self.videos.values(): 107 | attr += x.attr 108 | attr = set(attr) 109 | self.attr = {} 110 | self.attr['ALL'] = list(self.videos.keys()) 111 | for x in attr: 112 | self.attr[x] = [] 113 | for k, v in self.videos.items(): 114 | for attr_ in v.attr: 115 | self.attr[attr_].append(k) 116 | -------------------------------------------------------------------------------- /toolkit/datasets/realworld.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | def ca(dataset_root='./testing_dataset/real_world'): 14 | 15 | path=dataset_root 16 | 17 | name_list=os.listdir(path+'/data_seq') 18 | name_list.sort() 19 | 20 | b=[] 21 | for i in range(len(name_list)): 22 | b.append(name_list[i]) 23 | c=[] 24 | 25 | for jj in range(len(name_list)): 26 | imgs=path+'/data_seq/'+str(name_list[jj]) 27 | txt=path+'/anno/'+str(name_list[jj])+'.txt' 28 | bbox=[] 29 | f = open(txt) # returns a file object 30 | file= f.readlines() 31 | li=os.listdir(imgs) 32 | li.sort() 33 | for ii in range(len(file)): 34 | li[ii]='data_seq/'+name_list[jj]+'/'+li[ii] 35 | 36 | if ',' in file[ii].strip('\n'): 37 | line = file[ii].strip('\n').split(',') 38 | else: 39 | line = file[ii].strip('\n').split() 40 | 
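# Unlike the other loaders in this folder, these annotations may be either
# comma- or whitespace-separated (handled above); the try/except chain below
# keeps each coordinate as int where possible and falls back to float.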
41 | try: 42 | line[0]=int(line[0]) 43 | except: 44 | line[0]=float(line[0]) 45 | try: 46 | line[1]=int(line[1]) 47 | except: 48 | line[1]=float(line[1]) 49 | try: 50 | line[2]=int(line[2]) 51 | except: 52 | line[2]=float(line[2]) 53 | try: 54 | line[3]=int(line[3]) 55 | except: 56 | line[3]=float(line[3]) 57 | bbox.append(line) 58 | 59 | if len(bbox)!=len(li): 60 | print (jj) 61 | f.close() 62 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 63 | 64 | d=dict(zip(b,c)) 65 | 66 | return d 67 | 68 | class UAVVideo(Video): 69 | """ 70 | Args: 71 | name: video name 72 | root: dataset root 73 | video_dir: video directory 74 | init_rect: init rectangle 75 | img_names: image names 76 | gt_rect: groundtruth rectangle 77 | attr: attribute of video 78 | """ 79 | def __init__(self, name, root, video_dir, init_rect, img_names, 80 | gt_rect, attr, load_img=False): 81 | super(UAVVideo, self).__init__(name, root, video_dir, 82 | init_rect, img_names, gt_rect, attr, load_img) 83 | 84 | 85 | class RealWorldDataset(Dataset): 86 | """ 87 | Args: 88 | name: dataset name, should be 'RealWorld' 89 | dataset_root: dataset root 90 | load_img: whether to load all images 91 | """ 92 | def __init__(self, name, dataset_root, load_img=False): 93 | super(RealWorldDataset, self).__init__(name, dataset_root) 94 | meta_data = ca() 95 | 96 | # load videos 97 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 98 | self.videos = {} 99 | for video in pbar: 100 | pbar.set_postfix_str(video) 101 | self.videos[video] = UAVVideo(video, 102 | dataset_root, 103 | meta_data[video]['video_dir'], 104 | meta_data[video]['init_rect'], 105 | meta_data[video]['img_names'], 106 | meta_data[video]['gt_rect'], 107 | meta_data[video]['attr']) 108 | 109 | # set attr 110 | attr = [] 111 | for x in self.videos.values(): 112 | attr += x.attr 113 | attr = set(attr) 114 | self.attr = {} 115 | self.attr['ALL'] = list(self.videos.keys()) 116 | for x in attr: 117 | self.attr[x] = [] 118 | for k, v in self.videos.items(): 119 | for attr_ in v.attr: 120 | self.attr[attr_].append(k) 121 | 122 | -------------------------------------------------------------------------------- /toolkit/datasets/trackingnet.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | class TrackingNetVideo(Video): 12 | """ 13 | Args: 14 | name: video name 15 | root: dataset root 16 | video_dir: video directory 17 | init_rect: init rectangle 18 | img_names: image names 19 | gt_rect: groundtruth rectangle 20 | attr: attribute of video 21 | """ 22 | def __init__(self, name, root, video_dir, init_rect, img_names, 23 | gt_rect, attr, load_img=False): 24 | super(TrackingNetVideo, self).__init__(name, root, video_dir, 25 | init_rect, img_names, gt_rect, attr, load_img) 26 | 27 | # def load_tracker(self, path, tracker_names=None): 28 | # """ 29 | # Args: 30 | # path(str): path to result 31 | # tracker_name(list): name of tracker 32 | # """ 33 | # if not tracker_names: 34 | # tracker_names = [x.split('/')[-1] for x in glob(path) 35 | # if os.path.isdir(x)] 36 | # if isinstance(tracker_names, str): 37 | # tracker_names = [tracker_names] 38 | # # self.pred_trajs = {} 39 | # for name in tracker_names: 40 | # traj_file = os.path.join(path, name, self.name+'.txt') 41 | # if os.path.exists(traj_file): 42 | # with open(traj_file, 'r') as f : 43 | # self.pred_trajs[name] 
= [list(map(float, x.strip().split(','))) 44 | # for x in f.readlines()] 45 | # if len(self.pred_trajs[name]) != len(self.gt_traj): 46 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name) 47 | # else: 48 | 49 | # self.tracker_names = list(self.pred_trajs.keys()) 50 | 51 | class TrackingNetDataset(Dataset): 52 | """ 53 | Args: 54 | name: dataset name, should be 'TrackingNet' 55 | dataset_root: dataset root dir 56 | """ 57 | def __init__(self, name, dataset_root, load_img=False): 58 | super(TrackingNetDataset, self).__init__(name, dataset_root) 59 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 60 | meta_data = json.load(f) 61 | 62 | # load videos 63 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 64 | self.videos = {} 65 | for video in pbar: 66 | pbar.set_postfix_str(video) 67 | self.videos[video] = TrackingNetVideo(video, 68 | dataset_root, 69 | meta_data[video]['video_dir'], 70 | meta_data[video]['init_rect'], 71 | meta_data[video]['img_names'], 72 | meta_data[video]['gt_rect'], 73 | None) 74 | self.attr = {} 75 | self.attr['ALL'] = list(self.videos.keys()) 76 | -------------------------------------------------------------------------------- /toolkit/datasets/uav.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from tqdm import tqdm 5 | from glob import glob 6 | 7 | from .dataset import Dataset 8 | from .video import Video 9 | 10 | class UAVVideo(Video): 11 | """ 12 | Args: 13 | name: video name 14 | root: dataset root 15 | video_dir: video directory 16 | init_rect: init rectangle 17 | img_names: image names 18 | gt_rect: groundtruth rectangle 19 | attr: attribute of video 20 | """ 21 | def __init__(self, name, root, video_dir, init_rect, img_names, 22 | gt_rect, attr, load_img=False): 23 | super(UAVVideo, self).__init__(name, root, video_dir, 24 | init_rect, img_names, gt_rect, attr, load_img) 25 | 26 | 27 | class UAVDataset(Dataset): 28 | """ 29 | Args: 30 | name: dataset name, should be 'UAV123', 'UAV20L' 31 | dataset_root: dataset root 32 | load_img: whether to load all images 33 | """ 34 | def __init__(self, name, dataset_root, load_img=False): 35 | super(UAVDataset, self).__init__(name, dataset_root) 36 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 37 | meta_data = json.load(f) 38 | 39 | # load videos 40 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 41 | self.videos = {} 42 | for video in pbar: 43 | pbar.set_postfix_str(video) 44 | self.videos[video] = UAVVideo(video, 45 | dataset_root, 46 | meta_data[video]['video_dir'], 47 | meta_data[video]['init_rect'], 48 | meta_data[video]['img_names'], 49 | meta_data[video]['gt_rect'], 50 | meta_data[video]['attr']) 51 | 52 | # set attr 53 | attr = [] 54 | for x in self.videos.values(): 55 | attr += x.attr 56 | attr = set(attr) 57 | self.attr = {} 58 | self.attr['ALL'] = list(self.videos.keys()) 59 | for x in attr: 60 | self.attr[x] = [] 61 | for k, v in self.videos.items(): 62 | for attr_ in v.attr: 63 | self.attr[attr_].append(k) 64 | 65 | -------------------------------------------------------------------------------- /toolkit/datasets/uav10fps.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | def ca(): 13 | path='./UAV123_10fps' 
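# NOTE: this root is hardcoded relative to the working directory; point it at
# the extracted UAV123@10fps folder before evaluating.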
14 | 15 | name_list=os.listdir(path+'/data_seq') 16 | name_list.sort() 17 | a=123 18 | b=[] 19 | for i in range(a): 20 | b.append(name_list[i]) 21 | c=[] 22 | 23 | for jj in range(a): 24 | imgs=path+'/data_seq/'+str(name_list[jj]) 25 | txt=path+'/anno/'+str(name_list[jj])+'.txt' 26 | bbox=[] 27 | f = open(txt) # returns a file object 28 | file= f.readlines() 29 | li=os.listdir(imgs) 30 | li.sort() 31 | for ii in range(len(file)): 32 | li[ii]='data_seq/'+name_list[jj]+'/'+li[ii] 33 | 34 | line = file[ii].strip('\n').split(',') 35 | 36 | try: 37 | line[0]=int(line[0]) 38 | except: 39 | line[0]=float(line[0]) 40 | try: 41 | line[1]=int(line[1]) 42 | except: 43 | line[1]=float(line[1]) 44 | try: 45 | line[2]=int(line[2]) 46 | except: 47 | line[2]=float(line[2]) 48 | try: 49 | line[3]=int(line[3]) 50 | except: 51 | line[3]=float(line[3]) 52 | bbox.append(line) 53 | 54 | if len(bbox)!=len(li): 55 | print (jj) 56 | f.close() 57 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 58 | 59 | d=dict(zip(b,c)) 60 | 61 | return d 62 | 63 | class UAVVideo(Video): 64 | """ 65 | Args: 66 | name: video name 67 | root: dataset root 68 | video_dir: video directory 69 | init_rect: init rectangle 70 | img_names: image names 71 | gt_rect: groundtruth rectangle 72 | attr: attribute of video 73 | """ 74 | def __init__(self, name, root, video_dir, init_rect, img_names, 75 | gt_rect, attr, load_img=False): 76 | super(UAVVideo, self).__init__(name, root, video_dir, 77 | init_rect, img_names, gt_rect, attr, load_img) 78 | 79 | 80 | class UAV10Dataset(Dataset): 81 | """ 82 | Args: 83 | name: dataset name, should contain 'UAV10' 84 | dataset_root: dataset root 85 | load_img: whether to load all images 86 | """ 87 | def __init__(self, name, dataset_root, load_img=False): 88 | super(UAV10Dataset, self).__init__(name, dataset_root) 89 | meta_data = ca() 90 | 91 | # load videos 92 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 93 | self.videos = {} 94 | for video in pbar: 95 | pbar.set_postfix_str(video) 96 | self.videos[video] = UAVVideo(video, 97 | dataset_root, 98 | meta_data[video]['video_dir'], 99 | meta_data[video]['init_rect'], 100 | meta_data[video]['img_names'], 101 | meta_data[video]['gt_rect'], 102 | meta_data[video]['attr']) 103 | 104 | # set attr 105 | attr = [] 106 | for x in self.videos.values(): 107 | attr += x.attr 108 | attr = set(attr) 109 | self.attr = {} 110 | self.attr['ALL'] = list(self.videos.keys()) 111 | for x in attr: 112 | self.attr[x] = [] 113 | for k, v in self.videos.items(): 114 | for attr_ in v.attr: 115 | self.attr[attr_].append(k) 116 | 117 | 
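The `ca()` helpers in this folder repeat the same scan-and-parse loop with only the directory layout changing. A consolidated sketch of that shared logic (hypothetical, not part of the repo; `parse_sequences`, `img_root`, and `anno_root` are illustrative names) makes the pattern explicit:

```python
# Hypothetical refactor sketch of the shared ca() core, not a repo API.
import os

def _num(v):
    # mirror the per-coordinate try/except in the original loaders
    try:
        return int(v)
    except ValueError:
        return float(v)

def parse_sequences(img_root, anno_root, img_prefix=''):
    """Return {video: {'attr', 'gt_rect', 'img_names', 'init_rect', 'video_dir'}}."""
    meta = {}
    for video in sorted(os.listdir(img_root)):
        frames = [img_prefix + video + '/' + f
                  for f in sorted(os.listdir(os.path.join(img_root, video)))]
        with open(os.path.join(anno_root, video + '.txt')) as f:
            boxes = [[_num(v) for v in row.strip().replace(',', ' ').split()]
                     for row in f if row.strip()]
        if len(boxes) != len(frames):
            print(video)  # length mismatch, as in the original loaders
        meta[video] = {'attr': [], 'gt_rect': boxes, 'img_names': frames,
                       'init_rect': boxes[0], 'video_dir': video}
    return meta
```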
-------------------------------------------------------------------------------- /toolkit/datasets/uav123.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | def ca(dataset_root='./testing_dataset/UAV123'): 14 | 15 | path=dataset_root 16 | 17 | name_list=os.listdir(path+'/data_seq') 18 | name_list.sort() 19 | 20 | b=[] 21 | for i in range(len(name_list)): 22 | b.append(name_list[i]) 23 | c=[] 24 | 25 | for jj in range(len(name_list)): 26 | imgs=path+'/data_seq/'+str(name_list[jj]) 27 | txt=path+'/anno/'+str(name_list[jj])+'.txt' 28 | bbox=[] 29 | f = open(txt) # returns a file object 30 | file= f.readlines() 31 | li=os.listdir(imgs) 32 | li.sort() 33 | for ii in range(len(file)): 34 | li[ii]='data_seq/'+name_list[jj]+'/'+li[ii] 35 | 36 | line = file[ii].strip('\n').split(',') 37 | 38 | try: 39 | line[0]=int(line[0]) 40 | except: 41 | line[0]=float(line[0]) 42 | try: 43 | line[1]=int(line[1]) 44 | except: 45 | line[1]=float(line[1]) 46 | try: 47 | line[2]=int(line[2]) 48 | except: 49 | line[2]=float(line[2]) 50 | try: 51 | line[3]=int(line[3]) 52 | except: 53 | line[3]=float(line[3]) 54 | bbox.append(line) 55 | 56 | if len(bbox)!=len(li): 57 | print (jj) 58 | f.close() 59 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 60 | 61 | d=dict(zip(b,c)) 62 | 63 | return d 64 | 65 | class UAVVideo(Video): 66 | """ 67 | Args: 68 | name: video name 69 | root: dataset root 70 | video_dir: video directory 71 | init_rect: init rectangle 72 | img_names: image names 73 | gt_rect: groundtruth rectangle 74 | attr: attribute of video 75 | """ 76 | def __init__(self, name, root, video_dir, init_rect, img_names, 77 | gt_rect, attr, load_img=False): 78 | super(UAVVideo, self).__init__(name, root, video_dir, 79 | init_rect, img_names, gt_rect, attr, load_img) 80 | 81 | 82 | class UAV123Dataset(Dataset): 83 | """ 84 | Args: 85 | name: dataset name, should be 'UAV123' 86 | dataset_root: dataset root 87 | load_img: whether to load all images 88 | """ 89 | def __init__(self, name, dataset_root, load_img=False): 90 | super(UAV123Dataset, self).__init__(name, dataset_root) 91 | meta_data = ca() 92 | 93 | # load videos 94 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 95 | self.videos = {} 96 | for video in pbar: 97 | pbar.set_postfix_str(video) 98 | self.videos[video] = UAVVideo(video, 99 | dataset_root, 100 | meta_data[video]['video_dir'], 101 | meta_data[video]['init_rect'], 102 | meta_data[video]['img_names'], 103 | meta_data[video]['gt_rect'], 104 | meta_data[video]['attr']) 105 | 106 | # set attr 107 | attr = [] 108 | for x in self.videos.values(): 109 | attr += x.attr 110 | attr = set(attr) 111 | self.attr = {} 112 | self.attr['ALL'] = list(self.videos.keys()) 113 | for x in attr: 114 | self.attr[x] = [] 115 | for k, v in self.videos.items(): 116 | for attr_ in v.attr: 117 | self.attr[attr_].append(k) 118 | 119 | -------------------------------------------------------------------------------- /toolkit/datasets/uav20l.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | def ca(): 14 | 15 | path='./testing_dataset/UAV20L' 16 | 17 | name_list=os.listdir(path+'/data_seq/') 18 | name_list.sort() 19 | 20 | b=[] 21 | for i in range(len(name_list)): 22 | b.append(name_list[i]) 23 | c=[] 24 | 25 | for jj in range(len(name_list)): 26 | imgs=path+'/data_seq/'+str(name_list[jj]) 27 | txt=path+'/anno/'+str(name_list[jj])+'.txt' 28 | bbox=[] 29 | f = open(txt) # returns a file object 30 | file= f.readlines() 31 | li=os.listdir(imgs) 32 | li.sort() 33 | for ii in range(len(file)): 34 | li[ii]='data_seq/'+name_list[jj]+'/'+li[ii] 35 | 36 | line = file[ii].strip('\n').split(',') 37 | 38 | try: 39 | line[0]=int(line[0]) 40 | except: 41 | line[0]=float(line[0]) 42 | try: 43 | line[1]=int(line[1]) 44 | except: 45 | line[1]=float(line[1]) 46 | try: 47 | line[2]=int(line[2]) 48 | except: 49 | line[2]=float(line[2]) 50 | try: 51 | line[3]=int(line[3]) 52 | except: 53 | 
line[3]=float(line[3]) 54 | bbox.append(line) 55 | 56 | if len(bbox)!=len(li): 57 | print (jj) 58 | f.close() 59 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 60 | 61 | d=dict(zip(b,c)) 62 | 63 | return d 64 | 65 | class UAVVideo(Video): 66 | """ 67 | Args: 68 | name: video name 69 | root: dataset root 70 | video_dir: video directory 71 | init_rect: init rectangle 72 | img_names: image names 73 | gt_rect: groundtruth rectangle 74 | attr: attribute of video 75 | """ 76 | def __init__(self, name, root, video_dir, init_rect, img_names, 77 | gt_rect, attr, load_img=False): 78 | super(UAVVideo, self).__init__(name, root, video_dir, 79 | init_rect, img_names, gt_rect, attr, load_img) 80 | 81 | 82 | class UAV20Dataset(Dataset): 83 | """ 84 | Args: 85 | name: dataset name, should be 'UAV20L' 86 | dataset_root: dataset root 87 | load_img: whether to load all images 88 | """ 89 | def __init__(self, name, dataset_root, load_img=False): 90 | super(UAV20Dataset, self).__init__(name, dataset_root) 91 | meta_data = ca() 92 | 93 | # load videos 94 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 95 | self.videos = {} 96 | for video in pbar: 97 | pbar.set_postfix_str(video) 98 | self.videos[video] = UAVVideo(video, 99 | dataset_root, 100 | meta_data[video]['video_dir'], 101 | meta_data[video]['init_rect'], 102 | meta_data[video]['img_names'], 103 | meta_data[video]['gt_rect'], 104 | meta_data[video]['attr']) 105 | 106 | # set attr 107 | attr = [] 108 | for x in self.videos.values(): 109 | attr += x.attr 110 | attr = set(attr) 111 | self.attr = {} 112 | self.attr['ALL'] = list(self.videos.keys()) 113 | for x in attr: 114 | self.attr[x] = [] 115 | for k, v in self.videos.items(): 116 | for attr_ in v.attr: 117 | self.attr[attr_].append(k) 118 | 119 | -------------------------------------------------------------------------------- /toolkit/datasets/uavdark.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | def ca(): 14 | 15 | path='./UAVDark135' 16 | 17 | name_list=os.listdir(path+'/data_seq/') 18 | name_list.sort() 19 | 20 | b=[] 21 | for i in range(len(name_list)): 22 | b.append(name_list[i]) 23 | c=[] 24 | 25 | for jj in range(len(name_list)): 26 | imgs=path+'/data_seq/'+str(name_list[jj]) 27 | txt=path+'/anno/'+str(name_list[jj])+'.txt' 28 | bbox=[] 29 | f = open(txt) # returns a file object 30 | file= f.readlines() 31 | li=os.listdir(imgs) 32 | li.sort() 33 | for ii in range(len(file)): 34 | li[ii]='data_seq/'+name_list[jj]+'/'+li[ii] 35 | 36 | line = file[ii].strip('\n').split(',') 37 | 38 | try: 39 | line[0]=int(line[0]) 40 | except: 41 | line[0]=float(line[0]) 42 | try: 43 | line[1]=int(line[1]) 44 | except: 45 | line[1]=float(line[1]) 46 | try: 47 | line[2]=int(line[2]) 48 | except: 49 | line[2]=float(line[2]) 50 | try: 51 | line[3]=int(line[3]) 52 | except: 53 | line[3]=float(line[3]) 54 | bbox.append(line) 55 | 56 | if len(bbox)!=len(li): 57 | print (jj) 58 | f.close() 59 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 60 | 61 | d=dict(zip(b,c)) 62 | 63 | return d 64 | 65 | class UAVVideo(Video): 66 | """ 67 | Args: 68 | name: video name 69 | root: dataset root 70 | video_dir: video directory 71 | init_rect: init rectangle 72 | img_names: 
image names 73 | gt_rect: groundtruth rectangle 74 | attr: attribute of video 75 | """ 76 | def __init__(self, name, root, video_dir, init_rect, img_names, 77 | gt_rect, attr, load_img=False): 78 | super(UAVVideo, self).__init__(name, root, video_dir, 79 | init_rect, img_names, gt_rect, attr, load_img) 80 | 81 | 82 | class UAVDARKDataset(Dataset): 83 | """ 84 | Args: 85 | name: dataset name, should contain 'UAVDARK' 86 | dataset_root: dataset root 87 | load_img: whether to load all images 88 | """ 89 | def __init__(self, name, dataset_root, load_img=False): 90 | super(UAVDARKDataset, self).__init__(name, dataset_root) 91 | meta_data = ca() 92 | 93 | # load videos 94 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 95 | self.videos = {} 96 | for video in pbar: 97 | pbar.set_postfix_str(video) 98 | self.videos[video] = UAVVideo(video, 99 | dataset_root, 100 | meta_data[video]['video_dir'], 101 | meta_data[video]['init_rect'], 102 | meta_data[video]['img_names'], 103 | meta_data[video]['gt_rect'], 104 | meta_data[video]['attr']) 105 | 106 | # set attr 107 | attr = [] 108 | for x in self.videos.values(): 109 | attr += x.attr 110 | attr = set(attr) 111 | self.attr = {} 112 | self.attr['ALL'] = list(self.videos.keys()) 113 | for x in attr: 114 | self.attr[x] = [] 115 | for k, v in self.videos.items(): 116 | for attr_ in v.attr: 117 | self.attr[attr_].append(k) 118 | 119 | -------------------------------------------------------------------------------- /toolkit/datasets/visdrone.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | class UVADTVideo(Video): 14 | """ 15 | Args: 16 | name: video name 17 | root: dataset root 18 | video_dir: video directory 19 | init_rect: init rectangle 20 | img_names: image names 21 | gt_rect: groundtruth rectangle 22 | attr: attribute of video 23 | """ 24 | def __init__(self, name, root, video_dir, init_rect, img_names, 25 | gt_rect, attr, load_img=False): 26 | super(UVADTVideo, self).__init__(name, root, video_dir, 27 | init_rect, img_names, gt_rect, attr, load_img) 28 | 29 | def ca(): 30 | 31 | 32 | path='./VisDrone2018-SOT-test' 33 | 34 | name_list=os.listdir(path+'/sequences') 35 | name_list.sort() 36 | 37 | b=[] 38 | for i in range(len(name_list)): 39 | b.append(name_list[i]) 40 | c=[] 41 | 42 | for jj in range(len(name_list)): 43 | imgs=path+'/sequences/'+str(name_list[jj]) 44 | txt=path+'/annotations/'+str(name_list[jj])+'.txt' 45 | bbox=[] 46 | f = open(txt) # returns a file object 47 | file= f.readlines() 48 | li=os.listdir(imgs) 49 | li.sort() 50 | for ii in range(len(file)): 51 | li[ii]='sequences/'+name_list[jj]+'/'+li[ii] 52 | 53 | line = file[ii].strip('\n').split(',') 54 | 55 | try: 56 | line[0]=int(line[0]) 57 | except: 58 | line[0]=float(line[0]) 59 | try: 60 | line[1]=int(line[1]) 61 | except: 62 | line[1]=float(line[1]) 63 | try: 64 | line[2]=int(line[2]) 65 | except: 66 | line[2]=float(line[2]) 67 | try: 68 | line[3]=int(line[3]) 69 | except: 70 | line[3]=float(line[3]) 71 | bbox.append(line) 72 | 73 | if len(bbox)!=len(li): 74 | print (jj) 75 | f.close() 76 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 77 | 78 | d=dict(zip(b,c)) 79 | 80 | return d 81 | class VISDRONEDataset(Dataset): 82 | """ 83 | Args: 84 | name: dataset name, should contain 
'VISDRONE' 85 | dataset_root: dataset root 86 | load_img: whether to load all images 87 | """ 88 | def __init__(self, name, dataset_root, load_img=False): 89 | super(VISDRONEDataset, self).__init__(name, dataset_root) 90 | # with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 91 | # meta_data = json.load(f) 92 | meta_data=ca() 93 | # load videos 94 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 95 | self.videos = {} 96 | for video in pbar: 97 | pbar.set_postfix_str(video) 98 | self.videos[video] = UVADTVideo(video, 99 | dataset_root, 100 | meta_data[video]['video_dir'], 101 | meta_data[video]['init_rect'], 102 | meta_data[video]['img_names'], 103 | meta_data[video]['gt_rect'], 104 | meta_data[video]['attr'], 105 | load_img) 106 | 107 | # set attr 108 | attr = [] 109 | for x in self.videos.values(): 110 | attr += x.attr 111 | attr = set(attr) 112 | self.attr = {} 113 | self.attr['ALL'] = list(self.videos.keys()) 114 | for x in attr: 115 | self.attr[x] = [] 116 | for k, v in self.videos.items(): 117 | for attr_ in v.attr: 118 | self.attr[attr_].append(k) 119 | -------------------------------------------------------------------------------- /toolkit/datasets/visdrone1.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | class UVADTVideo(Video): 14 | """ 15 | Args: 16 | name: video name 17 | root: dataset root 18 | video_dir: video directory 19 | init_rect: init rectangle 20 | img_names: image names 21 | gt_rect: groundtruth rectangle 22 | attr: attribute of video 23 | """ 24 | def __init__(self, name, root, video_dir, init_rect, img_names, 25 | gt_rect, attr, load_img=False): 26 | super(UVADTVideo, self).__init__(name, root, video_dir, 27 | init_rect, img_names, gt_rect, attr, load_img) 28 | 29 | def ca(): 30 | 31 | 32 | path='./VisDrone2018-SOT-test' 33 | 34 | name_list=os.listdir(path+'/sequences') 35 | name_list.sort() 36 | 37 | b=[] 38 | for i in range(len(name_list)): 39 | b.append(name_list[i]) 40 | c=[] 41 | 42 | for jj in range(len(name_list)): 43 | imgs=path+'/sequences/'+str(name_list[jj]) 44 | txt=path+'/annotations/'+str(name_list[jj])+'.txt' 45 | bbox=[] 46 | f = open(txt) # returns a file object 47 | file= f.readlines() 48 | li=os.listdir(imgs) 49 | li.sort() 50 | for ii in range(len(file)): 51 | li[ii]=name_list[jj]+'/'+li[ii] 52 | 53 | line = file[ii].strip('\n').split(',') 54 | 55 | try: 56 | line[0]=int(line[0]) 57 | except: 58 | line[0]=float(line[0]) 59 | try: 60 | line[1]=int(line[1]) 61 | except: 62 | line[1]=float(line[1]) 63 | try: 64 | line[2]=int(line[2]) 65 | except: 66 | line[2]=float(line[2]) 67 | try: 68 | line[3]=int(line[3]) 69 | except: 70 | line[3]=float(line[3]) 71 | bbox.append(line) 72 | 73 | if len(bbox)!=len(li): 74 | print (jj) 75 | f.close() 76 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 77 | 78 | d=dict(zip(b,c)) 79 | 80 | return d 81 | class VISDRONED2018Dataset(Dataset): 82 | """ 83 | Args: 84 | name: dataset name, should contain 'VISDRONE' 85 | dataset_root: dataset root 86 | load_img: whether to load all images 87 | """ 88 | def __init__(self, name, dataset_root, load_img=False): 89 | super(VISDRONED2018Dataset, self).__init__(name, dataset_root) 90 | # with open(os.path.join(dataset_root, name+'.json'), 'r') 
as f: 91 | # meta_data = json.load(f) 92 | meta_data=ca() 93 | # load videos 94 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 95 | self.videos = {} 96 | for video in pbar: 97 | pbar.set_postfix_str(video) 98 | self.videos[video] = UVADTVideo(video, 99 | dataset_root, 100 | meta_data[video]['video_dir'], 101 | meta_data[video]['init_rect'], 102 | meta_data[video]['img_names'], 103 | meta_data[video]['gt_rect'], 104 | meta_data[video]['attr'], 105 | load_img) 106 | 107 | # set attr 108 | attr = [] 109 | for x in self.videos.values(): 110 | attr += x.attr 111 | attr = set(attr) 112 | self.attr = {} 113 | self.attr['ALL'] = list(self.videos.keys()) 114 | for x in attr: 115 | self.attr[x] = [] 116 | for k, v in self.videos.items(): 117 | for attr_ in v.attr: 118 | self.attr[attr_].append(k) 119 | -------------------------------------------------------------------------------- /toolkit/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .ar_benchmark import AccuracyRobustnessBenchmark 2 | from .eao_benchmark import EAOBenchmark 3 | from .ope_benchmark import OPEBenchmark 4 | from .f1_benchmark import F1Benchmark 5 | -------------------------------------------------------------------------------- /toolkit/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # from . import region 2 | from .statistics import * 3 | -------------------------------------------------------------------------------- /toolkit/utils/c_region.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "src/region.h": 2 | ctypedef enum region_type "RegionType": 3 | EMPTY 4 | SPECIAL 5 | RECTANGLE 6 | POLYGON 7 | MASK 8 | 9 | ctypedef struct region_bounds: 10 | float top 11 | float bottom 12 | float left 13 | float right 14 | 15 | ctypedef struct region_rectangle: 16 | float x 17 | float y 18 | float width 19 | float height 20 | 21 | # ctypedef struct region_mask: 22 | # int x 23 | # int y 24 | # int width 25 | # int height 26 | # char *data 27 | 28 | ctypedef struct region_polygon: 29 | int count 30 | float *x 31 | float *y 32 | 33 | ctypedef union region_container_data: 34 | region_rectangle rectangle 35 | region_polygon polygon 36 | # region_mask mask 37 | int special 38 | 39 | ctypedef struct region_container: 40 | region_type type 41 | region_container_data data 42 | 43 | # ctypedef struct region_overlap: 44 | # float overlap 45 | # float only1 46 | # float only2 47 | 48 | # region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds) 49 | 50 | float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds) 51 | -------------------------------------------------------------------------------- /toolkit/utils/misc.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author 3 | """ 4 | import numpy as np 5 | 6 | def determine_thresholds(confidence, resolution=100): 7 | """choose thresholds according to confidence 8 | 9 | Args: 10 | confidence: list or numpy array 11 | resolution: number of thresholds to choose 12 | 13 | Returns: 14 | threshold: numpy array 15 | """ 16 | if isinstance(confidence, list): 17 | confidence = np.array(confidence) 18 | confidence = confidence.flatten() 19 | confidence = confidence[~np.isnan(confidence)] 20 | confidence.sort() 21 | 22 | assert 
len(confidence) > resolution and resolution > 2 23 | 24 | thresholds = np.ones((resolution)) 25 | thresholds[0] = - np.inf 26 | thresholds[-1] = np.inf 27 | delta = np.floor(len(confidence) / (resolution - 2)) 28 | idxs = np.linspace(delta, len(confidence)-delta, resolution-2, dtype=np.int32) 29 | thresholds[1:-1] = confidence[idxs] 30 | return thresholds 31 | -------------------------------------------------------------------------------- /toolkit/utils/src/region.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 4; tab-width: 4 -*- */ 2 | 3 | #ifndef _REGION_H_ 4 | #define _REGION_H_ 5 | 6 | #ifdef TRAX_STATIC_DEFINE 7 | # define __TRAX_EXPORT 8 | #else 9 | # ifndef __TRAX_EXPORT 10 | # if defined(_MSC_VER) 11 | # ifdef trax_EXPORTS 12 | /* We are building this library */ 13 | # define __TRAX_EXPORT __declspec(dllexport) 14 | # else 15 | /* We are using this library */ 16 | # define __TRAX_EXPORT __declspec(dllimport) 17 | # endif 18 | # elif defined(__GNUC__) 19 | # ifdef trax_EXPORTS 20 | /* We are building this library */ 21 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 22 | # else 23 | /* We are using this library */ 24 | # define __TRAX_EXPORT __attribute__((visibility("default"))) 25 | # endif 26 | # endif 27 | # endif 28 | #endif 29 | 30 | #ifndef MAX 31 | #define MAX(a,b) (((a) > (b)) ? (a) : (b)) 32 | #endif 33 | 34 | #ifndef MIN 35 | #define MIN(a,b) (((a) < (b)) ? (a) : (b)) 36 | #endif 37 | 38 | #define TRAX_DEFAULT_CODE 0 39 | 40 | #define REGION_LEGACY_RASTERIZATION 1 41 | 42 | #ifdef __cplusplus 43 | extern "C" { 44 | #endif 45 | 46 | typedef enum region_type {EMPTY, SPECIAL, RECTANGLE, POLYGON, MASK} region_type; 47 | 48 | typedef struct region_bounds { 49 | 50 | float top; 51 | float bottom; 52 | float left; 53 | float right; 54 | 55 | } region_bounds; 56 | 57 | typedef struct region_polygon { 58 | 59 | int count; 60 | 61 | float* x; 62 | float* y; 63 | 64 | } region_polygon; 65 | 66 | typedef struct region_mask { 67 | 68 | int x; 69 | int y; 70 | 71 | int width; 72 | int height; 73 | 74 | char* data; 75 | 76 | } region_mask; 77 | 78 | typedef struct region_rectangle { 79 | 80 | float x; 81 | float y; 82 | float width; 83 | float height; 84 | 85 | } region_rectangle; 86 | 87 | typedef struct region_container { 88 | enum region_type type; 89 | union { 90 | region_rectangle rectangle; 91 | region_polygon polygon; 92 | region_mask mask; 93 | int special; 94 | } data; 95 | } region_container; 96 | 97 | typedef struct region_overlap { 98 | 99 | float overlap; 100 | float only1; 101 | float only2; 102 | 103 | } region_overlap; 104 | 105 | extern const region_bounds region_no_bounds; 106 | 107 | __TRAX_EXPORT int region_set_flags(int mask); 108 | 109 | __TRAX_EXPORT int region_clear_flags(int mask); 110 | 111 | __TRAX_EXPORT region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds); 112 | 113 | __TRAX_EXPORT float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds); 114 | 115 | __TRAX_EXPORT region_bounds region_create_bounds(float left, float top, float right, float bottom); 116 | 117 | __TRAX_EXPORT region_bounds region_compute_bounds(const region_container* region); 118 | 119 | __TRAX_EXPORT int region_parse(const char* buffer, region_container** region); 120 | 121 | __TRAX_EXPORT char* region_string(region_container* region); 122 | 123 
| __TRAX_EXPORT void region_print(FILE* out, region_container* region); 124 | 125 | __TRAX_EXPORT region_container* region_convert(const region_container* region, region_type type); 126 | 127 | __TRAX_EXPORT void region_release(region_container** region); 128 | 129 | __TRAX_EXPORT region_container* region_create_special(int code); 130 | 131 | __TRAX_EXPORT region_container* region_create_rectangle(float x, float y, float width, float height); 132 | 133 | __TRAX_EXPORT region_container* region_create_polygon(int count); 134 | 135 | __TRAX_EXPORT int region_contains_point(region_container* r, float x, float y); 136 | 137 | __TRAX_EXPORT void region_get_mask(region_container* r, char* mask, int width, int height); 138 | 139 | __TRAX_EXPORT void region_get_mask_offset(region_container* r, char* mask, int x, int y, int width, int height); 140 | 141 | #ifdef __cplusplus 142 | } 143 | #endif 144 | 145 | #endif 146 | -------------------------------------------------------------------------------- /toolkit/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .draw_f1 import draw_f1 2 | from .draw_success_precision import draw_success_precision 3 | from .draw_eao import draw_eao 4 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_eao.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import pickle 4 | 5 | from matplotlib import rc 6 | from .draw_utils import COLOR, MARKER_STYLE 7 | 8 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) 9 | rc('text', usetex=True) 10 | 11 | def draw_eao(result): 12 | fig = plt.figure() 13 | ax = fig.add_subplot(111, projection='polar') 14 | angles = np.linspace(0, 2*np.pi, 8, endpoint=True) 15 | 16 | attr2value = [] 17 | for i, (tracker_name, ret) in enumerate(result.items()): 18 | value = list(ret.values()) 19 | attr2value.append(value) 20 | value.append(value[0]) 21 | attr2value = np.array(attr2value) 22 | max_value = np.max(attr2value, axis=0) 23 | min_value = np.min(attr2value, axis=0) 24 | for i, (tracker_name, ret) in enumerate(result.items()): 25 | value = list(ret.values()) 26 | value.append(value[0]) 27 | value = np.array(value) 28 | value *= (1 / max_value) 29 | plt.plot(angles, value, linestyle='-', color=COLOR[i], marker=MARKER_STYLE[i], 30 | label=tracker_name, linewidth=1.5, markersize=6) 31 | 32 | attrs = ["Overall", "Camera motion", 33 | "Illumination change","Motion Change", 34 | "Size change","Occlusion", 35 | "Unassigned"] 36 | attr_value = [] 37 | for attr, maxv, minv in zip(attrs, max_value, min_value): 38 | attr_value.append(attr + "\n({:.3f},{:.3f})".format(minv, maxv)) 39 | ax.set_thetagrids(angles[:-1] * 180/np.pi, attr_value) 40 | ax.spines['polar'].set_visible(False) 41 | ax.legend(loc='upper center', bbox_to_anchor=(0.5,-0.07), frameon=False, ncol=5) 42 | ax.grid(b=False) 43 | ax.set_ylim(0, 1.18) 44 | ax.set_yticks([]) 45 | plt.show() 46 | 47 | if __name__ == '__main__': 48 | result = pickle.load(open("../../result.pkl", 'rb')) 49 | draw_eao(result) 50 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_f1.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from matplotlib import rc 5 | from .draw_utils import COLOR, LINE_STYLE 6 | 7 | 
rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']}) 8 | rc('text', usetex=True) 9 | 10 | def draw_f1(result, bold_name=None): 11 | # drawing f1 contour 12 | fig, ax = plt.subplots() 13 | for f1 in np.arange(0.1, 1, 0.1): 14 | recall = np.arange(f1, 1+0.01, 0.01) 15 | precision = f1 * recall / (2 * recall - f1) 16 | ax.plot(recall, precision, color=[0,1,0], linestyle='-', linewidth=0.5) 17 | ax.plot(precision, recall, color=[0,1,0], linestyle='-', linewidth=0.5) 18 | ax.grid(b=True) 19 | ax.set_aspect(1) 20 | plt.xlabel('Recall') 21 | plt.ylabel('Precision') 22 | plt.axis([0, 1, 0, 1]) 23 | plt.title(r'\textbf{VOT2018-LT Precision vs Recall}') 24 | 25 | # draw result line 26 | all_precision = {} 27 | all_recall = {} 28 | best_f1 = {} 29 | best_idx = {} 30 | for tracker_name, ret in result.items(): 31 | precision = np.mean(list(ret['precision'].values()), axis=0) 32 | recall = np.mean(list(ret['recall'].values()), axis=0) 33 | f1 = 2 * precision * recall / (precision + recall) 34 | max_idx = np.argmax(f1) 35 | all_precision[tracker_name] = precision 36 | all_recall[tracker_name] = recall 37 | best_f1[tracker_name] = f1[max_idx] 38 | best_idx[tracker_name] = max_idx 39 | 40 | for idx, (tracker_name, f1_best) in \ 41 | enumerate(sorted(best_f1.items(), key=lambda x:x[1], reverse=True)): 42 | if tracker_name == bold_name: 43 | label = r"\textbf{[%.3f] Ours}" % (f1_best) 44 | else: 45 | label = "[%.3f] " % (f1_best) + tracker_name 46 | recall = all_recall[tracker_name][:-1] 47 | precision = all_precision[tracker_name][:-1] 48 | ax.plot(recall, precision, color=COLOR[idx], linestyle='-', 49 | label=label) 50 | f1_idx = best_idx[tracker_name] 51 | ax.plot(recall[f1_idx], precision[f1_idx], color=[0,0,0], marker='o', 52 | markerfacecolor=COLOR[idx], markersize=5) 53 | ax.legend(loc='lower right', labelspacing=0.2) 54 | plt.xticks(np.arange(0, 1+0.1, 0.1)) 55 | plt.yticks(np.arange(0, 1+0.1, 0.1)) 56 | plt.show() 57 | 58 | if __name__ == '__main__': 59 | draw_f1(None) 60 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_utils.py: -------------------------------------------------------------------------------- 1 | 2 | COLOR = ((1, 0, 0), 3 | (0, 1, 0), 4 | (1, 0, 1), 5 | (1, 1, 0), 6 | (0 , 162/255, 232/255), 7 | (0.5, 0.5, 0.5), 8 | (0, 0, 1), 9 | (0, 1, 1), 10 | (136/255, 0 , 21/255), 11 | (255/255, 127/255, 39/255), 12 | (0, 0, 0)) 13 | 14 | LINE_STYLE = ['-', '--', ':', '-', '--', ':', '-', '--', ':', '-'] 15 | 16 | MARKER_STYLE = ['o', 'v', '<', '*', 'D', 'x', '.', 'x', '<', '.'] 17 | -------------------------------------------------------------------------------- /tools/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | import argparse 8 | 9 | import cv2 10 | import torch 11 | import numpy as np 12 | from glob import glob 13 | 14 | from pysot.core.config import cfg 15 | from pysot.models.model_builder import ModelBuilder 16 | from pysot.tracker.tracker_builder import build_tracker 17 | 18 | torch.set_num_threads(1) 19 | 20 | parser = argparse.ArgumentParser(description='tracking demo') 21 | parser.add_argument('--config', type=str, help='config file') 22 | parser.add_argument('--snapshot', type=str, help='model name') 23 | parser.add_argument('--video_name', default='', type=str, 24 | help='videos or 
image files') 25 | args = parser.parse_args() 26 | 27 | 28 | def get_frames(video_name): 29 | if not video_name: 30 | cap = cv2.VideoCapture(0) 31 | # warmup 32 | for i in range(5): 33 | cap.read() 34 | while True: 35 | ret, frame = cap.read() 36 | if ret: 37 | yield frame 38 | else: 39 | break 40 | elif video_name.endswith('avi') or \ 41 | video_name.endswith('mp4'): 42 | cap = cv2.VideoCapture(video_name) 43 | while True: 44 | ret, frame = cap.read() 45 | if ret: 46 | yield frame 47 | else: 48 | break 49 | else: 50 | images = glob(os.path.join(video_name, '*.jp*')) 51 | images = sorted(images, 52 | key=lambda x: int(x.split('/')[-1].split('.')[0])) 53 | for img in images: 54 | frame = cv2.imread(img) 55 | yield frame 56 | 57 | 58 | def main(): 59 | # load config 60 | cfg.merge_from_file(args.config) 61 | cfg.CUDA = torch.cuda.is_available() and cfg.CUDA 62 | device = torch.device('cuda' if cfg.CUDA else 'cpu') 63 | 64 | # create model 65 | model = ModelBuilder() 66 | 67 | # load model 68 | model.load_state_dict(torch.load(args.snapshot, 69 | map_location=lambda storage, loc: storage.cpu())) 70 | model.eval().to(device) 71 | 72 | # build tracker 73 | tracker = build_tracker(model) 74 | 75 | first_frame = True 76 | if args.video_name: 77 | video_name = args.video_name.split('/')[-1].split('.')[0] 78 | else: 79 | video_name = 'webcam' 80 | cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN) 81 | for frame in get_frames(args.video_name): 82 | if first_frame: 83 | try: 84 | init_rect = cv2.selectROI(video_name, frame, False, False) 85 | except Exception: 86 | exit() 87 | tracker.init(frame, init_rect) 88 | first_frame = False 89 | else: 90 | outputs = tracker.track(frame) 91 | if 'polygon' in outputs: 92 | polygon = np.array(outputs['polygon']).astype(np.int32) 93 | cv2.polylines(frame, [polygon.reshape((-1, 1, 2))], 94 | True, (0, 255, 0), 3) 95 | mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255) 96 | mask = mask.astype(np.uint8) 97 | mask = np.stack([mask, mask*255, mask]).transpose(1, 2, 0) 98 | frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1) 99 | else: 100 | bbox = list(map(int, outputs['bbox'])) 101 | cv2.rectangle(frame, (bbox[0], bbox[1]), 102 | (bbox[0]+bbox[2], bbox[1]+bbox[3]), 103 | (0, 255, 0), 3) 104 | cv2.imshow(video_name, frame) 105 | cv2.waitKey(40) 106 | 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /tools/gen_sim_info.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import os 3 | 4 | pkl_path = 'Raw_Results_RPN_Mob/UAVDT/SiamRPN++_Mob' 5 | tracker = 'SiamRPN++_Mob' 6 | dataset = 'UAVDT' 7 | tgt_path = 'testing_dataset/sim_info' 8 | 9 | pkls = os.listdir(pkl_path) 10 | info_dict = {} 11 | 12 | for pkl in pkls: 13 | name = pkl[0:-4] 14 | with open(os.path.join(pkl_path, pkl), 'rb') as run_file: 15 | pkl_info = pickle.load(run_file) 16 | init_time = pkl_info['runtime'][0] 17 | running_time = sum(pkl_info['runtime'][1:])/len(pkl_info['runtime'][1:]) 18 | info_dict[name] = {'init_time': init_time, 'running_time': running_time} 19 | 20 | with open(os.path.join(tgt_path, '{}_{}_sim.pkl'.format(dataset, tracker)), "wb") as f_sim: 21 | pickle.dump(info_dict, f_sim) 22 | -------------------------------------------------------------------------------- /tools/rt_eva.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Streaming evaluation 3 | Given real-time tracking outputs, 4 | it 
pairs them with the ground truth. 5 | 6 | Note that this script does not need to run in real-time 7 | ''' 8 | 9 | import argparse, pickle 10 | from os.path import join, isfile 11 | import numpy as np 12 | import sys 13 | import os 14 | 15 | # the line below is for running in both the current directory 16 | # and the repo's root directory 17 | 18 | 19 | def parse_args(): 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--raw_root', default='./results_rt_raw/UAV20/Raw_sim',type=str, 22 | help='raw result root') 23 | parser.add_argument('--tar_root', default='./results_rt/UAV20',type=str, 24 | help='target result root') 25 | parser.add_argument('--gtroot',default='./testing_dataset/UAV123_20L/anno', type=str) 26 | parser.add_argument('--fps', type=float, default=30) 27 | parser.add_argument('--eta', type=float, default=0, help='eta >= -1') 28 | parser.add_argument('--overwrite', action='store_true', default=False) 29 | 30 | args = parser.parse_args() 31 | return args 32 | 33 | def main(): 34 | args = parse_args() 35 | trackers=os.listdir(args.raw_root) 36 | gt_path=args.gtroot 37 | if 'DTB70' in gt_path: 38 | seqs = os.listdir(gt_path) 39 | gt_list=[] 40 | for seq in seqs: 41 | gt_list.append(os.path.join(gt_path, seq, 'groundtruth_rect.txt')) 42 | else: 43 | gt_list=os.listdir(gt_path) 44 | gt_list = [os.path.join(gt_path, i) for i in gt_list if i.endswith('.txt')] 45 | for tracker in trackers: 46 | ra_path=join(args.raw_root,tracker) 47 | ou_path=join(args.tar_root,tracker) 48 | if os.path.isdir(ou_path): 49 | continue 50 | mismatch = 0 51 | fps_a=[] 52 | 53 | for gt_idx, video in enumerate(gt_list): 54 | name=video.split('/')[-1][0:-4] 55 | name_rt=name 56 | # name=video 57 | if 'DTB70' in gt_path: 58 | name=video.split('/')[-2] 59 | name_rt=name 60 | if 'UAVDT' in gt_path: 61 | name_rt=name[0:-3] 62 | print('Pairing {:s} output with the ground truth ({:d}/{:d}): {:s}'.format(tracker, gt_idx+1, len(gt_list), name)) 63 | results = pickle.load(open(join(ra_path, name_rt + '.pkl'), 'rb')) 64 | gtlen = len(open(join(video)).readlines()) 65 | # use raw results when possible in case we change class subset during evaluation 66 | results_raw = results.get('results_raw', None) 67 | timestamps = results['timestamps'] 68 | # assume the init box doesn't need time to process 69 | timestamps[0]=0 70 | input_fidx = results['input_fidx'] 71 | run_time = results['runtime'] 72 | fps_a.append(len(run_time)/sum(run_time)) 73 | tidx_p1 = 0 74 | pred_bboxes=[] 75 | 76 | for idx in range(gtlen): 77 | # input frame time, i.e., [0, 0.03, 0.06, 0.09, ...] 78 | t = (idx - args.eta)/args.fps 79 | # which is the latest result? 
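# (the loop below advances tidx_p1 past every result whose wall-clock timestamp is <= t, so tidx_p1 - 1 ends up pointing at the newest result that had already finished when frame idx arrived; later results would not yet be available to a real-time system)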
80 | while tidx_p1 < len(timestamps) and timestamps[tidx_p1] <= t: 81 | tidx_p1 += 1 82 | # at least one result always exists for evaluation, i.e., the init box at time 0 83 | 84 | # if tidx_p1 == 0: 85 | # # no output 86 | # miss += 1 87 | # bboxes, scores, labels = [], [], [] 88 | # masks, tracks = None, None 89 | 90 | # the latest result given is tidx 91 | tidx = tidx_p1 - 1 92 | 93 | # compare the gt idx with the input fidx the result comes from to accumulate the mismatch 94 | ifidx = input_fidx[tidx] 95 | mismatch += idx - ifidx 96 | # print('GT time is {:3f}, latest tracker time is {:3f}, matching GT id {:3d} with processed frame {:3d}'.format(t, timestamps[tidx],idx,ifidx)) 97 | pred_bboxes.append(results_raw[tidx]) 98 | 99 | if not os.path.isdir(ou_path): 100 | os.makedirs(ou_path) 101 | result_path = join(ou_path, '{}.txt'.format(name_rt)) 102 | with open(result_path, 'w') as f: 103 | for x in pred_bboxes: 104 | f.write(','.join([str(i) for i in x])+'\n') 105 | fps_path = join(ou_path, '{}.txt'.format('Speed')) 106 | with open(fps_path, 'w') as f: 107 | f.write(str(sum(fps_a)/len(fps_a))) 108 | 109 | if __name__ == '__main__': 110 | main() 111 | -------------------------------------------------------------------------------- /tools/rt_eva_pre.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Streaming evaluation 3 | Given real-time tracking outputs, 4 | it pairs them with the ground truth. 5 | 6 | Note that this script does not need to run in real-time 7 | ''' 8 | 9 | import argparse, pickle 10 | from os.path import join, isfile 11 | import numpy as np 12 | import sys 13 | import os 14 | from tqdm import tqdm 15 | 16 | # the line below is for running in both the current directory 17 | # and the repo's root directory 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('--raw_root', default='./results_rt_raw/UAV20/Raw_pred_sim',type=str, 23 | help='raw result root') 24 | parser.add_argument('--tar_root', default='./results_rt/UAV20',type=str, 25 | help='target result root') 26 | parser.add_argument('--gtroot',default='./testing_dataset/UAV123_20L/anno', type=str) 27 | parser.add_argument('--fps', type=float, default=30) 28 | parser.add_argument('--eta', type=float, default=0, help='eta >= -1') 29 | parser.add_argument('--overwrite', action='store_true', default=False) 30 | 31 | args = parser.parse_args() 32 | return args 33 | 34 | def main(): 35 | args = parse_args() 36 | trackers=os.listdir(args.raw_root) 37 | gt_path=args.gtroot 38 | if 'DTB70' in gt_path: 39 | seqs = os.listdir(gt_path) 40 | gt_list=[] 41 | for seq in seqs: 42 | gt_list.append(os.path.join(gt_path, seq, 'groundtruth_rect.txt')) 43 | else: 44 | gt_list=os.listdir(gt_path) 45 | gt_list = [os.path.join(gt_path, i) for i in gt_list if i.endswith('.txt')] 46 | for tracker in tqdm(trackers): 47 | ra_path=join(args.raw_root,tracker) 48 | ou_path=join(args.tar_root,tracker) 49 | if os.path.isdir(ou_path): 50 | continue 51 | mismatch = 0 52 | fps_a=[] 53 | 54 | for gt_idx, video in enumerate(gt_list): 55 | name=video.split('/')[-1][0:-4] 56 | name_rt=name 57 | # name=video 58 | if 'DTB70' in gt_path: 59 | name=video.split('/')[-2] 60 | name_rt=name 61 | if 'UAVDT' in gt_path: 62 | name_rt=name[0:-3] 63 | # print('Pairing {:s} output with the ground truth ({:d}/{:d}): {:s}'.format(tracker, gt_idx+1, len(gt_list), name)) 64 | results = pickle.load(open(join(ra_path, name_rt + '.pkl'), 'rb')) 65 | gtlen = len(open(join(video)).readlines()) 66 | # use raw 
results when possible in case we change class subset during evaluation 67 | tra_results_raw = results.get('results_raw_t', None) 68 | tra_timestamps = results['timestamps_t'] 69 | pre_results = results.get('results_raw_p', None) 70 | # assume the init box doesn't need time to process 71 | tra_timestamps[0]=0 72 | 73 | run_time = results['runtime_all'] 74 | fps_a.append(len(run_time)/sum(run_time)) 75 | tidx_p1 = 0 76 | pred_bboxes=[] 77 | 78 | for idx in range(gtlen): 79 | # input frame time, i.e., [0, 0.03, 0.06, 0.09, ...] 80 | t = (idx - args.eta)/args.fps 81 | # Can the predictor already supply a result for this frame? 82 | if ('boxes_eva' in pre_results.keys()) and (str(idx) in pre_results['boxes_eva'].keys()) and pre_results['time'][str(idx)]<=t: 83 | # print('Frame {} use predictor results'.format(idx)) 84 | pred_bboxes.append(pre_results['boxes_eva'][str(idx)]) 85 | continue 86 | else: 87 | # which is the tracker's latest result? 88 | while tidx_p1 < len(tra_timestamps) and tra_timestamps[tidx_p1] <= t: 89 | tidx_p1 += 1 90 | # at least one result always exists for evaluation, i.e., the init box at time 0 91 | 92 | # the latest result given is tidx 93 | tidx = tidx_p1 - 1 94 | 95 | # print('GT time is {:3f}, latest tracker time is {:3f}, matching GT id {:3d} with tracker result'.format(t, tra_timestamps[tidx], idx)) 96 | pred_bboxes.append(tra_results_raw[tidx]) 97 | 98 | if not os.path.isdir(ou_path): 99 | os.makedirs(ou_path) 100 | result_path = join(ou_path, '{}.txt'.format(name_rt)) 101 | with open(result_path, 'w') as f: 102 | for x in pred_bboxes: 103 | f.write(','.join([str(i) for i in x])+'\n') 104 | fps_path = join(ou_path, '{}.txt'.format('Speed')) 105 | with open(fps_path, 'w') as f: 106 | f.write(str(sum(fps_a)/len(fps_a))) 107 | 108 | if __name__ == '__main__': 109 | main() 110 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=0 2 | export PYTHONPATH=/ocean/projects/cis220061p/bli5/CVPR23/code/PVT_pp:$PYTHONPATH 3 | 4 | # RPN_Mob 5 | python ./tools/train.py --cfg 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' 6 | python ./tools/train.py --cfg 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' 7 | python ./tools/train.py --cfg 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' 8 | -------------------------------------------------------------------------------- /training_dataset/got10k/gen_json.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | import json 3 | from os.path import join, exists 4 | import os 5 | import pandas as pd 6 | from tqdm import tqdm 7 | 8 | dataset_path = 'data' 9 | train_sets = ['GOT-10k_Train_split_01','GOT-10k_Train_split_02','GOT-10k_Train_split_03','GOT-10k_Train_split_04', 10 | 'GOT-10k_Train_split_05','GOT-10k_Train_split_06','GOT-10k_Train_split_07','GOT-10k_Train_split_08', 11 | 'GOT-10k_Train_split_09','GOT-10k_Train_split_10','GOT-10k_Train_split_11','GOT-10k_Train_split_12', 12 | 'GOT-10k_Train_split_13','GOT-10k_Train_split_14','GOT-10k_Train_split_15','GOT-10k_Train_split_16', 13 | 'GOT-10k_Train_split_17','GOT-10k_Train_split_18','GOT-10k_Train_split_19'] 14 | val_set = ['val'] 15 | d_sets = {'videos_val':val_set,'videos_train':train_sets} 16 | 17 | 18 | def parse_and_sched(dl_dir='.'): 19 | js = {} 20 | videos = os.listdir(dataset_path) 21 | for video in tqdm(videos): 22 | if video == 'list.txt': 23 | 
continue 24 | gt_path = join(dataset_path, video, 'groundtruth.txt') 25 | f = open(gt_path, 'r') 26 | groundtruth = f.readlines() 27 | f.close() 28 | for idx, gt_line in enumerate(groundtruth): 29 | gt_image = gt_line.strip().split(',') 30 | frame = '%08d' % (int(idx+1)) 31 | obj = '%02d' % (int(0)) 32 | bbox = [int(float(gt_image[0])), int(float(gt_image[1])), 33 | int(float(gt_image[0])) + int(float(gt_image[2])), 34 | int(float(gt_image[1])) + int(float(gt_image[3]))] # xmin,ymin,xmax,ymax 35 | 36 | x1 = bbox[0] 37 | y1 = bbox[1] 38 | w = bbox[2] - x1 39 | h = bbox[3] - y1 40 | if x1 < 0 or y1 < 0 or w <= 0 or h <= 0: 41 | break 42 | 43 | if video not in js: 44 | js[video] = {} 45 | if obj not in js[video]: 46 | js[video][obj] = {} 47 | js[video][obj][frame] = bbox 48 | json.dump(js, open('train.json', 'w'), indent=4, sort_keys=True) 49 | 50 | # print(d_set+': All videos downloaded' ) 51 | 52 | 53 | if __name__ == '__main__': 54 | parse_and_sched() -------------------------------------------------------------------------------- /training_dataset/lasot/gen_json.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import unicode_literals 3 | import json 4 | from os.path import join, exists 5 | import os 6 | import pandas as pd 7 | dataset_path = './data' 8 | 9 | def parse_and_sched(dl_dir='.'): 10 | # For each of the two datasets 11 | f = open('./test_id.txt', 'r') 12 | videos = f.readlines() 13 | f.close() 14 | n_videos = len(videos) 15 | js = {} 16 | for idx,video in enumerate(videos): 17 | print('{}/{}'.format(idx,n_videos)) 18 | video = video.strip() 19 | class_name = video.split('-')[0] 20 | # class_path = join(dataset_path, class_name) 21 | gt_path = join(dataset_path, video, 'groundtruth.txt') 22 | f = open(gt_path, 'r') 23 | groundtruth = f.readlines() 24 | f.close() 25 | video = video + '/img' 26 | for idx, gt_line in enumerate(groundtruth): 27 | gt_image = gt_line.strip().split(',') 28 | frame = '%08d' % (int(idx+1)) 29 | obj = '%02d' % (int(0)) 30 | bbox = [int(float(gt_image[0])), int(float(gt_image[1])), 31 | int(float(gt_image[0])) + int(float(gt_image[2])), 32 | int(float(gt_image[1])) + int(float(gt_image[3]))] # xmin,ymin,xmax,ymax 33 | x1 = bbox[0] 34 | y1 = bbox[1] 35 | w = bbox[2] - x1 36 | h = bbox[3] - y1 37 | if x1 < 0 or y1 < 0 or w <= 0 or h <= 0: 38 | break 39 | 40 | if video not in js: 41 | js[video] = {} 42 | if obj not in js[video]: 43 | js[video][obj] = {} 44 | js[video][obj][frame] = bbox 45 | json.dump(js, open('train.json', 'w'), indent=4, sort_keys=True) 46 | js = {} 47 | json.dump(js, open('val.json', 'w'), indent=4, sort_keys=True) 48 | print('done') 49 | 50 | 51 | if __name__ == '__main__': 52 | parse_and_sched() -------------------------------------------------------------------------------- /training_dataset/lasot/gen_txt.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | seq_path = 'data' 4 | seq_name = os.listdir(seq_path) 5 | with open('test_id.txt', 'w') as f: 6 | for seq in seq_name: 7 | f.write(seq+'\n') -------------------------------------------------------------------------------- /training_dataset/vid/gen_json.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | from os import listdir 3 | import json 4 | import numpy as np 5 | 6 | print('load json (raw vid info), please wait 20 seconds~') 7 | vid = json.load(open('vid.json', 'r')) 8 | 9 | 10 | def 
check_size(frame_sz, bbox): 11 | min_ratio = 0.1 12 | max_ratio = 0.75 13 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz))) 14 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio) 15 | return ok 16 | 17 | 18 | def check_borders(frame_sz, bbox): 19 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2 20 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \ 21 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \ 22 | ((frame_sz[1] - bbox[3]) > dist_from_border) 23 | return ok 24 | 25 | 26 | snippets = dict() 27 | n_snippets = 0 28 | n_videos = 0 29 | for subset in vid: 30 | for video in subset: 31 | n_videos += 1 32 | frames = video['frame'] 33 | id_set = [] 34 | id_frames = [[] for _ in range(60)] # at most 60 objects 35 | for f, frame in enumerate(frames): 36 | objs = frame['objs'] 37 | frame_sz = frame['frame_sz'] 38 | for obj in objs: 39 | trackid = obj['trackid'] 40 | occluded = obj['occ'] 41 | bbox = obj['bbox'] 42 | 43 | if trackid not in id_set: 44 | id_set.append(trackid) 45 | id_frames[trackid] = [] 46 | id_frames[trackid].append(f) 47 | if len(id_set) > 0: 48 | snippets[video['base_path']] = dict() 49 | for selected in id_set: 50 | frame_ids = sorted(id_frames[selected]) 51 | sequences = np.split(frame_ids, np.array(np.where(np.diff(frame_ids) > 1)[0]) + 1) 52 | sequences = [s for s in sequences if len(s) > 1] # remove isolated frames. 53 | for seq in sequences: 54 | snippet = dict() 55 | for frame_id in seq: 56 | frame = frames[frame_id] 57 | for obj in frame['objs']: 58 | if obj['trackid'] == selected: 59 | o = obj 60 | break 61 | snippet[frame['img_path'].split('.')[0]] = o['bbox'] 62 | snippets[video['base_path']]['{:02d}'.format(selected)] = snippet 63 | n_snippets += 1 64 | print('video: {:d} snippets_num: {:d}'.format(n_videos, n_snippets)) 65 | 66 | train = {k:v for (k,v) in snippets.items() if 'train' in k} 67 | val = {k:v for (k,v) in snippets.items() if 'val' in k} 68 | 69 | json.dump(train, open('train.json', 'w'), indent=4, sort_keys=True) 70 | json.dump(val, open('val.json', 'w'), indent=4, sort_keys=True) 71 | print('done!') 72 | -------------------------------------------------------------------------------- /training_dataset/vid/parse_vid.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | from os import listdir 3 | import json 4 | import glob 5 | import xml.etree.ElementTree as ET 6 | 7 | VID_base_path = './ILSVRC2015' 8 | ann_base_path = join(VID_base_path, 'Annotations/VID/train/') 9 | img_base_path = join(VID_base_path, 'Data/VID/train/') 10 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e'}) 11 | 12 | vid = [] 13 | for sub_set in sub_sets: 14 | sub_set_base_path = join(ann_base_path, sub_set) 15 | videos = sorted(listdir(sub_set_base_path)) 16 | s = [] 17 | for vi, video in enumerate(videos): 18 | print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos))) 19 | v = dict() 20 | v['base_path'] = join(sub_set, video) 21 | v['frame'] = [] 22 | video_base_path = join(sub_set_base_path, video) 23 | xmls = sorted(glob.glob(join(video_base_path, '*.xml'))) 24 | for xml in xmls: 25 | f = dict() 26 | xmltree = ET.parse(xml) 27 | size = xmltree.findall('size')[0] 28 | frame_sz = [int(it.text) for it in size] 29 | objects = xmltree.findall('object') 30 | objs = [] 31 | for object_iter in objects: 32 | trackid = int(object_iter.find('trackid').text) 33 | name = (object_iter.find('name')).text 34 | bndbox = 
object_iter.find('bndbox') 35 | occluded = int(object_iter.find('occluded').text) 36 | o = dict() 37 | o['c'] = name 38 | o['bbox'] = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text), 39 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)] 40 | o['trackid'] = trackid 41 | o['occ'] = occluded 42 | objs.append(o) 43 | f['frame_sz'] = frame_sz 44 | f['img_path'] = xml.split('/')[-1].replace('xml', 'JPEG') 45 | f['objs'] = objs 46 | v['frame'].append(f) 47 | s.append(v) 48 | vid.append(s) 49 | print('save json (raw vid info), please wait 1 min~') 50 | json.dump(vid, open('vid.json', 'w'), indent=4, sort_keys=True) 51 | print('done!') 52 | -------------------------------------------------------------------------------- /vot_iter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/vot_iter/__init__.py -------------------------------------------------------------------------------- /vot_iter/tracker_SiamRPNpp.m: -------------------------------------------------------------------------------- 1 | 2 | % error('Tracker not configured! Please edit the tracker_test.m file.'); % Remove this line after proper configuration 3 | 4 | % The human readable label for the tracker, used to identify the tracker in reports 5 | % If not set, it will be set to the same value as the identifier. 6 | % It does not have to be unique, but it is best that it is. 7 | tracker_label = ['SiamRPNpp']; 8 | 9 | % For Python implementations we have created a handy function that generates the appropriate 10 | % command that will run the python executable and execute the given script that includes your 11 | % tracker implementation. 12 | % 13 | % Please customize the line below by substituting the first argument with the name of the 14 | % script of your tracker (not the .py file but just the name of the script) and also provide the 15 | % path (or multiple paths) where the tracker sources are found as the elements of the cell 16 | % array (second argument). 17 | setenv('MKL_NUM_THREADS','1'); 18 | pysot_root = 'path/to/pysot'; 19 | track_build_path = 'path/to/track/build'; 20 | tracker_command = generate_python_command('vot_iter.vot_iter', {pysot_root; [track_build_path '/python/lib']}) 21 | 22 | tracker_interpreter = 'python'; 23 | 24 | tracker_linkpath = {track_build_path}; 25 | 26 | % tracker_linkpath = {}; % A cell array of custom library directories used by the tracker executable (optional) 27 | 28 | -------------------------------------------------------------------------------- /vot_iter/vot.py: -------------------------------------------------------------------------------- 1 | """ 2 | \file vot.py 3 | """ 4 | 5 | import sys 6 | import copy 7 | import collections 8 | 9 | try: 10 | import trax 11 | except ImportError: 12 | raise Exception('TraX support not found. 
Please add trax module to Python path.') 13 | 14 | Rectangle = collections.namedtuple('Rectangle', ['x', 'y', 'width', 'height']) 15 | Point = collections.namedtuple('Point', ['x', 'y']) 16 | Polygon = collections.namedtuple('Polygon', ['points']) 17 | 18 | class VOT(object): 19 | """ Base class for Python VOT integration """ 20 | def __init__(self, region_format, channels=None): 21 | """ Constructor 22 | 23 | Args: 24 | region_format: Region format options 25 | """ 26 | assert(region_format in [trax.Region.RECTANGLE, trax.Region.POLYGON]) 27 | 28 | if channels is None: 29 | channels = ['color'] 30 | elif channels == 'rgbd': 31 | channels = ['color', 'depth'] 32 | elif channels == 'rgbt': 33 | channels = ['color', 'ir'] 34 | elif channels == 'ir': 35 | channels = ['ir'] 36 | else: 37 | raise Exception('Illegal configuration {}.'.format(channels)) 38 | 39 | self._trax = trax.Server([region_format], [trax.Image.PATH], channels) 40 | 41 | request = self._trax.wait() 42 | assert(request.type == 'initialize') 43 | if isinstance(request.region, trax.Polygon): 44 | self._region = Polygon([Point(x[0], x[1]) for x in request.region]) 45 | else: 46 | self._region = Rectangle(*request.region.bounds()) 47 | self._image = [x.path() for k, x in request.image.items()] 48 | if len(self._image) == 1: 49 | self._image = self._image[0] 50 | 51 | self._trax.status(request.region) 52 | 53 | def region(self): 54 | """ 55 | Return the initialization region that was received from the client 56 | during the handshake in the constructor 57 | 58 | Returns: 59 | initialization region 60 | """ 61 | 62 | return self._region 63 | 64 | def report(self, region, confidence = None): 65 | """ 66 | Report the tracking results to the client 67 | 68 | Arguments: 69 | region: region for the frame 70 | """ 71 | assert(isinstance(region, Rectangle) or isinstance(region, Polygon)) 72 | if isinstance(region, Polygon): 73 | tregion = trax.Polygon.create([(x.x, x.y) for x in region.points]) 74 | else: 75 | tregion = trax.Rectangle.create(region.x, region.y, region.width, region.height) 76 | properties = {} 77 | if confidence is not None: 78 | properties['confidence'] = confidence 79 | self._trax.status(tregion, properties) 80 | 81 | def frame(self): 82 | """ 83 | Get a frame (image path) from client 84 | 85 | Returns: 86 | absolute path of the image 87 | """ 88 | if hasattr(self, "_image"): 89 | image = self._image 90 | del self._image 91 | return image 92 | 93 | request = self._trax.wait() 94 | 95 | if request.type == 'frame': 96 | image = [x.path() for k, x in request.image.items()] 97 | if len(image) == 1: 98 | return image[0] 99 | return image 100 | else: 101 | return None 102 | 103 | 104 | def quit(self): 105 | if hasattr(self, '_trax'): 106 | self._trax.quit() 107 | 108 | def __del__(self): 109 | self.quit() 110 | 111 | -------------------------------------------------------------------------------- /vot_iter/vot_iter.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import cv2 3 | import torch 4 | import numpy as np 5 | import os 6 | from os.path import join 7 | 8 | from pysot.core.config import cfg 9 | from pysot.models.model_builder import ModelBuilder 10 | from pysot.tracker.tracker_builder import build_tracker 11 | from pysot.utils.bbox import get_axis_aligned_bbox 12 | from pysot.utils.model_load import load_pretrain 13 | from toolkit.datasets import DatasetFactory 14 | from toolkit.utils.region import vot_overlap, vot_float2str 15 | 16 | from . 
import vot 17 | from .vot import Rectangle, Polygon, Point 18 | 19 | 20 | # modify root 21 | 22 | cfg_root = "path/to/expr" 23 | model_file = join(cfg_root, 'model.pth') 24 | cfg_file = join(cfg_root, 'config.yaml') 25 | 26 | def warmup(model): 27 | for i in range(10): 28 | model.template(torch.FloatTensor(1,3,127,127).cuda()) 29 | 30 | def setup_tracker(): 31 | cfg.merge_from_file(cfg_file) 32 | 33 | model = ModelBuilder() 34 | model = load_pretrain(model, model_file).cuda().eval() 35 | 36 | tracker = build_tracker(model) 37 | warmup(model) 38 | return tracker 39 | 40 | 41 | tracker = setup_tracker() 42 | 43 | handle = vot.VOT("polygon") 44 | region = handle.region() 45 | try: 46 | region = np.array([region[0][0][0], region[0][0][1], region[0][1][0], region[0][1][1], 47 | region[0][2][0], region[0][2][1], region[0][3][0], region[0][3][1]]) 48 | except Exception: 49 | region = np.array(region) 50 | 51 | cx, cy, w, h = get_axis_aligned_bbox(region) 52 | 53 | image_file = handle.frame() 54 | if not image_file: 55 | sys.exit(0) 56 | 57 | im = cv2.imread(image_file) # HxWxC 58 | # init 59 | target_pos, target_sz = np.array([cx, cy]), np.array([w, h]) 60 | gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h] 61 | tracker.init(im, gt_bbox_) 62 | 63 | while True: 64 | img_file = handle.frame() 65 | if not img_file: 66 | break 67 | im = cv2.imread(img_file) 68 | outputs = tracker.track(im) 69 | pred_bbox = outputs['bbox'] 70 | result = Rectangle(*pred_bbox) 71 | score = outputs['best_score'] 72 | if cfg.MASK.MASK: 73 | pred_bbox = outputs['polygon'] 74 | result = Polygon([Point(x[0], x[1]) for x in pred_bbox]) 75 | 76 | handle.report(result, score) 77 | --------------------------------------------------------------------------------
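The streaming evaluations in tools/rt_eva.py and tools/rt_eva_pre.py above both reduce to one pairing rule: a ground-truth frame that arrives at time t = (idx - eta) / fps is matched with the newest tracker output whose wall-clock timestamp is at most t. The sketch below distills that rule into a standalone Python function; the function name and the toy inputs are illustrative only and are not part of this repository's API.

def pair_latency_aware(timestamps, results_raw, n_frames, fps=30.0, eta=0.0):
    # For every ground-truth frame, return the newest tracker output whose
    # wall-clock timestamp is not later than the frame's arrival time.
    paired = []
    tidx_p1 = 0
    for idx in range(n_frames):
        t = (idx - eta) / fps  # arrival time of frame idx
        while tidx_p1 < len(timestamps) and timestamps[tidx_p1] <= t:
            tidx_p1 += 1  # advance past every result finished by time t
        paired.append(results_raw[max(tidx_p1 - 1, 0)])
    return paired

# toy run: three outputs finishing at 0 s, 0.05 s and 0.11 s on a 30 FPS stream
ts = [0.0, 0.05, 0.11]
boxes = [[10, 10, 40, 40], [12, 11, 40, 40], [15, 13, 40, 40]]
print(pair_latency_aware(ts, boxes, n_frames=5))
# -> [[10, 10, 40, 40], [10, 10, 40, 40], [12, 11, 40, 40], [12, 11, 40, 40], [15, 13, 40, 40]]

Because both the result timestamps and the frame arrival times increase monotonically, the single cursor tidx_p1 never moves backwards, so the whole pairing runs in O(n_frames + len(timestamps)).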