├── .gitignore
├── MODEL_ZOO.md
├── README.md
├── convert.sh
├── convert_new.sh
├── convert_psc.sh
├── experiments
│   ├── siammask_r50_l3
│   │   ├── config.yaml
│   │   ├── pre_kf_config.yaml
│   │   ├── pre_lb_config.yaml
│   │   ├── pre_lbv_config.yaml
│   │   └── pre_mv_config.yaml
│   ├── siamrpn_mobilev2_l234_dwxcorr
│   │   ├── config.yaml
│   │   ├── pre_kf_config.yaml
│   │   ├── pre_lb_config.yaml
│   │   ├── pre_lbv_config.yaml
│   │   └── pre_mv_config.yaml
│   └── siamrpn_r50_l234_dwxcorr
│       ├── config.yaml
│       ├── pre_kf_config.yaml
│       ├── pre_lb_config.yaml
│       ├── pre_lbv_config.yaml
│       └── pre_mv_config.yaml
├── onboard
│   ├── test_rt.py
│   └── test_rt_f.py
├── pysot
│   ├── __init__.py
│   ├── __pycache__
│   │   └── __init__.cpython-38.pyc
│   ├── core
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── config.cpython-38.pyc
│   │   │   └── xcorr.cpython-38.pyc
│   │   ├── config.py
│   │   └── xcorr.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── anchor_target.py
│   │   ├── augmentation.py
│   │   ├── dataset.py
│   │   └── la_dataset.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── alexnet.py
│   │   │   ├── mobile_v2.py
│   │   │   └── resnet_atrous.py
│   │   ├── centernet
│   │   │   ├── ddd_utils.py
│   │   │   ├── decode.py
│   │   │   ├── image.py
│   │   │   ├── losses.py
│   │   │   ├── post_process.py
│   │   │   └── utils.py
│   │   ├── head
│   │   │   ├── __init__.py
│   │   │   ├── mask.py
│   │   │   └── rpn.py
│   │   ├── init_weight.py
│   │   ├── loss.py
│   │   ├── model_builder.py
│   │   ├── neck
│   │   │   ├── __init__.py
│   │   │   └── neck.py
│   │   ├── pred_model_builder.py
│   │   └── predictor
│   │       ├── __init__.py
│   │       ├── base_predictor.py
│   │       ├── kf.py
│   │       ├── lb_5.py
│   │       ├── lbv_5.py
│   │       └── mv_v16.py
│   ├── tracker
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── base_tracker.cpython-38.pyc
│   │   │   ├── siammask_tracker.cpython-38.pyc
│   │   │   ├── siammask_tracker_f.cpython-38.pyc
│   │   │   ├── siamrpn_tracker.cpython-38.pyc
│   │   │   ├── siamrpn_tracker_f.cpython-38.pyc
│   │   │   ├── siamrpnlt_tracker.cpython-38.pyc
│   │   │   └── tracker_builder.cpython-38.pyc
│   │   ├── base_tracker.py
│   │   ├── siammask_tracker.py
│   │   ├── siammask_tracker_f.py
│   │   ├── siamrpn_tracker.py
│   │   ├── siamrpn_tracker_f.py
│   │   ├── siamrpn_tracker_ntr.py
│   │   ├── siamrpnlt_tracker.py
│   │   └── tracker_builder.py
│   └── utils
│       ├── __init__.py
│       ├── __pycache__
│       │   ├── __init__.cpython-38.pyc
│       │   ├── anchor.cpython-38.pyc
│       │   ├── bbox.cpython-38.pyc
│       │   └── model_load.cpython-38.pyc
│       ├── anchor.py
│       ├── average_meter.py
│       ├── bbox.py
│       ├── distributed.py
│       ├── image.py
│       ├── img_crop.py
│       ├── log_helper.py
│       ├── lr_scheduler.py
│       ├── misc.py
│       └── model_load.py
├── test_agx_mob.sh
├── test_sim_mob.sh
├── toolkit
│   ├── __init__.py
│   ├── __pycache__
│   │   └── __init__.cpython-38.pyc
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── dataset.cpython-38.pyc
│   │   │   ├── dtb.cpython-38.pyc
│   │   │   ├── got10k.cpython-38.pyc
│   │   │   ├── lasot.cpython-38.pyc
│   │   │   ├── otb.cpython-38.pyc
│   │   │   ├── realworld.cpython-38.pyc
│   │   │   ├── uav10fps.cpython-38.pyc
│   │   │   ├── uav123.cpython-38.pyc
│   │   │   ├── uav20l.cpython-38.pyc
│   │   │   ├── uavdark.cpython-38.pyc
│   │   │   ├── uavdt.cpython-38.pyc
│   │   │   ├── video.cpython-38.pyc
│   │   │   └── visdrone.cpython-38.pyc
│   │   ├── dataset.py
│   │   ├── dtb.py
│   │   ├── got10k.py
│   │   ├── lasot.py
│   │   ├── nfs.py
│   │   ├── otb.py
│   │   ├── realworld.py
│   │   ├── trackingnet.py
│   │   ├── uav.py
│   │   ├── uav10fps.py
│   │   ├── uav123.py
│   │   ├── uav20l.py
│   │   ├── uavdark.py
│   │   ├── uavdt.py
│   │   ├── video.py
│   │   ├── visdrone.py
│   │   ├── visdrone1.py
│   │   └── vot.py
│   ├── evaluation
│   │   ├── __init__.py
│   │   ├── ar_benchmark.py
│   │   ├── eao_benchmark.py
│   │   ├── f1_benchmark.py
│   │   └── ope_benchmark.py
│   ├── utils
│   │   ├── __init__.py
│   │   ├── c_region.pxd
│   │   ├── misc.py
│   │   ├── region.pyx
│   │   ├── src
│   │   │   ├── buffer.h
│   │   │   ├── region.c
│   │   │   └── region.h
│   │   └── statistics.py
│   └── visualization
│       ├── __init__.py
│       ├── draw_eao.py
│       ├── draw_f1.py
│       ├── draw_success_precision.py
│       └── draw_utils.py
├── tools
│   ├── demo.py
│   ├── eval.py
│   ├── gen_sim_info.py
│   ├── hp_search.py
│   ├── rt_eva.py
│   ├── rt_eva_new.py
│   ├── rt_eva_pre.py
│   ├── test.py
│   ├── test_flop.py
│   ├── test_rt.py
│   ├── test_rt_f.py
│   ├── test_rt_f_ntr.py
│   ├── test_rt_f_sim.py
│   ├── test_rt_sim.py
│   └── train.py
├── train.sh
├── training_dataset
│   ├── got10k
│   │   └── gen_json.py
│   ├── lasot
│   │   ├── gen_json.py
│   │   └── gen_txt.py
│   └── vid
│       ├── gen_json.py
│       └── parse_vid.py
└── vot_iter
    ├── __init__.py
    ├── tracker_SiamRPNpp.m
    ├── vot.py
    └── vot_iter.py
/.gitignore:
--------------------------------------------------------------------------------
1 | testing_dataset/
2 | Raw_Results_RPN_Mob/
3 | training_dataset/
4 | .vscode/
5 | *.zip
6 | results_rt/
7 | results_rt_raw/
8 | results_eLAE/
9 | Raw/
10 | models/
11 | __pycache__/
12 | *.py[cod]
--------------------------------------------------------------------------------
/MODEL_ZOO.md:
--------------------------------------------------------------------------------
1 | # PVT++ Model Zoo
2 |
3 | ## Introduction
4 |
5 | This file documents a collection of baselines trained with PVT++. All configurations for these baselines are located in the [`experiments`](experiments) directory. The table below reports inference results, together with links to the trained models and their raw outputs. All results were obtained on the same NVIDIA Jetson AGX Xavier platform.
6 |
7 | ## *Online* Visual Tracking
8 |
9 | | Model | DTB70 (mAUC/mDP) | UAVDT (mAUC/mDP) | UAV20L (mAUC/mDP) | UAV123 (mAUC/mDP) | URL |
10 | | :-----------------------: | :-------------------------------: | :-------------------------------: | :--------------------------------: | :--------------------------------: | :----------------------------------------------------------: |
11 | | RPN_mob | 0.298\|0.392 | 0.494\|0.719 | 0.448\|0.619 | 0.472\|0.678 | [RPN_Mob](https://mega.nz/file/8VlQXBIQ#ZbEBQnpMbQLJPQ0KqpALeHCZvxvOzW6QjTxX3hfnXS0) |
12 | | RPN_mob+Motion | 0.385\|0.523 | 0.529\|0.745 | 0.481\|0.647 | 0.537\|0.737 | [RPN_Mob_M](https://mega.nz/file/hFVklIpZ#0M1VJ7C1zmz4NrfwqWVuVMKRVjyEHedqaAVco2UkYX8) |
13 | | RPN_mob+Visual | 0.352\|0.472 | 0.564\|0.799 | 0.488\|0.675 | 0.504\|0.703 | [RPN_Mob_V](https://mega.nz/file/NRdlTTDS#TAcQwgEJmHLghFxFmDCTOv0gu5z57Eo3iiCaw-dRREw) |
14 | | RPN_mob+MV | 0.399\|0.536 | 0.576\|0.807 | 0.508\|0.697 | 0.537\|0.741 | [RPN_Mob_MV](https://mega.nz/file/EVFxSSYB#4TFSJoVELbztvhJX8xkDlqwldmJT6XucHBEy9nINdlM) |
15 |
16 | We also provide the [raw results](https://mega.nz/file/tFd02RxC#98PDk3XDhcXo9sZ-seKP5aklT0xC8rvbcUm77xu1Cmo).
17 | These files can also be found on [Google Drive](https://drive.google.com/file/d/1oZjoHGGXqKSC43yKTwn2zwxFQprDXp7L/view?usp=sharing).
18 |
--------------------------------------------------------------------------------
/convert.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=/path/to/PVT++:$PYTHONPATH
2 | # \sigma=0
3 | python tools/rt_eva_pre.py --raw_root './results_rt_raw/DTB70/' --tar_root './results_rt/DTB70' --gtroot 'testing_dataset/DTB70'
4 | python tools/rt_eva_pre.py --raw_root './results_rt_raw/UAVDT/' --tar_root './results_rt/UAVDT' --gtroot 'testing_dataset/UAVDT/anno'
5 | python tools/rt_eva_pre.py --raw_root './results_rt_raw/UAV20L/' --tar_root './results_rt/UAV20L' --gtroot 'testing_dataset/UAV20L/anno'
6 | python tools/rt_eva_pre.py --raw_root './results_rt_raw/UAV123/' --tar_root './results_rt/UAV123' --gtroot 'testing_dataset/UAV123/anno'
--------------------------------------------------------------------------------
/convert_new.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=/path/to/PVT++:$PYTHONPATH
2 | # DTB70
3 | python tools/rt_eva_new.py --raw_root Raw/DTB70 --tar_root results_eLAE/DTB70 --gtroot testing_dataset/DTB70
4 | # UAVDT
5 | python tools/rt_eva_new.py --raw_root Raw/UAVDT --tar_root results_eLAE/UAVDT --gtroot testing_dataset/UAVDT/anno
6 | # UAV20L
7 | python tools/rt_eva_new.py --raw_root Raw/UAV20L --tar_root results_eLAE/UAV20L --gtroot testing_dataset/UAV20L/anno
8 | # UAV123
9 | python tools/rt_eva_new.py --raw_root Raw/UAV123 --tar_root results_eLAE/UAV123 --gtroot testing_dataset/UAV123/anno
--------------------------------------------------------------------------------
/convert_psc.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=/path/to/PVT++:$PYTHONPATH
2 | # PVT_pp2 (PSC cluster paths; uncomment to use instead of the local set below)
3 | # RAW_ROOT='/ocean/projects/cis220061p/bli5/CVPR23/code/PVT_pp2/output_rt/test/tracking_results'
4 | # TAR_ROOT='/ocean/projects/cis220061p/bli5/CVPR23/code/PVT_pp2/rt_eva'
5 | # GT_ROOT='/ocean/projects/cis220061p/bli5/CVPR23/data'
6 | # PVT_pp (local paths)
7 | RAW_ROOT='results_rt_raw'
8 | TAR_ROOT='results_rt'
9 | GT_ROOT='testing_dataset'
10 | # \sigma=0
11 | # python3 tools/rt_eva.py --raw_root "${RAW_ROOT}/DTB/" --tar_root "${TAR_ROOT}/DTB70/" --gtroot "${GT_ROOT}/DTB70"
12 | python3 tools/rt_eva_pre.py --raw_root "${RAW_ROOT}/RealWorld/" --tar_root "${TAR_ROOT}/RealWorld/" --gtroot "${GT_ROOT}/real_world/anno"
13 | # python tools/rt_eva_pre.py --raw_root './results_rt_raw/UAV20L/' --tar_root './results_rt/UAV20L' --gtroot 'testing_dataset/UAV20L/anno'
14 | # python tools/rt_eva_pre.py --raw_root './results_rt_raw/UAV123/' --tar_root './results_rt/UAV123' --gtroot 'testing_dataset/UAV123/anno'
--------------------------------------------------------------------------------
/experiments/siammask_r50_l3/config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_r50_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "resnet50"
5 | KWARGS:
6 | used_layers: [0, 1, 2, 3]
7 |
8 | ADJUST:
9 | ADJUST: true
10 | TYPE: "AdjustAllLayer"
11 | KWARGS:
12 | in_channels: [1024]
13 | out_channels: [256]
14 |
15 | RPN:
16 | TYPE: 'DepthwiseRPN'
17 | KWARGS:
18 | anchor_num: 5
19 | in_channels: 256
20 | out_channels: 256
21 |
22 | MASK:
23 | MASK: True
24 | TYPE: 'MaskCorr'
25 | KWARGS:
26 | in_channels: 256
27 | hidden: 256
28 | out_channels: 3969
29 |
30 | REFINE:
31 | REFINE: True
32 | TYPE: 'Refine'
33 |
34 | ANCHOR:
35 | STRIDE: 8
36 | RATIOS: [0.33, 0.5, 1, 2, 3]
37 | SCALES: [8]
38 | ANCHOR_NUM: 5
39 |
40 | TRACK:
41 | TYPE: 'SiamMaskTracker'
42 | PENALTY_K: 0.10
43 | WINDOW_INFLUENCE: 0.41
44 | LR: 0.32
45 | EXEMPLAR_SIZE: 127
46 | INSTANCE_SIZE: 255
47 | BASE_SIZE: 8
48 | CONTEXT_AMOUNT: 0.5
49 | MASK_THERSHOLD: 0.15
50 |
--------------------------------------------------------------------------------
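A minimal sketch of how these experiment YAMLs are consumed, assuming the stock pysot-style yacs entry points (`pysot/core/config.py` is not reproduced above, so treat the exact calls as assumptions):

```python
# Hedged sketch (pysot-style yacs config; not verified against this fork).
from pysot.core.config import cfg                      # global CfgNode
from pysot.models.model_builder import ModelBuilder

cfg.merge_from_file('experiments/siammask_r50_l3/config.yaml')
model = ModelBuilder()        # builds BACKBONE / ADJUST / RPN / MASK from cfg
print(cfg.TRACK.TYPE)         # 'SiamMaskTracker'
```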
/experiments/siammask_r50_l3/pre_kf_config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_r50_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "resnet50"
5 | KWARGS:
6 | used_layers: [0, 1, 2, 3]
7 |
8 | ADJUST:
9 | ADJUST: true
10 | TYPE: "AdjustAllLayer"
11 | KWARGS:
12 | in_channels: [1024]
13 | out_channels: [256]
14 |
15 | RPN:
16 | TYPE: 'DepthwiseRPN'
17 | KWARGS:
18 | anchor_num: 5
19 | in_channels: 256
20 | out_channels: 256
21 |
22 | MASK:
23 | MASK: True
24 | TYPE: 'MaskCorr'
25 | KWARGS:
26 | in_channels: 256
27 | hidden: 256
28 | out_channels: 3969
29 |
30 | REFINE:
31 | REFINE: True
32 | TYPE: 'Refine'
33 |
34 | ANCHOR:
35 | STRIDE: 8
36 | RATIOS: [0.33, 0.5, 1, 2, 3]
37 | SCALES: [8]
38 | ANCHOR_NUM: 5
39 |
40 | TRACK:
41 | TYPE: 'SiamMaskTracker'
42 | PENALTY_K: 0.10
43 | WINDOW_INFLUENCE: 0.41
44 | LR: 0.32
45 | EXEMPLAR_SIZE: 127
46 | INSTANCE_SIZE: 255
47 | BASE_SIZE: 8
48 | CONTEXT_AMOUNT: 0.5
49 | MASK_THERSHOLD: 0.15
50 |
51 | # Predictive fine-tuning settings
52 | DATASET:
53 | NAMES: ('VID',)
54 | TRAIN:
55 | LATENCY: 2 # number of frames that will be skipped
56 | RESUME: '../pretrained/Mask_R50.pth' # Original trained tracking model
57 | NUM_FRAME: 3
58 | BATCH_SIZE: 4
59 | PRED:
60 | TYPE: 'KF'
61 |
62 |
63 |
--------------------------------------------------------------------------------
/experiments/siammask_r50_l3/pre_lb_config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_r50_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "resnet50"
5 | KWARGS:
6 | used_layers: [0, 1, 2, 3]
7 |
8 | ADJUST:
9 | ADJUST: true
10 | TYPE: "AdjustAllLayer"
11 | KWARGS:
12 | in_channels: [1024]
13 | out_channels: [256]
14 |
15 | RPN:
16 | TYPE: 'DepthwiseRPN'
17 | KWARGS:
18 | anchor_num: 5
19 | in_channels: 256
20 | out_channels: 256
21 |
22 | MASK:
23 | MASK: True
24 | TYPE: 'MaskCorr'
25 | KWARGS:
26 | in_channels: 256
27 | hidden: 256
28 | out_channels: 3969
29 |
30 | # REFINE:
31 | # REFINE: True
32 | # TYPE: 'Refine'
33 |
34 | ANCHOR:
35 | STRIDE: 8
36 | RATIOS: [0.33, 0.5, 1, 2, 3]
37 | SCALES: [8]
38 | ANCHOR_NUM: 5
39 |
40 | TRACK:
41 | TYPE: 'SiamMaskTracker'
42 | PENALTY_K: 0.10
43 | WINDOW_INFLUENCE: 0.41
44 | LR: 0.32
45 | EXEMPLAR_SIZE: 127
46 | INSTANCE_SIZE: 255
47 | BASE_SIZE: 8
48 | CONTEXT_AMOUNT: 0.5
49 | MASK_THERSHOLD: 0.15
50 |
51 | # Predictive fine-tuning settings
52 | DATASET:
53 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT')
54 | VIDEOS_PER_EPOCH: 10000 # 10000
55 | USE_IMG: False
56 |
57 | TRAIN:
58 | EPOCH: 100
59 | LATENCY: 3 # number of frames that will be skipped
60 | LOG_DIR: './logs/Mask_LB5'
61 | SNAPSHOT_DIR: './snapshot/Mask_LB5'
62 | JITTER: 2 # jitter for input latency
63 | PRE_TARGET: 6 # target of prediction
64 | RESUME: 'pretrained/Mask_R50.pth' # Original trained tracking model
65 | # RESUME: './snapshot/checkpoint_e45_l6_vid.pth'
66 | NUM_FRAME: 3
67 | BATCH_SIZE: 128
68 | LR: # for learning rate scheduler
69 | PRED_LR: 0.03 #0.1
70 | TYPE: 'multi-step'
71 | KWARGS:
72 | start_lr: 0.03
73 | steps: [15, 40, 30, 50, 80]
74 | mult: 0.5
75 | epochs: 100
76 | LR_WARMUP:
77 | WARMUP: False
78 | NUM_WORKERS: 24
79 | # TRAIN.LR_WARMUP.WARMUP = True
80 | # Predictor setting
81 | PRED:
82 | MODE: 'A+B'
83 | TRAIN: True
84 | TYPE: 'LB_v5'
85 | INPUT_RATIO: 0.0
86 | KWARGS:
87 | hidden_1: 64
88 | hidden_2: 32
89 | hidden_3: 32
90 | num_input: 3
91 | num_output: 6
92 |
--------------------------------------------------------------------------------
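For orientation, the `LATENCY` / `JITTER` / `PRE_TARGET` comments in the config above suggest the sampling scheme sketched below. This is a hypothetical helper written only to illustrate those comments, not the repository's actual dataloader:

```python
# Illustration only (hypothetical helper inferred from the YAML comments):
# the predictor sees NUM_FRAME past boxes spaced by a jittered latency and
# is supervised on the frame PRE_TARGET steps ahead.
import random

def sample_clip(t0, latency=3, jitter=2, pre_target=6, num_frame=3):
    lat = max(1, latency + random.randint(-jitter, jitter))  # jittered input latency
    inputs = [t0 - i * lat for i in range(num_frame)][::-1]  # past frame indices
    return inputs, t0 + pre_target                           # inputs, target index

print(sample_clip(100))  # e.g. ([90, 95, 100], 106)
```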
/experiments/siammask_r50_l3/pre_lbv_config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_r50_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "resnet50"
5 | KWARGS:
6 | used_layers: [0, 1, 2, 3]
7 |
8 | ADJUST:
9 | ADJUST: true
10 | TYPE: "AdjustAllLayer"
11 | KWARGS:
12 | in_channels: [1024]
13 | out_channels: [256]
14 |
15 | RPN:
16 | TYPE: 'DepthwiseRPN'
17 | KWARGS:
18 | anchor_num: 5
19 | in_channels: 256
20 | out_channels: 256
21 |
22 | MASK:
23 | MASK: True
24 | TYPE: 'MaskCorr'
25 | KWARGS:
26 | in_channels: 256
27 | hidden: 256
28 | out_channels: 3969
29 |
30 | # REFINE:
31 | # REFINE: True
32 | # TYPE: 'Refine'
33 |
34 | ANCHOR:
35 | STRIDE: 8
36 | RATIOS: [0.33, 0.5, 1, 2, 3]
37 | SCALES: [8]
38 | ANCHOR_NUM: 5
39 |
40 | TRACK:
41 | TYPE: 'SiamMaskTracker'
42 | PENALTY_K: 0.10
43 | WINDOW_INFLUENCE: 0.41
44 | LR: 0.32
45 | EXEMPLAR_SIZE: 127
46 | INSTANCE_SIZE: 255
47 | BASE_SIZE: 8
48 | CONTEXT_AMOUNT: 0.5
49 | MASK_THERSHOLD: 0.15
50 |
51 | # Predictive fine-tuning settings
52 | DATASET:
53 | NEG: 0.0
54 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT')
55 | USE_IMG: True
56 | SEARCH:
57 | SHIFT: 0
58 | SCALE: 0.0
59 | VIDEOS_PER_EPOCH: 10000 # 10000
60 |
61 | TRAIN:
62 | EPOCH: 240
63 | TRACKER_EPOCH: 20
64 | LOG_DIR: './logs/Mask_lbv5'
65 | SNAPSHOT_DIR: './snapshot/Mask_lbv5'
66 | LATENCY: 3 # number of frames that will be skipped
67 | JITTER: 2 # jitter for input latency
68 | PRE_TARGET: 6 # target of prediction
69 | RESUME: 'pretrained/Mask_R50.pth' # Original trained tracking model
70 | NUM_FRAME: 3
71 | BATCH_SIZE: 128
72 | FIX_T: False
73 | BASE_LR: 0.00001
74 | LR: # for learning rate scheduler
75 | PRED_LR: 0.01 # 0.1
76 | BACKBONE_LR: 0 # Times of 0.1*BASE_LR
77 | NECK_LR: 10 # Times of BASE_LR
78 | TYPE: 'multi-step'
79 | KWARGS:
80 | start_lr: 0.01
81 | steps: [200]
82 | mult: 0.1
83 | epochs: 240
84 | LR_WARMUP:
85 | WARMUP: False
86 | NUM_WORKERS: 24
87 | # TRAIN.LR_WARMUP.WARMUP = True
88 | # Predictor setting
89 | PRED:
90 | MODE: 'AB'
91 | TRAIN: True
92 | TYPE: 'LBv_v5'
93 | M_WEIGHT: 0.1
94 | V_WEIGHT: 0.1
95 | MV_WEIGHT: 1.0
96 | KWARGS:
97 | dwconv_k: 3
98 | dwconv_id: 256 # last layer channel
99 | dwconv_hd: 64
100 | hidden_2: 32
101 | hidden_3: 32
102 | num_input: 3
103 | num_output: 6
104 |
--------------------------------------------------------------------------------
/experiments/siammask_r50_l3/pre_mv_config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_r50_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "resnet50"
5 | KWARGS:
6 | used_layers: [0, 1, 2, 3]
7 |
8 | ADJUST:
9 | ADJUST: true
10 | TYPE: "AdjustAllLayer"
11 | KWARGS:
12 | in_channels: [1024]
13 | out_channels: [256]
14 |
15 | RPN:
16 | TYPE: 'DepthwiseRPN'
17 | KWARGS:
18 | anchor_num: 5
19 | in_channels: 256
20 | out_channels: 256
21 |
22 | MASK:
23 | MASK: True
24 | TYPE: 'MaskCorr'
25 | KWARGS:
26 | in_channels: 256
27 | hidden: 256
28 | out_channels: 3969
29 |
30 | # REFINE:
31 | # REFINE: True
32 | # TYPE: 'Refine'
33 |
34 | ANCHOR:
35 | STRIDE: 8
36 | RATIOS: [0.33, 0.5, 1, 2, 3]
37 | SCALES: [8]
38 | ANCHOR_NUM: 5
39 |
40 | TRACK:
41 | TYPE: 'SiamMaskTracker'
42 | PENALTY_K: 0.10
43 | WINDOW_INFLUENCE: 0.41
44 | LR: 0.32
45 | EXEMPLAR_SIZE: 127
46 | INSTANCE_SIZE: 255
47 | BASE_SIZE: 8
48 | CONTEXT_AMOUNT: 0.5
49 | MASK_THERSHOLD: 0.15
50 |
51 | # Predictive fine-tuning settings
52 | DATASET:
53 | NEG: 0.0
54 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT')
55 | USE_IMG: True
56 | SEARCH:
57 | SHIFT: 0
58 | SCALE: 0.0
59 | VIDEOS_PER_EPOCH: 10000 # 10000
60 |
61 | TRAIN:
62 | EPOCH: 300
63 | TRACKER_EPOCH: 10
64 | LOG_DIR: './logs/Mask_mv16'
65 | SNAPSHOT_DIR: './snapshot/Mask_mv16'
66 | LATENCY: 3 # number of frames that will be skipped
67 | JITTER: 2 # jitter for input latency
68 | PRE_TARGET: 6 # target of prediction
69 | RESUME: 'pretrained/Mask_R50.pth' # Original trained tracking model
70 | NUM_FRAME: 3
71 | BATCH_SIZE: 128
72 | FIX_T: False
73 | BASE_LR: 0.00001
74 | LR: # for learning rate scheduler
75 | PRED_LR: 0.004 # 0.1
76 | BACKBONE_LR: 0 # Times of 0.1*BASE_LR
77 | NECK_LR: 10 # Times of BASE_LR
78 | TYPE: 'multi-step'
79 | KWARGS:
80 | start_lr: 0.004
81 | steps: [200]
82 | mult: 0.1
83 | epochs: 300
84 | LR_WARMUP:
85 | WARMUP: False
86 | NUM_WORKERS: 24
87 | # TRAIN.LR_WARMUP.WARMUP = True
88 | # Predictor setting
89 | PRED:
90 | MODE: 'AB'
91 | TRAIN: True
92 | TYPE: 'MV_v16'
93 | M_WEIGHT: 0.1
94 | V_WEIGHT: 0.1
95 | MV_WEIGHT: 1.0
96 | KWARGS:
97 | dwconv_k: 3
98 | dwconv_id: 256 # last layer channel
99 | dwconv_hd: 64
100 | hidden_1: 64
101 | hidden_2: 32
102 | hidden_3: 32
103 | num_input: 3
104 | num_output: 6
105 |
--------------------------------------------------------------------------------
/experiments/siamrpn_mobilev2_l234_dwxcorr/config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_mobilev2_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "mobilenetv2"
5 | KWARGS:
6 | used_layers: [3, 5, 7]
7 | width_mult: 1.4
8 |
9 | ADJUST:
10 | ADJUST: true
11 | TYPE: "AdjustAllLayer"
12 | KWARGS:
13 | in_channels: [44, 134, 448]
14 | out_channels: [256, 256, 256]
15 |
16 | RPN:
17 | TYPE: 'MultiRPN'
18 | KWARGS:
19 | anchor_num: 5
20 | in_channels: [256, 256, 256]
21 | weighted: False
22 |
23 | MASK:
24 | MASK: False
25 |
26 | ANCHOR:
27 | STRIDE: 8
28 | RATIOS: [0.33, 0.5, 1, 2, 3]
29 | SCALES: [8]
30 | ANCHOR_NUM: 5
31 |
32 | TRACK:
33 | TYPE: 'SiamRPNTracker'
34 | PENALTY_K: 0.04
35 | WINDOW_INFLUENCE: 0.4
36 | LR: 0.5
37 | EXEMPLAR_SIZE: 127
38 | INSTANCE_SIZE: 255
39 | BASE_SIZE: 8
40 | CONTEXT_AMOUNT: 0.5
41 |
--------------------------------------------------------------------------------
/experiments/siamrpn_mobilev2_l234_dwxcorr/pre_kf_config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_mobilev2_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "mobilenetv2"
5 | KWARGS:
6 | used_layers: [3, 5, 7]
7 | width_mult: 1.4
8 |
9 | ADJUST:
10 | ADJUST: true
11 | TYPE: "AdjustAllLayer"
12 | KWARGS:
13 | in_channels: [44, 134, 448]
14 | out_channels: [256, 256, 256]
15 |
16 | RPN:
17 | TYPE: 'MultiRPN'
18 | KWARGS:
19 | anchor_num: 5
20 | in_channels: [256, 256, 256]
21 | weighted: False
22 |
23 | MASK:
24 | MASK: False
25 |
26 | ANCHOR:
27 | STRIDE: 8
28 | RATIOS: [0.33, 0.5, 1, 2, 3]
29 | SCALES: [8]
30 | ANCHOR_NUM: 5
31 |
32 | TRACK:
33 | TYPE: 'SiamRPNTracker'
34 | PENALTY_K: 0.04
35 | WINDOW_INFLUENCE: 0.4
36 | LR: 0.5
37 | EXEMPLAR_SIZE: 127
38 | INSTANCE_SIZE: 255
39 | BASE_SIZE: 8
40 | CONTEXT_AMOUNT: 0.5
41 |
42 | # Predictive fine-tuning settings
43 | DATASET:
44 | NAMES: ('VID',)
45 | TRAIN:
46 | LATENCY: 2 # number of frames that will be skipped
47 | RESUME: '../pretrained/RPN_Mob.pth' # Original trained tracking model
48 | NUM_FRAME: 3
49 | BATCH_SIZE: 4
50 | PRED:
51 | TYPE: 'KF'
52 | MODE: 'A+B'
53 |
54 |
55 |
--------------------------------------------------------------------------------
/experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_mobilev2_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "mobilenetv2"
5 | KWARGS:
6 | used_layers: [3, 5, 7]
7 | width_mult: 1.4
8 |
9 | ADJUST:
10 | ADJUST: true
11 | TYPE: "AdjustAllLayer"
12 | KWARGS:
13 | in_channels: [44, 134, 448]
14 | out_channels: [256, 256, 256]
15 |
16 | RPN:
17 | TYPE: 'MultiRPN'
18 | KWARGS:
19 | anchor_num: 5
20 | in_channels: [256, 256, 256]
21 | weighted: False
22 |
23 | MASK:
24 | MASK: false
25 |
26 | ANCHOR:
27 | STRIDE: 8
28 | RATIOS: [0.33, 0.5, 1, 2, 3]
29 | SCALES: [8]
30 | ANCHOR_NUM: 5
31 |
32 | TRACK:
33 | TYPE: 'SiamRPNTracker'
34 | PENALTY_K: 0.04
35 | WINDOW_INFLUENCE: 0.4
36 | LR: 0.5
37 | EXEMPLAR_SIZE: 127
38 | INSTANCE_SIZE: 255
39 | BASE_SIZE: 8
40 | CONTEXT_AMOUNT: 0.5
41 |
42 | # Predictive fine-tuning settings
43 | DATASET:
44 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT')
45 | VIDEOS_PER_EPOCH: 10000 # 10000
46 | USE_IMG: False
47 |
48 | TRAIN:
49 | EPOCH: 100
50 | LOG_DIR: './logs/RPN_Mob_LB5'
51 | SNAPSHOT_DIR: './snapshot/RPN_Mob_LB5'
52 | LATENCY: 3 # number of frames that will be skipped
53 | JITTER: 1 # jitter for input latency
54 | PRE_TARGET: 3 # target of prediction
55 | RESUME: 'pretrained/RPN_Mob.pth' # Original trained tracking model
56 | # RESUME: './snapshot/checkpoint_e45_l6_vid.pth'
57 | NUM_FRAME: 3
58 | BATCH_SIZE: 128
59 | LR: # for learning rate scheduler
60 | PRED_LR: 0.01 #0.1
61 | TYPE: 'multi-step'
62 | KWARGS:
63 | start_lr: 0.01
64 | steps: [15, 40, 30, 50, 80]
65 | mult: 0.1
66 | epochs: 100
67 | LR_WARMUP:
68 | WARMUP: False
69 | NUM_WORKERS: 24
70 | # TRAIN.LR_WARMUP.WARMUP = True
71 | # Predictor setting
72 | PRED:
73 | MODE: 'A+B'
74 | TRAIN: True
75 | TYPE: 'LB_v5'
76 | INPUT_RATIO: 0.0
77 | KWARGS:
78 | hidden_1: 64
79 | hidden_2: 32
80 | hidden_3: 32
81 | num_input: 3
82 | num_output: 3
83 |
--------------------------------------------------------------------------------
/experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_mobilev2_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "mobilenetv2"
5 | KWARGS:
6 | used_layers: [3, 5, 7]
7 | width_mult: 1.4
8 |
9 | ADJUST:
10 | ADJUST: true
11 | TYPE: "AdjustAllLayer"
12 | KWARGS:
13 | in_channels: [44, 134, 448]
14 | out_channels: [256, 256, 256]
15 |
16 | RPN:
17 | TYPE: 'MultiRPN'
18 | KWARGS:
19 | anchor_num: 5
20 | in_channels: [256, 256, 256]
21 | weighted: False
22 |
23 | MASK:
24 | MASK: false
25 |
26 | ANCHOR:
27 | STRIDE: 8
28 | RATIOS: [0.33, 0.5, 1, 2, 3]
29 | SCALES: [8]
30 | ANCHOR_NUM: 5
31 |
32 | TRACK:
33 | TYPE: 'SiamRPNTracker'
34 | PENALTY_K: 0.04
35 | WINDOW_INFLUENCE: 0.4
36 | LR: 0.5
37 | EXEMPLAR_SIZE: 127
38 | INSTANCE_SIZE: 255
39 | BASE_SIZE: 8
40 | CONTEXT_AMOUNT: 0.5
41 |
42 | # Predictive fine-tuning settings
43 | DATASET:
44 | NEG: 0.0
45 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT')
46 | USE_IMG: True
47 | SEARCH:
48 | SHIFT: 0
49 | SCALE: 0.0
50 | VIDEOS_PER_EPOCH: 10000 # 10000
51 |
52 | TRAIN:
53 | EPOCH: 210
54 | TRACKER_EPOCH: 20
55 | LOG_DIR: './logs/RPN_Mob_lbv5'
56 | SNAPSHOT_DIR: './snapshot/RPN_Mob_lbv5'
57 | LATENCY: 3 # number of frames that will be skipped
58 | JITTER: 1 # jitter for input latency
59 | PRE_TARGET: 3 # target of prediction
60 | RESUME: 'pretrained/RPN_Mob.pth' # Original trained tracking model
61 | NUM_FRAME: 3
62 | BATCH_SIZE: 128
63 | FIX_T: False
64 | BASE_LR: 0.00001
65 | LR: # for learning rate scheduler
66 | PRED_LR: 0.0035 # 0.1
67 | BACKBONE_LR: 0 # Times of 0.1*BASE_LR
68 | NECK_LR: 10 # Times of BASE_LR
69 | TYPE: 'multi-step'
70 | KWARGS:
71 | start_lr: 0.0035
72 | steps: [200]
73 | mult: 0.1
74 | epochs: 210
75 | LR_WARMUP:
76 | WARMUP: False
77 | NUM_WORKERS: 24
78 | # TRAIN.LR_WARMUP.WARMUP = True
79 | # Predictor setting
80 | PRED:
81 | MODE: 'AB'
82 | TRAIN: True
83 | TYPE: 'LBv_v5'
84 | M_WEIGHT: 0.0
85 | V_WEIGHT: 1.0
86 | MV_WEIGHT: 0.0
87 | KWARGS:
88 | dwconv_k: 3
89 | dwconv_id: 256 # last layer channel
90 | dwconv_hd: 64
91 | hidden_2: 32
92 | hidden_3: 32
93 | num_input: 3
94 | num_output: 3
95 |
--------------------------------------------------------------------------------
/experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_mobilev2_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "mobilenetv2"
5 | KWARGS:
6 | used_layers: [3, 5, 7]
7 | width_mult: 1.4
8 |
9 | ADJUST:
10 | ADJUST: true
11 | TYPE: "AdjustAllLayer"
12 | KWARGS:
13 | in_channels: [44, 134, 448]
14 | out_channels: [256, 256, 256]
15 |
16 | RPN:
17 | TYPE: 'MultiRPN'
18 | KWARGS:
19 | anchor_num: 5
20 | in_channels: [256, 256, 256]
21 | weighted: False
22 |
23 | MASK:
24 | MASK: false
25 |
26 | ANCHOR:
27 | STRIDE: 8
28 | RATIOS: [0.33, 0.5, 1, 2, 3]
29 | SCALES: [8]
30 | ANCHOR_NUM: 5
31 |
32 | TRACK:
33 | TYPE: 'SiamRPNTracker'
34 | PENALTY_K: 0.04
35 | WINDOW_INFLUENCE: 0.4
36 | LR: 0.5
37 | EXEMPLAR_SIZE: 127
38 | INSTANCE_SIZE: 255
39 | BASE_SIZE: 8
40 | CONTEXT_AMOUNT: 0.5
41 |
42 | # Predictive fine-tuning settings
43 | DATASET:
44 | NEG: 0.0
45 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT')
46 | USE_IMG: True
47 | SEARCH:
48 | SHIFT: 0
49 | SCALE: 0.0
50 | VIDEOS_PER_EPOCH: 10000 # 10000
51 |
52 | TRAIN:
53 | EPOCH: 280
54 | TRACKER_EPOCH: 20
55 | LOG_DIR: './logs/RPN_Mob_mv16'
56 | SNAPSHOT_DIR: './snapshot/RPN_Mob_mv16'
57 | LATENCY: 3 # number of frames that will be skipped
58 | JITTER: 1 # jitter for input latency
59 | PRE_TARGET: 3 # target of prediction
60 | RESUME: 'pretrained/RPN_Mob.pth' # Original trained tracking model
61 | NUM_FRAME: 3
62 | BATCH_SIZE: 128
63 | FIX_T: False
64 | BASE_LR: 0.00001
65 | LR: # for learning rate scheduler
66 | PRED_LR: 0.01 # 0.1
67 | BACKBONE_LR: 0 # Times of 0.1*BASE_LR
68 | NECK_LR: 10 # Times of BASE_LR
69 | TYPE: 'multi-step'
70 | KWARGS:
71 | start_lr: 0.01
72 | steps: [200]
73 | mult: 0.1
74 | epochs: 280
75 | LR_WARMUP:
76 | WARMUP: False
77 | NUM_WORKERS: 24
78 | # TRAIN.LR_WARMUP.WARMUP = True
79 | # Predictor setting
80 | PRED:
81 | MODE: 'AB'
82 | TRAIN: True
83 | TYPE: 'MV_v16'
84 | M_WEIGHT: 0.1
85 | V_WEIGHT: 0.1
86 | MV_WEIGHT: 1.0
87 | KWARGS:
88 | dwconv_k: 3
89 | dwconv_id: 256 # last layer channel
90 | dwconv_hd: 64
91 | hidden_1: 64
92 | hidden_2: 32
93 | hidden_3: 32
94 | num_input: 3
95 | num_output: 3
96 |
--------------------------------------------------------------------------------
/experiments/siamrpn_r50_l234_dwxcorr/config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_r50_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "resnet50"
5 | KWARGS:
6 | used_layers: [2, 3, 4]
7 |
8 | ADJUST:
9 | ADJUST: true
10 | TYPE: "AdjustAllLayer"
11 | KWARGS:
12 | in_channels: [512, 1024, 2048]
13 | out_channels: [256, 256, 256]
14 |
15 | RPN:
16 | TYPE: 'MultiRPN'
17 | KWARGS:
18 | anchor_num: 5
19 | in_channels: [256, 256, 256]
20 | weighted: true
21 |
22 | MASK:
23 | MASK: false
24 |
25 | ANCHOR:
26 | STRIDE: 8
27 | RATIOS: [0.33, 0.5, 1, 2, 3]
28 | SCALES: [8]
29 | ANCHOR_NUM: 5
30 |
31 | TRACK:
32 | TYPE: 'SiamRPNTracker'
33 | PENALTY_K: 0.05
34 | WINDOW_INFLUENCE: 0.42
35 | LR: 0.38
36 | EXEMPLAR_SIZE: 127
37 | INSTANCE_SIZE: 255
38 | BASE_SIZE: 8
39 | CONTEXT_AMOUNT: 0.5
40 |
41 | TRAIN:
42 | BATCH_SIZE: 4
43 |
--------------------------------------------------------------------------------
/experiments/siamrpn_r50_l234_dwxcorr/pre_kf_config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_r50_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "resnet50"
5 | KWARGS:
6 | used_layers: [2, 3, 4]
7 |
8 | ADJUST:
9 | ADJUST: true
10 | TYPE: "AdjustAllLayer"
11 | KWARGS:
12 | in_channels: [512, 1024, 2048]
13 | out_channels: [256, 256, 256]
14 |
15 | RPN:
16 | TYPE: 'MultiRPN'
17 | KWARGS:
18 | anchor_num: 5
19 | in_channels: [256, 256, 256]
20 | weighted: true
21 |
22 | MASK:
23 | MASK: false
24 |
25 | ANCHOR:
26 | STRIDE: 8
27 | RATIOS: [0.33, 0.5, 1, 2, 3]
28 | SCALES: [8]
29 | ANCHOR_NUM: 5
30 |
31 | TRACK:
32 | TYPE: 'SiamRPNTracker'
33 | PENALTY_K: 0.05
34 | WINDOW_INFLUENCE: 0.42
35 | LR: 0.38
36 | EXEMPLAR_SIZE: 127
37 | INSTANCE_SIZE: 255
38 | BASE_SIZE: 8
39 | CONTEXT_AMOUNT: 0.5
40 |
41 | # Predictive fine-tuning settings
42 | DATASET:
43 | NAMES: ('VID',)
44 | TRAIN:
45 | LATENCY: 2 # number of frames that will be skipped
46 | RESUME: '../pretrained/RPN_R50.model' # Original trained tracking model
47 | NUM_FRAME: 3
48 | BATCH_SIZE: 4
49 | PRED:
50 | TYPE: 'KF'
51 |
52 |
53 |
--------------------------------------------------------------------------------
/experiments/siamrpn_r50_l234_dwxcorr/pre_lb_config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_r50_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "resnet50"
5 | KWARGS:
6 | used_layers: [2, 3, 4]
7 |
8 | ADJUST:
9 | ADJUST: true
10 | TYPE: "AdjustAllLayer"
11 | KWARGS:
12 | in_channels: [512, 1024, 2048]
13 | out_channels: [256, 256, 256]
14 |
15 | RPN:
16 | TYPE: 'MultiRPN'
17 | KWARGS:
18 | anchor_num: 5
19 | in_channels: [256, 256, 256]
20 | weighted: true
21 |
22 | MASK:
23 | MASK: false
24 |
25 | ANCHOR:
26 | STRIDE: 8
27 | RATIOS: [0.33, 0.5, 1, 2, 3]
28 | SCALES: [8]
29 | ANCHOR_NUM: 5
30 |
31 | TRACK:
32 | TYPE: 'SiamRPNTracker'
33 | PENALTY_K: 0.05
34 | WINDOW_INFLUENCE: 0.42
35 | LR: 0.38
36 | EXEMPLAR_SIZE: 127
37 | INSTANCE_SIZE: 255
38 | BASE_SIZE: 8
39 | CONTEXT_AMOUNT: 0.5
40 |
41 | # Predictive fine-tuning settings
42 | DATASET:
43 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT')
44 | VIDEOS_PER_EPOCH: 10000 # 10000
45 | USE_IMG: False
46 |
47 | TRAIN:
48 | EPOCH: 50
49 | LOG_DIR: './logs/RPN_Res_lb5'
50 | SNAPSHOT_DIR: './snapshot/RPN_Res_lb5'
51 | LATENCY: 6 # number of frames that will be skipped
52 | JITTER: 2 # jitter for input latency
53 | PRE_TARGET: 12 # target of prediction
54 | RESUME: 'pretrained/RPN_R50.model' # Original trained tracking model
55 | NUM_FRAME: 3
56 | BATCH_SIZE: 128
57 | FIX_T: True
58 | BASE_LR: 0.0
59 | LR: # for learning rate scheduler
60 | PRED_LR: 0.03 #0.1
61 | TYPE: 'multi-step'
62 | KWARGS:
63 | start_lr: 0.03
64 | steps: [20, 40, 50]
65 | mult: 0.1
66 | epochs: 60
67 | LR_WARMUP:
68 | WARMUP: False
69 | NUM_WORKERS: 24
70 | # TRAIN.LR_WARMUP.WARMUP = True
71 | # Predictor setting
72 | PRED:
73 | MODE: 'A+B'
74 | TRAIN: True
75 | TYPE: 'LB_v5'
76 | INPUT_RATIO: 0.0
77 | KWARGS:
78 | hidden_1: 64
79 | hidden_2: 32
80 | hidden_3: 32
81 | num_input: 3
82 | num_output: 12
83 |
--------------------------------------------------------------------------------
/experiments/siamrpn_r50_l234_dwxcorr/pre_lbv_config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_r50_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "resnet50"
5 | KWARGS:
6 | used_layers: [2, 3, 4]
7 |
8 | ADJUST:
9 | ADJUST: true
10 | TYPE: "AdjustAllLayer"
11 | KWARGS:
12 | in_channels: [512, 1024, 2048]
13 | out_channels: [256, 256, 256]
14 |
15 | RPN:
16 | TYPE: 'MultiRPN'
17 | KWARGS:
18 | anchor_num: 5
19 | in_channels: [256, 256, 256]
20 | weighted: true
21 |
22 | MASK:
23 | MASK: false
24 |
25 | ANCHOR:
26 | STRIDE: 8
27 | RATIOS: [0.33, 0.5, 1, 2, 3]
28 | SCALES: [8]
29 | ANCHOR_NUM: 5
30 |
31 | TRACK:
32 | TYPE: 'SiamRPNTracker'
33 | PENALTY_K: 0.05
34 | WINDOW_INFLUENCE: 0.42
35 | LR: 0.38
36 | EXEMPLAR_SIZE: 127
37 | INSTANCE_SIZE: 255
38 | BASE_SIZE: 8
39 | CONTEXT_AMOUNT: 0.5
40 |
41 | # Predictive fine-tuning settings
42 | DATASET:
43 | NEG: 0.0
44 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT')
45 | USE_IMG: True
46 | SEARCH:
47 | SHIFT: 0
48 | SCALE: 0.0
49 | VIDEOS_PER_EPOCH: 10000 # 10000
50 |
51 | TRAIN:
52 | EPOCH: 150
53 | TRACKER_EPOCH: 20
54 | LOG_DIR: './logs/RPN_Res_lbv5'
55 | SNAPSHOT_DIR: './snapshot/RPN_Res_lbv5'
56 | LATENCY: 6 # number of frames that will be skipped
57 | JITTER: 2 # jitter for input latency
58 | PRE_TARGET: 12 # target of prediction
59 | RESUME: 'pretrained/RPN_R50.model' # Original trained tracking model
60 | NUM_FRAME: 3
61 | BATCH_SIZE: 64
62 | FIX_T: False
63 | BASE_LR: 0.00001
64 | LR: # for learning rate scheduler
65 | PRED_LR: 0.003 # 0.1
66 | BACKBONE_LR: 0 # Times of 0.1*BASE_LR
67 | NECK_LR: 10 # Times of BASE_LR
68 | TYPE: 'multi-step'
69 | KWARGS:
70 | start_lr: 0.003
71 | steps: [100]
72 | mult: 0.1
73 | epochs: 150
74 | LR_WARMUP:
75 | WARMUP: False
76 | NUM_WORKERS: 24
77 | # TRAIN.LR_WARMUP.WARMUP = True
78 | # Predictor setting
79 | PRED:
80 | MODE: 'AB'
81 | TRAIN: True
82 | TYPE: 'LBv_v5'
83 | M_WEIGHT: 0.0
84 | V_WEIGHT: 1.0
85 | MV_WEIGHT: 0.0
86 | KWARGS:
87 | dwconv_k: 3
88 | dwconv_id: 256 # last layer channel
89 | dwconv_hd: 64
90 | hidden_2: 32
91 | hidden_3: 32
92 | num_input: 3
93 | num_output: 12
94 |
--------------------------------------------------------------------------------
/experiments/siamrpn_r50_l234_dwxcorr/pre_mv_config.yaml:
--------------------------------------------------------------------------------
1 | META_ARC: "siamrpn_r50_l234_dwxcorr"
2 |
3 | BACKBONE:
4 | TYPE: "resnet50"
5 | KWARGS:
6 | used_layers: [2, 3, 4]
7 |
8 | ADJUST:
9 | ADJUST: true
10 | TYPE: "AdjustAllLayer"
11 | KWARGS:
12 | in_channels: [512, 1024, 2048]
13 | out_channels: [256, 256, 256]
14 |
15 | RPN:
16 | TYPE: 'MultiRPN'
17 | KWARGS:
18 | anchor_num: 5
19 | in_channels: [256, 256, 256]
20 | weighted: true
21 |
22 | MASK:
23 | MASK: false
24 |
25 | ANCHOR:
26 | STRIDE: 8
27 | RATIOS: [0.33, 0.5, 1, 2, 3]
28 | SCALES: [8]
29 | ANCHOR_NUM: 5
30 |
31 | TRACK:
32 | TYPE: 'SiamRPNTracker'
33 | PENALTY_K: 0.05
34 | WINDOW_INFLUENCE: 0.42
35 | LR: 0.38
36 | EXEMPLAR_SIZE: 127
37 | INSTANCE_SIZE: 255
38 | BASE_SIZE: 8
39 | CONTEXT_AMOUNT: 0.5
40 |
41 | # Predictive fine-tuning settings
42 | DATASET:
43 | NEG: 0.0
44 | NAMES: ('VID', 'LaSOT', 'GOT') #('VID','LaSOT')
45 | USE_IMG: True
46 | SEARCH:
47 | SHIFT: 0
48 | SCALE: 0.0
49 | VIDEOS_PER_EPOCH: 10000 # 10000
50 |
51 | TRAIN:
52 | EPOCH: 150
53 | TRACKER_EPOCH: 20
54 | START_EPOCH: 220
55 | LOG_DIR: './logs/RPN_Res_mv16'
56 | SNAPSHOT_DIR: './snapshot/RPN_Res_mv16'
57 | LATENCY: 6 # number of frames that will be skipped
58 | JITTER: 2 # jitter for input latency
59 | PRE_TARGET: 12 # target of prediction
60 | RESUME: 'pretrained/RPN_R50.model' # Original trained tracking model
61 | NUM_FRAME: 3
62 | BATCH_SIZE: 64
63 | FIX_T: False
64 | BASE_LR: 0.00001
65 | LR: # for learning rate scheduler
66 | PRED_LR: 0.003 # 0.1
67 | BACKBONE_LR: 0 # Times of 0.1*BASE_LR
68 | NECK_LR: 10 # Times of BASE_LR
69 | TYPE: 'multi-step'
70 | KWARGS:
71 | start_lr: 0.003
72 | steps: [100]
73 | mult: 0.1
74 | epochs: 150
75 | LR_WARMUP:
76 | WARMUP: False
77 | NUM_WORKERS: 24
78 | # TRAIN.LR_WARMUP.WARMUP = True
79 | # Predictor setting
80 | PRED:
81 | MODE: 'AB'
82 | TRAIN: True
83 | TYPE: 'MV_v16'
84 | M_WEIGHT: 0.1
85 | V_WEIGHT: 0.1
86 | MV_WEIGHT: 1.0
87 | KWARGS:
88 | dwconv_k: 3
89 | dwconv_id: 256 # last layer channel
90 | dwconv_hd: 64
91 | hidden_1: 64
92 | hidden_2: 32
93 | hidden_3: 32
94 | num_input: 3
95 | num_output: 12
96 |
--------------------------------------------------------------------------------
/pysot/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/__init__.py
--------------------------------------------------------------------------------
/pysot/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/core/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/core/__init__.py
--------------------------------------------------------------------------------
/pysot/core/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/core/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/core/__pycache__/config.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/core/__pycache__/config.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/core/__pycache__/xcorr.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/core/__pycache__/xcorr.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/core/xcorr.py:
--------------------------------------------------------------------------------
1 |
2 | from __future__ import absolute_import
3 | from __future__ import division
4 | from __future__ import print_function
5 | from __future__ import unicode_literals
6 |
7 | import torch
8 | import torch.nn.functional as F
9 |
10 |
11 | def xcorr_slow(x, kernel):
12 | """for loop to calculate cross correlation, slow version
13 | """
14 | batch = x.size()[0]
15 | out = []
16 | for i in range(batch):
17 | px = x[i]
18 | pk = kernel[i]
19 | px = px.view(1, -1, px.size()[1], px.size()[2])
20 | pk = pk.view(1, -1, pk.size()[1], pk.size()[2])
21 | po = F.conv2d(px, pk)
22 | out.append(po)
23 | out = torch.cat(out, 0)
24 | return out
25 |
26 |
27 | def xcorr_fast(x, kernel):
28 | """group conv2d to calculate cross correlation, fast version
29 | """
30 | batch = kernel.size()[0]
31 | pk = kernel.view(-1, x.size()[1], kernel.size()[2], kernel.size()[3])
32 | px = x.view(1, -1, x.size()[2], x.size()[3])
33 | po = F.conv2d(px, pk, groups=batch)
34 | po = po.view(batch, -1, po.size()[2], po.size()[3])
35 | return po
36 |
37 |
38 | def xcorr_depthwise(x, kernel):
39 | """depthwise cross correlation
40 | """
41 | batch = kernel.size(0)
42 | channel = kernel.size(1)
43 | x = x.view(1, batch*channel, x.size(2), x.size(3))
44 | kernel = kernel.view(batch*channel, 1, kernel.size(2), kernel.size(3))
45 | out = F.conv2d(x, kernel, groups=batch*channel)
46 | out = out.view(batch, channel, out.size(2), out.size(3))
47 | return out
48 |
--------------------------------------------------------------------------------
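A quick shape check for the three correlation variants above (random CPU tensors; 29 and 5 are representative pysot search/exemplar feature sizes):

```python
import torch
from pysot.core.xcorr import xcorr_slow, xcorr_fast, xcorr_depthwise

x = torch.randn(2, 256, 29, 29)      # search features, batch of 2
k = torch.randn(2, 256, 5, 5)        # exemplar (kernel) features

print(xcorr_slow(x, k).shape)        # torch.Size([2, 1, 25, 25])
print(xcorr_fast(x, k).shape)        # torch.Size([2, 1, 25, 25]), same result
print(xcorr_depthwise(x, k).shape)   # torch.Size([2, 256, 25, 25]), per-channel
```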
/pysot/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/datasets/__init__.py
--------------------------------------------------------------------------------
/pysot/datasets/anchor_target.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import numpy as np
7 |
8 | from pysot.core.config import cfg
9 | from pysot.utils.bbox import IoU, corner2center
10 | from pysot.utils.anchor import Anchors
11 |
12 |
13 | class AnchorTarget:
14 | def __init__(self,):
15 | self.anchors = Anchors(cfg.ANCHOR.STRIDE,
16 | cfg.ANCHOR.RATIOS,
17 | cfg.ANCHOR.SCALES)
18 |
19 | self.anchors.generate_all_anchors(im_c=cfg.TRAIN.SEARCH_SIZE//2,
20 | size=cfg.TRAIN.OUTPUT_SIZE)
21 |
22 | def __call__(self, target, size, neg=False):
23 | anchor_num = len(cfg.ANCHOR.RATIOS) * len(cfg.ANCHOR.SCALES)
24 |
25 | # -1 ignore 0 negative 1 positive
26 | cls = -1 * np.ones((anchor_num, size, size), dtype=np.int64)
27 | delta = np.zeros((4, anchor_num, size, size), dtype=np.float32)
28 | delta_weight = np.zeros((anchor_num, size, size), dtype=np.float32)
29 |
30 | def select(position, keep_num=16):
31 | num = position[0].shape[0]
32 | if num <= keep_num:
33 | return position, num
34 | slt = np.arange(num)
35 | np.random.shuffle(slt)
36 | slt = slt[:keep_num]
37 | return tuple(p[slt] for p in position), keep_num
38 |
39 | tcx, tcy, tw, th = corner2center(target)
40 |
41 | if neg:
42 | # l = size // 2 - 3
43 | # r = size // 2 + 3 + 1
44 | # cls[:, l:r, l:r] = 0
45 |
46 | cx = size // 2
47 | cy = size // 2
48 | cx += int(np.ceil((tcx - cfg.TRAIN.SEARCH_SIZE // 2) /
49 | cfg.ANCHOR.STRIDE + 0.5))
50 | cy += int(np.ceil((tcy - cfg.TRAIN.SEARCH_SIZE // 2) /
51 | cfg.ANCHOR.STRIDE + 0.5))
52 | l = max(0, cx - 3)
53 | r = min(size, cx + 4)
54 | u = max(0, cy - 3)
55 | d = min(size, cy + 4)
56 | cls[:, u:d, l:r] = 0
57 |
58 | neg, neg_num = select(np.where(cls == 0), cfg.TRAIN.NEG_NUM)
59 | cls[:] = -1
60 | cls[neg] = 0
61 |
62 | overlap = np.zeros((anchor_num, size, size), dtype=np.float32)
63 | return cls, delta, delta_weight, overlap
64 |
65 | anchor_box = self.anchors.all_anchors[0]
66 | anchor_center = self.anchors.all_anchors[1]
67 | x1, y1, x2, y2 = anchor_box[0], anchor_box[1], \
68 | anchor_box[2], anchor_box[3]
69 | cx, cy, w, h = anchor_center[0], anchor_center[1], \
70 | anchor_center[2], anchor_center[3]
71 |
72 | delta[0] = (tcx - cx) / w
73 | delta[1] = (tcy - cy) / h
74 | delta[2] = np.log(tw / w)
75 | delta[3] = np.log(th / h)
76 |
77 | overlap = IoU([x1, y1, x2, y2], target)
78 |
79 | pos = np.where(overlap > cfg.TRAIN.THR_HIGH)
80 | neg = np.where(overlap < cfg.TRAIN.THR_LOW)
81 |
82 | pos, pos_num = select(pos, cfg.TRAIN.POS_NUM)
83 | neg, neg_num = select(neg, cfg.TRAIN.TOTAL_NUM - cfg.TRAIN.POS_NUM)
84 |
85 | cls[pos] = 1
86 | delta_weight[pos] = 1. / (pos_num + 1e-6)
87 |
88 | cls[neg] = 0
89 | return cls, delta, delta_weight, overlap
90 |
--------------------------------------------------------------------------------
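The `delta` computation above is the standard anchor-box encoding: `dx = (tcx - cx) / w`, `dy = (tcy - cy) / h`, `dw = log(tw / w)`, `dh = log(th / h)`. A tiny self-contained round-trip check (hand-picked numbers, not from the codebase):

```python
import numpy as np

cx, cy, w, h = 64.0, 64.0, 32.0, 32.0        # anchor, center form
tcx, tcy, tw, th = 70.0, 60.0, 40.0, 24.0    # ground-truth box

dx, dy = (tcx - cx) / w, (tcy - cy) / h      # normalized center offsets
dw, dh = np.log(tw / w), np.log(th / h)      # log size ratios

# decoding inverts the encoding exactly
assert np.allclose([cx + dx * w, cy + dy * h, w * np.exp(dw), h * np.exp(dh)],
                   [tcx, tcy, tw, th])
```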
/pysot/models/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | from pysot.models.model_builder import ModelBuilder #tracker + predictor
7 | from pysot.models.pred_model_builder import PredModelBuilder # predictive tracker
8 |
9 |
10 | Builders = {
11 | 'A+B': ModelBuilder,
12 | 'AB': PredModelBuilder,
13 | }
14 |
15 |
16 | def get_modelbuilder(name, **kwargs):
17 | return Builders[name](**kwargs)
18 |
19 |
--------------------------------------------------------------------------------
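Usage sketch: `PRED.MODE` in the experiment YAMLs (`'A+B'` vs `'AB'`) selects which builder is constructed. This assumes the global config has been merged first, as in upstream pysot:

```python
from pysot.core.config import cfg
from pysot.models import get_modelbuilder

cfg.merge_from_file('experiments/siamrpn_mobilev2_l234_dwxcorr/pre_kf_config.yaml')
model = get_modelbuilder(cfg.PRED.MODE)   # 'A+B' -> ModelBuilder
```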
/pysot/models/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | from pysot.models.backbone.alexnet import alexnetlegacy, alexnet
7 | from pysot.models.backbone.mobile_v2 import mobilenetv2
8 | from pysot.models.backbone.resnet_atrous import resnet18, resnet34, resnet50
9 |
10 | BACKBONES = {
11 | 'alexnetlegacy': alexnetlegacy,
12 | 'mobilenetv2': mobilenetv2,
13 | 'resnet18': resnet18,
14 | 'resnet34': resnet34,
15 | 'resnet50': resnet50,
16 | 'alexnet': alexnet,
17 | }
18 |
19 |
20 | def get_backbone(name, **kwargs):
21 | return BACKBONES[name](**kwargs)
22 |
--------------------------------------------------------------------------------
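Usage sketch matching the `BACKBONE` sections of the experiment YAMLs above:

```python
from pysot.models.backbone import get_backbone

# kwargs mirror BACKBONE.KWARGS in the configs
mob = get_backbone('mobilenetv2', used_layers=[3, 5, 7], width_mult=1.4)
res = get_backbone('resnet50', used_layers=[2, 3, 4])
```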
/pysot/models/backbone/alexnet.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import torch.nn as nn
7 |
8 |
9 | class AlexNetLegacy(nn.Module):
10 | configs = [3, 96, 256, 384, 384, 256]
11 |
12 | def __init__(self, width_mult=1):
13 | configs = list(map(lambda x: 3 if x == 3 else
14 | int(x*width_mult), AlexNet.configs))
15 | super(AlexNetLegacy, self).__init__()
16 | self.features = nn.Sequential(
17 | nn.Conv2d(configs[0], configs[1], kernel_size=11, stride=2),
18 | nn.BatchNorm2d(configs[1]),
19 | nn.MaxPool2d(kernel_size=3, stride=2),
20 | nn.ReLU(inplace=True),
21 | nn.Conv2d(configs[1], configs[2], kernel_size=5),
22 | nn.BatchNorm2d(configs[2]),
23 | nn.MaxPool2d(kernel_size=3, stride=2),
24 | nn.ReLU(inplace=True),
25 | nn.Conv2d(configs[2], configs[3], kernel_size=3),
26 | nn.BatchNorm2d(configs[3]),
27 | nn.ReLU(inplace=True),
28 | nn.Conv2d(configs[3], configs[4], kernel_size=3),
29 | nn.BatchNorm2d(configs[4]),
30 | nn.ReLU(inplace=True),
31 | nn.Conv2d(configs[4], configs[5], kernel_size=3),
32 | nn.BatchNorm2d(configs[5]),
33 | )
34 | self.feature_size = configs[5]
35 |
36 | def forward(self, x):
37 | x = self.features(x)
38 | return x
39 |
40 |
41 | class AlexNet(nn.Module):
42 | configs = [3, 96, 256, 384, 384, 256]
43 |
44 | def __init__(self, width_mult=1):
45 | configs = list(map(lambda x: 3 if x == 3 else
46 | int(x*width_mult), AlexNet.configs))
47 | super(AlexNet, self).__init__()
48 | self.layer1 = nn.Sequential(
49 | nn.Conv2d(configs[0], configs[1], kernel_size=11, stride=2),
50 | nn.BatchNorm2d(configs[1]),
51 | nn.MaxPool2d(kernel_size=3, stride=2),
52 | nn.ReLU(inplace=True),
53 | )
54 | self.layer2 = nn.Sequential(
55 | nn.Conv2d(configs[1], configs[2], kernel_size=5),
56 | nn.BatchNorm2d(configs[2]),
57 | nn.MaxPool2d(kernel_size=3, stride=2),
58 | nn.ReLU(inplace=True),
59 | )
60 | self.layer3 = nn.Sequential(
61 | nn.Conv2d(configs[2], configs[3], kernel_size=3),
62 | nn.BatchNorm2d(configs[3]),
63 | nn.ReLU(inplace=True),
64 | )
65 | self.layer4 = nn.Sequential(
66 | nn.Conv2d(configs[3], configs[4], kernel_size=3),
67 | nn.BatchNorm2d(configs[4]),
68 | nn.ReLU(inplace=True),
69 | )
70 |
71 | self.layer5 = nn.Sequential(
72 | nn.Conv2d(configs[4], configs[5], kernel_size=3),
73 | nn.BatchNorm2d(configs[5]),
74 | )
75 | self.feature_size = configs[5]
76 |
77 | def forward(self, x):
78 | x = self.layer1(x)
79 | x = self.layer2(x)
80 | x = self.layer3(x)
81 | x = self.layer4(x)
82 | x = self.layer5(x)
83 | return x
84 |
85 |
86 | def alexnetlegacy(**kwargs):
87 | return AlexNetLegacy(**kwargs)
88 |
89 |
90 | def alexnet(**kwargs):
91 | return AlexNet(**kwargs)
92 |
--------------------------------------------------------------------------------
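Feature-map sanity check for the stride-reduced AlexNet above: a 127x127 exemplar crop (`TRACK.EXEMPLAR_SIZE` in the configs) comes out as a 256-channel 6x6 map:

```python
import torch
from pysot.models.backbone.alexnet import alexnet

net = alexnet().eval()
z = torch.randn(1, 3, 127, 127)   # exemplar-sized input
with torch.no_grad():
    print(net(z).shape)           # torch.Size([1, 256, 6, 6])
```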
/pysot/models/backbone/mobile_v2.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import torch
7 | import torch.nn as nn
8 |
9 |
10 | def conv_bn(inp, oup, stride, padding=1):
11 | return nn.Sequential(
12 | nn.Conv2d(inp, oup, 3, stride, padding, bias=False),
13 | nn.BatchNorm2d(oup),
14 | nn.ReLU6(inplace=True)
15 | )
16 |
17 |
18 | def conv_1x1_bn(inp, oup):
19 | return nn.Sequential(
20 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
21 | nn.BatchNorm2d(oup),
22 | nn.ReLU6(inplace=True)
23 | )
24 |
25 |
26 | class InvertedResidual(nn.Module):
27 | def __init__(self, inp, oup, stride, expand_ratio, dilation=1):
28 | super(InvertedResidual, self).__init__()
29 | self.stride = stride
30 |
31 | self.use_res_connect = self.stride == 1 and inp == oup
32 |
33 | padding = 2 - stride
34 | if dilation > 1:
35 | padding = dilation
36 |
37 | self.conv = nn.Sequential(
38 | # pw
39 | nn.Conv2d(inp, inp * expand_ratio, 1, 1, 0, bias=False),
40 | nn.BatchNorm2d(inp * expand_ratio),
41 | nn.ReLU6(inplace=True),
42 | # dw
43 | nn.Conv2d(inp * expand_ratio, inp * expand_ratio, 3,
44 | stride, padding, dilation=dilation,
45 | groups=inp * expand_ratio, bias=False),
46 | nn.BatchNorm2d(inp * expand_ratio),
47 | nn.ReLU6(inplace=True),
48 | # pw-linear
49 | nn.Conv2d(inp * expand_ratio, oup, 1, 1, 0, bias=False),
50 | nn.BatchNorm2d(oup),
51 | )
52 |
53 | def forward(self, x):
54 | if self.use_res_connect:
55 | return x + self.conv(x)
56 | else:
57 | return self.conv(x)
58 |
59 |
60 | class MobileNetV2(nn.Sequential):
61 | def __init__(self, width_mult=1.0, used_layers=[3, 5, 7]):
62 | super(MobileNetV2, self).__init__()
63 |
64 | self.interverted_residual_setting = [
 65 |             # t, c, n, s, d
66 | [1, 16, 1, 1, 1],
67 | [6, 24, 2, 2, 1],
68 | [6, 32, 3, 2, 1],
69 | [6, 64, 4, 2, 1],
70 | [6, 96, 3, 1, 1],
71 | [6, 160, 3, 2, 1],
72 | [6, 320, 1, 1, 1],
73 | ]
 74 |         # NOTE: the strided table above is dead code; it is immediately
 75 |         # overridden by this dilated variant (later strides become dilations)
 76 |         self.interverted_residual_setting = [
 77 |             # t, c, n, s, d
78 | [1, 16, 1, 1, 1],
79 | [6, 24, 2, 2, 1],
80 | [6, 32, 3, 2, 1],
81 | [6, 64, 4, 1, 2],
82 | [6, 96, 3, 1, 2],
83 | [6, 160, 3, 1, 4],
84 | [6, 320, 1, 1, 4],
85 | ]
86 |
87 | self.channels = [24, 32, 96, 320]
88 | self.channels = [int(c * width_mult) for c in self.channels]
89 |
90 | input_channel = int(32 * width_mult)
91 | self.last_channel = int(1280 * width_mult) \
92 | if width_mult > 1.0 else 1280
93 |
94 | self.add_module('layer0', conv_bn(3, input_channel, 2, 0))
95 |
96 | last_dilation = 1
97 |
98 | self.used_layers = used_layers
99 |
100 | for idx, (t, c, n, s, d) in \
101 | enumerate(self.interverted_residual_setting, start=1):
102 | output_channel = int(c * width_mult)
103 |
104 | layers = []
105 |
106 | for i in range(n):
107 | if i == 0:
108 | if d == last_dilation:
109 | dd = d
110 | else:
111 | dd = max(d // 2, 1)
112 | layers.append(InvertedResidual(input_channel,
113 | output_channel, s, t, dd))
114 | else:
115 | layers.append(InvertedResidual(input_channel,
116 | output_channel, 1, t, d))
117 | input_channel = output_channel
118 |
119 | last_dilation = d
120 |
121 | self.add_module('layer%d' % (idx), nn.Sequential(*layers))
122 |
123 | def forward(self, x):
124 | outputs = []
125 | for idx in range(8):
126 | name = "layer%d" % idx
127 | x = getattr(self, name)(x)
128 | outputs.append(x)
129 | p0, p1, p2, p3, p4 = [outputs[i] for i in [1, 2, 3, 5, 7]]
130 | out = [outputs[i] for i in self.used_layers]
131 | if len(out) == 1:
132 | return out[0]
133 | return out
134 |
135 |
136 | def mobilenetv2(**kwargs):
137 | model = MobileNetV2(**kwargs)
138 | return model
139 |
140 |
141 | if __name__ == '__main__':
142 | net = mobilenetv2()
143 |
144 | print(net)
145 |
 146 |     # torch.autograd.Variable is deprecated; a plain tensor suffices
 147 |     tensor = torch.randn(1, 3, 255, 255).cuda()
148 |
149 | net = net.cuda()
150 |
151 | out = net(tensor)
152 |
153 | for i, p in enumerate(out):
154 | print(i, p.size())
155 |
--------------------------------------------------------------------------------
/pysot/models/centernet/post_process.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 | from .image import transform_preds
7 | from .ddd_utils import ddd2locrot
8 |
9 |
10 | def get_pred_depth(depth):
11 | return depth
12 |
13 | def get_alpha(rot):
14 | # output: (B, 8) [bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos,
15 | # bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]
16 | # return rot[:, 0]
17 | idx = rot[:, 1] > rot[:, 5]
18 | alpha1 = np.arctan2(rot[:, 2], rot[:, 3]) + (-0.5 * np.pi)
19 | alpha2 = np.arctan2(rot[:, 6], rot[:, 7]) + ( 0.5 * np.pi)
20 | return alpha1 * idx + alpha2 * (1 - idx)
21 |
22 |
23 | def ddd_post_process_2d(dets, c, s, opt):
24 | # dets: batch x max_dets x dim
25 | # return 1-based class det list
26 | ret = []
27 | include_wh = dets.shape[2] > 16
28 | for i in range(dets.shape[0]):
29 | top_preds = {}
30 | dets[i, :, :2] = transform_preds(
31 | dets[i, :, 0:2], c[i], s[i], (opt.output_w, opt.output_h))
32 | classes = dets[i, :, -1]
33 | for j in range(opt.num_classes):
34 | inds = (classes == j)
35 | top_preds[j + 1] = np.concatenate([
36 | dets[i, inds, :3].astype(np.float32),
37 | get_alpha(dets[i, inds, 3:11])[:, np.newaxis].astype(np.float32),
38 | get_pred_depth(dets[i, inds, 11:12]).astype(np.float32),
39 | dets[i, inds, 12:15].astype(np.float32)], axis=1)
40 | if include_wh:
41 | top_preds[j + 1] = np.concatenate([
42 | top_preds[j + 1],
43 | transform_preds(
44 | dets[i, inds, 15:17], c[i], s[i], (opt.output_w, opt.output_h))
45 | .astype(np.float32)], axis=1)
46 | ret.append(top_preds)
47 | return ret
48 |
49 | def ddd_post_process_3d(dets, calibs):
50 | # dets: batch x max_dets x dim
51 | # return 1-based class det list
52 | ret = []
53 | for i in range(len(dets)):
54 | preds = {}
55 | for cls_ind in dets[i].keys():
56 | preds[cls_ind] = []
57 | for j in range(len(dets[i][cls_ind])):
58 | center = dets[i][cls_ind][j][:2]
59 | score = dets[i][cls_ind][j][2]
60 | alpha = dets[i][cls_ind][j][3]
61 | depth = dets[i][cls_ind][j][4]
62 | dimensions = dets[i][cls_ind][j][5:8]
63 | wh = dets[i][cls_ind][j][8:10]
64 | locations, rotation_y = ddd2locrot(
65 | center, alpha, dimensions, depth, calibs[0])
66 | bbox = [center[0] - wh[0] / 2, center[1] - wh[1] / 2,
67 | center[0] + wh[0] / 2, center[1] + wh[1] / 2]
68 | pred = [alpha] + bbox + dimensions.tolist() + \
69 | locations.tolist() + [rotation_y, score]
70 | preds[cls_ind].append(pred)
71 | preds[cls_ind] = np.array(preds[cls_ind], dtype=np.float32)
72 | ret.append(preds)
73 | return ret
74 |
75 | def ddd_post_process(dets, c, s, calibs, opt):
76 | # dets: batch x max_dets x dim
77 | # return 1-based class det list
78 | dets = ddd_post_process_2d(dets, c, s, opt)
79 | dets = ddd_post_process_3d(dets, calibs)
80 | return dets
81 |
82 |
83 | def ctdet_post_process(dets, c, s, h, w, num_classes):
84 | # dets: batch x max_dets x dim
85 | # return 1-based class det dict
86 | for i in range(dets.shape[0]):
87 | dets[i, :, :2] = transform_preds(
88 | dets[i, :, 0:2], c, s, (w, h))
89 | dets[i, :, 2:4] = transform_preds(
90 | dets[i, :, 2:4], c, s, (w, h))
91 | # classes = dets[i, :, -1]
92 | # for j in range(num_classes):
93 | # inds = (classes == j)
94 | # top_preds[j + 1] = np.concatenate([
95 | # dets[i, inds, :4].astype(np.float32),
96 | # dets[i, inds, 4:5].astype(np.float32)], axis=1).tolist()
97 | # ret.append(top_preds)
98 | return dets
99 |
100 |
101 | def multi_pose_post_process(dets, c, s, h, w):
102 | # dets: batch x max_dets x 40
103 | # return list of 39 in image coord
104 | ret = []
105 | for i in range(dets.shape[0]):
106 | bbox = transform_preds(dets[i, :, :4].reshape(-1, 2), c[i], s[i], (w, h))
107 | pts = transform_preds(dets[i, :, 5:39].reshape(-1, 2), c[i], s[i], (w, h))
108 | top_preds = np.concatenate(
109 | [bbox.reshape(-1, 4), dets[i, :, 4:5],
110 | pts.reshape(-1, 34)], axis=1).astype(np.float32).tolist()
111 | ret.append({np.ones(1, dtype=np.int32)[0]: top_preds})
112 | return ret
113 |
--------------------------------------------------------------------------------
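`get_alpha` decodes CenterNet's two-bin rotation head: whichever bin is more confident supplies its sin/cos pair, offset by -pi/2 or +pi/2. A small hand-made check:

```python
import numpy as np
from pysot.models.centernet.post_process import get_alpha

rot = np.zeros((1, 8))
rot[0, 1] = 1.0                                   # bin 1 more confident
rot[0, 2], rot[0, 3] = np.sin(0.3), np.cos(0.3)   # its sin/cos pair
print(get_alpha(rot))                             # [0.3 - pi/2] ~= [-1.2708]
```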
/pysot/models/centernet/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import torch
6 |
7 | class AverageMeter(object):
8 | """Computes and stores the average and current value"""
9 | def __init__(self):
10 | self.reset()
11 |
12 | def reset(self):
13 | self.val = 0
14 | self.avg = 0
15 | self.sum = 0
16 | self.count = 0
17 |
18 | def update(self, val, n=1):
19 | self.val = val
20 | self.sum += val * n
21 | self.count += n
22 | if self.count > 0:
23 | self.avg = self.sum / self.count
--------------------------------------------------------------------------------
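Typical `AverageMeter` usage in a training loop:

```python
from pysot.models.centernet.utils import AverageMeter

meter = AverageMeter()
for loss in (0.9, 0.7, 0.5):
    meter.update(loss)         # n=1 per step
print(meter.val, meter.avg)    # last value 0.5, running mean 0.7
```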
/pysot/models/head/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | from pysot.models.head.mask import MaskCorr, Refine
7 | from pysot.models.head.rpn import UPChannelRPN, DepthwiseRPN, MultiRPN
8 |
9 | RPNS = {
10 | 'UPChannelRPN': UPChannelRPN,
11 | 'DepthwiseRPN': DepthwiseRPN,
12 | 'MultiRPN': MultiRPN
13 | }
14 |
15 | MASKS = {
16 | 'MaskCorr': MaskCorr,
17 | }
18 |
19 | REFINE = {
20 | 'Refine': Refine,
21 | }
22 |
23 |
24 | def get_rpn_head(name, **kwargs):
25 | return RPNS[name](**kwargs)
26 |
27 |
28 | def get_mask_head(name, **kwargs):
29 | return MASKS[name](**kwargs)
30 |
31 |
32 | def get_refine_head(name):
33 | return REFINE[name]()
34 |
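
The factories simply forward keyword arguments to the selected class; for example (the values shown are the `DepthwiseRPN` defaults):

    rpn_head = get_rpn_head('DepthwiseRPN',
                            anchor_num=5, in_channels=256, out_channels=256)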
--------------------------------------------------------------------------------
/pysot/models/head/mask.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 |
9 | from pysot.models.head.rpn import DepthwiseXCorr
10 | from pysot.core.xcorr import xcorr_depthwise
11 |
12 |
13 | class MaskCorr(DepthwiseXCorr):
14 | def __init__(self, in_channels, hidden, out_channels,
15 | kernel_size=3, hidden_kernel_size=5):
16 | super(MaskCorr, self).__init__(in_channels, hidden,
17 | out_channels, kernel_size,
18 | hidden_kernel_size)
19 |
20 | def forward(self, kernel, search):
21 | kernel = self.conv_kernel(kernel)
22 | search = self.conv_search(search)
23 | feature = xcorr_depthwise(search, kernel)
24 | out = self.head(feature)
25 | return out, feature
26 |
27 |
28 | class Refine(nn.Module):
29 | def __init__(self):
30 | super(Refine, self).__init__()
31 | self.v0 = nn.Sequential(
32 | nn.Conv2d(64, 16, 3, padding=1),
33 | nn.ReLU(inplace=True),
34 | nn.Conv2d(16, 4, 3, padding=1),
35 | nn.ReLU(inplace=True),
36 | )
37 | self.v1 = nn.Sequential(
38 | nn.Conv2d(256, 64, 3, padding=1),
39 | nn.ReLU(inplace=True),
40 | nn.Conv2d(64, 16, 3, padding=1),
41 | nn.ReLU(inplace=True),
42 | )
43 | self.v2 = nn.Sequential(
44 | nn.Conv2d(512, 128, 3, padding=1),
45 | nn.ReLU(inplace=True),
46 | nn.Conv2d(128, 32, 3, padding=1),
47 | nn.ReLU(inplace=True),
48 | )
49 | self.h2 = nn.Sequential(
50 | nn.Conv2d(32, 32, 3, padding=1),
51 | nn.ReLU(inplace=True),
52 | nn.Conv2d(32, 32, 3, padding=1),
53 | nn.ReLU(inplace=True),
54 | )
55 | self.h1 = nn.Sequential(
56 | nn.Conv2d(16, 16, 3, padding=1),
57 | nn.ReLU(inplace=True),
58 | nn.Conv2d(16, 16, 3, padding=1),
59 | nn.ReLU(inplace=True),
60 | )
61 | self.h0 = nn.Sequential(
62 | nn.Conv2d(4, 4, 3, padding=1),
63 | nn.ReLU(inplace=True),
64 | nn.Conv2d(4, 4, 3, padding=1),
65 | nn.ReLU(inplace=True),
66 | )
67 |
68 | self.deconv = nn.ConvTranspose2d(256, 32, 15, 15)
69 | self.post0 = nn.Conv2d(32, 16, 3, padding=1)
70 | self.post1 = nn.Conv2d(16, 4, 3, padding=1)
71 | self.post2 = nn.Conv2d(4, 1, 3, padding=1)
72 |
73 | def forward(self, f, corr_feature, pos):
74 | p0 = F.pad(f[0], [16, 16, 16, 16])[:, :, 4*pos[0]:4*pos[0]+61, 4*pos[1]:4*pos[1]+61]
75 | p1 = F.pad(f[1], [8, 8, 8, 8])[:, :, 2*pos[0]:2*pos[0]+31, 2*pos[1]:2*pos[1]+31]
76 | p2 = F.pad(f[2], [4, 4, 4, 4])[:, :, pos[0]:pos[0]+15, pos[1]:pos[1]+15]
77 |
78 | p3 = corr_feature[:, :, pos[0], pos[1]].view(-1, 256, 1, 1)
79 |
80 | out = self.deconv(p3)
81 | out = self.post0(F.upsample(self.h2(out) + self.v2(p2), size=(31, 31)))
82 | out = self.post1(F.upsample(self.h1(out) + self.v1(p1), size=(61, 61)))
83 | out = self.post2(F.upsample(self.h0(out) + self.v0(p0), size=(127, 127)))
84 | out = out.view(-1, 127*127)
85 | return out
86 |
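
A shape-level smoke test for `Refine` (editorial sketch; the pyramid sizes below are assumptions chosen to be consistent with the pad/slice arithmetic in `forward`, not values taken from this repository):

    import torch

    refine = Refine().eval()
    # Assumed feature pyramid: 125/63/31 spatial sizes match the 61/31/15
    # crops above, and a 25x25 correlation map bounds the pos indices.
    f = [torch.randn(1, 64, 125, 125),
         torch.randn(1, 256, 63, 63),
         torch.randn(1, 512, 31, 31)]
    corr_feature = torch.randn(1, 256, 25, 25)
    with torch.no_grad():
        mask = refine(f, corr_feature, pos=(12, 12))
    print(mask.shape)  # torch.Size([1, 16129]) == (1, 127*127)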
--------------------------------------------------------------------------------
/pysot/models/head/rpn.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 |
10 | from pysot.core.xcorr import xcorr_fast, xcorr_depthwise
11 | from pysot.models.init_weight import init_weights
12 |
13 | class RPN(nn.Module):
14 | def __init__(self):
15 | super(RPN, self).__init__()
16 |
17 | def forward(self, z_f, x_f):
18 | raise NotImplementedError
19 |
20 | class UPChannelRPN(RPN):
21 | def __init__(self, anchor_num=5, feature_in=256):
22 | super(UPChannelRPN, self).__init__()
23 |
24 | cls_output = 2 * anchor_num
25 | loc_output = 4 * anchor_num
26 |
27 | self.template_cls_conv = nn.Conv2d(feature_in,
28 | feature_in * cls_output, kernel_size=3)
29 | self.template_loc_conv = nn.Conv2d(feature_in,
30 | feature_in * loc_output, kernel_size=3)
31 |
32 | self.search_cls_conv = nn.Conv2d(feature_in,
33 | feature_in, kernel_size=3)
34 | self.search_loc_conv = nn.Conv2d(feature_in,
35 | feature_in, kernel_size=3)
36 |
37 | self.loc_adjust = nn.Conv2d(loc_output, loc_output, kernel_size=1)
38 |
39 |
40 | def forward(self, z_f, x_f):
41 | cls_kernel = self.template_cls_conv(z_f)
42 | loc_kernel = self.template_loc_conv(z_f)
43 |
44 | cls_feature = self.search_cls_conv(x_f)
45 | loc_feature = self.search_loc_conv(x_f)
46 |
47 | cls = xcorr_fast(cls_feature, cls_kernel)
48 | loc = self.loc_adjust(xcorr_fast(loc_feature, loc_kernel))
49 | return cls, loc
50 |
51 |
52 | class DepthwiseXCorr(nn.Module):
53 | def __init__(self, in_channels, hidden, out_channels, kernel_size=3, hidden_kernel_size=5):
54 | super(DepthwiseXCorr, self).__init__()
55 | self.conv_kernel = nn.Sequential(
56 | nn.Conv2d(in_channels, hidden, kernel_size=kernel_size, bias=False),
57 | nn.BatchNorm2d(hidden),
58 | nn.ReLU(inplace=True),
59 | )
60 | self.conv_search = nn.Sequential(
61 | nn.Conv2d(in_channels, hidden, kernel_size=kernel_size, bias=False),
62 | nn.BatchNorm2d(hidden),
63 | nn.ReLU(inplace=True),
64 | )
65 | self.head = nn.Sequential(
66 | nn.Conv2d(hidden, hidden, kernel_size=1, bias=False),
67 | nn.BatchNorm2d(hidden),
68 | nn.ReLU(inplace=True),
69 | nn.Conv2d(hidden, out_channels, kernel_size=1)
70 | )
71 |
72 |
73 | def forward(self, kernel, search):
74 | kernel = self.conv_kernel(kernel)
75 | search = self.conv_search(search)
76 | feature = xcorr_depthwise(search, kernel)
77 | out = self.head(feature)
78 | return out
79 |
80 |
81 | class DepthwiseRPN(RPN):
82 | def __init__(self, anchor_num=5, in_channels=256, out_channels=256):
83 | super(DepthwiseRPN, self).__init__()
84 | self.cls = DepthwiseXCorr(in_channels, out_channels, 2 * anchor_num)
85 | self.loc = DepthwiseXCorr(in_channels, out_channels, 4 * anchor_num)
86 |
87 | def forward(self, z_f, x_f):
88 | cls = self.cls(z_f, x_f)
89 | loc = self.loc(z_f, x_f)
90 | return cls, loc
91 |
92 |
93 | class MultiRPN(RPN):
94 | def __init__(self, anchor_num, in_channels, weighted=False):
95 | super(MultiRPN, self).__init__()
96 | self.weighted = weighted
97 | for i in range(len(in_channels)):
98 | self.add_module('rpn'+str(i+2),
99 | DepthwiseRPN(anchor_num, in_channels[i], in_channels[i]))
100 | if self.weighted:
101 | self.cls_weight = nn.Parameter(torch.ones(len(in_channels)))
102 | self.loc_weight = nn.Parameter(torch.ones(len(in_channels)))
103 |
104 | def forward(self, z_fs, x_fs):
105 | cls = []
106 | loc = []
107 | for idx, (z_f, x_f) in enumerate(zip(z_fs, x_fs), start=2):
108 | rpn = getattr(self, 'rpn'+str(idx))
109 | c, l = rpn(z_f, x_f)
110 | cls.append(c)
111 | loc.append(l)
112 |
113 | if self.weighted:
114 | cls_weight = F.softmax(self.cls_weight, 0)
115 | loc_weight = F.softmax(self.loc_weight, 0)
116 |
117 | def avg(lst):
118 | return sum(lst) / len(lst)
119 |
120 | def weighted_avg(lst, weight):
121 | s = 0
122 | for i in range(len(weight)):
123 | s += lst[i] * weight[i]
124 | return s
125 |
126 | if self.weighted:
127 | return weighted_avg(cls, cls_weight), weighted_avg(loc, loc_weight)
128 | else:
129 | return avg(cls), avg(loc)
130 |
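
A shape-level smoke test for `DepthwiseRPN` (editorial sketch; the 7x7 template and 31x31 search feature sizes are assumptions matching typical SiamRPN++ crops):

    import torch

    rpn = DepthwiseRPN(anchor_num=5, in_channels=256, out_channels=256).eval()
    z_f = torch.randn(1, 256, 7, 7)     # template features
    x_f = torch.randn(1, 256, 31, 31)   # search features
    with torch.no_grad():
        cls, loc = rpn(z_f, x_f)
    print(cls.shape, loc.shape)  # (1, 10, 25, 25) and (1, 20, 25, 25)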
--------------------------------------------------------------------------------
/pysot/models/init_weight.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | import math
4 | import warnings
5 |
6 | from torch.nn.init import _calculate_fan_in_and_fan_out
7 |
8 | def init_weights(model):
9 | for m in model.modules():
10 | if isinstance(m, nn.Conv2d):
11 | nn.init.kaiming_normal_(m.weight.data,
12 | mode='fan_out',
13 | nonlinearity='relu')
14 | elif isinstance(m, nn.BatchNorm2d):
15 | m.weight.data.fill_(1)
16 | m.bias.data.zero_()
17 |
18 | def _no_grad_trunc_normal_(tensor, mean, std, a, b):
19 | # Cut & paste from PyTorch official master until it's in a few official releases - RW
20 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
21 | def norm_cdf(x):
22 | # Computes standard normal cumulative distribution function
23 | return (1. + math.erf(x / math.sqrt(2.))) / 2.
24 |
25 | if (mean < a - 2 * std) or (mean > b + 2 * std):
26 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
27 | "The distribution of values may be incorrect.",
28 | stacklevel=2)
29 |
30 | with torch.no_grad():
31 | # Values are generated by using a truncated uniform distribution and
32 | # then using the inverse CDF for the normal distribution.
33 | # Get upper and lower cdf values
34 | l = norm_cdf((a - mean) / std)
35 | u = norm_cdf((b - mean) / std)
36 |
37 | # Uniformly fill tensor with values from [l, u], then translate to
38 | # [2l-1, 2u-1].
39 | tensor.uniform_(2 * l - 1, 2 * u - 1)
40 |
41 | # Use inverse cdf transform for normal distribution to get truncated
42 | # standard normal
43 | tensor.erfinv_()
44 |
45 | # Transform to proper mean, std
46 | tensor.mul_(std * math.sqrt(2.))
47 | tensor.add_(mean)
48 |
49 | # Clamp to ensure it's in the proper range
50 | tensor.clamp_(min=a, max=b)
51 | return tensor
52 |
53 |
54 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
55 | # type: (Tensor, float, float, float, float) -> Tensor
56 | r"""Fills the input Tensor with values drawn from a truncated
57 | normal distribution. The values are effectively drawn from the
58 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
59 | with values outside :math:`[a, b]` redrawn until they are within
60 | the bounds. The method used for generating the random values works
61 | best when :math:`a \leq \text{mean} \leq b`.
62 | Args:
63 | tensor: an n-dimensional `torch.Tensor`
64 | mean: the mean of the normal distribution
65 | std: the standard deviation of the normal distribution
66 | a: the minimum cutoff value
67 | b: the maximum cutoff value
68 | Examples:
69 | >>> w = torch.empty(3, 5)
70 | >>> nn.init.trunc_normal_(w)
71 | """
72 | return _no_grad_trunc_normal_(tensor, mean, std, a, b)
73 |
74 |
75 | def variance_scaling_(tensor, scale=1.0, mode='fan_in', distribution='normal'):
76 | fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
77 | if mode == 'fan_in':
78 | denom = fan_in
79 | elif mode == 'fan_out':
80 | denom = fan_out
81 | elif mode == 'fan_avg':
82 | denom = (fan_in + fan_out) / 2
83 |
84 | variance = scale / denom
85 |
86 | if distribution == "truncated_normal":
87 | # constant is stddev of standard normal truncated to (-2, 2)
88 | trunc_normal_(tensor, std=math.sqrt(variance) / .87962566103423978)
89 | elif distribution == "normal":
90 | tensor.normal_(std=math.sqrt(variance))
91 | elif distribution == "uniform":
92 | bound = math.sqrt(3 * variance)
93 | tensor.uniform_(-bound, bound)
94 | else:
95 | raise ValueError(f"invalid distribution {distribution}")
96 |
97 |
98 | def lecun_normal_(tensor):
99 | variance_scaling_(tensor, mode='fan_in', distribution='truncated_normal')
100 |
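
For instance, `lecun_normal_` fills a tensor with fan-in-scaled truncated-normal values (illustrative check):

    import torch

    w = torch.empty(64, 128)   # fan_in = 128
    lecun_normal_(w)
    print(w.std().item())      # roughly sqrt(1/128), i.e. about 0.088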
--------------------------------------------------------------------------------
/pysot/models/loss.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import torch
7 | import torch.nn.functional as F
8 | from .centernet.losses import FocalLoss, RegL1Loss
9 |
10 | def _sigmoid(x):
11 | y = torch.clamp(x.sigmoid_(), min=1e-4, max=1-1e-4)
12 | return y
13 |
14 | def get_cls_loss(pred, label, select):
15 | if len(select.size()) == 0 or \
16 | select.size() == torch.Size([0]):
17 | return 0
18 | pred = torch.index_select(pred, 0, select)
19 | label = torch.index_select(label, 0, select)
20 | return F.nll_loss(pred, label)
21 |
22 |
23 | def select_cross_entropy_loss(pred, label):
24 | pred = pred.view(-1, 2)
25 | label = label.view(-1)
26 | pos = label.data.eq(1).nonzero().squeeze().cuda()
27 | neg = label.data.eq(0).nonzero().squeeze().cuda()
28 | loss_pos = get_cls_loss(pred, label, pos)
29 | loss_neg = get_cls_loss(pred, label, neg)
30 | return loss_pos * 0.5 + loss_neg * 0.5
31 |
32 |
33 | def weight_l1_loss(pred_loc, label_loc, loss_weight):
34 | b, _, sh, sw = pred_loc.size()
35 | pred_loc = pred_loc.view(b, 4, -1, sh, sw)
36 | diff = (pred_loc - label_loc).abs()
37 | diff = diff.sum(dim=1).view(b, -1, sh, sw)
38 | loss = diff * loss_weight
39 | return loss.sum().div(b)
40 |
41 | def l1_loss(pred_loc, label_loc):
42 | b = pred_loc.shape[0]
43 | # pred_loc = pred_loc.view(b, 4, -1, sh, sw)
44 | diff = (pred_loc - label_loc).abs()
45 | loss = diff.sum()
46 | return loss.sum().div(b)
47 |
48 | def trend_l1_loss(pred_loc, label_loc):
49 | b, _, _ = pred_loc.size()
50 | d = torch.FloatTensor([0.0476, 0.0476, 0.1429, 0.381, 0.381]).cuda()
51 | diff = (pred_loc - label_loc).abs()
52 | loss = diff.sum(dim=[0,2])*d*5
53 | return loss.sum().div(b)
54 |
55 | def norm_l1_loss(pred_loc, label_loc, norm_wc, norm_ws):
56 | b, _, _ = pred_loc.size()
57 | # pred_loc = pred_loc.view(b, 4, -1, sh, sw)
58 | norm_loc = torch.zeros_like(label_loc)
59 | # x y use norm
60 | norm_loc[:,:,0:2] = label_loc[:,:,0:2].div(norm_wc.unsqueeze(-1).unsqueeze(-1))
61 | # w h use origin
62 | norm_loc[:,:,2:4] = label_loc[:,:,2:4].div(norm_ws.unsqueeze(-1).unsqueeze(-1))
63 | diff = (pred_loc - norm_loc).abs()
64 | # diff = diff.div(norm_w.unsqueeze(-1).unsqueeze(-1))
65 | loss = diff.sum()
66 | return loss.sum().div(b)
67 |
68 | class CtdetLoss(torch.nn.Module):
69 | def __init__(self, cfg):
70 | super(CtdetLoss, self).__init__()
71 | self.crit = torch.nn.MSELoss() if cfg.PRED.MSE_LOSS else FocalLoss()
72 | self.crit_reg = l1_loss
73 | self.crit_wh = l1_loss
74 | self.cfg = cfg
75 |
76 | def forward(self, output, batch):
77 | cfg = self.cfg
78 | # hm_loss, wh_loss, off_loss = 0, 0, 0
79 | if not cfg.PRED.MSE_LOSS:
80 | output['hm'] = _sigmoid(output['hm'])
81 | hm_loss = self.crit(output['hm'], batch['pred_hm'].cuda())
82 | wh_loss = self.crit_wh(output['wh'], batch['pred_wh'].cuda())
83 | off_loss = self.crit_reg(output['reg'], batch['reg'].cuda())
84 | # for s in range(cfg.TRAIN.PRE_TARGET):
85 | # hm_loss += self.crit(output['hm'][:,s:(s+1)], batch['pred_hm'][:,s:(s+1)].cuda())
86 | # wh_loss += self.crit_wh(output['wh'][:,2*s:2*(s+1)], batch['reg_mask'][:,:,s].cuda(), batch['ind'][:,:,s].cuda(), batch['pred_wh'][:,:,s].cuda())
87 | # off_loss += self.crit_reg(output['reg'][:,2*s:2*(s+1)], batch['reg_mask'][:,:,s].cuda(), batch['ind'][:,:,s].cuda(), batch['reg'][:,:,s].cuda())
88 |
89 | loss = cfg.PRED.HM_W * hm_loss + cfg.PRED.WH_W * wh_loss + \
90 | cfg.PRED.REG_W * off_loss
91 | loss_stats = {'pred_loss': loss, 'hm_loss': hm_loss,
92 | 'wh_loss': wh_loss, 'off_loss': off_loss}
93 | return loss, loss_stats
94 |
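
A CPU-only sanity check of `weight_l1_loss` (illustrative shapes: batch 2, 5 anchors, 25x25 score map; note `select_cross_entropy_loss` above requires CUDA as written):

    import torch

    b, k, sh, sw = 2, 5, 25, 25
    pred_loc = torch.randn(b, 4 * k, sh, sw)
    label_loc = torch.randn(b, 4, k, sh, sw)
    loss_weight = torch.rand(b, k, sh, sw)
    print(weight_l1_loss(pred_loc, label_loc, loss_weight))  # scalar tensor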
--------------------------------------------------------------------------------
/pysot/models/neck/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 |
10 | from pysot.models.neck.neck import AdjustLayer, AdjustAllLayer
11 |
12 | NECKS = {
13 | 'AdjustLayer': AdjustLayer,
14 | 'AdjustAllLayer': AdjustAllLayer
15 | }
16 |
17 | def get_neck(name, **kwargs):
18 | return NECKS[name](**kwargs)
19 |
--------------------------------------------------------------------------------
/pysot/models/neck/neck.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import torch.nn as nn
7 |
8 |
9 | class AdjustLayer(nn.Module):
10 | def __init__(self, in_channels, out_channels, center_size=7):
11 | super(AdjustLayer, self).__init__()
12 | self.downsample = nn.Sequential(
13 | nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),
14 | nn.BatchNorm2d(out_channels),
15 | )
16 | self.center_size = center_size
17 |
18 | def forward(self, x):
19 | x = self.downsample(x)
20 | if x.size(3) < 20:
21 | l = (x.size(3) - self.center_size) // 2
22 | r = l + self.center_size
23 | x = x[:, :, l:r, l:r]
24 | return x
25 |
26 |
27 | class AdjustAllLayer(nn.Module):
28 | def __init__(self, in_channels, out_channels, center_size=7):
29 | super(AdjustAllLayer, self).__init__()
30 | self.num = len(out_channels)
31 | if self.num == 1:
32 | self.downsample = AdjustLayer(in_channels[0],
33 | out_channels[0],
34 | center_size)
35 | else:
36 | for i in range(self.num):
37 | self.add_module('downsample'+str(i+2),
38 | AdjustLayer(in_channels[i],
39 | out_channels[i],
40 | center_size))
41 |
42 | def forward(self, features):
43 | if self.num == 1:
44 | return self.downsample(features)
45 | else:
46 | out = []
47 | for i in range(self.num):
48 | adj_layer = getattr(self, 'downsample'+str(i+2))
49 | out.append(adj_layer(features[i]))
50 | return out
51 |
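
A quick shape check for `AdjustLayer` (illustrative; 1024-to-256 mirrors a ResNet-50 layer setting used by the experiment configs):

    import torch

    adjust = AdjustLayer(in_channels=1024, out_channels=256).eval()
    x = torch.randn(1, 1024, 15, 15)
    with torch.no_grad():
        y = adjust(x)
    print(y.shape)  # (1, 256, 7, 7): the center 7x7 crop is taken since 15 < 20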
--------------------------------------------------------------------------------
/pysot/models/predictor/__init__.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | from pysot.models.predictor.base_predictor import BasePredictor
7 | # motion predictor
8 | from pysot.models.predictor.kf import KalmanF
9 | from pysot.models.predictor.lb_5 import LearnBaseV5
10 | # visual predictor
11 | from pysot.models.predictor.lbv_5 import VisualBaseV5
12 | # joint predictor
13 | from pysot.models.predictor.mv_v16 import MVV16
14 |
15 | Predictors = {
16 | 'KF': KalmanF,
17 | 'LB_v5': LearnBaseV5,
18 | 'LBv_v5':VisualBaseV5,
19 | 'MV_v16': MVV16,
20 | }
21 |
22 |
23 | def get_predictor(name, **kwargs):
24 | return Predictors[name](**kwargs)
25 |
26 |
--------------------------------------------------------------------------------
/pysot/models/predictor/base_predictor.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import cv2
7 | import numpy as np
8 | import torch
9 |
10 | from pysot.core.config import cfg
11 | import torch
12 | import torch.nn as nn
13 |
14 |
15 | class BasePredictor(nn.Module):
16 | """ Base predictor for prediction
17 | """
18 | def init(self, box_init, img_0):
19 | """
20 | args:
21 | box_init(np.ndarray): [l, t, w, h]
22 | img_0(np.ndarray): BGR image
23 | """
24 | raise NotImplementedError
25 |
26 | def predict(self, curr_fid, data, delta_t):
27 | """
28 | args:
29 | curr_fid(int): latest processed frame (base frame)
30 | data(dict): output of tracker
31 | delta_t(list/ndarray): target delta_t for prediction (target frame)
32 | return:
33 | bbox(list/ndarray): predicted boxes [[cx, cy, w, h]_1, [cx, cy, w, h]_2, ...]
34 | pre_fidx(list/ndarray): future frame id for predicted boxes [fidx_1, fidx_2, ...]
35 | """
36 | raise NotImplementedError
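
To make the interface concrete, a deliberately simple constant-velocity subclass might look as follows (editorial sketch: `ConstantVelocityPredictor` and the `data['bbox']` key are hypothetical illustrations, not part of this repository):

    import numpy as np

    class ConstantVelocityPredictor(BasePredictor):  # hypothetical example
        def init(self, box_init, img_0):
            l, t, w, h = box_init
            self.center = np.array([l + w / 2, t + h / 2], dtype=np.float32)
            self.size = np.array([w, h], dtype=np.float32)
            self.velocity = np.zeros(2, dtype=np.float32)
            self.last_fid = 0

        def predict(self, curr_fid, data, delta_t):
            cx, cy, w, h = data['bbox']   # assumed tracker-output key
            new_center = np.array([cx, cy], dtype=np.float32)
            dt = max(curr_fid - self.last_fid, 1)
            self.velocity = (new_center - self.center) / dt
            self.center = new_center
            self.size = np.array([w, h], dtype=np.float32)
            self.last_fid = curr_fid
            bbox = [np.concatenate([self.center + self.velocity * d, self.size])
                    for d in delta_t]
            pre_fidx = [curr_fid + d for d in delta_t]
            return bbox, pre_fidx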
--------------------------------------------------------------------------------
/pysot/tracker/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__init__.py
--------------------------------------------------------------------------------
/pysot/tracker/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/tracker/__pycache__/base_tracker.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/base_tracker.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/tracker/__pycache__/siammask_tracker.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/siammask_tracker.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/tracker/__pycache__/siammask_tracker_f.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/siammask_tracker_f.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/tracker/__pycache__/siamrpn_tracker.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/siamrpn_tracker.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/tracker/__pycache__/siamrpn_tracker_f.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/siamrpn_tracker_f.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/tracker/__pycache__/siamrpnlt_tracker.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/siamrpnlt_tracker.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/tracker/__pycache__/tracker_builder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/tracker/__pycache__/tracker_builder.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/tracker/base_tracker.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import cv2
7 | import numpy as np
8 | import torch
9 |
10 | from pysot.core.config import cfg
11 |
12 |
13 | class BaseTracker(object):
14 | """ Base tracker of single objec tracking
15 | """
16 | def init(self, img, bbox):
17 | """
18 | args:
19 | img(np.ndarray): BGR image
20 | bbox(list): [x, y, width, height]
21 | x, y need to be 0-based
22 | """
23 | raise NotImplementedError
24 |
25 | def track(self, img):
26 | """
27 | args:
28 | img(np.ndarray): BGR image
29 | return:
30 | bbox(list):[x, y, width, height]
31 | """
32 | raise NotImplementedError
33 |
34 |
35 | class SiameseTracker(BaseTracker):
36 | def get_subwindow(self, im, pos, model_sz, original_sz, avg_chans):
37 | """
38 | args:
39 | im: bgr based image
40 | pos: center position
41 | model_sz: exemplar size
42 |             original_sz: original size of the crop in the source image
43 | avg_chans: channel average
44 | """
45 | if isinstance(pos, float):
46 | pos = [pos, pos]
47 | sz = original_sz
48 | im_sz = im.shape
49 | c = (original_sz + 1) / 2
50 | # context_xmin = round(pos[0] - c) # py2 and py3 round
51 | context_xmin = np.floor(pos[0] - c + 0.5)
52 | context_xmax = context_xmin + sz - 1
53 | # context_ymin = round(pos[1] - c)
54 | context_ymin = np.floor(pos[1] - c + 0.5)
55 | context_ymax = context_ymin + sz - 1
56 | left_pad = int(max(0., -context_xmin))
57 | top_pad = int(max(0., -context_ymin))
58 | right_pad = int(max(0., context_xmax - im_sz[1] + 1))
59 | bottom_pad = int(max(0., context_ymax - im_sz[0] + 1))
60 |
61 | context_xmin = context_xmin + left_pad
62 | context_xmax = context_xmax + left_pad
63 | context_ymin = context_ymin + top_pad
64 | context_ymax = context_ymax + top_pad
65 |
66 | r, c, k = im.shape
67 | if any([top_pad, bottom_pad, left_pad, right_pad]):
68 | size = (r + top_pad + bottom_pad, c + left_pad + right_pad, k)
69 | te_im = np.zeros(size, np.uint8)
70 | te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im
71 | if top_pad:
72 | te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans
73 | if bottom_pad:
74 | te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans
75 | if left_pad:
76 | te_im[:, 0:left_pad, :] = avg_chans
77 | if right_pad:
78 | te_im[:, c + left_pad:, :] = avg_chans
79 | im_patch = te_im[int(context_ymin):int(context_ymax + 1),
80 | int(context_xmin):int(context_xmax + 1), :]
81 | else:
82 | im_patch = im[int(context_ymin):int(context_ymax + 1),
83 | int(context_xmin):int(context_xmax + 1), :]
84 |
85 | if not np.array_equal(model_sz, original_sz):
86 | im_patch = cv2.resize(im_patch, (model_sz, model_sz))
87 | im_patch = im_patch.transpose(2, 0, 1)
88 | im_patch = im_patch[np.newaxis, :, :, :]
89 | im_patch = im_patch.astype(np.float32)
90 | im_patch = torch.from_numpy(im_patch)
91 | if cfg.CUDA:
92 | im_patch = im_patch.cuda()
93 | return im_patch
94 |
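
A minimal CPU-only exercise of `get_subwindow` (illustrative: the image, position, and crop sizes are made-up values; `cfg.CUDA` is forced off so the patch stays on CPU):

    import numpy as np
    from pysot.core.config import cfg

    cfg.CUDA = False   # keep this example on CPU
    im = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
    avg_chans = np.mean(im, axis=(0, 1))
    patch = SiameseTracker().get_subwindow(
        im, pos=(320.0, 240.0), model_sz=127,
        original_sz=200, avg_chans=avg_chans)
    print(patch.shape)  # torch.Size([1, 3, 127, 127])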
--------------------------------------------------------------------------------
/pysot/tracker/siamrpnlt_tracker.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import numpy as np
7 |
8 | from pysot.core.config import cfg
9 | from pysot.tracker.siamrpn_tracker import SiamRPNTracker
10 |
11 |
12 | class SiamRPNLTTracker(SiamRPNTracker):
13 | def __init__(self, model):
14 | super(SiamRPNLTTracker, self).__init__(model)
15 | self.longterm_state = False
16 |
17 | def track(self, img):
18 | """
19 | args:
20 | img(np.ndarray): BGR image
21 | return:
22 | bbox(list):[x, y, width, height]
23 | """
24 | w_z = self.size[0] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
25 | h_z = self.size[1] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
26 | s_z = np.sqrt(w_z * h_z)
27 | scale_z = cfg.TRACK.EXEMPLAR_SIZE / s_z
28 |
29 | if self.longterm_state:
30 | instance_size = cfg.TRACK.LOST_INSTANCE_SIZE
31 | else:
32 | instance_size = cfg.TRACK.INSTANCE_SIZE
33 |
34 | score_size = (instance_size - cfg.TRACK.EXEMPLAR_SIZE) // \
35 | cfg.ANCHOR.STRIDE + 1 + cfg.TRACK.BASE_SIZE
36 | hanning = np.hanning(score_size)
37 | window = np.outer(hanning, hanning)
38 | window = np.tile(window.flatten(), self.anchor_num)
39 | anchors = self.generate_anchor(score_size)
40 |
41 | s_x = s_z * (instance_size / cfg.TRACK.EXEMPLAR_SIZE)
42 |
43 | x_crop = self.get_subwindow(img, self.center_pos, instance_size,
44 | round(s_x), self.channel_average)
45 | outputs = self.model.track(x_crop)
46 | score = self._convert_score(outputs['cls'])
47 | pred_bbox = self._convert_bbox(outputs['loc'], anchors)
48 |
49 | def change(r):
50 | return np.maximum(r, 1. / r)
51 |
52 | def sz(w, h):
53 | pad = (w + h) * 0.5
54 | return np.sqrt((w + pad) * (h + pad))
55 |
56 | # scale penalty
57 | s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) /
58 | (sz(self.size[0] * scale_z, self.size[1] * scale_z)))
59 | # ratio penalty
60 | r_c = change((self.size[0] / self.size[1]) /
61 | (pred_bbox[2, :] / pred_bbox[3, :]))
62 | penalty = np.exp(-(r_c * s_c - 1) * cfg.TRACK.PENALTY_K)
63 | pscore = penalty * score
64 |
65 | # window
66 | if not self.longterm_state:
67 | pscore = pscore * (1 - cfg.TRACK.WINDOW_INFLUENCE) + \
68 | window * cfg.TRACK.WINDOW_INFLUENCE
69 | else:
70 | pscore = pscore * (1 - 0.001) + window * 0.001
71 | best_idx = np.argmax(pscore)
72 |
73 | bbox = pred_bbox[:, best_idx] / scale_z
74 | lr = penalty[best_idx] * score[best_idx] * cfg.TRACK.LR
75 |
76 | best_score = score[best_idx]
77 | if best_score >= cfg.TRACK.CONFIDENCE_LOW:
78 | cx = bbox[0] + self.center_pos[0]
79 | cy = bbox[1] + self.center_pos[1]
80 |
81 | width = self.size[0] * (1 - lr) + bbox[2] * lr
82 | height = self.size[1] * (1 - lr) + bbox[3] * lr
83 | else:
84 | cx = self.center_pos[0]
85 | cy = self.center_pos[1]
86 |
87 | width = self.size[0]
88 | height = self.size[1]
89 |
90 | self.center_pos = np.array([cx, cy])
91 | self.size = np.array([width, height])
92 |
93 | cx, cy, width, height = self._bbox_clip(cx, cy, width,
94 | height, img.shape[:2])
95 | bbox = [cx - width / 2,
96 | cy - height / 2,
97 | width,
98 | height]
99 |
100 | if best_score < cfg.TRACK.CONFIDENCE_LOW:
101 | self.longterm_state = True
102 | elif best_score > cfg.TRACK.CONFIDENCE_HIGH:
103 | self.longterm_state = False
104 |
105 | return {
106 | 'bbox': bbox,
107 | 'best_score': best_score
108 | }
109 |
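
The scale/ratio penalty above can be sanity-checked in isolation (editorial numpy sketch reusing the same `change`/`sz` definitions; `PENALTY_K = 0.04` is a typical config value, not one read from this file):

    import numpy as np

    def change(r):
        return np.maximum(r, 1. / r)

    def sz(w, h):
        pad = (w + h) * 0.5
        return np.sqrt((w + pad) * (h + pad))

    # Previous target: 40x80 (crop scale); one candidate box: 50x70.
    s_c = change(sz(50, 70) / sz(40, 80))
    r_c = change((40 / 80) / (50 / 70))
    penalty = np.exp(-(r_c * s_c - 1) * 0.04)
    print(s_c, r_c, penalty)  # penalty < 1 discounts the dissimilar candidate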
--------------------------------------------------------------------------------
/pysot/tracker/tracker_builder.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | from pysot.core.config import cfg
7 | from pysot.tracker.siamrpn_tracker import SiamRPNTracker
8 | from pysot.tracker.siamrpn_tracker_f import SiamRPNTracker_f
9 | from pysot.tracker.siammask_tracker import SiamMaskTracker
10 | from pysot.tracker.siammask_tracker_f import SiamMaskTracker_f
11 | from pysot.tracker.siamrpn_tracker_ntr import SiamRPNTracker_ntr
12 | from pysot.tracker.siamrpnlt_tracker import SiamRPNLTTracker
13 |
14 | TRACKS = {
15 | 'SiamRPNTracker': SiamRPNTracker,
16 | 'SiamMaskTracker': SiamMaskTracker,
17 | 'SiamRPNLTTracker': SiamRPNLTTracker
18 | }
19 |
20 | TRACKSF = {
21 | 'SiamRPNTracker': SiamRPNTracker_f,
22 |     'SiamMaskTracker': SiamMaskTracker_f,
23 | 'SiamRPNTracker_ntr': SiamRPNTracker_ntr,
24 | }
25 |
26 |
27 | def build_tracker(model):
28 | return TRACKS[cfg.TRACK.TYPE](model)
29 |
30 | def build_tracker_f(model):
31 | return TRACKSF[cfg.TRACK.TYPE](model)
32 |
--------------------------------------------------------------------------------
/pysot/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/utils/__init__.py
--------------------------------------------------------------------------------
/pysot/utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/utils/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/utils/__pycache__/anchor.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/utils/__pycache__/anchor.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/utils/__pycache__/bbox.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/utils/__pycache__/bbox.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/utils/__pycache__/model_load.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/pysot/utils/__pycache__/model_load.cpython-38.pyc
--------------------------------------------------------------------------------
/pysot/utils/anchor.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import math
7 |
8 | import numpy as np
9 |
10 | from pysot.utils.bbox import corner2center, center2corner
11 |
12 |
13 | class Anchors:
14 | """
15 |     This class generates anchors.
16 | """
17 | def __init__(self, stride, ratios, scales, image_center=0, size=0):
18 | self.stride = stride
19 | self.ratios = ratios
20 | self.scales = scales
21 | self.image_center = image_center
22 | self.size = size
23 |
24 | self.anchor_num = len(self.scales) * len(self.ratios)
25 |
26 | self.anchors = None
27 |
28 | self.generate_anchors()
29 |
30 | def generate_anchors(self):
31 | """
32 | generate anchors based on predefined configuration
33 | """
34 | self.anchors = np.zeros((self.anchor_num, 4), dtype=np.float32)
35 | size = self.stride * self.stride
36 | count = 0
37 | for r in self.ratios:
38 | ws = int(math.sqrt(size*1. / r))
39 | hs = int(ws * r)
40 |
41 | for s in self.scales:
42 | w = ws * s
43 | h = hs * s
44 | self.anchors[count][:] = [-w*0.5, -h*0.5, w*0.5, h*0.5][:]
45 | count += 1
46 |
47 | def generate_all_anchors(self, im_c, size):
48 | """
49 | im_c: image center
50 | size: image size
51 | """
52 | if self.image_center == im_c and self.size == size:
53 | return False
54 | self.image_center = im_c
55 | self.size = size
56 |
57 | a0x = im_c - size // 2 * self.stride
58 | ori = np.array([a0x] * 4, dtype=np.float32)
59 | zero_anchors = self.anchors + ori
60 |
61 | x1 = zero_anchors[:, 0]
62 | y1 = zero_anchors[:, 1]
63 | x2 = zero_anchors[:, 2]
64 | y2 = zero_anchors[:, 3]
65 |
66 | x1, y1, x2, y2 = map(lambda x: x.reshape(self.anchor_num, 1, 1),
67 | [x1, y1, x2, y2])
68 | cx, cy, w, h = corner2center([x1, y1, x2, y2])
69 |
70 | disp_x = np.arange(0, size).reshape(1, 1, -1) * self.stride
71 | disp_y = np.arange(0, size).reshape(1, -1, 1) * self.stride
72 |
73 | cx = cx + disp_x
74 | cy = cy + disp_y
75 |
76 | # broadcast
77 | zero = np.zeros((self.anchor_num, size, size), dtype=np.float32)
78 | cx, cy, w, h = map(lambda x: x + zero, [cx, cy, w, h])
79 | x1, y1, x2, y2 = center2corner([cx, cy, w, h])
80 |
81 | self.all_anchors = (np.stack([x1, y1, x2, y2]).astype(np.float32),
82 | np.stack([cx, cy, w, h]).astype(np.float32))
83 | return True
84 |
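
Constructing the anchor set used by SiamRPN++-style configs (the ratios and scales below mirror the experiment yaml files; the center and map size are illustrative):

    anchors = Anchors(stride=8, ratios=[0.33, 0.5, 1, 2, 3], scales=[8])
    print(anchors.anchors.shape)        # (5, 4): one base box per ratio/scale
    anchors.generate_all_anchors(im_c=127, size=25)
    corner, center = anchors.all_anchors
    print(corner.shape, center.shape)   # (4, 5, 25, 25) each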
--------------------------------------------------------------------------------
/pysot/utils/average_meter.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 |
7 | class Meter(object):
8 | def __init__(self, name, val, avg):
9 | self.name = name
10 | self.val = val
11 | self.avg = avg
12 |
13 | def __repr__(self):
14 | return "{name}: {val:.6f} ({avg:.6f})".format(
15 | name=self.name, val=self.val, avg=self.avg
16 | )
17 |
18 | def __format__(self, *tuples, **kwargs):
19 | return self.__repr__()
20 |
21 |
22 | class AverageMeter:
23 | """Computes and stores the average and current value"""
24 | def __init__(self, num=100):
25 | self.num = num
26 | self.reset()
27 |
28 | def reset(self):
29 | self.val = {}
30 | self.sum = {}
31 | self.count = {}
32 | self.history = {}
33 |
34 | def update(self, batch=1, **kwargs):
35 | val = {}
36 | for k in kwargs:
37 | val[k] = kwargs[k] / float(batch)
38 | self.val.update(val)
39 | for k in kwargs:
40 | if k not in self.sum:
41 | self.sum[k] = 0
42 | self.count[k] = 0
43 | self.history[k] = []
44 | self.sum[k] += kwargs[k]
45 | self.count[k] += batch
46 | for _ in range(batch):
47 | self.history[k].append(val[k])
48 |
49 | if self.num <= 0:
50 | # < 0, average all
51 | self.history[k] = []
52 |
53 | # == 0: no average
54 | if self.num == 0:
55 | self.sum[k] = self.val[k]
56 | self.count[k] = 1
57 |
58 | elif len(self.history[k]) > self.num:
59 | pop_num = len(self.history[k]) - self.num
60 | for _ in range(pop_num):
61 | self.sum[k] -= self.history[k][0]
62 | del self.history[k][0]
63 | self.count[k] -= 1
64 |
65 | def __repr__(self):
66 | s = ''
67 | for k in self.sum:
68 | s += self.format_str(k)
69 | return s
70 |
71 | def format_str(self, attr):
72 | return "{name}: {val:.6f} ({avg:.6f}) ".format(
73 | name=attr,
74 | val=float(self.val[attr]),
75 | avg=float(self.sum[attr]) / self.count[attr])
76 |
77 | def __getattr__(self, attr):
78 | if attr in self.__dict__:
79 |             return self.__dict__[attr]
80 | if attr not in self.sum:
81 | print("invalid key '{}'".format(attr))
82 | return Meter(attr, 0, 0)
83 | return Meter(attr, self.val[attr], self.avg(attr))
84 |
85 | def avg(self, attr):
86 | return float(self.sum[attr]) / self.count[attr]
87 |
88 |
89 | if __name__ == '__main__':
90 | avg1 = AverageMeter(10)
91 | avg2 = AverageMeter(0)
92 | avg3 = AverageMeter(-1)
93 |
94 | for i in range(20):
95 | avg1.update(s=i)
96 | avg2.update(s=i)
97 | avg3.update(s=i)
98 |
99 | print('iter {}'.format(i))
100 | print(avg1.s)
101 | print(avg2.s)
102 | print(avg3.s)
103 |
--------------------------------------------------------------------------------
/pysot/utils/bbox.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | from collections import namedtuple
7 |
8 | import numpy as np
9 |
10 |
11 | Corner = namedtuple('Corner', 'x1 y1 x2 y2')
12 | # alias
13 | BBox = Corner
14 | Center = namedtuple('Center', 'x y w h')
15 |
16 |
17 | def corner2center(corner):
18 | """ convert (x1, y1, x2, y2) to (cx, cy, w, h)
19 | Args:
20 |         corner: Corner or np.array (4*N)
21 | Return:
22 | Center or np.array (4 * N)
23 | """
24 | if isinstance(corner, Corner):
25 | x1, y1, x2, y2 = corner
26 | return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, (x2 - x1), (y2 - y1))
27 | else:
28 | x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3]
29 | x = (x1 + x2) * 0.5
30 | y = (y1 + y2) * 0.5
31 | w = x2 - x1
32 | h = y2 - y1
33 | return x, y, w, h
34 |
35 |
36 | def center2corner(center):
37 | """ convert (cx, cy, w, h) to (x1, y1, x2, y2)
38 | Args:
39 | center: Center or np.array (4 * N)
40 | Return:
41 |         Corner or np.array (4 * N)
42 | """
43 | if isinstance(center, Center):
44 | x, y, w, h = center
45 | return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5)
46 | else:
47 | x, y, w, h = center[0], center[1], center[2], center[3]
48 | x1 = x - w * 0.5
49 | y1 = y - h * 0.5
50 | x2 = x + w * 0.5
51 | y2 = y + h * 0.5
52 | return x1, y1, x2, y2
53 |
54 |
55 | def IoU(rect1, rect2):
56 | """ caculate interection over union
57 | Args:
58 | rect1: (x1, y1, x2, y2)
59 | rect2: (x1, y1, x2, y2)
60 | Returns:
61 | iou
62 | """
63 | # overlap
64 | x1, y1, x2, y2 = rect1[0], rect1[1], rect1[2], rect1[3]
65 | tx1, ty1, tx2, ty2 = rect2[0], rect2[1], rect2[2], rect2[3]
66 |
67 | xx1 = np.maximum(tx1, x1)
68 | yy1 = np.maximum(ty1, y1)
69 | xx2 = np.minimum(tx2, x2)
70 | yy2 = np.minimum(ty2, y2)
71 |
72 | ww = np.maximum(0, xx2 - xx1)
73 | hh = np.maximum(0, yy2 - yy1)
74 |
75 | area = (x2-x1) * (y2-y1)
76 | target_a = (tx2-tx1) * (ty2 - ty1)
77 | inter = ww * hh
78 | iou = inter / (area + target_a - inter)
79 | return iou
80 |
81 |
82 | def cxy_wh_2_rect(pos, sz):
83 | """ convert (cx, cy, w, h) to (x1, y1, w, h), 0-index
84 | """
85 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]])
86 |
87 |
88 | def rect_2_cxy_wh(rect):
89 | """ convert (x1, y1, w, h) to (cx, cy, w, h), 0-index
90 | """
91 | return np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2, rect[2], rect[3]])
92 |
93 |
94 | def cxy_wh_2_rect1(pos, sz):
95 | """ convert (cx, cy, w, h) to (x1, y1, w, h), 1-index
96 | """
97 | return np.array([pos[0]-sz[0]/2+1, pos[1]-sz[1]/2+1, sz[0], sz[1]])
98 |
99 |
100 | def rect1_2_cxy_wh(rect):
101 | """ convert (x1, y1, w, h) to (cx, cy, w, h), 1-index
102 | """
103 | return np.array([rect[0]+rect[2]/2-1, rect[1]+rect[3]/2-1]), \
104 | np.array([rect[2], rect[3]])
105 |
106 |
107 | def get_axis_aligned_bbox(region):
108 | """ convert region to (cx, cy, w, h) that represent by axis aligned box
109 | """
110 | nv = region.size
111 | if nv == 8:
112 | cx = np.mean(region[0::2])
113 | cy = np.mean(region[1::2])
114 | x1 = min(region[0::2])
115 | x2 = max(region[0::2])
116 | y1 = min(region[1::2])
117 | y2 = max(region[1::2])
118 | A1 = np.linalg.norm(region[0:2] - region[2:4]) * \
119 | np.linalg.norm(region[2:4] - region[4:6])
120 | A2 = (x2 - x1) * (y2 - y1)
121 | s = np.sqrt(A1 / A2)
122 | w = s * (x2 - x1) + 1
123 | h = s * (y2 - y1) + 1
124 | else:
125 | x = region[0]
126 | y = region[1]
127 | w = region[2]
128 | h = region[3]
129 | cx = x+w/2
130 | cy = y+h/2
131 | return cx, cy, w, h
132 |
133 |
134 | def get_min_max_bbox(region):
135 | """ convert region to (cx, cy, w, h) that represent by mim-max box
136 | """
137 | nv = region.size
138 | if nv == 8:
139 | cx = np.mean(region[0::2])
140 | cy = np.mean(region[1::2])
141 | x1 = min(region[0::2])
142 | x2 = max(region[0::2])
143 | y1 = min(region[1::2])
144 | y2 = max(region[1::2])
145 | w = x2 - x1
146 | h = y2 - y1
147 | else:
148 | x = region[0]
149 | y = region[1]
150 | w = region[2]
151 | h = region[3]
152 | cx = x+w/2
153 | cy = y+h/2
154 | return cx, cy, w, h
155 |
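
A few worked conversions (illustrative values):

    print(corner2center(Corner(10, 20, 50, 100)))  # Center(x=30.0, y=60.0, w=40, h=80)
    print(center2corner(Center(30, 60, 40, 80)))   # Corner(10.0, 20.0, 50.0, 100.0)
    print(IoU((0, 0, 10, 10), (5, 5, 15, 15)))     # 25 / 175, about 0.1429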
--------------------------------------------------------------------------------
/pysot/utils/distributed.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import os
7 | import socket
8 | import logging
9 |
10 | import torch
11 | import torch.nn as nn
12 | import torch.distributed as dist
13 |
14 | from pysot.utils.log_helper import log_once
15 |
16 | logger = logging.getLogger('global')
17 |
18 |
19 | def average_reduce(v):
20 | if get_world_size() == 1:
21 | return v
22 | tensor = torch.cuda.FloatTensor(1)
23 | tensor[0] = v
24 | dist.all_reduce(tensor)
25 | v = tensor[0] / get_world_size()
26 | return v
27 |
28 |
29 | class DistModule(nn.Module):
30 | def __init__(self, module, bn_method=0):
31 | super(DistModule, self).__init__()
32 | self.module = module
33 | self.bn_method = bn_method
34 | if get_world_size() > 1:
35 | broadcast_params(self.module)
36 | else:
37 |             self.bn_method = 0  # single process
38 |
39 | def forward(self, *args, **kwargs):
40 | broadcast_buffers(self.module, self.bn_method)
41 | return self.module(*args, **kwargs)
42 |
43 | def train(self, mode=True):
44 | super(DistModule, self).train(mode)
45 | self.module.train(mode)
46 | return self
47 |
48 |
49 | def broadcast_params(model):
50 | """ broadcast model parameters """
51 | for p in model.state_dict().values():
52 | dist.broadcast(p, 0)
53 |
54 |
55 | def broadcast_buffers(model, method=0):
56 | """ broadcast model buffers """
57 | if method == 0:
58 | return
59 |
60 | world_size = get_world_size()
61 |
62 | for b in model._all_buffers():
63 |         if method == 1:  # broadcast from main process
64 | dist.broadcast(b, 0)
65 | elif method == 2: # average
66 | dist.all_reduce(b)
67 | b /= world_size
68 | else:
69 | raise Exception('Invalid buffer broadcast code {}'.format(method))
70 |
71 |
72 | inited = False
73 |
74 |
75 | def _dist_init():
76 |     '''
77 |     Expected environment layout:
78 |         ntasks: world_size (number of processes)
79 |         proc_id: rank
80 |     '''
81 | # rank = int(os.environ['RANK'])
82 | rank = 0
83 | num_gpus = torch.cuda.device_count()
84 | torch.cuda.set_device(rank % num_gpus)
85 | dist.init_process_group(backend='nccl')
86 | world_size = dist.get_world_size()
87 | return rank, world_size
88 |
89 |
90 | def _get_local_ip():
91 | try:
92 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
93 | s.connect(('8.8.8.8', 80))
94 | ip = s.getsockname()[0]
95 | finally:
96 | s.close()
97 | return ip
98 |
99 |
100 | def dist_init():
101 | global rank, world_size, inited
102 | # try:
103 | # rank, world_size = _dist_init()
104 | # except RuntimeError as e:
105 | # if 'public' in e.args[0]:
106 | # logger.info(e)
107 | # logger.info('Warning: use single process')
108 | # rank, world_size = 0, 1
109 | # else:
110 | # raise RuntimeError(*e.args)
111 | rank, world_size = 0, 1
112 | inited = True
113 | return rank, world_size
114 |
115 |
116 | def get_rank():
117 | if not inited:
118 | raise(Exception('dist not inited'))
119 | return rank
120 |
121 |
122 | def get_world_size():
123 | if not inited:
124 | raise(Exception('dist not inited'))
125 | return world_size
126 |
127 |
128 | def reduce_gradients(model, _type='sum'):
129 | types = ['sum', 'avg']
130 | assert _type in types, 'gradients method must be in "{}"'.format(types)
131 | log_once("gradients method is {}".format(_type))
132 | if get_world_size() > 1:
133 | for param in model.parameters():
134 | if param.requires_grad:
135 | dist.all_reduce(param.grad.data)
136 | if _type == 'avg':
137 | param.grad.data /= get_world_size()
138 | else:
139 | return None
140 |
--------------------------------------------------------------------------------
/pysot/utils/misc.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import os
7 |
8 | from colorama import Fore, Style
9 |
10 |
11 | __all__ = ['commit', 'describe']
12 |
13 |
14 | def _exec(cmd):
15 | f = os.popen(cmd, 'r', 1)
16 | return f.read().strip()
17 |
18 |
19 | def _bold(s):
20 | return "\033[1m%s\033[0m" % s
21 |
22 |
23 | def _color(s):
24 | return f'{Fore.RED}{s}{Style.RESET_ALL}'
25 |
26 |
27 | def _describe(model, lines=None, spaces=0):
28 | head = " " * spaces
29 | for name, p in model.named_parameters():
30 | if '.' in name:
31 | continue
32 | if p.requires_grad:
33 | name = _color(name)
34 | line = "{head}- {name}".format(head=head, name=name)
35 | lines.append(line)
36 |
37 | for name, m in model.named_children():
38 | space_num = len(name) + spaces + 1
39 | if m.training:
40 | name = _color(name)
41 | line = "{head}.{name} ({type})".format(
42 | head=head,
43 | name=name,
44 | type=m.__class__.__name__)
45 | lines.append(line)
46 | _describe(m, lines, space_num)
47 |
48 |
49 | def commit():
50 | root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../'))
51 | cmd = "cd {}; git log | head -n1 | awk '{{print $2}}'".format(root)
52 | commit = _exec(cmd)
53 | cmd = "cd {}; git log --oneline | head -n1".format(root)
54 | commit_log = _exec(cmd)
55 | return "commit : {}\n log : {}".format(commit, commit_log)
56 |
57 |
58 | def describe(net, name=None):
59 | num = 0
60 | lines = []
61 | if name is not None:
62 | lines.append(name)
63 | num = len(name)
64 | _describe(net, lines, num)
65 | return "\n".join(lines)
66 |
--------------------------------------------------------------------------------
/pysot/utils/model_load.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import logging
7 |
8 | import torch
9 |
10 |
11 | logger = logging.getLogger('global')
12 |
13 |
14 | def check_keys(model, pretrained_state_dict):
15 | ckpt_keys = set(pretrained_state_dict.keys())
16 | model_keys = set(model.state_dict().keys())
17 | used_pretrained_keys = model_keys & ckpt_keys
18 | unused_pretrained_keys = ckpt_keys - model_keys
19 | missing_keys = model_keys - ckpt_keys
20 | # filter 'num_batches_tracked'
21 | missing_keys = [x for x in missing_keys
22 | if not x.endswith('num_batches_tracked')]
23 | if len(missing_keys) > 0:
24 | logger.info('[Warning] missing keys: {}'.format(missing_keys))
25 | logger.info('missing keys:{}'.format(len(missing_keys)))
26 | if len(unused_pretrained_keys) > 0:
27 | logger.info('[Warning] unused_pretrained_keys: {}'.format(
28 | unused_pretrained_keys))
29 | logger.info('unused checkpoint keys:{}'.format(
30 | len(unused_pretrained_keys)))
31 | logger.info('used keys:{}'.format(len(used_pretrained_keys)))
32 | assert len(used_pretrained_keys) > 0, \
33 | 'load NONE from pretrained checkpoint'
34 | return True
35 |
36 |
37 | def remove_prefix(state_dict, prefix):
38 |     ''' Old-style models are stored with all parameter names
39 |     sharing the common prefix 'module.' '''
40 | logger.info('remove prefix \'{}\''.format(prefix))
41 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x
42 | return {f(key): value for key, value in state_dict.items()}
43 |
44 |
45 | def load_pretrain(model, pretrained_path):
46 | logger.info('load pretrained model from {}'.format(pretrained_path))
47 | device = torch.cuda.current_device()
48 | pretrained_dict = torch.load(pretrained_path,
49 | map_location=lambda storage, loc: storage.cuda(device))
50 | if "state_dict" in pretrained_dict.keys():
51 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'],
52 | 'module.')
53 | else:
54 | pretrained_dict = remove_prefix(pretrained_dict, 'module.')
55 |
56 | try:
57 | check_keys(model, pretrained_dict)
58 |     except AssertionError:
59 |         logger.info('[Warning]: using pretrained weights as features. '
60 |                     'Adding "features." prefix')
61 | new_dict = {}
62 | for k, v in pretrained_dict.items():
63 | k = 'features.' + k
64 | new_dict[k] = v
65 | pretrained_dict = new_dict
66 | check_keys(model, pretrained_dict)
67 | model.load_state_dict(pretrained_dict, strict=False)
68 | return model
69 |
70 |
71 | def restore_from(model, optimizer, ckpt_path):
72 | device = torch.cuda.current_device()
73 | ckpt = torch.load(ckpt_path,
74 | map_location=lambda storage, loc: storage.cuda(device))
75 | if 'epoch' not in ckpt:
76 | epoch = 0
77 | else:
78 | epoch = ckpt['epoch']
79 |
80 | # ckpt_model_dict = remove_prefix(ckpt['state_dict'], 'module.')
81 | if 'state_dict' in ckpt:
82 | model_cp = ckpt['state_dict']
83 | else:
84 | model_cp = ckpt
85 | check_keys(model, model_cp)
86 | model.load_state_dict(model_cp, strict=False)
87 |
88 | if 'optimizer' in ckpt:
89 | check_keys(optimizer, ckpt['optimizer'])
90 | optimizer.load_state_dict(ckpt['optimizer'])
91 |
92 | return model, optimizer, epoch
93 |
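
`remove_prefix` in isolation (illustrative dictionary):

    state_dict = {'module.backbone.conv1.weight': 0,
                  'module.rpn_head.cls.bias': 1}
    print(remove_prefix(state_dict, 'module.'))
    # {'backbone.conv1.weight': 0, 'rpn_head.cls.bias': 1}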
--------------------------------------------------------------------------------
/test_agx_mob.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=/path/to/PVT++:$PYTHONPATH
2 | # Mob
3 | # DTB70
4 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \
5 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'DTB70' --datasetroot 'testing_dataset/DTB70'
6 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \
7 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'DTB70' --datasetroot 'testing_dataset/DTB70'
8 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \
9 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'DTB70' --datasetroot 'testing_dataset/DTB70'
10 | # # UAVDT
11 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \
12 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'UAVDT' --datasetroot 'testing_dataset/UAVDT'
13 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \
14 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'UAVDT' --datasetroot 'testing_dataset/UAVDT'
15 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \
16 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'UAVDT' --datasetroot 'testing_dataset/UAVDT'
17 | # # UAV20
18 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \
19 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'UAV20' --datasetroot 'testing_dataset/UAV20L'
20 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \
21 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'UAV20' --datasetroot 'testing_dataset/UAV20L'
22 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \
23 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'UAV20' --datasetroot 'testing_dataset/UAV20L'
24 | # # UAV123
25 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \
26 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'UAV123' --datasetroot 'testing_dataset/UAV123'
27 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \
28 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'UAV123' --datasetroot 'testing_dataset/UAV123'
29 | python onboard/test_rt_f.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \
30 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'UAV123' --datasetroot 'testing_dataset/UAV123'
--------------------------------------------------------------------------------
/test_sim_mob.sh:
--------------------------------------------------------------------------------
1 | export PYTHONPATH=/path/to/PVT++:$PYTHONPATH
2 | # Mob
3 | # DTB70
4 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \
5 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'DTB70' --datasetroot 'testing_dataset/DTB70' --sim_info 'testing_dataset/sim_info/DTB70_SiamRPN++_Mob_sim.pkl'
6 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \
7 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'DTB70' --datasetroot 'testing_dataset/DTB70' --sim_info 'testing_dataset/sim_info/DTB70_SiamRPN++_Mob_sim.pkl'
8 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \
9 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'DTB70' --datasetroot 'testing_dataset/DTB70' --sim_info 'testing_dataset/sim_info/DTB70_SiamRPN++_Mob_sim.pkl'
10 | # # UAVDT
11 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \
12 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'UAVDT' --datasetroot 'testing_dataset/UAVDT' --sim_info 'testing_dataset/sim_info/UAVDT_SiamRPN++_Mob_sim.pkl'
13 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \
14 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'UAVDT' --datasetroot 'testing_dataset/UAVDT' --sim_info 'testing_dataset/sim_info/UAVDT_SiamRPN++_Mob_sim.pkl'
15 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \
16 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'UAVDT' --datasetroot 'testing_dataset/UAVDT' --sim_info 'testing_dataset/sim_info/UAVDT_SiamRPN++_Mob_sim.pkl'
17 | # # UAV20
18 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \
19 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'UAV20' --datasetroot 'testing_dataset/UAV20L' --sim_info 'testing_dataset/sim_info/UAV20_SiamRPN++_Mob_sim.pkl'
20 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \
21 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'UAV20' --datasetroot 'testing_dataset/UAV20L' --sim_info 'testing_dataset/sim_info/UAV20_SiamRPN++_Mob_sim.pkl'
22 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \
23 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'UAV20' --datasetroot 'testing_dataset/UAV20L' --sim_info 'testing_dataset/sim_info/UAV20_SiamRPN++_Mob_sim.pkl'
24 | # # UAV123
25 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml' \
26 | --snapshot "my_models/RPN_Mob_M.pth" --dataset 'UAV123' --datasetroot 'testing_dataset/UAV123' --sim_info 'testing_dataset/sim_info/UAV123_SiamRPN++_Mob_sim.pkl'
27 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml' \
28 | --snapshot "my_models/RPN_Mob_V.pth" --dataset 'UAV123' --datasetroot 'testing_dataset/UAV123' --sim_info 'testing_dataset/sim_info/UAV123_SiamRPN++_Mob_sim.pkl'
29 | python tools/test_rt_f_sim.py --config 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml' \
30 | --snapshot "my_models/RPN_Mob_MV.pth" --dataset 'UAV123' --datasetroot 'testing_dataset/UAV123' --sim_info 'testing_dataset/sim_info/UAV123_SiamRPN++_Mob_sim.pkl'
--------------------------------------------------------------------------------
/toolkit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/toolkit/__init__.py
--------------------------------------------------------------------------------
/toolkit/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .otb import OTBDataset
2 | from .lasot import LaSOTDataset
3 | from .got10k import GOT10kDataset
4 | from .uav10fps import UAV10Dataset
5 | from .uavdark import UAVDARKDataset
6 | from .uavdt import UAVDTDataset
7 | from .dtb import DTB70Dataset
8 | from .uav20l import UAV20Dataset
9 | from .uav123 import UAV123Dataset
10 | from .realworld import RealWorldDataset
11 | from .visdrone import VISDRONEDataset
12 | class DatasetFactory(object):
13 | @staticmethod
14 | def create_dataset(**kwargs):
15 | """
16 | Args:
17 |             name: dataset name, e.g. 'OTB', 'DTB70', 'UAV10', 'UAV20', 'UAV123',
18 |                 'UAVDT', 'UAVDARK', 'RealWorld', 'VISDRONE', 'LaSOT', 'GOT-10k'
19 |             dataset_root: dataset root
20 |             load_img: whether to load image
21 | Return:
22 | dataset
23 | """
24 | assert 'name' in kwargs, "should provide dataset name"
25 | name = kwargs['name']
26 | if 'OTB' in name:
27 | dataset = OTBDataset(**kwargs)
28 | elif 'DTB70' in name:
29 | dataset = DTB70Dataset(**kwargs)
30 | elif 'UAV10' in name:
31 | dataset = UAV10Dataset(**kwargs)
32 | elif 'UAV20' in name:
33 | dataset = UAV20Dataset(**kwargs)
34 | elif "RealWorld" in name:
35 | dataset = RealWorldDataset(**kwargs)
36 | elif 'VISDRONE' in name:
37 | dataset = VISDRONEDataset(**kwargs)
38 | elif 'UAVDT' in name:
39 | dataset = UAVDTDataset(**kwargs)
40 | elif 'LaSOT' == name:
41 | dataset = LaSOTDataset(**kwargs)
42 | elif 'UAVDARK' in name:
43 | dataset = UAVDARKDataset(**kwargs)
44 | elif 'UAV123' in name:
45 | dataset = UAV123Dataset(**kwargs)
46 |         elif 'GOT-10k' == name:
47 |             dataset = GOT10kDataset(**kwargs)
48 |         else:
49 |             raise Exception("unknown dataset {}".format(kwargs['name']))
50 |         return dataset
51 | 
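A quick usage sketch for the factory above (the dataset root is a placeholder and assumes a prepared copy of UAV123; Video objects are assumed to expose a .name attribute, as elsewhere in this toolkit):

    from toolkit.datasets import DatasetFactory

    # dispatch is by substring match on `name`, so the more specific
    # 'UAV20' branch is tested before 'UAV123'
    dataset = DatasetFactory.create_dataset(name='UAV123',
                                            dataset_root='testing_dataset/UAV123',
                                            load_img=False)
    for video in dataset:  # iterates videos in sorted-name order
        print(video.name)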
--------------------------------------------------------------------------------
/toolkit/datasets/dataset.py:
--------------------------------------------------------------------------------
1 | from tqdm import tqdm
2 |
3 | class Dataset(object):
4 | def __init__(self, name, dataset_root):
5 | self.name = name
6 | self.dataset_root = dataset_root
7 | self.videos = None
8 |
9 | def __getitem__(self, idx):
10 | if isinstance(idx, str):
11 | return self.videos[idx]
12 | elif isinstance(idx, int):
13 | return self.videos[sorted(list(self.videos.keys()))[idx]]
14 |
15 | def __len__(self):
16 | return len(self.videos)
17 |
18 | def __iter__(self):
19 | keys = sorted(list(self.videos.keys()))
20 | for key in keys:
21 | yield self.videos[key]
22 |
23 | def set_tracker(self, path, tracker_names):
24 | """
25 | Args:
26 | path: path to tracker results,
27 | tracker_names: list of tracker name
28 | """
29 | self.tracker_path = path
30 | self.tracker_names = tracker_names
31 | # for video in tqdm(self.videos.values(),
32 |         # desc='loading tracker result', ncols=100):
33 | # video.load_tracker(path, tracker_names)
34 |
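A minimal sketch of the indexing contract above: a video can be fetched by name or by position, where integer indices address the sorted key order (toy strings stand in for real Video objects):

    ds = Dataset('demo', '/data/demo')
    ds.videos = {'seq_b': 'video-b', 'seq_a': 'video-a'}

    print(ds['seq_a'])  # key lookup          -> 'video-a'
    print(ds[0])        # sorted-index lookup -> 'video-a'
    print(len(ds))      # -> 2
    print(list(ds))     # -> ['video-a', 'video-b'], sorted by key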
--------------------------------------------------------------------------------
/toolkit/datasets/dtb.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from PIL import Image
6 | from tqdm import tqdm
7 | from glob import glob
8 |
9 | from .dataset import Dataset
10 | from .video import Video
11 |
12 |
13 | def ca():
14 |
15 | path='./testing_dataset/DTB70/'
16 |
17 | name_list=os.listdir(path)
18 | name_list.sort()
19 |
20 | b=[]
21 | for i in range(len(name_list)):
22 | b.append(name_list[i])
23 | c=[]
24 |
25 | for jj in range(len(name_list)):
26 | imgs=path+str(name_list[jj])+'/img/'
27 | txt=path+str(name_list[jj])+'/groundtruth_rect.txt'
28 | bbox=[]
29 |         f = open(txt)  # returns a file object
30 | file= f.readlines()
31 | li=os.listdir(imgs)
32 | li.sort()
33 | for ii in range(len(file)):
34 | li[ii]=name_list[jj]+'/img/'+li[ii]
35 |
36 | line = file[ii].strip('\n').split(',')
37 |
38 |             try:
39 |                 line[0] = int(line[0])
40 |             except ValueError:
41 |                 line[0] = float(line[0])
42 |             try:
43 |                 line[1] = int(line[1])
44 |             except ValueError:
45 |                 line[1] = float(line[1])
46 |             try:
47 |                 line[2] = int(line[2])
48 |             except ValueError:
49 |                 line[2] = float(line[2])
50 |             try:
51 |                 line[3] = int(line[3])
52 |             except ValueError:
53 |                 line[3] = float(line[3])
54 | bbox.append(line)
55 |
56 | if len(bbox)!=len(li):
57 | print (jj)
58 | f.close()
59 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]})
60 |
61 | d=dict(zip(b,c))
62 |
63 | return d
64 |
65 | class UAVVideo(Video):
66 | """
67 | Args:
68 | name: video name
69 | root: dataset root
70 | video_dir: video directory
71 | init_rect: init rectangle
72 | img_names: image names
73 | gt_rect: groundtruth rectangle
74 | attr: attribute of video
75 | """
76 | def __init__(self, name, root, video_dir, init_rect, img_names,
77 | gt_rect, attr, load_img=False):
78 | super(UAVVideo, self).__init__(name, root, video_dir,
79 | init_rect, img_names, gt_rect, attr, load_img)
80 |
81 |
82 | class DTB70Dataset(Dataset):
83 | """
84 | Args:
85 |         name: dataset name, should be 'DTB70'
86 |         dataset_root: dataset root
87 |         load_img: whether to load all imgs
88 | """
89 | def __init__(self, name, dataset_root, load_img=False):
90 | super(DTB70Dataset, self).__init__(name, dataset_root)
91 | meta_data = ca()
92 |
93 | # load videos
94 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
95 | self.videos = {}
96 | for video in pbar:
97 | pbar.set_postfix_str(video)
98 | self.videos[video] = UAVVideo(video,
99 | dataset_root,
100 | meta_data[video]['video_dir'],
101 | meta_data[video]['init_rect'],
102 | meta_data[video]['img_names'],
103 | meta_data[video]['gt_rect'],
104 | meta_data[video]['attr'])
105 |
106 |
107 |
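ca() above assumes the hard-coded ./testing_dataset/DTB70 layout: every sequence directory contains an img/ folder of frames and a groundtruth_rect.txt with one comma-separated "x,y,w,h" box per frame. A sketch of the per-sequence entry it builds (sequence name and box values are illustrative):

    meta = {
        'attr': [],                                   # no attribute tags are read here
        'gt_rect': [[225, 201, 49, 27], [227, 203, 49, 27]],
        'img_names': ['Animal1/img/00001.jpg', 'Animal1/img/00002.jpg'],
        'init_rect': [225, 201, 49, 27],              # first ground-truth box
        'video_dir': 'Animal1',
    }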
--------------------------------------------------------------------------------
/toolkit/datasets/got10k.py:
--------------------------------------------------------------------------------
1 |
2 | import json
3 | import os
4 |
5 | from tqdm import tqdm
6 |
7 | from .dataset import Dataset
8 | from .video import Video
9 |
10 | class GOT10kVideo(Video):
11 | """
12 | Args:
13 | name: video name
14 | root: dataset root
15 | video_dir: video directory
16 | init_rect: init rectangle
17 | img_names: image names
18 | gt_rect: groundtruth rectangle
19 | attr: attribute of video
20 | """
21 | def __init__(self, name, root, video_dir, init_rect, img_names,
22 | gt_rect, attr, load_img=False):
23 | super(GOT10kVideo, self).__init__(name, root, video_dir,
24 | init_rect, img_names, gt_rect, attr, load_img)
25 |
26 | # def load_tracker(self, path, tracker_names=None):
27 | # """
28 | # Args:
29 | # path(str): path to result
30 | # tracker_name(list): name of tracker
31 | # """
32 | # if not tracker_names:
33 | # tracker_names = [x.split('/')[-1] for x in glob(path)
34 | # if os.path.isdir(x)]
35 | # if isinstance(tracker_names, str):
36 | # tracker_names = [tracker_names]
37 | # # self.pred_trajs = {}
38 | # for name in tracker_names:
39 | # traj_file = os.path.join(path, name, self.name+'.txt')
40 | # if os.path.exists(traj_file):
41 | # with open(traj_file, 'r') as f :
42 | # self.pred_trajs[name] = [list(map(float, x.strip().split(',')))
43 | # for x in f.readlines()]
44 | # if len(self.pred_trajs[name]) != len(self.gt_traj):
45 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name)
46 | # else:
47 |
48 | # self.tracker_names = list(self.pred_trajs.keys())
49 |
50 | class GOT10kDataset(Dataset):
51 | """
52 | Args:
53 |         name: dataset name, should be "GOT-10k"
54 |         dataset_root: dataset root dir
55 | """
56 | def __init__(self, name, dataset_root, load_img=False):
57 | super(GOT10kDataset, self).__init__(name, dataset_root)
58 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
59 | meta_data = json.load(f)
60 |
61 | # load videos
62 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
63 | self.videos = {}
64 | for video in pbar:
65 | pbar.set_postfix_str(video)
66 | self.videos[video] = GOT10kVideo(video,
67 | dataset_root,
68 | meta_data[video]['video_dir'],
69 | meta_data[video]['init_rect'],
70 | meta_data[video]['img_names'],
71 | meta_data[video]['gt_rect'],
72 | None)
73 | self.attr = {}
74 | self.attr['ALL'] = list(self.videos.keys())
75 |
--------------------------------------------------------------------------------
/toolkit/datasets/lasot.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 | import numpy as np
4 |
5 | from tqdm import tqdm
6 | from glob import glob
7 |
8 | from .dataset import Dataset
9 | from .video import Video
10 |
11 | class LaSOTVideo(Video):
12 | """
13 | Args:
14 | name: video name
15 | root: dataset root
16 | video_dir: video directory
17 | init_rect: init rectangle
18 | img_names: image names
19 | gt_rect: groundtruth rectangle
20 | attr: attribute of video
21 | """
22 | def __init__(self, name, root, video_dir, init_rect, img_names,
23 | gt_rect, attr, absent, load_img=False):
24 | super(LaSOTVideo, self).__init__(name, root, video_dir,
25 | init_rect, img_names, gt_rect, attr, load_img)
26 | self.absent = np.array(absent, np.int8)
27 |
28 | def load_tracker(self, path, tracker_names=None, store=True):
29 | """
30 | Args:
31 | path(str): path to result
32 | tracker_name(list): name of tracker
33 | """
34 | if not tracker_names:
35 | tracker_names = [x.split('/')[-1] for x in glob(path)
36 | if os.path.isdir(x)]
37 | if isinstance(tracker_names, str):
38 | tracker_names = [tracker_names]
39 | for name in tracker_names:
40 | traj_file = os.path.join(path, name, self.name+'.txt')
41 |             if not os.path.exists(traj_file):
42 |                 print("File does not exist: ", traj_file)
43 |                 continue
44 |             with open(traj_file, 'r') as f:
45 |                 pred_traj = [list(map(float, x.strip().split(',')))
46 |                              for x in f.readlines()]
47 |             if self.name == 'monkey-17':
48 |                 pred_traj = pred_traj[:len(self.gt_traj)]
49 |             if store:
50 |                 self.pred_trajs[name] = pred_traj
51 |             else:
52 |                 return pred_traj
53 | self.tracker_names = list(self.pred_trajs.keys())
54 |
55 |
56 |
57 | class LaSOTDataset(Dataset):
58 | """
59 | Args:
60 |         name: dataset name, should be 'LaSOT'
61 |         dataset_root: dataset root
62 |         load_img: whether to load all imgs
63 | """
64 | def __init__(self, name, dataset_root, load_img=False):
65 | super(LaSOTDataset, self).__init__(name, dataset_root)
66 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
67 | meta_data = json.load(f)
68 |
69 | # load videos
70 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
71 | self.videos = {}
72 | for video in pbar:
73 | pbar.set_postfix_str(video)
74 | self.videos[video] = LaSOTVideo(video,
75 | dataset_root,
76 | meta_data[video]['video_dir'],
77 | meta_data[video]['init_rect'],
78 | meta_data[video]['img_names'],
79 | meta_data[video]['gt_rect'],
80 | meta_data[video]['attr'],
81 | meta_data[video]['absent'])
82 |
83 | # set attr
84 | attr = []
85 | for x in self.videos.values():
86 | attr += x.attr
87 | attr = set(attr)
88 | self.attr = {}
89 | self.attr['ALL'] = list(self.videos.keys())
90 | for x in attr:
91 | self.attr[x] = []
92 | for k, v in self.videos.items():
93 | for attr_ in v.attr:
94 | self.attr[attr_].append(k)
95 |
96 |
97 |
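The attribute index built at the end of __init__ maps every attribute tag to the list of videos carrying it, which is what per-attribute evaluation iterates over. A toy sketch of the same grouping logic:

    videos = {'v1': ['OCC', 'SV'], 'v2': ['SV']}  # video name -> attribute list
    attr = {'ALL': list(videos.keys())}
    for a in set(sum(videos.values(), [])):
        attr[a] = [k for k, v in videos.items() if a in v]
    print(attr)  # e.g. {'ALL': ['v1', 'v2'], 'OCC': ['v1'], 'SV': ['v1', 'v2']}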
--------------------------------------------------------------------------------
/toolkit/datasets/nfs.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from tqdm import tqdm
6 | from glob import glob
7 |
8 | from .dataset import Dataset
9 | from .video import Video
10 |
11 |
12 | class NFSVideo(Video):
13 | """
14 | Args:
15 | name: video name
16 | root: dataset root
17 | video_dir: video directory
18 | init_rect: init rectangle
19 | img_names: image names
20 | gt_rect: groundtruth rectangle
21 | attr: attribute of video
22 | """
23 | def __init__(self, name, root, video_dir, init_rect, img_names,
24 | gt_rect, attr, load_img=False):
25 | super(NFSVideo, self).__init__(name, root, video_dir,
26 | init_rect, img_names, gt_rect, attr, load_img)
27 |
28 | # def load_tracker(self, path, tracker_names=None):
29 | # """
30 | # Args:
31 | # path(str): path to result
32 | # tracker_name(list): name of tracker
33 | # """
34 | # if not tracker_names:
35 | # tracker_names = [x.split('/')[-1] for x in glob(path)
36 | # if os.path.isdir(x)]
37 | # if isinstance(tracker_names, str):
38 | # tracker_names = [tracker_names]
39 | # # self.pred_trajs = {}
40 | # for name in tracker_names:
41 | # traj_file = os.path.join(path, name, self.name+'.txt')
42 | # if os.path.exists(traj_file):
43 | # with open(traj_file, 'r') as f :
44 | # self.pred_trajs[name] = [list(map(float, x.strip().split(',')))
45 | # for x in f.readlines()]
46 | # if len(self.pred_trajs[name]) != len(self.gt_traj):
47 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name)
48 | # else:
49 |
50 | # self.tracker_names = list(self.pred_trajs.keys())
51 |
52 | class NFSDataset(Dataset):
53 | """
54 | Args:
55 | name: dataset name, should be "NFS30" or "NFS240"
56 |         dataset_root: dataset root dir
57 | """
58 | def __init__(self, name, dataset_root, load_img=False):
59 | super(NFSDataset, self).__init__(name, dataset_root)
60 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
61 | meta_data = json.load(f)
62 |
63 | # load videos
64 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
65 | self.videos = {}
66 | for video in pbar:
67 | pbar.set_postfix_str(video)
68 | self.videos[video] = NFSVideo(video,
69 | dataset_root,
70 | meta_data[video]['video_dir'],
71 | meta_data[video]['init_rect'],
72 | meta_data[video]['img_names'],
73 | meta_data[video]['gt_rect'],
74 | None)
75 |
76 | self.attr = {}
77 | self.attr['ALL'] = list(self.videos.keys())
78 |
--------------------------------------------------------------------------------
/toolkit/datasets/otb.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from PIL import Image
6 | from tqdm import tqdm
7 | from glob import glob
8 |
9 | from .dataset import Dataset
10 | from .video import Video
11 |
12 |
13 | class OTBVideo(Video):
14 | """
15 | Args:
16 | name: video name
17 | root: dataset root
18 | video_dir: video directory
19 | init_rect: init rectangle
20 | img_names: image names
21 | gt_rect: groundtruth rectangle
22 | attr: attribute of video
23 | """
24 | def __init__(self, name, root, video_dir, init_rect, img_names,
25 | gt_rect, attr, load_img=False):
26 | super(OTBVideo, self).__init__(name, root, video_dir,
27 | init_rect, img_names, gt_rect, attr, load_img)
28 |
29 | def load_tracker(self, path, tracker_names=None, store=True):
30 | """
31 | Args:
32 | path(str): path to result
33 | tracker_name(list): name of tracker
34 | """
35 | if not tracker_names:
36 | tracker_names = [x.split('/')[-1] for x in glob(path)
37 | if os.path.isdir(x)]
38 | if isinstance(tracker_names, str):
39 | tracker_names = [tracker_names]
40 | for name in tracker_names:
41 | traj_file = os.path.join(path, name, self.name+'.txt')
42 | if not os.path.exists(traj_file):
43 | if self.name == 'FleetFace':
44 | txt_name = 'fleetface.txt'
45 | elif self.name == 'Jogging-1':
46 | txt_name = 'jogging_1.txt'
47 | elif self.name == 'Jogging-2':
48 | txt_name = 'jogging_2.txt'
49 | elif self.name == 'Skating2-1':
50 | txt_name = 'skating2_1.txt'
51 | elif self.name == 'Skating2-2':
52 | txt_name = 'skating2_2.txt'
53 | elif self.name == 'FaceOcc1':
54 | txt_name = 'faceocc1.txt'
55 | elif self.name == 'FaceOcc2':
56 | txt_name = 'faceocc2.txt'
57 | elif self.name == 'Human4-2':
58 | txt_name = 'human4_2.txt'
59 | else:
60 | txt_name = self.name[0].lower()+self.name[1:]+'.txt'
61 | traj_file = os.path.join(path, name, txt_name)
62 | if os.path.exists(traj_file):
63 | with open(traj_file, 'r') as f :
64 | pred_traj = [list(map(float, x.strip().split(',')))
65 | for x in f.readlines()]
66 | if len(pred_traj) != len(self.gt_traj):
67 | print(name, len(pred_traj), len(self.gt_traj), self.name)
68 | if store:
69 | self.pred_trajs[name] = pred_traj
70 | else:
71 | return pred_traj
72 | else:
73 |                 print("File does not exist: ", traj_file)
74 | self.tracker_names = list(self.pred_trajs.keys())
75 |
76 |
77 |
78 | class OTBDataset(Dataset):
79 | """
80 | Args:
81 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50'
82 | dataset_root: dataset root
83 |         load_img: whether to load all imgs
84 | """
85 | def __init__(self, name, dataset_root, load_img=False):
86 | super(OTBDataset, self).__init__(name, dataset_root)
87 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
88 | meta_data = json.load(f)
89 |
90 | # load videos
91 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
92 | self.videos = {}
93 | for video in pbar:
94 | pbar.set_postfix_str(video)
95 | self.videos[video] = OTBVideo(video,
96 | dataset_root,
97 | meta_data[video]['video_dir'],
98 | meta_data[video]['init_rect'],
99 | meta_data[video]['img_names'],
100 | meta_data[video]['gt_rect'],
101 | meta_data[video]['attr'],
102 | load_img)
103 |
104 | # set attr
105 | attr = []
106 | for x in self.videos.values():
107 | attr += x.attr
108 | attr = set(attr)
109 | self.attr = {}
110 | self.attr['ALL'] = list(self.videos.keys())
111 | for x in attr:
112 | self.attr[x] = []
113 | for k, v in self.videos.items():
114 | for attr_ in v.attr:
115 | self.attr[attr_].append(k)
116 |
--------------------------------------------------------------------------------
/toolkit/datasets/realworld.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from PIL import Image
6 | from tqdm import tqdm
7 | from glob import glob
8 |
9 | from .dataset import Dataset
10 | from .video import Video
11 |
12 |
13 | def ca(dataset_root='./testing_dataset/real_world'):
14 |
15 | path=dataset_root
16 |
17 | name_list=os.listdir(path+'/data_seq')
18 | name_list.sort()
19 |
20 | b=[]
21 | for i in range(len(name_list)):
22 | b.append(name_list[i])
23 | c=[]
24 |
25 | for jj in range(len(name_list)):
26 | imgs=path+'/data_seq/'+str(name_list[jj])
27 | txt=path+'/anno/'+str(name_list[jj])+'.txt'
28 | bbox=[]
29 |         f = open(txt)  # returns a file object
30 | file= f.readlines()
31 | li=os.listdir(imgs)
32 | li.sort()
33 | for ii in range(len(file)):
34 | li[ii]='data_seq/'+name_list[jj]+'/'+li[ii]
35 |
36 | if ',' in file[ii].strip('\n'):
37 | line = file[ii].strip('\n').split(',')
38 | else:
39 | line = file[ii].strip('\n').split()
40 |
41 |             try:
42 |                 line[0] = int(line[0])
43 |             except ValueError:
44 |                 line[0] = float(line[0])
45 |             try:
46 |                 line[1] = int(line[1])
47 |             except ValueError:
48 |                 line[1] = float(line[1])
49 |             try:
50 |                 line[2] = int(line[2])
51 |             except ValueError:
52 |                 line[2] = float(line[2])
53 |             try:
54 |                 line[3] = int(line[3])
55 |             except ValueError:
56 |                 line[3] = float(line[3])
57 | bbox.append(line)
58 |
59 | if len(bbox)!=len(li):
60 | print (jj)
61 | f.close()
62 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]})
63 |
64 | d=dict(zip(b,c))
65 |
66 | return d
67 |
68 | class UAVVideo(Video):
69 | """
70 | Args:
71 | name: video name
72 | root: dataset root
73 | video_dir: video directory
74 | init_rect: init rectangle
75 | img_names: image names
76 | gt_rect: groundtruth rectangle
77 | attr: attribute of video
78 | """
79 | def __init__(self, name, root, video_dir, init_rect, img_names,
80 | gt_rect, attr, load_img=False):
81 | super(UAVVideo, self).__init__(name, root, video_dir,
82 | init_rect, img_names, gt_rect, attr, load_img)
83 |
84 |
85 | class RealWorldDataset(Dataset):
86 | """
87 | Args:
88 |         name: dataset name, should be 'RealWorld'
89 |         dataset_root: dataset root
90 |         load_img: whether to load all imgs
91 | """
92 | def __init__(self, name, dataset_root, load_img=False):
93 | super(RealWorldDataset, self).__init__(name, dataset_root)
94 |         meta_data = ca(dataset_root)
95 |
96 | # load videos
97 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
98 | self.videos = {}
99 | for video in pbar:
100 | pbar.set_postfix_str(video)
101 | self.videos[video] = UAVVideo(video,
102 | dataset_root,
103 | meta_data[video]['video_dir'],
104 | meta_data[video]['init_rect'],
105 | meta_data[video]['img_names'],
106 | meta_data[video]['gt_rect'],
107 | meta_data[video]['attr'])
108 |
109 | # set attr
110 | attr = []
111 | for x in self.videos.values():
112 | attr += x.attr
113 | attr = set(attr)
114 | self.attr = {}
115 | self.attr['ALL'] = list(self.videos.keys())
116 | for x in attr:
117 | self.attr[x] = []
118 | for k, v in self.videos.items():
119 | for attr_ in v.attr:
120 | self.attr[attr_].append(k)
121 |
122 |
--------------------------------------------------------------------------------
/toolkit/datasets/trackingnet.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from tqdm import tqdm
6 | from glob import glob
7 |
8 | from .dataset import Dataset
9 | from .video import Video
10 |
11 | class TrackingNetVideo(Video):
12 | """
13 | Args:
14 | name: video name
15 | root: dataset root
16 | video_dir: video directory
17 | init_rect: init rectangle
18 | img_names: image names
19 | gt_rect: groundtruth rectangle
20 | attr: attribute of video
21 | """
22 | def __init__(self, name, root, video_dir, init_rect, img_names,
23 | gt_rect, attr, load_img=False):
24 | super(TrackingNetVideo, self).__init__(name, root, video_dir,
25 | init_rect, img_names, gt_rect, attr, load_img)
26 |
27 | # def load_tracker(self, path, tracker_names=None):
28 | # """
29 | # Args:
30 | # path(str): path to result
31 | # tracker_name(list): name of tracker
32 | # """
33 | # if not tracker_names:
34 | # tracker_names = [x.split('/')[-1] for x in glob(path)
35 | # if os.path.isdir(x)]
36 | # if isinstance(tracker_names, str):
37 | # tracker_names = [tracker_names]
38 | # # self.pred_trajs = {}
39 | # for name in tracker_names:
40 | # traj_file = os.path.join(path, name, self.name+'.txt')
41 | # if os.path.exists(traj_file):
42 | # with open(traj_file, 'r') as f :
43 | # self.pred_trajs[name] = [list(map(float, x.strip().split(',')))
44 | # for x in f.readlines()]
45 | # if len(self.pred_trajs[name]) != len(self.gt_traj):
46 | # print(name, len(self.pred_trajs[name]), len(self.gt_traj), self.name)
47 | # else:
48 |
49 | # self.tracker_names = list(self.pred_trajs.keys())
50 |
51 | class TrackingNetDataset(Dataset):
52 | """
53 | Args:
54 |         name: dataset name, should be "TrackingNet"
55 |         dataset_root: dataset root dir
56 | """
57 | def __init__(self, name, dataset_root, load_img=False):
58 | super(TrackingNetDataset, self).__init__(name, dataset_root)
59 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
60 | meta_data = json.load(f)
61 |
62 | # load videos
63 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
64 | self.videos = {}
65 | for video in pbar:
66 | pbar.set_postfix_str(video)
67 | self.videos[video] = TrackingNetVideo(video,
68 | dataset_root,
69 | meta_data[video]['video_dir'],
70 | meta_data[video]['init_rect'],
71 | meta_data[video]['img_names'],
72 | meta_data[video]['gt_rect'],
73 | None)
74 | self.attr = {}
75 | self.attr['ALL'] = list(self.videos.keys())
76 |
--------------------------------------------------------------------------------
/toolkit/datasets/uav.py:
--------------------------------------------------------------------------------
1 | import os
2 | import json
3 |
4 | from tqdm import tqdm
5 | from glob import glob
6 |
7 | from .dataset import Dataset
8 | from .video import Video
9 |
10 | class UAVVideo(Video):
11 | """
12 | Args:
13 | name: video name
14 | root: dataset root
15 | video_dir: video directory
16 | init_rect: init rectangle
17 | img_names: image names
18 | gt_rect: groundtruth rectangle
19 | attr: attribute of video
20 | """
21 | def __init__(self, name, root, video_dir, init_rect, img_names,
22 | gt_rect, attr, load_img=False):
23 | super(UAVVideo, self).__init__(name, root, video_dir,
24 | init_rect, img_names, gt_rect, attr, load_img)
25 |
26 |
27 | class UAVDataset(Dataset):
28 | """
29 | Args:
30 | name: dataset name, should be 'UAV123', 'UAV20L'
31 | dataset_root: dataset root
32 |         load_img: whether to load all imgs
33 | """
34 | def __init__(self, name, dataset_root, load_img=False):
35 | super(UAVDataset, self).__init__(name, dataset_root)
36 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
37 | meta_data = json.load(f)
38 |
39 | # load videos
40 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
41 | self.videos = {}
42 | for video in pbar:
43 | pbar.set_postfix_str(video)
44 | self.videos[video] = UAVVideo(video,
45 | dataset_root,
46 | meta_data[video]['video_dir'],
47 | meta_data[video]['init_rect'],
48 | meta_data[video]['img_names'],
49 | meta_data[video]['gt_rect'],
50 | meta_data[video]['attr'])
51 |
52 | # set attr
53 | attr = []
54 | for x in self.videos.values():
55 | attr += x.attr
56 | attr = set(attr)
57 | self.attr = {}
58 | self.attr['ALL'] = list(self.videos.keys())
59 | for x in attr:
60 | self.attr[x] = []
61 | for k, v in self.videos.items():
62 | for attr_ in v.attr:
63 | self.attr[attr_].append(k)
64 |
65 |
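UAVDataset reads its metadata from <dataset_root>/<name>.json; the keys consumed in __init__ imply a file of the following shape (sequence name and values are illustrative):

    meta_data = {
        "bike1": {
            "video_dir": "bike1",
            "init_rect": [536, 41, 52, 28],
            "img_names": ["data_seq/bike1/000001.jpg",
                          "data_seq/bike1/000002.jpg"],
            "gt_rect": [[536, 41, 52, 28], [538, 42, 52, 28]],
            "attr": ["SV", "CM"],
        },
    }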
--------------------------------------------------------------------------------
/toolkit/datasets/uav10fps.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from PIL import Image
6 | from tqdm import tqdm
7 | from glob import glob
8 |
9 | from .dataset import Dataset
10 | from .video import Video
11 |
12 | def ca():
13 | path='./UAV123_10fps'
14 |
15 | name_list=os.listdir(path+'/data_seq')
16 | name_list.sort()
17 | a=123
18 | b=[]
19 | for i in range(a):
20 | b.append(name_list[i])
21 | c=[]
22 |
23 | for jj in range(a):
24 | imgs=path+'/data_seq/'+str(name_list[jj])
25 | txt=path+'/anno/'+str(name_list[jj])+'.txt'
26 | bbox=[]
27 |         f = open(txt)  # returns a file object
28 | file= f.readlines()
29 | li=os.listdir(imgs)
30 | li.sort()
31 | for ii in range(len(file)):
32 | li[ii]='data_seq/'+name_list[jj]+'/'+li[ii]
33 |
34 | line = file[ii].strip('\n').split(',')
35 |
36 |             try:
37 |                 line[0] = int(line[0])
38 |             except ValueError:
39 |                 line[0] = float(line[0])
40 |             try:
41 |                 line[1] = int(line[1])
42 |             except ValueError:
43 |                 line[1] = float(line[1])
44 |             try:
45 |                 line[2] = int(line[2])
46 |             except ValueError:
47 |                 line[2] = float(line[2])
48 |             try:
49 |                 line[3] = int(line[3])
50 |             except ValueError:
51 |                 line[3] = float(line[3])
52 | bbox.append(line)
53 |
54 | if len(bbox)!=len(li):
55 | print (jj)
56 | f.close()
57 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]})
58 |
59 | d=dict(zip(b,c))
60 |
61 | return d
62 |
63 | class UAVVideo(Video):
64 | """
65 | Args:
66 | name: video name
67 | root: dataset root
68 | video_dir: video directory
69 | init_rect: init rectangle
70 | img_names: image names
71 | gt_rect: groundtruth rectangle
72 | attr: attribute of video
73 | """
74 | def __init__(self, name, root, video_dir, init_rect, img_names,
75 | gt_rect, attr, load_img=False):
76 | super(UAVVideo, self).__init__(name, root, video_dir,
77 | init_rect, img_names, gt_rect, attr, load_img)
78 |
79 |
80 | class UAV10Dataset(Dataset):
81 | """
82 | Args:
83 |         name: dataset name, should contain 'UAV10' (UAV123@10fps)
84 |         dataset_root: dataset root
85 |         load_img: whether to load all imgs
86 | """
87 | def __init__(self, name, dataset_root, load_img=False):
88 | super(UAV10Dataset, self).__init__(name, dataset_root)
89 | meta_data = ca()
90 |
91 | # load videos
92 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
93 | self.videos = {}
94 | for video in pbar:
95 | pbar.set_postfix_str(video)
96 | self.videos[video] = UAVVideo(video,
97 | dataset_root,
98 | meta_data[video]['video_dir'],
99 | meta_data[video]['init_rect'],
100 | meta_data[video]['img_names'],
101 | meta_data[video]['gt_rect'],
102 | meta_data[video]['attr'])
103 |
104 | # set attr
105 | attr = []
106 | for x in self.videos.values():
107 | attr += x.attr
108 | attr = set(attr)
109 | self.attr = {}
110 | self.attr['ALL'] = list(self.videos.keys())
111 | for x in attr:
112 | self.attr[x] = []
113 | for k, v in self.videos.items():
114 | for attr_ in v.attr:
115 | self.attr[attr_].append(k)
116 |
117 |
--------------------------------------------------------------------------------
/toolkit/datasets/uav123.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from PIL import Image
6 | from tqdm import tqdm
7 | from glob import glob
8 |
9 | from .dataset import Dataset
10 | from .video import Video
11 |
12 |
13 | def ca(dataset_root='./testing_dataset/UAV123'):
14 |
15 | path=dataset_root
16 |
17 | name_list=os.listdir(path+'/data_seq')
18 | name_list.sort()
19 |
20 | b=[]
21 | for i in range(len(name_list)):
22 | b.append(name_list[i])
23 | c=[]
24 |
25 | for jj in range(len(name_list)):
26 | imgs=path+'/data_seq/'+str(name_list[jj])
27 | txt=path+'/anno/'+str(name_list[jj])+'.txt'
28 | bbox=[]
29 |         f = open(txt)  # returns a file object
30 | file= f.readlines()
31 | li=os.listdir(imgs)
32 | li.sort()
33 | for ii in range(len(file)):
34 | li[ii]='data_seq/'+name_list[jj]+'/'+li[ii]
35 |
36 | line = file[ii].strip('\n').split(',')
37 |
38 |             try:
39 |                 line[0] = int(line[0])
40 |             except ValueError:
41 |                 line[0] = float(line[0])
42 |             try:
43 |                 line[1] = int(line[1])
44 |             except ValueError:
45 |                 line[1] = float(line[1])
46 |             try:
47 |                 line[2] = int(line[2])
48 |             except ValueError:
49 |                 line[2] = float(line[2])
50 |             try:
51 |                 line[3] = int(line[3])
52 |             except ValueError:
53 |                 line[3] = float(line[3])
54 | bbox.append(line)
55 |
56 | if len(bbox)!=len(li):
57 | print (jj)
58 | f.close()
59 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]})
60 |
61 | d=dict(zip(b,c))
62 |
63 | return d
64 |
65 | class UAVVideo(Video):
66 | """
67 | Args:
68 | name: video name
69 | root: dataset root
70 | video_dir: video directory
71 | init_rect: init rectangle
72 | img_names: image names
73 | gt_rect: groundtruth rectangle
74 | attr: attribute of video
75 | """
76 | def __init__(self, name, root, video_dir, init_rect, img_names,
77 | gt_rect, attr, load_img=False):
78 | super(UAVVideo, self).__init__(name, root, video_dir,
79 | init_rect, img_names, gt_rect, attr, load_img)
80 |
81 |
82 | class UAV123Dataset(Dataset):
83 | """
84 | Args:
85 |         name: dataset name, should be 'UAV123'
86 |         dataset_root: dataset root
87 |         load_img: whether to load all imgs
88 | """
89 | def __init__(self, name, dataset_root, load_img=False):
90 | super(UAV123Dataset, self).__init__(name, dataset_root)
91 |         meta_data = ca(dataset_root)
92 |
93 | # load videos
94 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
95 | self.videos = {}
96 | for video in pbar:
97 | pbar.set_postfix_str(video)
98 | self.videos[video] = UAVVideo(video,
99 | dataset_root,
100 | meta_data[video]['video_dir'],
101 | meta_data[video]['init_rect'],
102 | meta_data[video]['img_names'],
103 | meta_data[video]['gt_rect'],
104 | meta_data[video]['attr'])
105 |
106 | # set attr
107 | attr = []
108 | for x in self.videos.values():
109 | attr += x.attr
110 | attr = set(attr)
111 | self.attr = {}
112 | self.attr['ALL'] = list(self.videos.keys())
113 | for x in attr:
114 | self.attr[x] = []
115 | for k, v in self.videos.items():
116 | for attr_ in v.attr:
117 | self.attr[attr_].append(k)
118 |
119 |
--------------------------------------------------------------------------------
/toolkit/datasets/uav20l.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from PIL import Image
6 | from tqdm import tqdm
7 | from glob import glob
8 |
9 | from .dataset import Dataset
10 | from .video import Video
11 |
12 |
13 | def ca():
14 |
15 | path='./testing_dataset/UAV20L'
16 |
17 | name_list=os.listdir(path+'/data_seq/')
18 | name_list.sort()
19 |
20 | b=[]
21 | for i in range(len(name_list)):
22 | b.append(name_list[i])
23 | c=[]
24 |
25 | for jj in range(len(name_list)):
26 | imgs=path+'/data_seq/'+str(name_list[jj])
27 | txt=path+'/anno/'+str(name_list[jj])+'.txt'
28 | bbox=[]
29 |         f = open(txt)  # returns a file object
30 | file= f.readlines()
31 | li=os.listdir(imgs)
32 | li.sort()
33 | for ii in range(len(file)):
34 | li[ii]='data_seq/'+name_list[jj]+'/'+li[ii]
35 |
36 | line = file[ii].strip('\n').split(',')
37 |
38 |             try:
39 |                 line[0] = int(line[0])
40 |             except ValueError:
41 |                 line[0] = float(line[0])
42 |             try:
43 |                 line[1] = int(line[1])
44 |             except ValueError:
45 |                 line[1] = float(line[1])
46 |             try:
47 |                 line[2] = int(line[2])
48 |             except ValueError:
49 |                 line[2] = float(line[2])
50 |             try:
51 |                 line[3] = int(line[3])
52 |             except ValueError:
53 |                 line[3] = float(line[3])
54 | bbox.append(line)
55 |
56 | if len(bbox)!=len(li):
57 | print (jj)
58 | f.close()
59 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]})
60 |
61 | d=dict(zip(b,c))
62 |
63 | return d
64 |
65 | class UAVVideo(Video):
66 | """
67 | Args:
68 | name: video name
69 | root: dataset root
70 | video_dir: video directory
71 | init_rect: init rectangle
72 | img_names: image names
73 | gt_rect: groundtruth rectangle
74 | attr: attribute of video
75 | """
76 | def __init__(self, name, root, video_dir, init_rect, img_names,
77 | gt_rect, attr, load_img=False):
78 | super(UAVVideo, self).__init__(name, root, video_dir,
79 | init_rect, img_names, gt_rect, attr, load_img)
80 |
81 |
82 | class UAV20Dataset(Dataset):
83 | """
84 | Args:
85 |         name: dataset name, should contain 'UAV20'
86 |         dataset_root: dataset root
87 |         load_img: whether to load all imgs
88 | """
89 | def __init__(self, name, dataset_root, load_img=False):
90 | super(UAV20Dataset, self).__init__(name, dataset_root)
91 | meta_data = ca()
92 |
93 | # load videos
94 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
95 | self.videos = {}
96 | for video in pbar:
97 | pbar.set_postfix_str(video)
98 | self.videos[video] = UAVVideo(video,
99 | dataset_root,
100 | meta_data[video]['video_dir'],
101 | meta_data[video]['init_rect'],
102 | meta_data[video]['img_names'],
103 | meta_data[video]['gt_rect'],
104 | meta_data[video]['attr'])
105 |
106 | # set attr
107 | attr = []
108 | for x in self.videos.values():
109 | attr += x.attr
110 | attr = set(attr)
111 | self.attr = {}
112 | self.attr['ALL'] = list(self.videos.keys())
113 | for x in attr:
114 | self.attr[x] = []
115 | for k, v in self.videos.items():
116 | for attr_ in v.attr:
117 | self.attr[attr_].append(k)
118 |
119 |
--------------------------------------------------------------------------------
/toolkit/datasets/uavdark.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from PIL import Image
6 | from tqdm import tqdm
7 | from glob import glob
8 |
9 | from .dataset import Dataset
10 | from .video import Video
11 |
12 |
13 | def ca():
14 |
15 | path='./UAVDark135'
16 |
17 | name_list=os.listdir(path+'/data_seq/')
18 | name_list.sort()
19 |
20 | b=[]
21 | for i in range(len(name_list)):
22 | b.append(name_list[i])
23 | c=[]
24 |
25 | for jj in range(len(name_list)):
26 | imgs=path+'/data_seq/'+str(name_list[jj])
27 | txt=path+'/anno/'+str(name_list[jj])+'.txt'
28 | bbox=[]
29 |         f = open(txt)  # returns a file object
30 | file= f.readlines()
31 | li=os.listdir(imgs)
32 | li.sort()
33 | for ii in range(len(file)):
34 | li[ii]='data_seq/'+name_list[jj]+'/'+li[ii]
35 |
36 | line = file[ii].strip('\n').split(',')
37 |
38 |             try:
39 |                 line[0] = int(line[0])
40 |             except ValueError:
41 |                 line[0] = float(line[0])
42 |             try:
43 |                 line[1] = int(line[1])
44 |             except ValueError:
45 |                 line[1] = float(line[1])
46 |             try:
47 |                 line[2] = int(line[2])
48 |             except ValueError:
49 |                 line[2] = float(line[2])
50 |             try:
51 |                 line[3] = int(line[3])
52 |             except ValueError:
53 |                 line[3] = float(line[3])
54 | bbox.append(line)
55 |
56 | if len(bbox)!=len(li):
57 | print (jj)
58 | f.close()
59 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]})
60 |
61 | d=dict(zip(b,c))
62 |
63 | return d
64 |
65 | class UAVVideo(Video):
66 | """
67 | Args:
68 | name: video name
69 | root: dataset root
70 | video_dir: video directory
71 | init_rect: init rectangle
72 | img_names: image names
73 | gt_rect: groundtruth rectangle
74 | attr: attribute of video
75 | """
76 | def __init__(self, name, root, video_dir, init_rect, img_names,
77 | gt_rect, attr, load_img=False):
78 | super(UAVVideo, self).__init__(name, root, video_dir,
79 | init_rect, img_names, gt_rect, attr, load_img)
80 |
81 |
82 | class UAVDARKDataset(Dataset):
83 | """
84 | Args:
85 |         name: dataset name, should contain 'UAVDARK'
86 |         dataset_root: dataset root
87 |         load_img: whether to load all imgs
88 | """
89 | def __init__(self, name, dataset_root, load_img=False):
90 | super(UAVDARKDataset, self).__init__(name, dataset_root)
91 | meta_data = ca()
92 |
93 | # load videos
94 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
95 | self.videos = {}
96 | for video in pbar:
97 | pbar.set_postfix_str(video)
98 | self.videos[video] = UAVVideo(video,
99 | dataset_root,
100 | meta_data[video]['video_dir'],
101 | meta_data[video]['init_rect'],
102 | meta_data[video]['img_names'],
103 | meta_data[video]['gt_rect'],
104 | meta_data[video]['attr'])
105 |
106 | # set attr
107 | attr = []
108 | for x in self.videos.values():
109 | attr += x.attr
110 | attr = set(attr)
111 | self.attr = {}
112 | self.attr['ALL'] = list(self.videos.keys())
113 | for x in attr:
114 | self.attr[x] = []
115 | for k, v in self.videos.items():
116 | for attr_ in v.attr:
117 | self.attr[attr_].append(k)
118 |
119 |
--------------------------------------------------------------------------------
/toolkit/datasets/visdrone.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from PIL import Image
6 | from tqdm import tqdm
7 | from glob import glob
8 |
9 | from .dataset import Dataset
10 | from .video import Video
11 |
12 |
13 | class UAVDTVideo(Video):
14 | """
15 | Args:
16 | name: video name
17 | root: dataset root
18 | video_dir: video directory
19 | init_rect: init rectangle
20 | img_names: image names
21 | gt_rect: groundtruth rectangle
22 | attr: attribute of video
23 | """
24 | def __init__(self, name, root, video_dir, init_rect, img_names,
25 | gt_rect, attr, load_img=False):
26 |         super(UAVDTVideo, self).__init__(name, root, video_dir,
27 | init_rect, img_names, gt_rect, attr, load_img)
28 |
29 | def ca():
30 |
31 |
32 | path='./VisDrone2018-SOT-test'
33 |
34 | name_list=os.listdir(path+'/sequences')
35 | name_list.sort()
36 |
37 | b=[]
38 | for i in range(len(name_list)):
39 | b.append(name_list[i])
40 | c=[]
41 |
42 | for jj in range(len(name_list)):
43 | imgs=path+'/sequences/'+str(name_list[jj])
44 | txt=path+'/annotations/'+str(name_list[jj])+'.txt'
45 | bbox=[]
46 |         f = open(txt)  # returns a file object
47 | file= f.readlines()
48 | li=os.listdir(imgs)
49 | li.sort()
50 | for ii in range(len(file)):
51 | li[ii]='sequences/'+name_list[jj]+'/'+li[ii]
52 |
53 | line = file[ii].strip('\n').split(',')
54 |
55 |             try:
56 |                 line[0] = int(line[0])
57 |             except ValueError:
58 |                 line[0] = float(line[0])
59 |             try:
60 |                 line[1] = int(line[1])
61 |             except ValueError:
62 |                 line[1] = float(line[1])
63 |             try:
64 |                 line[2] = int(line[2])
65 |             except ValueError:
66 |                 line[2] = float(line[2])
67 |             try:
68 |                 line[3] = int(line[3])
69 |             except ValueError:
70 |                 line[3] = float(line[3])
71 | bbox.append(line)
72 |
73 | if len(bbox)!=len(li):
74 | print (jj)
75 | f.close()
76 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]})
77 |
78 | d=dict(zip(b,c))
79 |
80 | return d
81 | class VISDRONEDataset(Dataset):
82 | """
83 | Args:
84 |         name: dataset name, should be 'VISDRONE'
85 |         dataset_root: dataset root
86 |         load_img: whether to load all imgs
87 | """
88 | def __init__(self, name, dataset_root, load_img=False):
89 | super(VISDRONEDataset, self).__init__(name, dataset_root)
90 | # with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
91 | # meta_data = json.load(f)
92 | meta_data=ca()
93 | # load videos
94 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
95 | self.videos = {}
96 | for video in pbar:
97 | pbar.set_postfix_str(video)
98 |             self.videos[video] = UAVDTVideo(video,
99 | dataset_root,
100 | meta_data[video]['video_dir'],
101 | meta_data[video]['init_rect'],
102 | meta_data[video]['img_names'],
103 | meta_data[video]['gt_rect'],
104 | meta_data[video]['attr'],
105 | load_img)
106 |
107 | # set attr
108 | attr = []
109 | for x in self.videos.values():
110 | attr += x.attr
111 | attr = set(attr)
112 | self.attr = {}
113 | self.attr['ALL'] = list(self.videos.keys())
114 | for x in attr:
115 | self.attr[x] = []
116 | for k, v in self.videos.items():
117 | for attr_ in v.attr:
118 | self.attr[attr_].append(k)
119 |
--------------------------------------------------------------------------------
/toolkit/datasets/visdrone1.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import numpy as np
4 |
5 | from PIL import Image
6 | from tqdm import tqdm
7 | from glob import glob
8 |
9 | from .dataset import Dataset
10 | from .video import Video
11 |
12 |
13 | class UAVDTVideo(Video):
14 | """
15 | Args:
16 | name: video name
17 | root: dataset root
18 | video_dir: video directory
19 | init_rect: init rectangle
20 | img_names: image names
21 | gt_rect: groundtruth rectangle
22 | attr: attribute of video
23 | """
24 | def __init__(self, name, root, video_dir, init_rect, img_names,
25 | gt_rect, attr, load_img=False):
26 |         super(UAVDTVideo, self).__init__(name, root, video_dir,
27 | init_rect, img_names, gt_rect, attr, load_img)
28 |
29 | def ca():
30 |
31 |
32 | path='./VisDrone2018-SOT-test'
33 |
34 | name_list=os.listdir(path+'/sequences')
35 | name_list.sort()
36 |
37 | b=[]
38 | for i in range(len(name_list)):
39 | b.append(name_list[i])
40 | c=[]
41 |
42 | for jj in range(len(name_list)):
43 | imgs=path+'/sequences/'+str(name_list[jj])
44 | txt=path+'/annotations/'+str(name_list[jj])+'.txt'
45 | bbox=[]
46 |         f = open(txt)  # returns a file object
47 | file= f.readlines()
48 | li=os.listdir(imgs)
49 | li.sort()
50 | for ii in range(len(file)):
51 | li[ii]=name_list[jj]+'/'+li[ii]
52 |
53 | line = file[ii].strip('\n').split(',')
54 |
55 |             try:
56 |                 line[0] = int(line[0])
57 |             except ValueError:
58 |                 line[0] = float(line[0])
59 |             try:
60 |                 line[1] = int(line[1])
61 |             except ValueError:
62 |                 line[1] = float(line[1])
63 |             try:
64 |                 line[2] = int(line[2])
65 |             except ValueError:
66 |                 line[2] = float(line[2])
67 |             try:
68 |                 line[3] = int(line[3])
69 |             except ValueError:
70 |                 line[3] = float(line[3])
71 | bbox.append(line)
72 |
73 | if len(bbox)!=len(li):
74 | print (jj)
75 | f.close()
76 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]})
77 |
78 | d=dict(zip(b,c))
79 |
80 | return d
81 | class VISDRONED2018Dataset(Dataset):
82 | """
83 | Args:
84 |         name: dataset name, should be 'VISDRONE2018'
85 |         dataset_root: dataset root
86 |         load_img: whether to load all imgs
87 | """
88 | def __init__(self, name, dataset_root, load_img=False):
89 | super(VISDRONED2018Dataset, self).__init__(name, dataset_root)
90 | # with open(os.path.join(dataset_root, name+'.json'), 'r') as f:
91 | # meta_data = json.load(f)
92 | meta_data=ca()
93 | # load videos
94 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100)
95 | self.videos = {}
96 | for video in pbar:
97 | pbar.set_postfix_str(video)
98 |             self.videos[video] = UAVDTVideo(video,
99 | dataset_root,
100 | meta_data[video]['video_dir'],
101 | meta_data[video]['init_rect'],
102 | meta_data[video]['img_names'],
103 | meta_data[video]['gt_rect'],
104 | meta_data[video]['attr'],
105 | load_img)
106 |
107 | # set attr
108 | attr = []
109 | for x in self.videos.values():
110 | attr += x.attr
111 | attr = set(attr)
112 | self.attr = {}
113 | self.attr['ALL'] = list(self.videos.keys())
114 | for x in attr:
115 | self.attr[x] = []
116 | for k, v in self.videos.items():
117 | for attr_ in v.attr:
118 | self.attr[attr_].append(k)
119 |
--------------------------------------------------------------------------------
/toolkit/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | from .ar_benchmark import AccuracyRobustnessBenchmark
2 | from .eao_benchmark import EAOBenchmark
3 | from .ope_benchmark import OPEBenchmark
4 | from .f1_benchmark import F1Benchmark
5 |
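Assuming the usual pysot-style interfaces of these benchmark classes (the constructor takes a dataset object, and the eval_* methods take tracker names), a one-pass-evaluation run looks roughly like this; the result directory and tracker name are placeholders:

    from toolkit.datasets import DatasetFactory
    from toolkit.evaluation import OPEBenchmark

    dataset = DatasetFactory.create_dataset(name='DTB70',
                                            dataset_root='testing_dataset/DTB70')
    dataset.set_tracker('results/DTB70', ['RPN_Mob_MV'])

    benchmark = OPEBenchmark(dataset)
    success = benchmark.eval_success(['RPN_Mob_MV'])      # overlap-based success
    precision = benchmark.eval_precision(['RPN_Mob_MV'])  # center-error precision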
--------------------------------------------------------------------------------
/toolkit/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # from . import region
2 | from .statistics import *
3 |
--------------------------------------------------------------------------------
/toolkit/utils/c_region.pxd:
--------------------------------------------------------------------------------
1 | cdef extern from "src/region.h":
2 | ctypedef enum region_type "RegionType":
3 |         EMPTY
4 |         SPECIAL
5 |         RECTANGLE
6 | POLYGON
7 | MASK
8 |
9 | ctypedef struct region_bounds:
10 | float top
11 | float bottom
12 | float left
13 | float right
14 |
15 | ctypedef struct region_rectangle:
16 | float x
17 | float y
18 | float width
19 | float height
20 |
21 | # ctypedef struct region_mask:
22 | # int x
23 | # int y
24 | # int width
25 | # int height
26 | # char *data
27 |
28 | ctypedef struct region_polygon:
29 | int count
30 | float *x
31 | float *y
32 |
33 | ctypedef union region_container_data:
34 | region_rectangle rectangle
35 | region_polygon polygon
36 | # region_mask mask
37 | int special
38 |
39 | ctypedef struct region_container:
40 | region_type type
41 | region_container_data data
42 |
43 | # ctypedef struct region_overlap:
44 | # float overlap
45 | # float only1
46 | # float only2
47 |
48 | # region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds)
49 |
50 | float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds)
51 |
--------------------------------------------------------------------------------
/toolkit/utils/misc.py:
--------------------------------------------------------------------------------
1 | """
2 | @author
3 | """
4 | import numpy as np
5 |
6 | def determine_thresholds(confidence, resolution=100):
7 |     """choose thresholds according to confidence
8 | 
9 |     Args:
10 |         confidence: list or numpy array
11 |         resolution: number of thresholds to choose
12 | 
13 |     Returns:
14 |         thresholds: numpy array
15 |     """
16 | if isinstance(confidence, list):
17 | confidence = np.array(confidence)
18 | confidence = confidence.flatten()
19 | confidence = confidence[~np.isnan(confidence)]
20 | confidence.sort()
21 |
22 | assert len(confidence) > resolution and resolution > 2
23 |
24 | thresholds = np.ones((resolution))
25 | thresholds[0] = - np.inf
26 | thresholds[-1] = np.inf
27 | delta = np.floor(len(confidence) / (resolution - 2))
28 | idxs = np.linspace(delta, len(confidence)-delta, resolution-2, dtype=np.int32)
29 | thresholds[1:-1] = confidence[idxs]
30 | return thresholds
31 |
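A quick sanity check of determine_thresholds on synthetic scores (assumes the function above is in scope):

    import numpy as np

    conf = np.random.rand(1000)  # fake confidence values
    th = determine_thresholds(conf, resolution=100)
    assert len(th) == 100                          # endpoints + 98 quantiles
    assert th[0] == -np.inf and th[-1] == np.inf
    assert all(th[i] <= th[i + 1] for i in range(1, 98))  # interior is sorted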
--------------------------------------------------------------------------------
/toolkit/utils/src/region.h:
--------------------------------------------------------------------------------
1 | /* -*- Mode: C; indent-tabs-mode: nil; c-basic-offset: 4; tab-width: 4 -*- */
2 |
3 | #ifndef _REGION_H_
4 | #define _REGION_H_
5 |
6 | #ifdef TRAX_STATIC_DEFINE
7 | # define __TRAX_EXPORT
8 | #else
9 | # ifndef __TRAX_EXPORT
10 | # if defined(_MSC_VER)
11 | # ifdef trax_EXPORTS
12 | /* We are building this library */
13 | # define __TRAX_EXPORT __declspec(dllexport)
14 | # else
15 | /* We are using this library */
16 | # define __TRAX_EXPORT __declspec(dllimport)
17 | # endif
18 | # elif defined(__GNUC__)
19 | # ifdef trax_EXPORTS
20 | /* We are building this library */
21 | # define __TRAX_EXPORT __attribute__((visibility("default")))
22 | # else
23 | /* We are using this library */
24 | # define __TRAX_EXPORT __attribute__((visibility("default")))
25 | # endif
26 | # endif
27 | # endif
28 | #endif
29 |
30 | #ifndef MAX
31 | #define MAX(a,b) (((a) > (b)) ? (a) : (b))
32 | #endif
33 |
34 | #ifndef MIN
35 | #define MIN(a,b) (((a) < (b)) ? (a) : (b))
36 | #endif
37 |
38 | #define TRAX_DEFAULT_CODE 0
39 |
40 | #define REGION_LEGACY_RASTERIZATION 1
41 |
42 | #ifdef __cplusplus
43 | extern "C" {
44 | #endif
45 |
46 | typedef enum region_type {EMPTY, SPECIAL, RECTANGLE, POLYGON, MASK} region_type;
47 |
48 | typedef struct region_bounds {
49 |
50 | float top;
51 | float bottom;
52 | float left;
53 | float right;
54 |
55 | } region_bounds;
56 |
57 | typedef struct region_polygon {
58 |
59 | int count;
60 |
61 | float* x;
62 | float* y;
63 |
64 | } region_polygon;
65 |
66 | typedef struct region_mask {
67 |
68 | int x;
69 | int y;
70 |
71 | int width;
72 | int height;
73 |
74 | char* data;
75 |
76 | } region_mask;
77 |
78 | typedef struct region_rectangle {
79 |
80 | float x;
81 | float y;
82 | float width;
83 | float height;
84 |
85 | } region_rectangle;
86 |
87 | typedef struct region_container {
88 | enum region_type type;
89 | union {
90 | region_rectangle rectangle;
91 | region_polygon polygon;
92 | region_mask mask;
93 | int special;
94 | } data;
95 | } region_container;
96 |
97 | typedef struct region_overlap {
98 |
99 | float overlap;
100 | float only1;
101 | float only2;
102 |
103 | } region_overlap;
104 |
105 | extern const region_bounds region_no_bounds;
106 |
107 | __TRAX_EXPORT int region_set_flags(int mask);
108 |
109 | __TRAX_EXPORT int region_clear_flags(int mask);
110 |
111 | __TRAX_EXPORT region_overlap region_compute_overlap(const region_container* ra, const region_container* rb, region_bounds bounds);
112 |
113 | __TRAX_EXPORT float compute_polygon_overlap(const region_polygon* p1, const region_polygon* p2, float *only1, float *only2, region_bounds bounds);
114 |
115 | __TRAX_EXPORT region_bounds region_create_bounds(float left, float top, float right, float bottom);
116 |
117 | __TRAX_EXPORT region_bounds region_compute_bounds(const region_container* region);
118 |
119 | __TRAX_EXPORT int region_parse(const char* buffer, region_container** region);
120 |
121 | __TRAX_EXPORT char* region_string(region_container* region);
122 |
123 | __TRAX_EXPORT void region_print(FILE* out, region_container* region);
124 |
125 | __TRAX_EXPORT region_container* region_convert(const region_container* region, region_type type);
126 |
127 | __TRAX_EXPORT void region_release(region_container** region);
128 |
129 | __TRAX_EXPORT region_container* region_create_special(int code);
130 |
131 | __TRAX_EXPORT region_container* region_create_rectangle(float x, float y, float width, float height);
132 |
133 | __TRAX_EXPORT region_container* region_create_polygon(int count);
134 |
135 | __TRAX_EXPORT int region_contains_point(region_container* r, float x, float y);
136 |
137 | __TRAX_EXPORT void region_get_mask(region_container* r, char* mask, int width, int height);
138 |
139 | __TRAX_EXPORT void region_get_mask_offset(region_container* r, char* mask, int x, int y, int width, int height);
140 |
141 | #ifdef __cplusplus
142 | }
143 | #endif
144 |
145 | #endif
146 |
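From Python, these routines are reached through the Cython wrapper built from toolkit/utils/region.pyx; a minimal sketch, assuming the extension is compiled and exposes vot_overlap as it is imported elsewhere in this repo (the boxes and frame size are illustrative):

    from toolkit.utils.region import vot_overlap

    # two illustrative (x, y, w, h) boxes, clipped to a 640x480 frame
    pred_bbox = [100, 100, 50, 50]
    gt_bbox = [110, 105, 50, 50]
    iou = vot_overlap(pred_bbox, gt_bbox, (640, 480))
    print(iou)  # overlap in [0, 1], backed by region_compute_overlap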
--------------------------------------------------------------------------------
/toolkit/visualization/__init__.py:
--------------------------------------------------------------------------------
1 | from .draw_f1 import draw_f1
2 | from .draw_success_precision import draw_success_precision
3 | from .draw_eao import draw_eao
4 |
--------------------------------------------------------------------------------
/toolkit/visualization/draw_eao.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 | import pickle
4 |
5 | from matplotlib import rc
6 | from .draw_utils import COLOR, MARKER_STYLE
7 |
8 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
9 | rc('text', usetex=True)
10 |
11 | def draw_eao(result):
12 | fig = plt.figure()
13 | ax = fig.add_subplot(111, projection='polar')
14 | angles = np.linspace(0, 2*np.pi, 8, endpoint=True)
15 |
16 | attr2value = []
17 | for i, (tracker_name, ret) in enumerate(result.items()):
18 | value = list(ret.values())
19 | attr2value.append(value)
20 | value.append(value[0])
21 | attr2value = np.array(attr2value)
22 | max_value = np.max(attr2value, axis=0)
23 | min_value = np.min(attr2value, axis=0)
24 | for i, (tracker_name, ret) in enumerate(result.items()):
25 | value = list(ret.values())
26 | value.append(value[0])
27 | value = np.array(value)
28 | value *= (1 / max_value)
29 | plt.plot(angles, value, linestyle='-', color=COLOR[i], marker=MARKER_STYLE[i],
30 | label=tracker_name, linewidth=1.5, markersize=6)
31 |
32 | attrs = ["Overall", "Camera motion",
33 | "Illumination change","Motion Change",
34 | "Size change","Occlusion",
35 | "Unassigned"]
36 | attr_value = []
37 | for attr, maxv, minv in zip(attrs, max_value, min_value):
38 | attr_value.append(attr + "\n({:.3f},{:.3f})".format(minv, maxv))
39 | ax.set_thetagrids(angles[:-1] * 180/np.pi, attr_value)
40 | ax.spines['polar'].set_visible(False)
41 | ax.legend(loc='upper center', bbox_to_anchor=(0.5,-0.07), frameon=False, ncol=5)
42 |     ax.grid(False)
43 | ax.set_ylim(0, 1.18)
44 | ax.set_yticks([])
45 | plt.show()
46 |
47 | if __name__ == '__main__':
48 | result = pickle.load(open("../../result.pkl", 'rb'))
49 | draw_eao(result)
50 |
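draw_eao expects a dict mapping each tracker name to the seven attribute EAO scores, in the attribute order listed above; a minimal sketch with made-up numbers (note that rc('text', usetex=True) requires a working LaTeX install):

    from toolkit.visualization import draw_eao

    # illustrative EAO values only
    result = {
        'TrackerA': {'Overall': 0.41, 'Camera motion': 0.38,
                     'Illumination change': 0.45, 'Motion Change': 0.33,
                     'Size change': 0.36, 'Occlusion': 0.30,
                     'Unassigned': 0.40},
        'TrackerB': {'Overall': 0.35, 'Camera motion': 0.31,
                     'Illumination change': 0.40, 'Motion Change': 0.29,
                     'Size change': 0.30, 'Occlusion': 0.27,
                     'Unassigned': 0.34},
    }
    draw_eao(result)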
--------------------------------------------------------------------------------
/toolkit/visualization/draw_f1.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | import numpy as np
3 |
4 | from matplotlib import rc
5 | from .draw_utils import COLOR, LINE_STYLE
6 |
7 | rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
8 | rc('text', usetex=True)
9 |
10 | def draw_f1(result, bold_name=None):
11 | # drawing f1 contour
12 | fig, ax = plt.subplots()
13 | for f1 in np.arange(0.1, 1, 0.1):
14 | recall = np.arange(f1, 1+0.01, 0.01)
15 | precision = f1 * recall / (2 * recall - f1)
16 | ax.plot(recall, precision, color=[0,1,0], linestyle='-', linewidth=0.5)
17 | ax.plot(precision, recall, color=[0,1,0], linestyle='-', linewidth=0.5)
18 |     ax.grid(True)
19 | ax.set_aspect(1)
20 | plt.xlabel('Recall')
21 | plt.ylabel('Precision')
22 | plt.axis([0, 1, 0, 1])
23 | plt.title(r'\textbf{VOT2018-LT Precision vs Recall}')
24 |
25 | # draw result line
26 | all_precision = {}
27 | all_recall = {}
28 | best_f1 = {}
29 | best_idx = {}
30 | for tracker_name, ret in result.items():
31 | precision = np.mean(list(ret['precision'].values()), axis=0)
32 | recall = np.mean(list(ret['recall'].values()), axis=0)
33 | f1 = 2 * precision * recall / (precision + recall)
34 | max_idx = np.argmax(f1)
35 | all_precision[tracker_name] = precision
36 | all_recall[tracker_name] = recall
37 | best_f1[tracker_name] = f1[max_idx]
38 | best_idx[tracker_name] = max_idx
39 |
40 |     for idx, (tracker_name, f1_score) in \
41 |             enumerate(sorted(best_f1.items(), key=lambda x: x[1], reverse=True)):
42 |         if tracker_name == bold_name:
43 |             label = r"\textbf{[%.3f] Ours}" % (f1_score)
44 |         else:
45 |             label = "[%.3f] " % (f1_score) + tracker_name
46 | recall = all_recall[tracker_name][:-1]
47 | precision = all_precision[tracker_name][:-1]
48 | ax.plot(recall, precision, color=COLOR[idx], linestyle='-',
49 | label=label)
50 | f1_idx = best_idx[tracker_name]
51 | ax.plot(recall[f1_idx], precision[f1_idx], color=[0,0,0], marker='o',
52 | markerfacecolor=COLOR[idx], markersize=5)
53 | ax.legend(loc='lower right', labelspacing=0.2)
54 | plt.xticks(np.arange(0, 1+0.1, 0.1))
55 | plt.yticks(np.arange(0, 1+0.1, 0.1))
56 | plt.show()
57 |
58 | if __name__ == '__main__':
59 | draw_f1(None)
60 |
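draw_f1 expects, per tracker, per-video precision and recall curves sampled at the same confidence thresholds; a minimal sketch with synthetic curves (real ones come from the F1 benchmark in toolkit/evaluation/f1_benchmark.py):

    import numpy as np
    from toolkit.visualization import draw_f1

    result = {
        'TrackerA': {
            'precision': {'seq1': np.linspace(0.9, 0.5, 100),
                          'seq2': np.linspace(0.8, 0.4, 100)},
            'recall':    {'seq1': np.linspace(0.3, 0.9, 100),
                          'seq2': np.linspace(0.2, 0.8, 100)},
        },
    }
    draw_f1(result, bold_name='TrackerA')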
--------------------------------------------------------------------------------
/toolkit/visualization/draw_utils.py:
--------------------------------------------------------------------------------
1 |
2 | COLOR = ((1, 0, 0),
3 | (0, 1, 0),
4 | (1, 0, 1),
5 | (1, 1, 0),
6 | (0 , 162/255, 232/255),
7 | (0.5, 0.5, 0.5),
8 | (0, 0, 1),
9 | (0, 1, 1),
10 | (136/255, 0 , 21/255),
11 | (255/255, 127/255, 39/255),
12 | (0, 0, 0))
13 |
14 | LINE_STYLE = ['-', '--', ':', '-', '--', ':', '-', '--', ':', '-']
15 |
16 | MARKER_STYLE = ['o', 'v', '<', '*', 'D', 'x', '.', 'x', '<', '.']
17 |
--------------------------------------------------------------------------------
/tools/demo.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 |
6 | import os
7 | import argparse
8 |
9 | import cv2
10 | import torch
11 | import numpy as np
12 | from glob import glob
13 |
14 | from pysot.core.config import cfg
15 | from pysot.models.model_builder import ModelBuilder
16 | from pysot.tracker.tracker_builder import build_tracker
17 |
18 | torch.set_num_threads(1)
19 |
20 | parser = argparse.ArgumentParser(description='tracking demo')
21 | parser.add_argument('--config', type=str, help='config file')
22 | parser.add_argument('--snapshot', type=str, help='model name')
23 | parser.add_argument('--video_name', default='', type=str,
24 | help='videos or image files')
25 | args = parser.parse_args()
26 |
27 |
28 | def get_frames(video_name):
29 | if not video_name:
30 | cap = cv2.VideoCapture(0)
31 | # warmup
32 | for i in range(5):
33 | cap.read()
34 | while True:
35 | ret, frame = cap.read()
36 | if ret:
37 | yield frame
38 | else:
39 | break
40 | elif video_name.endswith('avi') or \
41 | video_name.endswith('mp4'):
42 |         cap = cv2.VideoCapture(video_name)
43 | while True:
44 | ret, frame = cap.read()
45 | if ret:
46 | yield frame
47 | else:
48 | break
49 | else:
50 | images = glob(os.path.join(video_name, '*.jp*'))
51 | images = sorted(images,
52 | key=lambda x: int(x.split('/')[-1].split('.')[0]))
53 | for img in images:
54 | frame = cv2.imread(img)
55 | yield frame
56 |
57 |
58 | def main():
59 | # load config
60 | cfg.merge_from_file(args.config)
61 | cfg.CUDA = torch.cuda.is_available() and cfg.CUDA
62 | device = torch.device('cuda' if cfg.CUDA else 'cpu')
63 |
64 | # create model
65 | model = ModelBuilder()
66 |
67 | # load model
68 | model.load_state_dict(torch.load(args.snapshot,
69 | map_location=lambda storage, loc: storage.cpu()))
70 | model.eval().to(device)
71 |
72 | # build tracker
73 | tracker = build_tracker(model)
74 |
75 | first_frame = True
76 | if args.video_name:
77 | video_name = args.video_name.split('/')[-1].split('.')[0]
78 | else:
79 | video_name = 'webcam'
80 | cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN)
81 | for frame in get_frames(args.video_name):
82 | if first_frame:
83 | try:
84 | init_rect = cv2.selectROI(video_name, frame, False, False)
85 |             except Exception:
86 | exit()
87 | tracker.init(frame, init_rect)
88 | first_frame = False
89 | else:
90 | outputs = tracker.track(frame)
91 | if 'polygon' in outputs:
92 | polygon = np.array(outputs['polygon']).astype(np.int32)
93 | cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
94 | True, (0, 255, 0), 3)
95 | mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
96 | mask = mask.astype(np.uint8)
97 | mask = np.stack([mask, mask*255, mask]).transpose(1, 2, 0)
98 | frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
99 | else:
100 | bbox = list(map(int, outputs['bbox']))
101 | cv2.rectangle(frame, (bbox[0], bbox[1]),
102 | (bbox[0]+bbox[2], bbox[1]+bbox[3]),
103 | (0, 255, 0), 3)
104 | cv2.imshow(video_name, frame)
105 | cv2.waitKey(40)
106 |
107 |
108 | if __name__ == '__main__':
109 | main()
110 |
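For reference, the demo is typically launched along these lines, following the invocation style of train.sh (the snapshot and video paths are placeholders):

    python ./tools/demo.py --config experiments/siamrpn_mobilev2_l234_dwxcorr/config.yaml --snapshot <path/to/model.pth> --video_name <path/to/video_or_image_dir>

Omitting --video_name falls back to the webcam branch of get_frames.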
--------------------------------------------------------------------------------
/tools/gen_sim_info.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import os
3 |
4 | pkl_path = 'Raw_Results_RPN_Mob/UAVDT/SiamRPN++_Mob'
5 | tracker = 'SiamRPN++_Mob'
6 | dataset = 'UAVDT'
7 | tgt_path = 'testing_dataset/sim_info'
8 |
9 | pkls = os.listdir(pkl_path)
10 | info_dict = {}
11 |
12 | for pkl in pkls:
13 | name = pkl[0:-4]
14 | with open(os.path.join(pkl_path, pkl), 'rb') as run_file:
15 | pkl_info = pickle.load(run_file)
16 | init_time = pkl_info['runtime'][0]
17 | running_time = sum(pkl_info['runtime'][1:])/len(pkl_info['runtime'][1:])
18 | info_dict[name] = {'init_time': init_time, 'running_time': running_time}
19 |
20 | with open(os.path.join(tgt_path, '{}_{}_sim.pkl'.format(dataset, tracker)), "wb") as f_sim:
21 | pickle.dump(info_dict, f_sim)
22 |
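The pickle written above maps each sequence name to its measured init time and mean per-frame runtime; a quick sketch of reading it back (the path mirrors the format string above, the printed values are illustrative):

    import pickle

    with open('testing_dataset/sim_info/UAVDT_SiamRPN++_Mob_sim.pkl', 'rb') as f:
        sim_info = pickle.load(f)
    # e.g. {'S0101': {'init_time': 0.15, 'running_time': 0.02}, ...}
    for name, times in sim_info.items():
        print(name, times['init_time'], times['running_time'])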
--------------------------------------------------------------------------------
/tools/rt_eva.py:
--------------------------------------------------------------------------------
1 | '''
2 | Streaming evaluation
3 | Given real-time tracking outputs,
4 | it pairs them with the ground truth.
5 |
6 | Note that this script does not need to run in real-time
7 | '''
8 |
9 | import argparse, pickle
10 | from os.path import join, isfile
11 | import numpy as np
12 | import sys
13 | import os
14 |
15 | # no sys.path setup is needed here; this script can be run from
16 | # the repo's root directory
17 |
18 |
19 | def parse_args():
20 | parser = argparse.ArgumentParser()
21 | parser.add_argument('--raw_root', default='./results_rt_raw/UAV20/Raw_sim',type=str,
22 | help='raw result root')
23 | parser.add_argument('--tar_root', default='./results_rt/UAV20',type=str,
24 | help='target result root')
25 | parser.add_argument('--gtroot',default='./testing_dataset/UAV123_20L/anno', type=str)
26 | parser.add_argument('--fps', type=float, default=30)
27 | parser.add_argument('--eta', type=float, default=0, help='eta >= -1')
28 | parser.add_argument('--overwrite', action='store_true', default=False)
29 |
30 | args = parser.parse_args()
31 | return args
32 |
33 | def main():
34 | args = parse_args()
35 | trackers=os.listdir(args.raw_root)
36 | gt_path=args.gtroot
37 | if 'DTB70' in gt_path:
38 | seqs = os.listdir(gt_path)
39 | gt_list=[]
40 | for seq in seqs:
41 | gt_list.append(os.path.join(gt_path, seq, 'groundtruth_rect.txt'))
42 | else:
43 |         gt_list = [os.path.join(gt_path, i)
44 |                    for i in os.listdir(gt_path) if i.endswith('.txt')]
45 | for tracker in trackers:
46 | ra_path=join(args.raw_root,tracker)
47 | ou_path=join(args.tar_root,tracker)
48 | if os.path.isdir(ou_path):
49 | continue
50 | mismatch = 0
51 | fps_a=[]
52 |
53 | for gt_idx, video in enumerate(gt_list):
54 | name=video.split('/')[-1][0:-4]
55 | name_rt=name
56 | # name=video
57 | if 'DTB70' in gt_path:
58 | name=video.split('/')[-2]
59 | name_rt=name
60 | if 'UAVDT' in gt_path:
61 | name_rt=name[0:-3]
62 |             print('Pairing {:s} output with the ground truth ({:d}/{:d}): {:s}'.format(tracker, gt_idx + 1, len(gt_list), name))
63 | results = pickle.load(open(join(ra_path, name_rt + '.pkl'), 'rb'))
64 | gtlen = len(open(join(video)).readlines())
65 | # use raw results when possible in case we change class subset during evaluation
66 | results_raw = results.get('results_raw', None)
67 | timestamps = results['timestamps']
68 |             # assume the init box doesn't need time to process
69 | timestamps[0]=0
70 | input_fidx = results['input_fidx']
71 | run_time = results['runtime']
72 | fps_a.append(len(run_time)/sum(run_time))
73 | tidx_p1 = 0
74 | pred_bboxes=[]
75 |
76 | for idx in range(gtlen):
77 | # input frame time, i.e., [0, 0.03, 0.06, 0.09, ...]
78 | t = (idx - args.eta)/args.fps
79 | # which is the latest result?
80 | while tidx_p1 < len(timestamps) and timestamps[tidx_p1] <= t:
81 | tidx_p1 += 1
82 | # there exists at least one result for eva, i.e., the init box, 0
83 |
84 | # if tidx_p1 == 0:
85 | # # no output
86 | # miss += 1
87 | # bboxes, scores, labels = [], [], []
88 | # masks, tracks = None, None
89 |
90 | # the latest result given is tidx
91 | tidx = tidx_p1 - 1
92 |
93 | # compute gt idx and the fidx where the result comes to obtain mismatch
94 | ifidx = input_fidx[tidx]
95 | mismatch += idx - ifidx
96 |                 # print('GT time is {:3f}, latest tracker time is {:3f}, matching GT id {:3d} with processed frame {:3d}'.format(t, timestamps[tidx],idx,ifidx))
97 | pred_bboxes.append(results_raw[tidx])
98 |
99 | if not os.path.isdir(ou_path):
100 | os.makedirs(ou_path)
101 | result_path = join(ou_path, '{}.txt'.format(name_rt))
102 | with open(result_path, 'w') as f:
103 | for x in pred_bboxes:
104 | f.write(','.join([str(i) for i in x])+'\n')
105 | fps_path = join(ou_path, '{}.txt'.format('Speed'))
106 | with open(fps_path, 'w') as f:
107 | f.write(str(sum(fps_a)/len(fps_a)))
108 |
109 | if __name__ == '__main__':
110 | main()
111 |
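The core pairing rule above: ground-truth frame idx "arrives" at t = (idx - eta) / fps and is matched with the newest tracker output whose timestamp is <= t. A self-contained sketch of just that rule, with made-up timestamps:

    fps, eta = 30.0, 0
    timestamps = [0, 0.05, 0.11, 0.18]   # made-up result completion times
    matched, tidx_p1 = [], 0
    for idx in range(6):                 # six ground-truth frames
        t = (idx - eta) / fps            # 0, 0.033, 0.067, 0.1, 0.133, 0.167
        while tidx_p1 < len(timestamps) and timestamps[tidx_p1] <= t:
            tidx_p1 += 1
        matched.append(tidx_p1 - 1)      # index of the newest usable result
    print(matched)                       # -> [0, 0, 1, 1, 2, 2]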
--------------------------------------------------------------------------------
/tools/rt_eva_pre.py:
--------------------------------------------------------------------------------
1 | '''
2 | Streaming evaluation
3 | Given real-time tracking outputs,
4 | it pairs them with the ground truth.
5 |
6 | Note that this script does not need to run in real-time
7 | '''
8 |
9 | import argparse, pickle
10 | from os.path import join, isfile
11 | import numpy as np
12 | import sys
13 | import os
14 | from tqdm import tqdm
15 |
16 | # no sys.path setup is needed here; this script can be run from
17 | # the repo's root directory
18 |
19 |
20 | def parse_args():
21 | parser = argparse.ArgumentParser()
22 | parser.add_argument('--raw_root', default='./results_rt_raw/UAV20/Raw_pred_sim',type=str,
23 | help='raw result root')
24 | parser.add_argument('--tar_root', default='./results_rt/UAV20',type=str,
25 | help='target result root')
26 | parser.add_argument('--gtroot',default='./testing_dataset/UAV123_20L/anno', type=str)
27 | parser.add_argument('--fps', type=float, default=30)
28 | parser.add_argument('--eta', type=float, default=0, help='eta >= -1')
29 | parser.add_argument('--overwrite', action='store_true', default=False)
30 |
31 | args = parser.parse_args()
32 | return args
33 |
34 | def main():
35 | args = parse_args()
36 | trackers=os.listdir(args.raw_root)
37 | gt_path=args.gtroot
38 | if 'DTB70' in gt_path:
39 | seqs = os.listdir(gt_path)
40 | gt_list=[]
41 | for seq in seqs:
42 | gt_list.append(os.path.join(gt_path, seq, 'groundtruth_rect.txt'))
43 | else:
44 |         gt_list = [os.path.join(gt_path, i)
45 |                    for i in os.listdir(gt_path) if i.endswith('.txt')]
46 | for tracker in tqdm(trackers):
47 | ra_path=join(args.raw_root,tracker)
48 | ou_path=join(args.tar_root,tracker)
49 | if os.path.isdir(ou_path):
50 | continue
51 | mismatch = 0
52 | fps_a=[]
53 |
54 | for gt_idx, video in enumerate(gt_list):
55 | name=video.split('/')[-1][0:-4]
56 | name_rt=name
57 | # name=video
58 | if 'DTB70' in gt_path:
59 | name=video.split('/')[-2]
60 | name_rt=name
61 | if 'UAVDT' in gt_path:
62 | name_rt=name[0:-3]
63 | # print('Pairing {:s} output with the ground truth ({:d}/{:d}): {:s}'.format(tracker,len(gt_list),gt_idx,name))
64 | results = pickle.load(open(join(ra_path, name_rt + '.pkl'), 'rb'))
65 | gtlen = len(open(join(video)).readlines())
66 | # use raw results when possible in case we change class subset during evaluation
67 | tra_results_raw = results.get('results_raw_t', None)
68 | tra_timestamps = results['timestamps_t']
69 | pre_results = results.get('results_raw_p', None)
70 |         # assume the init box doesn't need time to process
71 | tra_timestamps[0]=0
72 |
73 | run_time = results['runtime_all']
74 | fps_a.append(len(run_time)/sum(run_time))
75 | tidx_p1 = 0
76 | pred_bboxes=[]
77 |
78 | for idx in range(gtlen):
79 | # input frame time, i.e., [0, 0.03, 0.06, 0.09, ...]
80 | t = (idx - args.eta)/args.fps
81 | # Can predictor give results?
82 | if ('boxes_eva' in pre_results.keys()) and (str(idx) in pre_results['boxes_eva'].keys()) and pre_results['time'][str(idx)]<=t:
83 | # print('Frame {} use predictor results'.format(idx))
84 | pred_bboxes.append(pre_results['boxes_eva'][str(idx)])
85 | continue
86 | else:
87 | # which is the tracker's latest result?
88 | while tidx_p1 < len(tra_timestamps) and tra_timestamps[tidx_p1] <= t:
89 | tidx_p1 += 1
90 | # there exists at least one result for eva, i.e., the init box, 0
91 |
92 | # the latest result given is tidx
93 | tidx = tidx_p1 - 1
94 |
95 | # print('GT time is {:3f}, latest tracker time is {:3f}, matching GT id {:3d} with tracker result'.format(t, tra_timestamps[tidx], idx))
96 | pred_bboxes.append(tra_results_raw[tidx])
97 |
98 | if not os.path.isdir(ou_path):
99 | os.makedirs(ou_path)
100 | result_path = join(ou_path, '{}.txt'.format(name_rt))
101 | with open(result_path, 'w') as f:
102 | for x in pred_bboxes:
103 | f.write(','.join([str(i) for i in x])+'\n')
104 | fps_path = join(ou_path, '{}.txt'.format('Speed'))
105 | with open(fps_path, 'w') as f:
106 | f.write(str(sum(fps_a)/len(fps_a)))
107 |
108 | if __name__ == '__main__':
109 | main()
110 |
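Relative to rt_eva.py, the only change is the priority rule: if the predictor produced a box for frame idx before its arrival time t, that box wins; otherwise the tracker's latest output is used. A condensed sketch of that branch (the key names follow the pickle fields read above; the helper itself is hypothetical):

    def pick_box(idx, t, pre_results, tracker_latest_box):
        boxes = pre_results.get('boxes_eva', {})
        times = pre_results.get('time', {})
        key = str(idx)
        if key in boxes and times[key] <= t:
            return boxes[key]          # predictor result arrived in time
        return tracker_latest_box      # fall back to the newest tracker box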
--------------------------------------------------------------------------------
/train.sh:
--------------------------------------------------------------------------------
1 | export CUDA_VISIBLE_DEVICES=0
2 | export PYTHONPATH=/ocean/projects/cis220061p/bli5/CVPR23/code/PVT_pp:$PYTHONPATH
3 |
4 | # RPN_Mob
5 | python ./tools/train.py --cfg 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lb_config.yaml'
6 | python ./tools/train.py --cfg 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_lbv_config.yaml'
7 | python ./tools/train.py --cfg 'experiments/siamrpn_mobilev2_l234_dwxcorr/pre_mv_config.yaml'
8 |
--------------------------------------------------------------------------------
/training_dataset/got10k/gen_json.py:
--------------------------------------------------------------------------------
1 | from __future__ import unicode_literals
2 | import json
3 | from os.path import join, exists
4 | import os
5 | import pandas as pd
6 | from tqdm import tqdm
7 |
8 | dataset_path = 'data'
9 | train_sets = ['GOT-10k_Train_split_01','GOT-10k_Train_split_02','GOT-10k_Train_split_03','GOT-10k_Train_split_04',
10 | 'GOT-10k_Train_split_05','GOT-10k_Train_split_06','GOT-10k_Train_split_07','GOT-10k_Train_split_08',
11 | 'GOT-10k_Train_split_09','GOT-10k_Train_split_10','GOT-10k_Train_split_11','GOT-10k_Train_split_12',
12 | 'GOT-10k_Train_split_13','GOT-10k_Train_split_14','GOT-10k_Train_split_15','GOT-10k_Train_split_16',
13 | 'GOT-10k_Train_split_17','GOT-10k_Train_split_18','GOT-10k_Train_split_19']
14 | val_set = ['val']
15 | d_sets = {'videos_val':val_set,'videos_train':train_sets}
16 |
17 |
18 | def parse_and_sched(dl_dir='.'):
19 | js = {}
20 | videos = os.listdir(dataset_path)
21 | for video in tqdm(videos):
22 | if video == 'list.txt':
23 | continue
24 | gt_path = join(dataset_path, video, 'groundtruth.txt')
25 | f = open(gt_path, 'r')
26 | groundtruth = f.readlines()
27 | f.close()
28 | for idx, gt_line in enumerate(groundtruth):
29 | gt_image = gt_line.strip().split(',')
30 | frame = '%08d' % (int(idx+1))
31 | obj = '%02d' % (int(0))
32 | bbox = [int(float(gt_image[0])), int(float(gt_image[1])),
33 | int(float(gt_image[0])) + int(float(gt_image[2])),
34 | int(float(gt_image[1])) + int(float(gt_image[3]))] # xmin,ymin,xmax,ymax
35 |
36 | x1 = bbox[0]
37 | y1 = bbox[1]
38 | w = bbox[2] - x1
39 | h = bbox[3] - y1
40 | if x1 < 0 or y1 < 0 or w <= 0 or h <= 0:
41 | break
42 |
43 | if video not in js:
44 | js[video] = {}
45 | if obj not in js[video]:
46 | js[video][obj] = {}
47 | js[video][obj][frame] = bbox
48 | json.dump(js, open('train.json', 'w'), indent=4, sort_keys=True)
49 |
50 | # print(d_set+': All videos downloaded' )
51 |
52 |
53 | if __name__ == '__main__':
54 | parse_and_sched()
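The emitted train.json is keyed video -> object id -> zero-padded frame -> [xmin, ymin, xmax, ymax]; the shape of one entry, with illustrative names and values:

    js = {
        'GOT-10k_Train_000001': {                   # video directory under data/
            '00': {                                 # single-object datasets use id '00'
                '00000001': [100, 120, 260, 300],   # xmin, ymin, xmax, ymax
            }
        }
    }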
--------------------------------------------------------------------------------
/training_dataset/lasot/gen_json.py:
--------------------------------------------------------------------------------
1 |
2 | from __future__ import unicode_literals
3 | import json
4 | from os.path import join, exists
5 | import os
6 | import pandas as pd
7 | dataset_path = './data'
8 |
9 | def parse_and_sched(dl_dir='.'):
10 | # For each of the two datasets
11 | f = open('./test_id.txt', 'r')
12 | videos = f.readlines()
13 | f.close()
14 | n_videos = len(videos)
15 | js = {}
16 | for idx,video in enumerate(videos):
17 | print('{}/{}'.format(idx,n_videos))
18 | video = video.strip()
19 | class_name = video.split('-')[0]
20 | # class_path = join(dataset_path, class_name)
21 | gt_path = join(dataset_path, video, 'groundtruth.txt')
22 | f = open(gt_path, 'r')
23 | groundtruth = f.readlines()
24 | f.close()
25 | video = video + '/img'
26 | for idx, gt_line in enumerate(groundtruth):
27 | gt_image = gt_line.strip().split(',')
28 | frame = '%08d' % (int(idx+1))
29 | obj = '%02d' % (int(0))
30 | bbox = [int(float(gt_image[0])), int(float(gt_image[1])),
31 | int(float(gt_image[0])) + int(float(gt_image[2])),
32 | int(float(gt_image[1])) + int(float(gt_image[3]))] # xmin,ymin,xmax,ymax
33 | x1 = bbox[0]
34 | y1 = bbox[1]
35 | w = bbox[2] - x1
36 | h = bbox[3] - y1
37 | if x1 < 0 or y1 < 0 or w <= 0 or h <= 0:
38 | break
39 |
40 | if video not in js:
41 | js[video] = {}
42 | if obj not in js[video]:
43 | js[video][obj] = {}
44 | js[video][obj][frame] = bbox
45 | json.dump(js, open('train.json', 'w'), indent=4, sort_keys=True)
46 | js = {}
47 | json.dump(js, open('val.json', 'w'), indent=4, sort_keys=True)
48 | print('done')
49 |
50 |
51 | if __name__ == '__main__':
52 | parse_and_sched()
--------------------------------------------------------------------------------
/training_dataset/lasot/gen_txt.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | seq_path = 'data'
4 | seq_name = os.listdir(seq_path)
5 | with open('test_id.txt', 'w') as f:
6 | for seq in seq_name:
7 | f.write(seq+'\n')
--------------------------------------------------------------------------------
/training_dataset/vid/gen_json.py:
--------------------------------------------------------------------------------
1 | from os.path import join
2 | from os import listdir
3 | import json
4 | import numpy as np
5 |
6 | print('load json (raw vid info), please wait 20 seconds~')
7 | vid = json.load(open('vid.json', 'r'))
8 |
9 |
10 | def check_size(frame_sz, bbox):
11 | min_ratio = 0.1
12 | max_ratio = 0.75
13 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz)))
14 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio)
15 | return ok
16 |
17 |
18 | def check_borders(frame_sz, bbox):
19 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2
20 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \
21 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \
22 | ((frame_sz[1] - bbox[3]) > dist_from_border)
23 | return ok
24 |
25 |
26 | snippets = dict()
27 | n_snippets = 0
28 | n_videos = 0
29 | for subset in vid:
30 | for video in subset:
31 | n_videos += 1
32 | frames = video['frame']
33 | id_set = []
34 |         id_frames = [[] for _ in range(60)]  # at most 60 objects; a fresh list per slot
35 | for f, frame in enumerate(frames):
36 | objs = frame['objs']
37 | frame_sz = frame['frame_sz']
38 | for obj in objs:
39 | trackid = obj['trackid']
40 | occluded = obj['occ']
41 | bbox = obj['bbox']
42 |
43 | if trackid not in id_set:
44 | id_set.append(trackid)
45 | id_frames[trackid] = []
46 | id_frames[trackid].append(f)
47 | if len(id_set) > 0:
48 | snippets[video['base_path']] = dict()
49 | for selected in id_set:
50 | frame_ids = sorted(id_frames[selected])
51 | sequences = np.split(frame_ids, np.array(np.where(np.diff(frame_ids) > 1)[0]) + 1)
52 | sequences = [s for s in sequences if len(s) > 1] # remove isolated frame.
53 | for seq in sequences:
54 | snippet = dict()
55 | for frame_id in seq:
56 | frame = frames[frame_id]
57 | for obj in frame['objs']:
58 | if obj['trackid'] == selected:
59 | o = obj
60 | continue
61 | snippet[frame['img_path'].split('.')[0]] = o['bbox']
62 | snippets[video['base_path']]['{:02d}'.format(selected)] = snippet
63 | n_snippets += 1
64 | print('video: {:d} snippets_num: {:d}'.format(n_videos, n_snippets))
65 |
66 | train = {k:v for (k,v) in snippets.items() if 'train' in k}
67 | val = {k:v for (k,v) in snippets.items() if 'val' in k}
68 |
69 | json.dump(train, open('train.json', 'w'), indent=4, sort_keys=True)
70 | json.dump(val, open('val.json', 'w'), indent=4, sort_keys=True)
71 | print('done!')
72 |
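The np.split call above is what cuts a track's frame ids into contiguous snippets; a tiny worked example:

    import numpy as np

    frame_ids = [3, 4, 5, 9, 10]
    cuts = np.where(np.diff(frame_ids) > 1)[0] + 1   # -> array([3])
    print(np.split(frame_ids, cuts))                 # [array([3, 4, 5]), array([ 9, 10])]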
--------------------------------------------------------------------------------
/training_dataset/vid/parse_vid.py:
--------------------------------------------------------------------------------
1 | from os.path import join
2 | from os import listdir
3 | import json
4 | import glob
5 | import xml.etree.ElementTree as ET
6 |
7 | VID_base_path = './ILSVRC2015'
8 | ann_base_path = join(VID_base_path, 'Annotations/VID/train/')
9 | img_base_path = join(VID_base_path, 'Data/VID/train/')
10 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e'})
11 |
12 | vid = []
13 | for sub_set in sub_sets:
14 | sub_set_base_path = join(ann_base_path, sub_set)
15 | videos = sorted(listdir(sub_set_base_path))
16 | s = []
17 | for vi, video in enumerate(videos):
18 | print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos)))
19 | v = dict()
20 | v['base_path'] = join(sub_set, video)
21 | v['frame'] = []
22 | video_base_path = join(sub_set_base_path, video)
23 | xmls = sorted(glob.glob(join(video_base_path, '*.xml')))
24 | for xml in xmls:
25 | f = dict()
26 | xmltree = ET.parse(xml)
27 | size = xmltree.findall('size')[0]
28 | frame_sz = [int(it.text) for it in size]
29 | objects = xmltree.findall('object')
30 | objs = []
31 | for object_iter in objects:
32 | trackid = int(object_iter.find('trackid').text)
33 | name = (object_iter.find('name')).text
34 | bndbox = object_iter.find('bndbox')
35 | occluded = int(object_iter.find('occluded').text)
36 | o = dict()
37 | o['c'] = name
38 | o['bbox'] = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text),
39 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)]
40 | o['trackid'] = trackid
41 | o['occ'] = occluded
42 | objs.append(o)
43 | f['frame_sz'] = frame_sz
44 | f['img_path'] = xml.split('/')[-1].replace('xml', 'JPEG')
45 | f['objs'] = objs
46 | v['frame'].append(f)
47 | s.append(v)
48 | vid.append(s)
49 | print('save json (raw vid info), please wait 1 min~')
50 | json.dump(vid, open('vid.json', 'w'), indent=4, sort_keys=True)
51 | print('done!')
52 |
--------------------------------------------------------------------------------
/vot_iter/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Jaraxxus-Me/PVT_pp/d71cad0ed0bfede979469088704f6ba1d5a6f38c/vot_iter/__init__.py
--------------------------------------------------------------------------------
/vot_iter/tracker_SiamRPNpp.m:
--------------------------------------------------------------------------------
1 |
2 | % error('Tracker not configured! Please edit the tracker_test.m file.'); % Remove this line after proper configuration
3 |
4 | % The human readable label for the tracker, used to identify the tracker in reports
5 | % If not set, it will be set to the same value as the identifier.
6 | % It does not have to be unique, but it is best that it is.
7 | tracker_label = ['SiamRPNpp'];
8 |
9 | % For Python implementations we have created a handy function that generates the appropriate
10 | % command that will run the python executable and execute the given script that includes your
11 | % tracker implementation.
12 | %
13 | % Please customize the line below by substituting the first argument with the name of the
14 | % script of your tracker (not the .py file but just the name of the script) and also provide the
15 | % path (or multiple paths) where the tracker sources are found as the elements of the cell
16 | % array (second argument).
17 | setenv('MKL_NUM_THREADS','1');
18 | pysot_root = 'path/to/pysot';
19 | track_build_path = 'path/to/track/build';
20 | tracker_command = generate_python_command('vot_iter.vot_iter', {pysot_root; [track_build_path '/python/lib']});
21 |
22 | tracker_interpreter = 'python';
23 |
24 | tracker_linkpath = {track_build_path};
25 |
26 | % tracker_linkpath = {}; % A cell array of custom library directories used by the tracker executable (optional)
27 |
28 |
--------------------------------------------------------------------------------
/vot_iter/vot.py:
--------------------------------------------------------------------------------
1 | """
2 | \file vot.py
3 | """
4 |
5 | import sys
6 | import copy
7 | import collections
8 |
9 | try:
10 | import trax
11 | except ImportError:
12 | raise Exception('TraX support not found. Please add trax module to Python path.')
13 |
14 | Rectangle = collections.namedtuple('Rectangle', ['x', 'y', 'width', 'height'])
15 | Point = collections.namedtuple('Point', ['x', 'y'])
16 | Polygon = collections.namedtuple('Polygon', ['points'])
17 |
18 | class VOT(object):
19 | """ Base class for Python VOT integration """
20 | def __init__(self, region_format, channels=None):
21 | """ Constructor
22 |
23 | Args:
24 | region_format: Region format options
25 | """
26 | assert(region_format in [trax.Region.RECTANGLE, trax.Region.POLYGON])
27 |
28 | if channels is None:
29 | channels = ['color']
30 | elif channels == 'rgbd':
31 | channels = ['color', 'depth']
32 | elif channels == 'rgbt':
33 | channels = ['color', 'ir']
34 | elif channels == 'ir':
35 | channels = ['ir']
36 | else:
37 | raise Exception('Illegal configuration {}.'.format(channels))
38 |
39 | self._trax = trax.Server([region_format], [trax.Image.PATH], channels)
40 |
41 | request = self._trax.wait()
42 | assert(request.type == 'initialize')
43 | if isinstance(request.region, trax.Polygon):
44 | self._region = Polygon([Point(x[0], x[1]) for x in request.region])
45 | else:
46 | self._region = Rectangle(*request.region.bounds())
47 | self._image = [x.path() for k, x in request.image.items()]
48 | if len(self._image) == 1:
49 | self._image = self._image[0]
50 |
51 | self._trax.status(request.region)
52 |
53 | def region(self):
54 | """
55 | Send configuration message to the client and receive the initialization
56 | region and the path of the first image
57 |
58 | Returns:
59 | initialization region
60 | """
61 |
62 | return self._region
63 |
64 | def report(self, region, confidence = None):
65 | """
66 | Report the tracking results to the client
67 |
68 | Arguments:
69 | region: region for the frame
70 | """
71 | assert(isinstance(region, Rectangle) or isinstance(region, Polygon))
72 | if isinstance(region, Polygon):
73 | tregion = trax.Polygon.create([(x.x, x.y) for x in region.points])
74 | else:
75 | tregion = trax.Rectangle.create(region.x, region.y, region.width, region.height)
76 | properties = {}
77 |         if confidence is not None:
78 | properties['confidence'] = confidence
79 | self._trax.status(tregion, properties)
80 |
81 | def frame(self):
82 | """
83 | Get a frame (image path) from client
84 |
85 | Returns:
86 | absolute path of the image
87 | """
88 | if hasattr(self, "_image"):
89 | image = self._image
90 | del self._image
91 | return image
92 |
93 | request = self._trax.wait()
94 |
95 | if request.type == 'frame':
96 | image = [x.path() for k, x in request.image.items()]
97 | if len(image) == 1:
98 | return image[0]
99 | return image
100 | else:
101 | return None
102 |
103 |
104 | def quit(self):
105 | if hasattr(self, '_trax'):
106 | self._trax.quit()
107 |
108 | def __del__(self):
109 | self.quit()
110 |
111 |
--------------------------------------------------------------------------------
/vot_iter/vot_iter.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import cv2
3 | import torch
4 | import numpy as np
5 | import os
6 | from os.path import join
7 |
8 | from pysot.core.config import cfg
9 | from pysot.models.model_builder import ModelBuilder
10 | from pysot.tracker.tracker_builder import build_tracker
11 | from pysot.utils.bbox import get_axis_aligned_bbox
12 | from pysot.utils.model_load import load_pretrain
13 | from toolkit.datasets import DatasetFactory
14 | from toolkit.utils.region import vot_overlap, vot_float2str
15 |
16 | from . import vot
17 | from .vot import Rectangle, Polygon, Point
18 |
19 |
20 | # modify root
21 |
22 | cfg_root = "path/to/expr"
23 | model_file = join(cfg_root, 'model.pth')
24 | cfg_file = join(cfg_root, 'config.yaml')
25 |
26 | def warmup(model):
27 | for i in range(10):
28 | model.template(torch.FloatTensor(1,3,127,127).cuda())
29 |
30 | def setup_tracker():
31 | cfg.merge_from_file(cfg_file)
32 |
33 | model = ModelBuilder()
34 | model = load_pretrain(model, model_file).cuda().eval()
35 |
36 | tracker = build_tracker(model)
37 | warmup(model)
38 | return tracker
39 |
40 |
41 | tracker = setup_tracker()
42 |
43 | handle = vot.VOT("polygon")
44 | region = handle.region()
45 | try:
46 | region = np.array([region[0][0][0], region[0][0][1], region[0][1][0], region[0][1][1],
47 | region[0][2][0], region[0][2][1], region[0][3][0], region[0][3][1]])
48 | except Exception:
49 | region = np.array(region)
50 |
51 | cx, cy, w, h = get_axis_aligned_bbox(region)
52 |
53 | image_file = handle.frame()
54 | if not image_file:
55 | sys.exit(0)
56 |
57 | im = cv2.imread(image_file) # HxWxC
58 | # init
59 | target_pos, target_sz = np.array([cx, cy]), np.array([w, h])
60 | gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h]
61 | tracker.init(im, gt_bbox_)
62 |
63 | while True:
64 | img_file = handle.frame()
65 | if not img_file:
66 | break
67 | im = cv2.imread(img_file)
68 | outputs = tracker.track(im)
69 | pred_bbox = outputs['bbox']
70 | result = Rectangle(*pred_bbox)
71 | score = outputs['best_score']
72 | if cfg.MASK.MASK:
73 | pred_bbox = outputs['polygon']
74 |         result = Polygon([Point(x[0], x[1]) for x in pred_bbox])
75 |
76 | handle.report(result, score)
77 |
--------------------------------------------------------------------------------