├── .gitignore ├── LICENSE ├── README.md ├── demo.py ├── experiments └── uvltrack │ ├── baseline_base.yaml │ ├── baseline_base_grounding.yaml │ └── baseline_large.yaml ├── fig ├── arch.png └── results.png ├── install.sh ├── lib ├── __init__.py ├── config │ ├── __init__.py │ └── uvltrack │ │ └── config.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── bert_backbone.py │ │ ├── block.py │ │ ├── mae_vit.py │ │ ├── modality_unified_feature_extractor.py │ │ └── utils.py │ ├── heads │ │ ├── __init__.py │ │ ├── modality_adaptive_box_head.py │ │ └── utils.py │ └── uvltrack │ │ ├── __init__.py │ │ ├── utils.py │ │ └── uvltrack.py ├── registry.py ├── test │ ├── __init__.py │ ├── analysis │ │ ├── __init__.py │ │ ├── extract_results.py │ │ └── plot_results.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── avistdataset.py │ │ ├── data.py │ │ ├── datasets.py │ │ ├── environment.py │ │ ├── got10kdataset.py │ │ ├── itbdataset.py │ │ ├── lasot_lmdbdataset.py │ │ ├── lasotdataset.py │ │ ├── lasotextdataset.py │ │ ├── local.py │ │ ├── nfsdataset.py │ │ ├── otb99dataset.py │ │ ├── otbdataset.py │ │ ├── running.py │ │ ├── tc128cedataset.py │ │ ├── tc128dataset.py │ │ ├── tnl2kdataset.py │ │ ├── tracker.py │ │ ├── trackingnetdataset.py │ │ ├── uavdataset.py │ │ └── utils.py │ ├── parameter │ │ ├── __init__.py │ │ └── uvltrack.py │ ├── tracker │ │ ├── __init__.py │ │ ├── basetracker.py │ │ ├── tracker_utils.py │ │ └── uvltrack.py │ └── utils │ │ ├── __init__.py │ │ ├── _init_paths.py │ │ ├── augmentation.py │ │ ├── hann.py │ │ ├── load_text.py │ │ ├── params.py │ │ ├── transform_got10k.py │ │ └── transform_trackingnet.py ├── train │ ├── __init__.py │ ├── _init_paths.py │ ├── actors │ │ ├── __init__.py │ │ ├── base_actor.py │ │ └── uvltrack.py │ ├── admin │ │ ├── __init__.py │ │ ├── environment.py │ │ ├── local.py │ │ ├── multigpu.py │ │ ├── settings.py │ │ ├── stats.py │ │ └── tensorboard.py │ ├── base_functions.py │ ├── data │ │ ├── __init__.py │ │ ├── bounding_box_utils.py │ │ ├── bpe_simple_vocab_16e6.txt.gz │ │ ├── image_loader.py │ │ ├── loader.py │ │ ├── processing.py │ │ ├── processing_utils.py │ │ ├── processing_utils_grounding.py │ │ ├── processing_utils_grounding2.py │ │ ├── sampler.py │ │ ├── transforms.py │ │ └── utils.py │ ├── data_specs │ │ ├── README.md │ │ ├── got10k_train_full_split.txt │ │ ├── got10k_train_split.txt │ │ ├── got10k_val_split.txt │ │ ├── got10k_vot_exclude.txt │ │ ├── got10k_vot_train_split.txt │ │ ├── got10k_vot_val_split.txt │ │ ├── lasot_test_split.txt │ │ ├── lasot_train_split.txt │ │ └── trackingnet_classmap.txt │ ├── dataset │ │ ├── COCO_tool.py │ │ ├── __init__.py │ │ ├── base_image_dataset.py │ │ ├── base_video_dataset.py │ │ ├── coco.py │ │ ├── coco_seq.py │ │ ├── coco_seq_lmdb.py │ │ ├── got10k.py │ │ ├── got10k_lmdb.py │ │ ├── imagenetvid.py │ │ ├── imagenetvid_lmdb.py │ │ ├── lasot.py │ │ ├── lasot_lmdb.py │ │ ├── lasot_test.py │ │ ├── lasotext.py │ │ ├── object365.py │ │ ├── otb99.py │ │ ├── refcoco_seq.py │ │ ├── refer.py │ │ ├── tnl2k.py │ │ ├── tnl2k_test.py │ │ ├── tracking_net.py │ │ ├── tracking_net_lmdb.py │ │ ├── utils.py │ │ ├── visualgenome.py │ │ └── webuav.py │ ├── run_training.py │ ├── train_script_mutrack.py │ └── trainers │ │ ├── __init__.py │ │ ├── base_trainer.py │ │ └── ltr_trainer.py └── utils │ ├── PreciseRoIPooling │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── _assets │ │ └── prroi_visualization.png │ ├── pytorch │ │ ├── prroi_pool │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── functional.py │ │ │ ├── 
prroi_pool.py │ │ │ └── src │ │ │ │ ├── prroi_pooling_gpu.c │ │ │ │ ├── prroi_pooling_gpu.h │ │ │ │ ├── prroi_pooling_gpu_impl.cu │ │ │ │ └── prroi_pooling_gpu_impl.cuh │ │ └── tests │ │ │ └── test_prroi_pooling2d.py │ ├── src │ │ ├── prroi_pooling_gpu_impl.cu │ │ └── prroi_pooling_gpu_impl.cuh │ └── tensorflow │ │ ├── prroi_pool │ │ ├── CMakeLists.txt │ │ ├── __init__.py │ │ ├── precise_roi_pooling_ops.py │ │ └── src │ │ │ ├── kernels │ │ │ ├── build_cuda.py │ │ │ ├── external │ │ │ │ ├── prroi_pooling_gpu_impl.cu │ │ │ │ └── prroi_pooling_gpu_impl.cuh │ │ │ ├── precise_roi_pooling.h │ │ │ ├── precise_roi_pooling_kernels.cc │ │ │ └── precise_roi_pooling_kernels.cu.cc │ │ │ └── ops │ │ │ └── precise_roi_pooling_ops.cc │ │ └── tests │ │ ├── precise_roi_pooling_ops_test.py │ │ └── test_binaries │ │ └── 2_2_0.5 │ │ ├── features.npy │ │ ├── gradients0.npy │ │ ├── gradients1.npy │ │ ├── real_outputs.npy │ │ └── rois.npy │ ├── __init__.py │ ├── box_ops.py │ ├── classification_loss.py │ ├── lmdb_utils.py │ ├── merge.py │ ├── misc.py │ ├── scheduler.py │ └── tensor.py ├── scripts ├── demo.sh ├── new_tracker.sh ├── test.sh └── train.sh ├── tracking ├── _init_paths.py ├── analysis_results.py ├── create_default_local_file.py ├── pre_read_datasets.py ├── profile_model.py ├── test.py └── train.py └── uvltrack_env.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | *.idea 2 | *~ 3 | *__pycache__* 4 | *.pyc 5 | *.pytest_cache 6 | *.csv 7 | /checkpoints 8 | /data 9 | /debug 10 | /logs 11 | /msic_logs 12 | /tensorboard 13 | /test 14 | release.zip 15 | /terminal_logs/* 16 | /workspace 17 | /pretrained 18 | /pretrain 19 | /compare 20 | /mutrack.zip -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 University of Science and Technology of China 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import cv2 3 | import argparse 4 | 5 | 6 | parser = argparse.ArgumentParser(description='Run the tracker on an input video.') 7 | parser.add_argument('--tracker_name', type=str, help='Name of tracking method.') 8 | parser.add_argument('--tracker_param', type=str, help='Name of parameter file.') 9 | parser.add_argument('--input_video', type=str, help='Path to input video.') 10 | parser.add_argument('--output_video', type=str, help='Path to output video.') 11 | parser.add_argument('--init_bbox', nargs="*", type=int, help='Initial target bounding box') 12 | parser.add_argument('--language', type=str, help='Language description of target') 13 | args = parser.parse_args() 14 | 15 | 16 | def _read_image(image_file): 17 | if isinstance(image_file, str): 18 | im = cv2.imread(image_file) 19 | return cv2.cvtColor(im, cv2.COLOR_BGR2RGB) 20 | 21 | input_video = args.input_video 22 | output_video = args.output_video 23 | parameter_name = args.tracker_param 24 | 25 | init_info = {} 26 | # specify target reference 27 | init_info['language'] = args.language # for NL and NLBBOX mode 28 | init_info['init_bbox'] = args.init_bbox # for BBOX and NLBBOX mode 29 | 30 | param_module = importlib.import_module(f'lib.test.parameter.{args.tracker_name}') 31 | params = param_module.parameters(parameter_name, None) 32 | params.debug = False 33 | 34 | tracker_class = importlib.import_module(f'lib.test.tracker.{args.tracker_name}').get_tracker_class() 35 | tracker = tracker_class(params, '') 36 | 37 | output = {'target_bbox': [], 38 | 'time': []} 39 | if tracker.params.save_all_boxes: 40 | output['all_boxes'] = [] 41 | output['all_scores'] = [] 42 | 43 | def _store_outputs(tracker_out: dict, defaults=None): 44 | defaults = {} if defaults is None else defaults 45 | for key in output.keys(): 46 | val = tracker_out.get(key, defaults.get(key, None)) 47 | if key in tracker_out or val is not None: 48 | output[key].append(val) 49 | 50 | videoCapture = cv2.VideoCapture(input_video) 51 | success, image = videoCapture.read() 52 | 53 | out = tracker.initialize(image, init_info) 54 | 55 | height, width, _ = image.shape 56 | fps = 20 57 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 58 | videowriter = cv2.VideoWriter(output_video, fourcc, fps, (width, height)) 59 | success, image = videoCapture.read() 60 | while success: 61 | info = {} 62 | out = tracker.track(image, info) 63 | x, y, w, h = out['target_bbox'] 64 | image = cv2.rectangle(image, (int(x), int(y)), (int(x+w), int(y+h)), (255, 0, 0)) 65 | videowriter.write(image) # frames from VideoCapture are already BGR, which VideoWriter expects 66 | success, image = videoCapture.read() 67 | videowriter.release() 68 | videoCapture.release() -------------------------------------------------------------------------------- /experiments/uvltrack/baseline_base.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | CONTEXT_GAP: 20 3 | MAX_SAMPLE_INTERVAL: 200 4 | MEAN: 5 | - 0.485 6 | - 0.456 7 | - 0.406 8 | SEARCH: 9 | CENTER_JITTER: 3.5 10 | FACTOR: 4.0 11 | SCALE_JITTER: 0.5 12 | SIZE: 256 13 | NUMBER: 2 14 | STD: 15 | - 0.229 16 | - 0.224
17 | - 0.225 18 | TEMPLATE: 19 | CENTER_JITTER: 0 20 | FACTOR: 2.0 21 | SCALE_JITTER: 0 22 | SIZE: 128 23 | NUMBER: 1 24 | TRAIN: 25 | DATASETS_NAME: 26 | - GOT10K_vottrain 27 | - LASOT 28 | - COCO17 29 | - TRACKINGNET 30 | - TNL2K 31 | - OTB99 32 | - REFCOCOG 33 | DATASETS_RATIO: 34 | - 1 35 | - 1 36 | - 1 37 | - 1 38 | - 1 39 | - 0.2 40 | - 5 41 | SAMPLE_PER_EPOCH: 30000 42 | VAL: 43 | DATASETS_NAME: # [] 44 | - OTB99_test 45 | - TNL2K_test 46 | - LASOT_test 47 | VALTRACK: 48 | DATASETS_NAME: 49 | - LASOT_test 50 | - LASOTEXT 51 | - OTB99_test 52 | - TNL2K_test 53 | DATASETS_RATIO: 54 | - 1 55 | - 1 56 | - 1 57 | - 1 58 | SAMPLE_PER_EPOCH: 6400 59 | VALVL: 60 | DATASETS_NAME: 61 | - LASOT_test 62 | - LASOTEXT 63 | - OTB99_test 64 | - TNL2K_test 65 | DATASETS_RATIO: 66 | - 1 67 | - 1 68 | - 1 69 | - 1 70 | SAMPLE_PER_EPOCH: 6400 71 | MODEL: 72 | BACKBONE: 73 | FUSION_LAYER: [6,7,8,9,10,11] 74 | TYPE: modality_unified_feature_extractor 75 | PRETRAINED_PATH: 'pretrain/mae_pretrain_vit_base.pth' 76 | CONT_LOSS_LAYER: [3,4,5,6,7,8,9,10,11] 77 | TXT_TOKEN_MODE: 'cls' 78 | LANGUAGE: 79 | TYPE: 'pretrain/bert' 80 | PATH: 'pretrain/bert/bert-base-uncased.tar.gz' 81 | VOCAB_PATH: 'pretrain/bert/bert-base-uncased-vocab.txt' 82 | HEAD: 83 | HEAD_DIM: 256 84 | TYPE: modality_adaptive_box_head 85 | OFFSET_SIGMOID: true 86 | CLS_TOKENIZE: false 87 | JOINT_CLS: false 88 | SOFTMAX_ONE: true 89 | HIDDEN_DIM: 768 90 | POSITION_EMBEDDING: sine 91 | TRAIN: 92 | MODE: joint 93 | GROUNDING_RATIO: 0.11 94 | VL_RATIO: 0.44 95 | BACKBONE_MULTIPLIER: 0.1 96 | BATCH_SIZE: 8 97 | EPOCH: 300 98 | GIOU_WEIGHT: 2.0 99 | GRAD_CLIP_NORM: 0.1 100 | L1_WEIGHT: 5.0 101 | AUX_WEIGHT: 0.1 102 | CONT_WEIGHT: 1.0 103 | CIB_WEIGHT: 0.01 104 | CTR_RATIO: 0.75 105 | LR: 0.0004 106 | NUM_WORKER: 10 107 | OPTIMIZER: ADAMW 108 | PRINT_INTERVAL: 50 109 | SCHEDULER: 110 | TYPE: CosineAnnealingLR 111 | DECAY_RATE: 0.1 112 | VAL_EPOCH_INTERVAL: 1 113 | WEIGHT_DECAY: 0.0001 114 | DYNAMIC_CLS: true 115 | REDUCTION: mean 116 | GAUSSIAN_IOU: 0.7 117 | TEST: 118 | MODE: 'NLBBOX' 119 | EPOCH: 300 120 | SEARCH_FACTOR: 4.0 121 | SEARCH_SIZE: 256 122 | TEMPLATE_FACTOR: 2.0 123 | TEMPLATE_SIZE: 128 124 | UPDATE_INTERVAL: 20 -------------------------------------------------------------------------------- /experiments/uvltrack/baseline_base_grounding.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | CONTEXT_GAP: 20 3 | MAX_SAMPLE_INTERVAL: 200 4 | MEAN: 5 | - 0.485 6 | - 0.456 7 | - 0.406 8 | SEARCH: 9 | CENTER_JITTER: 3.5 10 | FACTOR: 4.0 11 | SCALE_JITTER: 0.5 12 | SIZE: 384 13 | NUMBER: 2 14 | STD: 15 | - 0.229 16 | - 0.224 17 | - 0.225 18 | TEMPLATE: 19 | CENTER_JITTER: 0 20 | FACTOR: 2.0 21 | SCALE_JITTER: 0 22 | SIZE: 128 23 | NUMBER: 1 24 | TRAIN: 25 | DATASETS_NAME: 26 | - REFCOCOG 27 | DATASETS_RATIO: 28 | - 1 29 | SAMPLE_PER_EPOCH: 30000 30 | VAL: 31 | DATASETS_NAME: # [] 32 | - REFCOCOG_val 33 | VALTRACK: 34 | DATASETS_NAME: 35 | - LASOT_test 36 | - LASOTEXT 37 | - OTB99_test 38 | - TNL2K_test 39 | DATASETS_RATIO: 40 | - 1 41 | - 1 42 | - 1 43 | - 1 44 | SAMPLE_PER_EPOCH: 6400 45 | VALVL: 46 | DATASETS_NAME: 47 | - LASOT_test 48 | - LASOTEXT 49 | - OTB99_test 50 | - TNL2K_test 51 | DATASETS_RATIO: 52 | - 1 53 | - 1 54 | - 1 55 | - 1 56 | SAMPLE_PER_EPOCH: 6400 57 | MODEL: 58 | BACKBONE: 59 | FUSION_LAYER: [6,7,8,9,10,11] 60 | TYPE: modality_unified_feature_extractor 61 | PRETRAINED_PATH: 'pretrain/mae_pretrain_vit_base.pth' 62 | CONT_LOSS_LAYER: [3,4,5,6,7,8,9,10,11] 63 | TXT_TOKEN_MODE: 
'cls' 64 | LANGUAGE: 65 | TYPE: 'pretrain/bert' 66 | PATH: 'pretrain/bert/bert-base-uncased.tar.gz' 67 | VOCAB_PATH: 'pretrain/bert/bert-base-uncased-vocab.txt' 68 | HEAD: 69 | HEAD_DIM: 256 70 | TYPE: modality_adaptive_box_head 71 | OFFSET_SIGMOID: true 72 | CLS_TOKENIZE: false 73 | JOINT_CLS: false 74 | SOFTMAX_ONE: true 75 | HIDDEN_DIM: 768 76 | POSITION_EMBEDDING: sine 77 | TRAIN: 78 | MODE: grounding 79 | GROUNDING_RATIO: 0.11 80 | VL_RATIO: 0.44 81 | BACKBONE_MULTIPLIER: 0.1 82 | BATCH_SIZE: 16 83 | EPOCH: 100 84 | GIOU_WEIGHT: 2.0 85 | GRAD_CLIP_NORM: 0.1 86 | L1_WEIGHT: 5.0 87 | AUX_WEIGHT: 0.1 88 | CONT_WEIGHT: 1.0 89 | CIB_WEIGHT: 0.01 90 | CTR_RATIO: 0.75 91 | LR: 0.0004 92 | NUM_WORKER: 10 93 | OPTIMIZER: ADAMW 94 | PRINT_INTERVAL: 50 95 | SCHEDULER: 96 | TYPE: CosineAnnealingLR 97 | DECAY_RATE: 0.1 98 | VAL_EPOCH_INTERVAL: 1 99 | WEIGHT_DECAY: 0.0001 100 | DYNAMIC_CLS: true 101 | REDUCTION: mean 102 | GAUSSIAN_IOU: 0.7 103 | TEST: 104 | MODE: 'NLBBOX' 105 | EPOCH: 300 106 | SEARCH_FACTOR: 4.0 107 | SEARCH_SIZE: 256 108 | TEMPLATE_FACTOR: 2.0 109 | TEMPLATE_SIZE: 128 110 | UPDATE_INTERVAL: 20 -------------------------------------------------------------------------------- /experiments/uvltrack/baseline_large.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | CONTEXT_GAP: 20 3 | MAX_SAMPLE_INTERVAL: 200 4 | MEAN: 5 | - 0.485 6 | - 0.456 7 | - 0.406 8 | SEARCH: 9 | CENTER_JITTER: 4.5 10 | FACTOR: 5.0 11 | SCALE_JITTER: 0.5 12 | SIZE: 256 13 | NUMBER: 2 14 | STD: 15 | - 0.229 16 | - 0.224 17 | - 0.225 18 | TEMPLATE: 19 | CENTER_JITTER: 0 20 | FACTOR: 2.0 21 | SCALE_JITTER: 0 22 | SIZE: 128 23 | NUMBER: 1 24 | TRAIN: 25 | DATASETS_NAME: 26 | - GOT10K_vottrain 27 | - LASOT 28 | - COCO17 29 | - TRACKINGNET 30 | - TNL2K 31 | - OTB99 32 | - REFCOCOG 33 | DATASETS_RATIO: 34 | - 1 35 | - 1 36 | - 1 37 | - 1 38 | - 1 39 | - 0.2 40 | - 5 41 | SAMPLE_PER_EPOCH: 30000 42 | VAL: 43 | DATASETS_NAME: # [] 44 | - OTB99_test 45 | - TNL2K_test 46 | - LASOT_test 47 | VALTRACK: 48 | DATASETS_NAME: 49 | - LASOT_test 50 | - LASOTEXT 51 | - OTB99_test 52 | - TNL2K_test 53 | DATASETS_RATIO: 54 | - 1 55 | - 1 56 | - 1 57 | - 1 58 | SAMPLE_PER_EPOCH: 6400 59 | VALVL: 60 | DATASETS_NAME: 61 | - LASOT_test 62 | - LASOTEXT 63 | - OTB99_test 64 | - TNL2K_test 65 | DATASETS_RATIO: 66 | - 1 67 | - 1 68 | - 1 69 | - 1 70 | SAMPLE_PER_EPOCH: 6400 71 | MODEL: 72 | BACKBONE: 73 | FUSION_LAYER: [12,13,14,15,16,17,18,19,20,21,22,23] 74 | TYPE: modality_unified_feature_extractor 75 | PRETRAINED_PATH: 'pretrain/mae_pretrain_vit_large.pth' 76 | CONT_LOSS_LAYER: [8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] 77 | TXT_TOKEN_MODE: 'cls' 78 | LANGUAGE: 79 | TYPE: 'pretrain/bert-large-uncased' 80 | PATH: 'pretrain/bert-large-uncased/bert-large-uncased.tar.gz' 81 | VOCAB_PATH: 'pretrain/bert-large-uncased/bert-large-uncased-vocab.txt' 82 | HEAD: 83 | HEAD_DIM: 256 84 | TYPE: modality_adaptive_box_head 85 | OFFSET_SIGMOID: true 86 | CLS_TOKENIZE: false 87 | JOINT_CLS: false 88 | SOFTMAX_ONE: true 89 | HIDDEN_DIM: 1024 90 | POSITION_EMBEDDING: sine 91 | TRAIN: 92 | MODE: joint 93 | GROUNDING_RATIO: 0.11 94 | VL_RATIO: 0.44 95 | BACKBONE_MULTIPLIER: 0.1 96 | BATCH_SIZE: 4 97 | EPOCH: 300 98 | GIOU_WEIGHT: 2.0 99 | GRAD_CLIP_NORM: 0.1 100 | L1_WEIGHT: 5.0 101 | AUX_WEIGHT: 0.1 102 | CONT_WEIGHT: 1.0 103 | CIB_WEIGHT: 0.01 104 | CTR_RATIO: 0.75 105 | LR: 0.0002 106 | NUM_WORKER: 10 107 | OPTIMIZER: ADAMW 108 | PRINT_INTERVAL: 50 109 | SCHEDULER: 110 | TYPE: CosineAnnealingLR 111 | 
DECAY_RATE: 0.1 112 | VAL_EPOCH_INTERVAL: 1 113 | WEIGHT_DECAY: 0.0001 114 | DYNAMIC_CLS: true 115 | REDUCTION: mean 116 | GAUSSIAN_IOU: 0.7 117 | TEST: 118 | MODE: 'BBOX' 119 | EPOCH: 300 120 | SEARCH_FACTOR: 5.0 121 | SEARCH_SIZE: 256 122 | TEMPLATE_FACTOR: 2.0 123 | TEMPLATE_SIZE: 128 124 | UPDATE_INTERVAL: 20 -------------------------------------------------------------------------------- /fig/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/fig/arch.png -------------------------------------------------------------------------------- /fig/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/fig/results.png -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | echo "****************** Installing pytorch ******************" 2 | conda install pytorch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2 cudatoolkit=11.0 -c pytorch -y 3 | 4 | echo "" 5 | echo "" 6 | echo "****************** Installing yaml ******************" 7 | pip install PyYAML 8 | 9 | echo "" 10 | echo "" 11 | echo "****************** Installing easydict ******************" 12 | pip install easydict 13 | 14 | echo "" 15 | echo "" 16 | echo "****************** Installing cython ******************" 17 | pip install cython 18 | 19 | echo "" 20 | echo "" 21 | echo "****************** Installing opencv-python ******************" 22 | pip install opencv-python 23 | 24 | echo "" 25 | echo "" 26 | echo "****************** Installing pandas ******************" 27 | pip install pandas 28 | 29 | echo "" 30 | echo "" 31 | echo "****************** Installing tqdm ******************" 32 | conda install -y tqdm 33 | 34 | echo "" 35 | echo "" 36 | echo "****************** Installing coco toolkit ******************" 37 | pip install pycocotools 38 | 39 | echo "" 40 | echo "" 41 | echo "****************** Installing jpeg4py python wrapper ******************" 42 | apt-get install libturbojpeg 43 | pip install jpeg4py 44 | 45 | echo "" 46 | echo "" 47 | echo "****************** Installing tensorboard ******************" 48 | pip install tb-nightly 49 | 50 | echo "" 51 | echo "" 52 | echo "****************** Installing tikzplotlib ******************" 53 | pip install tikzplotlib 54 | 55 | echo "" 56 | echo "" 57 | echo "****************** Installing thop tool for FLOPs and Params computing ******************" 58 | pip install --upgrade git+https://github.com/Lyken17/pytorch-OpCounter.git 59 | 60 | echo "" 61 | echo "" 62 | echo "****************** Installing colorama ******************" 63 | pip install colorama 64 | 65 | echo "" 66 | echo "" 67 | echo "****************** Installing lmdb ******************" 68 | pip install lmdb 69 | 70 | echo "" 71 | echo "" 72 | echo "****************** Installing scipy ******************" 73 | pip install scipy 74 | 75 | echo "" 76 | echo "" 77 | echo "****************** Installing visdom ******************" 78 | pip install visdom 79 | 80 | echo "" 81 | echo "" 82 | echo "****************** Installing vot-toolkit python ******************" 83 | pip install git+https://github.com/votchallenge/vot-toolkit-python 84 | 85 | echo "" 86 | echo "" 87 | echo "****************** Installing onnx and onnxruntime-gpu 
******************" 88 | pip install onnx onnxruntime-gpu==1.6.0 89 | 90 | echo "" 91 | echo "" 92 | echo "****************** Installing timm ******************" 93 | pip install timm==0.3.2 94 | 95 | echo "****************** Installing yacs/einops/thop ******************" 96 | pip install yacs 97 | pip install einops 98 | pip install thop 99 | 100 | echo "****************** Install ninja-build for Precise ROI pooling ******************" 101 | apt-get install ninja-build 102 | 103 | echo "****************** Installation complete! ******************" 104 | 105 | python -m pip install -i http://pkg.sensetime.com/repository/pypi-proxy/simple/ --trusted-host pkg.sensetime.com http://10.5.41.14/packages/petrel-oss-sdk.tar.gz --user -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/__init__.py -------------------------------------------------------------------------------- /lib/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/config/__init__.py -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .uvltrack import uvltrack 2 | -------------------------------------------------------------------------------- /lib/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from lib import registry 2 | from .modality_unified_feature_extractor import modality_unified_feature_extractor 3 | 4 | @registry.BACKBONES.register('modality_unified_feature_extractor') 5 | def build_modality_unified_feature_extractor(cfg): 6 | vit = modality_unified_feature_extractor(cfg) 7 | return vit -------------------------------------------------------------------------------- /lib/models/backbones/block.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from .utils import LayerScale, DropPath, Mlp 3 | 4 | class Block(nn.Module): 5 | def __init__( 6 | self, 7 | dim, 8 | num_heads, 9 | mlp_ratio=4., 10 | qkv_bias=False, 11 | drop=0., 12 | attn_drop=0., 13 | init_values=None, 14 | drop_path=0., 15 | act_layer=nn.GELU, 16 | norm_layer=nn.LayerNorm 17 | ): 18 | super().__init__() 19 | self.norm1 = norm_layer(dim) 20 | self.attn = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop) 21 | self.ls1 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity() 22 | self.drop_path1 = DropPath(drop_path) if drop_path > 0. else nn.Identity() 23 | 24 | self.norm2 = norm_layer(dim) 25 | self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop) 26 | self.ls2 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity() 27 | self.drop_path2 = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() 28 | 29 | def forward(self, x, mask=None, flag=None): 30 | x = x + self.drop_path1(self.ls1(self.attn(self.norm1(x), mask, flag=flag))) 31 | x = x + self.drop_path2(self.ls2(self.mlp(self.norm2(x)))) 32 | return x 33 | 34 | class Attention(nn.Module): 35 | def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.): 36 | super().__init__() 37 | assert dim % num_heads == 0, 'dim should be divisible by num_heads' 38 | self.num_heads = num_heads 39 | head_dim = dim // num_heads 40 | self.scale = head_dim ** -0.5 41 | 42 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) 43 | self.attn_drop = nn.Dropout(attn_drop) 44 | self.proj = nn.Linear(dim, dim) 45 | self.proj_drop = nn.Dropout(proj_drop) 46 | 47 | def forward(self, x, mask=None, flag=None): 48 | B, N, C = x.shape 49 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) 50 | q, k, v = qkv.unbind(0) # make torchscript happy (cannot use tensor as tuple) 51 | 52 | attn = (q @ k.transpose(-2, -1)) * self.scale 53 | if mask is not None: 54 | attn = attn.masked_fill(mask.unsqueeze(1).unsqueeze(1), -1e10) 55 | attn = attn.softmax(dim=-1) 56 | attn = self.attn_drop(attn) 57 | 58 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) 59 | x = self.proj(x) 60 | x = self.proj_drop(x) 61 | return x 62 | -------------------------------------------------------------------------------- /lib/models/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from lib import registry 2 | from .modality_adaptive_box_head import ModalityAdaptiveBoxHead 3 | 4 | @registry.HEADS.register('modality_adaptive_box_head') 5 | def build_modality_adaptive_box_head(cfg): 6 | stride = 16 7 | feat_sz = int(cfg.DATA.SEARCH.SIZE / stride) 8 | channel = cfg.MODEL.HEAD.HEAD_DIM 9 | head = ModalityAdaptiveBoxHead(inplanes=cfg.MODEL.HIDDEN_DIM, channel=channel, feat_sz=feat_sz, stride=stride, 10 | cls_tokenize=cfg.MODEL.HEAD.CLS_TOKENIZE, offset_sigmoid=cfg.MODEL.HEAD.OFFSET_SIGMOID, 11 | joint_cls=cfg.MODEL.HEAD.JOINT_CLS, drop_rate=cfg.MODEL.HEAD.DROP, softmax_one=cfg.MODEL.HEAD.SOFTMAX_ONE, 12 | grounding_dilation=cfg.MODEL.HEAD.GROUNDING_DILATION, contrastive_conv=cfg.MODEL.HEAD.CONTRASTIVE_CONV) 13 | return head -------------------------------------------------------------------------------- /lib/models/uvltrack/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/models/uvltrack/__init__.py -------------------------------------------------------------------------------- /lib/models/uvltrack/utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1, 6 | freeze_bn=False): 7 | if freeze_bn: 8 | return nn.Sequential( 9 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, 10 | padding=padding, dilation=dilation, bias=True), 11 | FrozenBatchNorm2d(out_planes), 12 | nn.ReLU(inplace=True)) 13 | else: 14 | return nn.Sequential( 15 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, 16 | padding=padding, dilation=dilation, bias=True), 17 | nn.BatchNorm2d(out_planes), 18 | nn.ReLU(inplace=True)) 19 | 20 | class FrozenBatchNorm2d(torch.nn.Module): 21 | """ 22 | BatchNorm2d where the 
batch statistics and the affine parameters are fixed. 23 | 24 | Copied from torchvision.misc.ops, with eps added before rsqrt, 25 | without which models other than torchvision.models.resnet[18,34,50,101] 26 | produce NaNs. 27 | """ 28 | 29 | def __init__(self, n): 30 | super(FrozenBatchNorm2d, self).__init__() 31 | self.register_buffer("weight", torch.ones(n)) 32 | self.register_buffer("bias", torch.zeros(n)) 33 | self.register_buffer("running_mean", torch.zeros(n)) 34 | self.register_buffer("running_var", torch.ones(n)) 35 | 36 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, 37 | missing_keys, unexpected_keys, error_msgs): 38 | num_batches_tracked_key = prefix + 'num_batches_tracked' 39 | if num_batches_tracked_key in state_dict: 40 | del state_dict[num_batches_tracked_key] 41 | 42 | super(FrozenBatchNorm2d, self)._load_from_state_dict( 43 | state_dict, prefix, local_metadata, strict, 44 | missing_keys, unexpected_keys, error_msgs) 45 | 46 | def forward(self, x): 47 | # move reshapes to the beginning 48 | # to make it fuser-friendly 49 | w = self.weight.reshape(1, -1, 1, 1) 50 | b = self.bias.reshape(1, -1, 1, 1) 51 | rv = self.running_var.reshape(1, -1, 1, 1) 52 | rm = self.running_mean.reshape(1, -1, 1, 1) 53 | eps = 1e-5 54 | scale = w * (rv + eps).rsqrt() # rsqrt(x): 1/sqrt(x), r: reciprocal 55 | bias = b - rm * scale 56 | return x * scale + bias 57 | -------------------------------------------------------------------------------- /lib/models/uvltrack/uvltrack.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from lib import registry 4 | from lib.models.backbones import * 5 | from lib.models.heads import * 6 | 7 | 8 | class UVLTrack(nn.Module): 9 | """ This is the base class for Transformer Tracking, which jointly performs feature extraction and interaction. """ 10 | def __init__(self, backbone, box_head): 11 | """ Initializes the model.
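Args (as wired up by build_model below): backbone is the modality-unified feature extractor built from registry.BACKBONES; box_head is the modality-adaptive box head built from registry.HEADS.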
12 | """ 13 | super().__init__() 14 | # self.language_backbone = language_backbone 15 | self.backbone = backbone 16 | self.box_head = box_head 17 | 18 | def forward(self, template, search, text, template_mask, context_mask, flag): 19 | # text_feature = self.language_backbone(text) # b, s, c b, s FT 20 | backbone_info = self.backbone(template, search, text, flag) 21 | backbone_info['template_mask'] = template_mask 22 | backbone_info['context_mask'] = context_mask 23 | head_info = self.box_head(backbone_info) 24 | return head_info 25 | 26 | def forward_prompt_init(self, template, search, text, template_mask, context_mask, flag): 27 | backbone_info = self.backbone(template, search, text, flag) 28 | backbone_info['template_mask'] = template_mask 29 | backbone_info['context_mask'] = context_mask 30 | prompt = self.box_head.forward_prompt(backbone_info) 31 | return prompt 32 | 33 | def forward_prompt(self, out_dict, template_mask, context_mask): 34 | backbone_info = out_dict 35 | backbone_info['template_mask'] = template_mask 36 | backbone_info['context_mask'] = context_mask 37 | prompt = self.box_head.forward_prompt(backbone_info) 38 | return prompt 39 | 40 | 41 | def forward_test(self, template, search, text, prompt, flag): 42 | backbone_info = self.backbone(template, search, text, flag) 43 | backbone_info['prompt'] = prompt 44 | head_info = self.box_head(backbone_info) 45 | return head_info 46 | 47 | @registry.MODELS.register('uvltrack') 48 | def build_model(cfg): 49 | # language_backbone = registry.BACKBONES[cfg.MODEL.BACKBONE.LANGUAGE.TYPE](cfg) 50 | backbone = registry.BACKBONES[cfg.MODEL.BACKBONE.TYPE](cfg) 51 | head = registry.HEADS[cfg.MODEL.HEAD.TYPE](cfg) # a simple corner head 52 | model = UVLTrack( 53 | # language_backbone, 54 | backbone, 55 | head 56 | ) 57 | return model 58 | -------------------------------------------------------------------------------- /lib/registry.py: -------------------------------------------------------------------------------- 1 | def _register_generic(module_dict, module_name, module): 2 | assert module_name not in module_dict 3 | module_dict[module_name] = module 4 | 5 | 6 | class Registry(dict): 7 | ''' 8 | A helper class for managing registering modules, it extends a dictionary 9 | and provides a register functions. 10 | 11 | Eg. creeting a registry: 12 | some_registry = Registry({"default": default_module}) 13 | 14 | There're two ways of registering new modules: 15 | 1): normal way is just calling register function: 16 | def foo(): 17 | ... 18 | some_registry.register("foo_module", foo) 19 | 2): used as decorator when declaring the module: 20 | @some_registry.register("foo_module") 21 | @some_registry.register("foo_modeul_nickname") 22 | def foo(): 23 | ... 
24 | 25 | Modules are accessed just like entries of a dictionary, e.g.: 26 | f = some_registry["foo_module"] 27 | ''' 28 | def __init__(self, *args, **kwargs): 29 | super(Registry, self).__init__(*args, **kwargs) 30 | 31 | def register(self, module_name, module=None): 32 | # used as function call 33 | if module is not None: 34 | _register_generic(self, module_name, module) 35 | return 36 | 37 | # used as decorator 38 | def register_fn(fn): 39 | _register_generic(self, module_name, fn) 40 | return fn 41 | 42 | return register_fn 43 | 44 | 45 | ACTORS = Registry() 46 | MODELS = Registry() 47 | BACKBONES = Registry() 48 | HEADS = Registry() 49 | LOSSES = Registry() -------------------------------------------------------------------------------- /lib/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/test/__init__.py -------------------------------------------------------------------------------- /lib/test/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/test/analysis/__init__.py -------------------------------------------------------------------------------- /lib/test/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import Sequence 2 | from .tracker import Tracker, trackerlist 3 | from .datasets import get_dataset 4 | from .environment import create_default_local_file_ITP_test -------------------------------------------------------------------------------- /lib/test/evaluation/datasets.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import importlib 3 | from lib.test.evaluation.data import SequenceList 4 | 5 | DatasetInfo = namedtuple('DatasetInfo', ['module', 'class_name', 'kwargs']) 6 | 7 | pt = "lib.test.evaluation.%sdataset" # Useful abbreviation to reduce the clutter 8 | 9 | dataset_dict = dict( 10 | otb=DatasetInfo(module=pt % "otb", class_name="OTBDataset", kwargs=dict()), 11 | otb99=DatasetInfo(module=pt % "otb99", class_name="OTB99Dataset", kwargs=dict()), 12 | nfs=DatasetInfo(module=pt % "nfs", class_name="NFSDataset", kwargs=dict()), 13 | uav=DatasetInfo(module=pt % "uav", class_name="UAVDataset", kwargs=dict()), 14 | tc128=DatasetInfo(module=pt % "tc128", class_name="TC128Dataset", kwargs=dict()), 15 | tc128ce=DatasetInfo(module=pt % "tc128ce", class_name="TC128CEDataset", kwargs=dict()), 16 | trackingnet=DatasetInfo(module=pt % "trackingnet", class_name="TrackingNetDataset", kwargs=dict()), 17 | got10k_test=DatasetInfo(module=pt % "got10k", class_name="GOT10KDataset", kwargs=dict(split='test')), 18 | got10k_val=DatasetInfo(module=pt % "got10k", class_name="GOT10KDataset", kwargs=dict(split='val')), 19 | got10k_ltrval=DatasetInfo(module=pt % "got10k", class_name="GOT10KDataset", kwargs=dict(split='ltrval')), 20 | lasot=DatasetInfo(module=pt % "lasot", class_name="LaSOTDataset", kwargs=dict()), 21 | lasotext=DatasetInfo(module=pt % "lasotext", class_name="LaSOTextDataset", kwargs=dict()), 22 | lasot_lmdb=DatasetInfo(module=pt % "lasot_lmdb", class_name="LaSOTlmdbDataset", kwargs=dict()), 23 | tnl2k=DatasetInfo(module=pt % "tnl2k", class_name="TNL2KDataset", kwargs=dict()), 24 | itb=DatasetInfo(module=pt % "itb", class_name="ITBDataset",
kwargs=dict()), 25 | avist=DatasetInfo(module=pt % "avist", class_name="AVisTDataset", kwargs=dict()), 26 | ) 27 | 28 | 29 | def load_dataset(name: str): 30 | """ Import and load a single dataset.""" 31 | name = name.lower() 32 | dset_info = dataset_dict.get(name) 33 | if dset_info is None: 34 | raise ValueError('Unknown dataset \'%s\'' % name) 35 | 36 | m = importlib.import_module(dset_info.module) 37 | dataset = getattr(m, dset_info.class_name)(**dset_info.kwargs) # Call the constructor 38 | return dataset.get_sequence_list() 39 | 40 | 41 | def get_dataset(*args): 42 | """ Get a single or set of datasets.""" 43 | dset = SequenceList() 44 | for name in args: 45 | dset.extend(load_dataset(name.split('_')[0])) 46 | return dset -------------------------------------------------------------------------------- /lib/test/evaluation/environment.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | 4 | 5 | class EnvSettings: 6 | def __init__(self): 7 | test_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) 8 | 9 | self.results_path = '{}/tracking_results/'.format(test_path) 10 | self.segmentation_path = '{}/segmentation_results/'.format(test_path) 11 | self.network_path = '{}/networks/'.format(test_path) 12 | self.result_plot_path = '{}/result_plots/'.format(test_path) 13 | self.otb_path = '' 14 | self.nfs_path = '' 15 | self.uav_path = '' 16 | self.tpl_path = '' 17 | self.vot_path = '' 18 | self.got10k_path = '' 19 | self.lasot_path = '' 20 | self.trackingnet_path = '' 21 | self.davis_dir = '' 22 | self.youtubevos_dir = '' 23 | 24 | self.got_packed_results_path = '' 25 | self.got_reports_path = '' 26 | self.tn_packed_results_path = '' 27 | 28 | 29 | def create_default_local_file(): 30 | comment = {'results_path': 'Where to store tracking results', 31 | 'network_path': 'Where tracking networks are stored.'} 32 | 33 | path = os.path.join(os.path.dirname(__file__), 'local.py') 34 | with open(path, 'w') as f: 35 | settings = EnvSettings() 36 | 37 | f.write('from lib.test.evaluation.environment import EnvSettings\n\n') 38 | f.write('def local_env_settings():\n') 39 | f.write(' settings = EnvSettings()\n\n') 40 | f.write(' # Set your local paths here.\n\n') 41 | 42 | for attr in dir(settings): 43 | comment_str = None 44 | if attr in comment: 45 | comment_str = comment[attr] 46 | attr_val = getattr(settings, attr) 47 | if not attr.startswith('__') and not callable(attr_val): 48 | if comment_str is None: 49 | f.write(' settings.{} = \'{}\'\n'.format(attr, attr_val)) 50 | else: 51 | f.write(' settings.{} = \'{}\' # {}\n'.format(attr, attr_val, comment_str)) 52 | f.write('\n return settings\n\n') 53 | 54 | 55 | class EnvSettings_ITP: 56 | def __init__(self, workspace_dir, data_dir, save_dir): 57 | self.prj_dir = workspace_dir 58 | self.save_dir = save_dir 59 | self.results_path = os.path.join(save_dir, 'test/tracking_results') 60 | self.segmentation_path = os.path.join(save_dir, 'test/segmentation_results') 61 | self.network_path = os.path.join(save_dir, 'test/networks') 62 | self.result_plot_path = os.path.join(save_dir, 'test/result_plots') 63 | self.otb_path = os.path.join(data_dir, 'OTB2015') 64 | self.nfs_path = os.path.join(data_dir, 'nfs') 65 | self.uav_path = os.path.join(data_dir, 'UAV123') 66 | self.tc128_path = os.path.join(data_dir, 'TC128') 67 | self.tpl_path = '' 68 | self.vot_path = os.path.join(data_dir, 'VOT2019') 69 | self.got10k_path = os.path.join(data_dir, 'got10k') 70 | self.got10k_lmdb_path =
os.path.join(data_dir, 'got10k_lmdb') 71 | self.lasot_path = os.path.join(data_dir, 'lasot') 72 | self.lasot_lmdb_path = os.path.join(data_dir, 'lasot_lmdb') 73 | self.trackingnet_path = os.path.join(data_dir, 'trackingNet') 74 | self.davis_dir = '' 75 | self.youtubevos_dir = '' 76 | 77 | self.got_packed_results_path = '' 78 | self.got_reports_path = '' 79 | self.tn_packed_results_path = '' 80 | 81 | 82 | def create_default_local_file_ITP_test(workspace_dir, data_dir, save_dir): 83 | comment = {'results_path': 'Where to store tracking results', 84 | 'network_path': 'Where tracking networks are stored.'} 85 | 86 | path = os.path.join(os.path.dirname(__file__), 'local.py') 87 | with open(path, 'w') as f: 88 | settings = EnvSettings_ITP(workspace_dir, data_dir, save_dir) 89 | 90 | f.write('from lib.test.evaluation.environment import EnvSettings\n\n') 91 | f.write('def local_env_settings():\n') 92 | f.write(' settings = EnvSettings()\n\n') 93 | f.write(' # Set your local paths here.\n\n') 94 | 95 | for attr in dir(settings): 96 | comment_str = None 97 | if attr in comment: 98 | comment_str = comment[attr] 99 | attr_val = getattr(settings, attr) 100 | if not attr.startswith('__') and not callable(attr_val): 101 | if comment_str is None: 102 | f.write(' settings.{} = \'{}\'\n'.format(attr, attr_val)) 103 | else: 104 | f.write(' settings.{} = \'{}\' # {}\n'.format(attr, attr_val, comment_str)) 105 | f.write('\n return settings\n\n') 106 | 107 | 108 | def env_settings(): 109 | env_module_name = 'lib.test.evaluation.local' 110 | try: 111 | env_module = importlib.import_module(env_module_name) 112 | return env_module.local_env_settings() 113 | except: 114 | env_file = os.path.join(os.path.dirname(__file__), 'local.py') 115 | 116 | # Create a default file 117 | create_default_local_file() 118 | raise RuntimeError('YOU HAVE NOT SETUP YOUR local.py!!!\n Go to "{}" and set all the paths you need. ' 119 | 'Then try to run again.'.format(env_file)) -------------------------------------------------------------------------------- /lib/test/evaluation/got10kdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset, SequenceList 3 | from lib.test.utils.load_text import load_text 4 | import os 5 | 6 | 7 | class GOT10KDataset(BaseDataset): 8 | """ GOT-10k dataset. 
9 | 10 | Publication: 11 | GOT-10k: A Large High-Diversity Benchmark for Generic Object Tracking in the Wild 12 | Lianghua Huang, Xin Zhao, and Kaiqi Huang 13 | arXiv:1810.11981, 2018 14 | https://arxiv.org/pdf/1810.11981.pdf 15 | 16 | Download dataset from http://got-10k.aitestunion.com/downloads 17 | """ 18 | def __init__(self, split): 19 | super().__init__() 20 | # Split can be test, val, or ltrval (a validation split consisting of videos from the official train set) 21 | if split == 'test' or split == 'val': 22 | self.base_path = os.path.join(self.env_settings.got10k_path, split) 23 | else: 24 | self.base_path = os.path.join(self.env_settings.got10k_path, 'train') 25 | 26 | self.sequence_list = self._get_sequence_list(split) 27 | self.split = split 28 | 29 | def get_sequence_list(self): 30 | return SequenceList([self._construct_sequence(s) for s in self.sequence_list]) 31 | 32 | def _construct_sequence(self, sequence_name): 33 | anno_path = '{}/{}/groundtruth.txt'.format(self.base_path, sequence_name) 34 | 35 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64) 36 | 37 | frames_path = '{}/{}'.format(self.base_path, sequence_name) 38 | frame_list = [frame for frame in os.listdir(frames_path) if frame.endswith(".jpg")] 39 | frame_list.sort(key=lambda f: int(f[:-4])) 40 | frames_list = [os.path.join(frames_path, frame) for frame in frame_list] 41 | 42 | return Sequence(sequence_name, frames_list, 'got10k', ground_truth_rect.reshape(-1, 4)) 43 | 44 | def __len__(self): 45 | return len(self.sequence_list) 46 | 47 | def _get_sequence_list(self, split): 48 | with open('{}/list.txt'.format(self.base_path)) as f: 49 | sequence_list = f.read().splitlines() 50 | 51 | if split == 'ltrval': 52 | with open('{}/got10k_val_split.txt'.format(self.env_settings.dataspec_path)) as f: 53 | seq_ids = f.read().splitlines() 54 | 55 | sequence_list = [sequence_list[int(x)] for x in seq_ids] 56 | return sequence_list 57 | -------------------------------------------------------------------------------- /lib/test/evaluation/itbdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .data import Sequence, BaseDataset, SequenceList 3 | from .utils import load_text 4 | import os 5 | 6 | class ITBDataset(BaseDataset): 7 | """ ITB dataset (Informative Tracking Benchmark) 8 | """ 9 | def __init__(self): 10 | super().__init__() 11 | self.base_path = self.env_settings.itb_path 12 | self.sequence_info_list = self._get_sequence_info_list(self.base_path) 13 | 14 | def get_sequence_list(self): 15 | return SequenceList([self._construct_sequence(s) for s in self.sequence_info_list]) 16 | 17 | def _construct_sequence(self, sequence_info): 18 | sequence_path = sequence_info['path'] 19 | nz = sequence_info['nz'] 20 | ext = sequence_info['ext'] 21 | start_frame = sequence_info['startFrame'] 22 | end_frame = sequence_info['endFrame'] 23 | 24 | init_omit = 0 25 | if 'initOmit' in sequence_info: 26 | init_omit = sequence_info['initOmit'] 27 | 28 | frames = ['{base_path}/{sequence_path}/{frame:0{nz}}.{ext}'.format(base_path=self.base_path, 29 | sequence_path=sequence_path, frame=frame_num, nz=nz, ext=ext) for frame_num in range(start_frame+init_omit, end_frame+1)] 30 | 31 | anno_path = '{}/{}'.format(self.base_path, sequence_info['anno_path']) 32 | 33 | # NOTE: some annotation files have inconsistent delimiters, which pandas cannot handle 34 | ground_truth_rect = load_text(str(anno_path), delimiter=(',', None), dtype=np.float64, backend='numpy') 35 | return Sequence(sequence_info['name'],
frames, 'itb', ground_truth_rect[init_omit:,:], 36 | object_class=sequence_info['object_class'], language=sequence_info['object_class']) 37 | 38 | def __len__(self): 39 | return len(self.sequence_info_list) 40 | 41 | def get_fileNames(self, rootdir): 42 | fs = [] 43 | fs_all = [] 44 | for root, dirs, files in os.walk(rootdir, topdown=True): 45 | files.sort() 46 | files.sort(key=len) 47 | if files is not None: 48 | for name in files: 49 | _, ending = os.path.splitext(name) 50 | if ending == ".jpg": 51 | _, root_ = os.path.split(root) 52 | fs.append(os.path.join(root_, name)) 53 | fs_all.append(os.path.join(root, name)) 54 | 55 | return fs_all, fs 56 | 57 | def _get_sequence_info_list(self, base_path): 58 | sequence_info_list = [] 59 | for scene in os.listdir(base_path): 60 | if '.' in scene: 61 | continue 62 | videos = os.listdir(os.path.join(base_path, scene)) 63 | for video in videos: 64 | _, fs = self.get_fileNames(os.path.join(base_path, scene, video)) 65 | video_tmp = {"name": video, "path": scene+'/'+video, "startFrame": 1, "endFrame": len(fs), "nz": len(fs[0].split('/')[-1].split('.')[0]), "ext": "jpg", 66 | "anno_path": scene+'/'+video+"/groundtruth.txt", 67 | "object_class": "unknown"} 68 | sequence_info_list.append(video_tmp) 69 | 70 | return sequence_info_list 71 | -------------------------------------------------------------------------------- /lib/test/evaluation/lasotextdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset, SequenceList 3 | from lib.test.utils.load_text import load_text 4 | import glob 5 | import os 6 | 7 | class LaSOTextDataset(BaseDataset): 8 | """ 9 | LaSOT extension subset (LaSOT_ext) consisting of 150 videos from 15 object classes unseen in the original LaSOT 10 | 11 | Publication: 12 | LaSOT: A High-quality Large-scale Single Object Tracking Benchmark 13 | Heng Fan, Hexin Bai, Liting Lin, Fan Yang, Peng Chu et al. 14 | IJCV, 2021 15 | https://arxiv.org/abs/2009.03465 16 | 17 | Download the dataset from https://cis.temple.edu/lasot/download.html 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self.base_path = self.env_settings.lasot_ext_path 22 | self.sequence_list = self._get_sequence_list() 23 | self.clean_list = self.clean_seq_list() 24 | 25 | def clean_seq_list(self): 26 | clean_lst = [] 27 | for i in range(len(self.sequence_list)): 28 | cls, _ = self.sequence_list[i].split('-') 29 | clean_lst.append(cls) 30 | return clean_lst 31 | 32 | def get_sequence_list(self): 33 | return SequenceList([self._construct_sequence(s) for s in self.sequence_list]) 34 | 35 | def _construct_sequence(self, sequence_name): 36 | class_name = sequence_name.split('-')[0] 37 | anno_path = '{}/{}/{}/groundtruth.txt'.format(self.base_path, class_name, sequence_name) 38 | 39 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64) 40 | 41 | occlusion_label_path = '{}/{}/{}/full_occlusion.txt'.format(self.base_path, class_name, sequence_name) 42 | 43 | # NOTE: the pandas backend seems very slow for loading occlusion/oov masks 44 | full_occlusion = load_text(str(occlusion_label_path), delimiter=',', dtype=np.float64, backend='numpy') 45 | 46 | out_of_view_label_path = '{}/{}/{}/out_of_view.txt'.format(self.base_path, class_name, sequence_name) 47 | out_of_view = load_text(str(out_of_view_label_path), delimiter=',', dtype=np.float64, backend='numpy') 48 | 49 | target_visible
= np.logical_and(full_occlusion == 0, out_of_view == 0) 50 | 51 | frames_path = '{}/{}/{}/img'.format(self.base_path, class_name, sequence_name) 52 | 53 | frames_list = ['{}/{:08d}.jpg'.format(frames_path, frame_number) for frame_number in range(1, ground_truth_rect.shape[0] + 1)] 54 | 55 | language_file = os.path.join(self.base_path, class_name, sequence_name, "nlp.txt") 56 | with open(language_file, 'r') as f: 57 | language = f.readlines()[0].rstrip() 58 | 59 | target_class = class_name 60 | return Sequence(sequence_name, frames_list, 'lasotext', ground_truth_rect.reshape(-1, 4), 61 | object_class=target_class, target_visible=target_visible, language=language) 62 | 63 | def __len__(self): 64 | return len(self.sequence_list) 65 | 66 | def _get_sequence_list(self): 67 | sequence_list = [path.split('/')[-2] for path in sorted(glob.glob(os.path.join(self.base_path, '*/*/')))] 68 | return sequence_list 69 | -------------------------------------------------------------------------------- /lib/test/evaluation/local.py: -------------------------------------------------------------------------------- 1 | from lib.test.evaluation.environment import EnvSettings 2 | import os 3 | prj_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")) 4 | 5 | def local_env_settings(): 6 | settings = EnvSettings() 7 | settings.prj_dir = prj_dir 8 | settings.save_dir = prj_dir 9 | settings.result_plot_path = os.path.join(prj_dir, 'test/result_plots') 10 | settings.results_path = os.path.join(prj_dir, 'test/tracking_results') 11 | settings.lasot_path = os.path.join(prj_dir, 'data/lasot') 12 | settings.nfs_path = os.path.join(prj_dir, 'data/nfs') 13 | settings.otb_path = os.path.join(prj_dir, 'data/otb99') 14 | settings.trackingnet_path = os.path.join(prj_dir, 'data/trackingnet') 15 | settings.uav_path = os.path.join(prj_dir, 'data/uav') 16 | settings.tnl2k_path = os.path.join(prj_dir, 'data/tnl2k/test') 17 | settings.otb99_path = os.path.join(prj_dir, 'data/otb99') 18 | settings.lasot_ext_path = os.path.join(prj_dir, 'data/lasotext') 19 | 20 | return settings -------------------------------------------------------------------------------- /lib/test/evaluation/otb99dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset, SequenceList 3 | from lib.test.utils.load_text import load_text 4 | import glob 5 | import os 6 | 7 | class OTB99Dataset(BaseDataset): 8 | """ 9 | OTB99 test set: OTB sequences paired with a natural-language description of the target 10 | 11 | Publication: 12 | Tracking by Natural Language Specification 13 | Zhenyang Li, Ran Tao, Efstratios Gavves, Cees G. M. Snoek and Arnold W. M. Smeulders 14 | CVPR, 2017 15 | 16 | 17 | The language queries are loaded from OTB_query_test (one description per sequence). 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self.base_path = self.env_settings.otb99_path 22 | self.sequence_list = self._get_sequence_list() 23 | 24 | def get_sequence_list(self): 25 | return SequenceList([self._construct_sequence(s) for s in self.sequence_list]) 26 | 27 | def _construct_sequence(self, sequence_name): 28 | anno_path = os.path.join(self.base_path, "OTB_videos", sequence_name, 'groundtruth_rect.txt') 29 | try: 30 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64) 31 | except: 32 | ground_truth_rect = load_text(str(anno_path),
delimiter='\t', dtype=np.float64) 33 | 34 | frames_list = sorted(glob.glob(os.path.join(self.base_path, 'OTB_videos', sequence_name, 'img', '*'))) 35 | language_file = os.path.join(self.base_path, 'OTB_query_test', f"{sequence_name}.txt") 36 | 37 | with open(language_file, 'r') as f: 38 | language = f.readlines()[0].rstrip() 39 | 40 | return Sequence(sequence_name, frames_list, 'otb99', ground_truth_rect.reshape(-1, 4), 41 | object_class=None, target_visible=None, language=language) 42 | 43 | def __len__(self): 44 | return len(self.sequence_list) 45 | 46 | def _get_sequence_list(self): 47 | sequence_list = sorted([p.split('/')[-1].split('.')[0] for p in glob.glob(os.path.join(self.base_path, 'OTB_query_test/*'))]) 48 | return sequence_list 49 | -------------------------------------------------------------------------------- /lib/test/evaluation/tc128cedataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset, SequenceList 3 | import os 4 | import glob 5 | import six 6 | 7 | 8 | class TC128CEDataset(BaseDataset): 9 | """ 10 | TC-128 Dataset (78 newly added sequences) 11 | modified from the implementation in got10k-toolkit (https://github.com/got-10k/toolkit) 12 | """ 13 | def __init__(self): 14 | super().__init__() 15 | self.base_path = self.env_settings.tc128_path 16 | self.anno_files = sorted(glob.glob( 17 | os.path.join(self.base_path, '*/*_gt.txt'))) 18 | """filter the newly added sequences (_ce)""" 19 | self.anno_files = [s for s in self.anno_files if "_ce" in s] 20 | self.seq_dirs = [os.path.dirname(f) for f in self.anno_files] 21 | self.seq_names = [os.path.basename(d) for d in self.seq_dirs] 22 | # valid frame range for each sequence 23 | self.range_files = [glob.glob(os.path.join(d, '*_frames.txt'))[0] for d in self.seq_dirs] 24 | 25 | def get_sequence_list(self): 26 | return SequenceList([self._construct_sequence(s) for s in self.seq_names]) 27 | 28 | def _construct_sequence(self, sequence_name): 29 | if isinstance(sequence_name, six.string_types): 30 | if not sequence_name in self.seq_names: 31 | raise Exception('Sequence {} not found.'.format(sequence_name)) 32 | index = self.seq_names.index(sequence_name) 33 | # load valid frame range 34 | frames = np.loadtxt(self.range_files[index], dtype=int, delimiter=',') 35 | img_files = [os.path.join(self.seq_dirs[index], 'img/%04d.jpg' % f) for f in range(frames[0], frames[1] + 1)] 36 | 37 | # load annotations 38 | anno = np.loadtxt(self.anno_files[index], delimiter=',') 39 | assert len(img_files) == len(anno) 40 | assert anno.shape[1] == 4 41 | 42 | # return img_files, anno 43 | return Sequence(sequence_name, img_files, 'tc128', anno.reshape(-1, 4)) 44 | 45 | def __len__(self): 46 | return len(self.seq_names) 47 | -------------------------------------------------------------------------------- /lib/test/evaluation/tc128dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset, SequenceList 3 | import os 4 | import glob 5 | import six 6 | 7 | 8 | class TC128Dataset(BaseDataset): 9 | """ 10 | TC-128 Dataset 11 | modified from the implementation in got10k-toolkit (https://github.com/got-10k/toolkit) 12 | """ 13 | def __init__(self): 14 | super().__init__() 15 | self.base_path = self.env_settings.tc128_path 16 | self.anno_files = sorted(glob.glob( 17 | os.path.join(self.base_path, '*/*_gt.txt'))) 18 | 
self.seq_dirs = [os.path.dirname(f) for f in self.anno_files] 19 | self.seq_names = [os.path.basename(d) for d in self.seq_dirs] 20 | # valid frame range for each sequence 21 | self.range_files = [glob.glob(os.path.join(d, '*_frames.txt'))[0] for d in self.seq_dirs] 22 | 23 | def get_sequence_list(self): 24 | return SequenceList([self._construct_sequence(s) for s in self.seq_names]) 25 | 26 | def _construct_sequence(self, sequence_name): 27 | if isinstance(sequence_name, six.string_types): 28 | if not sequence_name in self.seq_names: 29 | raise Exception('Sequence {} not found.'.format(sequence_name)) 30 | index = self.seq_names.index(sequence_name) 31 | # load valid frame range 32 | frames = np.loadtxt(self.range_files[index], dtype=int, delimiter=',') 33 | img_files = [os.path.join(self.seq_dirs[index], 'img/%04d.jpg' % f) for f in range(frames[0], frames[1] + 1)] 34 | 35 | # load annotations 36 | anno = np.loadtxt(self.anno_files[index], delimiter=',') 37 | assert len(img_files) == len(anno) 38 | assert anno.shape[1] == 4 39 | 40 | # return img_files, anno 41 | return Sequence(sequence_name, img_files, 'tc128', anno.reshape(-1, 4)) 42 | 43 | def __len__(self): 44 | return len(self.seq_names) 45 | -------------------------------------------------------------------------------- /lib/test/evaluation/tnl2kdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset, SequenceList 3 | from lib.test.utils.load_text import load_text 4 | import glob 5 | import os 6 | 7 | class TNL2KDataset(BaseDataset): 8 | """ 9 | TNL2K test set consisting of 700 videos, each annotated with a natural-language description of the target 10 | 11 | Publication: 12 | Towards More Flexible and Accurate Object Tracking with Natural Language: Algorithms and Benchmark 13 | Xiao Wang, Xiujun Shu, Zhipeng Zhang, Bo Jiang, Yaowei Wang, Yonghong Tian and Feng Wu 14 | CVPR, 2021 15 | https://arxiv.org/abs/2103.16746 16 | 17 | Download the dataset from https://sites.google.com/view/langtrackbenchmark/ 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self.base_path = self.env_settings.tnl2k_path 22 | self.sequence_list = self._get_sequence_list() 23 | 24 | def get_sequence_list(self): 25 | return SequenceList([self._construct_sequence(s) for s in self.sequence_list]) 26 | 27 | def _construct_sequence(self, sequence_name): 28 | anno_path = os.path.join(self.base_path, sequence_name, 'groundtruth.txt') 29 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64) 30 | 31 | frames_list = sorted(glob.glob(os.path.join(self.base_path, sequence_name, 'imgs', '*'))) 32 | 33 | language_file = os.path.join(self.base_path, sequence_name, "language.txt") 34 | 35 | with open(language_file, 'r') as f: 36 | language = f.readlines()[0].rstrip() 37 | 38 | return Sequence(sequence_name, frames_list, 'tnl2k', ground_truth_rect.reshape(-1, 4), 39 | object_class=None, target_visible=None, language=language) 40 | 41 | def __len__(self): 42 | return len(self.sequence_list) 43 | 44 | def _get_sequence_list(self): 45 | sequence_list = sorted([p.split('/')[-2] for p in glob.glob(os.path.join(self.base_path, '*/'))]) 46 | return sequence_list 47 | -------------------------------------------------------------------------------- /lib/test/evaluation/trackingnetdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset,
SequenceList 3 | import os 4 | from lib.test.utils.load_text import load_text 5 | 6 | 7 | class TrackingNetDataset(BaseDataset): 8 | """ TrackingNet test set. 9 | 10 | Publication: 11 | TrackingNet: A Large-Scale Dataset and Benchmark for Object Tracking in the Wild. 12 | Matthias Mueller,Adel Bibi, Silvio Giancola, Salman Al-Subaihi and Bernard Ghanem 13 | ECCV, 2018 14 | https://ivul.kaust.edu.sa/Documents/Publications/2018/TrackingNet%20A%20Large%20Scale%20Dataset%20and%20Benchmark%20for%20Object%20Tracking%20in%20the%20Wild.pdf 15 | 16 | Download the dataset using the toolkit https://github.com/SilvioGiancola/TrackingNet-devkit. 17 | """ 18 | def __init__(self): 19 | super().__init__() 20 | self.base_path = self.env_settings.trackingnet_path 21 | 22 | sets = 'TEST' 23 | if not isinstance(sets, (list, tuple)): 24 | if sets == 'TEST': 25 | sets = ['TEST'] 26 | elif sets == 'TRAIN': 27 | sets = ['TRAIN_{}'.format(i) for i in range(5)] 28 | 29 | self.sequence_list = self._list_sequences(self.base_path, sets) 30 | 31 | def get_sequence_list(self): 32 | return SequenceList([self._construct_sequence(set, seq_name) for set, seq_name in self.sequence_list]) 33 | 34 | def _construct_sequence(self, set, sequence_name): 35 | anno_path = '{}/{}/anno/{}.txt'.format(self.base_path, set, sequence_name) 36 | 37 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64, backend='numpy') 38 | 39 | frames_path = '{}/{}/frames/{}'.format(self.base_path, set, sequence_name) 40 | frame_list = [frame for frame in os.listdir(frames_path) if frame.endswith(".jpg")] 41 | frame_list.sort(key=lambda f: int(f[:-4])) 42 | frames_list = [os.path.join(frames_path, frame) for frame in frame_list] 43 | 44 | return Sequence(sequence_name, frames_list, 'trackingnet', ground_truth_rect.reshape(-1, 4)) 45 | 46 | def __len__(self): 47 | return len(self.sequence_list) 48 | 49 | def _list_sequences(self, root, set_ids): 50 | sequence_list = [] 51 | 52 | for s in set_ids: 53 | anno_dir = os.path.join(root, s, "anno") 54 | sequences_cur_set = [(s, os.path.splitext(f)[0]) for f in os.listdir(anno_dir) if f.endswith('.txt')] 55 | 56 | sequence_list += sequences_cur_set 57 | 58 | return sequence_list 59 | -------------------------------------------------------------------------------- /lib/test/evaluation/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def load_text_numpy(path, delimiter, dtype): 5 | if isinstance(delimiter, (tuple, list)): 6 | for d in delimiter: 7 | try: 8 | ground_truth_rect = np.loadtxt(path, delimiter=d, dtype=dtype) 9 | return ground_truth_rect 10 | except: 11 | pass 12 | 13 | raise Exception('Could not read file {}'.format(path)) 14 | else: 15 | ground_truth_rect = np.loadtxt(path, delimiter=delimiter, dtype=dtype) 16 | return ground_truth_rect 17 | 18 | 19 | def load_text_pandas(path, delimiter, dtype): 20 | if isinstance(delimiter, (tuple, list)): 21 | for d in delimiter: 22 | try: 23 | ground_truth_rect = pd.read_csv(path, delimiter=d, header=None, dtype=dtype, na_filter=False, 24 | low_memory=False).values 25 | return ground_truth_rect 26 | except Exception as e: 27 | pass 28 | 29 | raise Exception('Could not read file {}'.format(path)) 30 | else: 31 | ground_truth_rect = pd.read_csv(path, delimiter=delimiter, header=None, dtype=dtype, na_filter=False, 32 | low_memory=False).values 33 | return ground_truth_rect 34 | 35 | def load_text(path, delimiter=' ', dtype=np.float32, 
backend='numpy'): 36 | if backend == 'numpy': 37 | return load_text_numpy(path, delimiter, dtype) 38 | elif backend == 'pandas': 39 | return load_text_pandas(path, delimiter, dtype) -------------------------------------------------------------------------------- /lib/test/parameter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/test/parameter/__init__.py -------------------------------------------------------------------------------- /lib/test/parameter/uvltrack.py: -------------------------------------------------------------------------------- 1 | from lib.test.utils import TrackerParams 2 | import os 3 | from easydict import EasyDict as edict 4 | from lib.test.evaluation.environment import env_settings 5 | from lib.config.uvltrack.config import cfg, update_config_from_file 6 | 7 | 8 | def _update_config(base_cfg, exp_cfg): 9 | if isinstance(base_cfg, edict) and isinstance(exp_cfg, edict): 10 | for k, v in exp_cfg.items(): 11 | if k in base_cfg: 12 | if not isinstance(v, dict): 13 | base_cfg[k] = v 14 | else: 15 | _update_config(base_cfg[k], v) 16 | else: 17 | raise ValueError("{} not exist in config.py".format(k)) 18 | else: 19 | return 20 | 21 | def parameters(yaml_name: str, extra_cfg=None, epoch=None): 22 | params = TrackerParams() 23 | prj_dir = env_settings().prj_dir 24 | save_dir = env_settings().save_dir 25 | # update default config from yaml file 26 | params.yaml_name = yaml_name 27 | yaml_file = os.path.join(prj_dir, 'experiments/uvltrack/%s.yaml' % yaml_name) 28 | update_config_from_file(yaml_file) 29 | if epoch is not None: 30 | cfg.TEST.EPOCH = epoch 31 | params.cfg = cfg 32 | if extra_cfg is not None: 33 | _update_config(params.cfg, extra_cfg) 34 | # template and search region 35 | params.template_factor = cfg.TEST.TEMPLATE_FACTOR 36 | params.template_size = cfg.TEST.TEMPLATE_SIZE 37 | params.search_factor = cfg.TEST.SEARCH_FACTOR 38 | params.search_size = cfg.TEST.SEARCH_SIZE 39 | params.grounding_size = cfg.TEST.SEARCH_SIZE 40 | 41 | # Network checkpoint path 42 | params.checkpoint = os.path.join(save_dir, "checkpoints/train/uvltrack/%s/UVLTrack_ep%04d.pth.tar"%(yaml_name, cfg.TEST.EPOCH)) # 470 43 | 44 | # whether to save boxes from all queries 45 | params.save_all_boxes = False 46 | 47 | return params 48 | -------------------------------------------------------------------------------- /lib/test/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/test/tracker/__init__.py -------------------------------------------------------------------------------- /lib/test/tracker/basetracker.py: -------------------------------------------------------------------------------- 1 | from _collections import OrderedDict 2 | 3 | 4 | class BaseTracker: 5 | """Base class for all trackers.""" 6 | 7 | def __init__(self, params): 8 | self.params = params 9 | self.visdom = None 10 | 11 | def predicts_segmentation_mask(self): 12 | return False 13 | 14 | def initialize(self, image, info: dict) -> dict: 15 | """Overload this function in your tracker. This should initialize the model.""" 16 | raise NotImplementedError 17 | 18 | def track(self, image, info: dict = None) -> dict: 19 | """Overload this function in your tracker. 
This should track in the frame and update the model.""" 20 | raise NotImplementedError 21 | 22 | def visdom_draw_tracking(self, image, box, segmentation=None): 23 | if isinstance(box, OrderedDict): 24 | box = [v for k, v in box.items()] 25 | else: 26 | box = (box,) 27 | if segmentation is None: 28 | self.visdom.register((image, *box), 'Tracking', 1, 'Tracking') 29 | else: 30 | self.visdom.register((image, *box, segmentation), 'Tracking', 1, 'Tracking') -------------------------------------------------------------------------------- /lib/test/tracker/tracker_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from lib.utils.misc import NestedTensor 4 | import matplotlib.pyplot as plt 5 | import os 6 | 7 | class Preprocessor(object): 8 | def __init__(self): 9 | self.mean = torch.tensor([0.485, 0.456, 0.406]).view((1, 3, 1, 1)).cuda() 10 | self.std = torch.tensor([0.229, 0.224, 0.225]).view((1, 3, 1, 1)).cuda() 11 | 12 | def process(self, img_arr: np.ndarray, amask_arr: np.ndarray): 13 | # Deal with the image patch 14 | img_tensor = torch.tensor(img_arr).cuda().float().permute((2,0,1)).unsqueeze(dim=0) 15 | img_tensor_norm = ((img_tensor / 255.0) - self.mean) / self.std # (1,3,H,W) 16 | # Deal with the attention mask 17 | amask_tensor = torch.from_numpy(amask_arr).to(torch.bool).cuda().unsqueeze(dim=0) # (1,H,W) 18 | return NestedTensor(img_tensor_norm, amask_tensor) 19 | 20 | class Preprocessor_wo_mask(object): 21 | def __init__(self): 22 | self.mean = torch.tensor([0.485, 0.456, 0.406]).view((1, 3, 1, 1)).cuda() 23 | self.std = torch.tensor([0.229, 0.224, 0.225]).view((1, 3, 1, 1)).cuda() 24 | 25 | def process(self, img_arr: np.ndarray): 26 | # Deal with the image patch 27 | img_tensor = torch.tensor(img_arr).cuda().float().permute((2,0,1)).unsqueeze(dim=0) 28 | img_tensor_norm = ((img_tensor / 255.0) - self.mean) / self.std # (1,3,H,W) 29 | return img_tensor_norm 30 | 31 | 32 | class PreprocessorX(object): 33 | def __init__(self): 34 | self.mean = torch.tensor([0.485, 0.456, 0.406]).view((1, 3, 1, 1)).cuda() 35 | self.std = torch.tensor([0.229, 0.224, 0.225]).view((1, 3, 1, 1)).cuda() 36 | 37 | def process(self, img_arr: np.ndarray, amask_arr: np.ndarray): 38 | # Deal with the image patch 39 | img_tensor = torch.tensor(img_arr).cuda().float().permute((2,0,1)).unsqueeze(dim=0) 40 | img_tensor_norm = ((img_tensor / 255.0) - self.mean) / self.std # (1,3,H,W) 41 | # Deal with the attention mask 42 | amask_tensor = torch.from_numpy(amask_arr).to(torch.bool).cuda().unsqueeze(dim=0) # (1,H,W) 43 | return img_tensor_norm, amask_tensor 44 | 45 | 46 | class PreprocessorX_onnx(object): 47 | def __init__(self): 48 | self.mean = np.array([0.485, 0.456, 0.406]).reshape((1, 3, 1, 1)) 49 | self.std = np.array([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1)) 50 | 51 | def process(self, img_arr: np.ndarray, amask_arr: np.ndarray): 52 | """img_arr: (H,W,3), amask_arr: (H,W)""" 53 | # Deal with the image patch 54 | img_arr_4d = img_arr[np.newaxis, :, :, :].transpose(0, 3, 1, 2) 55 | img_arr_4d = (img_arr_4d / 255.0 - self.mean) / self.std # (1, 3, H, W) 56 | # Deal with the attention mask 57 | amask_arr_3d = amask_arr[np.newaxis, :, :] # (1,H,W) 58 | return img_arr_4d.astype(np.float32), amask_arr_3d.astype(np.bool) 59 | 60 | def vis_attn_maps(attn_weights, q_w, k_w, skip_len, x1, x2, x1_title, x2_title, save_path='.', idxs=None): 61 | if not os.path.exists(save_path): 62 | os.makedirs(save_path) 63 | shape1 = [q_w, q_w] 64 | 
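# q_w and k_w are the side lengths of the query/key token grids; each head-averaged
# attention map below is reshaped to [q_w, q_w, k_w, k_w] so a 2-D slice can be
# visualized per reference point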
shape2 = [k_w, k_w] 65 | 66 | attn_weights_mean = [] 67 | for attn in attn_weights: 68 | attn_weights_mean.append(attn[..., skip_len:(skip_len+k_w**2)].mean(dim=1).squeeze().reshape(shape1+shape2).cpu()) 69 | 70 | # downsampling factor 71 | fact = 32 72 | 73 | # let's select 4 reference points for visualization 74 | # idxs = [(32, 32), (64, 64), (32, 96), (96, 96), ] 75 | if idxs is None: 76 | idxs = [(64, 64)] 77 | 78 | block_num=0 79 | idx_o = idxs[0] 80 | for attn_weight in attn_weights_mean: 81 | fig = plt.figure(constrained_layout=False, figsize=(5, 5), dpi=160) 82 | fig.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0) 83 | ax = fig.add_subplot(111) 84 | idx = (idx_o[0] // fact, idx_o[1] // fact) 85 | ax.imshow(attn_weight[..., idx[0], idx[1]], cmap='cividis', interpolation='nearest') 86 | ax.axis('off') 87 | # ax.set_title(f'Stage2-Block{block_num}') 88 | plt.savefig(save_path + '/Stage2-Block{}_attn_weight.png'.format(block_num)) 89 | plt.close() 90 | block_num += 1 91 | 92 | fig = plt.figure(constrained_layout=False, figsize=(5, 5), dpi=160) 93 | fig.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0) 94 | x2_ax = fig.add_subplot(111) 95 | x2_ax.imshow(x2) 96 | x2_ax.axis('off') 97 | plt.savefig(save_path + '/{}.png'.format(x2_title)) 98 | plt.close() 99 | 100 | # the reference points as red circles 101 | fig = plt.figure(constrained_layout=False, figsize=(5, 5), dpi=160) 102 | fig.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0) 103 | x1_ax = fig.add_subplot(111) 104 | x1_ax.imshow(x1) 105 | for (y, x) in idxs: 106 | # scale = im.height / img.shape[-2] 107 | x = ((x // fact) + 0.5) * fact 108 | y = ((y // fact) + 0.5) * fact 109 | x1_ax.add_patch(plt.Circle((x, y), fact // 2, color='r')) 110 | # x1_ax.set_title(x1_title) 111 | x1_ax.axis('off') 112 | plt.savefig(save_path+'/{}.png'.format(x1_title)) 113 | plt.close() 114 | 115 | del attn_weights_mean 116 | -------------------------------------------------------------------------------- /lib/test/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .params import TrackerParams, FeatureParams, Choice -------------------------------------------------------------------------------- /lib/test/utils/_init_paths.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os.path as osp 6 | import sys 7 | 8 | 9 | def add_path(path): 10 | if path not in sys.path: 11 | sys.path.insert(0, path) 12 | 13 | 14 | this_dir = osp.dirname(__file__) 15 | 16 | prj_path = osp.join(this_dir, '..', '..', '..') 17 | add_path(prj_path) 18 | -------------------------------------------------------------------------------- /lib/test/utils/augmentation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import torch 4 | import torch.nn.functional as F 5 | import cv2 as cv 6 | import random 7 | 8 | 9 | class Transform: 10 | """Base data augmentation transform class.""" 11 | 12 | def __init__(self, output_sz = None, shift = None): 13 | self.output_sz = output_sz 14 | self.shift = (0,0) if shift is None else shift 15 | 16 | def __call__(self, image, is_mask=False): 17 | raise NotImplementedError 18 | 19 | def crop_to_output(self, image): 20 | if isinstance(image, torch.Tensor): 21 | imsz = image.shape[2:] 22 | if self.output_sz is None: 23 | pad_h = 0 
24 | pad_w = 0 25 | else: 26 | pad_h = (self.output_sz[0] - imsz[0]) / 2 27 | pad_w = (self.output_sz[1] - imsz[1]) / 2 28 | 29 | pad_left = math.floor(pad_w) + self.shift[1] 30 | pad_right = math.ceil(pad_w) - self.shift[1] 31 | pad_top = math.floor(pad_h) + self.shift[0] 32 | pad_bottom = math.ceil(pad_h) - self.shift[0] 33 | 34 | return F.pad(image, (pad_left, pad_right, pad_top, pad_bottom), 'replicate') 35 | else: 36 | raise NotImplementedError 37 | 38 | class Blur(Transform): 39 | """Blur with given sigma (can be axis dependent).""" 40 | def __init__(self, sigma, output_sz = None, shift = None): 41 | super().__init__(output_sz, shift) 42 | if isinstance(sigma, (float, int)): 43 | sigma = (sigma, sigma) 44 | self.sigma = sigma 45 | self.filter_size = [math.ceil(2*s) for s in self.sigma] 46 | x_coord = [torch.arange(-sz, sz+1, dtype=torch.float32) for sz in self.filter_size] 47 | self.filter = [torch.exp(-(x**2)/(2*s**2)) for x, s in zip(x_coord, self.sigma)] 48 | self.filter[0] = self.filter[0].view(1,1,-1,1) / self.filter[0].sum() 49 | self.filter[1] = self.filter[1].view(1,1,1,-1) / self.filter[1].sum() 50 | 51 | def __call__(self, image, is_mask=False): 52 | if isinstance(image, torch.Tensor): 53 | sz = image.shape[2:] 54 | im1 = F.conv2d(image.view(-1,1,sz[0],sz[1]), self.filter[0], padding=(self.filter_size[0],0)) 55 | return self.crop_to_output(F.conv2d(im1, self.filter[1], padding=(0,self.filter_size[1])).view(1,-1,sz[0],sz[1])) 56 | else: 57 | raise NotImplementedError 58 | 59 | 60 | class FlipHorizontal(Transform): 61 | """Flip along horizontal axis.""" 62 | def __call__(self, image, is_mask=False): 63 | if isinstance(image, torch.Tensor): 64 | return self.crop_to_output(image.flip((3,))) 65 | else: 66 | return np.fliplr(image) 67 | 68 | class FlipVertical(Transform): 69 | """Flip along vertical axis.""" 70 | def __call__(self, image: torch.Tensor, is_mask=False): 71 | if isinstance(image, torch.Tensor): 72 | return self.crop_to_output(image.flip((2,))) 73 | else: 74 | return np.flipud(image) -------------------------------------------------------------------------------- /lib/test/utils/hann.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import torch.nn.functional as F 4 | 5 | 6 | def hann1d(sz: int, centered = True) -> torch.Tensor: 7 | """1D cosine window.""" 8 | if centered: 9 | return 0.5 * (1 - torch.cos((2 * math.pi / (sz + 1)) * torch.arange(1, sz + 1).float())) 10 | w = 0.5 * (1 + torch.cos((2 * math.pi / (sz + 2)) * torch.arange(0, sz//2 + 1).float())) 11 | return torch.cat([w, w[1:sz-sz//2].flip((0,))]) 12 | 13 | 14 | def hann2d(sz: torch.Tensor, centered = True) -> torch.Tensor: 15 | """2D cosine window.""" 16 | return hann1d(sz[0].item(), centered).reshape(1, 1, -1, 1) * hann1d(sz[1].item(), centered).reshape(1, 1, 1, -1) 17 | 18 | 19 | def hann2d_bias(sz: torch.Tensor, ctr_point: torch.Tensor, centered = True) -> torch.Tensor: 20 | """2D cosine window.""" 21 | distance = torch.stack([ctr_point, sz-ctr_point], dim=0) 22 | max_distance, _ = distance.max(dim=0) 23 | 24 | hann1d_x = hann1d(max_distance[0].item() * 2, centered) 25 | hann1d_x = hann1d_x[max_distance[0] - distance[0, 0]: max_distance[0] + distance[1, 0]] 26 | hann1d_y = hann1d(max_distance[1].item() * 2, centered) 27 | hann1d_y = hann1d_y[max_distance[1] - distance[0, 1]: max_distance[1] + distance[1, 1]] 28 | 29 | return hann1d_y.reshape(1, 1, -1, 1) * hann1d_x.reshape(1, 1, 1, -1) 30 | 31 | 32 | 33 | def hann2d_clipped(sz: 
torch.Tensor, effective_sz: torch.Tensor, centered = True) -> torch.Tensor: 34 | """1D clipped cosine window.""" 35 | 36 | # Ensure that the difference is even 37 | effective_sz += (effective_sz - sz) % 2 38 | effective_window = hann1d(effective_sz[0].item(), True).reshape(1, 1, -1, 1) * hann1d(effective_sz[1].item(), True).reshape(1, 1, 1, -1) 39 | 40 | pad = (sz - effective_sz) // 2 41 | 42 | window = F.pad(effective_window, (pad[1].item(), pad[1].item(), pad[0].item(), pad[0].item()), 'replicate') 43 | 44 | if centered: 45 | return window 46 | else: 47 | mid = (sz / 2).int() 48 | window_shift_lr = torch.cat((window[:, :, :, mid[1]:], window[:, :, :, :mid[1]]), 3) 49 | return torch.cat((window_shift_lr[:, :, mid[0]:, :], window_shift_lr[:, :, :mid[0], :]), 2) 50 | 51 | 52 | def gauss_fourier(sz: int, sigma: float, half: bool = False) -> torch.Tensor: 53 | if half: 54 | k = torch.arange(0, int(sz/2+1)) 55 | else: 56 | k = torch.arange(-int((sz-1)/2), int(sz/2+1)) 57 | return (math.sqrt(2*math.pi) * sigma / sz) * torch.exp(-2 * (math.pi * sigma * k.float() / sz)**2) 58 | 59 | 60 | def gauss_spatial(sz, sigma, center=0, end_pad=0): 61 | k = torch.arange(-(sz-1)/2, (sz+1)/2+end_pad) 62 | return torch.exp(-1.0/(2*sigma**2) * (k - center)**2) 63 | 64 | 65 | def label_function(sz: torch.Tensor, sigma: torch.Tensor): 66 | return gauss_fourier(sz[0].item(), sigma[0].item()).reshape(1, 1, -1, 1) * gauss_fourier(sz[1].item(), sigma[1].item(), True).reshape(1, 1, 1, -1) 67 | 68 | def label_function_spatial(sz: torch.Tensor, sigma: torch.Tensor, center: torch.Tensor = torch.zeros(2), end_pad: torch.Tensor = torch.zeros(2)): 69 | """The origin is in the middle of the image.""" 70 | return gauss_spatial(sz[0].item(), sigma[0].item(), center[0], end_pad[0].item()).reshape(1, 1, -1, 1) * \ 71 | gauss_spatial(sz[1].item(), sigma[1].item(), center[1], end_pad[1].item()).reshape(1, 1, 1, -1) 72 | 73 | 74 | def cubic_spline_fourier(f, a): 75 | """The continuous Fourier transform of a cubic spline kernel.""" 76 | 77 | bf = (6*(1 - torch.cos(2 * math.pi * f)) + 3*a*(1 - torch.cos(4 * math.pi * f)) 78 | - (6 + 8*a)*math.pi*f*torch.sin(2 * math.pi * f) - 2*a*math.pi*f*torch.sin(4 * math.pi * f)) \ 79 | / (4 * math.pi**4 * f**4) 80 | 81 | bf[f == 0] = 1 82 | 83 | return bf 84 | 85 | def max2d(a: torch.Tensor) -> (torch.Tensor, torch.Tensor): 86 | """Computes maximum and argmax in the last two dimensions.""" 87 | 88 | max_val_row, argmax_row = torch.max(a, dim=-2) 89 | max_val, argmax_col = torch.max(max_val_row, dim=-1) 90 | argmax_row = argmax_row.view(argmax_col.numel(),-1)[torch.arange(argmax_col.numel()), argmax_col.view(-1)] 91 | argmax_row = argmax_row.reshape(argmax_col.shape) 92 | argmax = torch.cat((argmax_row.unsqueeze(-1), argmax_col.unsqueeze(-1)), -1) 93 | return max_val, argmax 94 | -------------------------------------------------------------------------------- /lib/test/utils/load_text.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def load_text_numpy(path, delimiter, dtype): 6 | if isinstance(delimiter, (tuple, list)): 7 | for d in delimiter: 8 | try: 9 | ground_truth_rect = np.loadtxt(path, delimiter=d, dtype=dtype) 10 | return ground_truth_rect 11 | except: 12 | pass 13 | 14 | raise Exception('Could not read file {}'.format(path)) 15 | else: 16 | ground_truth_rect = np.loadtxt(path, delimiter=delimiter, dtype=dtype) 17 | return ground_truth_rect 18 | 19 | 20 | def load_text_pandas(path, delimiter, 
dtype): 21 | if isinstance(delimiter, (tuple, list)): 22 | for d in delimiter: 23 | try: 24 | ground_truth_rect = pd.read_csv(path, delimiter=d, header=None, dtype=dtype, na_filter=False, 25 | low_memory=False).values 26 | return ground_truth_rect 27 | except Exception as e: 28 | pass 29 | 30 | raise Exception('Could not read file {}'.format(path)) 31 | else: 32 | ground_truth_rect = pd.read_csv(path, delimiter=delimiter, header=None, dtype=dtype, na_filter=False, 33 | low_memory=False).values 34 | return ground_truth_rect 35 | 36 | 37 | def load_text(path, delimiter=' ', dtype=np.float32, backend='numpy'): 38 | if backend == 'numpy': 39 | return load_text_numpy(path, delimiter, dtype) 40 | elif backend == 'pandas': 41 | return load_text_pandas(path, delimiter, dtype) 42 | -------------------------------------------------------------------------------- /lib/test/utils/params.py: -------------------------------------------------------------------------------- 1 | from lib.utils import TensorList 2 | import random 3 | 4 | 5 | class TrackerParams: 6 | """Class for tracker parameters.""" 7 | def set_default_values(self, default_vals: dict): 8 | for name, val in default_vals.items(): 9 | if not hasattr(self, name): 10 | setattr(self, name, val) 11 | 12 | def get(self, name: str, *default): 13 | """Get a parameter value with the given name. If it does not exists, it return the default value given as a 14 | second argument or returns an error if no default value is given.""" 15 | if len(default) > 1: 16 | raise ValueError('Can only give one default value.') 17 | 18 | if not default: 19 | return getattr(self, name) 20 | 21 | return getattr(self, name, default[0]) 22 | 23 | def has(self, name: str): 24 | """Check if there exist a parameter with the given name.""" 25 | return hasattr(self, name) 26 | 27 | 28 | class FeatureParams: 29 | """Class for feature specific parameters""" 30 | def __init__(self, *args, **kwargs): 31 | if len(args) > 0: 32 | raise ValueError 33 | 34 | for name, val in kwargs.items(): 35 | if isinstance(val, list): 36 | setattr(self, name, TensorList(val)) 37 | else: 38 | setattr(self, name, val) 39 | 40 | 41 | def Choice(*args): 42 | """Can be used to sample random parameter values.""" 43 | return random.choice(args) 44 | -------------------------------------------------------------------------------- /lib/test/utils/transform_got10k.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import shutil 4 | import argparse 5 | import _init_paths 6 | from lib.test.evaluation.environment import env_settings 7 | 8 | 9 | def transform_got10k(tracker_name, cfg_name): 10 | env = env_settings() 11 | result_dir = env.results_path 12 | src_dir = os.path.join(result_dir, "%s/%s/got10k/" % (tracker_name, cfg_name)) 13 | dest_dir = os.path.join(result_dir, "%s/%s/got10k_submit/" % (tracker_name, cfg_name)) 14 | if not os.path.exists(dest_dir): 15 | os.makedirs(dest_dir) 16 | items = os.listdir(src_dir) 17 | for item in items: 18 | if "all" in item: 19 | continue 20 | src_path = os.path.join(src_dir, item) 21 | if "time" not in item: 22 | seq_name = item.replace(".txt", '') 23 | seq_dir = os.path.join(dest_dir, seq_name) 24 | if not os.path.exists(seq_dir): 25 | os.makedirs(seq_dir) 26 | new_item = item.replace(".txt", '_001.txt') 27 | dest_path = os.path.join(seq_dir, new_item) 28 | bbox_arr = np.loadtxt(src_path, dtype=np.int, delimiter='\t') 29 | np.savetxt(dest_path, bbox_arr, fmt='%d', delimiter=',') 30 | else: 31 | 
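# timing files ("*_time.txt") are copied unchanged into the per-sequence folder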
seq_name = item.replace("_time.txt", '') 32 | seq_dir = os.path.join(dest_dir, seq_name) 33 | if not os.path.exists(seq_dir): 34 | os.makedirs(seq_dir) 35 | dest_path = os.path.join(seq_dir, item) 36 | os.system("cp %s %s" % (src_path, dest_path)) 37 | # make zip archive 38 | shutil.make_archive(src_dir, "zip", src_dir) 39 | shutil.make_archive(dest_dir, "zip", dest_dir) 40 | # Remove the original files 41 | shutil.rmtree(src_dir) 42 | shutil.rmtree(dest_dir) 43 | 44 | 45 | if __name__ == "__main__": 46 | parser = argparse.ArgumentParser(description='transform got10k results.') 47 | parser.add_argument('--tracker_name', type=str, help='Name of tracking method.') 48 | parser.add_argument('--cfg_name', type=str, help='Name of config file.') 49 | 50 | args = parser.parse_args() 51 | transform_got10k(args.tracker_name, args.cfg_name) 52 | 53 | -------------------------------------------------------------------------------- /lib/test/utils/transform_trackingnet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import shutil 4 | import argparse 5 | import _init_paths 6 | from lib.test.evaluation.environment import env_settings 7 | 8 | 9 | def transform_trackingnet(tracker_name, cfg_name, epoch): 10 | env = env_settings() 11 | result_dir = env.results_path 12 | src_dir = os.path.join(result_dir, "%s/%s/trackingnet_BBOX_%s/" % (tracker_name, cfg_name, epoch)) 13 | dest_dir = os.path.join(result_dir, "%s/%s/trackingnet_submit_%s/" % (tracker_name, cfg_name, epoch)) 14 | if not os.path.exists(dest_dir): 15 | os.makedirs(dest_dir) 16 | items = os.listdir(src_dir) 17 | for item in items: 18 | if "all" in item: 19 | continue 20 | if "time" not in item: 21 | src_path = os.path.join(src_dir, item) 22 | dest_path = os.path.join(dest_dir, item) 23 | bbox_arr = np.loadtxt(src_path, dtype=np.int, delimiter='\t') 24 | np.savetxt(dest_path, bbox_arr, fmt='%d', delimiter=',') 25 | # make zip archive 26 | shutil.make_archive(src_dir, "zip", src_dir) 27 | shutil.make_archive(dest_dir, "zip", dest_dir) 28 | # Remove the original files 29 | shutil.rmtree(src_dir) 30 | shutil.rmtree(dest_dir) 31 | 32 | 33 | if __name__ == "__main__": 34 | parser = argparse.ArgumentParser(description='transform trackingnet results.') 35 | parser.add_argument('--tracker_name', type=str, help='Name of tracking method.') 36 | parser.add_argument('--cfg_name', type=str, help='Name of config file.') 37 | parser.add_argument('--epoch', type=str, help='Name of config file.') 38 | 39 | args = parser.parse_args() 40 | transform_trackingnet(args.tracker_name, args.cfg_name, args.epoch) 41 | -------------------------------------------------------------------------------- /lib/train/__init__.py: -------------------------------------------------------------------------------- 1 | from .admin.multigpu import MultiGPU 2 | -------------------------------------------------------------------------------- /lib/train/_init_paths.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os.path as osp 6 | import sys 7 | 8 | 9 | def add_path(path): 10 | if path not in sys.path: 11 | sys.path.insert(0, path) 12 | 13 | 14 | this_dir = osp.dirname(__file__) 15 | 16 | prj_path = osp.join(this_dir, '../..') 17 | add_path(prj_path) 18 | -------------------------------------------------------------------------------- 
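Both `lib/test/utils/_init_paths.py` and this `lib/train/_init_paths.py` exist purely for their import side effect: they prepend the project root to `sys.path` so that entry scripts can use absolute `lib.*` imports without installing the repository as a package. A minimal usage sketch (the script itself is hypothetical; `env_settings` is the helper defined in `lib/train/admin/environment.py` below):

```python
# hypothetical entry script placed next to an _init_paths.py (e.g. under tracking/)
import _init_paths  # noqa: F401 -- side effect only: adds the repo root to sys.path

from lib.train.admin.environment import env_settings

settings = env_settings()       # loads the paths defined in lib/train/admin/local.py
print(settings.workspace_dir)   # base directory for saving network checkpoints
```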
/lib/train/actors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_actor import * 2 | from .uvltrack import * 3 | -------------------------------------------------------------------------------- /lib/train/actors/base_actor.py: -------------------------------------------------------------------------------- 1 | from lib.utils import TensorDict 2 | 3 | 4 | class BaseActor: 5 | """ Base class for actor. The actor class handles the passing of the data through the network 6 | and calculation the loss""" 7 | def __init__(self, net, objective=None): 8 | """ 9 | args: 10 | net - The network to train 11 | objective - The loss function 12 | """ 13 | self.net = net 14 | self.objective = objective 15 | 16 | def __call__(self, data: TensorDict): 17 | """ Called in each training iteration. Should pass in input data through the network, calculate the loss, and 18 | return the training stats for the input data 19 | args: 20 | data - A TensorDict containing all the necessary data blocks. 21 | 22 | returns: 23 | loss - loss for the input data 24 | stats - a dict containing detailed losses 25 | """ 26 | raise NotImplementedError 27 | 28 | def to(self, device): 29 | """ Move the network to device 30 | args: 31 | device - device to use. 'cpu' or 'cuda' 32 | """ 33 | self.net.to(device) 34 | 35 | def train(self, mode=True): 36 | """ Set whether the network is in train mode. 37 | args: 38 | mode (True) - Bool specifying whether in training mode. 39 | """ 40 | self.net.train(mode) 41 | 42 | def eval(self): 43 | """ Set network to eval mode""" 44 | self.train(False) -------------------------------------------------------------------------------- /lib/train/admin/__init__.py: -------------------------------------------------------------------------------- 1 | from .environment import env_settings, create_default_local_file_ITP_train 2 | from .stats import AverageMeter, StatValue 3 | from .tensorboard import TensorboardWriter 4 | -------------------------------------------------------------------------------- /lib/train/admin/environment.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | from collections import OrderedDict 4 | 5 | 6 | def create_default_local_file(): 7 | path = os.path.join(os.path.dirname(__file__), 'local.py') 8 | 9 | empty_str = '\'\'' 10 | default_settings = OrderedDict({ 11 | 'workspace_dir': empty_str, 12 | 'tensorboard_dir': 'self.workspace_dir + \'/tensorboard/\'', 13 | 'pretrained_networks': 'self.workspace_dir + \'/pretrained_networks/\'', 14 | 'lasot_dir': empty_str, 15 | 'got10k_dir': empty_str, 16 | 'trackingnet_dir': empty_str, 17 | 'coco_dir': empty_str, 18 | 'lvis_dir': empty_str, 19 | 'sbd_dir': empty_str, 20 | 'imagenet_dir': empty_str, 21 | 'imagenetdet_dir': empty_str, 22 | 'ecssd_dir': empty_str, 23 | 'hkuis_dir': empty_str, 24 | 'msra10k_dir': empty_str, 25 | 'davis_dir': empty_str, 26 | 'youtubevos_dir': empty_str}) 27 | 28 | comment = {'workspace_dir': 'Base directory for saving network checkpoints.', 29 | 'tensorboard_dir': 'Directory for tensorboard files.'} 30 | 31 | with open(path, 'w') as f: 32 | f.write('class EnvironmentSettings:\n') 33 | f.write(' def __init__(self):\n') 34 | 35 | for attr, attr_val in default_settings.items(): 36 | comment_str = None 37 | if attr in comment: 38 | comment_str = comment[attr] 39 | if comment_str is None: 40 | f.write(' self.{} = {}\n'.format(attr, attr_val)) 41 | else: 42 | f.write(' self.{} = {} # 
{}\n'.format(attr, attr_val, comment_str)) 43 | 44 | 45 | def create_default_local_file_ITP_train(workspace_dir, data_dir): 46 | path = os.path.join(os.path.dirname(__file__), 'local.py') 47 | 48 | empty_str = '\'\'' 49 | default_settings = OrderedDict({ 50 | 'workspace_dir': workspace_dir, 51 | 'tensorboard_dir': os.path.join(workspace_dir, 'tensorboard'), # Directory for tensorboard files. 52 | 'pretrained_networks': os.path.join(workspace_dir, 'pretrained_networks'), 53 | 'lasot_dir': os.path.join(data_dir, 'lasot'), 54 | 'got10k_dir': os.path.join(data_dir, 'got10k', 'train'), 55 | 'lasot_lmdb_dir': os.path.join(data_dir, 'lasot_lmdb'), 56 | 'got10k_lmdb_dir': os.path.join(data_dir, 'got10k_lmdb'), 57 | 'trackingnet_dir': os.path.join(data_dir, 'trackingnet'), 58 | 'trackingnet_lmdb_dir': os.path.join(data_dir, 'trackingnet_lmdb'), 59 | 'coco_dir': os.path.join(data_dir, 'coco'), 60 | 'coco_lmdb_dir': os.path.join(data_dir, 'coco_lmdb'), 61 | 'lvis_dir': empty_str, 62 | 'sbd_dir': empty_str, 63 | 'imagenet_dir': os.path.join(data_dir, 'vid'), 64 | 'imagenet_lmdb_dir': os.path.join(data_dir, 'vid_lmdb'), 65 | 'imagenetdet_dir': empty_str, 66 | 'ecssd_dir': empty_str, 67 | 'hkuis_dir': empty_str, 68 | 'msra10k_dir': empty_str, 69 | 'davis_dir': empty_str, 70 | 'youtubevos_dir': empty_str}) 71 | 72 | comment = {'workspace_dir': 'Base directory for saving network checkpoints.', 73 | 'tensorboard_dir': 'Directory for tensorboard files.'} 74 | 75 | with open(path, 'w') as f: 76 | f.write('class EnvironmentSettings:\n') 77 | f.write(' def __init__(self):\n') 78 | 79 | for attr, attr_val in default_settings.items(): 80 | comment_str = None 81 | if attr in comment: 82 | comment_str = comment[attr] 83 | if comment_str is None: 84 | if attr_val == empty_str: 85 | f.write(' self.{} = {}\n'.format(attr, attr_val)) 86 | else: 87 | f.write(' self.{} = \'{}\'\n'.format(attr, attr_val)) 88 | else: 89 | f.write(' self.{} = \'{}\' # {}\n'.format(attr, attr_val, comment_str)) 90 | 91 | 92 | def env_settings(): 93 | env_module_name = 'lib.train.admin.local' 94 | try: 95 | env_module = importlib.import_module(env_module_name) 96 | return env_module.EnvironmentSettings() 97 | except: 98 | env_file = os.path.join(os.path.dirname(__file__), 'local.py') 99 | 100 | create_default_local_file() 101 | raise RuntimeError('YOU HAVE NOT SETUP YOUR local.py!!!\n Go to "{}" and set all the paths you need. Then try to run again.'.format(env_file)) 102 | -------------------------------------------------------------------------------- /lib/train/admin/local.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | prj_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")) 4 | class EnvironmentSettings: 5 | def __init__(self): 6 | self.workspace_dir = prj_dir # Base directory for saving network checkpoints. 7 | self.tensorboard_dir = os.path.join(prj_dir, 'tensorboard') # Directory for tensorboard files. 
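# all paths below are resolved relative to the repository root (prj_dir); edit
# them to point at your local copies of the datasets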
8 | self.pretrained_networks = os.path.join(prj_dir, 'pretrained_networks') 9 | self.lasot_dir = os.path.join(prj_dir, 'data/lasot') 10 | self.lasotext_dir = os.path.join(prj_dir, 'data/lasotext') 11 | self.got10k_dir = os.path.join(prj_dir, 'data/got10k') 12 | self.trackingnet_dir = os.path.join(prj_dir, 'data/trackingnet') 13 | self.coco_dir = os.path.join(prj_dir, 'data/coco') 14 | self.tnl2k_dir = os.path.join(prj_dir, 'data/tnl2k/train') 15 | self.tnl2k_test_dir = os.path.join(prj_dir, 'data/tnl2k/test') 16 | self.otb99_dir = os.path.join(prj_dir, 'data/otb99') 17 | self.refcoco_dir = os.path.join(prj_dir, 'data/refcocog') 18 | -------------------------------------------------------------------------------- /lib/train/admin/multigpu.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | # Here we use DistributedDataParallel(DDP) rather than DataParallel(DP) for multiple GPUs training 3 | 4 | 5 | def is_multi_gpu(net): 6 | return isinstance(net, (MultiGPU, nn.parallel.distributed.DistributedDataParallel)) 7 | 8 | 9 | class MultiGPU(nn.parallel.distributed.DistributedDataParallel): 10 | def __getattr__(self, item): 11 | try: 12 | return super().__getattr__(item) 13 | except: 14 | pass 15 | return getattr(self.module, item) 16 | -------------------------------------------------------------------------------- /lib/train/admin/settings.py: -------------------------------------------------------------------------------- 1 | from lib.train.admin.environment import env_settings 2 | 3 | 4 | class Settings: 5 | """ Training settings, e.g. the paths to datasets and networks.""" 6 | def __init__(self): 7 | self.set_default() 8 | 9 | def set_default(self): 10 | self.env = env_settings() 11 | self.use_gpu = True 12 | 13 | 14 | -------------------------------------------------------------------------------- /lib/train/admin/stats.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class StatValue: 4 | def __init__(self): 5 | self.clear() 6 | 7 | def reset(self): 8 | self.val = 0 9 | 10 | def clear(self): 11 | self.reset() 12 | self.history = [] 13 | 14 | def update(self, val): 15 | self.val = val 16 | self.history.append(self.val) 17 | 18 | 19 | class AverageMeter(object): 20 | """Computes and stores the average and current value""" 21 | def __init__(self): 22 | self.clear() 23 | self.has_new_data = False 24 | 25 | def reset(self): 26 | self.avg = 0 27 | self.val = 0 28 | self.sum = 0 29 | self.count = 0 30 | 31 | def clear(self): 32 | self.reset() 33 | self.history = [] 34 | 35 | def update(self, val, n=1): 36 | self.val = val 37 | self.sum += val * n 38 | self.count += n 39 | self.avg = self.sum / self.count 40 | 41 | def new_epoch(self): 42 | if self.count > 0: 43 | self.history.append(self.avg) 44 | self.reset() 45 | self.has_new_data = True 46 | else: 47 | self.has_new_data = False 48 | 49 | 50 | def topk_accuracy(output, target, topk=(1,)): 51 | """Computes the precision@k for the specified values of k""" 52 | single_input = not isinstance(topk, (tuple, list)) 53 | if single_input: 54 | topk = (topk,) 55 | 56 | maxk = max(topk) 57 | batch_size = target.size(0) 58 | 59 | _, pred = output.topk(maxk, 1, True, True) 60 | pred = pred.t() 61 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 62 | 63 | res = [] 64 | for k in topk: 65 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)[0] 66 | res.append(correct_k * 100.0 / batch_size) 67 | 68 | if single_input: 69 | return res[0] 
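# a tuple/list of k values was given: return one accuracy value per k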
70 |
71 | return res
72 |
-------------------------------------------------------------------------------- /lib/train/admin/tensorboard.py: --------------------------------------------------------------------------------
1 | import os
2 | from collections import OrderedDict
3 | try:
4 | from torch.utils.tensorboard import SummaryWriter
5 | except:
6 | print('WARNING: You are using tensorboardX instead, since your pytorch version is too old.')
7 | from tensorboardX import SummaryWriter
8 |
9 |
10 | class TensorboardWriter:
11 | def __init__(self, directory, loader_names):
12 | self.directory = directory
13 | self.writer = OrderedDict({name: SummaryWriter(os.path.join(self.directory, name)) for name in loader_names})
14 |
15 | def write_info(self, script_name, description):
16 | tb_info_writer = SummaryWriter(os.path.join(self.directory, 'info'))
17 | tb_info_writer.add_text('Script_name', script_name)
18 | tb_info_writer.add_text('Description', description)
19 | tb_info_writer.close()
20 |
21 | def write_epoch(self, stats: OrderedDict, epoch: int, ind=-1):
22 | for loader_name, loader_stats in stats.items():
23 | if loader_stats is None:
24 | continue
25 | for var_name, val in loader_stats.items():
26 | if hasattr(val, 'history') and getattr(val, 'has_new_data', True):
27 | self.writer[loader_name].add_scalar(var_name, val.history[ind], epoch)
-------------------------------------------------------------------------------- /lib/train/data/__init__.py: --------------------------------------------------------------------------------
1 | from .loader import LTRLoader
2 | from .image_loader import jpeg4py_loader, opencv_loader, jpeg4py_loader_w_failsafe, default_image_loader
3 |
-------------------------------------------------------------------------------- /lib/train/data/bounding_box_utils.py: --------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def rect_to_rel(bb, sz_norm=None):
5 | """Convert standard rectangular parametrization of the bounding box [x, y, w, h]
6 | to relative parametrization [cx/sw, cy/sh, log(w), log(h)], where [cx, cy] is the center coordinate.
7 | args:
8 | bb - N x 4 tensor of boxes.
9 | sz_norm - [N] x 2 tensor of value of [sw, sh] (optional). sw=w and sh=h if not given.
10 | """
11 |
12 | c = bb[...,:2] + 0.5 * bb[...,2:]
13 | if sz_norm is None:
14 | c_rel = c / bb[...,2:]
15 | else:
16 | c_rel = c / sz_norm
17 | sz_rel = torch.log(bb[...,2:])
18 | return torch.cat((c_rel, sz_rel), dim=-1)
19 |
20 |
21 | def rel_to_rect(bb, sz_norm=None):
22 | """Inverts the effect of rect_to_rel. See above."""
23 |
24 | sz = torch.exp(bb[...,2:])
25 | if sz_norm is None:
26 | c = bb[...,:2] * sz
27 | else:
28 | c = bb[...,:2] * sz_norm
29 | tl = c - 0.5 * sz
30 | return torch.cat((tl, sz), dim=-1)
31 |
32 |
33 | def masks_to_bboxes(mask, fmt='c'):
34 |
35 | """ Convert a mask tensor to one or more bounding boxes.
36 | Note: This function is a bit new, make sure it does what it says. /Andreas
37 | :param mask: Tensor of masks, shape = (..., H, W)
38 | :param fmt: bbox layout.
'c' => "center + size" or (x_center, y_center, width, height) 39 | 't' => "top left + size" or (x_left, y_top, width, height) 40 | 'v' => "vertices" or (x_left, y_top, x_right, y_bottom) 41 | :return: tensor containing a batch of bounding boxes, shape = (..., 4) 42 | """ 43 | batch_shape = mask.shape[:-2] 44 | mask = mask.reshape((-1, *mask.shape[-2:])) 45 | bboxes = [] 46 | 47 | for m in mask: 48 | mx = m.sum(dim=-2).nonzero() 49 | my = m.sum(dim=-1).nonzero() 50 | bb = [mx.min(), my.min(), mx.max(), my.max()] if (len(mx) > 0 and len(my) > 0) else [0, 0, 0, 0] 51 | bboxes.append(bb) 52 | 53 | bboxes = torch.tensor(bboxes, dtype=torch.float32, device=mask.device) 54 | bboxes = bboxes.reshape(batch_shape + (4,)) 55 | 56 | if fmt == 'v': 57 | return bboxes 58 | 59 | x1 = bboxes[..., :2] 60 | s = bboxes[..., 2:] - x1 + 1 61 | 62 | if fmt == 'c': 63 | return torch.cat((x1 + 0.5 * s, s), dim=-1) 64 | elif fmt == 't': 65 | return torch.cat((x1, s), dim=-1) 66 | 67 | raise ValueError("Undefined bounding box layout '%s'" % fmt) 68 | 69 | 70 | def masks_to_bboxes_multi(mask, ids, fmt='c'): 71 | assert mask.dim() == 2 72 | bboxes = [] 73 | 74 | for id in ids: 75 | mx = (mask == id).sum(dim=-2).nonzero() 76 | my = (mask == id).float().sum(dim=-1).nonzero() 77 | bb = [mx.min(), my.min(), mx.max(), my.max()] if (len(mx) > 0 and len(my) > 0) else [0, 0, 0, 0] 78 | 79 | bb = torch.tensor(bb, dtype=torch.float32, device=mask.device) 80 | 81 | x1 = bb[:2] 82 | s = bb[2:] - x1 + 1 83 | 84 | if fmt == 'v': 85 | pass 86 | elif fmt == 'c': 87 | bb = torch.cat((x1 + 0.5 * s, s), dim=-1) 88 | elif fmt == 't': 89 | bb = torch.cat((x1, s), dim=-1) 90 | else: 91 | raise ValueError("Undefined bounding box layout '%s'" % fmt) 92 | bboxes.append(bb) 93 | 94 | return bboxes 95 | -------------------------------------------------------------------------------- /lib/train/data/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/train/data/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /lib/train/data/image_loader.py: -------------------------------------------------------------------------------- 1 | # import jpeg4py 2 | import cv2 as cv 3 | from PIL import Image 4 | import numpy as np 5 | 6 | davis_palette = np.repeat(np.expand_dims(np.arange(0,256), 1), 3, 1).astype(np.uint8) 7 | davis_palette[:22, :] = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], 8 | [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128], 9 | [64, 0, 0], [191, 0, 0], [64, 128, 0], [191, 128, 0], 10 | [64, 0, 128], [191, 0, 128], [64, 128, 128], [191, 128, 128], 11 | [0, 64, 0], [128, 64, 0], [0, 191, 0], [128, 191, 0], 12 | [0, 64, 128], [128, 64, 128]] 13 | 14 | 15 | def default_image_loader(path): 16 | """The default image loader, reads the image from the given path. 
It first tries to use the jpeg4py_loader, 17 | but reverts to the opencv_loader if the former is not available.""" 18 | # if default_image_loader.use_jpeg4py is None: 19 | # # Try using jpeg4py 20 | # im = jpeg4py_loader(path) 21 | # if im is None: 22 | # default_image_loader.use_jpeg4py = False 23 | # print('Using opencv_loader instead.') 24 | # else: 25 | # default_image_loader.use_jpeg4py = True 26 | # return im 27 | # if default_image_loader.use_jpeg4py: 28 | # return jpeg4py_loader(path) 29 | default_image_loader.use_jpeg4py = False 30 | return opencv_loader(path) 31 | 32 | default_image_loader.use_jpeg4py = None 33 | 34 | 35 | def jpeg4py_loader(path): 36 | """ Image reading using jpeg4py https://github.com/ajkxyz/jpeg4py""" 37 | try: 38 | return jpeg4py.JPEG(path).decode() 39 | except Exception as e: 40 | print('ERROR: Could not read image "{}"'.format(path)) 41 | print(e) 42 | return None 43 | 44 | 45 | def opencv_loader(path): 46 | """ Read image using opencv's imread function and returns it in rgb format""" 47 | try: 48 | im = cv.imread(path, cv.IMREAD_COLOR) 49 | 50 | # convert to rgb and return 51 | return cv.cvtColor(im, cv.COLOR_BGR2RGB) 52 | except Exception as e: 53 | print('ERROR: Could not read image "{}"'.format(path)) 54 | print(e) 55 | return None 56 | 57 | 58 | def jpeg4py_loader_w_failsafe(path): 59 | """ Image reading using jpeg4py https://github.com/ajkxyz/jpeg4py""" 60 | try: 61 | return jpeg4py.JPEG(path).decode() 62 | except: 63 | try: 64 | im = cv.imread(path, cv.IMREAD_COLOR) 65 | 66 | # convert to rgb and return 67 | return cv.cvtColor(im, cv.COLOR_BGR2RGB) 68 | except Exception as e: 69 | print('ERROR: Could not read image "{}"'.format(path)) 70 | print(e) 71 | return None 72 | 73 | 74 | def opencv_seg_loader(path): 75 | """ Read segmentation annotation using opencv's imread function""" 76 | try: 77 | return cv.imread(path) 78 | except Exception as e: 79 | print('ERROR: Could not read image "{}"'.format(path)) 80 | print(e) 81 | return None 82 | 83 | 84 | def imread_indexed(filename): 85 | """ Load indexed image with given filename. Used to read segmentation annotations.""" 86 | 87 | im = Image.open(filename) 88 | 89 | annotation = np.atleast_3d(im)[...,0] 90 | return annotation 91 | 92 | 93 | 94 | def imwrite_indexed(filename, array, color_palette=None): 95 | """ Save indexed image as png. Used to save segmentation annotation.""" 96 | 97 | if color_palette is None: 98 | color_palette = davis_palette 99 | 100 | if np.atleast_3d(array).shape[2] != 1: 101 | raise Exception("Saving indexed PNGs requires 2D array.") 102 | 103 | im = Image.fromarray(array) 104 | im.putpalette(color_palette.ravel()) 105 | im.save(filename, format='PNG') -------------------------------------------------------------------------------- /lib/train/data/utils.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import html 3 | import os 4 | from functools import lru_cache 5 | 6 | import ftfy 7 | import regex as re 8 | 9 | 10 | @lru_cache() 11 | def default_bpe(): 12 | return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz") 13 | 14 | 15 | @lru_cache() 16 | def bytes_to_unicode(): 17 | """ 18 | Returns list of utf-8 byte and a corresponding list of unicode strings. 19 | The reversible bpe codes work on unicode strings. 20 | This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. 
21 | When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
22 | This is a significant percentage of your normal, say, 32K bpe vocab.
23 | To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
24 | And it avoids mapping to whitespace/control characters the bpe code barfs on.
25 | """
26 | bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
27 | cs = bs[:]
28 | n = 0
29 | for b in range(2**8):
30 | if b not in bs:
31 | bs.append(b)
32 | cs.append(2**8+n)
33 | n += 1
34 | cs = [chr(n) for n in cs]
35 | return dict(zip(bs, cs))
36 |
37 |
38 | def get_pairs(word):
39 | """Return set of symbol pairs in a word.
40 | Word is represented as tuple of symbols (symbols being variable-length strings).
41 | """
42 | pairs = set()
43 | prev_char = word[0]
44 | for char in word[1:]:
45 | pairs.add((prev_char, char))
46 | prev_char = char
47 | return pairs
48 |
49 |
50 | def basic_clean(text):
51 | text = ftfy.fix_text(text)
52 | text = html.unescape(html.unescape(text))
53 | return text.strip()
54 |
55 |
56 | def whitespace_clean(text):
57 | text = re.sub(r'\s+', ' ', text)
58 | text = text.strip()
59 | return text
60 |
61 |
62 | class SimpleTokenizer(object):
63 | def __init__(self, bpe_path: str = default_bpe()):
64 | self.byte_encoder = bytes_to_unicode()
65 | self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
66 | merges = gzip.open(bpe_path).read().decode("utf-8").split('\n')
67 | merges = merges[1:49152-256-2+1]
68 | merges = [tuple(merge.split()) for merge in merges]
69 | vocab = list(bytes_to_unicode().values())
70 | vocab = vocab + [v+'</w>' for v in vocab]
71 | for merge in merges:
72 | vocab.append(''.join(merge))
73 | vocab.extend(['<|startoftext|>', '<|endoftext|>'])
74 | self.encoder = dict(zip(vocab, range(len(vocab))))
75 | self.decoder = {v: k for k, v in self.encoder.items()}
76 | self.bpe_ranks = dict(zip(merges, range(len(merges))))
77 | self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'}
78 | self.pat = re.compile(r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", re.IGNORECASE)
79 |
80 | def bpe(self, token):
81 | if token in self.cache:
82 | return self.cache[token]
83 | word = tuple(token[:-1]) + ( token[-1] + '</w>',)
84 | pairs = get_pairs(word)
85 |
86 | if not pairs:
87 | return token+'</w>'
88 |
89 | while True:
90 | bigram = min(pairs, key = lambda pair: self.bpe_ranks.get(pair, float('inf')))
91 | if bigram not in self.bpe_ranks:
92 | break
93 | first, second = bigram
94 | new_word = []
95 | i = 0
96 | while i < len(word):
97 | try:
98 | j = word.index(first, i)
99 | new_word.extend(word[i:j])
100 | i = j
101 | except:
102 | new_word.extend(word[i:])
103 | break
104 |
105 | if word[i] == first and i < len(word)-1 and word[i+1] == second:
106 | new_word.append(first+second)
107 | i += 2
108 | else:
109 | new_word.append(word[i])
110 | i += 1
111 | new_word = tuple(new_word)
112 | word = new_word
113 | if len(word) == 1:
114 | break
115 | else:
116 | pairs = get_pairs(word)
117 | word = ' '.join(word)
118 | self.cache[token] = word
119 | return word
120 |
121 | def encode(self, text):
122 | bpe_tokens = []
123 | text = whitespace_clean(basic_clean(text)).lower()
124 | for token in re.findall(self.pat, text):
125 | token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8'))
126 | bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' '))
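# ids come from self.encoder (one per merged BPE symbol); decode() below inverts the mapping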
127 | return bpe_tokens
128 |
129 | def decode(self, tokens):
130 | text = ''.join([self.decoder[token] for token in tokens])
131 | text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('</w>', ' ')
132 | return text
133 |
-------------------------------------------------------------------------------- /lib/train/data_specs/README.md: --------------------------------------------------------------------------------
1 | # README
2 |
3 | ## Description for different text files
4 | GOT10K
5 | - got10k_train_full_split.txt: the complete GOT-10K training set. (9335 videos)
6 | - got10k_train_split.txt: part of videos from the GOT-10K training set
7 | - got10k_val_split.txt: another part of videos from the GOT-10K training set
8 | - got10k_vot_exclude.txt: 1k videos that are forbidden from "using to train models then testing on VOT" (as required by [VOT Challenge](https://www.votchallenge.net/vot2020/participation.html))
9 | - got10k_vot_train_split.txt: part of videos from the "VOT-permitted" GOT-10K training set
10 | - got10k_vot_val_split.txt: another part of videos from the "VOT-permitted" GOT-10K training set
11 |
12 | LaSOT
13 | - lasot_train_split.txt: the complete LaSOT training set
14 |
15 | TrackingNet
16 | - trackingnet_classmap.txt: the map from the sequence name to the target class for the TrackingNet dataset
-------------------------------------------------------------------------------- /lib/train/data_specs/lasot_test_split.txt: --------------------------------------------------------------------------------
1 | airplane-1 2 | airplane-9 3 | airplane-13 4 | airplane-15 5 | basketball-1 6 | basketball-6 7 | basketball-7 8 | basketball-11
9 | bear-2 10 | bear-4 11 | bear-6 12 | bear-17 13 | bicycle-2 14 | bicycle-7 15 | bicycle-9 16 | bicycle-18
17 | bird-2 18 | bird-3 19 | bird-15 20 | bird-17 21 | boat-3 22 | boat-4 23 | boat-12 24 | boat-17
25 | book-3 26 | book-10 27 | book-11 28 | book-19 29 | bottle-1 30 | bottle-12 31 | bottle-14 32 | bottle-18
33 | bus-2 34 | bus-5 35 | bus-17 36 | bus-19 37 | car-2 38 | car-6 39 | car-9 40 | car-17
41 | cat-1 42 | cat-3 43 | cat-18 44 | cat-20 45 | cattle-2 46 | cattle-7 47 | cattle-12 48 | cattle-13
49 | spider-14 50 | spider-16 51 | spider-18 52 | spider-20 53 | coin-3 54 | coin-6 55 | coin-7 56 | coin-18
57 | crab-3 58 | crab-6 59 | crab-12 60 | crab-18 61 | surfboard-12 62 | surfboard-4 63 | surfboard-5 64 | surfboard-8
65 | cup-1 66 | cup-4 67 | cup-7 68 | cup-17 69 | deer-4 70 | deer-8 71 | deer-10 72 | deer-14
73 | dog-1 74 | dog-7 75 | dog-15 76 | dog-19 77 | guitar-3 78 | guitar-8 79 | guitar-10 80 | guitar-16
81 | person-1 82 | person-5 83 | person-10 84 | person-12 85 | pig-2 86 | pig-10 87 | pig-13 88 | pig-18
89 | rubicCube-1 90 | rubicCube-6 91 | rubicCube-14 92 | rubicCube-19 93 | swing-10 94 | swing-14 95 | swing-17 96 | swing-20
97 | drone-13 98 | drone-15 99 | drone-2 100 | drone-7 101 | pool-12 102 | pool-15 103 | pool-3 104 | pool-7
105 | rabbit-10 106 | rabbit-13 107 | rabbit-17 108 | rabbit-19 109 | racing-10 110 | racing-15 111 | racing-16 112 | racing-20
113 | robot-1 114 | robot-19 115 | robot-5 116 | robot-8 117 | sepia-13 118 | sepia-16 119 | sepia-6 120 | sepia-8
121 | sheep-3 122 | sheep-5 123 | sheep-7 124 | sheep-9 125 | skateboard-16 126 | skateboard-19 127 | skateboard-3 128 | skateboard-8
129 | tank-14 130 | tank-16 131 | tank-6 132 | tank-9 133 | tiger-12 134 | tiger-18 135 | tiger-4 136 | tiger-6
137 | train-1 138 | train-11 139 | train-20 140 | train-7 141 | truck-16 142 |
truck-3 143 | truck-6 144 | truck-7 145 | turtle-16 146 | turtle-5 147 | turtle-8 148 | turtle-9 149 | umbrella-17 150 | umbrella-19 151 | umbrella-2 152 | umbrella-9 153 | yoyo-15 154 | yoyo-17 155 | yoyo-19 156 | yoyo-7 157 | zebra-10 158 | zebra-14 159 | zebra-16 160 | zebra-17 161 | elephant-1 162 | elephant-12 163 | elephant-16 164 | elephant-18 165 | goldfish-3 166 | goldfish-7 167 | goldfish-8 168 | goldfish-10 169 | hat-1 170 | hat-2 171 | hat-5 172 | hat-18 173 | kite-4 174 | kite-6 175 | kite-10 176 | kite-15 177 | motorcycle-1 178 | motorcycle-3 179 | motorcycle-9 180 | motorcycle-18 181 | mouse-1 182 | mouse-8 183 | mouse-9 184 | mouse-17 185 | flag-3 186 | flag-9 187 | flag-5 188 | flag-2 189 | frog-3 190 | frog-4 191 | frog-20 192 | frog-9 193 | gametarget-1 194 | gametarget-2 195 | gametarget-7 196 | gametarget-13 197 | hand-2 198 | hand-3 199 | hand-9 200 | hand-16 201 | helmet-5 202 | helmet-11 203 | helmet-19 204 | helmet-13 205 | licenseplate-6 206 | licenseplate-12 207 | licenseplate-13 208 | licenseplate-15 209 | electricfan-1 210 | electricfan-10 211 | electricfan-18 212 | electricfan-20 213 | chameleon-3 214 | chameleon-6 215 | chameleon-11 216 | chameleon-20 217 | crocodile-3 218 | crocodile-4 219 | crocodile-10 220 | crocodile-14 221 | gecko-1 222 | gecko-5 223 | gecko-16 224 | gecko-19 225 | fox-2 226 | fox-3 227 | fox-5 228 | fox-20 229 | giraffe-2 230 | giraffe-10 231 | giraffe-13 232 | giraffe-15 233 | gorilla-4 234 | gorilla-6 235 | gorilla-9 236 | gorilla-13 237 | hippo-1 238 | hippo-7 239 | hippo-9 240 | hippo-20 241 | horse-1 242 | horse-4 243 | horse-12 244 | horse-15 245 | kangaroo-2 246 | kangaroo-5 247 | kangaroo-11 248 | kangaroo-14 249 | leopard-1 250 | leopard-7 251 | leopard-16 252 | leopard-20 253 | lion-1 254 | lion-5 255 | lion-12 256 | lion-20 257 | lizard-1 258 | lizard-3 259 | lizard-6 260 | lizard-13 261 | microphone-2 262 | microphone-6 263 | microphone-14 264 | microphone-16 265 | monkey-3 266 | monkey-4 267 | monkey-9 268 | monkey-17 269 | shark-2 270 | shark-3 271 | shark-5 272 | shark-6 273 | squirrel-8 274 | squirrel-11 275 | squirrel-13 276 | squirrel-19 277 | volleyball-1 278 | volleyball-13 279 | volleyball-18 280 | volleyball-19 281 | -------------------------------------------------------------------------------- /lib/train/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .lasot import Lasot 2 | from .got10k import Got10k 3 | from .tracking_net import TrackingNet 4 | from .imagenetvid import ImagenetVID 5 | from .coco import MSCOCO 6 | from .coco_seq import MSCOCOSeq 7 | from .got10k_lmdb import Got10k_lmdb 8 | from .lasot_lmdb import Lasot_lmdb 9 | from .imagenetvid_lmdb import ImagenetVID_lmdb 10 | from .coco_seq_lmdb import MSCOCOSeq_lmdb 11 | from .tracking_net_lmdb import TrackingNet_lmdb 12 | from .tnl2k import TNL2K 13 | from .tnl2k_test import TNL2Ktest 14 | from .visualgenome import VisualGenome 15 | from .otb99 import OTB99 16 | from .object365 import Object365 17 | from .refcoco_seq import RefCOCOSeq 18 | from .lasotext import Lasotext 19 | from .lasot_test import Lasot_test 20 | from .webuav import WebUAV -------------------------------------------------------------------------------- /lib/train/dataset/base_image_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from lib.train.data.image_loader import jpeg4py_loader 3 | 4 | 5 | class BaseImageDataset(torch.utils.data.Dataset): 6 | """ 
Base class for image datasets """ 7 | 8 | def __init__(self, name, root, image_loader=jpeg4py_loader): 9 | """ 10 | args: 11 | root - The root path to the dataset 12 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 13 | is used by default. 14 | """ 15 | self.name = name 16 | self.root = root 17 | self.image_loader = image_loader 18 | 19 | self.image_list = [] # Contains the list of images. 20 | self.class_list = [] 21 | 22 | def __len__(self): 23 | """ Returns size of the dataset 24 | returns: 25 | int - number of samples in the dataset 26 | """ 27 | return self.get_num_images() 28 | 29 | def __getitem__(self, index): 30 | """ Not to be used! Check get_image() instead. 31 | """ 32 | return None 33 | 34 | def get_name(self): 35 | """ Name of the dataset 36 | 37 | returns: 38 | string - Name of the dataset 39 | """ 40 | raise NotImplementedError 41 | 42 | def get_num_images(self): 43 | """ Number of images in the dataset 44 | 45 | returns: 46 | int - number of images in the dataset.""" 47 | return len(self.image_list) 48 | 49 | def has_class_info(self): 50 | return False 51 | 52 | def get_class_name(self, image_id): 53 | return None 54 | 55 | def get_num_classes(self): 56 | return len(self.class_list) 57 | 58 | def get_class_list(self): 59 | return self.class_list 60 | 61 | def get_images_in_class(self, class_name): 62 | raise NotImplementedError 63 | 64 | def has_segmentation_info(self): 65 | return False 66 | 67 | def get_image_info(self, seq_id): 68 | """ Returns information about a particular image. 69 | 70 | args: 71 | seq_id - index of the image 72 | 73 | returns: 74 | Dict 75 | """ 76 | raise NotImplementedError 77 | 78 | def get_image(self, image_id, anno=None): 79 | """ Get an image 80 | 81 | args: 82 | image_id - index of image 83 | anno(None) - The annotation for the image (see get_image_info). If None, it will be loaded. 84 | 85 | returns: 86 | image - 87 | anno - 88 | dict - A dict containing meta information about the image, e.g. class of the target object. 89 | 90 | """ 91 | raise NotImplementedError 92 | 93 | -------------------------------------------------------------------------------- /lib/train/dataset/base_video_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | # 2021.1.5 use jpeg4py_loader_w_failsafe as default 3 | from lib.train.data.image_loader import jpeg4py_loader_w_failsafe 4 | 5 | 6 | class BaseVideoDataset(torch.utils.data.Dataset): 7 | """ Base class for video datasets """ 8 | 9 | def __init__(self, name, root, image_loader=jpeg4py_loader_w_failsafe): 10 | """ 11 | args: 12 | root - The root path to the dataset 13 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 14 | is used by default. 15 | """ 16 | self.name = name 17 | self.root = root 18 | self.image_loader = image_loader 19 | 20 | self.sequence_list = [] # Contains the list of sequences. 21 | self.class_list = [] 22 | 23 | def __len__(self): 24 | """ Returns size of the dataset 25 | returns: 26 | int - number of samples in the dataset 27 | """ 28 | return self.get_num_sequences() 29 | 30 | def __getitem__(self, index): 31 | """ Not to be used! Check get_frames() instead. 
32 | """ 33 | return None 34 | 35 | def is_video_sequence(self): 36 | """ Returns whether the dataset is a video dataset or an image dataset 37 | 38 | returns: 39 | bool - True if a video dataset 40 | """ 41 | return True 42 | 43 | def is_synthetic_video_dataset(self): 44 | """ Returns whether the dataset contains real videos or synthetic 45 | 46 | returns: 47 | bool - True if a video dataset 48 | """ 49 | return False 50 | 51 | def get_name(self): 52 | """ Name of the dataset 53 | 54 | returns: 55 | string - Name of the dataset 56 | """ 57 | raise NotImplementedError 58 | 59 | def get_num_sequences(self): 60 | """ Number of sequences in a dataset 61 | 62 | returns: 63 | int - number of sequences in the dataset.""" 64 | return len(self.sequence_list) 65 | 66 | def has_class_info(self): 67 | return False 68 | 69 | def has_occlusion_info(self): 70 | return False 71 | 72 | def get_num_classes(self): 73 | return len(self.class_list) 74 | 75 | def get_class_list(self): 76 | return self.class_list 77 | 78 | def get_sequences_in_class(self, class_name): 79 | raise NotImplementedError 80 | 81 | def has_segmentation_info(self): 82 | return False 83 | 84 | def get_sequence_info(self, seq_id): 85 | """ Returns information about a particular sequences, 86 | 87 | args: 88 | seq_id - index of the sequence 89 | 90 | returns: 91 | Dict 92 | """ 93 | raise NotImplementedError 94 | 95 | def get_frames(self, seq_id, frame_ids, anno=None): 96 | """ Get a set of frames from a particular sequence 97 | 98 | args: 99 | seq_id - index of sequence 100 | frame_ids - a list of frame numbers 101 | anno(None) - The annotation for the sequence (see get_sequence_info). If None, they will be loaded. 102 | 103 | returns: 104 | list - List of frames corresponding to frame_ids 105 | list - List of dicts for each frame 106 | dict - A dict containing meta information about the sequence, e.g. class of the target object. 107 | 108 | """ 109 | raise NotImplementedError 110 | 111 | -------------------------------------------------------------------------------- /lib/train/dataset/imagenetvid_lmdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .base_video_dataset import BaseVideoDataset 3 | from lib.train.data import jpeg4py_loader 4 | import torch 5 | from collections import OrderedDict 6 | from lib.train.admin import env_settings 7 | from lib.utils.lmdb_utils import decode_img, decode_json 8 | 9 | 10 | def get_target_to_image_ratio(seq): 11 | anno = torch.Tensor(seq['anno']) 12 | img_sz = torch.Tensor(seq['image_size']) 13 | return (anno[0, 2:4].prod() / (img_sz.prod())).sqrt() 14 | 15 | 16 | class ImagenetVID_lmdb(BaseVideoDataset): 17 | """ Imagenet VID dataset. 18 | 19 | Publication: 20 | ImageNet Large Scale Visual Recognition Challenge 21 | Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy, 22 | Aditya Khosla, Michael Bernstein, Alexander C. Berg and Li Fei-Fei 23 | IJCV, 2015 24 | https://arxiv.org/pdf/1409.0575.pdf 25 | 26 | Download the dataset from http://image-net.org/ 27 | """ 28 | def __init__(self, root=None, image_loader=jpeg4py_loader, min_length=0, max_target_area=1): 29 | """ 30 | args: 31 | root - path to the imagenet vid dataset. 32 | image_loader (default_image_loader) - The function to read the images. If installed, 33 | jpeg4py (https://github.com/ajkxyz/jpeg4py) is used by default. Else, 34 | opencv's imread is used. 35 | min_length - Minimum allowed sequence length. 
36 | max_target_area - max allowed ratio between target area and image area. Can be used to filter out targets 37 | which cover complete image. 38 | """ 39 | root = env_settings().imagenet_dir if root is None else root 40 | super().__init__("imagenetvid_lmdb", root, image_loader) 41 | 42 | sequence_list_dict = decode_json(root, "cache.json") 43 | self.sequence_list = sequence_list_dict 44 | 45 | # Filter the sequences based on min_length and max_target_area in the first frame 46 | self.sequence_list = [x for x in self.sequence_list if len(x['anno']) >= min_length and 47 | get_target_to_image_ratio(x) < max_target_area] 48 | 49 | def get_name(self): 50 | return 'imagenetvid_lmdb' 51 | 52 | def get_num_sequences(self): 53 | return len(self.sequence_list) 54 | 55 | def get_sequence_info(self, seq_id): 56 | bb_anno = torch.Tensor(self.sequence_list[seq_id]['anno']) 57 | valid = (bb_anno[:, 2] > 0) & (bb_anno[:, 3] > 0) 58 | visible = torch.ByteTensor(self.sequence_list[seq_id]['target_visible']) & valid.byte() 59 | return {'bbox': bb_anno, 'valid': valid, 'visible': visible} 60 | 61 | def _get_frame(self, sequence, frame_id): 62 | set_name = 'ILSVRC2015_VID_train_{:04d}'.format(sequence['set_id']) 63 | vid_name = 'ILSVRC2015_train_{:08d}'.format(sequence['vid_id']) 64 | frame_number = frame_id + sequence['start_frame'] 65 | frame_path = os.path.join('Data', 'VID', 'train', set_name, vid_name, 66 | '{:06d}.JPEG'.format(frame_number)) 67 | return decode_img(self.root, frame_path) 68 | 69 | def get_frames(self, seq_id, frame_ids, anno=None): 70 | sequence = self.sequence_list[seq_id] 71 | 72 | frame_list = [self._get_frame(sequence, f) for f in frame_ids] 73 | 74 | if anno is None: 75 | anno = self.get_sequence_info(seq_id) 76 | 77 | # Create anno dict 78 | anno_frames = {} 79 | for key, value in anno.items(): 80 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 81 | 82 | # added the class info to the meta info 83 | object_meta = OrderedDict({'object_class': sequence['class_name'], 84 | 'motion_class': None, 85 | 'major_class': None, 86 | 'root_class': None, 87 | 'motion_adverb': None}) 88 | 89 | return frame_list, anno_frames, object_meta 90 | 91 | -------------------------------------------------------------------------------- /lib/train/dataset/object365.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .base_video_dataset import BaseVideoDataset 3 | from lib.train.data import jpeg4py_loader 4 | import json 5 | import torch 6 | import random 7 | from pycocotools.coco import COCO 8 | from collections import OrderedDict 9 | from lib.train.admin import env_settings 10 | from .utils import generate_sentence 11 | 12 | class Object365(BaseVideoDataset): 13 | def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, split="train", version="2014"): 14 | super().__init__('Object365', root, image_loader) 15 | 16 | self.img_pth = os.path.join(root, 'imgs/') 17 | self.anno_path = os.path.join(root, 'zhiyuan_objv2_train.json') 18 | self.sequence_list = self._get_sequence_list() 19 | self.id2class = {} 20 | for cat in self.region_descriptions['categories']: 21 | self.id2class[cat['id']] = cat['name'] 22 | 23 | def _get_sequence_list(self): 24 | with open(self.anno_path, 'r') as f: 25 | self.region_descriptions = json.load(f) 26 | seq_list = list(range(len(self.region_descriptions['annotations']))) 27 | return seq_list 28 | 29 | def is_video_sequence(self): 30 | return False 31 | 32 | def 
is_grounding_sequence(self): 33 | return False 34 | 35 | def get_name(self): 36 | return 'object365' 37 | 38 | def has_class_info(self): 39 | return True 40 | 41 | def has_segmentation_info(self): 42 | return True 43 | 44 | def get_num_sequences(self): 45 | return len(self.sequence_list) 46 | 47 | def get_sequence_info(self, seq_id): 48 | anno = self._get_anno(seq_id) 49 | bbox = torch.Tensor(anno['bbox']).view(1, 4) 50 | valid = torch.Tensor([True]) 51 | visible = torch.Tensor([True]) 52 | 53 | return {'bbox': bbox, 'valid': valid, 'visible': visible} 54 | 55 | def _get_anno(self, seq_id): 56 | desc = self.region_descriptions['annotations'][seq_id] 57 | anno = { 58 | 'bbox': desc['bbox'] 59 | } 60 | return anno 61 | 62 | def _get_frames(self, seq_id): 63 | desc = self.region_descriptions['annotations'][seq_id] 64 | img_path = os.path.join(self.img_pth, "objects365_v1_%08d.jpg"%(desc['image_id'])) 65 | if os.path.exists(img_path): 66 | img = self.image_loader(img_path) 67 | else: 68 | img = self.image_loader(os.path.join(self.img_pth, "objects365_v2_%08d.jpg"%(desc['image_id']))) 69 | return img 70 | 71 | def get_frames(self, seq_id=None, frame_ids=None, anno=None): 72 | frame = self._get_frames(seq_id) 73 | 74 | frame_list = [frame.copy() for _ in frame_ids] 75 | 76 | if anno is None: 77 | anno = self.get_sequence_info(seq_id) 78 | 79 | language = self.id2class[self.region_descriptions['annotations'][seq_id]['category_id']] 80 | anno_frames = {} 81 | for key, value in anno.items(): 82 | anno_frames[key] = [value[0, ...] for _ in frame_ids] 83 | 84 | object_meta = OrderedDict({'object_class_name': None, 85 | 'motion_class': None, 86 | 'major_class': None, 87 | 'root_class': None, 88 | 'motion_adverb': None, 89 | 'language': generate_sentence(language.lower())}) 90 | 91 | return frame_list, anno_frames, object_meta 92 | 93 | def get_annos(self, seq_id, frame_ids, anno=None): 94 | if anno is None: 95 | anno = self.get_sequence_info(seq_id) 96 | 97 | anno_frames = {} 98 | for key, value in anno.items(): 99 | anno_frames[key] = [value[0, ...].clone() for _ in frame_ids] 100 | 101 | return anno_frames -------------------------------------------------------------------------------- /lib/train/dataset/otb99.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import glob 4 | import torch 5 | import numpy as np 6 | import pandas 7 | import csv 8 | import random 9 | from collections import OrderedDict 10 | from .base_video_dataset import BaseVideoDataset 11 | from lib.train.data import jpeg4py_loader 12 | from lib.train.admin import env_settings 13 | 14 | 15 | class OTB99(BaseVideoDataset): 16 | def __init__(self, root=None, image_loader=jpeg4py_loader, split=None): 17 | root = env_settings().lasot_dir if root is None else root 18 | super().__init__('OTB99', root, image_loader) 19 | self.split = split 20 | self.sequence_list = self._build_sequence_list(split=split) 21 | 22 | def _build_sequence_list(self, vid_ids=None, split=None): 23 | seq_path = glob.glob(os.path.join(self.root, f'OTB_query_{split}/*.txt')) 24 | sequence_list = [p.split('/')[-1].split('.')[0] for p in seq_path] 25 | return sequence_list 26 | 27 | def get_name(self): 28 | return 'otb99' 29 | 30 | def is_grounding_sequence(self): 31 | return True 32 | 33 | def is_vl_sequence(self): 34 | return True 35 | 36 | def is_tracking_sequence(self): 37 | return True 38 | 39 | def get_num_sequences(self): 40 | return len(self.sequence_list) 41 | 42 | def _read_bb_anno(self, 
seq_path): 43 | bb_anno_file = os.path.join(seq_path, "groundtruth_rect.txt") 44 | try: 45 | gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, low_memory=False).values 46 | except: 47 | gt = pandas.read_csv(bb_anno_file, delimiter='\t', header=None, dtype=np.float32, na_filter=False, low_memory=False).values 48 | return torch.tensor(gt) 49 | 50 | def _get_sequence_path(self, seq_id): 51 | seq_name = self.sequence_list[seq_id].split('-')[0] if self.split=='train' else self.sequence_list[seq_id] 52 | return os.path.join(self.root, 'OTB_videos', seq_name) 53 | 54 | def _read_language(self, seq_id): 55 | seq_name = self.sequence_list[seq_id] 56 | language_file = os.path.join(self.root, f'OTB_query_{self.split}', f"{seq_name}.txt") 57 | with open(language_file, 'r') as f: 58 | language = f.readlines() 59 | return language[0].rstrip() 60 | 61 | def get_sequence_info(self, seq_id): 62 | seq_path = self._get_sequence_path(seq_id) 63 | bbox = self._read_bb_anno(seq_path) 64 | 65 | valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0) 66 | visible = valid.clone().byte() 67 | 68 | return {'bbox': bbox, 'valid': valid, 'visible': visible} 69 | 70 | def _get_frame(self, seq_path, frame_id): 71 | images = sorted(glob.glob(os.path.join(seq_path, 'img', '*'))) 72 | return self.image_loader(images[frame_id]) 73 | 74 | def get_frames(self, seq_id, frame_ids, anno=None): 75 | seq_path = self._get_sequence_path(seq_id) 76 | frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids] 77 | 78 | anno = self.get_sequence_info(seq_id) 79 | 80 | language = self._read_language(seq_id) 81 | anno_frames = {} 82 | for key, value in anno.items(): 83 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 84 | 85 | object_meta = OrderedDict({'object_class_name': None, 86 | 'motion_class': None, 87 | 'major_class': None, 88 | 'root_class': None, 89 | 'motion_adverb': None, 90 | 'language': language.lower()}) 91 | 92 | return frame_list, anno_frames, object_meta 93 | 94 | def get_annos(self, seq_id, frame_ids, anno=None): 95 | if anno is None: 96 | anno = self.get_sequence_info(seq_id) 97 | 98 | anno_frames = {} 99 | for key, value in anno.items(): 100 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 101 | 102 | return anno_frames 103 | -------------------------------------------------------------------------------- /lib/train/dataset/tnl2k.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import glob 4 | import torch 5 | import numpy as np 6 | import pandas 7 | import csv 8 | import random 9 | from collections import OrderedDict 10 | from .base_video_dataset import BaseVideoDataset 11 | from lib.train.data import jpeg4py_loader 12 | from lib.train.admin import env_settings 13 | 14 | 15 | class TNL2K(BaseVideoDataset): 16 | def __init__(self, root=None, image_loader=jpeg4py_loader, split=None): 17 | root = env_settings().tnl2k_dir if root is None else root 18 | super().__init__('TNL2K', root, image_loader) 19 | 20 | self.sequence_list = self._build_sequence_list() 21 | 22 | def _build_sequence_list(self, vid_ids=None, split=None): 23 | seq_path = glob.glob(os.path.join(self.root, '*/')) 24 | sequence_list = [p.split('/')[-2] for p in seq_path] 25 | return sequence_list 26 | 27 | def get_name(self): 28 | return 'tnl2k' 29 | 30 | def has_class_info(self): 31 | return True 32 | 33 | def has_occlusion_info(self): 34 | return True 35 | 36 | def is_grounding_sequence(self): 37 
| return True 38 | 39 | def is_tracking_sequence(self): 40 | return True 41 | 42 | def is_vl_sequence(self): 43 | return True 44 | 45 | def get_num_sequences(self): 46 | return len(self.sequence_list) 47 | 48 | def get_sequences_in_class(self, class_name): 49 | return self.seq_per_class[class_name] 50 | 51 | def _read_bb_anno(self, seq_path): 52 | bb_anno_file = os.path.join(seq_path, "groundtruth.txt") 53 | gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, low_memory=False).values 54 | return torch.tensor(gt) 55 | 56 | def _read_target_visible(self, seq_path): 57 | # Read full occlusion and out_of_view 58 | occlusion_file = os.path.join(seq_path, "full_occlusion.txt") 59 | out_of_view_file = os.path.join(seq_path, "out_of_view.txt") 60 | 61 | with open(occlusion_file, 'r', newline='') as f: 62 | occlusion = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 63 | with open(out_of_view_file, 'r') as f: 64 | out_of_view = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 65 | 66 | target_visible = ~occlusion & ~out_of_view 67 | 68 | return target_visible 69 | 70 | def _get_sequence_path(self, seq_id): 71 | seq_name = self.sequence_list[seq_id] 72 | return os.path.join(self.root, seq_name) 73 | 74 | def _read_language(self, seq_path): 75 | language_file = os.path.join(seq_path, "language.txt") 76 | with open(language_file, 'r') as f: 77 | language = f.readlines() 78 | return language[0].rstrip() 79 | 80 | def get_sequence_info(self, seq_id): 81 | seq_path = self._get_sequence_path(seq_id) 82 | bbox = self._read_bb_anno(seq_path) 83 | 84 | valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0) 85 | visible = valid.clone().byte() 86 | 87 | return {'bbox': bbox, 'valid': valid, 'visible': visible} 88 | 89 | def _get_frame(self, seq_path, frame_id): 90 | images = sorted(glob.glob(os.path.join(seq_path, 'imgs', '*'))) 91 | return self.image_loader(images[frame_id]) 92 | 93 | def get_frames(self, seq_id, frame_ids, anno=None): 94 | seq_path = self._get_sequence_path(seq_id) 95 | frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids] 96 | 97 | anno = self.get_sequence_info(seq_id) 98 | 99 | language = self._read_language(seq_path) 100 | anno_frames = {} 101 | for key, value in anno.items(): 102 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 103 | 104 | object_meta = OrderedDict({'object_class_name': None, 105 | 'motion_class': None, 106 | 'major_class': None, 107 | 'root_class': None, 108 | 'motion_adverb': None, 109 | 'language': language.lower()}) 110 | 111 | return frame_list, anno_frames, object_meta 112 | 113 | def get_annos(self, seq_id, frame_ids, anno=None): 114 | if anno is None: 115 | anno = self.get_sequence_info(seq_id) 116 | 117 | anno_frames = {} 118 | for key, value in anno.items(): 119 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 120 | 121 | return anno_frames 122 | -------------------------------------------------------------------------------- /lib/train/dataset/tnl2k_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import glob 4 | import torch 5 | import numpy as np 6 | import pandas 7 | import csv 8 | import random 9 | from collections import OrderedDict 10 | from .base_video_dataset import BaseVideoDataset 11 | from lib.train.data import jpeg4py_loader 12 | from lib.train.admin import env_settings 13 | 14 | 15 | class TNL2Ktest(BaseVideoDataset): 16 | def __init__(self, root=None, 
image_loader=jpeg4py_loader, split=None): 17 | root = env_settings().lasot_dir if root is None else root 18 | super().__init__('TNL2K', root, image_loader) 19 | 20 | self.sequence_list = self._build_sequence_list() 21 | 22 | def _build_sequence_list(self, vid_ids=None, split=None): 23 | seq_path = glob.glob(os.path.join(self.root, '*/')) 24 | sequence_list = [p.split('/')[-2] for p in seq_path] 25 | return sequence_list 26 | 27 | def get_name(self): 28 | return 'tnl2k_test' 29 | 30 | def has_class_info(self): 31 | return True 32 | 33 | def has_occlusion_info(self): 34 | return True 35 | 36 | def is_grounding_sequence(self): 37 | return True 38 | 39 | def is_tracking_sequence(self): 40 | return True 41 | 42 | def is_vl_sequence(self): 43 | return True 44 | 45 | def get_num_sequences(self): 46 | return len(self.sequence_list) 47 | 48 | def get_sequences_in_class(self, class_name): 49 | return self.seq_per_class[class_name] 50 | 51 | def _read_bb_anno(self, seq_path): 52 | bb_anno_file = os.path.join(seq_path, "groundtruth.txt") 53 | gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, low_memory=False).values 54 | return torch.tensor(gt) 55 | 56 | def _read_target_visible(self, seq_path): 57 | # Read full occlusion and out_of_view 58 | occlusion_file = os.path.join(seq_path, "full_occlusion.txt") 59 | out_of_view_file = os.path.join(seq_path, "out_of_view.txt") 60 | 61 | with open(occlusion_file, 'r', newline='') as f: 62 | occlusion = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 63 | with open(out_of_view_file, 'r') as f: 64 | out_of_view = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 65 | 66 | target_visible = ~occlusion & ~out_of_view 67 | 68 | return target_visible 69 | 70 | def _get_sequence_path(self, seq_id): 71 | seq_name = self.sequence_list[seq_id] 72 | return os.path.join(self.root, seq_name) 73 | 74 | def _read_language(self, seq_path): 75 | language_file = os.path.join(seq_path, "language.txt") 76 | with open(language_file, 'r') as f: 77 | language = f.readlines() 78 | return language[0].rstrip() 79 | 80 | def get_sequence_info(self, seq_id): 81 | seq_path = self._get_sequence_path(seq_id) 82 | bbox = self._read_bb_anno(seq_path) 83 | 84 | valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0) 85 | visible = valid.clone().byte() 86 | 87 | return {'bbox': bbox, 'valid': valid, 'visible': visible} 88 | 89 | def _get_frame(self, seq_path, frame_id): 90 | images = sorted(glob.glob(os.path.join(seq_path, 'imgs', '*'))) 91 | return self.image_loader(images[frame_id]) 92 | 93 | def get_frames(self, seq_id, frame_ids, anno=None): 94 | seq_path = self._get_sequence_path(seq_id) 95 | frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids] 96 | 97 | anno = self.get_sequence_info(seq_id) 98 | 99 | language = self._read_language(seq_path) 100 | anno_frames = {} 101 | for key, value in anno.items(): 102 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 103 | 104 | object_meta = OrderedDict({'object_class_name': None, 105 | 'motion_class': None, 106 | 'major_class': None, 107 | 'root_class': None, 108 | 'motion_adverb': None, 109 | 'language': language.lower()}) 110 | 111 | return frame_list, anno_frames, object_meta 112 | 113 | def get_annos(self, seq_id, frame_ids, anno=None): 114 | if anno is None: 115 | anno = self.get_sequence_info(seq_id) 116 | 117 | anno_frames = {} 118 | for key, value in anno.items(): 119 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 120 | 121 | 
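# --- Illustrative aside (not a file from this repository) ---
# The _read_target_visible helpers above fold per-frame occlusion and
# out-of-view flags into one visibility mask. A self-contained sketch of
# that boolean logic with invented flag values; bool tensors are used here
# so that ~ is unambiguously a logical not (the repository code applies it
# to ByteTensors, the pre-BoolTensor mask type).
import torch

full_occlusion = torch.tensor([0, 1, 0, 0], dtype=torch.bool)  # 1 = target fully occluded
out_of_view = torch.tensor([0, 0, 1, 0], dtype=torch.bool)     # 1 = target outside the frame
visible = ~full_occlusion & ~out_of_view                       # visible iff neither flag is set
print(visible)  # tensor([ True, False, False,  True])
# --- end aside ---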
return anno_frames 122 | -------------------------------------------------------------------------------- /lib/train/dataset/utils.py: -------------------------------------------------------------------------------- 1 | def generate_sentence(name): 2 | return f"the {name} in the view" -------------------------------------------------------------------------------- /lib/train/dataset/visualgenome.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .base_video_dataset import BaseVideoDataset 3 | from lib.train.data import jpeg4py_loader 4 | import json 5 | import torch 6 | import random 7 | from pycocotools.coco import COCO 8 | from collections import OrderedDict 9 | from lib.train.admin import env_settings 10 | 11 | 12 | class VisualGenome(BaseVideoDataset): 13 | def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, split="train", version="2014"): 14 | super().__init__('VisualGenome', root, image_loader) 15 | 16 | self.img_pth = os.path.join(root, 'VG_100K/') 17 | self.anno_path = os.path.join(root, 'region_descriptions_new.json') 18 | self.sequence_list = self._get_sequence_list() 19 | 20 | def _get_sequence_list(self): 21 | with open(self.anno_path, 'r') as f: 22 | self.region_descriptions = json.load(f) 23 | seq_list = list(range(len(self.region_descriptions))) 24 | return seq_list 25 | 26 | def is_video_sequence(self): 27 | return False 28 | 29 | def get_name(self): 30 | return 'visualgenome' 31 | 32 | def has_class_info(self): 33 | return True 34 | 35 | def has_segmentation_info(self): 36 | return True 37 | 38 | def is_grounding_sequence(self): 39 | return True 40 | 41 | def get_num_sequences(self): 42 | return len(self.sequence_list) 43 | 44 | def get_sequence_info(self, seq_id): 45 | anno = self._get_anno(seq_id) 46 | bbox = torch.Tensor(anno['bbox']).view(1, 4) 47 | valid = torch.Tensor([True]) 48 | visible = torch.Tensor([True]) 49 | 50 | return {'bbox': bbox, 'valid': valid, 'visible': visible} 51 | 52 | def _get_anno(self, seq_id): 53 | desc = self.region_descriptions[seq_id] 54 | anno = { 55 | 'bbox': [desc['x'], desc['y'], desc['width'], desc['height']] 56 | } 57 | return anno 58 | 59 | def _get_frames(self, seq_id): 60 | desc = self.region_descriptions[seq_id] 61 | img = self.image_loader(os.path.join(self.img_pth, "%d.jpg"%(desc['image_id']))) 62 | return img 63 | 64 | def get_frames(self, seq_id=None, frame_ids=None, anno=None): 65 | frame = self._get_frames(seq_id) 66 | 67 | frame_list = [frame.copy() for _ in frame_ids] 68 | 69 | if anno is None: 70 | anno = self.get_sequence_info(seq_id) 71 | 72 | language = self.region_descriptions[seq_id]['phrase'] 73 | anno_frames = {} 74 | for key, value in anno.items(): 75 | anno_frames[key] = [value[0, ...] 
for _ in frame_ids] 76 | 77 | object_meta = OrderedDict({'object_class_name': None, 78 | 'motion_class': None, 79 | 'major_class': None, 80 | 'root_class': None, 81 | 'motion_adverb': None, 82 | 'language': language.lower()}) 83 | 84 | return frame_list, anno_frames, object_meta 85 | 86 | def get_annos(self, seq_id, frame_ids, anno=None): 87 | if anno is None: 88 | anno = self.get_sequence_info(seq_id) 89 | 90 | anno_frames = {} 91 | for key, value in anno.items(): 92 | anno_frames[key] = [value[0, ...].clone() for _ in frame_ids] 93 | 94 | return anno_frames -------------------------------------------------------------------------------- /lib/train/dataset/webuav.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import glob 4 | import torch 5 | import numpy as np 6 | import pandas 7 | import csv 8 | import random 9 | from collections import OrderedDict 10 | from .base_video_dataset import BaseVideoDataset 11 | from lib.train.data import jpeg4py_loader 12 | from lib.train.admin import env_settings 13 | 14 | 15 | class WebUAV(BaseVideoDataset): 16 | def __init__(self, root=None, image_loader=jpeg4py_loader, split=None): 17 | root = env_settings().webuav_dir if root is None else root 18 | super().__init__('WebUAV', root, image_loader) 19 | 20 | self.sequence_list = self._build_sequence_list() 21 | 22 | def _build_sequence_list(self, vid_ids=None, split=None): 23 | seq_path = glob.glob(os.path.join(self.root, 'train/Train/', '*/')) 24 | sequence_list = [p.split('/')[-2] for p in seq_path] 25 | return sequence_list 26 | 27 | def get_name(self): 28 | return 'webuav' 29 | 30 | def has_class_info(self): 31 | return True 32 | 33 | def has_occlusion_info(self): 34 | return True 35 | 36 | def is_grounding_sequence(self): 37 | return True 38 | 39 | def is_tracking_sequence(self): 40 | return True 41 | 42 | def is_vl_sequence(self): 43 | return True 44 | 45 | def get_num_sequences(self): 46 | return len(self.sequence_list) 47 | 48 | def get_sequences_in_class(self, class_name): 49 | return self.seq_per_class[class_name] 50 | 51 | def _read_bb_anno(self, seq_path): 52 | bb_anno_file = os.path.join(seq_path, "groundtruth_rect.txt") 53 | gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, low_memory=False).values 54 | return torch.tensor(gt) 55 | 56 | def _read_target_visible(self, seq_path): 57 | # Read the per-frame target-absent flags 58 | occlusion_file = os.path.join(seq_path, "absent.txt") 59 | 60 | with open(occlusion_file, 'r', newline='') as f: 61 | occlusion = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 62 | 63 | target_visible = ~occlusion 64 | 65 | return target_visible 66 | 67 | def _get_sequence_path(self, seq_id): 68 | seq_name = self.sequence_list[seq_id] 69 | return os.path.join(self.root, "train/Train", seq_name), seq_name 70 | 71 | def _read_language(self, seq): 72 | language_file = os.path.join(self.root, 'language/Language/Train', seq, "language.txt") 73 | with open(language_file, 'r') as f: 74 | language = f.readlines() 75 | return language[0].rstrip() 76 | 77 | def get_sequence_info(self, seq_id): 78 | seq_path, seq_name = self._get_sequence_path(seq_id) 79 | bbox = self._read_bb_anno(seq_path) 80 | 81 | valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0) 82 | visible = valid.clone().byte() 83 | 84 | return {'bbox': bbox, 'valid': valid, 'visible': visible} 85 | 86 | def _get_frame(self, seq_path, frame_id): 87 | images = sorted(glob.glob(os.path.join(seq_path, 
'img', '*'))) 88 | return self.image_loader(images[frame_id]) 89 | 90 | def get_frames(self, seq_id, frame_ids, anno=None): 91 | seq_path, seq_name = self._get_sequence_path(seq_id) 92 | frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids] 93 | 94 | anno = self.get_sequence_info(seq_id) 95 | 96 | language = self._read_language(seq_name) 97 | anno_frames = {} 98 | for key, value in anno.items(): 99 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 100 | 101 | object_meta = OrderedDict({'object_class_name': None, 102 | 'motion_class': None, 103 | 'major_class': None, 104 | 'root_class': None, 105 | 'motion_adverb': None, 106 | 'language': language.lower()}) 107 | 108 | return frame_list, anno_frames, object_meta 109 | 110 | def get_annos(self, seq_id, frame_ids, anno=None): 111 | if anno is None: 112 | anno = self.get_sequence_info(seq_id) 113 | 114 | anno_frames = {} 115 | for key, value in anno.items(): 116 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 117 | 118 | return anno_frames 119 | -------------------------------------------------------------------------------- /lib/train/run_training.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import random 4 | import argparse 5 | import importlib 6 | import cv2 as cv 7 | import _init_paths 8 | import numpy as np 9 | import torch.backends.cudnn 10 | import torch.distributed as dist 11 | torch.backends.cudnn.benchmark = False 12 | import lib.train.admin.settings as ws_settings 13 | 14 | import warnings 15 | warnings.filterwarnings('ignore') 16 | 17 | 18 | def init_seeds(seed): 19 | random.seed(seed) 20 | np.random.seed(seed) 21 | torch.manual_seed(seed) 22 | torch.cuda.manual_seed(seed) 23 | torch.backends.cudnn.deterministic = True 24 | torch.backends.cudnn.benchmark = False 25 | 26 | 27 | def run_training(script_name, config_name, cudnn_benchmark=True, local_rank=-1, save_dir=None, base_seed=None, 28 | use_lmdb=False, script_name_prv=None, config_name_prv=None, 29 | distill=None, script_teacher=None, config_teacher=None, stage1_model=None): 30 | """Run the train script. 31 | args: 32 | script_name: Name of the experiment in the "experiments/" folder. 33 | config_name: Name of the yaml file in the "experiments/" folder. 34 | cudnn_benchmark: Use cudnn benchmark or not (default is True). 35 | """ 36 | if save_dir is None: 37 | print("save_dir is not given. 
Use the default dir instead.") 38 | # This is needed to avoid strange crashes related to opencv 39 | cv.setNumThreads(0) 40 | 41 | torch.backends.cudnn.benchmark = cudnn_benchmark 42 | 43 | if int(os.environ["LOCAL_RANK"]) <= 0: 44 | print('script_name: {}.py config_name: {}.yaml'.format(script_name, config_name)) 45 | 46 | '''2021.1.5 set seed for different process''' 47 | if base_seed is not None: 48 | if local_rank != -1: 49 | init_seeds(base_seed + local_rank) 50 | else: 51 | init_seeds(base_seed) 52 | 53 | settings = ws_settings.Settings() 54 | settings.script_name = script_name 55 | settings.config_name = config_name 56 | settings.stage1_model = stage1_model 57 | settings.project_path = 'train/{}/{}'.format(script_name, config_name) 58 | if script_name_prv is not None and config_name_prv is not None: 59 | settings.project_path_prv = 'train/{}/{}'.format(script_name_prv, config_name_prv) 60 | settings.local_rank = local_rank 61 | settings.save_dir = os.path.abspath(save_dir) 62 | settings.use_lmdb = use_lmdb 63 | prj_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")) 64 | settings.cfg_file = os.path.join(prj_dir, 'experiments/%s/%s.yaml' % (script_name, config_name)) 65 | expr_module = importlib.import_module('lib.train.train_script_mutrack') 66 | expr_func = getattr(expr_module, 'run') 67 | 68 | expr_func(settings) 69 | 70 | 71 | def main(): 72 | parser = argparse.ArgumentParser(description='Run a train scripts in train_settings.') 73 | parser.add_argument('--script', type=str, default="mvit", required=False, help='Name of the train script.') 74 | parser.add_argument('--config', type=str, default="baseline_256_5_notoken", required=False, help="Name of the config file.") 75 | parser.add_argument('--cudnn_benchmark', type=bool, default=True, help='Set cudnn benchmark on (1) or off (0) (default is on).') 76 | parser.add_argument('--save_dir', type=str, default="/ssd/myc/VL_project/MUTrack", help='the directory to save checkpoints and logs') 77 | parser.add_argument('--seed', type=int, default=42, help='seed for random numbers') 78 | parser.add_argument('--use_lmdb', type=int, choices=[0, 1], default=0) # whether datasets are in lmdb format 79 | parser.add_argument('--script_prv', type=str, default=None, help='Name of the train script of previous model.') 80 | parser.add_argument('--config_prv', type=str, default=None, help="Name of the config file of previous model.") 81 | # for knowledge distillation 82 | parser.add_argument('--distill', type=int, choices=[0, 1], default=0) # whether to use knowledge distillation 83 | parser.add_argument('--script_teacher', type=str, help='teacher script name') 84 | parser.add_argument('--config_teacher', type=str, help='teacher yaml configure file name') 85 | parser.add_argument('--stage1_model', type=str, default=None, help='stage1 model used to train SPM.') 86 | args = parser.parse_args() 87 | os.environ['LOCAL_RANK'] = os.environ.get('LOCAL_RANK', '-1') 88 | local_rank = int(os.environ['LOCAL_RANK']) 89 | if local_rank != -1: 90 | dist.init_process_group(backend='nccl') 91 | torch.cuda.set_device(local_rank) 92 | else: 93 | torch.cuda.set_device(0) 94 | 95 | run_training(args.script, args.config, cudnn_benchmark=args.cudnn_benchmark, 96 | local_rank=local_rank, save_dir=args.save_dir, base_seed=args.seed, 97 | use_lmdb=args.use_lmdb, script_name_prv=args.script_prv, config_name_prv=args.config_prv, 98 | distill=args.distill, script_teacher=args.script_teacher, config_teacher=args.config_teacher, 99 | 
stage1_model=args.stage1_model) 100 | 101 | 102 | if __name__ == '__main__': 103 | main() 104 | -------------------------------------------------------------------------------- /lib/train/train_script_mutrack.py: -------------------------------------------------------------------------------- 1 | import os 2 | # loss function related 3 | from lib.utils.box_ops import giou_loss, GaussWeightedLoss 4 | from torch.nn.functional import l1_loss 5 | # train pipeline related 6 | from lib.train.trainers import LTRTrainer 7 | # distributed training related 8 | from torch.nn.parallel import DistributedDataParallel as DDP 9 | # some more advanced functions 10 | from .base_functions import * 11 | # network related 12 | import lib.models 13 | import lib.train.actors 14 | # for import modules 15 | import importlib 16 | from lib import registry 17 | 18 | def run(settings): 19 | settings.description = 'Training script for Mixformer' 20 | 21 | # update the default configs with config file 22 | if not os.path.exists(settings.cfg_file): 23 | raise ValueError("%s doesn't exist." % settings.cfg_file) 24 | config_module = importlib.import_module("lib.config.%s.config" % settings.script_name) 25 | cfg = config_module.cfg 26 | config_module.update_config_from_file(settings.cfg_file) 27 | 28 | # update settings based on cfg 29 | update_settings(settings, cfg) 30 | 31 | # Record the training log 32 | log_dir = os.path.join(settings.save_dir, 'logs') 33 | if settings.local_rank in [-1, 0]: 34 | if not os.path.exists(log_dir): 35 | os.makedirs(log_dir) 36 | settings.log_file = os.path.join(log_dir, "%s-%s.log" % (settings.script_name, settings.config_name)) 37 | 38 | # Build dataloaders 39 | loader_list = build_dataloaders(cfg, settings) 40 | 41 | # Create network 42 | net = registry.MODELS[settings.script_name](cfg).cuda() 43 | 44 | # wrap networks to distributed one 45 | if settings.local_rank != -1: 46 | net = DDP(net, device_ids=[settings.local_rank], find_unused_parameters=True) 47 | settings.device = torch.device("cuda:%d" % settings.local_rank) 48 | else: 49 | settings.device = torch.device("cuda:0") 50 | 51 | # settings.save_every_epoch = True 52 | actor = registry.ACTORS[settings.script_name](net, cfg) 53 | 54 | # Optimizer, parameters, and learning rates 55 | optimizer, lr_scheduler = get_optimizer_scheduler(net, cfg) 56 | trainer = LTRTrainer(actor, loader_list, optimizer, settings, lr_scheduler, use_amp=False) 57 | 58 | # train process 59 | trainer.train(cfg.TRAIN.EPOCH, load_latest=True, fail_safe=True) 60 | -------------------------------------------------------------------------------- /lib/train/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_trainer import BaseTrainer 2 | from .ltr_trainer import LTRTrainer 3 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | .vim-template* 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller 
builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jiayuan Mao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/_assets/prroi_visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/utils/PreciseRoIPooling/_assets/prroi_visualization.png -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | /_prroi_pooling 3 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : __init__.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited. 11 | 12 | from .prroi_pool import * 13 | 14 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/functional.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : functional.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited. 11 | 12 | import torch 13 | import torch.autograd as ag 14 | 15 | __all__ = ['prroi_pool2d'] 16 | 17 | 18 | _prroi_pooling = None 19 | 20 | 21 | def _import_prroi_pooling(): 22 | global _prroi_pooling 23 | 24 | if _prroi_pooling is None: 25 | try: 26 | from os.path import join as pjoin, dirname 27 | from torch.utils.cpp_extension import load as load_extension 28 | root_dir = pjoin(dirname(__file__), 'src') 29 | 30 | _prroi_pooling = load_extension( 31 | '_prroi_pooling', 32 | [pjoin(root_dir, 'prroi_pooling_gpu.c'), pjoin(root_dir, 'prroi_pooling_gpu_impl.cu')], 33 | verbose=True 34 | ) 35 | except ImportError: 36 | raise ImportError('Can not compile Precise RoI Pooling library.') 37 | 38 | return _prroi_pooling 39 | 40 | 41 | class PrRoIPool2DFunction(ag.Function): 42 | @staticmethod 43 | def forward(ctx, features, rois, pooled_height, pooled_width, spatial_scale): 44 | _prroi_pooling = _import_prroi_pooling() 45 | 46 | assert 'FloatTensor' in features.type() and 'FloatTensor' in rois.type(), \ 47 | 'Precise RoI Pooling only takes float input, got {} for features and {} for rois.'.format(features.type(), rois.type()) 48 | 49 | pooled_height = int(pooled_height) 50 | pooled_width = int(pooled_width) 51 | spatial_scale = float(spatial_scale) 52 | 53 | features = features.contiguous() 54 | rois = rois.contiguous() 55 | params = (pooled_height, pooled_width, spatial_scale) 56 | 57 | if features.is_cuda: 58 | output = _prroi_pooling.prroi_pooling_forward_cuda(features, rois, *params) 59 | ctx.params = params 60 | # everything here is contiguous. 
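# --- Illustrative aside (not a file from this repository) ---
# PrRoIPool2DFunction follows the standard torch.autograd.Function pattern:
# forward() stashes what backward() needs via ctx.save_for_backward, and
# backward() returns one gradient per forward argument (None for the three
# non-tensor pooling parameters). A toy Function showing the same pattern;
# Square is purely illustrative.
import torch
import torch.autograd as ag

class Square(ag.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)    # stash inputs needed by backward
        return x * x

    @staticmethod
    def backward(ctx, grad_output):
        x, = ctx.saved_tensors
        return 2 * x * grad_output  # d(x*x)/dx chained with the upstream gradient

x = torch.tensor([3.0], requires_grad=True)
Square.apply(x).sum().backward()
print(x.grad)  # tensor([6.])
# --- end aside ---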
61 | ctx.save_for_backward(features, rois, output) 62 | else: 63 | raise NotImplementedError('Precise RoI Pooling only supports GPU (cuda) implementations.') 64 | 65 | return output 66 | 67 | @staticmethod 68 | def backward(ctx, grad_output): 69 | _prroi_pooling = _import_prroi_pooling() 70 | 71 | features, rois, output = ctx.saved_tensors 72 | grad_input = grad_coor = None 73 | 74 | if features.requires_grad: 75 | grad_output = grad_output.contiguous() 76 | grad_input = _prroi_pooling.prroi_pooling_backward_cuda(features, rois, output, grad_output, *ctx.params) 77 | if rois.requires_grad: 78 | grad_output = grad_output.contiguous() 79 | grad_coor = _prroi_pooling.prroi_pooling_coor_backward_cuda(features, rois, output, grad_output, *ctx.params) 80 | 81 | return grad_input, grad_coor, None, None, None 82 | 83 | 84 | prroi_pool2d = PrRoIPool2DFunction.apply 85 | 86 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/prroi_pool.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : prroi_pool.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited. 11 | 12 | import torch.nn as nn 13 | 14 | from .functional import prroi_pool2d 15 | 16 | __all__ = ['PrRoIPool2D'] 17 | 18 | 19 | class PrRoIPool2D(nn.Module): 20 | def __init__(self, pooled_height, pooled_width, spatial_scale): 21 | super().__init__() 22 | 23 | self.pooled_height = int(pooled_height) 24 | self.pooled_width = int(pooled_width) 25 | self.spatial_scale = float(spatial_scale) 26 | 27 | def forward(self, features, rois): 28 | return prroi_pool2d(features, rois, self.pooled_height, self.pooled_width, self.spatial_scale) 29 | 30 | def extra_repr(self): 31 | return 'kernel_size=({pooled_height}, {pooled_width}), spatial_scale={spatial_scale}'.format(**self.__dict__) 32 | 33 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu.c 3 | * Author : Jiayuan Mao, Tete Xiao 4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 5 | * Date : 07/13/2018 6 | * 7 | * Distributed under terms of the MIT license. 8 | * Copyright (c) 2017 Megvii Technology Limited. 
9 | */ 10 | 11 | #include <math.h> 12 | #include <torch/extension.h> 13 | 14 | #include <ATen/ATen.h> 15 | #include <ATen/cuda/CUDAContext.h> 16 | 17 | // #include <THC/THC.h> 18 | 19 | #include "prroi_pooling_gpu_impl.cuh" 20 | 21 | 22 | at::Tensor prroi_pooling_forward_cuda(const at::Tensor &features, const at::Tensor &rois, int pooled_height, int pooled_width, float spatial_scale) { 23 | int nr_rois = rois.size(0); 24 | int nr_channels = features.size(1); 25 | int height = features.size(2); 26 | int width = features.size(3); 27 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 28 | auto output = at::zeros({nr_rois, nr_channels, pooled_height, pooled_width}, features.options()); 29 | 30 | if (output.numel() == 0) { 31 | AT_CUDA_CHECK(cudaGetLastError()); 32 | return output; 33 | } 34 | 35 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(features.device().index()); 36 | PrRoIPoolingForwardGpu( 37 | stream, features.data<float>(), rois.data<float>(), output.data<float>(), 38 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 39 | top_count 40 | ); 41 | 42 | AT_CUDA_CHECK(cudaGetLastError()); 43 | return output; 44 | } 45 | 46 | at::Tensor prroi_pooling_backward_cuda( 47 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff, 48 | int pooled_height, int pooled_width, float spatial_scale) { 49 | 50 | auto features_diff = at::zeros_like(features); 51 | 52 | int nr_rois = rois.size(0); 53 | int batch_size = features.size(0); 54 | int nr_channels = features.size(1); 55 | int height = features.size(2); 56 | int width = features.size(3); 57 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 58 | int bottom_count = batch_size * nr_channels * height * width; 59 | 60 | if (output.numel() == 0) { 61 | AT_CUDA_CHECK(cudaGetLastError()); 62 | return features_diff; 63 | } 64 | 65 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(features.device().index()); 66 | PrRoIPoolingBackwardGpu( 67 | stream, 68 | features.data<float>(), rois.data<float>(), output.data<float>(), output_diff.data<float>(), 69 | features_diff.data<float>(), 70 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 71 | top_count, bottom_count 72 | ); 73 | 74 | AT_CUDA_CHECK(cudaGetLastError()); 75 | return features_diff; 76 | } 77 | 78 | at::Tensor prroi_pooling_coor_backward_cuda( 79 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff, 80 | int pooled_height, int pooled_width, float spatial_scale) { 81 | 82 | auto coor_diff = at::zeros_like(rois); 83 | 84 | int nr_rois = rois.size(0); 85 | int nr_channels = features.size(1); 86 | int height = features.size(2); 87 | int width = features.size(3); 88 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 89 | int bottom_count = nr_rois * 5; 90 | 91 | if (output.numel() == 0) { 92 | AT_CUDA_CHECK(cudaGetLastError()); 93 | return coor_diff; 94 | } 95 | 96 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(features.device().index()); 97 | PrRoIPoolingCoorBackwardGpu( 98 | stream, 99 | features.data<float>(), rois.data<float>(), output.data<float>(), output_diff.data<float>(), 100 | coor_diff.data<float>(), 101 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 102 | top_count, bottom_count 103 | ); 104 | 105 | AT_CUDA_CHECK(cudaGetLastError()); 106 | return coor_diff; 107 | } 108 | 109 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 110 | m.def("prroi_pooling_forward_cuda", &prroi_pooling_forward_cuda, "PRRoIPooling_forward"); 111 | m.def("prroi_pooling_backward_cuda", &prroi_pooling_backward_cuda, 
"PRRoIPooling_backward"); 112 | m.def("prroi_pooling_coor_backward_cuda", &prroi_pooling_coor_backward_cuda, "PRRoIPooling_backward_coor"); 113 | } 114 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu.h 3 | * Author : Jiayuan Mao, Tete Xiao 4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 5 | * Date : 07/13/2018 6 | * 7 | * Distributed under terms of the MIT license. 8 | * Copyright (c) 2017 Megvii Technology Limited. 9 | */ 10 | 11 | int prroi_pooling_forward_cuda(THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, int pooled_height, int pooled_width, float spatial_scale); 12 | 13 | int prroi_pooling_backward_cuda( 14 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff, 15 | int pooled_height, int pooled_width, float spatial_scale 16 | ); 17 | 18 | int prroi_pooling_coor_backward_cuda( 19 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff, 20 | int pooled_height, int pooled_width, float spatial_scal 21 | ); 22 | 23 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu_impl.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu_impl.cuh 3 | * Author : Tete Xiao, Jiayuan Mao 4 | * Email : jasonhsiao97@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | * Copyright (c) 2017 Megvii Technology Limited. 
8 | */ 9 | 10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH 11 | #define PRROI_POOLING_GPU_IMPL_CUH 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #define F_DEVPTR_IN const float * 18 | #define F_DEVPTR_OUT float * 19 | 20 | void PrRoIPoolingForwardGpu( 21 | cudaStream_t stream, 22 | F_DEVPTR_IN bottom_data, 23 | F_DEVPTR_IN bottom_rois, 24 | F_DEVPTR_OUT top_data, 25 | const int channels_, const int height_, const int width_, 26 | const int pooled_height_, const int pooled_width_, 27 | const float spatial_scale_, 28 | const int top_count); 29 | 30 | void PrRoIPoolingBackwardGpu( 31 | cudaStream_t stream, 32 | F_DEVPTR_IN bottom_data, 33 | F_DEVPTR_IN bottom_rois, 34 | F_DEVPTR_IN top_data, 35 | F_DEVPTR_IN top_diff, 36 | F_DEVPTR_OUT bottom_diff, 37 | const int channels_, const int height_, const int width_, 38 | const int pooled_height_, const int pooled_width_, 39 | const float spatial_scale_, 40 | const int top_count, const int bottom_count); 41 | 42 | void PrRoIPoolingCoorBackwardGpu( 43 | cudaStream_t stream, 44 | F_DEVPTR_IN bottom_data, 45 | F_DEVPTR_IN bottom_rois, 46 | F_DEVPTR_IN top_data, 47 | F_DEVPTR_IN top_diff, 48 | F_DEVPTR_OUT bottom_diff, 49 | const int channels_, const int height_, const int width_, 50 | const int pooled_height_, const int pooled_width_, 51 | const float spatial_scale_, 52 | const int top_count, const int bottom_count); 53 | 54 | #ifdef __cplusplus 55 | } /* !extern "C" */ 56 | #endif 57 | 58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */ 59 | 60 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/tests/test_prroi_pooling2d.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : test_prroi_pooling2d.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 18/02/2018 6 | # 7 | # This file is part of Jacinle. 
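# --- Illustrative aside (not a file from this repository) ---
# The forward test below compares PrRoIPool2D(7, 7, spatial_scale=0.5) on a
# 14x14 RoI against F.avg_pool2d(features, kernel_size=2, stride=1). Why that
# works: the scaled RoI covers 7x7 feature-space bins of size 1x1, and the
# integral of a bilinearly interpolated surface over a unit cell equals the
# mean of its four corner samples, i.e. a 2x2 average-pooling window. Note
# also that each RoI row is (batch_index, x1, y1, x2, y2). A CPU-only check
# of the unit-cell identity (no CUDA extension required):
import torch
import torch.nn.functional as F

x = torch.rand(1, 1, 4, 4)
# Mean of the four corners of every unit cell ...
corners = (x[..., :-1, :-1] + x[..., :-1, 1:] + x[..., 1:, :-1] + x[..., 1:, 1:]) / 4
# ... equals 2x2 average pooling with stride 1.
assert torch.allclose(corners, F.avg_pool2d(x, kernel_size=2, stride=1))
# --- end aside ---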
8 | 9 | import unittest 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | from jactorch.utils.unittest import TorchTestCase 16 | 17 | from prroi_pool import PrRoIPool2D 18 | 19 | 20 | class TestPrRoIPool2D(TorchTestCase): 21 | def test_forward(self): 22 | pool = PrRoIPool2D(7, 7, spatial_scale=0.5) 23 | features = torch.rand((4, 16, 24, 32)).cuda() 24 | rois = torch.tensor([ 25 | [0, 0, 0, 14, 14], 26 | [1, 14, 14, 28, 28], 27 | ]).float().cuda() 28 | 29 | out = pool(features, rois) 30 | out_gold = F.avg_pool2d(features, kernel_size=2, stride=1) 31 | 32 | self.assertTensorClose(out, torch.stack(( 33 | out_gold[0, :, :7, :7], 34 | out_gold[1, :, 7:14, 7:14], 35 | ), dim=0)) 36 | 37 | def test_backward_shapeonly(self): 38 | pool = PrRoIPool2D(2, 2, spatial_scale=0.5) 39 | 40 | features = torch.rand((4, 2, 24, 32)).cuda() 41 | rois = torch.tensor([ 42 | [0, 0, 0, 4, 4], 43 | [1, 14, 14, 18, 18], 44 | ]).float().cuda() 45 | features.requires_grad = rois.requires_grad = True 46 | out = pool(features, rois) 47 | 48 | loss = out.sum() 49 | loss.backward() 50 | 51 | self.assertTupleEqual(features.size(), features.grad.size()) 52 | self.assertTupleEqual(rois.size(), rois.grad.size()) 53 | 54 | 55 | if __name__ == '__main__': 56 | unittest.main() 57 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/src/prroi_pooling_gpu_impl.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu_impl.cuh 3 | * Author : Tete Xiao, Jiayuan Mao 4 | * Email : jasonhsiao97@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | * Copyright (c) 2017 Megvii Technology Limited. 8 | */ 9 | 10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH 11 | #define PRROI_POOLING_GPU_IMPL_CUH 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #define F_DEVPTR_IN const float * 18 | #define F_DEVPTR_OUT float * 19 | 20 | void PrRoIPoolingForwardGpu( 21 | cudaStream_t stream, 22 | F_DEVPTR_IN bottom_data, 23 | F_DEVPTR_IN bottom_rois, 24 | F_DEVPTR_OUT top_data, 25 | const int channels_, const int height_, const int width_, 26 | const int pooled_height_, const int pooled_width_, 27 | const float spatial_scale_, 28 | const int top_count); 29 | 30 | void PrRoIPoolingBackwardGpu( 31 | cudaStream_t stream, 32 | F_DEVPTR_IN bottom_data, 33 | F_DEVPTR_IN bottom_rois, 34 | F_DEVPTR_IN top_data, 35 | F_DEVPTR_IN top_diff, 36 | F_DEVPTR_OUT bottom_diff, 37 | const int channels_, const int height_, const int width_, 38 | const int pooled_height_, const int pooled_width_, 39 | const float spatial_scale_, 40 | const int top_count, const int bottom_count); 41 | 42 | void PrRoIPoolingCoorBackwardGpu( 43 | cudaStream_t stream, 44 | F_DEVPTR_IN bottom_data, 45 | F_DEVPTR_IN bottom_rois, 46 | F_DEVPTR_IN top_data, 47 | F_DEVPTR_IN top_diff, 48 | F_DEVPTR_OUT bottom_diff, 49 | const int channels_, const int height_, const int width_, 50 | const int pooled_height_, const int pooled_width_, 51 | const float spatial_scale_, 52 | const int top_count, const int bottom_count); 53 | 54 | #ifdef __cplusplus 55 | } /* !extern "C" */ 56 | #endif 57 | 58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */ 59 | 60 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/prroi_pool/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # File : CMakeLists.txt 2 | # Author : 
Kanghee Lee 3 | # Email : lerohiso@gmail.com 4 | # Date : 09/25/2020 5 | # 6 | # This file is part of PreciseRoIPooling. 7 | # Distributed under terms of the MIT license. 8 | 9 | CMAKE_MINIMUM_REQUIRED(VERSION 3.17 FATAL_ERROR) 10 | 11 | PROJECT(precise_roi_pooling) 12 | FIND_PACKAGE(CUDA) 13 | FIND_PACKAGE(PythonInterp 3) 14 | 15 | if (MSVC) 16 | SET(GPU_LIB ${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/build/precise_roi_pooling_cuda.lib) 17 | elseif (UNIX) 18 | SET(GPU_LIB ${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/build/precise_roi_pooling_cuda.so) 19 | endif() 20 | 21 | if (NOT EXISTS ${GPU_LIB}) 22 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/build_cuda.py" RESULTS_VARIABLE RET_CODE) 23 | if (NOT "${RET_CODE}" STREQUAL "0") 24 | MESSAGE(FATAL_ERROR "Failed to compile CUDA code") 25 | endif () 26 | endif () 27 | 28 | if (NOT DEFINED TF_PATH) 29 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(tf.sysconfig.get_include(), end='', flush=True)" OUTPUT_VARIABLE TF_INC) 30 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(tf.sysconfig.get_lib(), end='', flush=True)" OUTPUT_VARIABLE TF_LIB) 31 | MESSAGE(STATUS "TF_INC: " ${TF_INC}) 32 | MESSAGE(STATUS "TF_LIB: " ${TF_LIB}) 33 | SET(TF_PATH 1) 34 | endif () 35 | 36 | if (NOT DEFINED TF_FLAGS) 37 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(' '.join(tf.sysconfig.get_compile_flags()), end='', flush=True)" OUTPUT_VARIABLE TF_CFLAGS) 38 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(' '.join(tf.sysconfig.get_link_flags()), end='', flush=True)" OUTPUT_VARIABLE TF_LFLAGS) 39 | MESSAGE(STATUS "TF_CFLAGS: " ${TF_CFLAGS}) 40 | MESSAGE(STATUS "TF_LFLAGS: " ${TF_LFLAGS}) 41 | SET(TF_FLAGS 1) 42 | endif () 43 | 44 | INCLUDE_DIRECTORIES(${TF_INC}) 45 | LINK_DIRECTORIES(${TF_LIB}) 46 | INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS}) 47 | 48 | LIST(APPEND CMAKE_CXX_FLAGS "${TF_CFLAGS} ${TF_LFLAGS} -O2 -D GOOGLE_CUDA=1 -std=c++11 -shared") 49 | if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC") 50 | LIST(APPEND CMAKE_CXX_FLAGS " -lcudart -DNOMINMAX") 51 | endif () 52 | 53 | MESSAGE(STATUS "CMAKE_CXX_COMPILER_ID: " ${CMAKE_CXX_COMPILER_ID}) 54 | MESSAGE(STATUS "CMAKE_CXX_FLAGS: " ${CMAKE_CXX_FLAGS}) 55 | 56 | ADD_LIBRARY(precise_roi_pooling SHARED src/kernels/precise_roi_pooling.h 57 | src/kernels/precise_roi_pooling_kernels.cc 58 | src/ops/precise_roi_pooling_ops.cc) 59 | TARGET_COMPILE_FEATURES(precise_roi_pooling PUBLIC cxx_std_11) 60 | SET_TARGET_PROPERTIES(precise_roi_pooling PROPERTIES 61 | RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/module/" 62 | LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/module/" 63 | ) 64 | 65 | ADD_LIBRARY(precise_roi_pooling_gpu SHARED IMPORTED) 66 | if (MSVC) 67 | SET_TARGET_PROPERTIES(precise_roi_pooling_gpu PROPERTIES IMPORTED_IMPLIB ${GPU_LIB}) 68 | elseif (UNIX) 69 | SET_TARGET_PROPERTIES(precise_roi_pooling_gpu PROPERTIES IMPORTED_LOCATION ${GPU_LIB}) 70 | endif() 71 | 72 | ADD_LIBRARY(tensorflow_internal SHARED IMPORTED) 73 | if (MSVC) 74 | SET_TARGET_PROPERTIES(tensorflow_internal PROPERTIES 75 | IMPORTED_IMPLIB ${TF_LIB}/python/_pywrap_tensorflow_internal.lib) 76 | elseif (UNIX) 77 | SET_TARGET_PROPERTIES(tensorflow_internal PROPERTIES 78 | IMPORTED_LOCATION ${TF_LIB}/python/_pywrap_tensorflow_internal.so) 79 | endif() 80 | 81 | TARGET_LINK_LIBRARIES(precise_roi_pooling tensorflow_internal 82 | precise_roi_pooling_gpu 83 | ${CUDA_LIBRARIES}) 84 | 85 | ADD_CUSTOM_TARGET(precise_roi_pooling_test ALL 86 | COMMAND ${CMAKE_COMMAND} -E env 87 | "PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR}/../" 88 | ${PYTHON_EXECUTABLE} tests/precise_roi_pooling_ops_test.py 89 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/../") 90 | 91 | ADD_DEPENDENCIES(precise_roi_pooling_test precise_roi_pooling) 92 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/prroi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : __init__.py 4 | # Author : Kanghee Lee 5 | # Email : lerohiso@gmail.com 6 | # Date : 09/25/2020 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | 11 | from .precise_roi_pooling_ops import * 12 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/build_cuda.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : build_cuda.py 4 | # Author : Kanghee Lee 5 | # Email : lerohiso@gmail.com 6 | # Date : 09/25/2020 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | 11 | import os 12 | import platform 13 | import shutil 14 | import subprocess 15 | 16 | import tensorflow as tf 17 | 18 | CUDA_SRCS = [] 19 | CUDA_OUTPUT_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'build') 20 | 21 | if not os.path.isdir(CUDA_OUTPUT_DIR): 22 | os.makedirs(CUDA_OUTPUT_DIR) 23 | 24 | for file in os.listdir(os.path.dirname(os.path.realpath(__file__))): 25 | if file.endswith('.cu.cc'): 26 | CUDA_SRCS.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), file)) 27 | 28 | CUDA_COMPILER = shutil.which('nvcc') 29 | if CUDA_COMPILER is None: 30 | raise ValueError('CUDA Compiler Not Found') 31 | 32 | TF_CFLAGS = ' '.join(tf.sysconfig.get_compile_flags()) 33 | TF_LFLAGS = ' '.join(tf.sysconfig.get_link_flags()) 34 | 35 | CUDA_NVCC_FLAGS = TF_CFLAGS + ' ' + TF_LFLAGS + ' -D GOOGLE_CUDA=1 -x cu --expt-relaxed-constexpr' 36 | 37 | os_type = platform.system() 38 | if os_type == 'Windows': 39 | CUDA_NVCC_FLAGS += ' -Xcompiler -MD -cudart=shared -D_WINSOCKAPI_' 40 | CUDA_OUTPUT_FILENAME = 'precise_roi_pooling_cuda.lib' 41 | elif os_type == 'Linux': 42 | CUDA_NVCC_FLAGS += ' -Xcompiler -fPIC -DNDEBUG' 43 | CUDA_OUTPUT_FILENAME = 'precise_roi_pooling_cuda.so' 44 | 45 | COMMAND = CUDA_COMPILER 46 | COMMAND += ' -c -o ' + os.path.join(CUDA_OUTPUT_DIR, CUDA_OUTPUT_FILENAME) 47 | COMMAND += ' ' + ' '.join(CUDA_SRCS) 48 | COMMAND += ' ' + CUDA_NVCC_FLAGS 49 | 50 | process = subprocess.Popen(COMMAND, shell=True, stderr=subprocess.STDOUT, stdout=subprocess.PIPE) 51 | process_output = process.communicate()[0] 52 | print(process_output.decode()) 53 | 54 | if process.returncode != 0: 55 | raise ValueError('Failed to compile CUDA code') 56 |
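Both build_cuda.py above and the CMakeLists.txt before it lean on TensorFlow's tf.sysconfig to discover headers, library paths, and flags; when the build fails, printing the same values interactively is the quickest diagnostic. A short check using the standard tf.sysconfig API:

```python
import tensorflow as tf

# The same values CMakeLists.txt recovers via EXECUTE_PROCESS
# and build_cuda.py joins into TF_CFLAGS / TF_LFLAGS.
print(tf.sysconfig.get_include())                  # -> TF_INC
print(tf.sysconfig.get_lib())                      # -> TF_LIB
print(' '.join(tf.sysconfig.get_compile_flags()))  # -> TF_CFLAGS
print(' '.join(tf.sysconfig.get_link_flags()))     # -> TF_LFLAGS
```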
-------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/external/prroi_pooling_gpu_impl.cuh:
-------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu_impl.cuh 3 | * Author : Tete Xiao, Jiayuan Mao 4 | * Email : jasonhsiao97@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | * Copyright (c) 2017 Megvii Technology Limited. 8 | */ 9 | 10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH 11 | #define PRROI_POOLING_GPU_IMPL_CUH 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #define F_DEVPTR_IN const float * 18 | #define F_DEVPTR_OUT float * 19 | 20 | void PrRoIPoolingForwardGpu( 21 | cudaStream_t stream, 22 | F_DEVPTR_IN bottom_data, 23 | F_DEVPTR_IN bottom_rois, 24 | F_DEVPTR_OUT top_data, 25 | const int channels_, const int height_, const int width_, 26 | const int pooled_height_, const int pooled_width_, 27 | const float spatial_scale_, 28 | const int top_count); 29 | 30 | void PrRoIPoolingBackwardGpu( 31 | cudaStream_t stream, 32 | F_DEVPTR_IN bottom_data, 33 | F_DEVPTR_IN bottom_rois, 34 | F_DEVPTR_IN top_data, 35 | F_DEVPTR_IN top_diff, 36 | F_DEVPTR_OUT bottom_diff, 37 | const int channels_, const int height_, const int width_, 38 | const int pooled_height_, const int pooled_width_, 39 | const float spatial_scale_, 40 | const int top_count, const int bottom_count); 41 | 42 | void PrRoIPoolingCoorBackwardGpu( 43 | cudaStream_t stream, 44 | F_DEVPTR_IN bottom_data, 45 | F_DEVPTR_IN bottom_rois, 46 | F_DEVPTR_IN top_data, 47 | F_DEVPTR_IN top_diff, 48 | F_DEVPTR_OUT bottom_diff, 49 | const int channels_, const int height_, const int width_, 50 | const int pooled_height_, const int pooled_width_, 51 | const float spatial_scale_, 52 | const int top_count, const int bottom_count); 53 | 54 | #ifdef __cplusplus 55 | } /* !extern "C" */ 56 | #endif 57 | 58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */ 59 | 60 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/precise_roi_pooling.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File : precise_roi_pooling.h 3 | * Author : Kanghee Lee 4 | * Email : lerohiso@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 
7 | */ 8 | 9 | #ifndef KERNEL_PRECISE_ROI_POOLING_H_ 10 | #define KERNEL_PRECISE_ROI_POOLING_H_ 11 | 12 | #include "tensorflow/core/framework/op_kernel.h" 13 | #include "tensorflow/core/util/tensor_format.h" 14 | 15 | namespace tensorflow { 16 | 17 | namespace functor { 18 | 19 | template <typename Device, typename T> 20 | struct PreciseRoIPoolingFunctor { 21 | Status operator()(OpKernelContext* context, 22 | const Tensor& features, 23 | const Tensor& rois, 24 | Tensor* pooled_features, 25 | int pooled_height, 26 | int pooled_width, 27 | float spatial_scale, 28 | TensorFormat data_format); 29 | }; 30 | 31 | template <typename Device, typename T> 32 | struct PreciseRoIPoolingGradFunctor { 33 | Status operator()(OpKernelContext* context, 34 | const Tensor& features, 35 | const Tensor& rois, 36 | const Tensor& pooled_features, 37 | const Tensor& pooled_features_diff, 38 | Tensor* features_gradient, 39 | Tensor* rois_gradient, 40 | int pooled_height, 41 | int pooled_width, 42 | float spatial_scale, 43 | TensorFormat data_format); 44 | }; 45 | 46 | } // namespace functor 47 | 48 | } // namespace tensorflow 49 | 50 | #endif // KERNEL_PRECISE_ROI_POOLING_H_ -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/prroi_pool/src/ops/precise_roi_pooling_ops.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * File : precise_roi_pooling_ops.cc 3 | * Author : Kanghee Lee 4 | * Email : lerohiso@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | */ 8 | 9 | #include "tensorflow/core/framework/op.h" 10 | #include "tensorflow/core/framework/shape_inference.h" 11 | 12 | namespace tensorflow { 13 | 14 | using ::tensorflow::shape_inference::InferenceContext; 15 | using ::tensorflow::shape_inference::ShapeHandle; 16 | 17 | REGISTER_OP("PreciseRoIPooling") 18 | .Input("features: T") 19 | .Input("rois: T") 20 | .Output("pooled_features: T") 21 | .Attr("pooled_height: int") 22 | .Attr("pooled_width: int") 23 | .Attr("spatial_scale: float") 24 | .Attr("data_format: {'NCHW'} = 'NCHW'") 25 | .Attr("T: realnumbertype") 26 | .SetShapeFn([](InferenceContext* c) { 27 | ShapeHandle features, rois; 28 | 29 | TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &features)); 30 | TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &rois)); 31 | 32 | // get input shapes 33 | int32 number_of_rois, number_of_channels; 34 | number_of_rois = c->Value(c->Dim(rois, 0)); 35 | string data_format; 36 | Status s = c->GetAttr("data_format", &data_format); 37 | if (s.ok() && data_format == "NCHW") { 38 | number_of_channels = c->Value(c->Dim(features, 1)); 39 | } 40 | else { 41 | number_of_channels = c->Value(c->Dim(features, 3)); 42 | } 43 | 44 | int32 pooled_height; 45 | int32 pooled_width; 46 | 47 | TF_RETURN_IF_ERROR(c->GetAttr("pooled_height", &pooled_height)); 48 | TF_RETURN_IF_ERROR(c->GetAttr("pooled_width", &pooled_width)); 49 | 50 | // Note, the output is always NCHW (even when input is NHWC) 51 | c->set_output(0, c->MakeShape({number_of_rois, number_of_channels, pooled_height, pooled_width})); 52 | return Status::OK(); 53 | }) 54 | .Doc(R"doc(PreciseRoIPooling op.)doc"); 55 | 56 | REGISTER_OP("PreciseRoIPoolingGrad") 57 | .Input("features: T") 58 | .Input("rois: T") 59 | .Input("pooled_features: T") 60 | .Input("pooled_features_diff: T") 61 | .Output("features_gradient: T") 62 | .Output("rois_gradient: T") 63 | .Attr("pooled_height: int") 64 | .Attr("pooled_width: int") 65 | .Attr("spatial_scale: float") 66 | .Attr("data_format: {'NCHW'} = 'NCHW'") 67 | .Attr("T: 
realnumbertype") 68 | .SetShapeFn([](InferenceContext* c) { 69 | ShapeHandle features, rois; 70 | TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &features)); 71 | TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &rois)); 72 | c->set_output(0, features); 73 | c->set_output(1, rois); 74 | return Status::OK(); 75 | }) 76 | .Doc(R"doc(PreciseRoIPoolingGrad op.)doc"); 77 | 78 | } // namespace tensorflow -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/tests/precise_roi_pooling_ops_test.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : precise_roi_pooling_ops_test.py 4 | # Author : Kanghee Lee 5 | # Email : lerohiso@gmail.com 6 | # Date : 09/25/2020 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import os 16 | import numpy as np 17 | import tensorflow as tf 18 | 19 | from tensorflow.python.framework import ops 20 | from tensorflow.python.platform import test 21 | from tensorflow.python.framework import test_util 22 | from prroi_pool import PreciseRoIPooling 23 | 24 | 25 | class PreciseRoIPoolingTest(test.TestCase): 26 | @test_util.run_gpu_only 27 | def test_forward(self): 28 | with self.test_session(): 29 | with ops.device("/gpu:0"): 30 | pooled_width = 7 31 | pooled_height = 7 32 | spatial_scale = 0.5 33 | data_format = 'channels_first' 34 | pool = PreciseRoIPooling(pooled_height, 35 | pooled_width, 36 | spatial_scale=spatial_scale, 37 | data_format=data_format) 38 | features = tf.random.uniform([4, 16, 24, 32], dtype=tf.float32) 39 | rois = tf.constant([[0, 0, 0, 14, 14], [1, 14, 14, 28, 28]], dtype=tf.float32) 40 | operation_outputs = pool([features, rois]) 41 | real_outputs = tf.keras.layers.AveragePooling2D(data_format=data_format, strides=1)(features) 42 | real_outputs = tf.stack([real_outputs[0, :, :7, :7], real_outputs[1, :, 7:14, 7:14]], axis=0) 43 | self.assertAllClose(operation_outputs, real_outputs) 44 | 45 | @test_util.run_gpu_only 46 | def test_backward(self): 47 | with self.test_session(): 48 | with ops.device("/gpu:0"): 49 | pooled_width = 2 50 | pooled_height = 2 51 | spatial_scale = 0.5 52 | data_format = 'channels_first' 53 | base_directory = os.path.dirname(os.path.realpath(__file__)) 54 | 55 | # binaries from pytorch prroi_pool module 56 | features = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/features.npy')) 57 | rois = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/rois.npy')) 58 | 59 | real_outputs = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/real_outputs.npy')) 60 | real_gradients0 = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/gradients0.npy')) 61 | real_gradients1 = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/gradients1.npy')) 62 | features = tf.convert_to_tensor(features) 63 | rois = tf.convert_to_tensor(rois) 64 | with tf.GradientTape() as tape: 65 | tape.watch([features, rois]) 66 | outputs = PreciseRoIPooling(pooled_height=pooled_height, 67 | pooled_width=pooled_width, 68 | spatial_scale=spatial_scale, 69 | data_format=data_format)([features, rois]) 70 | loss = tf.reduce_sum(outputs) 71 | 72 | gradients = tape.gradient(loss, [features, rois]) 73 | 74 | self.assertAllClose(outputs, real_outputs) 75 | 
self.assertAllClose(gradients[0], real_gradients0) 76 | self.assertAllClose(gradients[1], real_gradients1) 77 | 78 | 79 | if __name__ == '__main__': 80 | test.main() 81 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/features.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/features.npy -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients0.npy -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients1.npy -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/real_outputs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/real_outputs.npy -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/rois.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/rois.npy -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .tensor import TensorDict, TensorList 2 | -------------------------------------------------------------------------------- /lib/utils/classification_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | from torch.nn import functional as F 4 | 5 | 6 | class LBHinge(nn.Module): 7 | """Loss that uses a 'hinge' on the lower bound. 8 | This means that for samples with a label value smaller than the threshold, the loss is zero if the prediction is 9 | also smaller than that threshold. 10 | args: 11 | error_metric: What base loss to use (MSE by default). 12 | threshold: Threshold to use for the hinge. 13 | clip: Clip the loss if it is above this value.
14 | """ 15 | def __init__(self, error_metric=nn.MSELoss(), threshold=0.05, clip=None): 16 | super().__init__() 17 | self.error_metric = error_metric 18 | self.threshold = threshold if threshold is not None else -100 19 | self.clip = clip 20 | 21 | def forward(self, prediction, label): 22 | negative_mask = (label < self.threshold).float() 23 | positive_mask = (1.0 - negative_mask) 24 | 25 | prediction = negative_mask * F.relu(prediction) + positive_mask * prediction 26 | 27 | loss = self.error_metric(prediction, positive_mask * label) 28 | 29 | if self.clip is not None: 30 | loss = torch.min(loss, torch.tensor([self.clip], device=loss.device)) 31 | return loss 32 | -------------------------------------------------------------------------------- /lib/utils/lmdb_utils.py: -------------------------------------------------------------------------------- 1 | import lmdb 2 | import numpy as np 3 | import cv2 4 | import json 5 | 6 | LMDB_ENVS = dict() 7 | LMDB_HANDLES = dict() 8 | LMDB_FILELISTS = dict() 9 | 10 | 11 | def get_lmdb_handle(name): 12 | global LMDB_HANDLES, LMDB_FILELISTS 13 | item = LMDB_HANDLES.get(name, None) 14 | if item is None: 15 | env = lmdb.open(name, readonly=True, lock=False, readahead=False, meminit=False) 16 | LMDB_ENVS[name] = env 17 | item = env.begin(write=False) 18 | LMDB_HANDLES[name] = item 19 | 20 | return item 21 | 22 | 23 | def decode_img(lmdb_fname, key_name): 24 | handle = get_lmdb_handle(lmdb_fname) 25 | binfile = handle.get(key_name.encode()) 26 | if binfile is None: 27 | print("Illegal data detected. %s %s" % (lmdb_fname, key_name)) 28 | s = np.frombuffer(binfile, np.uint8) 29 | x = cv2.cvtColor(cv2.imdecode(s, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB) 30 | return x 31 | 32 | 33 | def decode_str(lmdb_fname, key_name): 34 | handle = get_lmdb_handle(lmdb_fname) 35 | binfile = handle.get(key_name.encode()) 36 | string = binfile.decode() 37 | return string 38 | 39 | 40 | def decode_json(lmdb_fname, key_name): 41 | return json.loads(decode_str(lmdb_fname, key_name)) 42 | 43 | 44 | if __name__ == "__main__": 45 | lmdb_fname = "/data/sda/v-yanbi/iccv21/LittleBoy_clean/data/got10k_lmdb" 46 | '''Decode image''' 47 | # key_name = "test/GOT-10k_Test_000001/00000001.jpg" 48 | # img = decode_img(lmdb_fname, key_name) 49 | # cv2.imwrite("001.jpg", img) 50 | '''Decode str''' 51 | # key_name = "test/list.txt" 52 | # key_name = "train/GOT-10k_Train_000001/groundtruth.txt" 53 | key_name = "train/GOT-10k_Train_000001/absence.label" 54 | str_ = decode_str(lmdb_fname, key_name) 55 | print(str_) 56 | -------------------------------------------------------------------------------- /lib/utils/merge.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def merge_template_search(inp_list, return_search=False, return_template=False): 5 | """NOTICE: search region related features must be in the last place""" 6 | seq_dict = {"feat": torch.cat([x["feat"] for x in inp_list], dim=0), 7 | "mask": torch.cat([x["mask"] for x in inp_list], dim=1), 8 | "pos": torch.cat([x["pos"] for x in inp_list], dim=0)} 9 | if return_search: 10 | x = inp_list[-1] 11 | seq_dict.update({"feat_x": x["feat"], "mask_x": x["mask"], "pos_x": x["pos"]}) 12 | if return_template: 13 | z = inp_list[0] 14 | seq_dict.update({"feat_z": z["feat"], "mask_z": z["mask"], "pos_z": z["pos"]}) 15 | return seq_dict 16 | 17 | 18 | def get_qkv(inp_list): 19 | """The 1st element of the inp_list is about the template, 20 | the 2nd (the last) element is about the search 
region""" 21 | dict_x = inp_list[-1] 22 | dict_c = {"feat": torch.cat([x["feat"] for x in inp_list], dim=0), 23 | "mask": torch.cat([x["mask"] for x in inp_list], dim=1), 24 | "pos": torch.cat([x["pos"] for x in inp_list], dim=0)} # concatenated dict 25 | q = dict_x["feat"] + dict_x["pos"] 26 | k = dict_c["feat"] + dict_c["pos"] 27 | v = dict_c["feat"] 28 | key_padding_mask = dict_c["mask"] 29 | return q, k, v, key_padding_mask 30 | -------------------------------------------------------------------------------- /lib/utils/scheduler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from bisect import bisect_right 3 | 4 | 5 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 6 | def __init__( 7 | self, 8 | optimizer, 9 | milestones, 10 | gamma=0.1, 11 | warmup_factor=0.01, 12 | warmup_iters=20., 13 | warmup_method="linear", 14 | last_epoch=-1, 15 | ): 16 | if not list(milestones) == sorted(milestones): 17 | raise ValueError( 18 | "Milestones should be a list of" " increasing integers. Got {}", 19 | milestones, 20 | ) 21 | 22 | if warmup_method not in ("constant", "linear"): 23 | raise ValueError( 24 | "Only 'constant' or 'linear' warmup_method accepted" 25 | "got {}".format(warmup_method) 26 | ) 27 | self.milestones = milestones 28 | self.gamma = gamma 29 | self.warmup_factor = warmup_factor 30 | self.warmup_iters = warmup_iters 31 | self.warmup_method = warmup_method 32 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 33 | 34 | def get_lr(self): 35 | warmup_factor = 1 36 | if self.last_epoch < self.warmup_iters: 37 | if self.warmup_method == "constant": 38 | warmup_factor = self.warmup_factor 39 | elif self.warmup_method == "linear": 40 | # print(self.last_epoch) 41 | alpha = (self.last_epoch + 1) / self.warmup_iters 42 | # print(alpha) 43 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 44 | # print(warmup_factor) 45 | return [ 46 | base_lr 47 | * warmup_factor 48 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 49 | for base_lr in self.base_lrs 50 | ] 51 | -------------------------------------------------------------------------------- /scripts/demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | script=$1 5 | config=$2 6 | input_video=$3 # 7 | output_video=$4 # 8 | language=${5:""} # 9 | init_bbox=${6:""} # 10 | 11 | python demo.py --tracker_name $script \ 12 | --tracker_param $config \ 13 | --input_video $input_video \ 14 | --output_video $output_video \ 15 | --language $language \ 16 | --init_bbox $init_bbox \ -------------------------------------------------------------------------------- /scripts/new_tracker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | tracker=$1 4 | template=${2:-'mae'} 5 | 6 | cp -r experiments/${template} experiments/$tracker 7 | cp -r lib/config/${template} lib/config/$tracker 8 | cp -r lib/models/${template} lib/models/$tracker 9 | mv lib/models/$tracker/${template}.py lib/models/$tracker/$tracker.py 10 | cp lib/train/actors/${template}.py lib/train/actors/$tracker.py 11 | 12 | cp lib/test/parameter/${template}.py lib/test/parameter/$tracker.py 13 | cp lib/test/tracker/${template}.py lib/test/tracker/$tracker.py 14 | 15 | echo "\n"from .$tracker import $tracker >> lib/models/__init__.py 16 | echo "\n"from .$tracker import '*' >> lib/train/actors/__init__.py 17 | 18 | echo "The following file need to be 
modified: " 19 | echo "lib/models/$tracker/$tracker.py" 20 | echo "lib/train/actors/$tracker.py" 21 | echo "lib/test/parameter/$tracker.py" 22 | echo "lib/test/tracker/$tracker.py" -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | script=${1:-'uvltrack'} 4 | config=${2:-'baseline'} 5 | dataset=${3:-'tnl2k'} 6 | numgpu=${4:-2} 7 | threads_per_gpu=${5:-8} 8 | 9 | # CUDA_VISIBLE_DEVICES=2,3 \ 10 | nohup \ 11 | python tracking/test.py --tracker_name $script --tracker_param $config --dataset $dataset \ 12 | --threads $((threads_per_gpu*numgpu)) --num_gpus $numgpu --debug 0 \ 13 | > terminal_logs/test_$script'_'$config'_'$dataset.log 2>&1 & 14 | 15 | echo log save to terminal_logs/test_$script'_'$config'_'$dataset.log -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | script=${1:-'uvltrack'} 4 | config=${2:-'baseline_base'} 5 | numgpu=${3:-2} 6 | gpuid=${4:-'0,1'} 7 | 8 | CUDA_VISIBLE_DEVICES=$gpuid \ 9 | nohup \ 10 | python tracking/train.py --script $script \ 11 | --config $config \ 12 | --save_dir . \ 13 | --mode multiple \ 14 | --nproc_per_node $numgpu \ 15 | > terminal_logs/train_$script'_'$config.log 2>&1 & 16 | 17 | echo log save to terminal_logs/train_$script'_'$config.log -------------------------------------------------------------------------------- /tracking/_init_paths.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os.path as osp 6 | import sys 7 | 8 | 9 | def add_path(path): 10 | if path not in sys.path: 11 | sys.path.insert(0, path) 12 | 13 | 14 | this_dir = osp.dirname(__file__) 15 | 16 | prj_path = osp.join(this_dir, '..') 17 | add_path(prj_path) 18 | -------------------------------------------------------------------------------- /tracking/analysis_results.py: -------------------------------------------------------------------------------- 1 | import _init_paths 2 | import argparse 3 | import os 4 | import matplotlib.pyplot as plt 5 | plt.rcParams['figure.figsize'] = [8, 8] 6 | 7 | from lib.test.analysis.plot_results import plot_results, print_results, print_per_sequence_results 8 | from lib.test.evaluation import get_dataset, trackerlist 9 | from lib.test.evaluation.environment import env_settings 10 | import glob 11 | 12 | parser = argparse.ArgumentParser(description='Run tracker on sequence or dataset.') 13 | parser.add_argument('--tracker_name', type=str, help='Name of tracking method.') 14 | parser.add_argument('--tracker_param', type=str, help='Name of config file.') 15 | parser.add_argument('--dataset_name', type=str, help='Name of config file.') 16 | parser.add_argument('--save_file', type=str, default=None) 17 | 18 | args = parser.parse_args() 19 | 20 | def check_complete(path): 21 | file_num = { 22 | 'nfs': 200, 23 | 'uav': 246, 24 | 'lasotext': 300, 25 | 'lasot': 560, 26 | 'trackingnet': 1022, 27 | 'tnl2k': 1400, 28 | 'otb99': 96, 29 | 'itb': 360, 30 | 'avist': 240, 31 | } 32 | num_file = len(glob.glob(os.path.join(path, args.dataset_name, '*.txt'))) 33 | for name, num in file_num.items(): 34 | if name in args.dataset_name: 35 | if num_file == file_num[name]: 36 | return True 37 
-------------------------------------------------------------------------------- /tracking/_init_paths.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os.path as osp 6 | import sys 7 | 8 | 9 | def add_path(path): 10 | if path not in sys.path: 11 | sys.path.insert(0, path) 12 | 13 | 14 | this_dir = osp.dirname(__file__) 15 | 16 | prj_path = osp.join(this_dir, '..') 17 | add_path(prj_path) 18 | -------------------------------------------------------------------------------- /tracking/analysis_results.py: -------------------------------------------------------------------------------- 1 | import _init_paths 2 | import argparse 3 | import os 4 | import matplotlib.pyplot as plt 5 | plt.rcParams['figure.figsize'] = [8, 8] 6 | 7 | from lib.test.analysis.plot_results import plot_results, print_results, print_per_sequence_results 8 | from lib.test.evaluation import get_dataset, trackerlist 9 | from lib.test.evaluation.environment import env_settings 10 | import glob 11 | 12 | parser = argparse.ArgumentParser(description='Run tracker on sequence or dataset.') 13 | parser.add_argument('--tracker_name', type=str, help='Name of tracking method.') 14 | parser.add_argument('--tracker_param', type=str, help='Name of config file.') 15 | parser.add_argument('--dataset_name', type=str, help='Name of dataset.') 16 | parser.add_argument('--save_file', type=str, default=None) 17 | 18 | args = parser.parse_args() 19 | 20 | def check_complete(path): 21 | file_num = { 22 | 'nfs': 200, 23 | 'uav': 246, 24 | 'lasotext': 300, 25 | 'lasot': 560, 26 | 'trackingnet': 1022, 27 | 'tnl2k': 1400, 28 | 'otb99': 96, 29 | 'itb': 360, 30 | 'avist': 240, 31 | } 32 | num_file = len(glob.glob(os.path.join(path, args.dataset_name, '*.txt'))) 33 | for name, num in file_num.items(): 34 | if name in args.dataset_name: 35 | if num_file == file_num[name]: 36 | return True 37 | else: 38 | return False 39 | raise ValueError("no such dataset") 40 | 41 | env = env_settings() 42 | trackers = [] 43 | tracker_params = [path.split('/')[-1] for path in sorted(glob.glob(os.path.join(env.results_path, args.tracker_name, args.tracker_param)), reverse=True) if check_complete(path)] 44 | trackers.extend(trackerlist(name=args.tracker_name, parameter_name=args.tracker_param, dataset_name=args.dataset_name, 45 | run_ids=None, display_name=args.tracker_name)) 46 | 47 | dataset = get_dataset(args.dataset_name) 48 | print_results(trackers, dataset, report_name=args.dataset_name, merge_results=True, force_evaluation=True, plot_types=('success', 'prec', 'norm_prec'), save_file=args.save_file) 49 | -------------------------------------------------------------------------------- /tracking/create_default_local_file.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import _init_paths 4 | from lib.train.admin import create_default_local_file_ITP_train 5 | from lib.test.evaluation import create_default_local_file_ITP_test 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser(description='Create default local file on ITP or PAI') 10 | parser.add_argument("--workspace_dir", type=str, required=True) # workspace dir 11 | parser.add_argument("--data_dir", type=str, required=True) 12 | parser.add_argument("--save_dir", type=str, required=True) 13 | args = parser.parse_args() 14 | return args 15 | 16 | 17 | if __name__ == "__main__": 18 | args = parse_args() 19 | workspace_dir = os.path.realpath(args.workspace_dir) 20 | data_dir = os.path.realpath(args.data_dir) 21 | save_dir = os.path.realpath(args.save_dir) 22 | create_default_local_file_ITP_train(workspace_dir, data_dir) 23 | create_default_local_file_ITP_test(workspace_dir, data_dir, save_dir) 24 | -------------------------------------------------------------------------------- /tracking/pre_read_datasets.py: -------------------------------------------------------------------------------- 1 | import _init_paths 2 | import multiprocessing as mp 3 | import argparse 4 | import os 5 | from lib.utils.lmdb_utils import decode_str 6 | import time 7 | import json 8 | 9 | 10 | def parse_args(): 11 | """ 12 | args for dataset pre-reading.
13 | """ 14 | parser = argparse.ArgumentParser(description='Parse args for training') 15 | parser.add_argument('--data_dir', type=str, help='directory where lmdb data is located') 16 | parser.add_argument('--dataset_str', type=str, help="which datasets to use") 17 | args = parser.parse_args() 18 | 19 | return args 20 | 21 | 22 | def get_trknet_dict(trknet_dir): 23 | with open(os.path.join(trknet_dir, "seq_list.json"), "r") as f: 24 | seq_list = json.loads(f.read()) 25 | res_dict = {} 26 | set_idx_pre = -1 27 | for set_idx, seq_name in seq_list: 28 | if set_idx != set_idx_pre: 29 | res_dict[set_idx] = "anno/%s.txt" % seq_name 30 | set_idx_pre = set_idx 31 | return res_dict 32 | 33 | 34 | def target(lmdb_dir, key_name): 35 | _ = decode_str(lmdb_dir, key_name) 36 | 37 | 38 | if __name__ == "__main__": 39 | args = parse_args() 40 | data_dir = args.data_dir 41 | dataset_str = args.dataset_str 42 | key_dict = {"got10k_lmdb": "train/list.txt", 43 | "lasot_lmdb": "LaSOTBenchmark.json", 44 | "coco_lmdb": "annotations/instances_train2017.json", 45 | "vid_lmdb": "cache.json"} 46 | print("Ready to pre load datasets") 47 | start = time.time() 48 | ps = [] 49 | datasets = [] 50 | if 'g' in dataset_str: 51 | datasets.append("got10k_lmdb") 52 | if 'l' in dataset_str: 53 | datasets.append("lasot_lmdb") 54 | if 'c' in dataset_str: 55 | datasets.append("coco_lmdb") 56 | if 'v' in dataset_str: 57 | datasets.append("vid_lmdb") 58 | for dataset in datasets: 59 | lmdb_dir = os.path.join(data_dir, dataset) 60 | p = mp.Process(target=target, args=(lmdb_dir, key_dict[dataset])) 61 | print("add %s %s to job queue" % (lmdb_dir, key_dict[dataset])) 62 | ps.append(p) 63 | # deal with trackingnet 64 | if 't' in dataset_str: 65 | trknet_dict = get_trknet_dict(os.path.join(data_dir, "trackingnet_lmdb")) 66 | for set_idx, seq_path in trknet_dict.items(): 67 | lmdb_dir = os.path.join(data_dir, "trackingnet_lmdb", "TRAIN_%d_lmdb" % set_idx) 68 | p = mp.Process(target=target, args=(lmdb_dir, seq_path)) 69 | print("add %s %s to job queue" % (lmdb_dir, seq_path)) 70 | ps.append(p) 71 | for p in ps: 72 | p.start() 73 | for p in ps: 74 | p.join() 75 | 76 | print("Pre read over") 77 | end = time.time() 78 | hour = (end - start) / 3600 79 | print("it takes %.2f hours to pre-read data" % hour) 80 | -------------------------------------------------------------------------------- /tracking/profile_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | prj_path = os.path.join(os.path.dirname(__file__), '..') 5 | if prj_path not in sys.path: 6 | sys.path.append(prj_path) 7 | 8 | import argparse 9 | import torch 10 | from lib.utils.misc import NestedTensor 11 | from thop import profile 12 | from thop.utils import clever_format 13 | import time 14 | import importlib 15 | 16 | 17 | def parse_args(): 18 | """ 19 | args for training. 
-------------------------------------------------------------------------------- /tracking/profile_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | prj_path = os.path.join(os.path.dirname(__file__), '..') 5 | if prj_path not in sys.path: 6 | sys.path.append(prj_path) 7 | 8 | import argparse 9 | import torch 10 | from lib.utils.misc import NestedTensor 11 | from thop import profile 12 | from thop.utils import clever_format 13 | import time 14 | import importlib 15 | 16 | 17 | def parse_args(): 18 | """ 19 | args for profiling. """ 20 | parser = argparse.ArgumentParser(description='Parse args for profiling') 21 | # for train 22 | parser.add_argument('--script', type=str, default='uvltrack', choices=['uvltrack'], 23 | help='training script name') 24 | parser.add_argument('--config', type=str, default='baseline_base', help='yaml configure file name') 25 | args = parser.parse_args() 26 | 27 | return args 28 | 29 | def evaluate_speed(model, template, search, text, prompt, flag): 30 | '''Speed Test''' 31 | T_w = 500 32 | T_t = 1000 33 | print("testing speed ...") 34 | torch.cuda.synchronize() 35 | with torch.no_grad(): 36 | # overall 37 | for i in range(T_w): 38 | _ = model.forward_test(template, search, text, prompt, flag) 39 | start = time.time() 40 | for i in range(T_t): 41 | _ = model.forward_test(template, search, text, prompt, flag) 42 | torch.cuda.synchronize() 43 | end = time.time() 44 | avg_lat = (end - start) / T_t 45 | print("The average overall latency is %.2f ms" % (avg_lat * 1000)) 46 | print("FPS is %.2f fps" % (1. / avg_lat)) 47 | 48 | if __name__ == "__main__": 49 | device = "cuda:0" 50 | torch.cuda.set_device(device) 51 | args = parse_args() 52 | '''update cfg''' 53 | yaml_fname = 'experiments/%s/%s.yaml' % (args.script, args.config) 54 | config_module = importlib.import_module('lib.config.%s.config' % args.script) 55 | cfg = config_module.cfg 56 | config_module.update_config_from_file(yaml_fname) 57 | '''set some values''' 58 | bs = 1 59 | z_sz = cfg.TEST.TEMPLATE_SIZE 60 | x_sz = cfg.TEST.SEARCH_SIZE 61 | 62 | dim = cfg.MODEL.HIDDEN_DIM 63 | 64 | 65 | if args.script == "uvltrack": 66 | model_module = importlib.import_module('lib.models') 67 | model_constructor = model_module.uvltrack.build_model 68 | model = model_constructor(cfg) 69 | # get the template and search 70 | template = torch.randn(bs, 3, z_sz, z_sz) 71 | search = torch.randn(bs, 3, x_sz, x_sz) 72 | text = NestedTensor(torch.ones(bs, 40).long(), torch.randn(bs, 40)>0.5) 73 | prompt = torch.randn(bs, 3, dim) 74 | flag = torch.ones(bs).long() 75 | # transfer to device 76 | model = model.to(device) 77 | template = template.to(device) 78 | search = search.to(device) 79 | text = text.to(device) 80 | prompt = prompt.to(device) 81 | flag = flag.to(device) 82 | evaluate_speed(model, template, search, text, prompt, flag) 83 | 84 | else: 85 | raise NotImplementedError 86 | -------------------------------------------------------------------------------- /tracking/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import importlib 5 | 6 | prj_path = os.path.join(os.path.dirname(__file__), '..') 7 | if prj_path not in sys.path: 8 | sys.path.append(prj_path) 9 | 10 | from lib.test.evaluation import get_dataset 11 | from lib.test.evaluation.running import run_dataset 12 | from lib.test.evaluation.tracker import Tracker 13 | 14 | import warnings 15 | warnings.filterwarnings('ignore') 16 | 17 | def run_tracker(tracker_name, tracker_param, run_id=None, dataset_name='otb', sequence=None, debug=0, threads=0, 18 | num_gpus=8): 19 | """Run tracker on sequence or dataset. 20 | args: 21 | tracker_name: Name of tracking method. 22 | tracker_param: Name of parameter file. 23 | run_id: The run id. 24 | dataset_name: Name of dataset (otb, nfs, uav, tpl, vot, tn, gott, gotv, lasot). 25 | sequence: Sequence number or name. 26 | debug: Debug level. 27 | threads: Number of threads. 
28 | """ 29 | 30 | dataset = get_dataset(dataset_name) 31 | 32 | if sequence is not None: 33 | dataset = [dataset[sequence]] 34 | 35 | trackers = [Tracker(tracker_name, tracker_param, dataset_name, run_id)] 36 | run_dataset(dataset, trackers, debug, threads, num_gpus=num_gpus) 37 | 38 | 39 | def main(): 40 | parser = argparse.ArgumentParser(description='Run tracker on sequence or dataset.') 41 | parser.add_argument('--tracker_name', default="mvit", type=str, help='Name of tracking method.') 42 | parser.add_argument('--tracker_param', default="baseline_256_4", type=str, help='Name of config file.') 43 | parser.add_argument('--runid', type=int, default=None, help='The run id.') 44 | parser.add_argument('--dataset_name', type=str, default='otb99', help='Name of dataset (otb, nfs, uav, tpl, vot, tn, gott, gotv, lasot).') 45 | parser.add_argument('--sequence', type=str, default=None, help='Sequence number or name.') 46 | parser.add_argument('--debug', type=int, default=1, help='Debug level.') 47 | parser.add_argument('--threads', type=int, default=0, help='Number of threads.') 48 | parser.add_argument('--num_gpus', type=int, default=8) 49 | 50 | parser.add_argument('--params__model', type=str, default=None, help="Tracking model path.") 51 | parser.add_argument('--params__update_interval', type=int, default=None, help="Update interval of online tracking.") 52 | parser.add_argument('--params__online_sizes', type=int, default=None) 53 | parser.add_argument('--params__search_area_scale', type=float, default=None) 54 | parser.add_argument('--params__max_score_decay', type=float, default=1.0) 55 | parser.add_argument('--params__vis_attn', type=int, choices=[0, 1], default=0, help="Whether visualize the attention maps.") 56 | 57 | args = parser.parse_args() 58 | 59 | try: 60 | seq_name = int(args.sequence) 61 | except: 62 | seq_name = args.sequence 63 | 64 | run_tracker(args.tracker_name, args.tracker_param, args.runid, args.dataset_name, seq_name, args.debug, 65 | args.threads, num_gpus=args.num_gpus) 66 | 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /tracking/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import argparse 4 | 5 | import warnings 6 | warnings.filterwarnings('ignore') 7 | 8 | 9 | def parse_args(): 10 | """ 11 | args for training. 
12 | """ 13 | parser = argparse.ArgumentParser(description='Parse args for training') 14 | # for train 15 | parser.add_argument('--script', type=str, help='training script name') 16 | parser.add_argument('--config', type=str, default='baseline', help='yaml configure file name') 17 | parser.add_argument('--stage1_model', type=str, default=None, help='stage1 model used to train SPM.') 18 | parser.add_argument('--save_dir', type=str, help='root directory to save checkpoints, logs, and tensorboard') 19 | parser.add_argument('--mode', type=str, choices=["single", "multiple"], default="multiple", 20 | help="train on single gpu or multiple gpus") 21 | parser.add_argument('--nproc_per_node', type=int, help="number of GPUs per node") # specify when mode is multiple 22 | parser.add_argument('--master_port', type=int, help="master port", default=26500) 23 | parser.add_argument('--use_lmdb', type=int, choices=[0, 1], default=0) # whether datasets are in lmdb format 24 | parser.add_argument('--script_prv', type=str, help='training script name') 25 | parser.add_argument('--config_prv', type=str, default='baseline', help='yaml configure file name') 26 | # for knowledge distillation 27 | parser.add_argument('--distill', type=int, choices=[0, 1], default=0) # whether to use knowledge distillation 28 | parser.add_argument('--script_teacher', type=str, help='teacher script name') 29 | parser.add_argument('--config_teacher', type=str, help='teacher yaml configure file name') 30 | 31 | args = parser.parse_args() 32 | 33 | return args 34 | 35 | 36 | def main(): 37 | args = parse_args() 38 | if args.mode == "single": 39 | train_cmd = "python lib/train/run_training.py --script %s --config %s --save_dir %s --use_lmdb %d " \ 40 | "--script_prv %s --config_prv %s --distill %d --script_teacher %s --config_teacher %s --stage1_model %s" \ 41 | % (args.script, args.config, args.save_dir, args.use_lmdb, args.script_prv, args.config_prv, 42 | args.distill, args.script_teacher, args.config_teacher, args.stage1_model) 43 | elif args.mode == "multiple": 44 | port = int(time.time()*1000)%20000+10000 45 | train_cmd = f"OMP_NUM_THREADS=1 torchrun --standalone --nnodes=1 --nproc_per_node %d lib/train/run_training.py " \ 46 | "--script %s --config %s --save_dir %s --use_lmdb %d --script_prv %s --config_prv %s " \ 47 | "--distill %d --script_teacher %s --config_teacher %s --stage1_model %s" \ 48 | % (args.nproc_per_node, args.script, args.config, args.save_dir, args.use_lmdb, args.script_prv, 49 | args.config_prv, args.distill, args.script_teacher, args.config_teacher, args.stage1_model) 50 | else: 51 | raise ValueError("mode should be 'single' or 'multiple'.") 52 | os.system(train_cmd) 53 | 54 | 55 | if __name__ == "__main__": 56 | main() 57 | --------------------------------------------------------------------------------