├── .gitignore ├── LICENSE ├── README.md ├── demo.py ├── experiments └── uvltrack │ ├── baseline_base.yaml │ ├── baseline_base_grounding.yaml │ └── baseline_large.yaml ├── fig ├── arch.png └── results.png ├── install.sh ├── lib ├── __init__.py ├── config │ ├── __init__.py │ └── uvltrack │ │ └── config.py ├── models │ ├── __init__.py │ ├── backbones │ │ ├── __init__.py │ │ ├── bert_backbone.py │ │ ├── block.py │ │ ├── mae_vit.py │ │ ├── modality_unified_feature_extractor.py │ │ └── utils.py │ ├── heads │ │ ├── __init__.py │ │ ├── modality_adaptive_box_head.py │ │ └── utils.py │ └── uvltrack │ │ ├── __init__.py │ │ ├── utils.py │ │ └── uvltrack.py ├── registry.py ├── test │ ├── __init__.py │ ├── analysis │ │ ├── __init__.py │ │ ├── extract_results.py │ │ └── plot_results.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── avistdataset.py │ │ ├── data.py │ │ ├── datasets.py │ │ ├── environment.py │ │ ├── got10kdataset.py │ │ ├── itbdataset.py │ │ ├── lasot_lmdbdataset.py │ │ ├── lasotdataset.py │ │ ├── lasotextdataset.py │ │ ├── local.py │ │ ├── nfsdataset.py │ │ ├── otb99dataset.py │ │ ├── otbdataset.py │ │ ├── running.py │ │ ├── tc128cedataset.py │ │ ├── tc128dataset.py │ │ ├── tnl2kdataset.py │ │ ├── tracker.py │ │ ├── trackingnetdataset.py │ │ ├── uavdataset.py │ │ └── utils.py │ ├── parameter │ │ ├── __init__.py │ │ └── uvltrack.py │ ├── tracker │ │ ├── __init__.py │ │ ├── basetracker.py │ │ ├── tracker_utils.py │ │ └── uvltrack.py │ └── utils │ │ ├── __init__.py │ │ ├── _init_paths.py │ │ ├── augmentation.py │ │ ├── hann.py │ │ ├── load_text.py │ │ ├── params.py │ │ ├── transform_got10k.py │ │ └── transform_trackingnet.py ├── train │ ├── __init__.py │ ├── _init_paths.py │ ├── actors │ │ ├── __init__.py │ │ ├── base_actor.py │ │ └── uvltrack.py │ ├── admin │ │ ├── __init__.py │ │ ├── environment.py │ │ ├── local.py │ │ ├── multigpu.py │ │ ├── settings.py │ │ ├── stats.py │ │ └── tensorboard.py │ ├── base_functions.py │ ├── data │ │ ├── __init__.py │ │ ├── bounding_box_utils.py │ │ ├── bpe_simple_vocab_16e6.txt.gz │ │ ├── image_loader.py │ │ ├── loader.py │ │ ├── processing.py │ │ ├── processing_utils.py │ │ ├── processing_utils_grounding.py │ │ ├── processing_utils_grounding2.py │ │ ├── sampler.py │ │ ├── transforms.py │ │ └── utils.py │ ├── data_specs │ │ ├── README.md │ │ ├── got10k_train_full_split.txt │ │ ├── got10k_train_split.txt │ │ ├── got10k_val_split.txt │ │ ├── got10k_vot_exclude.txt │ │ ├── got10k_vot_train_split.txt │ │ ├── got10k_vot_val_split.txt │ │ ├── lasot_test_split.txt │ │ ├── lasot_train_split.txt │ │ └── trackingnet_classmap.txt │ ├── dataset │ │ ├── COCO_tool.py │ │ ├── __init__.py │ │ ├── base_image_dataset.py │ │ ├── base_video_dataset.py │ │ ├── coco.py │ │ ├── coco_seq.py │ │ ├── coco_seq_lmdb.py │ │ ├── got10k.py │ │ ├── got10k_lmdb.py │ │ ├── imagenetvid.py │ │ ├── imagenetvid_lmdb.py │ │ ├── lasot.py │ │ ├── lasot_lmdb.py │ │ ├── lasot_test.py │ │ ├── lasotext.py │ │ ├── object365.py │ │ ├── otb99.py │ │ ├── refcoco_seq.py │ │ ├── refer.py │ │ ├── tnl2k.py │ │ ├── tnl2k_test.py │ │ ├── tracking_net.py │ │ ├── tracking_net_lmdb.py │ │ ├── utils.py │ │ ├── visualgenome.py │ │ └── webuav.py │ ├── run_training.py │ ├── train_script_mutrack.py │ └── trainers │ │ ├── __init__.py │ │ ├── base_trainer.py │ │ └── ltr_trainer.py └── utils │ ├── PreciseRoIPooling │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── _assets │ │ └── prroi_visualization.png │ ├── pytorch │ │ ├── prroi_pool │ │ │ ├── .gitignore │ │ │ ├── __init__.py │ │ │ ├── functional.py │ │ │ ├── 
prroi_pool.py │ │ │ └── src │ │ │ │ ├── prroi_pooling_gpu.c │ │ │ │ ├── prroi_pooling_gpu.h │ │ │ │ ├── prroi_pooling_gpu_impl.cu │ │ │ │ └── prroi_pooling_gpu_impl.cuh │ │ └── tests │ │ │ └── test_prroi_pooling2d.py │ ├── src │ │ ├── prroi_pooling_gpu_impl.cu │ │ └── prroi_pooling_gpu_impl.cuh │ └── tensorflow │ │ ├── prroi_pool │ │ ├── CMakeLists.txt │ │ ├── __init__.py │ │ ├── precise_roi_pooling_ops.py │ │ └── src │ │ │ ├── kernels │ │ │ ├── build_cuda.py │ │ │ ├── external │ │ │ │ ├── prroi_pooling_gpu_impl.cu │ │ │ │ └── prroi_pooling_gpu_impl.cuh │ │ │ ├── precise_roi_pooling.h │ │ │ ├── precise_roi_pooling_kernels.cc │ │ │ └── precise_roi_pooling_kernels.cu.cc │ │ │ └── ops │ │ │ └── precise_roi_pooling_ops.cc │ │ └── tests │ │ ├── precise_roi_pooling_ops_test.py │ │ └── test_binaries │ │ └── 2_2_0.5 │ │ ├── features.npy │ │ ├── gradients0.npy │ │ ├── gradients1.npy │ │ ├── real_outputs.npy │ │ └── rois.npy │ ├── __init__.py │ ├── box_ops.py │ ├── classification_loss.py │ ├── lmdb_utils.py │ ├── merge.py │ ├── misc.py │ ├── scheduler.py │ └── tensor.py ├── scripts ├── demo.sh ├── new_tracker.sh ├── test.sh └── train.sh ├── tracking ├── _init_paths.py ├── analysis_results.py ├── create_default_local_file.py ├── pre_read_datasets.py ├── profile_model.py ├── test.py └── train.py └── uvltrack_env.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | *.idea 2 | *~ 3 | *__pycache__* 4 | *.pyc 5 | *.pytest_cache 6 | *.csv 7 | /checkpoints 8 | /data 9 | /debug 10 | /logs 11 | /msic_logs 12 | /tensorboard 13 | /test 14 | release.zip 15 | /terminal_logs/* 16 | /workspace 17 | /pretrained 18 | /pretrain 19 | /compare 20 | /mutrack.zip -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 University of Science and Technology of China 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /demo.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import cv2 3 | import argparse 4 | 5 | 6 | parser = argparse.ArgumentParser(description='Run the tracker on an input video.') 7 | parser.add_argument('--tracker_name', type=str, help='Name of tracking method.') 8 | parser.add_argument('--tracker_param', type=str, help='Name of parameter file.') 9 | parser.add_argument('--input_video', type=str, help='Path to input video.') 10 | parser.add_argument('--output_video', type=str, help='Path to output video.') 11 | parser.add_argument('--init_bbox', nargs="*", type=int, help='Initial target bounding box') 12 | parser.add_argument('--language', type=str, help='Language description of target') 13 | args = parser.parse_args() 14 | 15 | 16 | def _read_image(image_file): 17 | if isinstance(image_file, str): 18 | im = cv2.imread(image_file) 19 | return cv2.cvtColor(im, cv2.COLOR_BGR2RGB) 20 | 21 | input_video = args.input_video 22 | output_video = args.output_video 23 | parameter_name = args.tracker_param 24 | 25 | init_info = {} 26 | # specify target reference 27 | init_info['language'] = args.language # for NL and NLBBOX mode 28 | init_info['init_bbox'] = args.init_bbox # for BBOX and NLBBOX mode 29 | 30 | param_module = importlib.import_module(f'lib.test.parameter.{args.tracker_name}') 31 | params = param_module.parameters(parameter_name, None) 32 | params.debug = False 33 | 34 | tracker_class = importlib.import_module(f'lib.test.tracker.{args.tracker_name}').get_tracker_class() 35 | tracker = tracker_class(params, '') 36 | 37 | output = {'target_bbox': [], 38 | 'time': []} 39 | if tracker.params.save_all_boxes: 40 | output['all_boxes'] = [] 41 | output['all_scores'] = [] 42 | 43 | def _store_outputs(tracker_out: dict, defaults=None): 44 | defaults = {} if defaults is None else defaults 45 | for key in output.keys(): 46 | val = tracker_out.get(key, defaults.get(key, None)) 47 | if key in tracker_out or val is not None: 48 | output[key].append(val) 49 | 50 | videoCapture = cv2.VideoCapture(input_video) 51 | success, image = videoCapture.read() 52 | 53 | out = tracker.initialize(image, init_info) 54 | 55 | height, width, _ = image.shape 56 | fps = 20 57 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 58 | videowriter = cv2.VideoWriter(output_video, fourcc, fps, (width, height)) 59 | success, image = videoCapture.read() 60 | while success: 61 | info = {} 62 | out = tracker.track(image, info) 63 | x, y, w, h = out['target_bbox'] 64 | image = cv2.rectangle(image, (int(x), int(y)), (int(x+w), int(y+h)), (255, 0, 0)) 65 | videowriter.write(image) # frames from VideoCapture are already BGR, which VideoWriter expects 66 | success, image = videoCapture.read() 67 | videowriter.release() 68 | videoCapture.release() -------------------------------------------------------------------------------- /experiments/uvltrack/baseline_base.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | CONTEXT_GAP: 20 3 | MAX_SAMPLE_INTERVAL: 200 4 | MEAN: 5 | - 0.485 6 | - 0.456 7 | - 0.406 8 | SEARCH: 9 | CENTER_JITTER: 3.5 10 | FACTOR: 4.0 11 | SCALE_JITTER: 0.5 12 | SIZE: 256 13 | NUMBER: 2 14 | STD: 15 | - 0.229 16 | - 0.224
17 | - 0.225 18 | TEMPLATE: 19 | CENTER_JITTER: 0 20 | FACTOR: 2.0 21 | SCALE_JITTER: 0 22 | SIZE: 128 23 | NUMBER: 1 24 | TRAIN: 25 | DATASETS_NAME: 26 | - GOT10K_vottrain 27 | - LASOT 28 | - COCO17 29 | - TRACKINGNET 30 | - TNL2K 31 | - OTB99 32 | - REFCOCOG 33 | DATASETS_RATIO: 34 | - 1 35 | - 1 36 | - 1 37 | - 1 38 | - 1 39 | - 0.2 40 | - 5 41 | SAMPLE_PER_EPOCH: 30000 42 | VAL: 43 | DATASETS_NAME: # [] 44 | - OTB99_test 45 | - TNL2K_test 46 | - LASOT_test 47 | VALTRACK: 48 | DATASETS_NAME: 49 | - LASOT_test 50 | - LASOTEXT 51 | - OTB99_test 52 | - TNL2K_test 53 | DATASETS_RATIO: 54 | - 1 55 | - 1 56 | - 1 57 | - 1 58 | SAMPLE_PER_EPOCH: 6400 59 | VALVL: 60 | DATASETS_NAME: 61 | - LASOT_test 62 | - LASOTEXT 63 | - OTB99_test 64 | - TNL2K_test 65 | DATASETS_RATIO: 66 | - 1 67 | - 1 68 | - 1 69 | - 1 70 | SAMPLE_PER_EPOCH: 6400 71 | MODEL: 72 | BACKBONE: 73 | FUSION_LAYER: [6,7,8,9,10,11] 74 | TYPE: modality_unified_feature_extractor 75 | PRETRAINED_PATH: 'pretrain/mae_pretrain_vit_base.pth' 76 | CONT_LOSS_LAYER: [3,4,5,6,7,8,9,10,11] 77 | TXT_TOKEN_MODE: 'cls' 78 | LANGUAGE: 79 | TYPE: 'pretrain/bert' 80 | PATH: 'pretrain/bert/bert-base-uncased.tar.gz' 81 | VOCAB_PATH: 'pretrain/bert/bert-base-uncased-vocab.txt' 82 | HEAD: 83 | HEAD_DIM: 256 84 | TYPE: modality_adaptive_box_head 85 | OFFSET_SIGMOID: true 86 | CLS_TOKENIZE: false 87 | JOINT_CLS: false 88 | SOFTMAX_ONE: true 89 | HIDDEN_DIM: 768 90 | POSITION_EMBEDDING: sine 91 | TRAIN: 92 | MODE: joint 93 | GROUNDING_RATIO: 0.11 94 | VL_RATIO: 0.44 95 | BACKBONE_MULTIPLIER: 0.1 96 | BATCH_SIZE: 8 97 | EPOCH: 300 98 | GIOU_WEIGHT: 2.0 99 | GRAD_CLIP_NORM: 0.1 100 | L1_WEIGHT: 5.0 101 | AUX_WEIGHT: 0.1 102 | CONT_WEIGHT: 1.0 103 | CIB_WEIGHT: 0.01 104 | CTR_RATIO: 0.75 105 | LR: 0.0004 106 | NUM_WORKER: 10 107 | OPTIMIZER: ADAMW 108 | PRINT_INTERVAL: 50 109 | SCHEDULER: 110 | TYPE: CosineAnnealingLR 111 | DECAY_RATE: 0.1 112 | VAL_EPOCH_INTERVAL: 1 113 | WEIGHT_DECAY: 0.0001 114 | DYNAMIC_CLS: true 115 | REDUCTION: mean 116 | GAUSSIAN_IOU: 0.7 117 | TEST: 118 | MODE: 'NLBBOX' 119 | EPOCH: 300 120 | SEARCH_FACTOR: 4.0 121 | SEARCH_SIZE: 256 122 | TEMPLATE_FACTOR: 2.0 123 | TEMPLATE_SIZE: 128 124 | UPDATE_INTERVAL: 20 -------------------------------------------------------------------------------- /experiments/uvltrack/baseline_base_grounding.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | CONTEXT_GAP: 20 3 | MAX_SAMPLE_INTERVAL: 200 4 | MEAN: 5 | - 0.485 6 | - 0.456 7 | - 0.406 8 | SEARCH: 9 | CENTER_JITTER: 3.5 10 | FACTOR: 4.0 11 | SCALE_JITTER: 0.5 12 | SIZE: 384 13 | NUMBER: 2 14 | STD: 15 | - 0.229 16 | - 0.224 17 | - 0.225 18 | TEMPLATE: 19 | CENTER_JITTER: 0 20 | FACTOR: 2.0 21 | SCALE_JITTER: 0 22 | SIZE: 128 23 | NUMBER: 1 24 | TRAIN: 25 | DATASETS_NAME: 26 | - REFCOCOG 27 | DATASETS_RATIO: 28 | - 1 29 | SAMPLE_PER_EPOCH: 30000 30 | VAL: 31 | DATASETS_NAME: # [] 32 | - REFCOCOG_val 33 | VALTRACK: 34 | DATASETS_NAME: 35 | - LASOT_test 36 | - LASOTEXT 37 | - OTB99_test 38 | - TNL2K_test 39 | DATASETS_RATIO: 40 | - 1 41 | - 1 42 | - 1 43 | - 1 44 | SAMPLE_PER_EPOCH: 6400 45 | VALVL: 46 | DATASETS_NAME: 47 | - LASOT_test 48 | - LASOTEXT 49 | - OTB99_test 50 | - TNL2K_test 51 | DATASETS_RATIO: 52 | - 1 53 | - 1 54 | - 1 55 | - 1 56 | SAMPLE_PER_EPOCH: 6400 57 | MODEL: 58 | BACKBONE: 59 | FUSION_LAYER: [6,7,8,9,10,11] 60 | TYPE: modality_unified_feature_extractor 61 | PRETRAINED_PATH: 'pretrain/mae_pretrain_vit_base.pth' 62 | CONT_LOSS_LAYER: [3,4,5,6,7,8,9,10,11] 63 | TXT_TOKEN_MODE: 
'cls' 64 | LANGUAGE: 65 | TYPE: 'pretrain/bert' 66 | PATH: 'pretrain/bert/bert-base-uncased.tar.gz' 67 | VOCAB_PATH: 'pretrain/bert/bert-base-uncased-vocab.txt' 68 | HEAD: 69 | HEAD_DIM: 256 70 | TYPE: modality_adaptive_box_head 71 | OFFSET_SIGMOID: true 72 | CLS_TOKENIZE: false 73 | JOINT_CLS: false 74 | SOFTMAX_ONE: true 75 | HIDDEN_DIM: 768 76 | POSITION_EMBEDDING: sine 77 | TRAIN: 78 | MODE: grounding 79 | GROUNDING_RATIO: 0.11 80 | VL_RATIO: 0.44 81 | BACKBONE_MULTIPLIER: 0.1 82 | BATCH_SIZE: 16 83 | EPOCH: 100 84 | GIOU_WEIGHT: 2.0 85 | GRAD_CLIP_NORM: 0.1 86 | L1_WEIGHT: 5.0 87 | AUX_WEIGHT: 0.1 88 | CONT_WEIGHT: 1.0 89 | CIB_WEIGHT: 0.01 90 | CTR_RATIO: 0.75 91 | LR: 0.0004 92 | NUM_WORKER: 10 93 | OPTIMIZER: ADAMW 94 | PRINT_INTERVAL: 50 95 | SCHEDULER: 96 | TYPE: CosineAnnealingLR 97 | DECAY_RATE: 0.1 98 | VAL_EPOCH_INTERVAL: 1 99 | WEIGHT_DECAY: 0.0001 100 | DYNAMIC_CLS: true 101 | REDUCTION: mean 102 | GAUSSIAN_IOU: 0.7 103 | TEST: 104 | MODE: 'NLBBOX' 105 | EPOCH: 300 106 | SEARCH_FACTOR: 4.0 107 | SEARCH_SIZE: 256 108 | TEMPLATE_FACTOR: 2.0 109 | TEMPLATE_SIZE: 128 110 | UPDATE_INTERVAL: 20 -------------------------------------------------------------------------------- /experiments/uvltrack/baseline_large.yaml: -------------------------------------------------------------------------------- 1 | DATA: 2 | CONTEXT_GAP: 20 3 | MAX_SAMPLE_INTERVAL: 200 4 | MEAN: 5 | - 0.485 6 | - 0.456 7 | - 0.406 8 | SEARCH: 9 | CENTER_JITTER: 4.5 10 | FACTOR: 5.0 11 | SCALE_JITTER: 0.5 12 | SIZE: 256 13 | NUMBER: 2 14 | STD: 15 | - 0.229 16 | - 0.224 17 | - 0.225 18 | TEMPLATE: 19 | CENTER_JITTER: 0 20 | FACTOR: 2.0 21 | SCALE_JITTER: 0 22 | SIZE: 128 23 | NUMBER: 1 24 | TRAIN: 25 | DATASETS_NAME: 26 | - GOT10K_vottrain 27 | - LASOT 28 | - COCO17 29 | - TRACKINGNET 30 | - TNL2K 31 | - OTB99 32 | - REFCOCOG 33 | DATASETS_RATIO: 34 | - 1 35 | - 1 36 | - 1 37 | - 1 38 | - 1 39 | - 0.2 40 | - 5 41 | SAMPLE_PER_EPOCH: 30000 42 | VAL: 43 | DATASETS_NAME: # [] 44 | - OTB99_test 45 | - TNL2K_test 46 | - LASOT_test 47 | VALTRACK: 48 | DATASETS_NAME: 49 | - LASOT_test 50 | - LASOTEXT 51 | - OTB99_test 52 | - TNL2K_test 53 | DATASETS_RATIO: 54 | - 1 55 | - 1 56 | - 1 57 | - 1 58 | SAMPLE_PER_EPOCH: 6400 59 | VALVL: 60 | DATASETS_NAME: 61 | - LASOT_test 62 | - LASOTEXT 63 | - OTB99_test 64 | - TNL2K_test 65 | DATASETS_RATIO: 66 | - 1 67 | - 1 68 | - 1 69 | - 1 70 | SAMPLE_PER_EPOCH: 6400 71 | MODEL: 72 | BACKBONE: 73 | FUSION_LAYER: [12,13,14,15,16,17,18,19,20,21,22,23] 74 | TYPE: modality_unified_feature_extractor 75 | PRETRAINED_PATH: 'pretrain/mae_pretrain_vit_large.pth' 76 | CONT_LOSS_LAYER: [8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23] 77 | TXT_TOKEN_MODE: 'cls' 78 | LANGUAGE: 79 | TYPE: 'pretrain/bert-large-uncased' 80 | PATH: 'pretrain/bert-large-uncased/bert-large-uncased.tar.gz' 81 | VOCAB_PATH: 'pretrain/bert-large-uncased/bert-large-uncased-vocab.txt' 82 | HEAD: 83 | HEAD_DIM: 256 84 | TYPE: modality_adaptive_box_head 85 | OFFSET_SIGMOID: true 86 | CLS_TOKENIZE: false 87 | JOINT_CLS: false 88 | SOFTMAX_ONE: true 89 | HIDDEN_DIM: 1024 90 | POSITION_EMBEDDING: sine 91 | TRAIN: 92 | MODE: joint 93 | GROUNDING_RATIO: 0.11 94 | VL_RATIO: 0.44 95 | BACKBONE_MULTIPLIER: 0.1 96 | BATCH_SIZE: 4 97 | EPOCH: 300 98 | GIOU_WEIGHT: 2.0 99 | GRAD_CLIP_NORM: 0.1 100 | L1_WEIGHT: 5.0 101 | AUX_WEIGHT: 0.1 102 | CONT_WEIGHT: 1.0 103 | CIB_WEIGHT: 0.01 104 | CTR_RATIO: 0.75 105 | LR: 0.0002 106 | NUM_WORKER: 10 107 | OPTIMIZER: ADAMW 108 | PRINT_INTERVAL: 50 109 | SCHEDULER: 110 | TYPE: CosineAnnealingLR 111 | 
DECAY_RATE: 0.1 112 | VAL_EPOCH_INTERVAL: 1 113 | WEIGHT_DECAY: 0.0001 114 | DYNAMIC_CLS: true 115 | REDUCTION: mean 116 | GAUSSIAN_IOU: 0.7 117 | TEST: 118 | MODE: 'BBOX' 119 | EPOCH: 300 120 | SEARCH_FACTOR: 5.0 121 | SEARCH_SIZE: 256 122 | TEMPLATE_FACTOR: 2.0 123 | TEMPLATE_SIZE: 128 124 | UPDATE_INTERVAL: 20 -------------------------------------------------------------------------------- /fig/arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/fig/arch.png -------------------------------------------------------------------------------- /fig/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/fig/results.png -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | echo "****************** Installing pytorch ******************" 2 | conda install pytorch==1.7.1 torchvision==0.8.2 torchaudio==0.7.2 cudatoolkit=11.0 -c pytorch -y 3 | 4 | echo "" 5 | echo "" 6 | echo "****************** Installing yaml ******************" 7 | pip install PyYAML 8 | 9 | echo "" 10 | echo "" 11 | echo "****************** Installing easydict ******************" 12 | pip install easydict 13 | 14 | echo "" 15 | echo "" 16 | echo "****************** Installing cython ******************" 17 | pip install cython 18 | 19 | echo "" 20 | echo "" 21 | echo "****************** Installing opencv-python ******************" 22 | pip install opencv-python 23 | 24 | echo "" 25 | echo "" 26 | echo "****************** Installing pandas ******************" 27 | pip install pandas 28 | 29 | echo "" 30 | echo "" 31 | echo "****************** Installing tqdm ******************" 32 | conda install -y tqdm 33 | 34 | echo "" 35 | echo "" 36 | echo "****************** Installing coco toolkit ******************" 37 | pip install pycocotools 38 | 39 | echo "" 40 | echo "" 41 | echo "****************** Installing jpeg4py python wrapper ******************" 42 | apt-get install libturbojpeg 43 | pip install jpeg4py 44 | 45 | echo "" 46 | echo "" 47 | echo "****************** Installing tensorboard ******************" 48 | pip install tb-nightly 49 | 50 | echo "" 51 | echo "" 52 | echo "****************** Installing tikzplotlib ******************" 53 | pip install tikzplotlib 54 | 55 | echo "" 56 | echo "" 57 | echo "****************** Installing thop tool for FLOPs and Params computing ******************" 58 | pip install --upgrade git+https://github.com/Lyken17/pytorch-OpCounter.git 59 | 60 | echo "" 61 | echo "" 62 | echo "****************** Installing colorama ******************" 63 | pip install colorama 64 | 65 | echo "" 66 | echo "" 67 | echo "****************** Installing lmdb ******************" 68 | pip install lmdb 69 | 70 | echo "" 71 | echo "" 72 | echo "****************** Installing scipy ******************" 73 | pip install scipy 74 | 75 | echo "" 76 | echo "" 77 | echo "****************** Installing visdom ******************" 78 | pip install visdom 79 | 80 | echo "" 81 | echo "" 82 | echo "****************** Installing vot-toolkit python ******************" 83 | pip install git+https://github.com/votchallenge/vot-toolkit-python 84 | 85 | echo "" 86 | echo "" 87 | echo "****************** Installing onnx and onnxruntime-gpu 
******************" 88 | pip install onnx onnxruntime-gpu==1.6.0 89 | 90 | echo "" 91 | echo "" 92 | echo "****************** Installing timm ******************" 93 | pip install timm==0.3.2 94 | 95 | echo "****************** Installing yacs/einops/thop ******************" 96 | pip install yacs 97 | pip install einops 98 | pip install thop 99 | 100 | echo "****************** Install ninja-build for Precise ROI pooling ******************" 101 | apt-get install ninja-build 102 | 103 | echo "****************** Installation complete! ******************" 104 | 105 | python -m pip install -i http://pkg.sensetime.com/repository/pypi-proxy/simple/ --trusted-host pkg.sensetime.com http://10.5.41.14/packages/petrel-oss-sdk.tar.gz --user -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/__init__.py -------------------------------------------------------------------------------- /lib/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/config/__init__.py -------------------------------------------------------------------------------- /lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .uvltrack import uvltrack 2 | -------------------------------------------------------------------------------- /lib/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from lib import registry 2 | from .modality_unified_feature_extractor import modality_unified_feature_extractor 3 | 4 | @registry.BACKBONES.register('modality_unified_feature_extractor') 5 | def build_modality_unified_feature_extractor(cfg): 6 | vit = modality_unified_feature_extractor(cfg) 7 | return vit -------------------------------------------------------------------------------- /lib/models/backbones/block.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from .utils import LayerScale, DropPath, Mlp 3 | 4 | class Block(nn.Module): 5 | def __init__( 6 | self, 7 | dim, 8 | num_heads, 9 | mlp_ratio=4., 10 | qkv_bias=False, 11 | drop=0., 12 | attn_drop=0., 13 | init_values=None, 14 | drop_path=0., 15 | act_layer=nn.GELU, 16 | norm_layer=nn.LayerNorm 17 | ): 18 | super().__init__() 19 | self.norm1 = norm_layer(dim) 20 | self.attn = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, attn_drop=attn_drop, proj_drop=drop) 21 | self.ls1 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity() 22 | self.drop_path1 = DropPath(drop_path) if drop_path > 0. else nn.Identity() 23 | 24 | self.norm2 = norm_layer(dim) 25 | self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop) 26 | self.ls2 = LayerScale(dim, init_values=init_values) if init_values else nn.Identity() 27 | self.drop_path2 = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() 28 | 29 | def forward(self, x, mask=None, flag=None): 30 | x = x + self.drop_path1(self.ls1(self.attn(self.norm1(x), mask, flag=flag))) 31 | x = x + self.drop_path2(self.ls2(self.mlp(self.norm2(x)))) 32 | return x 33 | 34 | class Attention(nn.Module): 35 | def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.): 36 | super().__init__() 37 | assert dim % num_heads == 0, 'dim should be divisible by num_heads' 38 | self.num_heads = num_heads 39 | head_dim = dim // num_heads 40 | self.scale = head_dim ** -0.5 41 | 42 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) 43 | self.attn_drop = nn.Dropout(attn_drop) 44 | self.proj = nn.Linear(dim, dim) 45 | self.proj_drop = nn.Dropout(proj_drop) 46 | 47 | def forward(self, x, mask=None, flag=None): 48 | B, N, C = x.shape 49 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) 50 | q, k, v = qkv.unbind(0) # make torchscript happy (cannot use tensor as tuple) 51 | 52 | attn = (q @ k.transpose(-2, -1)) * self.scale 53 | if mask is not None: 54 | attn = attn.masked_fill(mask.unsqueeze(1).unsqueeze(1), -1e10) 55 | attn = attn.softmax(dim=-1) 56 | attn = self.attn_drop(attn) 57 | 58 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) 59 | x = self.proj(x) 60 | x = self.proj_drop(x) 61 | return x 62 | -------------------------------------------------------------------------------- /lib/models/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from lib import registry 2 | from .modality_adaptive_box_head import ModalityAdaptiveBoxHead 3 | 4 | @registry.HEADS.register('modality_adaptive_box_head') 5 | def build_modality_adaptive_box_head(cfg): 6 | stride = 16 7 | feat_sz = int(cfg.DATA.SEARCH.SIZE / stride) 8 | channel = cfg.MODEL.HEAD.HEAD_DIM 9 | head = ModalityAdaptiveBoxHead(inplanes=cfg.MODEL.HIDDEN_DIM, channel=channel, feat_sz=feat_sz, stride=stride, 10 | cls_tokenize=cfg.MODEL.HEAD.CLS_TOKENIZE, offset_sigmoid=cfg.MODEL.HEAD.OFFSET_SIGMOID, 11 | joint_cls=cfg.MODEL.HEAD.JOINT_CLS, drop_rate=cfg.MODEL.HEAD.DROP, softmax_one=cfg.MODEL.HEAD.SOFTMAX_ONE, 12 | grounding_dilation=cfg.MODEL.HEAD.GROUNDING_DILATION, contrastive_conv=cfg.MODEL.HEAD.CONTRASTIVE_CONV) 13 | return head -------------------------------------------------------------------------------- /lib/models/uvltrack/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/models/uvltrack/__init__.py -------------------------------------------------------------------------------- /lib/models/uvltrack/utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | def conv(in_planes, out_planes, kernel_size=3, stride=1, padding=1, dilation=1, 6 | freeze_bn=False): 7 | if freeze_bn: 8 | return nn.Sequential( 9 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, 10 | padding=padding, dilation=dilation, bias=True), 11 | FrozenBatchNorm2d(out_planes), 12 | nn.ReLU(inplace=True)) 13 | else: 14 | return nn.Sequential( 15 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, 16 | padding=padding, dilation=dilation, bias=True), 17 | nn.BatchNorm2d(out_planes), 18 | nn.ReLU(inplace=True)) 19 | 20 | class FrozenBatchNorm2d(torch.nn.Module): 21 | """ 22 | BatchNorm2d where the 
batch statistics and the affine parameters are fixed. 23 | 24 | Copied from torchvision.misc.ops, with eps added before rsqrt, 25 | without which models other than torchvision.models.resnet[18,34,50,101] 26 | produce NaNs. 27 | """ 28 | 29 | def __init__(self, n): 30 | super(FrozenBatchNorm2d, self).__init__() 31 | self.register_buffer("weight", torch.ones(n)) 32 | self.register_buffer("bias", torch.zeros(n)) 33 | self.register_buffer("running_mean", torch.zeros(n)) 34 | self.register_buffer("running_var", torch.ones(n)) 35 | 36 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, 37 | missing_keys, unexpected_keys, error_msgs): 38 | num_batches_tracked_key = prefix + 'num_batches_tracked' 39 | if num_batches_tracked_key in state_dict: 40 | del state_dict[num_batches_tracked_key] 41 | 42 | super(FrozenBatchNorm2d, self)._load_from_state_dict( 43 | state_dict, prefix, local_metadata, strict, 44 | missing_keys, unexpected_keys, error_msgs) 45 | 46 | def forward(self, x): 47 | # move reshapes to the beginning 48 | # to make it fuser-friendly 49 | w = self.weight.reshape(1, -1, 1, 1) 50 | b = self.bias.reshape(1, -1, 1, 1) 51 | rv = self.running_var.reshape(1, -1, 1, 1) 52 | rm = self.running_mean.reshape(1, -1, 1, 1) 53 | eps = 1e-5 54 | scale = w * (rv + eps).rsqrt() # rsqrt(x): 1/sqrt(x), r: reciprocal 55 | bias = b - rm * scale 56 | return x * scale + bias 57 | -------------------------------------------------------------------------------- /lib/models/uvltrack/uvltrack.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from lib import registry 4 | from lib.models.backbones import * 5 | from lib.models.heads import * 6 | 7 | 8 | class UVLTrack(nn.Module): 9 | """ This is the base class for Transformer Tracking, which jointly performs feature extraction and interaction. """ 10 | def __init__(self, backbone, box_head): 11 | """ Initializes the model.
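Args (as wired up by build_model below): backbone is the modality-unified feature extractor built from registry.BACKBONES; box_head is the modality-adaptive box head built from registry.HEADS.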
12 | """ 13 | super().__init__() 14 | # self.language_backbone = language_backbone 15 | self.backbone = backbone 16 | self.box_head = box_head 17 | 18 | def forward(self, template, search, text, template_mask, context_mask, flag): 19 | # text_feature = self.language_backbone(text) # b, s, c b, s FT 20 | backbone_info = self.backbone(template, search, text, flag) 21 | backbone_info['template_mask'] = template_mask 22 | backbone_info['context_mask'] = context_mask 23 | head_info = self.box_head(backbone_info) 24 | return head_info 25 | 26 | def forward_prompt_init(self, template, search, text, template_mask, context_mask, flag): 27 | backbone_info = self.backbone(template, search, text, flag) 28 | backbone_info['template_mask'] = template_mask 29 | backbone_info['context_mask'] = context_mask 30 | prompt = self.box_head.forward_prompt(backbone_info) 31 | return prompt 32 | 33 | def forward_prompt(self, out_dict, template_mask, context_mask): 34 | backbone_info = out_dict 35 | backbone_info['template_mask'] = template_mask 36 | backbone_info['context_mask'] = context_mask 37 | prompt = self.box_head.forward_prompt(backbone_info) 38 | return prompt 39 | 40 | 41 | def forward_test(self, template, search, text, prompt, flag): 42 | backbone_info = self.backbone(template, search, text, flag) 43 | backbone_info['prompt'] = prompt 44 | head_info = self.box_head(backbone_info) 45 | return head_info 46 | 47 | @registry.MODELS.register('uvltrack') 48 | def build_model(cfg): 49 | # language_backbone = registry.BACKBONES[cfg.MODEL.BACKBONE.LANGUAGE.TYPE](cfg) 50 | backbone = registry.BACKBONES[cfg.MODEL.BACKBONE.TYPE](cfg) 51 | head = registry.HEADS[cfg.MODEL.HEAD.TYPE](cfg) # a simple corner head 52 | model = UVLTrack( 53 | # language_backbone, 54 | backbone, 55 | head 56 | ) 57 | return model 58 | -------------------------------------------------------------------------------- /lib/registry.py: -------------------------------------------------------------------------------- 1 | def _register_generic(module_dict, module_name, module): 2 | assert module_name not in module_dict 3 | module_dict[module_name] = module 4 | 5 | 6 | class Registry(dict): 7 | ''' 8 | A helper class for managing registering modules, it extends a dictionary 9 | and provides a register functions. 10 | 11 | Eg. creeting a registry: 12 | some_registry = Registry({"default": default_module}) 13 | 14 | There're two ways of registering new modules: 15 | 1): normal way is just calling register function: 16 | def foo(): 17 | ... 18 | some_registry.register("foo_module", foo) 19 | 2): used as decorator when declaring the module: 20 | @some_registry.register("foo_module") 21 | @some_registry.register("foo_modeul_nickname") 22 | def foo(): 23 | ... 
24 | 25 | Modules are accessed just like entries of a dictionary, e.g.: 26 | f = some_registry["foo_module"] 27 | ''' 28 | def __init__(self, *args, **kwargs): 29 | super(Registry, self).__init__(*args, **kwargs) 30 | 31 | def register(self, module_name, module=None): 32 | # used as function call 33 | if module is not None: 34 | _register_generic(self, module_name, module) 35 | return 36 | 37 | # used as decorator 38 | def register_fn(fn): 39 | _register_generic(self, module_name, fn) 40 | return fn 41 | 42 | return register_fn 43 | 44 | 45 | ACTORS = Registry() 46 | MODELS = Registry() 47 | BACKBONES = Registry() 48 | HEADS = Registry() 49 | LOSSES = Registry() -------------------------------------------------------------------------------- /lib/test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/test/__init__.py -------------------------------------------------------------------------------- /lib/test/analysis/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/test/analysis/__init__.py -------------------------------------------------------------------------------- /lib/test/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .data import Sequence 2 | from .tracker import Tracker, trackerlist 3 | from .datasets import get_dataset 4 | from .environment import create_default_local_file_ITP_test -------------------------------------------------------------------------------- /lib/test/evaluation/datasets.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import importlib 3 | from lib.test.evaluation.data import SequenceList 4 | 5 | DatasetInfo = namedtuple('DatasetInfo', ['module', 'class_name', 'kwargs']) 6 | 7 | pt = "lib.test.evaluation.%sdataset" # Useful abbreviation to reduce the clutter 8 | 9 | dataset_dict = dict( 10 | otb=DatasetInfo(module=pt % "otb", class_name="OTBDataset", kwargs=dict()), 11 | otb99=DatasetInfo(module=pt % "otb99", class_name="OTB99Dataset", kwargs=dict()), 12 | nfs=DatasetInfo(module=pt % "nfs", class_name="NFSDataset", kwargs=dict()), 13 | uav=DatasetInfo(module=pt % "uav", class_name="UAVDataset", kwargs=dict()), 14 | tc128=DatasetInfo(module=pt % "tc128", class_name="TC128Dataset", kwargs=dict()), 15 | tc128ce=DatasetInfo(module=pt % "tc128ce", class_name="TC128CEDataset", kwargs=dict()), 16 | trackingnet=DatasetInfo(module=pt % "trackingnet", class_name="TrackingNetDataset", kwargs=dict()), 17 | got10k_test=DatasetInfo(module=pt % "got10k", class_name="GOT10KDataset", kwargs=dict(split='test')), 18 | got10k_val=DatasetInfo(module=pt % "got10k", class_name="GOT10KDataset", kwargs=dict(split='val')), 19 | got10k_ltrval=DatasetInfo(module=pt % "got10k", class_name="GOT10KDataset", kwargs=dict(split='ltrval')), 20 | lasot=DatasetInfo(module=pt % "lasot", class_name="LaSOTDataset", kwargs=dict()), 21 | lasotext=DatasetInfo(module=pt % "lasotext", class_name="LaSOTextDataset", kwargs=dict()), 22 | lasot_lmdb=DatasetInfo(module=pt % "lasot_lmdb", class_name="LaSOTlmdbDataset", kwargs=dict()), 23 | tnl2k=DatasetInfo(module=pt % "tnl2k", class_name="TNL2KDataset", kwargs=dict()), 24 | itb=DatasetInfo(module=pt % "itb", class_name="ITBDataset",
kwargs=dict()), 25 | avist=DatasetInfo(module=pt % "avist", class_name="AVisTDataset", kwargs=dict()), 26 | ) 27 | 28 | 29 | def load_dataset(name: str): 30 | """ Import and load a single dataset.""" 31 | name = name.lower() 32 | dset_info = dataset_dict.get(name) 33 | if dset_info is None: 34 | raise ValueError('Unknown dataset \'%s\'' % name) 35 | 36 | m = importlib.import_module(dset_info.module) 37 | dataset = getattr(m, dset_info.class_name)(**dset_info.kwargs) # Call the constructor 38 | return dataset.get_sequence_list() 39 | 40 | 41 | def get_dataset(*args): 42 | """ Get a single or set of datasets.""" 43 | dset = SequenceList() 44 | for name in args: 45 | dset.extend(load_dataset(name.split('_')[0])) 46 | return dset -------------------------------------------------------------------------------- /lib/test/evaluation/environment.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | 4 | 5 | class EnvSettings: 6 | def __init__(self): 7 | test_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) 8 | 9 | self.results_path = '{}/tracking_results/'.format(test_path) 10 | self.segmentation_path = '{}/segmentation_results/'.format(test_path) 11 | self.network_path = '{}/networks/'.format(test_path) 12 | self.result_plot_path = '{}/result_plots/'.format(test_path) 13 | self.otb_path = '' 14 | self.nfs_path = '' 15 | self.uav_path = '' 16 | self.tpl_path = '' 17 | self.vot_path = '' 18 | self.got10k_path = '' 19 | self.lasot_path = '' 20 | self.trackingnet_path = '' 21 | self.davis_dir = '' 22 | self.youtubevos_dir = '' 23 | 24 | self.got_packed_results_path = '' 25 | self.got_reports_path = '' 26 | self.tn_packed_results_path = '' 27 | 28 | 29 | def create_default_local_file(): 30 | comment = {'results_path': 'Where to store tracking results', 31 | 'network_path': 'Where tracking networks are stored.'} 32 | 33 | path = os.path.join(os.path.dirname(__file__), 'local.py') 34 | with open(path, 'w') as f: 35 | settings = EnvSettings() 36 | 37 | f.write('from lib.test.evaluation.environment import EnvSettings\n\n') 38 | f.write('def local_env_settings():\n') 39 | f.write(' settings = EnvSettings()\n\n') 40 | f.write(' # Set your local paths here.\n\n') 41 | 42 | for attr in dir(settings): 43 | comment_str = None 44 | if attr in comment: 45 | comment_str = comment[attr] 46 | attr_val = getattr(settings, attr) 47 | if not attr.startswith('__') and not callable(attr_val): 48 | if comment_str is None: 49 | f.write(' settings.{} = \'{}\'\n'.format(attr, attr_val)) 50 | else: 51 | f.write(' settings.{} = \'{}\' # {}\n'.format(attr, attr_val, comment_str)) 52 | f.write('\n return settings\n\n') 53 | 54 | 55 | class EnvSettings_ITP: 56 | def __init__(self, workspace_dir, data_dir, save_dir): 57 | self.prj_dir = workspace_dir 58 | self.save_dir = save_dir 59 | self.results_path = os.path.join(save_dir, 'test/tracking_results') 60 | self.segmentation_path = os.path.join(save_dir, 'test/segmentation_results') 61 | self.network_path = os.path.join(save_dir, 'test/networks') 62 | self.result_plot_path = os.path.join(save_dir, 'test/result_plots') 63 | self.otb_path = os.path.join(data_dir, 'OTB2015') 64 | self.nfs_path = os.path.join(data_dir, 'nfs') 65 | self.uav_path = os.path.join(data_dir, 'UAV123') 66 | self.tc128_path = os.path.join(data_dir, 'TC128') 67 | self.tpl_path = '' 68 | self.vot_path = os.path.join(data_dir, 'VOT2019') 69 | self.got10k_path = os.path.join(data_dir, 'got10k') 70 | self.got10k_lmdb_path =
os.path.join(data_dir, 'got10k_lmdb') 71 | self.lasot_path = os.path.join(data_dir, 'lasot') 72 | self.lasot_lmdb_path = os.path.join(data_dir, 'lasot_lmdb') 73 | self.trackingnet_path = os.path.join(data_dir, 'trackingNet') 74 | self.davis_dir = '' 75 | self.youtubevos_dir = '' 76 | 77 | self.got_packed_results_path = '' 78 | self.got_reports_path = '' 79 | self.tn_packed_results_path = '' 80 | 81 | 82 | def create_default_local_file_ITP_test(workspace_dir, data_dir, save_dir): 83 | comment = {'results_path': 'Where to store tracking results', 84 | 'network_path': 'Where tracking networks are stored.'} 85 | 86 | path = os.path.join(os.path.dirname(__file__), 'local.py') 87 | with open(path, 'w') as f: 88 | settings = EnvSettings_ITP(workspace_dir, data_dir, save_dir) 89 | 90 | f.write('from lib.test.evaluation.environment import EnvSettings\n\n') 91 | f.write('def local_env_settings():\n') 92 | f.write(' settings = EnvSettings()\n\n') 93 | f.write(' # Set your local paths here.\n\n') 94 | 95 | for attr in dir(settings): 96 | comment_str = None 97 | if attr in comment: 98 | comment_str = comment[attr] 99 | attr_val = getattr(settings, attr) 100 | if not attr.startswith('__') and not callable(attr_val): 101 | if comment_str is None: 102 | f.write(' settings.{} = \'{}\'\n'.format(attr, attr_val)) 103 | else: 104 | f.write(' settings.{} = \'{}\' # {}\n'.format(attr, attr_val, comment_str)) 105 | f.write('\n return settings\n\n') 106 | 107 | 108 | def env_settings(): 109 | env_module_name = 'lib.test.evaluation.local' 110 | try: 111 | env_module = importlib.import_module(env_module_name) 112 | return env_module.local_env_settings() 113 | except: 114 | env_file = os.path.join(os.path.dirname(__file__), 'local.py') 115 | 116 | # Create a default file 117 | create_default_local_file() 118 | raise RuntimeError('YOU HAVE NOT SETUP YOUR local.py!!!\n Go to "{}" and set all the paths you need. ' 119 | 'Then try to run again.'.format(env_file)) -------------------------------------------------------------------------------- /lib/test/evaluation/got10kdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset, SequenceList 3 | from lib.test.utils.load_text import load_text 4 | import os 5 | 6 | 7 | class GOT10KDataset(BaseDataset): 8 | """ GOT-10k dataset. 
9 | 10 | Publication: 11 | GOT-10k: A Large High-Diversity Benchmark for Generic Object Tracking in the Wild 12 | Lianghua Huang, Xin Zhao, and Kaiqi Huang 13 | arXiv:1810.11981, 2018 14 | https://arxiv.org/pdf/1810.11981.pdf 15 | 16 | Download dataset from http://got-10k.aitestunion.com/downloads 17 | """ 18 | def __init__(self, split): 19 | super().__init__() 20 | # Split can be test, val, or ltrval (a validation split consisting of videos from the official train set) 21 | if split == 'test' or split == 'val': 22 | self.base_path = os.path.join(self.env_settings.got10k_path, split) 23 | else: 24 | self.base_path = os.path.join(self.env_settings.got10k_path, 'train') 25 | 26 | self.sequence_list = self._get_sequence_list(split) 27 | self.split = split 28 | 29 | def get_sequence_list(self): 30 | return SequenceList([self._construct_sequence(s) for s in self.sequence_list]) 31 | 32 | def _construct_sequence(self, sequence_name): 33 | anno_path = '{}/{}/groundtruth.txt'.format(self.base_path, sequence_name) 34 | 35 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64) 36 | 37 | frames_path = '{}/{}'.format(self.base_path, sequence_name) 38 | frame_list = [frame for frame in os.listdir(frames_path) if frame.endswith(".jpg")] 39 | frame_list.sort(key=lambda f: int(f[:-4])) 40 | frames_list = [os.path.join(frames_path, frame) for frame in frame_list] 41 | 42 | return Sequence(sequence_name, frames_list, 'got10k', ground_truth_rect.reshape(-1, 4)) 43 | 44 | def __len__(self): 45 | return len(self.sequence_list) 46 | 47 | def _get_sequence_list(self, split): 48 | with open('{}/list.txt'.format(self.base_path)) as f: 49 | sequence_list = f.read().splitlines() 50 | 51 | if split == 'ltrval': 52 | with open('{}/got10k_val_split.txt'.format(self.env_settings.dataspec_path)) as f: 53 | seq_ids = f.read().splitlines() 54 | 55 | sequence_list = [sequence_list[int(x)] for x in seq_ids] 56 | return sequence_list 57 | -------------------------------------------------------------------------------- /lib/test/evaluation/itbdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .data import Sequence, BaseDataset, SequenceList 3 | from .utils import load_text 4 | import os 5 | 6 | class ITBDataset(BaseDataset): 7 | """ ITB dataset (Informative Tracking Benchmark) 8 | """ 9 | def __init__(self): 10 | super().__init__() 11 | self.base_path = self.env_settings.itb_path 12 | self.sequence_info_list = self._get_sequence_info_list(self.base_path) 13 | 14 | def get_sequence_list(self): 15 | return SequenceList([self._construct_sequence(s) for s in self.sequence_info_list]) 16 | 17 | def _construct_sequence(self, sequence_info): 18 | sequence_path = sequence_info['path'] 19 | nz = sequence_info['nz'] 20 | ext = sequence_info['ext'] 21 | start_frame = sequence_info['startFrame'] 22 | end_frame = sequence_info['endFrame'] 23 | 24 | init_omit = 0 25 | if 'initOmit' in sequence_info: 26 | init_omit = sequence_info['initOmit'] 27 | 28 | frames = ['{base_path}/{sequence_path}/{frame:0{nz}}.{ext}'.format(base_path=self.base_path, 29 | sequence_path=sequence_path, frame=frame_num, nz=nz, ext=ext) for frame_num in range(start_frame+init_omit, end_frame+1)] 30 | 31 | anno_path = '{}/{}'.format(self.base_path, sequence_info['anno_path']) 32 | 33 | # NOTE: some annotation files have inconsistent delimiters, which pandas cannot handle 34 | ground_truth_rect = load_text(str(anno_path), delimiter=(',', None), dtype=np.float64, backend='numpy') 35 | return Sequence(sequence_info['name'],
frames, 'itb', ground_truth_rect[init_omit:,:], 36 | object_class=sequence_info['object_class'], language=sequence_info['object_class']) 37 | 38 | def __len__(self): 39 | return len(self.sequence_info_list) 40 | 41 | def get_fileNames(self, rootdir): 42 | fs = [] 43 | fs_all = [] 44 | for root, dirs, files in os.walk(rootdir, topdown=True): 45 | files.sort() 46 | files.sort(key=len) 47 | if files is not None: 48 | for name in files: 49 | _, ending = os.path.splitext(name) 50 | if ending == ".jpg": 51 | _, root_ = os.path.split(root) 52 | fs.append(os.path.join(root_, name)) 53 | fs_all.append(os.path.join(root, name)) 54 | 55 | return fs_all, fs 56 | 57 | def _get_sequence_info_list(self, base_path): 58 | sequence_info_list = [] 59 | for scene in os.listdir(base_path): 60 | if '.' in scene: 61 | continue 62 | videos = os.listdir(os.path.join(base_path, scene)) 63 | for video in videos: 64 | _, fs = self.get_fileNames(os.path.join(base_path, scene, video)) 65 | video_tmp = {"name": video, "path": scene+'/'+video, "startFrame": 1, "endFrame": len(fs), "nz": len(fs[0].split('/')[-1].split('.')[0]), "ext": "jpg", 66 | "anno_path": scene+'/'+video+"/groundtruth.txt", 67 | "object_class": "unknown"} 68 | sequence_info_list.append(video_tmp) 69 | 70 | return sequence_info_list 71 | -------------------------------------------------------------------------------- /lib/test/evaluation/lasotextdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset, SequenceList 3 | from lib.test.utils.load_text import load_text 4 | import glob 5 | import os 6 | 7 | class LaSOTextDataset(BaseDataset): 8 | """ 9 | LaSOT extension subset (LaSOT_ext) consisting of 150 videos from 15 object classes unseen in the original LaSOT 10 | 11 | Publication: 12 | LaSOT: A High-quality Large-scale Single Object Tracking Benchmark 13 | Heng Fan, Hexin Bai, Liting Lin, Fan Yang, Peng Chu et al. 14 | IJCV, 2021 15 | https://arxiv.org/abs/2009.03465 16 | 17 | Download the dataset from https://cis.temple.edu/lasot/download.html 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self.base_path = self.env_settings.lasot_ext_path 22 | self.sequence_list = self._get_sequence_list() 23 | self.clean_list = self.clean_seq_list() 24 | 25 | def clean_seq_list(self): 26 | clean_lst = [] 27 | for i in range(len(self.sequence_list)): 28 | cls, _ = self.sequence_list[i].split('-') 29 | clean_lst.append(cls) 30 | return clean_lst 31 | 32 | def get_sequence_list(self): 33 | return SequenceList([self._construct_sequence(s) for s in self.sequence_list]) 34 | 35 | def _construct_sequence(self, sequence_name): 36 | class_name = sequence_name.split('-')[0] 37 | anno_path = '{}/{}/{}/groundtruth.txt'.format(self.base_path, class_name, sequence_name) 38 | 39 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64) 40 | 41 | occlusion_label_path = '{}/{}/{}/full_occlusion.txt'.format(self.base_path, class_name, sequence_name) 42 | 43 | # NOTE: the pandas backend seems very slow for loading occlusion/oov masks 44 | full_occlusion = load_text(str(occlusion_label_path), delimiter=',', dtype=np.float64, backend='numpy') 45 | 46 | out_of_view_label_path = '{}/{}/{}/out_of_view.txt'.format(self.base_path, class_name, sequence_name) 47 | out_of_view = load_text(str(out_of_view_label_path), delimiter=',', dtype=np.float64, backend='numpy') 48 | 49 | target_visible
= np.logical_and(full_occlusion == 0, out_of_view == 0) 50 | 51 | frames_path = '{}/{}/{}/img'.format(self.base_path, class_name, sequence_name) 52 | 53 | frames_list = ['{}/{:08d}.jpg'.format(frames_path, frame_number) for frame_number in range(1, ground_truth_rect.shape[0] + 1)] 54 | 55 | language_file = os.path.join(self.base_path, class_name, sequence_name, "nlp.txt") 56 | with open(language_file, 'r') as f: 57 | language = f.readlines()[0].rstrip() 58 | 59 | target_class = class_name 60 | return Sequence(sequence_name, frames_list, 'lasotext', ground_truth_rect.reshape(-1, 4), 61 | object_class=target_class, target_visible=target_visible, language=language) 62 | 63 | def __len__(self): 64 | return len(self.sequence_list) 65 | 66 | def _get_sequence_list(self): 67 | sequence_list = [path.split('/')[-2] for path in sorted(glob.glob(os.path.join(self.base_path, '*/*/')))] 68 | return sequence_list 69 | -------------------------------------------------------------------------------- /lib/test/evaluation/local.py: -------------------------------------------------------------------------------- 1 | from lib.test.evaluation.environment import EnvSettings 2 | import os 3 | prj_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")) 4 | 5 | def local_env_settings(): 6 | settings = EnvSettings() 7 | settings.prj_dir = prj_dir 8 | settings.save_dir = prj_dir 9 | settings.result_plot_path = os.path.join(prj_dir, 'test/result_plots') 10 | settings.results_path = os.path.join(prj_dir, 'test/tracking_results') 11 | settings.lasot_path = os.path.join(prj_dir, 'data/lasot') 12 | settings.nfs_path = os.path.join(prj_dir, 'data/nfs') 13 | settings.otb_path = os.path.join(prj_dir, 'data/otb99') 14 | settings.trackingnet_path = os.path.join(prj_dir, 'data/trackingnet') 15 | settings.uav_path = os.path.join(prj_dir, 'data/uav') 16 | settings.tnl2k_path = os.path.join(prj_dir, 'data/tnl2k/test') 17 | settings.otb99_path = os.path.join(prj_dir, 'data/otb99') 18 | settings.lasot_ext_path = os.path.join(prj_dir, 'data/lasotext') 19 | 20 | return settings -------------------------------------------------------------------------------- /lib/test/evaluation/otb99dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset, SequenceList 3 | from lib.test.utils.load_text import load_text 4 | import glob 5 | import os 6 | 7 | class OTB99Dataset(BaseDataset): 8 | """ 9 | OTB99 test set: OTB sequences paired with a natural-language description of the target 10 | 11 | Publication: 12 | Tracking by Natural Language Specification 13 | Zhenyang Li, Ran Tao, Efstratios Gavves, Cees G. M. Snoek and Arnold W. M. Smeulders 14 | CVPR, 2017 15 | 16 | 17 | The language queries are loaded from OTB_query_test (one description per sequence). 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self.base_path = self.env_settings.otb99_path 22 | self.sequence_list = self._get_sequence_list() 23 | 24 | def get_sequence_list(self): 25 | return SequenceList([self._construct_sequence(s) for s in self.sequence_list]) 26 | 27 | def _construct_sequence(self, sequence_name): 28 | anno_path = os.path.join(self.base_path, "OTB_videos", sequence_name, 'groundtruth_rect.txt') 29 | try: 30 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64) 31 | except: 32 | ground_truth_rect = load_text(str(anno_path),
delimiter='\t', dtype=np.float64) 33 | 34 | frames_list = sorted(glob.glob(os.path.join(self.base_path, 'OTB_videos', sequence_name, 'img', '*'))) 35 | language_file = os.path.join(self.base_path, 'OTB_query_test', f"{sequence_name}.txt") 36 | 37 | with open(language_file, 'r') as f: 38 | language = f.readlines()[0].rstrip() 39 | 40 | return Sequence(sequence_name, frames_list, 'otb99', ground_truth_rect.reshape(-1, 4), 41 | object_class=None, target_visible=None, language=language) 42 | 43 | def __len__(self): 44 | return len(self.sequence_list) 45 | 46 | def _get_sequence_list(self): 47 | sequence_list = sorted([p.split('/')[-1].split('.')[0] for p in glob.glob(os.path.join(self.base_path, 'OTB_query_test/*'))]) 48 | return sequence_list 49 | -------------------------------------------------------------------------------- /lib/test/evaluation/tc128cedataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset, SequenceList 3 | import os 4 | import glob 5 | import six 6 | 7 | 8 | class TC128CEDataset(BaseDataset): 9 | """ 10 | TC-128 Dataset (78 newly added sequences) 11 | modified from the implementation in got10k-toolkit (https://github.com/got-10k/toolkit) 12 | """ 13 | def __init__(self): 14 | super().__init__() 15 | self.base_path = self.env_settings.tc128_path 16 | self.anno_files = sorted(glob.glob( 17 | os.path.join(self.base_path, '*/*_gt.txt'))) 18 | """filter the newly added sequences (_ce)""" 19 | self.anno_files = [s for s in self.anno_files if "_ce" in s] 20 | self.seq_dirs = [os.path.dirname(f) for f in self.anno_files] 21 | self.seq_names = [os.path.basename(d) for d in self.seq_dirs] 22 | # valid frame range for each sequence 23 | self.range_files = [glob.glob(os.path.join(d, '*_frames.txt'))[0] for d in self.seq_dirs] 24 | 25 | def get_sequence_list(self): 26 | return SequenceList([self._construct_sequence(s) for s in self.seq_names]) 27 | 28 | def _construct_sequence(self, sequence_name): 29 | if isinstance(sequence_name, six.string_types): 30 | if not sequence_name in self.seq_names: 31 | raise Exception('Sequence {} not found.'.format(sequence_name)) 32 | index = self.seq_names.index(sequence_name) 33 | # load valid frame range 34 | frames = np.loadtxt(self.range_files[index], dtype=int, delimiter=',') 35 | img_files = [os.path.join(self.seq_dirs[index], 'img/%04d.jpg' % f) for f in range(frames[0], frames[1] + 1)] 36 | 37 | # load annotations 38 | anno = np.loadtxt(self.anno_files[index], delimiter=',') 39 | assert len(img_files) == len(anno) 40 | assert anno.shape[1] == 4 41 | 42 | # return img_files, anno 43 | return Sequence(sequence_name, img_files, 'tc128', anno.reshape(-1, 4)) 44 | 45 | def __len__(self): 46 | return len(self.seq_names) 47 | -------------------------------------------------------------------------------- /lib/test/evaluation/tc128dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset, SequenceList 3 | import os 4 | import glob 5 | import six 6 | 7 | 8 | class TC128Dataset(BaseDataset): 9 | """ 10 | TC-128 Dataset 11 | modified from the implementation in got10k-toolkit (https://github.com/got-10k/toolkit) 12 | """ 13 | def __init__(self): 14 | super().__init__() 15 | self.base_path = self.env_settings.tc128_path 16 | self.anno_files = sorted(glob.glob( 17 | os.path.join(self.base_path, '*/*_gt.txt'))) 18 | 
self.seq_dirs = [os.path.dirname(f) for f in self.anno_files] 19 | self.seq_names = [os.path.basename(d) for d in self.seq_dirs] 20 | # valid frame range for each sequence 21 | self.range_files = [glob.glob(os.path.join(d, '*_frames.txt'))[0] for d in self.seq_dirs] 22 | 23 | def get_sequence_list(self): 24 | return SequenceList([self._construct_sequence(s) for s in self.seq_names]) 25 | 26 | def _construct_sequence(self, sequence_name): 27 | if isinstance(sequence_name, six.string_types): 28 | if not sequence_name in self.seq_names: 29 | raise Exception('Sequence {} not found.'.format(sequence_name)) 30 | index = self.seq_names.index(sequence_name) 31 | # load valid frame range 32 | frames = np.loadtxt(self.range_files[index], dtype=int, delimiter=',') 33 | img_files = [os.path.join(self.seq_dirs[index], 'img/%04d.jpg' % f) for f in range(frames[0], frames[1] + 1)] 34 | 35 | # load annotations 36 | anno = np.loadtxt(self.anno_files[index], delimiter=',') 37 | assert len(img_files) == len(anno) 38 | assert anno.shape[1] == 4 39 | 40 | # return img_files, anno 41 | return Sequence(sequence_name, img_files, 'tc128', anno.reshape(-1, 4)) 42 | 43 | def __len__(self): 44 | return len(self.seq_names) 45 | -------------------------------------------------------------------------------- /lib/test/evaluation/tnl2kdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset, SequenceList 3 | from lib.test.utils.load_text import load_text 4 | import glob 5 | import os 6 | 7 | class TNL2KDataset(BaseDataset): 8 | """ 9 | TNL2K test set consisting of 700 videos, each annotated with a natural-language description of the target 10 | 11 | Publication: 12 | Towards More Flexible and Accurate Object Tracking with Natural Language: Algorithms and Benchmark 13 | Xiao Wang, Xiujun Shu, Zhipeng Zhang, Bo Jiang, Yaowei Wang, Yonghong Tian and Feng Wu 14 | CVPR, 2021 15 | https://arxiv.org/abs/2103.16746 16 | 17 | Download the dataset from https://sites.google.com/view/langtrackbenchmark/ 18 | """ 19 | def __init__(self): 20 | super().__init__() 21 | self.base_path = self.env_settings.tnl2k_path 22 | self.sequence_list = self._get_sequence_list() 23 | 24 | def get_sequence_list(self): 25 | return SequenceList([self._construct_sequence(s) for s in self.sequence_list]) 26 | 27 | def _construct_sequence(self, sequence_name): 28 | anno_path = os.path.join(self.base_path, sequence_name, 'groundtruth.txt') 29 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64) 30 | 31 | frames_list = sorted(glob.glob(os.path.join(self.base_path, sequence_name, 'imgs', '*'))) 32 | 33 | language_file = os.path.join(self.base_path, sequence_name, "language.txt") 34 | 35 | with open(language_file, 'r') as f: 36 | language = f.readlines()[0].rstrip() 37 | 38 | return Sequence(sequence_name, frames_list, 'tnl2k', ground_truth_rect.reshape(-1, 4), 39 | object_class=None, target_visible=None, language=language) 40 | 41 | def __len__(self): 42 | return len(self.sequence_list) 43 | 44 | def _get_sequence_list(self): 45 | sequence_list = sorted([p.split('/')[-2] for p in glob.glob(os.path.join(self.base_path, '*/'))]) 46 | return sequence_list 47 | -------------------------------------------------------------------------------- /lib/test/evaluation/trackingnetdataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.test.evaluation.data import Sequence, BaseDataset,
SequenceList 3 | import os 4 | from lib.test.utils.load_text import load_text 5 | 6 | 7 | class TrackingNetDataset(BaseDataset): 8 | """ TrackingNet test set. 9 | 10 | Publication: 11 | TrackingNet: A Large-Scale Dataset and Benchmark for Object Tracking in the Wild. 12 | Matthias Mueller,Adel Bibi, Silvio Giancola, Salman Al-Subaihi and Bernard Ghanem 13 | ECCV, 2018 14 | https://ivul.kaust.edu.sa/Documents/Publications/2018/TrackingNet%20A%20Large%20Scale%20Dataset%20and%20Benchmark%20for%20Object%20Tracking%20in%20the%20Wild.pdf 15 | 16 | Download the dataset using the toolkit https://github.com/SilvioGiancola/TrackingNet-devkit. 17 | """ 18 | def __init__(self): 19 | super().__init__() 20 | self.base_path = self.env_settings.trackingnet_path 21 | 22 | sets = 'TEST' 23 | if not isinstance(sets, (list, tuple)): 24 | if sets == 'TEST': 25 | sets = ['TEST'] 26 | elif sets == 'TRAIN': 27 | sets = ['TRAIN_{}'.format(i) for i in range(5)] 28 | 29 | self.sequence_list = self._list_sequences(self.base_path, sets) 30 | 31 | def get_sequence_list(self): 32 | return SequenceList([self._construct_sequence(set, seq_name) for set, seq_name in self.sequence_list]) 33 | 34 | def _construct_sequence(self, set, sequence_name): 35 | anno_path = '{}/{}/anno/{}.txt'.format(self.base_path, set, sequence_name) 36 | 37 | ground_truth_rect = load_text(str(anno_path), delimiter=',', dtype=np.float64, backend='numpy') 38 | 39 | frames_path = '{}/{}/frames/{}'.format(self.base_path, set, sequence_name) 40 | frame_list = [frame for frame in os.listdir(frames_path) if frame.endswith(".jpg")] 41 | frame_list.sort(key=lambda f: int(f[:-4])) 42 | frames_list = [os.path.join(frames_path, frame) for frame in frame_list] 43 | 44 | return Sequence(sequence_name, frames_list, 'trackingnet', ground_truth_rect.reshape(-1, 4)) 45 | 46 | def __len__(self): 47 | return len(self.sequence_list) 48 | 49 | def _list_sequences(self, root, set_ids): 50 | sequence_list = [] 51 | 52 | for s in set_ids: 53 | anno_dir = os.path.join(root, s, "anno") 54 | sequences_cur_set = [(s, os.path.splitext(f)[0]) for f in os.listdir(anno_dir) if f.endswith('.txt')] 55 | 56 | sequence_list += sequences_cur_set 57 | 58 | return sequence_list 59 | -------------------------------------------------------------------------------- /lib/test/evaluation/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | def load_text_numpy(path, delimiter, dtype): 5 | if isinstance(delimiter, (tuple, list)): 6 | for d in delimiter: 7 | try: 8 | ground_truth_rect = np.loadtxt(path, delimiter=d, dtype=dtype) 9 | return ground_truth_rect 10 | except: 11 | pass 12 | 13 | raise Exception('Could not read file {}'.format(path)) 14 | else: 15 | ground_truth_rect = np.loadtxt(path, delimiter=delimiter, dtype=dtype) 16 | return ground_truth_rect 17 | 18 | 19 | def load_text_pandas(path, delimiter, dtype): 20 | if isinstance(delimiter, (tuple, list)): 21 | for d in delimiter: 22 | try: 23 | ground_truth_rect = pd.read_csv(path, delimiter=d, header=None, dtype=dtype, na_filter=False, 24 | low_memory=False).values 25 | return ground_truth_rect 26 | except Exception as e: 27 | pass 28 | 29 | raise Exception('Could not read file {}'.format(path)) 30 | else: 31 | ground_truth_rect = pd.read_csv(path, delimiter=delimiter, header=None, dtype=dtype, na_filter=False, 32 | low_memory=False).values 33 | return ground_truth_rect 34 | 35 | def load_text(path, delimiter=' ', dtype=np.float32, 
backend='numpy'): 36 | if backend == 'numpy': 37 | return load_text_numpy(path, delimiter, dtype) 38 | elif backend == 'pandas': 39 | return load_text_pandas(path, delimiter, dtype) -------------------------------------------------------------------------------- /lib/test/parameter/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/test/parameter/__init__.py -------------------------------------------------------------------------------- /lib/test/parameter/uvltrack.py: -------------------------------------------------------------------------------- 1 | from lib.test.utils import TrackerParams 2 | import os 3 | from easydict import EasyDict as edict 4 | from lib.test.evaluation.environment import env_settings 5 | from lib.config.uvltrack.config import cfg, update_config_from_file 6 | 7 | 8 | def _update_config(base_cfg, exp_cfg): 9 | if isinstance(base_cfg, edict) and isinstance(exp_cfg, edict): 10 | for k, v in exp_cfg.items(): 11 | if k in base_cfg: 12 | if not isinstance(v, dict): 13 | base_cfg[k] = v 14 | else: 15 | _update_config(base_cfg[k], v) 16 | else: 17 | raise ValueError("{} not exist in config.py".format(k)) 18 | else: 19 | return 20 | 21 | def parameters(yaml_name: str, extra_cfg=None, epoch=None): 22 | params = TrackerParams() 23 | prj_dir = env_settings().prj_dir 24 | save_dir = env_settings().save_dir 25 | # update default config from yaml file 26 | params.yaml_name = yaml_name 27 | yaml_file = os.path.join(prj_dir, 'experiments/uvltrack/%s.yaml' % yaml_name) 28 | update_config_from_file(yaml_file) 29 | if epoch is not None: 30 | cfg.TEST.EPOCH = epoch 31 | params.cfg = cfg 32 | if extra_cfg is not None: 33 | _update_config(params.cfg, extra_cfg) 34 | # template and search region 35 | params.template_factor = cfg.TEST.TEMPLATE_FACTOR 36 | params.template_size = cfg.TEST.TEMPLATE_SIZE 37 | params.search_factor = cfg.TEST.SEARCH_FACTOR 38 | params.search_size = cfg.TEST.SEARCH_SIZE 39 | params.grounding_size = cfg.TEST.SEARCH_SIZE 40 | 41 | # Network checkpoint path 42 | params.checkpoint = os.path.join(save_dir, "checkpoints/train/uvltrack/%s/UVLTrack_ep%04d.pth.tar"%(yaml_name, cfg.TEST.EPOCH)) # 470 43 | 44 | # whether to save boxes from all queries 45 | params.save_all_boxes = False 46 | 47 | return params 48 | -------------------------------------------------------------------------------- /lib/test/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/test/tracker/__init__.py -------------------------------------------------------------------------------- /lib/test/tracker/basetracker.py: -------------------------------------------------------------------------------- 1 | from _collections import OrderedDict 2 | 3 | 4 | class BaseTracker: 5 | """Base class for all trackers.""" 6 | 7 | def __init__(self, params): 8 | self.params = params 9 | self.visdom = None 10 | 11 | def predicts_segmentation_mask(self): 12 | return False 13 | 14 | def initialize(self, image, info: dict) -> dict: 15 | """Overload this function in your tracker. This should initialize the model.""" 16 | raise NotImplementedError 17 | 18 | def track(self, image, info: dict = None) -> dict: 19 | """Overload this function in your tracker. 
This should track in the frame and update the model.""" 20 | raise NotImplementedError 21 | 22 | def visdom_draw_tracking(self, image, box, segmentation=None): 23 | if isinstance(box, OrderedDict): 24 | box = [v for k, v in box.items()] 25 | else: 26 | box = (box,) 27 | if segmentation is None: 28 | self.visdom.register((image, *box), 'Tracking', 1, 'Tracking') 29 | else: 30 | self.visdom.register((image, *box, segmentation), 'Tracking', 1, 'Tracking') -------------------------------------------------------------------------------- /lib/test/tracker/tracker_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from lib.utils.misc import NestedTensor 4 | import matplotlib.pyplot as plt 5 | import os 6 | 7 | class Preprocessor(object): 8 | def __init__(self): 9 | self.mean = torch.tensor([0.485, 0.456, 0.406]).view((1, 3, 1, 1)).cuda() 10 | self.std = torch.tensor([0.229, 0.224, 0.225]).view((1, 3, 1, 1)).cuda() 11 | 12 | def process(self, img_arr: np.ndarray, amask_arr: np.ndarray): 13 | # Deal with the image patch 14 | img_tensor = torch.tensor(img_arr).cuda().float().permute((2,0,1)).unsqueeze(dim=0) 15 | img_tensor_norm = ((img_tensor / 255.0) - self.mean) / self.std # (1,3,H,W) 16 | # Deal with the attention mask 17 | amask_tensor = torch.from_numpy(amask_arr).to(torch.bool).cuda().unsqueeze(dim=0) # (1,H,W) 18 | return NestedTensor(img_tensor_norm, amask_tensor) 19 | 20 | class Preprocessor_wo_mask(object): 21 | def __init__(self): 22 | self.mean = torch.tensor([0.485, 0.456, 0.406]).view((1, 3, 1, 1)).cuda() 23 | self.std = torch.tensor([0.229, 0.224, 0.225]).view((1, 3, 1, 1)).cuda() 24 | 25 | def process(self, img_arr: np.ndarray): 26 | # Deal with the image patch 27 | img_tensor = torch.tensor(img_arr).cuda().float().permute((2,0,1)).unsqueeze(dim=0) 28 | img_tensor_norm = ((img_tensor / 255.0) - self.mean) / self.std # (1,3,H,W) 29 | return img_tensor_norm 30 | 31 | 32 | class PreprocessorX(object): 33 | def __init__(self): 34 | self.mean = torch.tensor([0.485, 0.456, 0.406]).view((1, 3, 1, 1)).cuda() 35 | self.std = torch.tensor([0.229, 0.224, 0.225]).view((1, 3, 1, 1)).cuda() 36 | 37 | def process(self, img_arr: np.ndarray, amask_arr: np.ndarray): 38 | # Deal with the image patch 39 | img_tensor = torch.tensor(img_arr).cuda().float().permute((2,0,1)).unsqueeze(dim=0) 40 | img_tensor_norm = ((img_tensor / 255.0) - self.mean) / self.std # (1,3,H,W) 41 | # Deal with the attention mask 42 | amask_tensor = torch.from_numpy(amask_arr).to(torch.bool).cuda().unsqueeze(dim=0) # (1,H,W) 43 | return img_tensor_norm, amask_tensor 44 | 45 | 46 | class PreprocessorX_onnx(object): 47 | def __init__(self): 48 | self.mean = np.array([0.485, 0.456, 0.406]).reshape((1, 3, 1, 1)) 49 | self.std = np.array([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1)) 50 | 51 | def process(self, img_arr: np.ndarray, amask_arr: np.ndarray): 52 | """img_arr: (H,W,3), amask_arr: (H,W)""" 53 | # Deal with the image patch 54 | img_arr_4d = img_arr[np.newaxis, :, :, :].transpose(0, 3, 1, 2) 55 | img_arr_4d = (img_arr_4d / 255.0 - self.mean) / self.std # (1, 3, H, W) 56 | # Deal with the attention mask 57 | amask_arr_3d = amask_arr[np.newaxis, :, :] # (1,H,W) 58 | return img_arr_4d.astype(np.float32), amask_arr_3d.astype(np.bool) 59 | 60 | def vis_attn_maps(attn_weights, q_w, k_w, skip_len, x1, x2, x1_title, x2_title, save_path='.', idxs=None): 61 | if not os.path.exists(save_path): 62 | os.makedirs(save_path) 63 | shape1 = [q_w, q_w] 64 | 
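# q_w and k_w are the side lengths of the query/key token grids; each head-averaged
# attention map below is reshaped to [q_w, q_w, k_w, k_w] so a 2-D slice can be
# visualized per reference point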
shape2 = [k_w, k_w] 65 | 66 | attn_weights_mean = [] 67 | for attn in attn_weights: 68 | attn_weights_mean.append(attn[..., skip_len:(skip_len+k_w**2)].mean(dim=1).squeeze().reshape(shape1+shape2).cpu()) 69 | 70 | # downsampling factor 71 | fact = 32 72 | 73 | # let's select 4 reference points for visualization 74 | # idxs = [(32, 32), (64, 64), (32, 96), (96, 96), ] 75 | if idxs is None: 76 | idxs = [(64, 64)] 77 | 78 | block_num=0 79 | idx_o = idxs[0] 80 | for attn_weight in attn_weights_mean: 81 | fig = plt.figure(constrained_layout=False, figsize=(5, 5), dpi=160) 82 | fig.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0) 83 | ax = fig.add_subplot(111) 84 | idx = (idx_o[0] // fact, idx_o[1] // fact) 85 | ax.imshow(attn_weight[..., idx[0], idx[1]], cmap='cividis', interpolation='nearest') 86 | ax.axis('off') 87 | # ax.set_title(f'Stage2-Block{block_num}') 88 | plt.savefig(save_path + '/Stage2-Block{}_attn_weight.png'.format(block_num)) 89 | plt.close() 90 | block_num += 1 91 | 92 | fig = plt.figure(constrained_layout=False, figsize=(5, 5), dpi=160) 93 | fig.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0) 94 | x2_ax = fig.add_subplot(111) 95 | x2_ax.imshow(x2) 96 | x2_ax.axis('off') 97 | plt.savefig(save_path + '/{}.png'.format(x2_title)) 98 | plt.close() 99 | 100 | # the reference points as red circles 101 | fig = plt.figure(constrained_layout=False, figsize=(5, 5), dpi=160) 102 | fig.subplots_adjust(left=0.0, bottom=0.0, right=1.0, top=1.0) 103 | x1_ax = fig.add_subplot(111) 104 | x1_ax.imshow(x1) 105 | for (y, x) in idxs: 106 | # scale = im.height / img.shape[-2] 107 | x = ((x // fact) + 0.5) * fact 108 | y = ((y // fact) + 0.5) * fact 109 | x1_ax.add_patch(plt.Circle((x, y), fact // 2, color='r')) 110 | # x1_ax.set_title(x1_title) 111 | x1_ax.axis('off') 112 | plt.savefig(save_path+'/{}.png'.format(x1_title)) 113 | plt.close() 114 | 115 | del attn_weights_mean 116 | -------------------------------------------------------------------------------- /lib/test/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .params import TrackerParams, FeatureParams, Choice -------------------------------------------------------------------------------- /lib/test/utils/_init_paths.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os.path as osp 6 | import sys 7 | 8 | 9 | def add_path(path): 10 | if path not in sys.path: 11 | sys.path.insert(0, path) 12 | 13 | 14 | this_dir = osp.dirname(__file__) 15 | 16 | prj_path = osp.join(this_dir, '..', '..', '..') 17 | add_path(prj_path) 18 | -------------------------------------------------------------------------------- /lib/test/utils/augmentation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import torch 4 | import torch.nn.functional as F 5 | import cv2 as cv 6 | import random 7 | 8 | 9 | class Transform: 10 | """Base data augmentation transform class.""" 11 | 12 | def __init__(self, output_sz = None, shift = None): 13 | self.output_sz = output_sz 14 | self.shift = (0,0) if shift is None else shift 15 | 16 | def __call__(self, image, is_mask=False): 17 | raise NotImplementedError 18 | 19 | def crop_to_output(self, image): 20 | if isinstance(image, torch.Tensor): 21 | imsz = image.shape[2:] 22 | if self.output_sz is None: 23 | pad_h = 0 
24 | pad_w = 0 25 | else: 26 | pad_h = (self.output_sz[0] - imsz[0]) / 2 27 | pad_w = (self.output_sz[1] - imsz[1]) / 2 28 | 29 | pad_left = math.floor(pad_w) + self.shift[1] 30 | pad_right = math.ceil(pad_w) - self.shift[1] 31 | pad_top = math.floor(pad_h) + self.shift[0] 32 | pad_bottom = math.ceil(pad_h) - self.shift[0] 33 | 34 | return F.pad(image, (pad_left, pad_right, pad_top, pad_bottom), 'replicate') 35 | else: 36 | raise NotImplementedError 37 | 38 | class Blur(Transform): 39 | """Blur with given sigma (can be axis dependent).""" 40 | def __init__(self, sigma, output_sz = None, shift = None): 41 | super().__init__(output_sz, shift) 42 | if isinstance(sigma, (float, int)): 43 | sigma = (sigma, sigma) 44 | self.sigma = sigma 45 | self.filter_size = [math.ceil(2*s) for s in self.sigma] 46 | x_coord = [torch.arange(-sz, sz+1, dtype=torch.float32) for sz in self.filter_size] 47 | self.filter = [torch.exp(-(x**2)/(2*s**2)) for x, s in zip(x_coord, self.sigma)] 48 | self.filter[0] = self.filter[0].view(1,1,-1,1) / self.filter[0].sum() 49 | self.filter[1] = self.filter[1].view(1,1,1,-1) / self.filter[1].sum() 50 | 51 | def __call__(self, image, is_mask=False): 52 | if isinstance(image, torch.Tensor): 53 | sz = image.shape[2:] 54 | im1 = F.conv2d(image.view(-1,1,sz[0],sz[1]), self.filter[0], padding=(self.filter_size[0],0)) 55 | return self.crop_to_output(F.conv2d(im1, self.filter[1], padding=(0,self.filter_size[1])).view(1,-1,sz[0],sz[1])) 56 | else: 57 | raise NotImplementedError 58 | 59 | 60 | class FlipHorizontal(Transform): 61 | """Flip along horizontal axis.""" 62 | def __call__(self, image, is_mask=False): 63 | if isinstance(image, torch.Tensor): 64 | return self.crop_to_output(image.flip((3,))) 65 | else: 66 | return np.fliplr(image) 67 | 68 | class FlipVertical(Transform): 69 | """Flip along vertical axis.""" 70 | def __call__(self, image: torch.Tensor, is_mask=False): 71 | if isinstance(image, torch.Tensor): 72 | return self.crop_to_output(image.flip((2,))) 73 | else: 74 | return np.flipud(image) -------------------------------------------------------------------------------- /lib/test/utils/hann.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import torch.nn.functional as F 4 | 5 | 6 | def hann1d(sz: int, centered = True) -> torch.Tensor: 7 | """1D cosine window.""" 8 | if centered: 9 | return 0.5 * (1 - torch.cos((2 * math.pi / (sz + 1)) * torch.arange(1, sz + 1).float())) 10 | w = 0.5 * (1 + torch.cos((2 * math.pi / (sz + 2)) * torch.arange(0, sz//2 + 1).float())) 11 | return torch.cat([w, w[1:sz-sz//2].flip((0,))]) 12 | 13 | 14 | def hann2d(sz: torch.Tensor, centered = True) -> torch.Tensor: 15 | """2D cosine window.""" 16 | return hann1d(sz[0].item(), centered).reshape(1, 1, -1, 1) * hann1d(sz[1].item(), centered).reshape(1, 1, 1, -1) 17 | 18 | 19 | def hann2d_bias(sz: torch.Tensor, ctr_point: torch.Tensor, centered = True) -> torch.Tensor: 20 | """2D cosine window.""" 21 | distance = torch.stack([ctr_point, sz-ctr_point], dim=0) 22 | max_distance, _ = distance.max(dim=0) 23 | 24 | hann1d_x = hann1d(max_distance[0].item() * 2, centered) 25 | hann1d_x = hann1d_x[max_distance[0] - distance[0, 0]: max_distance[0] + distance[1, 0]] 26 | hann1d_y = hann1d(max_distance[1].item() * 2, centered) 27 | hann1d_y = hann1d_y[max_distance[1] - distance[0, 1]: max_distance[1] + distance[1, 1]] 28 | 29 | return hann1d_y.reshape(1, 1, -1, 1) * hann1d_x.reshape(1, 1, 1, -1) 30 | 31 | 32 | 33 | def hann2d_clipped(sz: 
torch.Tensor, effective_sz: torch.Tensor, centered = True) -> torch.Tensor: 34 | """1D clipped cosine window.""" 35 | 36 | # Ensure that the difference is even 37 | effective_sz += (effective_sz - sz) % 2 38 | effective_window = hann1d(effective_sz[0].item(), True).reshape(1, 1, -1, 1) * hann1d(effective_sz[1].item(), True).reshape(1, 1, 1, -1) 39 | 40 | pad = (sz - effective_sz) // 2 41 | 42 | window = F.pad(effective_window, (pad[1].item(), pad[1].item(), pad[0].item(), pad[0].item()), 'replicate') 43 | 44 | if centered: 45 | return window 46 | else: 47 | mid = (sz / 2).int() 48 | window_shift_lr = torch.cat((window[:, :, :, mid[1]:], window[:, :, :, :mid[1]]), 3) 49 | return torch.cat((window_shift_lr[:, :, mid[0]:, :], window_shift_lr[:, :, :mid[0], :]), 2) 50 | 51 | 52 | def gauss_fourier(sz: int, sigma: float, half: bool = False) -> torch.Tensor: 53 | if half: 54 | k = torch.arange(0, int(sz/2+1)) 55 | else: 56 | k = torch.arange(-int((sz-1)/2), int(sz/2+1)) 57 | return (math.sqrt(2*math.pi) * sigma / sz) * torch.exp(-2 * (math.pi * sigma * k.float() / sz)**2) 58 | 59 | 60 | def gauss_spatial(sz, sigma, center=0, end_pad=0): 61 | k = torch.arange(-(sz-1)/2, (sz+1)/2+end_pad) 62 | return torch.exp(-1.0/(2*sigma**2) * (k - center)**2) 63 | 64 | 65 | def label_function(sz: torch.Tensor, sigma: torch.Tensor): 66 | return gauss_fourier(sz[0].item(), sigma[0].item()).reshape(1, 1, -1, 1) * gauss_fourier(sz[1].item(), sigma[1].item(), True).reshape(1, 1, 1, -1) 67 | 68 | def label_function_spatial(sz: torch.Tensor, sigma: torch.Tensor, center: torch.Tensor = torch.zeros(2), end_pad: torch.Tensor = torch.zeros(2)): 69 | """The origin is in the middle of the image.""" 70 | return gauss_spatial(sz[0].item(), sigma[0].item(), center[0], end_pad[0].item()).reshape(1, 1, -1, 1) * \ 71 | gauss_spatial(sz[1].item(), sigma[1].item(), center[1], end_pad[1].item()).reshape(1, 1, 1, -1) 72 | 73 | 74 | def cubic_spline_fourier(f, a): 75 | """The continuous Fourier transform of a cubic spline kernel.""" 76 | 77 | bf = (6*(1 - torch.cos(2 * math.pi * f)) + 3*a*(1 - torch.cos(4 * math.pi * f)) 78 | - (6 + 8*a)*math.pi*f*torch.sin(2 * math.pi * f) - 2*a*math.pi*f*torch.sin(4 * math.pi * f)) \ 79 | / (4 * math.pi**4 * f**4) 80 | 81 | bf[f == 0] = 1 82 | 83 | return bf 84 | 85 | def max2d(a: torch.Tensor) -> (torch.Tensor, torch.Tensor): 86 | """Computes maximum and argmax in the last two dimensions.""" 87 | 88 | max_val_row, argmax_row = torch.max(a, dim=-2) 89 | max_val, argmax_col = torch.max(max_val_row, dim=-1) 90 | argmax_row = argmax_row.view(argmax_col.numel(),-1)[torch.arange(argmax_col.numel()), argmax_col.view(-1)] 91 | argmax_row = argmax_row.reshape(argmax_col.shape) 92 | argmax = torch.cat((argmax_row.unsqueeze(-1), argmax_col.unsqueeze(-1)), -1) 93 | return max_val, argmax 94 | -------------------------------------------------------------------------------- /lib/test/utils/load_text.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | 5 | def load_text_numpy(path, delimiter, dtype): 6 | if isinstance(delimiter, (tuple, list)): 7 | for d in delimiter: 8 | try: 9 | ground_truth_rect = np.loadtxt(path, delimiter=d, dtype=dtype) 10 | return ground_truth_rect 11 | except: 12 | pass 13 | 14 | raise Exception('Could not read file {}'.format(path)) 15 | else: 16 | ground_truth_rect = np.loadtxt(path, delimiter=delimiter, dtype=dtype) 17 | return ground_truth_rect 18 | 19 | 20 | def load_text_pandas(path, delimiter, 
dtype): 21 | if isinstance(delimiter, (tuple, list)): 22 | for d in delimiter: 23 | try: 24 | ground_truth_rect = pd.read_csv(path, delimiter=d, header=None, dtype=dtype, na_filter=False, 25 | low_memory=False).values 26 | return ground_truth_rect 27 | except Exception as e: 28 | pass 29 | 30 | raise Exception('Could not read file {}'.format(path)) 31 | else: 32 | ground_truth_rect = pd.read_csv(path, delimiter=delimiter, header=None, dtype=dtype, na_filter=False, 33 | low_memory=False).values 34 | return ground_truth_rect 35 | 36 | 37 | def load_text(path, delimiter=' ', dtype=np.float32, backend='numpy'): 38 | if backend == 'numpy': 39 | return load_text_numpy(path, delimiter, dtype) 40 | elif backend == 'pandas': 41 | return load_text_pandas(path, delimiter, dtype) 42 | -------------------------------------------------------------------------------- /lib/test/utils/params.py: -------------------------------------------------------------------------------- 1 | from lib.utils import TensorList 2 | import random 3 | 4 | 5 | class TrackerParams: 6 | """Class for tracker parameters.""" 7 | def set_default_values(self, default_vals: dict): 8 | for name, val in default_vals.items(): 9 | if not hasattr(self, name): 10 | setattr(self, name, val) 11 | 12 | def get(self, name: str, *default): 13 | """Get a parameter value with the given name. If it does not exists, it return the default value given as a 14 | second argument or returns an error if no default value is given.""" 15 | if len(default) > 1: 16 | raise ValueError('Can only give one default value.') 17 | 18 | if not default: 19 | return getattr(self, name) 20 | 21 | return getattr(self, name, default[0]) 22 | 23 | def has(self, name: str): 24 | """Check if there exist a parameter with the given name.""" 25 | return hasattr(self, name) 26 | 27 | 28 | class FeatureParams: 29 | """Class for feature specific parameters""" 30 | def __init__(self, *args, **kwargs): 31 | if len(args) > 0: 32 | raise ValueError 33 | 34 | for name, val in kwargs.items(): 35 | if isinstance(val, list): 36 | setattr(self, name, TensorList(val)) 37 | else: 38 | setattr(self, name, val) 39 | 40 | 41 | def Choice(*args): 42 | """Can be used to sample random parameter values.""" 43 | return random.choice(args) 44 | -------------------------------------------------------------------------------- /lib/test/utils/transform_got10k.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import shutil 4 | import argparse 5 | import _init_paths 6 | from lib.test.evaluation.environment import env_settings 7 | 8 | 9 | def transform_got10k(tracker_name, cfg_name): 10 | env = env_settings() 11 | result_dir = env.results_path 12 | src_dir = os.path.join(result_dir, "%s/%s/got10k/" % (tracker_name, cfg_name)) 13 | dest_dir = os.path.join(result_dir, "%s/%s/got10k_submit/" % (tracker_name, cfg_name)) 14 | if not os.path.exists(dest_dir): 15 | os.makedirs(dest_dir) 16 | items = os.listdir(src_dir) 17 | for item in items: 18 | if "all" in item: 19 | continue 20 | src_path = os.path.join(src_dir, item) 21 | if "time" not in item: 22 | seq_name = item.replace(".txt", '') 23 | seq_dir = os.path.join(dest_dir, seq_name) 24 | if not os.path.exists(seq_dir): 25 | os.makedirs(seq_dir) 26 | new_item = item.replace(".txt", '_001.txt') 27 | dest_path = os.path.join(seq_dir, new_item) 28 | bbox_arr = np.loadtxt(src_path, dtype=np.int, delimiter='\t') 29 | np.savetxt(dest_path, bbox_arr, fmt='%d', delimiter=',') 30 | else: 31 | 
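# timing files ("*_time.txt") are copied unchanged into the per-sequence folder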
seq_name = item.replace("_time.txt", '') 32 | seq_dir = os.path.join(dest_dir, seq_name) 33 | if not os.path.exists(seq_dir): 34 | os.makedirs(seq_dir) 35 | dest_path = os.path.join(seq_dir, item) 36 | os.system("cp %s %s" % (src_path, dest_path)) 37 | # make zip archive 38 | shutil.make_archive(src_dir, "zip", src_dir) 39 | shutil.make_archive(dest_dir, "zip", dest_dir) 40 | # Remove the original files 41 | shutil.rmtree(src_dir) 42 | shutil.rmtree(dest_dir) 43 | 44 | 45 | if __name__ == "__main__": 46 | parser = argparse.ArgumentParser(description='transform got10k results.') 47 | parser.add_argument('--tracker_name', type=str, help='Name of tracking method.') 48 | parser.add_argument('--cfg_name', type=str, help='Name of config file.') 49 | 50 | args = parser.parse_args() 51 | transform_got10k(args.tracker_name, args.cfg_name) 52 | 53 | -------------------------------------------------------------------------------- /lib/test/utils/transform_trackingnet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import shutil 4 | import argparse 5 | import _init_paths 6 | from lib.test.evaluation.environment import env_settings 7 | 8 | 9 | def transform_trackingnet(tracker_name, cfg_name, epoch): 10 | env = env_settings() 11 | result_dir = env.results_path 12 | src_dir = os.path.join(result_dir, "%s/%s/trackingnet_BBOX_%s/" % (tracker_name, cfg_name, epoch)) 13 | dest_dir = os.path.join(result_dir, "%s/%s/trackingnet_submit_%s/" % (tracker_name, cfg_name, epoch)) 14 | if not os.path.exists(dest_dir): 15 | os.makedirs(dest_dir) 16 | items = os.listdir(src_dir) 17 | for item in items: 18 | if "all" in item: 19 | continue 20 | if "time" not in item: 21 | src_path = os.path.join(src_dir, item) 22 | dest_path = os.path.join(dest_dir, item) 23 | bbox_arr = np.loadtxt(src_path, dtype=np.int, delimiter='\t') 24 | np.savetxt(dest_path, bbox_arr, fmt='%d', delimiter=',') 25 | # make zip archive 26 | shutil.make_archive(src_dir, "zip", src_dir) 27 | shutil.make_archive(dest_dir, "zip", dest_dir) 28 | # Remove the original files 29 | shutil.rmtree(src_dir) 30 | shutil.rmtree(dest_dir) 31 | 32 | 33 | if __name__ == "__main__": 34 | parser = argparse.ArgumentParser(description='transform trackingnet results.') 35 | parser.add_argument('--tracker_name', type=str, help='Name of tracking method.') 36 | parser.add_argument('--cfg_name', type=str, help='Name of config file.') 37 | parser.add_argument('--epoch', type=str, help='Name of config file.') 38 | 39 | args = parser.parse_args() 40 | transform_trackingnet(args.tracker_name, args.cfg_name, args.epoch) 41 | -------------------------------------------------------------------------------- /lib/train/__init__.py: -------------------------------------------------------------------------------- 1 | from .admin.multigpu import MultiGPU 2 | -------------------------------------------------------------------------------- /lib/train/_init_paths.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os.path as osp 6 | import sys 7 | 8 | 9 | def add_path(path): 10 | if path not in sys.path: 11 | sys.path.insert(0, path) 12 | 13 | 14 | this_dir = osp.dirname(__file__) 15 | 16 | prj_path = osp.join(this_dir, '../..') 17 | add_path(prj_path) 18 | -------------------------------------------------------------------------------- 
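Both `lib/test/utils/_init_paths.py` and this `lib/train/_init_paths.py` exist purely for their import side effect: they prepend the project root to `sys.path` so that entry scripts can use absolute `lib.*` imports without installing the repository as a package. A minimal usage sketch (the script itself is hypothetical; `env_settings` is the helper defined in `lib/train/admin/environment.py` below):

```python
# hypothetical entry script placed next to an _init_paths.py (e.g. under tracking/)
import _init_paths  # noqa: F401 -- side effect only: adds the repo root to sys.path

from lib.train.admin.environment import env_settings

settings = env_settings()       # loads the paths defined in lib/train/admin/local.py
print(settings.workspace_dir)   # base directory for saving network checkpoints
```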
/lib/train/actors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_actor import * 2 | from .uvltrack import * 3 | -------------------------------------------------------------------------------- /lib/train/actors/base_actor.py: -------------------------------------------------------------------------------- 1 | from lib.utils import TensorDict 2 | 3 | 4 | class BaseActor: 5 | """ Base class for actor. The actor class handles the passing of the data through the network 6 | and calculation the loss""" 7 | def __init__(self, net, objective=None): 8 | """ 9 | args: 10 | net - The network to train 11 | objective - The loss function 12 | """ 13 | self.net = net 14 | self.objective = objective 15 | 16 | def __call__(self, data: TensorDict): 17 | """ Called in each training iteration. Should pass in input data through the network, calculate the loss, and 18 | return the training stats for the input data 19 | args: 20 | data - A TensorDict containing all the necessary data blocks. 21 | 22 | returns: 23 | loss - loss for the input data 24 | stats - a dict containing detailed losses 25 | """ 26 | raise NotImplementedError 27 | 28 | def to(self, device): 29 | """ Move the network to device 30 | args: 31 | device - device to use. 'cpu' or 'cuda' 32 | """ 33 | self.net.to(device) 34 | 35 | def train(self, mode=True): 36 | """ Set whether the network is in train mode. 37 | args: 38 | mode (True) - Bool specifying whether in training mode. 39 | """ 40 | self.net.train(mode) 41 | 42 | def eval(self): 43 | """ Set network to eval mode""" 44 | self.train(False) -------------------------------------------------------------------------------- /lib/train/admin/__init__.py: -------------------------------------------------------------------------------- 1 | from .environment import env_settings, create_default_local_file_ITP_train 2 | from .stats import AverageMeter, StatValue 3 | from .tensorboard import TensorboardWriter 4 | -------------------------------------------------------------------------------- /lib/train/admin/environment.py: -------------------------------------------------------------------------------- 1 | import importlib 2 | import os 3 | from collections import OrderedDict 4 | 5 | 6 | def create_default_local_file(): 7 | path = os.path.join(os.path.dirname(__file__), 'local.py') 8 | 9 | empty_str = '\'\'' 10 | default_settings = OrderedDict({ 11 | 'workspace_dir': empty_str, 12 | 'tensorboard_dir': 'self.workspace_dir + \'/tensorboard/\'', 13 | 'pretrained_networks': 'self.workspace_dir + \'/pretrained_networks/\'', 14 | 'lasot_dir': empty_str, 15 | 'got10k_dir': empty_str, 16 | 'trackingnet_dir': empty_str, 17 | 'coco_dir': empty_str, 18 | 'lvis_dir': empty_str, 19 | 'sbd_dir': empty_str, 20 | 'imagenet_dir': empty_str, 21 | 'imagenetdet_dir': empty_str, 22 | 'ecssd_dir': empty_str, 23 | 'hkuis_dir': empty_str, 24 | 'msra10k_dir': empty_str, 25 | 'davis_dir': empty_str, 26 | 'youtubevos_dir': empty_str}) 27 | 28 | comment = {'workspace_dir': 'Base directory for saving network checkpoints.', 29 | 'tensorboard_dir': 'Directory for tensorboard files.'} 30 | 31 | with open(path, 'w') as f: 32 | f.write('class EnvironmentSettings:\n') 33 | f.write(' def __init__(self):\n') 34 | 35 | for attr, attr_val in default_settings.items(): 36 | comment_str = None 37 | if attr in comment: 38 | comment_str = comment[attr] 39 | if comment_str is None: 40 | f.write(' self.{} = {}\n'.format(attr, attr_val)) 41 | else: 42 | f.write(' self.{} = {} # 
{}\n'.format(attr, attr_val, comment_str)) 43 | 44 | 45 | def create_default_local_file_ITP_train(workspace_dir, data_dir): 46 | path = os.path.join(os.path.dirname(__file__), 'local.py') 47 | 48 | empty_str = '\'\'' 49 | default_settings = OrderedDict({ 50 | 'workspace_dir': workspace_dir, 51 | 'tensorboard_dir': os.path.join(workspace_dir, 'tensorboard'), # Directory for tensorboard files. 52 | 'pretrained_networks': os.path.join(workspace_dir, 'pretrained_networks'), 53 | 'lasot_dir': os.path.join(data_dir, 'lasot'), 54 | 'got10k_dir': os.path.join(data_dir, 'got10k', 'train'), 55 | 'lasot_lmdb_dir': os.path.join(data_dir, 'lasot_lmdb'), 56 | 'got10k_lmdb_dir': os.path.join(data_dir, 'got10k_lmdb'), 57 | 'trackingnet_dir': os.path.join(data_dir, 'trackingnet'), 58 | 'trackingnet_lmdb_dir': os.path.join(data_dir, 'trackingnet_lmdb'), 59 | 'coco_dir': os.path.join(data_dir, 'coco'), 60 | 'coco_lmdb_dir': os.path.join(data_dir, 'coco_lmdb'), 61 | 'lvis_dir': empty_str, 62 | 'sbd_dir': empty_str, 63 | 'imagenet_dir': os.path.join(data_dir, 'vid'), 64 | 'imagenet_lmdb_dir': os.path.join(data_dir, 'vid_lmdb'), 65 | 'imagenetdet_dir': empty_str, 66 | 'ecssd_dir': empty_str, 67 | 'hkuis_dir': empty_str, 68 | 'msra10k_dir': empty_str, 69 | 'davis_dir': empty_str, 70 | 'youtubevos_dir': empty_str}) 71 | 72 | comment = {'workspace_dir': 'Base directory for saving network checkpoints.', 73 | 'tensorboard_dir': 'Directory for tensorboard files.'} 74 | 75 | with open(path, 'w') as f: 76 | f.write('class EnvironmentSettings:\n') 77 | f.write(' def __init__(self):\n') 78 | 79 | for attr, attr_val in default_settings.items(): 80 | comment_str = None 81 | if attr in comment: 82 | comment_str = comment[attr] 83 | if comment_str is None: 84 | if attr_val == empty_str: 85 | f.write(' self.{} = {}\n'.format(attr, attr_val)) 86 | else: 87 | f.write(' self.{} = \'{}\'\n'.format(attr, attr_val)) 88 | else: 89 | f.write(' self.{} = \'{}\' # {}\n'.format(attr, attr_val, comment_str)) 90 | 91 | 92 | def env_settings(): 93 | env_module_name = 'lib.train.admin.local' 94 | try: 95 | env_module = importlib.import_module(env_module_name) 96 | return env_module.EnvironmentSettings() 97 | except: 98 | env_file = os.path.join(os.path.dirname(__file__), 'local.py') 99 | 100 | create_default_local_file() 101 | raise RuntimeError('YOU HAVE NOT SETUP YOUR local.py!!!\n Go to "{}" and set all the paths you need. Then try to run again.'.format(env_file)) 102 | -------------------------------------------------------------------------------- /lib/train/admin/local.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | prj_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../..")) 4 | class EnvironmentSettings: 5 | def __init__(self): 6 | self.workspace_dir = prj_dir # Base directory for saving network checkpoints. 7 | self.tensorboard_dir = os.path.join(prj_dir, 'tensorboard') # Directory for tensorboard files. 
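# all paths below are resolved relative to the repository root (prj_dir); edit
# them to point at your local copies of the datasets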
8 | self.pretrained_networks = os.path.join(prj_dir, 'pretrained_networks') 9 | self.lasot_dir = os.path.join(prj_dir, 'data/lasot') 10 | self.lasotext_dir = os.path.join(prj_dir, 'data/lasotext') 11 | self.got10k_dir = os.path.join(prj_dir, 'data/got10k') 12 | self.trackingnet_dir = os.path.join(prj_dir, 'data/trackingnet') 13 | self.coco_dir = os.path.join(prj_dir, 'data/coco') 14 | self.tnl2k_dir = os.path.join(prj_dir, 'data/tnl2k/train') 15 | self.tnl2k_test_dir = os.path.join(prj_dir, 'data/tnl2k/test') 16 | self.otb99_dir = os.path.join(prj_dir, 'data/otb99') 17 | self.refcoco_dir = os.path.join(prj_dir, 'data/refcocog') 18 | -------------------------------------------------------------------------------- /lib/train/admin/multigpu.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | # Here we use DistributedDataParallel(DDP) rather than DataParallel(DP) for multiple GPUs training 3 | 4 | 5 | def is_multi_gpu(net): 6 | return isinstance(net, (MultiGPU, nn.parallel.distributed.DistributedDataParallel)) 7 | 8 | 9 | class MultiGPU(nn.parallel.distributed.DistributedDataParallel): 10 | def __getattr__(self, item): 11 | try: 12 | return super().__getattr__(item) 13 | except: 14 | pass 15 | return getattr(self.module, item) 16 | -------------------------------------------------------------------------------- /lib/train/admin/settings.py: -------------------------------------------------------------------------------- 1 | from lib.train.admin.environment import env_settings 2 | 3 | 4 | class Settings: 5 | """ Training settings, e.g. the paths to datasets and networks.""" 6 | def __init__(self): 7 | self.set_default() 8 | 9 | def set_default(self): 10 | self.env = env_settings() 11 | self.use_gpu = True 12 | 13 | 14 | -------------------------------------------------------------------------------- /lib/train/admin/stats.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class StatValue: 4 | def __init__(self): 5 | self.clear() 6 | 7 | def reset(self): 8 | self.val = 0 9 | 10 | def clear(self): 11 | self.reset() 12 | self.history = [] 13 | 14 | def update(self, val): 15 | self.val = val 16 | self.history.append(self.val) 17 | 18 | 19 | class AverageMeter(object): 20 | """Computes and stores the average and current value""" 21 | def __init__(self): 22 | self.clear() 23 | self.has_new_data = False 24 | 25 | def reset(self): 26 | self.avg = 0 27 | self.val = 0 28 | self.sum = 0 29 | self.count = 0 30 | 31 | def clear(self): 32 | self.reset() 33 | self.history = [] 34 | 35 | def update(self, val, n=1): 36 | self.val = val 37 | self.sum += val * n 38 | self.count += n 39 | self.avg = self.sum / self.count 40 | 41 | def new_epoch(self): 42 | if self.count > 0: 43 | self.history.append(self.avg) 44 | self.reset() 45 | self.has_new_data = True 46 | else: 47 | self.has_new_data = False 48 | 49 | 50 | def topk_accuracy(output, target, topk=(1,)): 51 | """Computes the precision@k for the specified values of k""" 52 | single_input = not isinstance(topk, (tuple, list)) 53 | if single_input: 54 | topk = (topk,) 55 | 56 | maxk = max(topk) 57 | batch_size = target.size(0) 58 | 59 | _, pred = output.topk(maxk, 1, True, True) 60 | pred = pred.t() 61 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 62 | 63 | res = [] 64 | for k in topk: 65 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)[0] 66 | res.append(correct_k * 100.0 / batch_size) 67 | 68 | if single_input: 69 | return res[0] 
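# a tuple/list of k values was given: return one accuracy value per k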
70 |
71 | return res
72 |
-------------------------------------------------------------------------------- /lib/train/admin/tensorboard.py: --------------------------------------------------------------------------------
1 | import os
2 | from collections import OrderedDict
3 | try:
4 | from torch.utils.tensorboard import SummaryWriter
5 | except:
6 | print('WARNING: You are using tensorboardX instead, since your pytorch version is too old.')
7 | from tensorboardX import SummaryWriter
8 |
9 |
10 | class TensorboardWriter:
11 | def __init__(self, directory, loader_names):
12 | self.directory = directory
13 | self.writer = OrderedDict({name: SummaryWriter(os.path.join(self.directory, name)) for name in loader_names})
14 |
15 | def write_info(self, script_name, description):
16 | tb_info_writer = SummaryWriter(os.path.join(self.directory, 'info'))
17 | tb_info_writer.add_text('Script_name', script_name)
18 | tb_info_writer.add_text('Description', description)
19 | tb_info_writer.close()
20 |
21 | def write_epoch(self, stats: OrderedDict, epoch: int, ind=-1):
22 | for loader_name, loader_stats in stats.items():
23 | if loader_stats is None:
24 | continue
25 | for var_name, val in loader_stats.items():
26 | if hasattr(val, 'history') and getattr(val, 'has_new_data', True):
27 | self.writer[loader_name].add_scalar(var_name, val.history[ind], epoch)
-------------------------------------------------------------------------------- /lib/train/data/__init__.py: --------------------------------------------------------------------------------
1 | from .loader import LTRLoader
2 | from .image_loader import jpeg4py_loader, opencv_loader, jpeg4py_loader_w_failsafe, default_image_loader
3 |
-------------------------------------------------------------------------------- /lib/train/data/bounding_box_utils.py: --------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | def rect_to_rel(bb, sz_norm=None):
5 | """Convert standard rectangular parametrization of the bounding box [x, y, w, h]
6 | to relative parametrization [cx/sw, cy/sh, log(w), log(h)], where [cx, cy] is the center coordinate.
7 | args:
8 | bb - N x 4 tensor of boxes.
9 | sz_norm - [N] x 2 tensor of value of [sw, sh] (optional). sw=w and sh=h if not given.
10 | """
11 |
12 | c = bb[...,:2] + 0.5 * bb[...,2:]
13 | if sz_norm is None:
14 | c_rel = c / bb[...,2:]
15 | else:
16 | c_rel = c / sz_norm
17 | sz_rel = torch.log(bb[...,2:])
18 | return torch.cat((c_rel, sz_rel), dim=-1)
19 |
20 |
21 | def rel_to_rect(bb, sz_norm=None):
22 | """Inverts the effect of rect_to_rel. See above."""
23 |
24 | sz = torch.exp(bb[...,2:])
25 | if sz_norm is None:
26 | c = bb[...,:2] * sz
27 | else:
28 | c = bb[...,:2] * sz_norm
29 | tl = c - 0.5 * sz
30 | return torch.cat((tl, sz), dim=-1)
31 |
32 |
33 | def masks_to_bboxes(mask, fmt='c'):
34 |
35 | """ Convert a mask tensor to one or more bounding boxes.
36 | Note: This function is a bit new, make sure it does what it says. /Andreas
37 | :param mask: Tensor of masks, shape = (..., H, W)
38 | :param fmt: bbox layout.
'c' => "center + size" or (x_center, y_center, width, height) 39 | 't' => "top left + size" or (x_left, y_top, width, height) 40 | 'v' => "vertices" or (x_left, y_top, x_right, y_bottom) 41 | :return: tensor containing a batch of bounding boxes, shape = (..., 4) 42 | """ 43 | batch_shape = mask.shape[:-2] 44 | mask = mask.reshape((-1, *mask.shape[-2:])) 45 | bboxes = [] 46 | 47 | for m in mask: 48 | mx = m.sum(dim=-2).nonzero() 49 | my = m.sum(dim=-1).nonzero() 50 | bb = [mx.min(), my.min(), mx.max(), my.max()] if (len(mx) > 0 and len(my) > 0) else [0, 0, 0, 0] 51 | bboxes.append(bb) 52 | 53 | bboxes = torch.tensor(bboxes, dtype=torch.float32, device=mask.device) 54 | bboxes = bboxes.reshape(batch_shape + (4,)) 55 | 56 | if fmt == 'v': 57 | return bboxes 58 | 59 | x1 = bboxes[..., :2] 60 | s = bboxes[..., 2:] - x1 + 1 61 | 62 | if fmt == 'c': 63 | return torch.cat((x1 + 0.5 * s, s), dim=-1) 64 | elif fmt == 't': 65 | return torch.cat((x1, s), dim=-1) 66 | 67 | raise ValueError("Undefined bounding box layout '%s'" % fmt) 68 | 69 | 70 | def masks_to_bboxes_multi(mask, ids, fmt='c'): 71 | assert mask.dim() == 2 72 | bboxes = [] 73 | 74 | for id in ids: 75 | mx = (mask == id).sum(dim=-2).nonzero() 76 | my = (mask == id).float().sum(dim=-1).nonzero() 77 | bb = [mx.min(), my.min(), mx.max(), my.max()] if (len(mx) > 0 and len(my) > 0) else [0, 0, 0, 0] 78 | 79 | bb = torch.tensor(bb, dtype=torch.float32, device=mask.device) 80 | 81 | x1 = bb[:2] 82 | s = bb[2:] - x1 + 1 83 | 84 | if fmt == 'v': 85 | pass 86 | elif fmt == 'c': 87 | bb = torch.cat((x1 + 0.5 * s, s), dim=-1) 88 | elif fmt == 't': 89 | bb = torch.cat((x1, s), dim=-1) 90 | else: 91 | raise ValueError("Undefined bounding box layout '%s'" % fmt) 92 | bboxes.append(bb) 93 | 94 | return bboxes 95 | -------------------------------------------------------------------------------- /lib/train/data/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/train/data/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /lib/train/data/image_loader.py: -------------------------------------------------------------------------------- 1 | # import jpeg4py 2 | import cv2 as cv 3 | from PIL import Image 4 | import numpy as np 5 | 6 | davis_palette = np.repeat(np.expand_dims(np.arange(0,256), 1), 3, 1).astype(np.uint8) 7 | davis_palette[:22, :] = [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0], 8 | [0, 0, 128], [128, 0, 128], [0, 128, 128], [128, 128, 128], 9 | [64, 0, 0], [191, 0, 0], [64, 128, 0], [191, 128, 0], 10 | [64, 0, 128], [191, 0, 128], [64, 128, 128], [191, 128, 128], 11 | [0, 64, 0], [128, 64, 0], [0, 191, 0], [128, 191, 0], 12 | [0, 64, 128], [128, 64, 128]] 13 | 14 | 15 | def default_image_loader(path): 16 | """The default image loader, reads the image from the given path. 
It first tries to use the jpeg4py_loader, 17 | but reverts to the opencv_loader if the former is not available.""" 18 | # if default_image_loader.use_jpeg4py is None: 19 | # # Try using jpeg4py 20 | # im = jpeg4py_loader(path) 21 | # if im is None: 22 | # default_image_loader.use_jpeg4py = False 23 | # print('Using opencv_loader instead.') 24 | # else: 25 | # default_image_loader.use_jpeg4py = True 26 | # return im 27 | # if default_image_loader.use_jpeg4py: 28 | # return jpeg4py_loader(path) 29 | default_image_loader.use_jpeg4py = False 30 | return opencv_loader(path) 31 | 32 | default_image_loader.use_jpeg4py = None 33 | 34 | 35 | def jpeg4py_loader(path): 36 | """ Image reading using jpeg4py https://github.com/ajkxyz/jpeg4py""" 37 | try: 38 | return jpeg4py.JPEG(path).decode() 39 | except Exception as e: 40 | print('ERROR: Could not read image "{}"'.format(path)) 41 | print(e) 42 | return None 43 | 44 | 45 | def opencv_loader(path): 46 | """ Read image using opencv's imread function and returns it in rgb format""" 47 | try: 48 | im = cv.imread(path, cv.IMREAD_COLOR) 49 | 50 | # convert to rgb and return 51 | return cv.cvtColor(im, cv.COLOR_BGR2RGB) 52 | except Exception as e: 53 | print('ERROR: Could not read image "{}"'.format(path)) 54 | print(e) 55 | return None 56 | 57 | 58 | def jpeg4py_loader_w_failsafe(path): 59 | """ Image reading using jpeg4py https://github.com/ajkxyz/jpeg4py""" 60 | try: 61 | return jpeg4py.JPEG(path).decode() 62 | except: 63 | try: 64 | im = cv.imread(path, cv.IMREAD_COLOR) 65 | 66 | # convert to rgb and return 67 | return cv.cvtColor(im, cv.COLOR_BGR2RGB) 68 | except Exception as e: 69 | print('ERROR: Could not read image "{}"'.format(path)) 70 | print(e) 71 | return None 72 | 73 | 74 | def opencv_seg_loader(path): 75 | """ Read segmentation annotation using opencv's imread function""" 76 | try: 77 | return cv.imread(path) 78 | except Exception as e: 79 | print('ERROR: Could not read image "{}"'.format(path)) 80 | print(e) 81 | return None 82 | 83 | 84 | def imread_indexed(filename): 85 | """ Load indexed image with given filename. Used to read segmentation annotations.""" 86 | 87 | im = Image.open(filename) 88 | 89 | annotation = np.atleast_3d(im)[...,0] 90 | return annotation 91 | 92 | 93 | 94 | def imwrite_indexed(filename, array, color_palette=None): 95 | """ Save indexed image as png. Used to save segmentation annotation.""" 96 | 97 | if color_palette is None: 98 | color_palette = davis_palette 99 | 100 | if np.atleast_3d(array).shape[2] != 1: 101 | raise Exception("Saving indexed PNGs requires 2D array.") 102 | 103 | im = Image.fromarray(array) 104 | im.putpalette(color_palette.ravel()) 105 | im.save(filename, format='PNG') -------------------------------------------------------------------------------- /lib/train/data/utils.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import html 3 | import os 4 | from functools import lru_cache 5 | 6 | import ftfy 7 | import regex as re 8 | 9 | 10 | @lru_cache() 11 | def default_bpe(): 12 | return os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe_simple_vocab_16e6.txt.gz") 13 | 14 | 15 | @lru_cache() 16 | def bytes_to_unicode(): 17 | """ 18 | Returns list of utf-8 byte and a corresponding list of unicode strings. 19 | The reversible bpe codes work on unicode strings. 20 | This means you need a large # of unicode characters in your vocab if you want to avoid UNKs. 
21 | When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
22 | This is a significant percentage of your normal, say, 32K bpe vocab.
23 | To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
24 | And it avoids mapping to whitespace/control characters the bpe code barfs on.
25 | """
26 | bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
27 | cs = bs[:]
28 | n = 0
29 | for b in range(2**8):
30 | if b not in bs:
31 | bs.append(b)
32 | cs.append(2**8+n)
33 | n += 1
34 | cs = [chr(n) for n in cs]
35 | return dict(zip(bs, cs))
36 |
37 |
38 | def get_pairs(word):
39 | """Return set of symbol pairs in a word.
40 | Word is represented as tuple of symbols (symbols being variable-length strings).
41 | """
42 | pairs = set()
43 | prev_char = word[0]
44 | for char in word[1:]:
45 | pairs.add((prev_char, char))
46 | prev_char = char
47 | return pairs
48 |
49 |
50 | def basic_clean(text):
51 | text = ftfy.fix_text(text)
52 | text = html.unescape(html.unescape(text))
53 | return text.strip()
54 |
55 |
56 | def whitespace_clean(text):
57 | text = re.sub(r'\s+', ' ', text)
58 | text = text.strip()
59 | return text
60 |
61 |
62 | class SimpleTokenizer(object):
63 | def __init__(self, bpe_path: str = default_bpe()):
64 | self.byte_encoder = bytes_to_unicode()
65 | self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
66 | merges = gzip.open(bpe_path).read().decode("utf-8").split('\n')
67 | merges = merges[1:49152-256-2+1]
68 | merges = [tuple(merge.split()) for merge in merges]
69 | vocab = list(bytes_to_unicode().values())
70 | vocab = vocab + [v+'</w>' for v in vocab]
71 | for merge in merges:
72 | vocab.append(''.join(merge))
73 | vocab.extend(['<|startoftext|>', '<|endoftext|>'])
74 | self.encoder = dict(zip(vocab, range(len(vocab))))
75 | self.decoder = {v: k for k, v in self.encoder.items()}
76 | self.bpe_ranks = dict(zip(merges, range(len(merges))))
77 | self.cache = {'<|startoftext|>': '<|startoftext|>', '<|endoftext|>': '<|endoftext|>'}
78 | self.pat = re.compile(r"""<\|startoftext\|>|<\|endoftext\|>|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""", re.IGNORECASE)
79 |
80 | def bpe(self, token):
81 | if token in self.cache:
82 | return self.cache[token]
83 | word = tuple(token[:-1]) + ( token[-1] + '</w>',)
84 | pairs = get_pairs(word)
85 |
86 | if not pairs:
87 | return token+'</w>'
88 |
89 | while True:
90 | bigram = min(pairs, key = lambda pair: self.bpe_ranks.get(pair, float('inf')))
91 | if bigram not in self.bpe_ranks:
92 | break
93 | first, second = bigram
94 | new_word = []
95 | i = 0
96 | while i < len(word):
97 | try:
98 | j = word.index(first, i)
99 | new_word.extend(word[i:j])
100 | i = j
101 | except:
102 | new_word.extend(word[i:])
103 | break
104 |
105 | if word[i] == first and i < len(word)-1 and word[i+1] == second:
106 | new_word.append(first+second)
107 | i += 2
108 | else:
109 | new_word.append(word[i])
110 | i += 1
111 | new_word = tuple(new_word)
112 | word = new_word
113 | if len(word) == 1:
114 | break
115 | else:
116 | pairs = get_pairs(word)
117 | word = ' '.join(word)
118 | self.cache[token] = word
119 | return word
120 |
121 | def encode(self, text):
122 | bpe_tokens = []
123 | text = whitespace_clean(basic_clean(text)).lower()
124 | for token in re.findall(self.pat, text):
125 | token = ''.join(self.byte_encoder[b] for b in token.encode('utf-8'))
126 | bpe_tokens.extend(self.encoder[bpe_token] for bpe_token in self.bpe(token).split(' '))
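# ids come from self.encoder (one per merged BPE symbol); decode() below inverts the mapping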
127 | return bpe_tokens
128 |
129 | def decode(self, tokens):
130 | text = ''.join([self.decoder[token] for token in tokens])
131 | text = bytearray([self.byte_decoder[c] for c in text]).decode('utf-8', errors="replace").replace('</w>', ' ')
132 | return text
133 |
-------------------------------------------------------------------------------- /lib/train/data_specs/README.md: --------------------------------------------------------------------------------
1 | # README
2 |
3 | ## Description for different text files
4 | GOT10K
5 | - got10k_train_full_split.txt: the complete GOT-10K training set. (9335 videos)
6 | - got10k_train_split.txt: part of videos from the GOT-10K training set
7 | - got10k_val_split.txt: another part of videos from the GOT-10K training set
8 | - got10k_vot_exclude.txt: 1k videos that are forbidden from "using to train models then testing on VOT" (as required by [VOT Challenge](https://www.votchallenge.net/vot2020/participation.html))
9 | - got10k_vot_train_split.txt: part of videos from the "VOT-permitted" GOT-10K training set
10 | - got10k_vot_val_split.txt: another part of videos from the "VOT-permitted" GOT-10K training set
11 |
12 | LaSOT
13 | - lasot_train_split.txt: the complete LaSOT training set
14 |
15 | TrackingNet
16 | - trackingnet_classmap.txt: the map from the sequence name to the target class for the TrackingNet dataset
-------------------------------------------------------------------------------- /lib/train/data_specs/lasot_test_split.txt: --------------------------------------------------------------------------------
1 | airplane-1 2 | airplane-9 3 | airplane-13 4 | airplane-15 5 | basketball-1 6 | basketball-6 7 | basketball-7 8 | basketball-11
9 | bear-2 10 | bear-4 11 | bear-6 12 | bear-17 13 | bicycle-2 14 | bicycle-7 15 | bicycle-9 16 | bicycle-18
17 | bird-2 18 | bird-3 19 | bird-15 20 | bird-17 21 | boat-3 22 | boat-4 23 | boat-12 24 | boat-17
25 | book-3 26 | book-10 27 | book-11 28 | book-19 29 | bottle-1 30 | bottle-12 31 | bottle-14 32 | bottle-18
33 | bus-2 34 | bus-5 35 | bus-17 36 | bus-19 37 | car-2 38 | car-6 39 | car-9 40 | car-17
41 | cat-1 42 | cat-3 43 | cat-18 44 | cat-20 45 | cattle-2 46 | cattle-7 47 | cattle-12 48 | cattle-13
49 | spider-14 50 | spider-16 51 | spider-18 52 | spider-20 53 | coin-3 54 | coin-6 55 | coin-7 56 | coin-18
57 | crab-3 58 | crab-6 59 | crab-12 60 | crab-18 61 | surfboard-12 62 | surfboard-4 63 | surfboard-5 64 | surfboard-8
65 | cup-1 66 | cup-4 67 | cup-7 68 | cup-17 69 | deer-4 70 | deer-8 71 | deer-10 72 | deer-14
73 | dog-1 74 | dog-7 75 | dog-15 76 | dog-19 77 | guitar-3 78 | guitar-8 79 | guitar-10 80 | guitar-16
81 | person-1 82 | person-5 83 | person-10 84 | person-12 85 | pig-2 86 | pig-10 87 | pig-13 88 | pig-18
89 | rubicCube-1 90 | rubicCube-6 91 | rubicCube-14 92 | rubicCube-19 93 | swing-10 94 | swing-14 95 | swing-17 96 | swing-20
97 | drone-13 98 | drone-15 99 | drone-2 100 | drone-7 101 | pool-12 102 | pool-15 103 | pool-3 104 | pool-7
105 | rabbit-10 106 | rabbit-13 107 | rabbit-17 108 | rabbit-19 109 | racing-10 110 | racing-15 111 | racing-16 112 | racing-20
113 | robot-1 114 | robot-19 115 | robot-5 116 | robot-8 117 | sepia-13 118 | sepia-16 119 | sepia-6 120 | sepia-8
121 | sheep-3 122 | sheep-5 123 | sheep-7 124 | sheep-9 125 | skateboard-16 126 | skateboard-19 127 | skateboard-3 128 | skateboard-8
129 | tank-14 130 | tank-16 131 | tank-6 132 | tank-9 133 | tiger-12 134 | tiger-18 135 | tiger-4 136 | tiger-6
137 | train-1 138 | train-11 139 | train-20 140 | train-7 141 | truck-16 142 |
truck-3 143 | truck-6 144 | truck-7 145 | turtle-16 146 | turtle-5 147 | turtle-8 148 | turtle-9 149 | umbrella-17 150 | umbrella-19 151 | umbrella-2 152 | umbrella-9 153 | yoyo-15 154 | yoyo-17 155 | yoyo-19 156 | yoyo-7 157 | zebra-10 158 | zebra-14 159 | zebra-16 160 | zebra-17 161 | elephant-1 162 | elephant-12 163 | elephant-16 164 | elephant-18 165 | goldfish-3 166 | goldfish-7 167 | goldfish-8 168 | goldfish-10 169 | hat-1 170 | hat-2 171 | hat-5 172 | hat-18 173 | kite-4 174 | kite-6 175 | kite-10 176 | kite-15 177 | motorcycle-1 178 | motorcycle-3 179 | motorcycle-9 180 | motorcycle-18 181 | mouse-1 182 | mouse-8 183 | mouse-9 184 | mouse-17 185 | flag-3 186 | flag-9 187 | flag-5 188 | flag-2 189 | frog-3 190 | frog-4 191 | frog-20 192 | frog-9 193 | gametarget-1 194 | gametarget-2 195 | gametarget-7 196 | gametarget-13 197 | hand-2 198 | hand-3 199 | hand-9 200 | hand-16 201 | helmet-5 202 | helmet-11 203 | helmet-19 204 | helmet-13 205 | licenseplate-6 206 | licenseplate-12 207 | licenseplate-13 208 | licenseplate-15 209 | electricfan-1 210 | electricfan-10 211 | electricfan-18 212 | electricfan-20 213 | chameleon-3 214 | chameleon-6 215 | chameleon-11 216 | chameleon-20 217 | crocodile-3 218 | crocodile-4 219 | crocodile-10 220 | crocodile-14 221 | gecko-1 222 | gecko-5 223 | gecko-16 224 | gecko-19 225 | fox-2 226 | fox-3 227 | fox-5 228 | fox-20 229 | giraffe-2 230 | giraffe-10 231 | giraffe-13 232 | giraffe-15 233 | gorilla-4 234 | gorilla-6 235 | gorilla-9 236 | gorilla-13 237 | hippo-1 238 | hippo-7 239 | hippo-9 240 | hippo-20 241 | horse-1 242 | horse-4 243 | horse-12 244 | horse-15 245 | kangaroo-2 246 | kangaroo-5 247 | kangaroo-11 248 | kangaroo-14 249 | leopard-1 250 | leopard-7 251 | leopard-16 252 | leopard-20 253 | lion-1 254 | lion-5 255 | lion-12 256 | lion-20 257 | lizard-1 258 | lizard-3 259 | lizard-6 260 | lizard-13 261 | microphone-2 262 | microphone-6 263 | microphone-14 264 | microphone-16 265 | monkey-3 266 | monkey-4 267 | monkey-9 268 | monkey-17 269 | shark-2 270 | shark-3 271 | shark-5 272 | shark-6 273 | squirrel-8 274 | squirrel-11 275 | squirrel-13 276 | squirrel-19 277 | volleyball-1 278 | volleyball-13 279 | volleyball-18 280 | volleyball-19 281 | -------------------------------------------------------------------------------- /lib/train/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .lasot import Lasot 2 | from .got10k import Got10k 3 | from .tracking_net import TrackingNet 4 | from .imagenetvid import ImagenetVID 5 | from .coco import MSCOCO 6 | from .coco_seq import MSCOCOSeq 7 | from .got10k_lmdb import Got10k_lmdb 8 | from .lasot_lmdb import Lasot_lmdb 9 | from .imagenetvid_lmdb import ImagenetVID_lmdb 10 | from .coco_seq_lmdb import MSCOCOSeq_lmdb 11 | from .tracking_net_lmdb import TrackingNet_lmdb 12 | from .tnl2k import TNL2K 13 | from .tnl2k_test import TNL2Ktest 14 | from .visualgenome import VisualGenome 15 | from .otb99 import OTB99 16 | from .object365 import Object365 17 | from .refcoco_seq import RefCOCOSeq 18 | from .lasotext import Lasotext 19 | from .lasot_test import Lasot_test 20 | from .webuav import WebUAV -------------------------------------------------------------------------------- /lib/train/dataset/base_image_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | from lib.train.data.image_loader import jpeg4py_loader 3 | 4 | 5 | class BaseImageDataset(torch.utils.data.Dataset): 6 | """ 
Base class for image datasets """ 7 | 8 | def __init__(self, name, root, image_loader=jpeg4py_loader): 9 | """ 10 | args: 11 | root - The root path to the dataset 12 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 13 | is used by default. 14 | """ 15 | self.name = name 16 | self.root = root 17 | self.image_loader = image_loader 18 | 19 | self.image_list = [] # Contains the list of images. 20 | self.class_list = [] 21 | 22 | def __len__(self): 23 | """ Returns size of the dataset 24 | returns: 25 | int - number of samples in the dataset 26 | """ 27 | return self.get_num_images() 28 | 29 | def __getitem__(self, index): 30 | """ Not to be used! Check get_image() instead. 31 | """ 32 | return None 33 | 34 | def get_name(self): 35 | """ Name of the dataset 36 | 37 | returns: 38 | string - Name of the dataset 39 | """ 40 | raise NotImplementedError 41 | 42 | def get_num_images(self): 43 | """ Number of images in the dataset 44 | 45 | returns: 46 | int - number of images in the dataset.""" 47 | return len(self.image_list) 48 | 49 | def has_class_info(self): 50 | return False 51 | 52 | def get_class_name(self, image_id): 53 | return None 54 | 55 | def get_num_classes(self): 56 | return len(self.class_list) 57 | 58 | def get_class_list(self): 59 | return self.class_list 60 | 61 | def get_images_in_class(self, class_name): 62 | raise NotImplementedError 63 | 64 | def has_segmentation_info(self): 65 | return False 66 | 67 | def get_image_info(self, seq_id): 68 | """ Returns information about a particular image. 69 | 70 | args: 71 | seq_id - index of the image 72 | 73 | returns: 74 | Dict 75 | """ 76 | raise NotImplementedError 77 | 78 | def get_image(self, image_id, anno=None): 79 | """ Get an image 80 | 81 | args: 82 | image_id - index of image 83 | anno(None) - The annotation for the image (see get_image_info). If None, it will be loaded. 84 | 85 | returns: 86 | image - 87 | anno - 88 | dict - A dict containing meta information about the image, e.g. class of the target object. 89 | 90 | """ 91 | raise NotImplementedError 92 | 93 | -------------------------------------------------------------------------------- /lib/train/dataset/base_video_dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data 2 | # 2021.1.5 use jpeg4py_loader_w_failsafe as default 3 | from lib.train.data.image_loader import jpeg4py_loader_w_failsafe 4 | 5 | 6 | class BaseVideoDataset(torch.utils.data.Dataset): 7 | """ Base class for video datasets """ 8 | 9 | def __init__(self, name, root, image_loader=jpeg4py_loader_w_failsafe): 10 | """ 11 | args: 12 | root - The root path to the dataset 13 | image_loader (jpeg4py_loader) - The function to read the images. jpeg4py (https://github.com/ajkxyz/jpeg4py) 14 | is used by default. 15 | """ 16 | self.name = name 17 | self.root = root 18 | self.image_loader = image_loader 19 | 20 | self.sequence_list = [] # Contains the list of sequences. 21 | self.class_list = [] 22 | 23 | def __len__(self): 24 | """ Returns size of the dataset 25 | returns: 26 | int - number of samples in the dataset 27 | """ 28 | return self.get_num_sequences() 29 | 30 | def __getitem__(self, index): 31 | """ Not to be used! Check get_frames() instead. 
32 | """ 33 | return None 34 | 35 | def is_video_sequence(self): 36 | """ Returns whether the dataset is a video dataset or an image dataset 37 | 38 | returns: 39 | bool - True if a video dataset 40 | """ 41 | return True 42 | 43 | def is_synthetic_video_dataset(self): 44 | """ Returns whether the dataset contains real videos or synthetic 45 | 46 | returns: 47 | bool - True if a video dataset 48 | """ 49 | return False 50 | 51 | def get_name(self): 52 | """ Name of the dataset 53 | 54 | returns: 55 | string - Name of the dataset 56 | """ 57 | raise NotImplementedError 58 | 59 | def get_num_sequences(self): 60 | """ Number of sequences in a dataset 61 | 62 | returns: 63 | int - number of sequences in the dataset.""" 64 | return len(self.sequence_list) 65 | 66 | def has_class_info(self): 67 | return False 68 | 69 | def has_occlusion_info(self): 70 | return False 71 | 72 | def get_num_classes(self): 73 | return len(self.class_list) 74 | 75 | def get_class_list(self): 76 | return self.class_list 77 | 78 | def get_sequences_in_class(self, class_name): 79 | raise NotImplementedError 80 | 81 | def has_segmentation_info(self): 82 | return False 83 | 84 | def get_sequence_info(self, seq_id): 85 | """ Returns information about a particular sequences, 86 | 87 | args: 88 | seq_id - index of the sequence 89 | 90 | returns: 91 | Dict 92 | """ 93 | raise NotImplementedError 94 | 95 | def get_frames(self, seq_id, frame_ids, anno=None): 96 | """ Get a set of frames from a particular sequence 97 | 98 | args: 99 | seq_id - index of sequence 100 | frame_ids - a list of frame numbers 101 | anno(None) - The annotation for the sequence (see get_sequence_info). If None, they will be loaded. 102 | 103 | returns: 104 | list - List of frames corresponding to frame_ids 105 | list - List of dicts for each frame 106 | dict - A dict containing meta information about the sequence, e.g. class of the target object. 107 | 108 | """ 109 | raise NotImplementedError 110 | 111 | -------------------------------------------------------------------------------- /lib/train/dataset/imagenetvid_lmdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .base_video_dataset import BaseVideoDataset 3 | from lib.train.data import jpeg4py_loader 4 | import torch 5 | from collections import OrderedDict 6 | from lib.train.admin import env_settings 7 | from lib.utils.lmdb_utils import decode_img, decode_json 8 | 9 | 10 | def get_target_to_image_ratio(seq): 11 | anno = torch.Tensor(seq['anno']) 12 | img_sz = torch.Tensor(seq['image_size']) 13 | return (anno[0, 2:4].prod() / (img_sz.prod())).sqrt() 14 | 15 | 16 | class ImagenetVID_lmdb(BaseVideoDataset): 17 | """ Imagenet VID dataset. 18 | 19 | Publication: 20 | ImageNet Large Scale Visual Recognition Challenge 21 | Olga Russakovsky, Jia Deng, Hao Su, Jonathan Krause, Sanjeev Satheesh, Sean Ma, Zhiheng Huang, Andrej Karpathy, 22 | Aditya Khosla, Michael Bernstein, Alexander C. Berg and Li Fei-Fei 23 | IJCV, 2015 24 | https://arxiv.org/pdf/1409.0575.pdf 25 | 26 | Download the dataset from http://image-net.org/ 27 | """ 28 | def __init__(self, root=None, image_loader=jpeg4py_loader, min_length=0, max_target_area=1): 29 | """ 30 | args: 31 | root - path to the imagenet vid dataset. 32 | image_loader (default_image_loader) - The function to read the images. If installed, 33 | jpeg4py (https://github.com/ajkxyz/jpeg4py) is used by default. Else, 34 | opencv's imread is used. 35 | min_length - Minimum allowed sequence length. 
36 | max_target_area - max allowed ratio between target area and image area. Can be used to filter out targets 37 | which cover complete image. 38 | """ 39 | root = env_settings().imagenet_dir if root is None else root 40 | super().__init__("imagenetvid_lmdb", root, image_loader) 41 | 42 | sequence_list_dict = decode_json(root, "cache.json") 43 | self.sequence_list = sequence_list_dict 44 | 45 | # Filter the sequences based on min_length and max_target_area in the first frame 46 | self.sequence_list = [x for x in self.sequence_list if len(x['anno']) >= min_length and 47 | get_target_to_image_ratio(x) < max_target_area] 48 | 49 | def get_name(self): 50 | return 'imagenetvid_lmdb' 51 | 52 | def get_num_sequences(self): 53 | return len(self.sequence_list) 54 | 55 | def get_sequence_info(self, seq_id): 56 | bb_anno = torch.Tensor(self.sequence_list[seq_id]['anno']) 57 | valid = (bb_anno[:, 2] > 0) & (bb_anno[:, 3] > 0) 58 | visible = torch.ByteTensor(self.sequence_list[seq_id]['target_visible']) & valid.byte() 59 | return {'bbox': bb_anno, 'valid': valid, 'visible': visible} 60 | 61 | def _get_frame(self, sequence, frame_id): 62 | set_name = 'ILSVRC2015_VID_train_{:04d}'.format(sequence['set_id']) 63 | vid_name = 'ILSVRC2015_train_{:08d}'.format(sequence['vid_id']) 64 | frame_number = frame_id + sequence['start_frame'] 65 | frame_path = os.path.join('Data', 'VID', 'train', set_name, vid_name, 66 | '{:06d}.JPEG'.format(frame_number)) 67 | return decode_img(self.root, frame_path) 68 | 69 | def get_frames(self, seq_id, frame_ids, anno=None): 70 | sequence = self.sequence_list[seq_id] 71 | 72 | frame_list = [self._get_frame(sequence, f) for f in frame_ids] 73 | 74 | if anno is None: 75 | anno = self.get_sequence_info(seq_id) 76 | 77 | # Create anno dict 78 | anno_frames = {} 79 | for key, value in anno.items(): 80 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 81 | 82 | # added the class info to the meta info 83 | object_meta = OrderedDict({'object_class': sequence['class_name'], 84 | 'motion_class': None, 85 | 'major_class': None, 86 | 'root_class': None, 87 | 'motion_adverb': None}) 88 | 89 | return frame_list, anno_frames, object_meta 90 | 91 | -------------------------------------------------------------------------------- /lib/train/dataset/object365.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .base_video_dataset import BaseVideoDataset 3 | from lib.train.data import jpeg4py_loader 4 | import json 5 | import torch 6 | import random 7 | from pycocotools.coco import COCO 8 | from collections import OrderedDict 9 | from lib.train.admin import env_settings 10 | from .utils import generate_sentence 11 | 12 | class Object365(BaseVideoDataset): 13 | def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, split="train", version="2014"): 14 | super().__init__('Object365', root, image_loader) 15 | 16 | self.img_pth = os.path.join(root, 'imgs/') 17 | self.anno_path = os.path.join(root, 'zhiyuan_objv2_train.json') 18 | self.sequence_list = self._get_sequence_list() 19 | self.id2class = {} 20 | for cat in self.region_descriptions['categories']: 21 | self.id2class[cat['id']] = cat['name'] 22 | 23 | def _get_sequence_list(self): 24 | with open(self.anno_path, 'r') as f: 25 | self.region_descriptions = json.load(f) 26 | seq_list = list(range(len(self.region_descriptions['annotations']))) 27 | return seq_list 28 | 29 | def is_video_sequence(self): 30 | return False 31 | 32 | def 
is_grounding_sequence(self): 33 | return False 34 | 35 | def get_name(self): 36 | return 'object365' 37 | 38 | def has_class_info(self): 39 | return True 40 | 41 | def has_segmentation_info(self): 42 | return True 43 | 44 | def get_num_sequences(self): 45 | return len(self.sequence_list) 46 | 47 | def get_sequence_info(self, seq_id): 48 | anno = self._get_anno(seq_id) 49 | bbox = torch.Tensor(anno['bbox']).view(1, 4) 50 | valid = torch.Tensor([True]) 51 | visible = torch.Tensor([True]) 52 | 53 | return {'bbox': bbox, 'valid': valid, 'visible': visible} 54 | 55 | def _get_anno(self, seq_id): 56 | desc = self.region_descriptions['annotations'][seq_id] 57 | anno = { 58 | 'bbox': desc['bbox'] 59 | } 60 | return anno 61 | 62 | def _get_frames(self, seq_id): 63 | desc = self.region_descriptions['annotations'][seq_id] 64 | img_path = os.path.join(self.img_pth, "objects365_v1_%08d.jpg"%(desc['image_id'])) 65 | if os.path.exists(img_path): 66 | img = self.image_loader(img_path) 67 | else: 68 | img = self.image_loader(os.path.join(self.img_pth, "objects365_v2_%08d.jpg"%(desc['image_id']))) 69 | return img 70 | 71 | def get_frames(self, seq_id=None, frame_ids=None, anno=None): 72 | frame = self._get_frames(seq_id) 73 | 74 | frame_list = [frame.copy() for _ in frame_ids] 75 | 76 | if anno is None: 77 | anno = self.get_sequence_info(seq_id) 78 | 79 | language = self.id2class[self.region_descriptions['annotations'][seq_id]['category_id']] 80 | anno_frames = {} 81 | for key, value in anno.items(): 82 | anno_frames[key] = [value[0, ...] for _ in frame_ids] 83 | 84 | object_meta = OrderedDict({'object_class_name': None, 85 | 'motion_class': None, 86 | 'major_class': None, 87 | 'root_class': None, 88 | 'motion_adverb': None, 89 | 'language': generate_sentence(language.lower())}) 90 | 91 | return frame_list, anno_frames, object_meta 92 | 93 | def get_annos(self, seq_id, frame_ids, anno=None): 94 | if anno is None: 95 | anno = self.get_sequence_info(seq_id) 96 | 97 | anno_frames = {} 98 | for key, value in anno.items(): 99 | anno_frames[key] = [value[0, ...].clone() for _ in frame_ids] 100 | 101 | return anno_frames -------------------------------------------------------------------------------- /lib/train/dataset/otb99.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import glob 4 | import torch 5 | import numpy as np 6 | import pandas 7 | import csv 8 | import random 9 | from collections import OrderedDict 10 | from .base_video_dataset import BaseVideoDataset 11 | from lib.train.data import jpeg4py_loader 12 | from lib.train.admin import env_settings 13 | 14 | 15 | class OTB99(BaseVideoDataset): 16 | def __init__(self, root=None, image_loader=jpeg4py_loader, split=None): 17 | root = env_settings().lasot_dir if root is None else root 18 | super().__init__('OTB99', root, image_loader) 19 | self.split = split 20 | self.sequence_list = self._build_sequence_list(split=split) 21 | 22 | def _build_sequence_list(self, vid_ids=None, split=None): 23 | seq_path = glob.glob(os.path.join(self.root, f'OTB_query_{split}/*.txt')) 24 | sequence_list = [p.split('/')[-1].split('.')[0] for p in seq_path] 25 | return sequence_list 26 | 27 | def get_name(self): 28 | return 'otb99' 29 | 30 | def is_grounding_sequence(self): 31 | return True 32 | 33 | def is_vl_sequence(self): 34 | return True 35 | 36 | def is_tracking_sequence(self): 37 | return True 38 | 39 | def get_num_sequences(self): 40 | return len(self.sequence_list) 41 | 42 | def _read_bb_anno(self, 
seq_path): 43 | bb_anno_file = os.path.join(seq_path, "groundtruth_rect.txt") 44 | try: 45 | gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, low_memory=False).values 46 | except: 47 | gt = pandas.read_csv(bb_anno_file, delimiter='\t', header=None, dtype=np.float32, na_filter=False, low_memory=False).values 48 | return torch.tensor(gt) 49 | 50 | def _get_sequence_path(self, seq_id): 51 | seq_name = self.sequence_list[seq_id].split('-')[0] if self.split=='train' else self.sequence_list[seq_id] 52 | return os.path.join(self.root, 'OTB_videos', seq_name) 53 | 54 | def _read_language(self, seq_id): 55 | seq_name = self.sequence_list[seq_id] 56 | language_file = os.path.join(self.root, f'OTB_query_{self.split}', f"{seq_name}.txt") 57 | with open(language_file, 'r') as f: 58 | language = f.readlines() 59 | return language[0].rstrip() 60 | 61 | def get_sequence_info(self, seq_id): 62 | seq_path = self._get_sequence_path(seq_id) 63 | bbox = self._read_bb_anno(seq_path) 64 | 65 | valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0) 66 | visible = valid.clone().byte() 67 | 68 | return {'bbox': bbox, 'valid': valid, 'visible': visible} 69 | 70 | def _get_frame(self, seq_path, frame_id): 71 | images = sorted(glob.glob(os.path.join(seq_path, 'img', '*'))) 72 | return self.image_loader(images[frame_id]) 73 | 74 | def get_frames(self, seq_id, frame_ids, anno=None): 75 | seq_path = self._get_sequence_path(seq_id) 76 | frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids] 77 | 78 | anno = self.get_sequence_info(seq_id) 79 | 80 | language = self._read_language(seq_id) 81 | anno_frames = {} 82 | for key, value in anno.items(): 83 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 84 | 85 | object_meta = OrderedDict({'object_class_name': None, 86 | 'motion_class': None, 87 | 'major_class': None, 88 | 'root_class': None, 89 | 'motion_adverb': None, 90 | 'language': language.lower()}) 91 | 92 | return frame_list, anno_frames, object_meta 93 | 94 | def get_annos(self, seq_id, frame_ids, anno=None): 95 | if anno is None: 96 | anno = self.get_sequence_info(seq_id) 97 | 98 | anno_frames = {} 99 | for key, value in anno.items(): 100 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 101 | 102 | return anno_frames 103 | -------------------------------------------------------------------------------- /lib/train/dataset/tnl2k.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import glob 4 | import torch 5 | import numpy as np 6 | import pandas 7 | import csv 8 | import random 9 | from collections import OrderedDict 10 | from .base_video_dataset import BaseVideoDataset 11 | from lib.train.data import jpeg4py_loader 12 | from lib.train.admin import env_settings 13 | 14 | 15 | class TNL2K(BaseVideoDataset): 16 | def __init__(self, root=None, image_loader=jpeg4py_loader, split=None): 17 | root = env_settings().tnl2k_dir if root is None else root 18 | super().__init__('TNL2K', root, image_loader) 19 | 20 | self.sequence_list = self._build_sequence_list() 21 | 22 | def _build_sequence_list(self, vid_ids=None, split=None): 23 | seq_path = glob.glob(os.path.join(self.root, '*/')) 24 | sequence_list = [p.split('/')[-2] for p in seq_path] 25 | return sequence_list 26 | 27 | def get_name(self): 28 | return 'tnl2k' 29 | 30 | def has_class_info(self): 31 | return True 32 | 33 | def has_occlusion_info(self): 34 | return True 35 | 36 | def is_grounding_sequence(self): 37 
| return True 38 | 39 | def is_tracking_sequence(self): 40 | return True 41 | 42 | def is_vl_sequence(self): 43 | return True 44 | 45 | def get_num_sequences(self): 46 | return len(self.sequence_list) 47 | 48 | def get_sequences_in_class(self, class_name): 49 | return self.seq_per_class[class_name] 50 | 51 | def _read_bb_anno(self, seq_path): 52 | bb_anno_file = os.path.join(seq_path, "groundtruth.txt") 53 | gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, low_memory=False).values 54 | return torch.tensor(gt) 55 | 56 | def _read_target_visible(self, seq_path): 57 | # Read full occlusion and out_of_view 58 | occlusion_file = os.path.join(seq_path, "full_occlusion.txt") 59 | out_of_view_file = os.path.join(seq_path, "out_of_view.txt") 60 | 61 | with open(occlusion_file, 'r', newline='') as f: 62 | occlusion = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 63 | with open(out_of_view_file, 'r') as f: 64 | out_of_view = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 65 | 66 | target_visible = ~occlusion & ~out_of_view 67 | 68 | return target_visible 69 | 70 | def _get_sequence_path(self, seq_id): 71 | seq_name = self.sequence_list[seq_id] 72 | return os.path.join(self.root, seq_name) 73 | 74 | def _read_language(self, seq_path): 75 | language_file = os.path.join(seq_path, "language.txt") 76 | with open(language_file, 'r') as f: 77 | language = f.readlines() 78 | return language[0].rstrip() 79 | 80 | def get_sequence_info(self, seq_id): 81 | seq_path = self._get_sequence_path(seq_id) 82 | bbox = self._read_bb_anno(seq_path) 83 | 84 | valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0) 85 | visible = valid.clone().byte() 86 | 87 | return {'bbox': bbox, 'valid': valid, 'visible': visible} 88 | 89 | def _get_frame(self, seq_path, frame_id): 90 | images = sorted(glob.glob(os.path.join(seq_path, 'imgs', '*'))) 91 | return self.image_loader(images[frame_id]) 92 | 93 | def get_frames(self, seq_id, frame_ids, anno=None): 94 | seq_path = self._get_sequence_path(seq_id) 95 | frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids] 96 | 97 | anno = self.get_sequence_info(seq_id) 98 | 99 | language = self._read_language(seq_path) 100 | anno_frames = {} 101 | for key, value in anno.items(): 102 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 103 | 104 | object_meta = OrderedDict({'object_class_name': None, 105 | 'motion_class': None, 106 | 'major_class': None, 107 | 'root_class': None, 108 | 'motion_adverb': None, 109 | 'language': language.lower()}) 110 | 111 | return frame_list, anno_frames, object_meta 112 | 113 | def get_annos(self, seq_id, frame_ids, anno=None): 114 | if anno is None: 115 | anno = self.get_sequence_info(seq_id) 116 | 117 | anno_frames = {} 118 | for key, value in anno.items(): 119 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 120 | 121 | return anno_frames 122 | -------------------------------------------------------------------------------- /lib/train/dataset/tnl2k_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import glob 4 | import torch 5 | import numpy as np 6 | import pandas 7 | import csv 8 | import random 9 | from collections import OrderedDict 10 | from .base_video_dataset import BaseVideoDataset 11 | from lib.train.data import jpeg4py_loader 12 | from lib.train.admin import env_settings 13 | 14 | 15 | class TNL2Ktest(BaseVideoDataset): 16 | def __init__(self, root=None, 
image_loader=jpeg4py_loader, split=None): 17 | root = env_settings().lasot_dir if root is None else root 18 | super().__init__('TNL2K', root, image_loader) 19 | 20 | self.sequence_list = self._build_sequence_list() 21 | 22 | def _build_sequence_list(self, vid_ids=None, split=None): 23 | seq_path = glob.glob(os.path.join(self.root, '*/')) 24 | sequence_list = [p.split('/')[-2] for p in seq_path] 25 | return sequence_list 26 | 27 | def get_name(self): 28 | return 'tnl2k_test' 29 | 30 | def has_class_info(self): 31 | return True 32 | 33 | def has_occlusion_info(self): 34 | return True 35 | 36 | def is_grounding_sequence(self): 37 | return True 38 | 39 | def is_tracking_sequence(self): 40 | return True 41 | 42 | def is_vl_sequence(self): 43 | return True 44 | 45 | def get_num_sequences(self): 46 | return len(self.sequence_list) 47 | 48 | def get_sequences_in_class(self, class_name): 49 | return self.seq_per_class[class_name] 50 | 51 | def _read_bb_anno(self, seq_path): 52 | bb_anno_file = os.path.join(seq_path, "groundtruth.txt") 53 | gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, low_memory=False).values 54 | return torch.tensor(gt) 55 | 56 | def _read_target_visible(self, seq_path): 57 | # Read full occlusion and out_of_view 58 | occlusion_file = os.path.join(seq_path, "full_occlusion.txt") 59 | out_of_view_file = os.path.join(seq_path, "out_of_view.txt") 60 | 61 | with open(occlusion_file, 'r', newline='') as f: 62 | occlusion = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 63 | with open(out_of_view_file, 'r') as f: 64 | out_of_view = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 65 | 66 | target_visible = ~occlusion & ~out_of_view 67 | 68 | return target_visible 69 | 70 | def _get_sequence_path(self, seq_id): 71 | seq_name = self.sequence_list[seq_id] 72 | return os.path.join(self.root, seq_name) 73 | 74 | def _read_language(self, seq_path): 75 | language_file = os.path.join(seq_path, "language.txt") 76 | with open(language_file, 'r') as f: 77 | language = f.readlines() 78 | return language[0].rstrip() 79 | 80 | def get_sequence_info(self, seq_id): 81 | seq_path = self._get_sequence_path(seq_id) 82 | bbox = self._read_bb_anno(seq_path) 83 | 84 | valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0) 85 | visible = valid.clone().byte() 86 | 87 | return {'bbox': bbox, 'valid': valid, 'visible': visible} 88 | 89 | def _get_frame(self, seq_path, frame_id): 90 | images = sorted(glob.glob(os.path.join(seq_path, 'imgs', '*'))) 91 | return self.image_loader(images[frame_id]) 92 | 93 | def get_frames(self, seq_id, frame_ids, anno=None): 94 | seq_path = self._get_sequence_path(seq_id) 95 | frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids] 96 | 97 | anno = self.get_sequence_info(seq_id) 98 | 99 | language = self._read_language(seq_path) 100 | anno_frames = {} 101 | for key, value in anno.items(): 102 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 103 | 104 | object_meta = OrderedDict({'object_class_name': None, 105 | 'motion_class': None, 106 | 'major_class': None, 107 | 'root_class': None, 108 | 'motion_adverb': None, 109 | 'language': language.lower()}) 110 | 111 | return frame_list, anno_frames, object_meta 112 | 113 | def get_annos(self, seq_id, frame_ids, anno=None): 114 | if anno is None: 115 | anno = self.get_sequence_info(seq_id) 116 | 117 | anno_frames = {} 118 | for key, value in anno.items(): 119 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 120 | 121 | 
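# --- Illustrative aside (not a file from this repository) ---
# The _read_target_visible helpers above fold per-frame occlusion and
# out-of-view flags into one visibility mask. A self-contained sketch of
# that boolean logic with invented flag values; bool tensors are used here
# so that ~ is unambiguously a logical not (the repository code applies it
# to ByteTensors, the pre-BoolTensor mask type).
import torch

full_occlusion = torch.tensor([0, 1, 0, 0], dtype=torch.bool)  # 1 = target fully occluded
out_of_view = torch.tensor([0, 0, 1, 0], dtype=torch.bool)     # 1 = target outside the frame
visible = ~full_occlusion & ~out_of_view                       # visible iff neither flag is set
print(visible)  # tensor([ True, False, False,  True])
# --- end aside ---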
return anno_frames 122 | -------------------------------------------------------------------------------- /lib/train/dataset/utils.py: -------------------------------------------------------------------------------- 1 | def generate_sentence(name): 2 | return f"the {name} in the view" -------------------------------------------------------------------------------- /lib/train/dataset/visualgenome.py: -------------------------------------------------------------------------------- 1 | import os 2 | from .base_video_dataset import BaseVideoDataset 3 | from lib.train.data import jpeg4py_loader 4 | import json 5 | import torch 6 | import random 7 | from pycocotools.coco import COCO 8 | from collections import OrderedDict 9 | from lib.train.admin import env_settings 10 | 11 | 12 | class VisualGenome(BaseVideoDataset): 13 | def __init__(self, root=None, image_loader=jpeg4py_loader, data_fraction=None, split="train", version="2014"): 14 | super().__init__('VisualGenome', root, image_loader) 15 | 16 | self.img_pth = os.path.join(root, 'VG_100K/') 17 | self.anno_path = os.path.join(root, 'region_descriptions_new.json') 18 | self.sequence_list = self._get_sequence_list() 19 | 20 | def _get_sequence_list(self): 21 | with open(self.anno_path, 'r') as f: 22 | self.region_descriptions = json.load(f) 23 | seq_list = list(range(len(self.region_descriptions))) 24 | return seq_list 25 | 26 | def is_video_sequence(self): 27 | return False 28 | 29 | def get_name(self): 30 | return 'visualgenome' 31 | 32 | def has_class_info(self): 33 | return True 34 | 35 | def has_segmentation_info(self): 36 | return True 37 | 38 | def is_grounding_sequence(self): 39 | return True 40 | 41 | def get_num_sequences(self): 42 | return len(self.sequence_list) 43 | 44 | def get_sequence_info(self, seq_id): 45 | anno = self._get_anno(seq_id) 46 | bbox = torch.Tensor(anno['bbox']).view(1, 4) 47 | valid = torch.Tensor([True]) 48 | visible = torch.Tensor([True]) 49 | 50 | return {'bbox': bbox, 'valid': valid, 'visible': visible} 51 | 52 | def _get_anno(self, seq_id): 53 | desc = self.region_descriptions[seq_id] 54 | anno = { 55 | 'bbox': [desc['x'], desc['y'], desc['width'], desc['height']] 56 | } 57 | return anno 58 | 59 | def _get_frames(self, seq_id): 60 | desc = self.region_descriptions[seq_id] 61 | img = self.image_loader(os.path.join(self.img_pth, "%d.jpg"%(desc['image_id']))) 62 | return img 63 | 64 | def get_frames(self, seq_id=None, frame_ids=None, anno=None): 65 | frame = self._get_frames(seq_id) 66 | 67 | frame_list = [frame.copy() for _ in frame_ids] 68 | 69 | if anno is None: 70 | anno = self.get_sequence_info(seq_id) 71 | 72 | language = self.region_descriptions[seq_id]['phrase'] 73 | anno_frames = {} 74 | for key, value in anno.items(): 75 | anno_frames[key] = [value[0, ...] 
for _ in frame_ids] 76 | 77 | object_meta = OrderedDict({'object_class_name': None, 78 | 'motion_class': None, 79 | 'major_class': None, 80 | 'root_class': None, 81 | 'motion_adverb': None, 82 | 'language': language.lower()}) 83 | 84 | return frame_list, anno_frames, object_meta 85 | 86 | def get_annos(self, seq_id, frame_ids, anno=None): 87 | if anno is None: 88 | anno = self.get_sequence_info(seq_id) 89 | 90 | anno_frames = {} 91 | for key, value in anno.items(): 92 | anno_frames[key] = [value[0, ...].clone() for _ in frame_ids] 93 | 94 | return anno_frames -------------------------------------------------------------------------------- /lib/train/dataset/webuav.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import glob 4 | import torch 5 | import numpy as np 6 | import pandas 7 | import csv 8 | import random 9 | from collections import OrderedDict 10 | from .base_video_dataset import BaseVideoDataset 11 | from lib.train.data import jpeg4py_loader 12 | from lib.train.admin import env_settings 13 | 14 | 15 | class WebUAV(BaseVideoDataset): 16 | def __init__(self, root=None, image_loader=jpeg4py_loader, split=None): 17 | root = env_settings().webuav_dir if root is None else root 18 | super().__init__('WebUAV', root, image_loader) 19 | 20 | self.sequence_list = self._build_sequence_list() 21 | 22 | def _build_sequence_list(self, vid_ids=None, split=None): 23 | seq_path = glob.glob(os.path.join(self.root, 'train/Train/', '*/')) 24 | sequence_list = [p.split('/')[-2] for p in seq_path] 25 | return sequence_list 26 | 27 | def get_name(self): 28 | return 'webuav' 29 | 30 | def has_class_info(self): 31 | return True 32 | 33 | def has_occlusion_info(self): 34 | return True 35 | 36 | def is_grounding_sequence(self): 37 | return True 38 | 39 | def is_tracking_sequence(self): 40 | return True 41 | 42 | def is_vl_sequence(self): 43 | return True 44 | 45 | def get_num_sequences(self): 46 | return len(self.sequence_list) 47 | 48 | def get_sequences_in_class(self, class_name): 49 | return self.seq_per_class[class_name] 50 | 51 | def _read_bb_anno(self, seq_path): 52 | bb_anno_file = os.path.join(seq_path, "groundtruth_rect.txt") 53 | gt = pandas.read_csv(bb_anno_file, delimiter=',', header=None, dtype=np.float32, na_filter=False, low_memory=False).values 54 | return torch.tensor(gt) 55 | 56 | def _read_target_visible(self, seq_path): 57 | # Read the per-frame target-absent flags 58 | occlusion_file = os.path.join(seq_path, "absent.txt") 59 | 60 | with open(occlusion_file, 'r', newline='') as f: 61 | occlusion = torch.ByteTensor([int(v) for v in list(csv.reader(f))[0]]) 62 | 63 | target_visible = ~occlusion 64 | 65 | return target_visible 66 | 67 | def _get_sequence_path(self, seq_id): 68 | seq_name = self.sequence_list[seq_id] 69 | return os.path.join(self.root, "train/Train", seq_name), seq_name 70 | 71 | def _read_language(self, seq): 72 | language_file = os.path.join(self.root, 'language/Language/Train', seq, "language.txt") 73 | with open(language_file, 'r') as f: 74 | language = f.readlines() 75 | return language[0].rstrip() 76 | 77 | def get_sequence_info(self, seq_id): 78 | seq_path, seq_name = self._get_sequence_path(seq_id) 79 | bbox = self._read_bb_anno(seq_path) 80 | 81 | valid = (bbox[:, 2] > 0) & (bbox[:, 3] > 0) 82 | visible = valid.clone().byte() 83 | 84 | return {'bbox': bbox, 'valid': valid, 'visible': visible} 85 | 86 | def _get_frame(self, seq_path, frame_id): 87 | images = sorted(glob.glob(os.path.join(seq_path, 
'img', '*'))) 88 | return self.image_loader(images[frame_id]) 89 | 90 | def get_frames(self, seq_id, frame_ids, anno=None): 91 | seq_path, seq_name = self._get_sequence_path(seq_id) 92 | frame_list = [self._get_frame(seq_path, f_id) for f_id in frame_ids] 93 | 94 | anno = self.get_sequence_info(seq_id) 95 | 96 | language = self._read_language(seq_name) 97 | anno_frames = {} 98 | for key, value in anno.items(): 99 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 100 | 101 | object_meta = OrderedDict({'object_class_name': None, 102 | 'motion_class': None, 103 | 'major_class': None, 104 | 'root_class': None, 105 | 'motion_adverb': None, 106 | 'language': language.lower()}) 107 | 108 | return frame_list, anno_frames, object_meta 109 | 110 | def get_annos(self, seq_id, frame_ids, anno=None): 111 | if anno is None: 112 | anno = self.get_sequence_info(seq_id) 113 | 114 | anno_frames = {} 115 | for key, value in anno.items(): 116 | anno_frames[key] = [value[f_id, ...].clone() for f_id in frame_ids] 117 | 118 | return anno_frames 119 | -------------------------------------------------------------------------------- /lib/train/run_training.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import random 4 | import argparse 5 | import importlib 6 | import cv2 as cv 7 | import _init_paths 8 | import numpy as np 9 | import torch.backends.cudnn 10 | import torch.distributed as dist 11 | torch.backends.cudnn.benchmark = False 12 | import lib.train.admin.settings as ws_settings 13 | 14 | import warnings 15 | warnings.filterwarnings('ignore') 16 | 17 | 18 | def init_seeds(seed): 19 | random.seed(seed) 20 | np.random.seed(seed) 21 | torch.manual_seed(seed) 22 | torch.cuda.manual_seed(seed) 23 | torch.backends.cudnn.deterministic = True 24 | torch.backends.cudnn.benchmark = False 25 | 26 | 27 | def run_training(script_name, config_name, cudnn_benchmark=True, local_rank=-1, save_dir=None, base_seed=None, 28 | use_lmdb=False, script_name_prv=None, config_name_prv=None, 29 | distill=None, script_teacher=None, config_teacher=None, stage1_model=None): 30 | """Run the train script. 31 | args: 32 | script_name: Name of the experiment in the "experiments/" folder. 33 | config_name: Name of the yaml file in the "experiments/" folder. 34 | cudnn_benchmark: Use cudnn benchmark or not (default is True). 35 | """ 36 | if save_dir is None: 37 | print("save_dir is not given. 
Use the default dir instead.") 38 | # This is needed to avoid strange crashes related to opencv 39 | cv.setNumThreads(0) 40 | 41 | torch.backends.cudnn.benchmark = cudnn_benchmark 42 | 43 | if int(os.environ["LOCAL_RANK"]) <= 0: 44 | print('script_name: {}.py config_name: {}.yaml'.format(script_name, config_name)) 45 | 46 | '''2021.1.5 set seed for different process''' 47 | if base_seed is not None: 48 | if local_rank != -1: 49 | init_seeds(base_seed + local_rank) 50 | else: 51 | init_seeds(base_seed) 52 | 53 | settings = ws_settings.Settings() 54 | settings.script_name = script_name 55 | settings.config_name = config_name 56 | settings.stage1_model = stage1_model 57 | settings.project_path = 'train/{}/{}'.format(script_name, config_name) 58 | if script_name_prv is not None and config_name_prv is not None: 59 | settings.project_path_prv = 'train/{}/{}'.format(script_name_prv, config_name_prv) 60 | settings.local_rank = local_rank 61 | settings.save_dir = os.path.abspath(save_dir) 62 | settings.use_lmdb = use_lmdb 63 | prj_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")) 64 | settings.cfg_file = os.path.join(prj_dir, 'experiments/%s/%s.yaml' % (script_name, config_name)) 65 | expr_module = importlib.import_module('lib.train.train_script_mutrack') 66 | expr_func = getattr(expr_module, 'run') 67 | 68 | expr_func(settings) 69 | 70 | 71 | def main(): 72 | parser = argparse.ArgumentParser(description='Run a train scripts in train_settings.') 73 | parser.add_argument('--script', type=str, default="mvit", required=False, help='Name of the train script.') 74 | parser.add_argument('--config', type=str, default="baseline_256_5_notoken", required=False, help="Name of the config file.") 75 | parser.add_argument('--cudnn_benchmark', type=bool, default=True, help='Set cudnn benchmark on (1) or off (0) (default is on).') 76 | parser.add_argument('--save_dir', type=str, default="/ssd/myc/VL_project/MUTrack", help='the directory to save checkpoints and logs') 77 | parser.add_argument('--seed', type=int, default=42, help='seed for random numbers') 78 | parser.add_argument('--use_lmdb', type=int, choices=[0, 1], default=0) # whether datasets are in lmdb format 79 | parser.add_argument('--script_prv', type=str, default=None, help='Name of the train script of previous model.') 80 | parser.add_argument('--config_prv', type=str, default=None, help="Name of the config file of previous model.") 81 | # for knowledge distillation 82 | parser.add_argument('--distill', type=int, choices=[0, 1], default=0) # whether to use knowledge distillation 83 | parser.add_argument('--script_teacher', type=str, help='teacher script name') 84 | parser.add_argument('--config_teacher', type=str, help='teacher yaml configure file name') 85 | parser.add_argument('--stage1_model', type=str, default=None, help='stage1 model used to train SPM.') 86 | args = parser.parse_args() 87 | os.environ['LOCAL_RANK'] = os.environ.get('LOCAL_RANK', '-1') 88 | local_rank = int(os.environ['LOCAL_RANK']) 89 | if local_rank != -1: 90 | dist.init_process_group(backend='nccl') 91 | torch.cuda.set_device(local_rank) 92 | else: 93 | torch.cuda.set_device(0) 94 | 95 | run_training(args.script, args.config, cudnn_benchmark=args.cudnn_benchmark, 96 | local_rank=local_rank, save_dir=args.save_dir, base_seed=args.seed, 97 | use_lmdb=args.use_lmdb, script_name_prv=args.script_prv, config_name_prv=args.config_prv, 98 | distill=args.distill, script_teacher=args.script_teacher, config_teacher=args.config_teacher, 99 | 
stage1_model=args.stage1_model) 100 | 101 | 102 | if __name__ == '__main__': 103 | main() 104 | -------------------------------------------------------------------------------- /lib/train/train_script_mutrack.py: -------------------------------------------------------------------------------- 1 | import os 2 | # loss function related 3 | from lib.utils.box_ops import giou_loss, GaussWeightedLoss 4 | from torch.nn.functional import l1_loss 5 | # train pipeline related 6 | from lib.train.trainers import LTRTrainer 7 | # distributed training related 8 | from torch.nn.parallel import DistributedDataParallel as DDP 9 | # some more advanced functions 10 | from .base_functions import * 11 | # network related 12 | import lib.models 13 | import lib.train.actors 14 | # for import modules 15 | import importlib 16 | from lib import registry 17 | 18 | def run(settings): 19 | settings.description = 'Training script for Mixformer' 20 | 21 | # update the default configs with config file 22 | if not os.path.exists(settings.cfg_file): 23 | raise ValueError("%s doesn't exist." % settings.cfg_file) 24 | config_module = importlib.import_module("lib.config.%s.config" % settings.script_name) 25 | cfg = config_module.cfg 26 | config_module.update_config_from_file(settings.cfg_file) 27 | 28 | # update settings based on cfg 29 | update_settings(settings, cfg) 30 | 31 | # Record the training log 32 | log_dir = os.path.join(settings.save_dir, 'logs') 33 | if settings.local_rank in [-1, 0]: 34 | if not os.path.exists(log_dir): 35 | os.makedirs(log_dir) 36 | settings.log_file = os.path.join(log_dir, "%s-%s.log" % (settings.script_name, settings.config_name)) 37 | 38 | # Build dataloaders 39 | loader_list = build_dataloaders(cfg, settings) 40 | 41 | # Create network 42 | net = registry.MODELS[settings.script_name](cfg).cuda() 43 | 44 | # wrap networks to distributed one 45 | if settings.local_rank != -1: 46 | net = DDP(net, device_ids=[settings.local_rank], find_unused_parameters=True) 47 | settings.device = torch.device("cuda:%d" % settings.local_rank) 48 | else: 49 | settings.device = torch.device("cuda:0") 50 | 51 | # settings.save_every_epoch = True 52 | actor = registry.ACTORS[settings.script_name](net, cfg) 53 | 54 | # Optimizer, parameters, and learning rates 55 | optimizer, lr_scheduler = get_optimizer_scheduler(net, cfg) 56 | trainer = LTRTrainer(actor, loader_list, optimizer, settings, lr_scheduler, use_amp=False) 57 | 58 | # train process 59 | trainer.train(cfg.TRAIN.EPOCH, load_latest=True, fail_safe=True) 60 | -------------------------------------------------------------------------------- /lib/train/trainers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_trainer import BaseTrainer 2 | from .ltr_trainer import LTRTrainer 3 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | .vim-template* 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller 
builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Jiayuan Mao 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/_assets/prroi_visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/utils/PreciseRoIPooling/_assets/prroi_visualization.png -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | /_prroi_pooling 3 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : __init__.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited. 11 | 12 | from .prroi_pool import * 13 | 14 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/functional.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : functional.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited. 11 | 12 | import torch 13 | import torch.autograd as ag 14 | 15 | __all__ = ['prroi_pool2d'] 16 | 17 | 18 | _prroi_pooling = None 19 | 20 | 21 | def _import_prroi_pooling(): 22 | global _prroi_pooling 23 | 24 | if _prroi_pooling is None: 25 | try: 26 | from os.path import join as pjoin, dirname 27 | from torch.utils.cpp_extension import load as load_extension 28 | root_dir = pjoin(dirname(__file__), 'src') 29 | 30 | _prroi_pooling = load_extension( 31 | '_prroi_pooling', 32 | [pjoin(root_dir, 'prroi_pooling_gpu.c'), pjoin(root_dir, 'prroi_pooling_gpu_impl.cu')], 33 | verbose=True 34 | ) 35 | except ImportError: 36 | raise ImportError('Can not compile Precise RoI Pooling library.') 37 | 38 | return _prroi_pooling 39 | 40 | 41 | class PrRoIPool2DFunction(ag.Function): 42 | @staticmethod 43 | def forward(ctx, features, rois, pooled_height, pooled_width, spatial_scale): 44 | _prroi_pooling = _import_prroi_pooling() 45 | 46 | assert 'FloatTensor' in features.type() and 'FloatTensor' in rois.type(), \ 47 | 'Precise RoI Pooling only takes float input, got {} for features and {} for rois.'.format(features.type(), rois.type()) 48 | 49 | pooled_height = int(pooled_height) 50 | pooled_width = int(pooled_width) 51 | spatial_scale = float(spatial_scale) 52 | 53 | features = features.contiguous() 54 | rois = rois.contiguous() 55 | params = (pooled_height, pooled_width, spatial_scale) 56 | 57 | if features.is_cuda: 58 | output = _prroi_pooling.prroi_pooling_forward_cuda(features, rois, *params) 59 | ctx.params = params 60 | # everything here is contiguous. 
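# --- Illustrative aside (not a file from this repository) ---
# PrRoIPool2DFunction follows the standard torch.autograd.Function pattern:
# forward() stashes what backward() needs via ctx.save_for_backward, and
# backward() returns one gradient per forward argument (None for the three
# non-tensor pooling parameters). A toy Function showing the same pattern;
# Square is purely illustrative.
import torch
import torch.autograd as ag

class Square(ag.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)    # stash inputs needed by backward
        return x * x

    @staticmethod
    def backward(ctx, grad_output):
        x, = ctx.saved_tensors
        return 2 * x * grad_output  # d(x*x)/dx chained with the upstream gradient

x = torch.tensor([3.0], requires_grad=True)
Square.apply(x).sum().backward()
print(x.grad)  # tensor([6.])
# --- end aside ---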
61 | ctx.save_for_backward(features, rois, output) 62 | else: 63 | raise NotImplementedError('Precise RoI Pooling only supports GPU (cuda) implementations.') 64 | 65 | return output 66 | 67 | @staticmethod 68 | def backward(ctx, grad_output): 69 | _prroi_pooling = _import_prroi_pooling() 70 | 71 | features, rois, output = ctx.saved_tensors 72 | grad_input = grad_coor = None 73 | 74 | if features.requires_grad: 75 | grad_output = grad_output.contiguous() 76 | grad_input = _prroi_pooling.prroi_pooling_backward_cuda(features, rois, output, grad_output, *ctx.params) 77 | if rois.requires_grad: 78 | grad_output = grad_output.contiguous() 79 | grad_coor = _prroi_pooling.prroi_pooling_coor_backward_cuda(features, rois, output, grad_output, *ctx.params) 80 | 81 | return grad_input, grad_coor, None, None, None 82 | 83 | 84 | prroi_pool2d = PrRoIPool2DFunction.apply 85 | 86 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/prroi_pool.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : prroi_pool.py 4 | # Author : Jiayuan Mao, Tete Xiao 5 | # Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 6 | # Date : 07/13/2018 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | # Copyright (c) 2017 Megvii Technology Limited. 11 | 12 | import torch.nn as nn 13 | 14 | from .functional import prroi_pool2d 15 | 16 | __all__ = ['PrRoIPool2D'] 17 | 18 | 19 | class PrRoIPool2D(nn.Module): 20 | def __init__(self, pooled_height, pooled_width, spatial_scale): 21 | super().__init__() 22 | 23 | self.pooled_height = int(pooled_height) 24 | self.pooled_width = int(pooled_width) 25 | self.spatial_scale = float(spatial_scale) 26 | 27 | def forward(self, features, rois): 28 | return prroi_pool2d(features, rois, self.pooled_height, self.pooled_width, self.spatial_scale) 29 | 30 | def extra_repr(self): 31 | return 'kernel_size=({pooled_height}, {pooled_width}), spatial_scale={spatial_scale}'.format(**self.__dict__) 32 | 33 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.c: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu.c 3 | * Author : Jiayuan Mao, Tete Xiao 4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 5 | * Date : 07/13/2018 6 | * 7 | * Distributed under terms of the MIT license. 8 | * Copyright (c) 2017 Megvii Technology Limited. 
9 | */ 10 | 11 | #include <math.h> 12 | #include <torch/extension.h> 13 | 14 | #include <ATen/ATen.h> 15 | #include <ATen/cuda/CUDAContext.h> 16 | 17 | // #include <THC/THC.h> 18 | 19 | #include "prroi_pooling_gpu_impl.cuh" 20 | 21 | 22 | at::Tensor prroi_pooling_forward_cuda(const at::Tensor &features, const at::Tensor &rois, int pooled_height, int pooled_width, float spatial_scale) { 23 | int nr_rois = rois.size(0); 24 | int nr_channels = features.size(1); 25 | int height = features.size(2); 26 | int width = features.size(3); 27 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 28 | auto output = at::zeros({nr_rois, nr_channels, pooled_height, pooled_width}, features.options()); 29 | 30 | if (output.numel() == 0) { 31 | AT_CUDA_CHECK(cudaGetLastError()); 32 | return output; 33 | } 34 | 35 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(features.device().index()); 36 | PrRoIPoolingForwardGpu( 37 | stream, features.data<float>(), rois.data<float>(), output.data<float>(), 38 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 39 | top_count 40 | ); 41 | 42 | AT_CUDA_CHECK(cudaGetLastError()); 43 | return output; 44 | } 45 | 46 | at::Tensor prroi_pooling_backward_cuda( 47 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff, 48 | int pooled_height, int pooled_width, float spatial_scale) { 49 | 50 | auto features_diff = at::zeros_like(features); 51 | 52 | int nr_rois = rois.size(0); 53 | int batch_size = features.size(0); 54 | int nr_channels = features.size(1); 55 | int height = features.size(2); 56 | int width = features.size(3); 57 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 58 | int bottom_count = batch_size * nr_channels * height * width; 59 | 60 | if (output.numel() == 0) { 61 | AT_CUDA_CHECK(cudaGetLastError()); 62 | return features_diff; 63 | } 64 | 65 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(features.device().index()); 66 | PrRoIPoolingBackwardGpu( 67 | stream, 68 | features.data<float>(), rois.data<float>(), output.data<float>(), output_diff.data<float>(), 69 | features_diff.data<float>(), 70 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 71 | top_count, bottom_count 72 | ); 73 | 74 | AT_CUDA_CHECK(cudaGetLastError()); 75 | return features_diff; 76 | } 77 | 78 | at::Tensor prroi_pooling_coor_backward_cuda( 79 | const at::Tensor &features, const at::Tensor &rois, const at::Tensor &output, const at::Tensor &output_diff, 80 | int pooled_height, int pooled_width, float spatial_scale) { 81 | 82 | auto coor_diff = at::zeros_like(rois); 83 | 84 | int nr_rois = rois.size(0); 85 | int nr_channels = features.size(1); 86 | int height = features.size(2); 87 | int width = features.size(3); 88 | int top_count = nr_rois * nr_channels * pooled_height * pooled_width; 89 | int bottom_count = nr_rois * 5; 90 | 91 | if (output.numel() == 0) { 92 | AT_CUDA_CHECK(cudaGetLastError()); 93 | return coor_diff; 94 | } 95 | 96 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(features.device().index()); 97 | PrRoIPoolingCoorBackwardGpu( 98 | stream, 99 | features.data<float>(), rois.data<float>(), output.data<float>(), output_diff.data<float>(), 100 | coor_diff.data<float>(), 101 | nr_channels, height, width, pooled_height, pooled_width, spatial_scale, 102 | top_count, bottom_count 103 | ); 104 | 105 | AT_CUDA_CHECK(cudaGetLastError()); 106 | return coor_diff; 107 | } 108 | 109 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 110 | m.def("prroi_pooling_forward_cuda", &prroi_pooling_forward_cuda, "PRRoIPooling_forward"); 111 | m.def("prroi_pooling_backward_cuda", &prroi_pooling_backward_cuda, 
"PRRoIPooling_backward"); 112 | m.def("prroi_pooling_coor_backward_cuda", &prroi_pooling_coor_backward_cuda, "PRRoIPooling_backward_coor"); 113 | } 114 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu.h 3 | * Author : Jiayuan Mao, Tete Xiao 4 | * Email : maojiayuan@gmail.com, jasonhsiao97@gmail.com 5 | * Date : 07/13/2018 6 | * 7 | * Distributed under terms of the MIT license. 8 | * Copyright (c) 2017 Megvii Technology Limited. 9 | */ 10 | 11 | int prroi_pooling_forward_cuda(THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, int pooled_height, int pooled_width, float spatial_scale); 12 | 13 | int prroi_pooling_backward_cuda( 14 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff, 15 | int pooled_height, int pooled_width, float spatial_scale 16 | ); 17 | 18 | int prroi_pooling_coor_backward_cuda( 19 | THCudaTensor *features, THCudaTensor *rois, THCudaTensor *output, THCudaTensor *output_diff, THCudaTensor *features_diff, 20 | int pooled_height, int pooled_width, float spatial_scal 21 | ); 22 | 23 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/prroi_pool/src/prroi_pooling_gpu_impl.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu_impl.cuh 3 | * Author : Tete Xiao, Jiayuan Mao 4 | * Email : jasonhsiao97@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | * Copyright (c) 2017 Megvii Technology Limited. 
8 | */ 9 | 10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH 11 | #define PRROI_POOLING_GPU_IMPL_CUH 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #define F_DEVPTR_IN const float * 18 | #define F_DEVPTR_OUT float * 19 | 20 | void PrRoIPoolingForwardGpu( 21 | cudaStream_t stream, 22 | F_DEVPTR_IN bottom_data, 23 | F_DEVPTR_IN bottom_rois, 24 | F_DEVPTR_OUT top_data, 25 | const int channels_, const int height_, const int width_, 26 | const int pooled_height_, const int pooled_width_, 27 | const float spatial_scale_, 28 | const int top_count); 29 | 30 | void PrRoIPoolingBackwardGpu( 31 | cudaStream_t stream, 32 | F_DEVPTR_IN bottom_data, 33 | F_DEVPTR_IN bottom_rois, 34 | F_DEVPTR_IN top_data, 35 | F_DEVPTR_IN top_diff, 36 | F_DEVPTR_OUT bottom_diff, 37 | const int channels_, const int height_, const int width_, 38 | const int pooled_height_, const int pooled_width_, 39 | const float spatial_scale_, 40 | const int top_count, const int bottom_count); 41 | 42 | void PrRoIPoolingCoorBackwardGpu( 43 | cudaStream_t stream, 44 | F_DEVPTR_IN bottom_data, 45 | F_DEVPTR_IN bottom_rois, 46 | F_DEVPTR_IN top_data, 47 | F_DEVPTR_IN top_diff, 48 | F_DEVPTR_OUT bottom_diff, 49 | const int channels_, const int height_, const int width_, 50 | const int pooled_height_, const int pooled_width_, 51 | const float spatial_scale_, 52 | const int top_count, const int bottom_count); 53 | 54 | #ifdef __cplusplus 55 | } /* !extern "C" */ 56 | #endif 57 | 58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */ 59 | 60 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/pytorch/tests/test_prroi_pooling2d.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # File : test_prroi_pooling2d.py 3 | # Author : Jiayuan Mao 4 | # Email : maojiayuan@gmail.com 5 | # Date : 18/02/2018 6 | # 7 | # This file is part of Jacinle. 
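# --- Illustrative aside (not a file from this repository) ---
# The forward test below compares PrRoIPool2D(7, 7, spatial_scale=0.5) on a
# 14x14 RoI against F.avg_pool2d(features, kernel_size=2, stride=1). Why that
# works: the scaled RoI covers 7x7 feature-space bins of size 1x1, and the
# integral of a bilinearly interpolated surface over a unit cell equals the
# mean of its four corner samples, i.e. a 2x2 average-pooling window. Note
# also that each RoI row is (batch_index, x1, y1, x2, y2). A CPU-only check
# of the unit-cell identity (no CUDA extension required):
import torch
import torch.nn.functional as F

x = torch.rand(1, 1, 4, 4)
# Mean of the four corners of every unit cell ...
corners = (x[..., :-1, :-1] + x[..., :-1, 1:] + x[..., 1:, :-1] + x[..., 1:, 1:]) / 4
# ... equals 2x2 average pooling with stride 1.
assert torch.allclose(corners, F.avg_pool2d(x, kernel_size=2, stride=1))
# --- end aside ---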
8 | 9 | import unittest 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | from jactorch.utils.unittest import TorchTestCase 16 | 17 | from prroi_pool import PrRoIPool2D 18 | 19 | 20 | class TestPrRoIPool2D(TorchTestCase): 21 | def test_forward(self): 22 | pool = PrRoIPool2D(7, 7, spatial_scale=0.5) 23 | features = torch.rand((4, 16, 24, 32)).cuda() 24 | rois = torch.tensor([ 25 | [0, 0, 0, 14, 14], 26 | [1, 14, 14, 28, 28], 27 | ]).float().cuda() 28 | 29 | out = pool(features, rois) 30 | out_gold = F.avg_pool2d(features, kernel_size=2, stride=1) 31 | 32 | self.assertTensorClose(out, torch.stack(( 33 | out_gold[0, :, :7, :7], 34 | out_gold[1, :, 7:14, 7:14], 35 | ), dim=0)) 36 | 37 | def test_backward_shapeonly(self): 38 | pool = PrRoIPool2D(2, 2, spatial_scale=0.5) 39 | 40 | features = torch.rand((4, 2, 24, 32)).cuda() 41 | rois = torch.tensor([ 42 | [0, 0, 0, 4, 4], 43 | [1, 14, 14, 18, 18], 44 | ]).float().cuda() 45 | features.requires_grad = rois.requires_grad = True 46 | out = pool(features, rois) 47 | 48 | loss = out.sum() 49 | loss.backward() 50 | 51 | self.assertTupleEqual(features.size(), features.grad.size()) 52 | self.assertTupleEqual(rois.size(), rois.grad.size()) 53 | 54 | 55 | if __name__ == '__main__': 56 | unittest.main() 57 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/src/prroi_pooling_gpu_impl.cuh: -------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu_impl.cuh 3 | * Author : Tete Xiao, Jiayuan Mao 4 | * Email : jasonhsiao97@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | * Copyright (c) 2017 Megvii Technology Limited. 8 | */ 9 | 10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH 11 | #define PRROI_POOLING_GPU_IMPL_CUH 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #define F_DEVPTR_IN const float * 18 | #define F_DEVPTR_OUT float * 19 | 20 | void PrRoIPoolingForwardGpu( 21 | cudaStream_t stream, 22 | F_DEVPTR_IN bottom_data, 23 | F_DEVPTR_IN bottom_rois, 24 | F_DEVPTR_OUT top_data, 25 | const int channels_, const int height_, const int width_, 26 | const int pooled_height_, const int pooled_width_, 27 | const float spatial_scale_, 28 | const int top_count); 29 | 30 | void PrRoIPoolingBackwardGpu( 31 | cudaStream_t stream, 32 | F_DEVPTR_IN bottom_data, 33 | F_DEVPTR_IN bottom_rois, 34 | F_DEVPTR_IN top_data, 35 | F_DEVPTR_IN top_diff, 36 | F_DEVPTR_OUT bottom_diff, 37 | const int channels_, const int height_, const int width_, 38 | const int pooled_height_, const int pooled_width_, 39 | const float spatial_scale_, 40 | const int top_count, const int bottom_count); 41 | 42 | void PrRoIPoolingCoorBackwardGpu( 43 | cudaStream_t stream, 44 | F_DEVPTR_IN bottom_data, 45 | F_DEVPTR_IN bottom_rois, 46 | F_DEVPTR_IN top_data, 47 | F_DEVPTR_IN top_diff, 48 | F_DEVPTR_OUT bottom_diff, 49 | const int channels_, const int height_, const int width_, 50 | const int pooled_height_, const int pooled_width_, 51 | const float spatial_scale_, 52 | const int top_count, const int bottom_count); 53 | 54 | #ifdef __cplusplus 55 | } /* !extern "C" */ 56 | #endif 57 | 58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */ 59 | 60 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/prroi_pool/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | # File : CMakeLists.txt 2 | # Author : 
Kanghee Lee 3 | # Email : lerohiso@gmail.com 4 | # Date : 09/25/2020 5 | # 6 | # This file is part of PreciseRoIPooling. 7 | # Distributed under terms of the MIT license. 8 | 9 | CMAKE_MINIMUM_REQUIRED(VERSION 3.17 FATAL_ERROR) 10 | 11 | PROJECT(precise_roi_pooling) 12 | FIND_PACKAGE(CUDA) 13 | FIND_PACKAGE(PythonInterp 3) 14 | 15 | if (MSVC) 16 | SET(GPU_LIB ${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/build/precise_roi_pooling_cuda.lib) 17 | elseif (UNIX) 18 | SET(GPU_LIB ${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/build/precise_roi_pooling_cuda.so) 19 | endif() 20 | 21 | if (NOT EXISTS ${GPU_LIB}) 22 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/src/kernels/build_cuda.py" RESULTS_VARIABLE RET_CODE) 23 | if (NOT "${RET_CODE}" STREQUAL "0") 24 | MESSAGE(FATAL_ERROR "Failed to compile CUDA code") 25 | endif () 26 | endif () 27 | 28 | if (NOT DEFINED TF_PATH) 29 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(tf.sysconfig.get_include(), end='', flush=True)" OUTPUT_VARIABLE TF_INC) 30 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(tf.sysconfig.get_lib(), end='', flush=True)" OUTPUT_VARIABLE TF_LIB) 31 | MESSAGE(STATUS "TF_INC: " ${TF_INC}) 32 | MESSAGE(STATUS "TF_LIB: " ${TF_LIB}) 33 | SET(TF_PATH 1) 34 | endif () 35 | 36 | if (NOT DEFINED TF_FLAGS) 37 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(' '.join(tf.sysconfig.get_compile_flags()), end='', flush=True)" OUTPUT_VARIABLE TF_CFLAGS) 38 | EXECUTE_PROCESS(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['TF_CPP_MIN_LOG_LEVEL']='3'; import tensorflow as tf; print(' '.join(tf.sysconfig.get_link_flags()), end='', flush=True)" OUTPUT_VARIABLE TF_LFLAGS) 39 | MESSAGE(STATUS "TF_CFLAGS: " ${TF_CFLAGS}) 40 | MESSAGE(STATUS "TF_LFLAGS: " ${TF_LFLAGS}) 41 | SET(TF_FLAGS 1) 42 | endif () 43 | 44 | INCLUDE_DIRECTORIES(${TF_INC}) 45 | LINK_DIRECTORIES(${TF_LIB}) 46 | INCLUDE_DIRECTORIES(${CUDA_INCLUDE_DIRS}) 47 | 48 | LIST(APPEND CMAKE_CXX_FLAGS "${TF_CFLAGS} ${TF_LFLAGS} -O2 -D GOOGLE_CUDA=1 -std=c++11 -shared") 49 | if (CMAKE_CXX_COMPILER_ID MATCHES "MSVC") 50 | LIST(APPEND CMAKE_CXX_FLAGS " -lcudart -DNOMINMAX") 51 | endif () 52 | 53 | MESSAGE(STATUS "CMAKE_CXX_COMPILER_ID: " ${CMAKE_CXX_COMPILER_ID}) 54 | MESSAGE(STATUS "CMAKE_CXX_FLAGS: " ${CMAKE_CXX_FLAGS}) 55 | 56 | ADD_LIBRARY(precise_roi_pooling SHARED src/kernels/precise_roi_pooling.h 57 | src/kernels/precise_roi_pooling_kernels.cc 58 | src/ops/precise_roi_pooling_ops.cc) 59 | TARGET_COMPILE_FEATURES(precise_roi_pooling PUBLIC cxx_std_11) 60 | SET_TARGET_PROPERTIES(precise_roi_pooling PROPERTIES 61 | RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/module/" 62 | LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/module/" 63 | ) 64 | 65 | ADD_LIBRARY(precise_roi_pooling_gpu SHARED IMPORTED) 66 | if (MSVC) 67 | SET_TARGET_PROPERTIES(precise_roi_pooling_gpu PROPERTIES IMPORTED_IMPLIB ${GPU_LIB}) 68 | elseif (UNIX) 69 | SET_TARGET_PROPERTIES(precise_roi_pooling_gpu PROPERTIES IMPORTED_LOCATION ${GPU_LIB}) 70 | endif() 71 | 72 | ADD_LIBRARY(tensorflow_internal SHARED IMPORTED) 73 | if (MSVC) 74 | SET_TARGET_PROPERTIES(tensorflow_internal PROPERTIES 75 | IMPORTED_IMPLIB ${TF_LIB}/python/_pywrap_tensorflow_internal.lib) 76 | elseif (UNIX) 77 | SET_TARGET_PROPERTIES(tensorflow_internal PROPERTIES 78 | IMPORTED_LOCATION ${TF_LIB}/python/_pywrap_tensorflow_internal.so) 79 | endif() 80 | 81 | TARGET_LINK_LIBRARIES(precise_roi_pooling tensorflow_internal 82 | precise_roi_pooling_gpu 83 | ${CUDA_LIBRARIES}) 84 | 85 | ADD_CUSTOM_TARGET(precise_roi_pooling_test ALL 86 | COMMAND ${CMAKE_COMMAND} -E env 87 | "PYTHONPATH=${CMAKE_CURRENT_SOURCE_DIR}/../" 88 | ${PYTHON_EXECUTABLE} tests/precise_roi_pooling_ops_test.py 89 | WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/../") 90 | 91 | ADD_DEPENDENCIES(precise_roi_pooling_test precise_roi_pooling) 92 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/prroi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : __init__.py 4 | # Author : Kanghee Lee 5 | # Email : lerohiso@gmail.com 6 | # Date : 09/25/2020 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | 11 | from .precise_roi_pooling_ops import * 12 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/build_cuda.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : build_cuda.py 4 | # Author : Kanghee Lee 5 | # Email : lerohiso@gmail.com 6 | # Date : 09/25/2020 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | 11 | import os 12 | import platform 13 | import shutil 14 | import subprocess 15 | 16 | import tensorflow as tf 17 | 18 | CUDA_SRCS = [] 19 | CUDA_OUTPUT_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'build') 20 | 21 | if not os.path.isdir(CUDA_OUTPUT_DIR): 22 | os.makedirs(CUDA_OUTPUT_DIR) 23 | 24 | for file in os.listdir(os.path.dirname(os.path.realpath(__file__))): 25 | if file.endswith('.cu.cc'): 26 | CUDA_SRCS.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), file)) 27 | 28 | CUDA_COMPILER = shutil.which('nvcc') 29 | if CUDA_COMPILER is None: 30 | raise ValueError('CUDA Compiler Not Found') 31 | 32 | TF_CFLAGS = ' '.join(tf.sysconfig.get_compile_flags()) 33 | TF_LFLAGS = ' '.join(tf.sysconfig.get_link_flags()) 34 | 35 | CUDA_NVCC_FLAGS = TF_CFLAGS + ' ' + TF_LFLAGS + ' -D GOOGLE_CUDA=1 -x cu --expt-relaxed-constexpr' 36 | 37 | os_type = platform.system() 38 | if os_type == 'Windows': 39 | CUDA_NVCC_FLAGS += ' -Xcompiler -MD -cudart=shared -D_WINSOCKAPI_' 40 | CUDA_OUTPUT_FILENAME = 'precise_roi_pooling_cuda.lib' 41 | elif os_type == 'Linux': 42 | CUDA_NVCC_FLAGS += ' -Xcompiler -fPIC -DNDEBUG' 43 | CUDA_OUTPUT_FILENAME = 'precise_roi_pooling_cuda.so' 44 | 45 | COMMAND = CUDA_COMPILER 46 | COMMAND += ' -c -o ' + os.path.join(CUDA_OUTPUT_DIR, CUDA_OUTPUT_FILENAME) 47 | COMMAND += ' ' + ' '.join(CUDA_SRCS) 48 | COMMAND += ' ' + CUDA_NVCC_FLAGS 49 | 50 | process = subprocess.Popen(COMMAND, shell=True, stderr=subprocess.STDOUT, stdout=subprocess.PIPE) 51 | process_output = process.communicate()[0] 52 | print(process_output.decode()) 53 | 54 | if process.returncode != 0: 55 | raise ValueError('Failed to compile CUDA code') 56 |
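Both build_cuda.py above and the CMakeLists.txt before it lean on TensorFlow's tf.sysconfig to discover headers, library paths, and flags; when the build fails, printing the same values interactively is the quickest diagnostic. A short check using the standard tf.sysconfig API:

```python
import tensorflow as tf

# The same values CMakeLists.txt recovers via EXECUTE_PROCESS
# and build_cuda.py joins into TF_CFLAGS / TF_LFLAGS.
print(tf.sysconfig.get_include())                  # -> TF_INC
print(tf.sysconfig.get_lib())                      # -> TF_LIB
print(' '.join(tf.sysconfig.get_compile_flags()))  # -> TF_CFLAGS
print(' '.join(tf.sysconfig.get_link_flags()))     # -> TF_LFLAGS
```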
-------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/external/prroi_pooling_gpu_impl.cuh:
-------------------------------------------------------------------------------- 1 | /* 2 | * File : prroi_pooling_gpu_impl.cuh 3 | * Author : Tete Xiao, Jiayuan Mao 4 | * Email : jasonhsiao97@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | * Copyright (c) 2017 Megvii Technology Limited. 8 | */ 9 | 10 | #ifndef PRROI_POOLING_GPU_IMPL_CUH 11 | #define PRROI_POOLING_GPU_IMPL_CUH 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | #define F_DEVPTR_IN const float * 18 | #define F_DEVPTR_OUT float * 19 | 20 | void PrRoIPoolingForwardGpu( 21 | cudaStream_t stream, 22 | F_DEVPTR_IN bottom_data, 23 | F_DEVPTR_IN bottom_rois, 24 | F_DEVPTR_OUT top_data, 25 | const int channels_, const int height_, const int width_, 26 | const int pooled_height_, const int pooled_width_, 27 | const float spatial_scale_, 28 | const int top_count); 29 | 30 | void PrRoIPoolingBackwardGpu( 31 | cudaStream_t stream, 32 | F_DEVPTR_IN bottom_data, 33 | F_DEVPTR_IN bottom_rois, 34 | F_DEVPTR_IN top_data, 35 | F_DEVPTR_IN top_diff, 36 | F_DEVPTR_OUT bottom_diff, 37 | const int channels_, const int height_, const int width_, 38 | const int pooled_height_, const int pooled_width_, 39 | const float spatial_scale_, 40 | const int top_count, const int bottom_count); 41 | 42 | void PrRoIPoolingCoorBackwardGpu( 43 | cudaStream_t stream, 44 | F_DEVPTR_IN bottom_data, 45 | F_DEVPTR_IN bottom_rois, 46 | F_DEVPTR_IN top_data, 47 | F_DEVPTR_IN top_diff, 48 | F_DEVPTR_OUT bottom_diff, 49 | const int channels_, const int height_, const int width_, 50 | const int pooled_height_, const int pooled_width_, 51 | const float spatial_scale_, 52 | const int top_count, const int bottom_count); 53 | 54 | #ifdef __cplusplus 55 | } /* !extern "C" */ 56 | #endif 57 | 58 | #endif /* !PRROI_POOLING_GPU_IMPL_CUH */ 59 | 60 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/prroi_pool/src/kernels/precise_roi_pooling.h: -------------------------------------------------------------------------------- 1 | /* 2 | * File : precise_roi_pooling.h 3 | * Author : Kanghee Lee 4 | * Email : lerohiso@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 
7 | */ 8 | 9 | #ifndef KERNEL_PRECISE_ROI_POOLING_H_ 10 | #define KERNEL_PRECISE_ROI_POOLING_H_ 11 | 12 | #include "tensorflow/core/framework/op_kernel.h" 13 | #include "tensorflow/core/util/tensor_format.h" 14 | 15 | namespace tensorflow { 16 | 17 | namespace functor { 18 | 19 | template <typename Device, typename T> 20 | struct PreciseRoIPoolingFunctor { 21 | Status operator()(OpKernelContext* context, 22 | const Tensor& features, 23 | const Tensor& rois, 24 | Tensor* pooled_features, 25 | int pooled_height, 26 | int pooled_width, 27 | float spatial_scale, 28 | TensorFormat data_format); 29 | }; 30 | 31 | template <typename Device, typename T> 32 | struct PreciseRoIPoolingGradFunctor { 33 | Status operator()(OpKernelContext* context, 34 | const Tensor& features, 35 | const Tensor& rois, 36 | const Tensor& pooled_features, 37 | const Tensor& pooled_features_diff, 38 | Tensor* features_gradient, 39 | Tensor* rois_gradient, 40 | int pooled_height, 41 | int pooled_width, 42 | float spatial_scale, 43 | TensorFormat data_format); 44 | }; 45 | 46 | } // namespace functor 47 | 48 | } // namespace tensorflow 49 | 50 | #endif // KERNEL_PRECISE_ROI_POOLING_H_ -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/prroi_pool/src/ops/precise_roi_pooling_ops.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * File : precise_roi_pooling_ops.cc 3 | * Author : Kanghee Lee 4 | * Email : lerohiso@gmail.com 5 | * 6 | * Distributed under terms of the MIT license. 7 | */ 8 | 9 | #include "tensorflow/core/framework/op.h" 10 | #include "tensorflow/core/framework/shape_inference.h" 11 | 12 | namespace tensorflow { 13 | 14 | using ::tensorflow::shape_inference::InferenceContext; 15 | using ::tensorflow::shape_inference::ShapeHandle; 16 | 17 | REGISTER_OP("PreciseRoIPooling") 18 | .Input("features: T") 19 | .Input("rois: T") 20 | .Output("pooled_features: T") 21 | .Attr("pooled_height: int") 22 | .Attr("pooled_width: int") 23 | .Attr("spatial_scale: float") 24 | .Attr("data_format: {'NCHW'} = 'NCHW'") 25 | .Attr("T: realnumbertype") 26 | .SetShapeFn([](InferenceContext* c) { 27 | ShapeHandle features, rois; 28 | 29 | TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &features)); 30 | TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &rois)); 31 | 32 | // get input shapes 33 | int32 number_of_rois, number_of_channels; 34 | number_of_rois = c->Value(c->Dim(rois, 0)); 35 | string data_format; 36 | Status s = c->GetAttr("data_format", &data_format); 37 | if (s.ok() && data_format == "NCHW") { 38 | number_of_channels = c->Value(c->Dim(features, 1)); 39 | } 40 | else { 41 | number_of_channels = c->Value(c->Dim(features, 3)); 42 | } 43 | 44 | int32 pooled_height; 45 | int32 pooled_width; 46 | 47 | TF_RETURN_IF_ERROR(c->GetAttr("pooled_height", &pooled_height)); 48 | TF_RETURN_IF_ERROR(c->GetAttr("pooled_width", &pooled_width)); 49 | 50 | // Note, the output is always NCHW (even when input is NHWC) 51 | c->set_output(0, c->MakeShape({number_of_rois, number_of_channels, pooled_height, pooled_width})); 52 | return Status::OK(); 53 | }) 54 | .Doc(R"doc(PreciseRoIPooling op.)doc"); 55 | 56 | REGISTER_OP("PreciseRoIPoolingGrad") 57 | .Input("features: T") 58 | .Input("rois: T") 59 | .Input("pooled_features: T") 60 | .Input("pooled_features_diff: T") 61 | .Output("features_gradient: T") 62 | .Output("rois_gradient: T") 63 | .Attr("pooled_height: int") 64 | .Attr("pooled_width: int") 65 | .Attr("spatial_scale: float") 66 | .Attr("data_format: {'NCHW'} = 'NCHW'") 67 | .Attr("T: 
realnumbertype") 68 | .SetShapeFn([](InferenceContext* c) { 69 | ShapeHandle features, rois; 70 | TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &features)); 71 | TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 2, &rois)); 72 | c->set_output(0, features); 73 | c->set_output(1, rois); 74 | return Status::OK(); 75 | }) 76 | .Doc(R"doc(PreciseRoIPoolingGrad op.)doc"); 77 | 78 | } // namespace tensorflow -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/tests/precise_roi_pooling_ops_test.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # File : precise_roi_pooling_ops_test.py 4 | # Author : Kanghee Lee 5 | # Email : lerohiso@gmail.com 6 | # Date : 09/25/2020 7 | # 8 | # This file is part of PreciseRoIPooling. 9 | # Distributed under terms of the MIT license. 10 | 11 | from __future__ import absolute_import 12 | from __future__ import division 13 | from __future__ import print_function 14 | 15 | import os 16 | import numpy as np 17 | import tensorflow as tf 18 | 19 | from tensorflow.python.framework import ops 20 | from tensorflow.python.platform import test 21 | from tensorflow.python.framework import test_util 22 | from prroi_pool import PreciseRoIPooling 23 | 24 | 25 | class PreciseRoIPoolingTest(test.TestCase): 26 | @test_util.run_gpu_only 27 | def test_forward(self): 28 | with self.test_session(): 29 | with ops.device("/gpu:0"): 30 | pooled_width = 7 31 | pooled_height = 7 32 | spatial_scale = 0.5 33 | data_format = 'channels_first' 34 | pool = PreciseRoIPooling(pooled_height, 35 | pooled_width, 36 | spatial_scale=spatial_scale, 37 | data_format=data_format) 38 | features = tf.random.uniform([4, 16, 24, 32], dtype=tf.float32) 39 | rois = tf.constant([[0, 0, 0, 14, 14], [1, 14, 14, 28, 28]], dtype=tf.float32) 40 | operation_outputs = pool([features, rois]) 41 | real_outputs = tf.keras.layers.AveragePooling2D(data_format=data_format, strides=1)(features) 42 | real_outputs = tf.stack([real_outputs[0, :, :7, :7], real_outputs[1, :, 7:14, 7:14]], axis=0) 43 | self.assertAllClose(operation_outputs, real_outputs) 44 | 45 | @test_util.run_gpu_only 46 | def test_backward(self): 47 | with self.test_session(): 48 | with ops.device("/gpu:0"): 49 | pooled_width = 2 50 | pooled_height = 2 51 | spatial_scale = 0.5 52 | data_format = 'channels_first' 53 | base_directory = os.path.dirname(os.path.realpath(__file__)) 54 | 55 | # binaries from pytorch prroi_pool module 56 | features = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/features.npy')) 57 | rois = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/rois.npy')) 58 | 59 | real_outputs = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/real_outputs.npy')) 60 | real_gradients0 = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/gradients0.npy')) 61 | real_gradients1 = np.load(os.path.join(base_directory, 'test_binaries/2_2_0.5/gradients1.npy')) 62 | features = tf.convert_to_tensor(features) 63 | rois = tf.convert_to_tensor(rois) 64 | with tf.GradientTape() as tape: 65 | tape.watch([features, rois]) 66 | outputs = PreciseRoIPooling(pooled_height=pooled_height, 67 | pooled_width=pooled_width, 68 | spatial_scale=spatial_scale, 69 | data_format=data_format)([features, rois]) 70 | loss = tf.reduce_sum(outputs) 71 | 72 | gradients = tape.gradient(loss, [features, rois]) 73 | 74 | self.assertAllClose(outputs, real_outputs) 75 | 
self.assertAllClose(gradients[0], real_gradients0) 76 | self.assertAllClose(gradients[1], real_gradients1) 77 | 78 | 79 | if __name__ == '__main__': 80 | test.main() 81 | -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/features.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/features.npy -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients0.npy -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/gradients1.npy -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/real_outputs.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/real_outputs.npy -------------------------------------------------------------------------------- /lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/rois.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OpenSpaceAI/UVLTrack/6ca34055c3447cd69b032eeb0f7cf6af6c9f3728/lib/utils/PreciseRoIPooling/tensorflow/tests/test_binaries/2_2_0.5/rois.npy -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .tensor import TensorDict, TensorList 2 | -------------------------------------------------------------------------------- /lib/utils/classification_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | from torch.nn import functional as F 4 | 5 | 6 | class LBHinge(nn.Module): 7 | """Loss that uses a 'hinge' on the lower bound. 8 | This means that for samples with a label value smaller than the threshold, the loss is zero if the prediction is 9 | also smaller than that threshold. 10 | args: 11 | error_metric: What base loss to use (MSE by default). 12 | threshold: Threshold to use for the hinge. 13 | clip: Clip the loss if it is above this value.
14 | """ 15 | def __init__(self, error_metric=nn.MSELoss(), threshold=0.05, clip=None): 16 | super().__init__() 17 | self.error_metric = error_metric 18 | self.threshold = threshold if threshold is not None else -100 19 | self.clip = clip 20 | 21 | def forward(self, prediction, label): 22 | negative_mask = (label < self.threshold).float() 23 | positive_mask = (1.0 - negative_mask) 24 | 25 | prediction = negative_mask * F.relu(prediction) + positive_mask * prediction 26 | 27 | loss = self.error_metric(prediction, positive_mask * label) 28 | 29 | if self.clip is not None: 30 | loss = torch.min(loss, torch.tensor([self.clip], device=loss.device)) 31 | return loss 32 | -------------------------------------------------------------------------------- /lib/utils/lmdb_utils.py: -------------------------------------------------------------------------------- 1 | import lmdb 2 | import numpy as np 3 | import cv2 4 | import json 5 | 6 | LMDB_ENVS = dict() 7 | LMDB_HANDLES = dict() 8 | LMDB_FILELISTS = dict() 9 | 10 | 11 | def get_lmdb_handle(name): 12 | global LMDB_HANDLES, LMDB_FILELISTS 13 | item = LMDB_HANDLES.get(name, None) 14 | if item is None: 15 | env = lmdb.open(name, readonly=True, lock=False, readahead=False, meminit=False) 16 | LMDB_ENVS[name] = env 17 | item = env.begin(write=False) 18 | LMDB_HANDLES[name] = item 19 | 20 | return item 21 | 22 | 23 | def decode_img(lmdb_fname, key_name): 24 | handle = get_lmdb_handle(lmdb_fname) 25 | binfile = handle.get(key_name.encode()) 26 | if binfile is None: 27 | print("Illegal data detected. %s %s" % (lmdb_fname, key_name)) 28 | s = np.frombuffer(binfile, np.uint8) 29 | x = cv2.cvtColor(cv2.imdecode(s, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB) 30 | return x 31 | 32 | 33 | def decode_str(lmdb_fname, key_name): 34 | handle = get_lmdb_handle(lmdb_fname) 35 | binfile = handle.get(key_name.encode()) 36 | string = binfile.decode() 37 | return string 38 | 39 | 40 | def decode_json(lmdb_fname, key_name): 41 | return json.loads(decode_str(lmdb_fname, key_name)) 42 | 43 | 44 | if __name__ == "__main__": 45 | lmdb_fname = "/data/sda/v-yanbi/iccv21/LittleBoy_clean/data/got10k_lmdb" 46 | '''Decode image''' 47 | # key_name = "test/GOT-10k_Test_000001/00000001.jpg" 48 | # img = decode_img(lmdb_fname, key_name) 49 | # cv2.imwrite("001.jpg", img) 50 | '''Decode str''' 51 | # key_name = "test/list.txt" 52 | # key_name = "train/GOT-10k_Train_000001/groundtruth.txt" 53 | key_name = "train/GOT-10k_Train_000001/absence.label" 54 | str_ = decode_str(lmdb_fname, key_name) 55 | print(str_) 56 | -------------------------------------------------------------------------------- /lib/utils/merge.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def merge_template_search(inp_list, return_search=False, return_template=False): 5 | """NOTICE: search region related features must be in the last place""" 6 | seq_dict = {"feat": torch.cat([x["feat"] for x in inp_list], dim=0), 7 | "mask": torch.cat([x["mask"] for x in inp_list], dim=1), 8 | "pos": torch.cat([x["pos"] for x in inp_list], dim=0)} 9 | if return_search: 10 | x = inp_list[-1] 11 | seq_dict.update({"feat_x": x["feat"], "mask_x": x["mask"], "pos_x": x["pos"]}) 12 | if return_template: 13 | z = inp_list[0] 14 | seq_dict.update({"feat_z": z["feat"], "mask_z": z["mask"], "pos_z": z["pos"]}) 15 | return seq_dict 16 | 17 | 18 | def get_qkv(inp_list): 19 | """The 1st element of the inp_list is about the template, 20 | the 2nd (the last) element is about the search 
region""" 21 | dict_x = inp_list[-1] 22 | dict_c = {"feat": torch.cat([x["feat"] for x in inp_list], dim=0), 23 | "mask": torch.cat([x["mask"] for x in inp_list], dim=1), 24 | "pos": torch.cat([x["pos"] for x in inp_list], dim=0)} # concatenated dict 25 | q = dict_x["feat"] + dict_x["pos"] 26 | k = dict_c["feat"] + dict_c["pos"] 27 | v = dict_c["feat"] 28 | key_padding_mask = dict_c["mask"] 29 | return q, k, v, key_padding_mask 30 | -------------------------------------------------------------------------------- /lib/utils/scheduler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from bisect import bisect_right 3 | 4 | 5 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 6 | def __init__( 7 | self, 8 | optimizer, 9 | milestones, 10 | gamma=0.1, 11 | warmup_factor=0.01, 12 | warmup_iters=20., 13 | warmup_method="linear", 14 | last_epoch=-1, 15 | ): 16 | if not list(milestones) == sorted(milestones): 17 | raise ValueError( 18 | "Milestones should be a list of" " increasing integers. Got {}", 19 | milestones, 20 | ) 21 | 22 | if warmup_method not in ("constant", "linear"): 23 | raise ValueError( 24 | "Only 'constant' or 'linear' warmup_method accepted" 25 | "got {}".format(warmup_method) 26 | ) 27 | self.milestones = milestones 28 | self.gamma = gamma 29 | self.warmup_factor = warmup_factor 30 | self.warmup_iters = warmup_iters 31 | self.warmup_method = warmup_method 32 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 33 | 34 | def get_lr(self): 35 | warmup_factor = 1 36 | if self.last_epoch < self.warmup_iters: 37 | if self.warmup_method == "constant": 38 | warmup_factor = self.warmup_factor 39 | elif self.warmup_method == "linear": 40 | # print(self.last_epoch) 41 | alpha = (self.last_epoch + 1) / self.warmup_iters 42 | # print(alpha) 43 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 44 | # print(warmup_factor) 45 | return [ 46 | base_lr 47 | * warmup_factor 48 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 49 | for base_lr in self.base_lrs 50 | ] 51 | -------------------------------------------------------------------------------- /scripts/demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | script=$1 5 | config=$2 6 | input_video=$3 # 7 | output_video=$4 # 8 | language=${5:""} # 9 | init_bbox=${6:""} # 10 | 11 | python demo.py --tracker_name $script \ 12 | --tracker_param $config \ 13 | --input_video $input_video \ 14 | --output_video $output_video \ 15 | --language $language \ 16 | --init_bbox $init_bbox \ -------------------------------------------------------------------------------- /scripts/new_tracker.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | tracker=$1 4 | template=${2:-'mae'} 5 | 6 | cp -r experiments/${template} experiments/$tracker 7 | cp -r lib/config/${template} lib/config/$tracker 8 | cp -r lib/models/${template} lib/models/$tracker 9 | mv lib/models/$tracker/${template}.py lib/models/$tracker/$tracker.py 10 | cp lib/train/actors/${template}.py lib/train/actors/$tracker.py 11 | 12 | cp lib/test/parameter/${template}.py lib/test/parameter/$tracker.py 13 | cp lib/test/tracker/${template}.py lib/test/tracker/$tracker.py 14 | 15 | echo "\n"from .$tracker import $tracker >> lib/models/__init__.py 16 | echo "\n"from .$tracker import '*' >> lib/train/actors/__init__.py 17 | 18 | echo "The following file need to be 
modified: " 19 | echo "lib/models/$tracker/$tracker.py" 20 | echo "lib/train/actors/$tracker.py" 21 | echo "lib/test/parameter/$tracker.py" 22 | echo "lib/test/tracker/$tracker.py" -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | script=${1:-'uvltrack'} 4 | config=${2:-'baseline'} 5 | dataset=${3:-'tnl2k'} 6 | numgpu=${4:-2} 7 | threads_per_gpu=${5:-8} 8 | 9 | # CUDA_VISIBLE_DEVICES=2,3 \ 10 | nohup \ 11 | python tracking/test.py --tracker_name $script --tracker_param $config --dataset $dataset \ 12 | --threads $((threads_per_gpu*numgpu)) --num_gpus $numgpu --debug 0 \ 13 | > terminal_logs/test_$script'_'$config'_'$dataset.log 2>&1 & 14 | 15 | echo log save to terminal_logs/test_$script'_'$config'_'$dataset.log -------------------------------------------------------------------------------- /scripts/train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | script=${1:-'uvltrack'} 4 | config=${2:-'baseline_base'} 5 | numgpu=${3:-2} 6 | gpuid=${4:-'0,1'} 7 | 8 | CUDA_VISIBLE_DEVICES=$gpuid \ 9 | nohup \ 10 | python tracking/train.py --script $script \ 11 | --config $config \ 12 | --save_dir . \ 13 | --mode multiple \ 14 | --nproc_per_node $numgpu \ 15 | > terminal_logs/train_$script'_'$config.log 2>&1 & 16 | 17 | echo log save to terminal_logs/train_$script'_'$config.log -------------------------------------------------------------------------------- /tracking/_init_paths.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os.path as osp 6 | import sys 7 | 8 | 9 | def add_path(path): 10 | if path not in sys.path: 11 | sys.path.insert(0, path) 12 | 13 | 14 | this_dir = osp.dirname(__file__) 15 | 16 | prj_path = osp.join(this_dir, '..') 17 | add_path(prj_path) 18 | -------------------------------------------------------------------------------- /tracking/analysis_results.py: -------------------------------------------------------------------------------- 1 | import _init_paths 2 | import argparse 3 | import os 4 | import matplotlib.pyplot as plt 5 | plt.rcParams['figure.figsize'] = [8, 8] 6 | 7 | from lib.test.analysis.plot_results import plot_results, print_results, print_per_sequence_results 8 | from lib.test.evaluation import get_dataset, trackerlist 9 | from lib.test.evaluation.environment import env_settings 10 | import glob 11 | 12 | parser = argparse.ArgumentParser(description='Run tracker on sequence or dataset.') 13 | parser.add_argument('--tracker_name', type=str, help='Name of tracking method.') 14 | parser.add_argument('--tracker_param', type=str, help='Name of config file.') 15 | parser.add_argument('--dataset_name', type=str, help='Name of config file.') 16 | parser.add_argument('--save_file', type=str, default=None) 17 | 18 | args = parser.parse_args() 19 | 20 | def check_complete(path): 21 | file_num = { 22 | 'nfs': 200, 23 | 'uav': 246, 24 | 'lasotext': 300, 25 | 'lasot': 560, 26 | 'trackingnet': 1022, 27 | 'tnl2k': 1400, 28 | 'otb99': 96, 29 | 'itb': 360, 30 | 'avist': 240, 31 | } 32 | num_file = len(glob.glob(os.path.join(path, args.dataset_name, '*.txt'))) 33 | for name, num in file_num.items(): 34 | if name in args.dataset_name: 35 | if num_file == file_num[name]: 36 | return True 37 
-------------------------------------------------------------------------------- /tracking/_init_paths.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os.path as osp 6 | import sys 7 | 8 | 9 | def add_path(path): 10 | if path not in sys.path: 11 | sys.path.insert(0, path) 12 | 13 | 14 | this_dir = osp.dirname(__file__) 15 | 16 | prj_path = osp.join(this_dir, '..') 17 | add_path(prj_path) 18 | -------------------------------------------------------------------------------- /tracking/analysis_results.py: -------------------------------------------------------------------------------- 1 | import _init_paths 2 | import argparse 3 | import os 4 | import matplotlib.pyplot as plt 5 | plt.rcParams['figure.figsize'] = [8, 8] 6 | 7 | from lib.test.analysis.plot_results import plot_results, print_results, print_per_sequence_results 8 | from lib.test.evaluation import get_dataset, trackerlist 9 | from lib.test.evaluation.environment import env_settings 10 | import glob 11 | 12 | parser = argparse.ArgumentParser(description='Run tracker on sequence or dataset.') 13 | parser.add_argument('--tracker_name', type=str, help='Name of tracking method.') 14 | parser.add_argument('--tracker_param', type=str, help='Name of config file.') 15 | parser.add_argument('--dataset_name', type=str, help='Name of dataset.') 16 | parser.add_argument('--save_file', type=str, default=None) 17 | 18 | args = parser.parse_args() 19 | 20 | def check_complete(path): 21 | file_num = { 22 | 'nfs': 200, 23 | 'uav': 246, 24 | 'lasotext': 300, 25 | 'lasot': 560, 26 | 'trackingnet': 1022, 27 | 'tnl2k': 1400, 28 | 'otb99': 96, 29 | 'itb': 360, 30 | 'avist': 240, 31 | } 32 | num_file = len(glob.glob(os.path.join(path, args.dataset_name, '*.txt'))) 33 | for name, num in file_num.items(): 34 | if name in args.dataset_name: 35 | if num_file == file_num[name]: 36 | return True 37 | else: 38 | return False 39 | raise ValueError("no such dataset") 40 | 41 | env = env_settings() 42 | trackers = [] 43 | tracker_params = [path.split('/')[-1] for path in sorted(glob.glob(os.path.join(env.results_path, args.tracker_name, args.tracker_param)), reverse=True) if check_complete(path)] 44 | trackers.extend(trackerlist(name=args.tracker_name, parameter_name=args.tracker_param, dataset_name=args.dataset_name, 45 | run_ids=None, display_name=args.tracker_name)) 46 | 47 | dataset = get_dataset(args.dataset_name) 48 | print_results(trackers, dataset, report_name=args.dataset_name, merge_results=True, force_evaluation=True, plot_types=('success', 'prec', 'norm_prec'), save_file=args.save_file) 49 | -------------------------------------------------------------------------------- /tracking/create_default_local_file.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import _init_paths 4 | from lib.train.admin import create_default_local_file_ITP_train 5 | from lib.test.evaluation import create_default_local_file_ITP_test 6 | 7 | 8 | def parse_args(): 9 | parser = argparse.ArgumentParser(description='Create default local file on ITP or PAI') 10 | parser.add_argument("--workspace_dir", type=str, required=True) # workspace dir 11 | parser.add_argument("--data_dir", type=str, required=True) 12 | parser.add_argument("--save_dir", type=str, required=True) 13 | args = parser.parse_args() 14 | return args 15 | 16 | 17 | if __name__ == "__main__": 18 | args = parse_args() 19 | workspace_dir = os.path.realpath(args.workspace_dir) 20 | data_dir = os.path.realpath(args.data_dir) 21 | save_dir = os.path.realpath(args.save_dir) 22 | create_default_local_file_ITP_train(workspace_dir, data_dir) 23 | create_default_local_file_ITP_test(workspace_dir, data_dir, save_dir) 24 | -------------------------------------------------------------------------------- /tracking/pre_read_datasets.py: -------------------------------------------------------------------------------- 1 | import _init_paths 2 | import multiprocessing as mp 3 | import argparse 4 | import os 5 | from lib.utils.lmdb_utils import decode_str 6 | import time 7 | import json 8 | 9 | 10 | def parse_args(): 11 | """ 12 | args for dataset pre-reading.
13 | """ 14 | parser = argparse.ArgumentParser(description='Parse args for training') 15 | parser.add_argument('--data_dir', type=str, help='directory where lmdb data is located') 16 | parser.add_argument('--dataset_str', type=str, help="which datasets to use") 17 | args = parser.parse_args() 18 | 19 | return args 20 | 21 | 22 | def get_trknet_dict(trknet_dir): 23 | with open(os.path.join(trknet_dir, "seq_list.json"), "r") as f: 24 | seq_list = json.loads(f.read()) 25 | res_dict = {} 26 | set_idx_pre = -1 27 | for set_idx, seq_name in seq_list: 28 | if set_idx != set_idx_pre: 29 | res_dict[set_idx] = "anno/%s.txt" % seq_name 30 | set_idx_pre = set_idx 31 | return res_dict 32 | 33 | 34 | def target(lmdb_dir, key_name): 35 | _ = decode_str(lmdb_dir, key_name) 36 | 37 | 38 | if __name__ == "__main__": 39 | args = parse_args() 40 | data_dir = args.data_dir 41 | dataset_str = args.dataset_str 42 | key_dict = {"got10k_lmdb": "train/list.txt", 43 | "lasot_lmdb": "LaSOTBenchmark.json", 44 | "coco_lmdb": "annotations/instances_train2017.json", 45 | "vid_lmdb": "cache.json"} 46 | print("Ready to pre load datasets") 47 | start = time.time() 48 | ps = [] 49 | datasets = [] 50 | if 'g' in dataset_str: 51 | datasets.append("got10k_lmdb") 52 | if 'l' in dataset_str: 53 | datasets.append("lasot_lmdb") 54 | if 'c' in dataset_str: 55 | datasets.append("coco_lmdb") 56 | if 'v' in dataset_str: 57 | datasets.append("vid_lmdb") 58 | for dataset in datasets: 59 | lmdb_dir = os.path.join(data_dir, dataset) 60 | p = mp.Process(target=target, args=(lmdb_dir, key_dict[dataset])) 61 | print("add %s %s to job queue" % (lmdb_dir, key_dict[dataset])) 62 | ps.append(p) 63 | # deal with trackingnet 64 | if 't' in dataset_str: 65 | trknet_dict = get_trknet_dict(os.path.join(data_dir, "trackingnet_lmdb")) 66 | for set_idx, seq_path in trknet_dict.items(): 67 | lmdb_dir = os.path.join(data_dir, "trackingnet_lmdb", "TRAIN_%d_lmdb" % set_idx) 68 | p = mp.Process(target=target, args=(lmdb_dir, seq_path)) 69 | print("add %s %s to job queue" % (lmdb_dir, seq_path)) 70 | ps.append(p) 71 | for p in ps: 72 | p.start() 73 | for p in ps: 74 | p.join() 75 | 76 | print("Pre read over") 77 | end = time.time() 78 | hour = (end - start) / 3600 79 | print("it takes %.2f hours to pre-read data" % hour) 80 | -------------------------------------------------------------------------------- /tracking/profile_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | prj_path = os.path.join(os.path.dirname(__file__), '..') 5 | if prj_path not in sys.path: 6 | sys.path.append(prj_path) 7 | 8 | import argparse 9 | import torch 10 | from lib.utils.misc import NestedTensor 11 | from thop import profile 12 | from thop.utils import clever_format 13 | import time 14 | import importlib 15 | 16 | 17 | def parse_args(): 18 | """ 19 | args for training. 
-------------------------------------------------------------------------------- /tracking/profile_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | prj_path = os.path.join(os.path.dirname(__file__), '..') 5 | if prj_path not in sys.path: 6 | sys.path.append(prj_path) 7 | 8 | import argparse 9 | import torch 10 | from lib.utils.misc import NestedTensor 11 | from thop import profile 12 | from thop.utils import clever_format 13 | import time 14 | import importlib 15 | 16 | 17 | def parse_args(): 18 | """ 19 | args for profiling. """ 20 | parser = argparse.ArgumentParser(description='Parse args for profiling') 21 | # for train 22 | parser.add_argument('--script', type=str, default='uvltrack', choices=['uvltrack'], 23 | help='training script name') 24 | parser.add_argument('--config', type=str, default='baseline_base', help='yaml configure file name') 25 | args = parser.parse_args() 26 | 27 | return args 28 | 29 | def evaluate_speed(model, template, search, text, prompt, flag): 30 | '''Speed Test''' 31 | T_w = 500 32 | T_t = 1000 33 | print("testing speed ...") 34 | torch.cuda.synchronize() 35 | with torch.no_grad(): 36 | # overall 37 | for i in range(T_w): 38 | _ = model.forward_test(template, search, text, prompt, flag) 39 | start = time.time() 40 | for i in range(T_t): 41 | _ = model.forward_test(template, search, text, prompt, flag) 42 | torch.cuda.synchronize() 43 | end = time.time() 44 | avg_lat = (end - start) / T_t 45 | print("The average overall latency is %.2f ms" % (avg_lat * 1000)) 46 | print("FPS is %.2f fps" % (1. / avg_lat)) 47 | 48 | if __name__ == "__main__": 49 | device = "cuda:0" 50 | torch.cuda.set_device(device) 51 | args = parse_args() 52 | '''update cfg''' 53 | yaml_fname = 'experiments/%s/%s.yaml' % (args.script, args.config) 54 | config_module = importlib.import_module('lib.config.%s.config' % args.script) 55 | cfg = config_module.cfg 56 | config_module.update_config_from_file(yaml_fname) 57 | '''set some values''' 58 | bs = 1 59 | z_sz = cfg.TEST.TEMPLATE_SIZE 60 | x_sz = cfg.TEST.SEARCH_SIZE 61 | 62 | dim = cfg.MODEL.HIDDEN_DIM 63 | 64 | 65 | if args.script == "uvltrack": 66 | model_module = importlib.import_module('lib.models') 67 | model_constructor = model_module.uvltrack.build_model 68 | model = model_constructor(cfg) 69 | # get the template and search 70 | template = torch.randn(bs, 3, z_sz, z_sz) 71 | search = torch.randn(bs, 3, x_sz, x_sz) 72 | text = NestedTensor(torch.ones(bs, 40).long(), torch.randn(bs, 40)>0.5) 73 | prompt = torch.randn(bs, 3, dim) 74 | flag = torch.ones(bs).long() 75 | # transfer to device 76 | model = model.to(device) 77 | template = template.to(device) 78 | search = search.to(device) 79 | text = text.to(device) 80 | prompt = prompt.to(device) 81 | flag = flag.to(device) 82 | evaluate_speed(model, template, search, text, prompt, flag) 83 | 84 | else: 85 | raise NotImplementedError 86 | -------------------------------------------------------------------------------- /tracking/test.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import argparse 4 | import importlib 5 | 6 | prj_path = os.path.join(os.path.dirname(__file__), '..') 7 | if prj_path not in sys.path: 8 | sys.path.append(prj_path) 9 | 10 | from lib.test.evaluation import get_dataset 11 | from lib.test.evaluation.running import run_dataset 12 | from lib.test.evaluation.tracker import Tracker 13 | 14 | import warnings 15 | warnings.filterwarnings('ignore') 16 | 17 | def run_tracker(tracker_name, tracker_param, run_id=None, dataset_name='otb', sequence=None, debug=0, threads=0, 18 | num_gpus=8): 19 | """Run tracker on sequence or dataset. 20 | args: 21 | tracker_name: Name of tracking method. 22 | tracker_param: Name of parameter file. 23 | run_id: The run id. 24 | dataset_name: Name of dataset (otb, nfs, uav, tpl, vot, tn, gott, gotv, lasot). 25 | sequence: Sequence number or name. 26 | debug: Debug level. 27 | threads: Number of threads. 
28 | """ 29 | 30 | dataset = get_dataset(dataset_name) 31 | 32 | if sequence is not None: 33 | dataset = [dataset[sequence]] 34 | 35 | trackers = [Tracker(tracker_name, tracker_param, dataset_name, run_id)] 36 | run_dataset(dataset, trackers, debug, threads, num_gpus=num_gpus) 37 | 38 | 39 | def main(): 40 | parser = argparse.ArgumentParser(description='Run tracker on sequence or dataset.') 41 | parser.add_argument('--tracker_name', default="mvit", type=str, help='Name of tracking method.') 42 | parser.add_argument('--tracker_param', default="baseline_256_4", type=str, help='Name of config file.') 43 | parser.add_argument('--runid', type=int, default=None, help='The run id.') 44 | parser.add_argument('--dataset_name', type=str, default='otb99', help='Name of dataset (otb, nfs, uav, tpl, vot, tn, gott, gotv, lasot).') 45 | parser.add_argument('--sequence', type=str, default=None, help='Sequence number or name.') 46 | parser.add_argument('--debug', type=int, default=1, help='Debug level.') 47 | parser.add_argument('--threads', type=int, default=0, help='Number of threads.') 48 | parser.add_argument('--num_gpus', type=int, default=8) 49 | 50 | parser.add_argument('--params__model', type=str, default=None, help="Tracking model path.") 51 | parser.add_argument('--params__update_interval', type=int, default=None, help="Update interval of online tracking.") 52 | parser.add_argument('--params__online_sizes', type=int, default=None) 53 | parser.add_argument('--params__search_area_scale', type=float, default=None) 54 | parser.add_argument('--params__max_score_decay', type=float, default=1.0) 55 | parser.add_argument('--params__vis_attn', type=int, choices=[0, 1], default=0, help="Whether visualize the attention maps.") 56 | 57 | args = parser.parse_args() 58 | 59 | try: 60 | seq_name = int(args.sequence) 61 | except: 62 | seq_name = args.sequence 63 | 64 | run_tracker(args.tracker_name, args.tracker_param, args.runid, args.dataset_name, seq_name, args.debug, 65 | args.threads, num_gpus=args.num_gpus) 66 | 67 | 68 | if __name__ == '__main__': 69 | main() 70 | -------------------------------------------------------------------------------- /tracking/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import argparse 4 | 5 | import warnings 6 | warnings.filterwarnings('ignore') 7 | 8 | 9 | def parse_args(): 10 | """ 11 | args for training. 
12 | """ 13 | parser = argparse.ArgumentParser(description='Parse args for training') 14 | # for train 15 | parser.add_argument('--script', type=str, help='training script name') 16 | parser.add_argument('--config', type=str, default='baseline', help='yaml configure file name') 17 | parser.add_argument('--stage1_model', type=str, default=None, help='stage1 model used to train SPM.') 18 | parser.add_argument('--save_dir', type=str, help='root directory to save checkpoints, logs, and tensorboard') 19 | parser.add_argument('--mode', type=str, choices=["single", "multiple"], default="multiple", 20 | help="train on single gpu or multiple gpus") 21 | parser.add_argument('--nproc_per_node', type=int, help="number of GPUs per node") # specify when mode is multiple 22 | parser.add_argument('--master_port', type=int, help="master port", default=26500) 23 | parser.add_argument('--use_lmdb', type=int, choices=[0, 1], default=0) # whether datasets are in lmdb format 24 | parser.add_argument('--script_prv', type=str, help='training script name') 25 | parser.add_argument('--config_prv', type=str, default='baseline', help='yaml configure file name') 26 | # for knowledge distillation 27 | parser.add_argument('--distill', type=int, choices=[0, 1], default=0) # whether to use knowledge distillation 28 | parser.add_argument('--script_teacher', type=str, help='teacher script name') 29 | parser.add_argument('--config_teacher', type=str, help='teacher yaml configure file name') 30 | 31 | args = parser.parse_args() 32 | 33 | return args 34 | 35 | 36 | def main(): 37 | args = parse_args() 38 | if args.mode == "single": 39 | train_cmd = "python lib/train/run_training.py --script %s --config %s --save_dir %s --use_lmdb %d " \ 40 | "--script_prv %s --config_prv %s --distill %d --script_teacher %s --config_teacher %s --stage1_model %s" \ 41 | % (args.script, args.config, args.save_dir, args.use_lmdb, args.script_prv, args.config_prv, 42 | args.distill, args.script_teacher, args.config_teacher, args.stage1_model) 43 | elif args.mode == "multiple": 44 | port = int(time.time()*1000)%20000+10000 45 | train_cmd = f"OMP_NUM_THREADS=1 torchrun --standalone --nnodes=1 --nproc_per_node %d lib/train/run_training.py " \ 46 | "--script %s --config %s --save_dir %s --use_lmdb %d --script_prv %s --config_prv %s " \ 47 | "--distill %d --script_teacher %s --config_teacher %s --stage1_model %s" \ 48 | % (args.nproc_per_node, args.script, args.config, args.save_dir, args.use_lmdb, args.script_prv, 49 | args.config_prv, args.distill, args.script_teacher, args.config_teacher, args.stage1_model) 50 | else: 51 | raise ValueError("mode should be 'single' or 'multiple'.") 52 | os.system(train_cmd) 53 | 54 | 55 | if __name__ == "__main__": 56 | main() 57 | --------------------------------------------------------------------------------