├── README.md ├── experiments └── config.yaml ├── images └── workflow.jpg ├── pretrained_models └── alexnet-bn.pth ├── pysot ├── __pycache__ │ ├── __init__.cpython-36.pyc │ ├── __init__.cpython-37.pyc │ ├── __init__.cpython-38.pyc │ ├── config.cpython-36.pyc │ ├── config.cpython-37.pyc │ └── config.cpython-38.pyc ├── core │ ├── __pycache__ │ │ └── config.cpython-38.pyc │ └── config.py ├── datasets │ ├── __pycache__ │ │ ├── anchortarget.cpython-38.pyc │ │ ├── augmentation.cpython-38.pyc │ │ └── dataset.cpython-38.pyc │ ├── anchortarget.py │ ├── augmentation.py │ └── dataset.py ├── models │ ├── __pycache__ │ │ ├── loss.cpython-38.pyc │ │ └── model_builder.cpython-38.pyc │ ├── backbone │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-36.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── __init__.cpython-38.pyc │ │ │ ├── alexnet.cpython-36.pyc │ │ │ ├── alexnet.cpython-37.pyc │ │ │ ├── alexnet.cpython-38.pyc │ │ │ ├── mobile_v2.cpython-36.pyc │ │ │ ├── mobile_v2.cpython-37.pyc │ │ │ ├── mobile_v2.cpython-38.pyc │ │ │ ├── newalexnet.cpython-36.pyc │ │ │ ├── newalexnet.cpython-37.pyc │ │ │ ├── newalexnet.cpython-38.pyc │ │ │ ├── resnet_atrous.cpython-36.pyc │ │ │ ├── resnet_atrous.cpython-37.pyc │ │ │ └── resnet_atrous.cpython-38.pyc │ │ ├── alexnet.py │ │ ├── mobile_v2.py │ │ ├── newalexnet.py │ │ └── resnet_atrous.py │ ├── init_weight.py │ ├── loss.py │ ├── model_builder.py │ └── utile │ │ ├── __pycache__ │ │ ├── tran.cpython-38.pyc │ │ └── utile.cpython-38.pyc │ │ ├── tran.py │ │ └── utile.py ├── tracker │ ├── __pycache__ │ │ ├── base_tracker.cpython-38.pyc │ │ └── siamapn_tracker.cpython-38.pyc │ ├── base_tracker.py │ └── siamapn_tracker.py └── utils │ ├── __pycache__ │ ├── average_meter.cpython-38.pyc │ ├── bbox.cpython-38.pyc │ ├── distributed.cpython-38.pyc │ ├── log_helper.cpython-38.pyc │ ├── lr_scheduler.cpython-38.pyc │ ├── misc.cpython-38.pyc │ └── model_load.cpython-38.pyc │ ├── average_meter.py │ ├── bbox.py │ ├── distributed.py │ ├── location_grid.py │ ├── log_helper.py │ ├── lr_scheduler.py │ ├── misc.py │ ├── model_load.py │ └── xcorr.py ├── requirement.txt ├── toolkit ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-38.pyc │ │ ├── dataset.cpython-36.pyc │ │ ├── dataset.cpython-38.pyc │ │ ├── dtb.cpython-36.pyc │ │ ├── dtb.cpython-38.pyc │ │ ├── dtb701.cpython-38.pyc │ │ ├── testreal.cpython-38.pyc │ │ ├── uav.cpython-36.pyc │ │ ├── uav.cpython-38.pyc │ │ ├── uav10fps.cpython-36.pyc │ │ ├── uav10fps.cpython-38.pyc │ │ ├── uav1231.cpython-36.pyc │ │ ├── uav1231.cpython-38.pyc │ │ ├── uav20l.cpython-36.pyc │ │ ├── uav20l.cpython-38.pyc │ │ ├── uavdt.cpython-36.pyc │ │ ├── uavdt.cpython-38.pyc │ │ ├── uavtrack112_l.cpython-36.pyc │ │ ├── uavtrack112_l.cpython-38.pyc │ │ ├── v4r.cpython-36.pyc │ │ ├── v4r.cpython-38.pyc │ │ ├── video.cpython-36.pyc │ │ ├── video.cpython-38.pyc │ │ ├── visdrone1.cpython-36.pyc │ │ └── visdrone1.cpython-38.pyc │ ├── dataset.py │ ├── dtb.py │ ├── uav.py │ ├── uav10fps.py │ ├── uav20l.py │ ├── uavdt.py │ ├── uavtrack112_l.py │ ├── v4r.py │ └── video.py ├── evaluation │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-38.pyc │ │ └── ope_benchmark.cpython-38.pyc │ └── ope_benchmark.py ├── utils │ ├── __pycache__ │ │ └── statistics.cpython-38.pyc │ ├── region.pyx │ └── statistics.py └── visualization │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-38.pyc │ ├── draw_success_precision.cpython-38.pyc │ └── draw_utils.cpython-38.pyc │ ├── draw_success_precision.py │ 
└── draw_utils.py └── tools ├── demo.py ├── eval.py ├── test.py └── train.py /README.md: -------------------------------------------------------------------------------- 1 | # End-to-End Feature Decontaminated Network for UAV Tracking 2 | ### Haobo Zuo, Changhong Fu, Sihang Li, Junjie Ye, and Guangze Zheng 3 | ## Abstract 4 | Object feature pollution is one of 5 | the burning issues in UAV tracking, and it is commonly caused by occlusion, fast motion, and illumination variation. Due to the contaminated information in the polluted object features, most trackers fail to precisely estimate the location and scale of the object. To address the feature pollution issue, this work proposes an efficient and effective adaptive feature resampling tracker, *i.e.*, AFRT. AFRT mainly includes two stages: an adaptive downsampling network, which reduces the interference information caused by feature pollution, and a super-resolution upsampling network, which applies a Transformer to restore the object scale information. Specifically, the adaptive downsampling network strengthens the expression of the object location information with a feature enhancement downsampling (FED) module. To achieve a better training effect, a novel pooling distance loss function is designed to help the FED module focus on the critical regions containing the object information. Thereby, the downsampled features can be effectively exploited to determine the location of the object. Subsequently, the super-resolution upsampling network restores the scale information in the features with a low-to-high (LTH) Transformer encoder. Exhaustive experiments on three well-known benchmarks validate the effectiveness of AFRT, especially on sequences with feature pollution. In addition, real-world tests demonstrate the efficiency of AFRT, which runs at 31.4 frames per second. 6 | The code and demo videos are available at: https://github.com/vision4robotics/FDNT. 7 | 8 | ![Workflow of our tracker](https://github.com/vision4robotics/ResamplingNet/blob/main/images/workflow.jpg) 9 | ## About Code 10 | ### 1. Environment setup 11 | This code has been tested on Ubuntu 18.04, Python 3.8.3, PyTorch 1.6.0 (torchvision 0.7.0), and CUDA 10.2. Please install the related libraries before running this code: 12 | 13 | pip install -r requirement.txt 14 | ### 2. Test 15 | Download the pretrained model [AFRTmodel](https://pan.baidu.com/s/1xXs60LeQehvCwKJo1zwzrg) (code: huat) and put it into the `tools/snapshot` directory. 16 | 17 | Download the testing datasets and put them into the `test_dataset` directory. If you want to test the tracker on a new dataset, please refer to [pysot-toolkit](https://github.com/StrangerZhang/pysot-toolkit.git) to set up `test_dataset`. 18 | 19 | python test.py 20 | --dataset UAV123 # dataset_name 21 | --snapshot snapshot/AFRTmodel.pth # tracker_name 22 | 23 | The testing results will be saved in the `results/dataset_name/tracker_name` directory. 24 | ### 3. Train 25 | #### Prepare training datasets 26 | 27 | Download the datasets: 28 | 29 | [VID](https://image-net.org/challenges/LSVRC/2017/) 30 | 31 | [COCO](https://cocodataset.org/#home) 32 | 33 | [GOT-10K](http://got-10k.aitestunion.com/downloads) 34 | 35 | [LaSOT](http://vision.cs.stonybrook.edu/~lasot/) 36 | 37 | #### Train a model 38 | 39 | To train the AFRT model, run `train.py` with the desired configs: 40 | 41 | cd tools 42 | python train.py 43 | 44 | ### 4. Evaluation 45 | We provide the tracking [results](https://pan.baidu.com/s/1d8P3O9V3I6jqDqgG2LG5Ng) (code: 6q8m) of UAV123@10fps, UAV123, and UAVTrack112_L.
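The expected layout for evaluation is sketched below (the per-sequence `.txt` file names are illustrative):

    results/
    └── UAV123/
        └── AFRTmodel/
            ├── bike1.txt
            └── ...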
If you want to evaluate the tracker, please put those results into `results` directory. 46 | 47 | python eval.py \ 48 | --tracker_path ./results \ # result path 49 | --dataset UAV123 \ # dataset_name 50 | --tracker_prefix 'AFRTmodel' # tracker_name 51 | ### 5. Contact 52 | If you have any questions, please contact me. 53 | 54 | Haobo Zuo 55 | 56 | Email: <1951684@tongji.edu.cn> 57 | ## Demo Video 58 | [![Watch the video](https://i.ytimg.com/vi/_FtC6ZmSo3s/maxresdefault.jpg)](https://youtu.be/_FtC6ZmSo3s) 59 | ## Acknowledgement 60 | The code is implemented based on [pysot](https://github.com/STVIR/pysot.git). We would like to express our sincere thanks to the contributors. 61 | -------------------------------------------------------------------------------- /experiments/config.yaml: -------------------------------------------------------------------------------- 1 | META_ARC: "HiFT_alexnet" 2 | 3 | BACKBONE: 4 | TYPE: "alexnet" 5 | PRETRAINED: 'alexnet-bn.pth' 6 | TRAIN_LAYERS: ['layer3','layer4','layer5'] 7 | TRAIN_EPOCH: 10 8 | LAYERS_LR: 0.1 9 | 10 | TRACK: 11 | TYPE: 'HiFTtracker' 12 | EXEMPLAR_SIZE: 127 #175 #143 13 | INSTANCE_SIZE: 287 #399 #495 #335 #303 14 | CONTEXT_AMOUNT: 0.5 15 | STRIDE: 8 16 | PENALTY_K: 0.0789 #0.0789 17 | WINDOW_INFLUENCE: 0.437 #0.437 #0.4373684210526316 18 | LR: 0.317 #0.317#0.31666666666666665 19 | 20 | w2: 1.02 #0.97 #1.0 21 | w3: 1.0 22 | 23 | TRAIN: 24 | EPOCH: 100 25 | START_EPOCH: 0 26 | BATCH_SIZE: 128 27 | NUM_GPU: 2 28 | BASE_LR: 0.005 29 | RESUME: '' 30 | WEIGHT_DECAY : 0.0001 31 | PRETRAINED: 0 32 | OUTPUT_SIZE: 11 33 | NUM_WORKERS: 12 34 | LOC_WEIGHT: 3.0 35 | CLS_WEIGHT: 1.2 36 | PD_WEIGHT: 0.1 37 | w2: 1.0 38 | w3: 0.6 39 | w4: 1.0 40 | w5: 1.0 41 | w6: 0.4 42 | w7: 0.4 43 | POS_NUM : 16 44 | TOTAL_NUM : 64 45 | NEG_NUM : 16 46 | LARGER: 1.0 47 | range : 1.0 48 | LR: 49 | TYPE: 'log' 50 | KWARGS: 51 | start_lr: 0.01 52 | end_lr: 0.0001 53 | 54 | LR_WARMUP: 55 | TYPE: 'step' 56 | EPOCH: 5 57 | KWARGS: 58 | start_lr: 0.005 59 | end_lr: 0.01 60 | step: 1 61 | 62 | DATASET: 63 | NAMES: 64 | - 'VID' 65 | - 'COCO' 66 | - 'GOT' 67 | - 'LaSOT' 68 | # - 'YOUTUBEBB' 69 | 70 | 71 | TEMPLATE: 72 | SHIFT: 4 73 | SCALE: 0.05 74 | BLUR: 0.0 75 | FLIP: 0.0 76 | COLOR: 1.0 77 | 78 | SEARCH: 79 | SHIFT: 64 80 | SCALE: 0.18 81 | BLUR: 0.2 82 | FLIP: 0.0 83 | COLOR: 1.0 84 | 85 | NEG: 0.05 86 | GRAY: 0.0 87 | -------------------------------------------------------------------------------- /images/workflow.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/images/workflow.jpg -------------------------------------------------------------------------------- /pretrained_models/alexnet-bn.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pretrained_models/alexnet-bn.pth -------------------------------------------------------------------------------- /pysot/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /pysot/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /pysot/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/__pycache__/config.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/__pycache__/config.cpython-36.pyc -------------------------------------------------------------------------------- /pysot/__pycache__/config.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/__pycache__/config.cpython-37.pyc -------------------------------------------------------------------------------- /pysot/__pycache__/config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/__pycache__/config.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/core/__pycache__/config.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/core/__pycache__/config.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/core/config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
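# Central yacs-based configuration (exported as `cfg`) covering training, dataset, backbone, anchor, and tracking options.
# Typical usage -- a sketch only, the actual entry points are tools/train.py and tools/test.py:
#   from pysot.core.config import cfg
#   cfg.merge_from_file('experiments/config.yaml')  # values from the YAML override the defaults defined below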
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from yacs.config import CfgNode as CN 9 | 10 | __C = CN() 11 | 12 | cfg = __C 13 | 14 | __C.META_ARC = "HiFT_alexnet" 15 | 16 | __C.CUDA = True 17 | 18 | # ------------------------------------------------------------------------ # 19 | # Training options 20 | # ------------------------------------------------------------------------ # 21 | __C.TRAIN = CN() 22 | 23 | 24 | __C.TRAIN.THR_HIGH = 0.6 25 | 26 | __C.TRAIN.apnchannel = 256 27 | 28 | __C.TRAIN.clsandlocchannel = 256 29 | 30 | __C.TRAIN.groupchannel = 32 31 | 32 | __C.TRAIN.THR_LOW = 0.3 33 | 34 | __C.TRAIN.NEG_NUM = 16 35 | 36 | __C.TRAIN.POS_NUM = 16 37 | 38 | __C.TRAIN.TOTAL_NUM = 64 39 | 40 | __C.TRAIN.PR = 1 41 | 42 | __C.TRAIN.CLS_WEIGHT = 1.0 43 | 44 | __C.TRAIN.LOC_WEIGHT = 3.0 45 | 46 | __C.TRAIN.SHAPE_WEIGHT =2.0 47 | 48 | __C.TRAIN.PD_WEIGHT = 0.1 49 | 50 | __C.TRAIN.EXEMPLAR_SIZE = 127 51 | 52 | __C.TRAIN.SEARCH_SIZE = 287 53 | __C.TRAIN.BASE_SIZE = 8 54 | 55 | __C.TRAIN.OUTPUT_SIZE = 21 56 | 57 | __C.TRAIN.RESUME = '' 58 | 59 | __C.TRAIN.PRETRAINED = 1 60 | 61 | __C.TRAIN.LARGER=2.0 62 | 63 | __C.TRAIN.LOG_DIR = './logs' 64 | 65 | __C.TRAIN.SNAPSHOT_DIR = './snapshot' 66 | 67 | __C.TRAIN.EPOCH = 30 68 | 69 | __C.TRAIN.START_EPOCH = 0 70 | 71 | __C.TRAIN.BATCH_SIZE = 100 72 | 73 | __C.TRAIN.NUM_GPU = 2 74 | 75 | __C.TRAIN.NUM_WORKERS = 1 76 | 77 | __C.TRAIN.MOMENTUM = 0.9 78 | 79 | __C.TRAIN.WEIGHT_DECAY = 0.0001 80 | 81 | __C.TRAIN.w1=1.0 82 | 83 | __C.TRAIN.R1=1.5 84 | 85 | __C.TRAIN.R2=1.2 86 | 87 | __C.TRAIN.w2=1.0 88 | 89 | __C.TRAIN.w3=1.0 90 | 91 | __C.TRAIN.w4=1.0 92 | 93 | __C.TRAIN.w5=1.0 94 | 95 | __C.TRAIN.w6=0.4 96 | 97 | __C.TRAIN.w7=0.4 98 | 99 | __C.TRAIN.range=2.0 100 | 101 | 102 | __C.TRAIN.MASK_WEIGHT = 1 103 | 104 | __C.TRAIN.PRINT_FREQ = 20 105 | 106 | __C.TRAIN.LOG_GRADS = False 107 | 108 | __C.TRAIN.GRAD_CLIP = 10.0 109 | 110 | __C.TRAIN.BASE_LR = 0.005 111 | 112 | __C.TRAIN.LR = CN() 113 | 114 | __C.TRAIN.LR.TYPE = 'log' 115 | 116 | __C.TRAIN.LR.KWARGS = CN(new_allowed=True) 117 | 118 | __C.TRAIN.LR_WARMUP = CN() 119 | 120 | __C.TRAIN.LR_WARMUP.WARMUP = True 121 | 122 | __C.TRAIN.LR_WARMUP.TYPE = 'step' 123 | 124 | __C.TRAIN.LR_WARMUP.EPOCH = 5 125 | 126 | __C.TRAIN.LR_WARMUP.KWARGS = CN(new_allowed=True) 127 | 128 | # ------------------------------------------------------------------------ # 129 | # Dataset options 130 | # ------------------------------------------------------------------------ # 131 | __C.DATASET = CN(new_allowed=True) 132 | 133 | # Augmentation 134 | # for template 135 | __C.DATASET.TEMPLATE = CN() 136 | 137 | # for detail discussion 138 | __C.DATASET.TEMPLATE.SHIFT = 4 139 | 140 | __C.DATASET.TEMPLATE.SCALE = 0.05 141 | 142 | __C.DATASET.TEMPLATE.BLUR = 0.0 143 | 144 | __C.DATASET.TEMPLATE.FLIP = 0.0 145 | 146 | __C.DATASET.TEMPLATE.COLOR = 1.0 147 | 148 | __C.DATASET.SEARCH = CN() 149 | 150 | __C.DATASET.SEARCH.SHIFT = 64 151 | 152 | __C.DATASET.SEARCH.SCALE = 0.18 153 | 154 | __C.DATASET.SEARCH.BLUR = 0.0 155 | 156 | __C.DATASET.SEARCH.FLIP = 0.0 157 | 158 | __C.DATASET.SEARCH.COLOR = 1.0 159 | 160 | # for detail discussion 161 | __C.DATASET.NEG = 0.2 162 | 163 | __C.DATASET.GRAY = 0.0 164 | 165 | __C.DATASET.NAMES = ('VID', 'COCO', 'GOT', 'LaSOT') 166 | 167 | __C.DATASET.VID = CN() 168 | __C.DATASET.VID.ROOT = '/home/tj-v4r/Documents/caoziang/tran-master/train_dataset/vid/crop511' 169 | 
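# NOTE: the ROOT/ANNO paths in this DATASET section are machine-specific absolute paths; point them at your own crop511 image folders and train.json annotation files before training.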
__C.DATASET.VID.ANNO = '/home/tj-v4r/Documents/caoziang/tran-master/train_dataset/vid/train.json' 170 | __C.DATASET.VID.FRAME_RANGE = 50 #100 171 | __C.DATASET.VID.NUM_USE = 100000 # repeat until reach NUM_USE 172 | 173 | __C.DATASET.YOUTUBEBB = CN() 174 | __C.DATASET.YOUTUBEBB.ROOT = '/home/mist/dataset/yt_bb/crop511' 175 | __C.DATASET.YOUTUBEBB.ANNO = '/media/tj-v4r/f99cab90-4591-432d-ab35-d27c27ad2860/train_dataset/yt_bb/train.json' 176 | __C.DATASET.YOUTUBEBB.FRAME_RANGE = 3 177 | __C.DATASET.YOUTUBEBB.NUM_USE = -1 # use all not repeat 178 | 179 | __C.DATASET.COCO = CN() 180 | __C.DATASET.COCO.ROOT = '/home/tj-v4r/Documents/caoziang/tran-master/train_dataset/coco/crop511' 181 | __C.DATASET.COCO.ANNO = '/home/tj-v4r/Documents/caoziang/tran-master/train_dataset/coco/train2017.json' 182 | __C.DATASET.COCO.FRAME_RANGE = 1 183 | __C.DATASET.COCO.NUM_USE = -1 184 | 185 | __C.DATASET.LaSOT = CN() 186 | __C.DATASET.LaSOT.ROOT = '/home/tj-v4r/Documents/caoziang/tran-master/train_dataset/lasot/crop511' # LaSOT dataset path 187 | __C.DATASET.LaSOT.ANNO = '/home/tj-v4r/Documents/caoziang/tran-master/train_dataset/lasot/train.json' 188 | __C.DATASET.LaSOT.FRAME_RANGE = 100 189 | __C.DATASET.LaSOT.NUM_USE = 100000 190 | 191 | __C.DATASET.GOT = CN() 192 | __C.DATASET.GOT.ROOT = '/home/tj-v4r/Documents/caoziang/tran-master/train_dataset/got10k/crop511' # GOT dataset path 193 | __C.DATASET.GOT.ANNO = '/home/tj-v4r/Documents/caoziang/tran-master/train_dataset/got10k/train.json' 194 | __C.DATASET.GOT.FRAME_RANGE = 50 195 | __C.DATASET.GOT.NUM_USE = 100000 196 | 197 | 198 | __C.DATASET.VIDEOS_PER_EPOCH = 600000 199 | 200 | 201 | 202 | # ------------------------------------------------------------------------ # 203 | # Backbone options 204 | # ------------------------------------------------------------------------ # 205 | __C.BACKBONE = CN() 206 | 207 | # Backbone type, current only support resnet18,34,50;alexnet;mobilenet 208 | __C.BACKBONE.TYPE = 'alexnet' 209 | 210 | __C.BACKBONE.KWARGS = CN(new_allowed=True) 211 | 212 | # Pretrained backbone weights 213 | __C.BACKBONE.PRETRAINED = 'back.pth' 214 | 215 | # Train layers 216 | __C.BACKBONE.TRAIN_LAYERS = ['layer3', 'layer4', 'layer5'] 217 | 218 | # Layer LR 219 | __C.BACKBONE.LAYERS_LR = 0.1 220 | 221 | # Switch to train layer 222 | __C.BACKBONE.TRAIN_EPOCH = 10 223 | 224 | # # ------------------------------------------------------------------------ # 225 | # # Anchor options 226 | # # ------------------------------------------------------------------------ # 227 | __C.ANCHOR = CN() 228 | 229 | # # Anchor stride 230 | __C.ANCHOR.STRIDE = 16 231 | 232 | 233 | # ------------------------------------------------------------------------ # 234 | # Tracker options 235 | # ------------------------------------------------------------------------ # 236 | __C.TRACK = CN() 237 | 238 | __C.TRACK.TYPE = 'HiFTtracker' 239 | 240 | # Scale penalty 241 | __C.TRACK.PENALTY_K = 0.04 242 | 243 | # Window influence 244 | __C.TRACK.WINDOW_INFLUENCE = 0.44 245 | 246 | # Interpolation learning rate 247 | __C.TRACK.LR = 0.4 248 | 249 | __C.TRACK.w1=1.2 250 | 251 | __C.TRACK.w2=1.0 252 | 253 | __C.TRACK.w3=1.6 254 | 255 | 256 | __C.TRACK.LARGER=1.4 257 | # Exemplar size 258 | __C.TRACK.EXEMPLAR_SIZE = 127 259 | 260 | # Instance size 261 | __C.TRACK.INSTANCE_SIZE = 255 262 | 263 | # Base size 264 | __C.TRACK.BASE_SIZE = 8 265 | 266 | __C.TRACK.STRIDE = 8 267 | 268 | 269 | # Context amount 270 | __C.TRACK.CONTEXT_AMOUNT = 0.5 271 | 272 | # Long term lost search size 273 | 
__C.TRACK.LOST_INSTANCE_SIZE = 831 274 | 275 | # Long term confidence low 276 | __C.TRACK.CONFIDENCE_LOW = 0.85 277 | 278 | # Long term confidence high 279 | __C.TRACK.CONFIDENCE_HIGH = 0.998 280 | 281 | # Mask threshold 282 | __C.TRACK.MASK_THERSHOLD = 0.30 283 | 284 | # Mask output size 285 | __C.TRACK.MASK_OUTPUT_SIZE = 127 286 | -------------------------------------------------------------------------------- /pysot/datasets/__pycache__/anchortarget.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/datasets/__pycache__/anchortarget.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/datasets/__pycache__/augmentation.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/datasets/__pycache__/augmentation.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/datasets/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/datasets/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/datasets/anchortarget.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | from __future__ import absolute_import 5 | from __future__ import division 6 | from __future__ import print_function 7 | from __future__ import unicode_literals 8 | 9 | import numpy as np 10 | import torch as t 11 | from pysot.core.config import cfg 12 | from pysot.utils.bbox import IoU 13 | 14 | 15 | 16 | class AnchorTarget(): 17 | def __init__(self): 18 | 19 | return 20 | def select(self,position, keep_num=16): 21 | num = position[0].shape[0] 22 | if num <= keep_num: 23 | return position, num 24 | slt = np.arange(num) 25 | np.random.shuffle(slt) 26 | slt = slt[:keep_num] 27 | return tuple(p[slt] for p in position), keep_num 28 | 29 | 30 | def get(self,bbox,size): 31 | 32 | labelcls2=np.zeros((1,size,size)) 33 | 34 | pre=(16*(np.linspace(0,size-1,size))+63).reshape(-1,1) 35 | pr=np.zeros((size**2,2)) 36 | pr[:,0]=np.maximum(0,np.tile(pre,(size)).T.reshape(-1)) 37 | pr[:,1]=np.maximum(0,np.tile(pre,(size)).reshape(-1)) 38 | 39 | labelxff=np.zeros((4, size, size), dtype=np.float32) 40 | 41 | labelcls3=np.zeros((1,size,size)) 42 | weightxff=np.zeros((1,size,size)) 43 | 44 | 45 | target=np.array([bbox.x1,bbox.y1,bbox.x2,bbox.y2]) 46 | 47 | index2=np.int32(np.minimum(size-1,np.maximum(0,(target-63)/16))) 48 | w=int(index2[2]-index2[0]+1) 49 | h=int(index2[3]-index2[1]+1) 50 | 51 | 52 | for ii in np.arange(0,size): 53 | for jj in np.arange(0,size): 54 | weightxff[0,ii,jj]=((ii-(index2[1]+index2[3])/2)/(h/2))**2+((jj-(index2[0]+index2[2])/2)/(w/2))**2 55 | 56 | 57 | 58 | weightxff[np.where(weightxff<=1)]=1-weightxff[np.where(weightxff<=1)] 59 | weightxff[np.where((weightxff>1)|(weightxff<0.5))]=0 60 | 61 | pos=np.where((weightxff.squeeze()<0.8)&(weightxff.squeeze()>=0.5)) 62 | num=len(pos[0]) 63 | pos = self.select(pos, int(num/1.2)) 64 | weightxff[:,pos[0][0],pos[0][1]] = 0 65 | 66 | 67 | 68 | 69 | index=np.int32(np.minimum(size-1,np.maximum(0,(target-63)/16))) 70 | 
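# index/index2 are the ground-truth corners projected onto the output grid (stride 16, offset 63) and clamped to the map; w and h below are the box extent in grid cells.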
w=int(index[2]-index[0]+1) 71 | h=int(index[3]-index[1]+1) 72 | 73 | 74 | for ii in np.arange(0,size): 75 | for jj in np.arange(0,size): 76 | labelcls3[0,ii,jj]=((ii-(index2[1]+index2[3])/2)/(h/2))**2+((jj-(index2[0]+index2[2])/2)/(w/2))**2 77 | 78 | 79 | 80 | labelcls3[np.where(labelcls3<=1)]=1-labelcls3[np.where(labelcls3<=1)] 81 | labelcls3[np.where((labelcls3>1))]=0 82 | 83 | # pos=np.where((weightxff.squeeze()<0.8)&(weightxff.squeeze()>=0.5)) 84 | # num=len(pos[0]) 85 | # pos = self.select(pos, int(num/1.2)) 86 | # weightxff[:,pos[0][0],pos[0][1]] = 0 87 | 88 | # for ii in np.arange(0,size): 89 | # for jj in np.arange(0,size): 90 | # labelcls3[0,ii,jj]=(((ii-(index[1]+index[3])/2)*ran)**2+((jj-(index[0]+index[2])/2)*ran)**2) 91 | 92 | 93 | # see=labelcls3[np.where(labelcls3<=((w//2+h//2)*ran/1.2)**2)] 94 | 95 | # labelcls3[np.where(labelcls3<=((w//2+h//2)*ran/1.2)**2)]=1-((see-see.min())/(see.max()-see.min()+1e-4)) 96 | # weightcls3=np.zeros((1,size,size)) 97 | # weightcls3[np.where(labelcls3<((w//2+h//2)*ran/1.2)**2)]=1 98 | # labelcls3=labelcls3*weightcls3 99 | 100 | 101 | 102 | 103 | 104 | def con(x): 105 | return (np.exp(x)-np.exp(-x))/(np.exp(x)+np.exp(-x)) 106 | def dcon(x): 107 | return (np.log(1+x)-np.log(1-x))/2 108 | 109 | labelxff[0,:,:]=(pr[:,0]-target[0]).reshape(cfg.TRAIN.OUTPUT_SIZE,cfg.TRAIN.OUTPUT_SIZE) 110 | labelxff[1,:,:]=(target[2]-pr[:,0]).reshape(cfg.TRAIN.OUTPUT_SIZE,cfg.TRAIN.OUTPUT_SIZE) 111 | labelxff[2,:,:]=(pr[:,1]-target[1]).reshape(cfg.TRAIN.OUTPUT_SIZE,cfg.TRAIN.OUTPUT_SIZE) 112 | labelxff[3,:,:]=(target[3]-pr[:,1]).reshape(cfg.TRAIN.OUTPUT_SIZE,cfg.TRAIN.OUTPUT_SIZE) 113 | labelxff=con(labelxff/143) 114 | 115 | 116 | for ii in np.arange(0,size): 117 | for jj in np.arange(0,size): 118 | labelcls2[0,ii,jj]=((ii-(index2[1]+index2[3])/2)/(h/2))**2+((jj-(index2[0]+index2[2])/2)/(w/2))**2 119 | 120 | labelcls2[np.where((labelcls2>1))]=-2 121 | 122 | labelcls2[np.where((labelcls2<=1)&(labelcls2>=0))]=1-labelcls2[np.where((labelcls2<=1)&(labelcls2>=0))] 123 | labelcls2[np.where(((labelcls2>0.3)&(labelcls2<0.78)))]=-1 124 | labelcls2[np.where((labelcls2>0)&(labelcls2<=0.3))]=-2 125 | 126 | 127 | 128 | # for ii in np.arange(0,size): 129 | # for jj in np.arange(0,size): 130 | # labelcls2[0,ii,jj]=(((ii-(index2[1]+index2[3])/2)*ran)**2+((jj-(index2[0]+index2[2])/2)*ran)**2) 131 | 132 | # se=labelcls2[np.where(labelcls2<=((w//2+h//2)*ran/rcls)**2)] 133 | 134 | # labelcls2[np.where(labelcls2<=((w//2+h//2)*ran/rcls)**2)]=1-((se-se.min())/(se.max()-se.min()+1e-4)) 135 | 136 | # neg=np.where(labelcls2<=0.76) 137 | # labelcls2[neg]=-2 138 | 139 | 140 | 141 | 142 | # negg=np.where(labelcls2==-2) 143 | # labelcls2[negg]=-1 144 | 145 | 146 | neg2=np.where(labelcls2.squeeze()==-2) 147 | neg2 = self.select(neg2, int(len(np.where(labelcls2>0)[0])*2)) 148 | labelcls2[:,neg2[0][0],neg2[0][1]] = 0 149 | 150 | 151 | 152 | return labelcls2,labelxff,labelcls3,weightxff 153 | 154 | -------------------------------------------------------------------------------- /pysot/datasets/augmentation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
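# Random shift / scale / blur / flip / color augmentation applied to the cropped template and search patches during training.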
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import numpy as np 9 | import cv2 10 | 11 | from pysot.utils.bbox import corner2center, \ 12 | Center, center2corner, Corner 13 | 14 | 15 | class Augmentation: 16 | def __init__(self, shift, scale, blur, flip, color): 17 | self.shift = shift 18 | self.scale = scale 19 | self.blur = blur 20 | self.flip = flip 21 | self.color = color 22 | self.rgbVar = np.array( 23 | [[-0.55919361, 0.98062831, - 0.41940627], 24 | [1.72091413, 0.19879334, - 1.82968581], 25 | [4.64467907, 4.73710203, 4.88324118]], dtype=np.float32) 26 | 27 | @staticmethod 28 | def random(): 29 | return np.random.random() * 2 - 1.0 30 | 31 | def _crop_roi(self, image, bbox, out_sz, padding=(0, 0, 0)): 32 | bbox = [float(x) for x in bbox] 33 | a = (out_sz-1) / (bbox[2]-bbox[0]) 34 | b = (out_sz-1) / (bbox[3]-bbox[1]) 35 | c = -a * bbox[0] 36 | d = -b * bbox[1] 37 | mapping = np.array([[a, 0, c], 38 | [0, b, d]]).astype(np.float) 39 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), 40 | borderMode=cv2.BORDER_CONSTANT, 41 | borderValue=padding) 42 | return crop 43 | 44 | def _blur_aug(self, image): 45 | def rand_kernel(): 46 | sizes = np.arange(5, 46, 2) 47 | size = np.random.choice(sizes) 48 | kernel = np.zeros((size, size)) 49 | c = int(size/2) 50 | wx = np.random.random() 51 | kernel[:, c] += 1. / size * wx 52 | kernel[c, :] += 1. / size * (1-wx) 53 | return kernel 54 | kernel = rand_kernel() 55 | image = cv2.filter2D(image, -1, kernel) 56 | return image 57 | 58 | def _color_aug(self, image): 59 | offset = np.dot(self.rgbVar, np.random.randn(3, 1)) 60 | offset = offset[::-1] # bgr 2 rgb 61 | offset = offset.reshape(3) 62 | image = image - offset 63 | return image 64 | 65 | def _gray_aug(self, image): 66 | grayed = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 67 | image = cv2.cvtColor(grayed, cv2.COLOR_GRAY2BGR) 68 | return image 69 | 70 | def _shift_scale_aug(self, image, bbox, crop_bbox, size): 71 | im_h, im_w = image.shape[:2] 72 | 73 | # adjust crop bounding box 74 | crop_bbox_center = corner2center(crop_bbox) 75 | if self.scale: 76 | scale_x = (1.0 + Augmentation.random() * self.scale) 77 | scale_y = (1.0 + Augmentation.random() * self.scale) 78 | h, w = crop_bbox_center.h, crop_bbox_center.w 79 | scale_x = min(scale_x, float(im_w) / w) 80 | scale_y = min(scale_y, float(im_h) / h) 81 | crop_bbox_center = Center(crop_bbox_center.x, 82 | crop_bbox_center.y, 83 | crop_bbox_center.w * scale_x, 84 | crop_bbox_center.h * scale_y) 85 | 86 | crop_bbox = center2corner(crop_bbox_center) 87 | if self.shift: 88 | sx = Augmentation.random() * self.shift 89 | sy = Augmentation.random() * self.shift 90 | 91 | x1, y1, x2, y2 = crop_bbox 92 | 93 | sx = max(-x1, min(im_w - 1 - x2, sx)) 94 | sy = max(-y1, min(im_h - 1 - y2, sy)) 95 | 96 | crop_bbox = Corner(x1 + sx, y1 + sy, x2 + sx, y2 + sy) 97 | 98 | # adjust target bounding box 99 | x1, y1 = crop_bbox.x1, crop_bbox.y1 100 | bbox = Corner(bbox.x1 - x1, bbox.y1 - y1, 101 | bbox.x2 - x1, bbox.y2 - y1) 102 | 103 | if self.scale: 104 | bbox = Corner(bbox.x1 / scale_x, bbox.y1 / scale_y, 105 | bbox.x2 / scale_x, bbox.y2 / scale_y) 106 | 107 | image = self._crop_roi(image, crop_bbox, size) 108 | return image, bbox 109 | 110 | def _flip_aug(self, image, bbox): 111 | image = cv2.flip(image, 1) 112 | width = image.shape[1] 113 | bbox = Corner(width - 1 - bbox.x2, bbox.y1, 114 | width - 1 - bbox.x1, bbox.y2) 115 | return 
image, bbox 116 | 117 | def __call__(self, image, bbox, size, gray=False): 118 | shape = image.shape 119 | crop_bbox = center2corner(Center(shape[0]//2, shape[1]//2, 120 | size-1, size-1)) 121 | # gray augmentation 122 | if gray: 123 | image = self._gray_aug(image) 124 | 125 | # shift scale augmentation 126 | image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size) 127 | 128 | # color augmentation 129 | if self.color > np.random.random(): 130 | image = self._color_aug(image) 131 | 132 | # blur augmentation 133 | if self.blur > np.random.random(): 134 | image = self._blur_aug(image) 135 | 136 | # flip augmentation 137 | if self.flip and self.flip > np.random.random(): 138 | image, bbox = self._flip_aug(image, bbox) 139 | return image, bbox 140 | -------------------------------------------------------------------------------- /pysot/models/__pycache__/loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/__pycache__/loss.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/__pycache__/model_builder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/__pycache__/model_builder.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from pysot.models.backbone.alexnet import alexnetlegacy, alexnet 9 | from pysot.models.backbone.mobile_v2 import mobilenetv2 10 | from pysot.models.backbone.resnet_atrous import resnet18, resnet34, resnet50 11 | 12 | BACKBONES = { 13 | 'alexnetlegacy': alexnetlegacy, 14 | 'mobilenetv2': mobilenetv2, 15 | 'resnet18': resnet18, 16 | 'resnet34': resnet34, 17 | 'resnet50': resnet50, 18 | 'alexnet': alexnet, 19 | } 20 | 21 | 22 | def get_backbone(name, **kwargs): 23 | return BACKBONES[name](**kwargs) 24 | -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/__init__.cpython-38.pyc 
-------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/alexnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/alexnet.cpython-36.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/alexnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/alexnet.cpython-37.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/alexnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/alexnet.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/mobile_v2.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/mobile_v2.cpython-36.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/mobile_v2.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/mobile_v2.cpython-37.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/mobile_v2.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/mobile_v2.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/newalexnet.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/newalexnet.cpython-36.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/newalexnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/newalexnet.cpython-37.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/newalexnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/newalexnet.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/resnet_atrous.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/resnet_atrous.cpython-36.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/resnet_atrous.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/resnet_atrous.cpython-37.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/__pycache__/resnet_atrous.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/backbone/__pycache__/resnet_atrous.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/backbone/alexnet.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch.nn as nn 7 | 8 | 9 | class AlexNetLegacy(nn.Module): 10 | configs = [3, 96, 256, 384, 384, 256] 11 | 12 | def __init__(self, width_mult=1): 13 | configs = list(map(lambda x: 3 if x == 3 else 14 | int(x*width_mult), AlexNet.configs)) 15 | super(AlexNetLegacy, self).__init__() 16 | self.features = nn.Sequential( 17 | nn.Conv2d(configs[0], configs[1], kernel_size=11, stride=2), 18 | nn.BatchNorm2d(configs[1]), 19 | nn.MaxPool2d(kernel_size=3, stride=2), 20 | nn.ReLU(inplace=True), 21 | nn.Conv2d(configs[1], configs[2], kernel_size=5), 22 | nn.BatchNorm2d(configs[2]), 23 | nn.MaxPool2d(kernel_size=3, stride=2), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(configs[2], configs[3], kernel_size=3), 26 | nn.BatchNorm2d(configs[3]), 27 | nn.ReLU(inplace=True), 28 | nn.Conv2d(configs[3], configs[4], kernel_size=3), 29 | nn.BatchNorm2d(configs[4]), 30 | nn.ReLU(inplace=True), 31 | nn.Conv2d(configs[4], configs[5], kernel_size=3), 32 | nn.BatchNorm2d(configs[5]), 33 | ) 34 | self.feature_size = configs[5] 35 | 36 | def forward(self, x): 37 | x = self.features(x) 38 | return x 39 | 40 | 41 | class AlexNet(nn.Module): 42 | configs = [3, 96, 256, 384, 384, 256] 43 | 44 | def __init__(self, width_mult=1): 45 | configs = list(map(lambda x: 3 if x == 3 else 46 | int(x*width_mult), AlexNet.configs)) 47 | super(AlexNet, self).__init__() 48 | self.layer1 = nn.Sequential( 49 | nn.Conv2d(configs[0], configs[1], kernel_size=11, stride=2), 50 | nn.BatchNorm2d(configs[1]), 51 | nn.MaxPool2d(kernel_size=3, stride=2), 52 | nn.ReLU(inplace=True), 53 | ) 54 | self.layer2 = nn.Sequential( 55 | nn.Conv2d(configs[1], configs[2], kernel_size=5), 56 | nn.BatchNorm2d(configs[2]), 57 | nn.MaxPool2d(kernel_size=3, stride=2), 58 | nn.ReLU(inplace=True), 59 | ) 60 | self.layer3 = nn.Sequential( 61 | nn.Conv2d(configs[2], configs[3], kernel_size=3), 62 | nn.BatchNorm2d(configs[3]), 63 | nn.ReLU(inplace=True), 64 | ) 65 | self.layer4 = nn.Sequential( 66 | nn.Conv2d(configs[3], configs[4], kernel_size=3), 67 | nn.BatchNorm2d(configs[4]), 68 | nn.ReLU(inplace=True), 69 | ) 70 | 71 | self.layer5 = nn.Sequential( 72 | nn.Conv2d(configs[4], configs[5], kernel_size=3), 73 
| nn.BatchNorm2d(configs[5]), 74 | ) 75 | self.feature_size = configs[5] 76 | 77 | def forward(self, x): 78 | x = self.layer1(x) 79 | x = self.layer2(x) 80 | x = self.layer3(x) 81 | x = self.layer4(x) 82 | x = self.layer5(x) 83 | return x 84 | 85 | 86 | def alexnetlegacy(**kwargs): 87 | return AlexNetLegacy(**kwargs) 88 | 89 | 90 | def alexnet(**kwargs): 91 | return AlexNet(**kwargs) 92 | -------------------------------------------------------------------------------- /pysot/models/backbone/mobile_v2.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import torch 7 | import torch.nn as nn 8 | 9 | 10 | def conv_bn(inp, oup, stride, padding=1): 11 | return nn.Sequential( 12 | nn.Conv2d(inp, oup, 3, stride, padding, bias=False), 13 | nn.BatchNorm2d(oup), 14 | nn.ReLU6(inplace=True) 15 | ) 16 | 17 | 18 | def conv_1x1_bn(inp, oup): 19 | return nn.Sequential( 20 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 21 | nn.BatchNorm2d(oup), 22 | nn.ReLU6(inplace=True) 23 | ) 24 | 25 | 26 | class InvertedResidual(nn.Module): 27 | def __init__(self, inp, oup, stride, expand_ratio, dilation=1): 28 | super(InvertedResidual, self).__init__() 29 | self.stride = stride 30 | 31 | self.use_res_connect = self.stride == 1 and inp == oup 32 | 33 | padding = 2 - stride 34 | if dilation > 1: 35 | padding = dilation 36 | 37 | self.conv = nn.Sequential( 38 | # pw 39 | nn.Conv2d(inp, inp * expand_ratio, 1, 1, 0, bias=False), 40 | nn.BatchNorm2d(inp * expand_ratio), 41 | nn.ReLU6(inplace=True), 42 | # dw 43 | nn.Conv2d(inp * expand_ratio, inp * expand_ratio, 3, 44 | stride, padding, dilation=dilation, 45 | groups=inp * expand_ratio, bias=False), 46 | nn.BatchNorm2d(inp * expand_ratio), 47 | nn.ReLU6(inplace=True), 48 | # pw-linear 49 | nn.Conv2d(inp * expand_ratio, oup, 1, 1, 0, bias=False), 50 | nn.BatchNorm2d(oup), 51 | ) 52 | 53 | def forward(self, x): 54 | if self.use_res_connect: 55 | return x + self.conv(x) 56 | else: 57 | return self.conv(x) 58 | 59 | 60 | class MobileNetV2(nn.Sequential): 61 | def __init__(self, width_mult=1.0, used_layers=[3, 5, 7]): 62 | super(MobileNetV2, self).__init__() 63 | 64 | self.interverted_residual_setting = [ 65 | # t, c, n, s 66 | [1, 16, 1, 1, 1], 67 | [6, 24, 2, 2, 1], 68 | [6, 32, 3, 2, 1], 69 | [6, 64, 4, 2, 1], 70 | [6, 96, 3, 1, 1], 71 | [6, 160, 3, 2, 1], 72 | [6, 320, 1, 1, 1], 73 | ] 74 | # 0,2,3,4,6 75 | 76 | self.interverted_residual_setting = [ 77 | # t, c, n, s 78 | [1, 16, 1, 1, 1], 79 | [6, 24, 2, 2, 1], 80 | [6, 32, 3, 2, 1], 81 | [6, 64, 4, 1, 2], 82 | [6, 96, 3, 1, 2], 83 | [6, 160, 3, 1, 4], 84 | [6, 320, 1, 1, 4], 85 | ] 86 | 87 | self.channels = [24, 32, 96, 320] 88 | self.channels = [int(c * width_mult) for c in self.channels] 89 | 90 | input_channel = int(32 * width_mult) 91 | self.last_channel = int(1280 * width_mult) \ 92 | if width_mult > 1.0 else 1280 93 | 94 | self.add_module('layer0', conv_bn(3, input_channel, 2, 0)) 95 | 96 | last_dilation = 1 97 | 98 | self.used_layers = used_layers 99 | 100 | for idx, (t, c, n, s, d) in \ 101 | enumerate(self.interverted_residual_setting, start=1): 102 | output_channel = int(c * width_mult) 103 | 104 | layers = [] 105 | 106 | for i in range(n): 107 | if i == 0: 108 | if d == last_dilation: 109 | dd = d 110 | else: 111 | dd = max(d // 2, 1) 112 | layers.append(InvertedResidual(input_channel, 113 | output_channel, s, 
t, dd)) 114 | else: 115 | layers.append(InvertedResidual(input_channel, 116 | output_channel, 1, t, d)) 117 | input_channel = output_channel 118 | 119 | last_dilation = d 120 | 121 | self.add_module('layer%d' % (idx), nn.Sequential(*layers)) 122 | 123 | def forward(self, x): 124 | outputs = [] 125 | for idx in range(8): 126 | name = "layer%d" % idx 127 | x = getattr(self, name)(x) 128 | outputs.append(x) 129 | p0, p1, p2, p3, p4 = [outputs[i] for i in [1, 2, 3, 5, 7]] 130 | out = [outputs[i] for i in self.used_layers] 131 | return out 132 | 133 | 134 | def mobilenetv2(**kwargs): 135 | model = MobileNetV2(**kwargs) 136 | return model 137 | 138 | 139 | if __name__ == '__main__': 140 | net = mobilenetv2() 141 | 142 | print(net) 143 | 144 | from torch.autograd import Variable 145 | tensor = Variable(torch.Tensor(1, 3, 255, 255)).cuda() 146 | 147 | net = net.cuda() 148 | 149 | out = net(tensor) 150 | 151 | for i, p in enumerate(out): 152 | print(i, p.size()) 153 | -------------------------------------------------------------------------------- /pysot/models/backbone/newalexnet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class AlexNet(nn.Module): 5 | configs = [3, 96, 256, 384, 384, 256] 6 | 7 | def __init__(self, width_mult=1): 8 | configs = list(map(lambda x: 3 if x == 3 else 9 | int(x*width_mult), AlexNet.configs)) 10 | super(AlexNet, self).__init__() 11 | self.layer1 = nn.Sequential( 12 | nn.Conv2d(configs[0], configs[1], kernel_size=11, stride=2), 13 | nn.BatchNorm2d(configs[1]), 14 | nn.MaxPool2d(kernel_size=3, stride=2), 15 | nn.ReLU(inplace=True), 16 | ) 17 | self.layer2 = nn.Sequential( 18 | nn.Conv2d(configs[1], configs[2], kernel_size=5), 19 | nn.BatchNorm2d(configs[2]), 20 | nn.MaxPool2d(kernel_size=3, stride=2), 21 | nn.ReLU(inplace=True), 22 | ) 23 | self.layer3 = nn.Sequential( 24 | nn.Conv2d(configs[2], configs[3], kernel_size=3), 25 | nn.BatchNorm2d(configs[3]), 26 | nn.ReLU(inplace=True), 27 | ) 28 | self.layer4 = nn.Sequential( 29 | nn.Conv2d(configs[3], configs[4], kernel_size=3), 30 | nn.BatchNorm2d(configs[4]), 31 | nn.ReLU(inplace=True), 32 | ) 33 | 34 | self.layer5 = nn.Sequential( 35 | nn.Conv2d(configs[4], configs[5], kernel_size=3), 36 | nn.BatchNorm2d(configs[5]), 37 | ) 38 | self.feature_size = configs[5] 39 | for param in self.layer1.parameters(): 40 | param.requires_grad = False 41 | for param in self.layer2.parameters(): 42 | param.requires_grad = False 43 | 44 | def forward(self, x): 45 | x = self.layer1(x) 46 | x = self.layer2(x) 47 | x = self.layer3(x) 48 | x = self.layer4(x) 49 | x = self.layer5(x) 50 | return x 51 | 52 | -------------------------------------------------------------------------------- /pysot/models/init_weight.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.init as init 3 | 4 | 5 | def init_weights(model): 6 | for m in model.modules(): 7 | if isinstance(m, nn.Conv2d): 8 | nn.init.kaiming_normal_(m.weight.data, 9 | mode='fan_out', 10 | nonlinearity='relu') 11 | elif isinstance(m, nn.BatchNorm2d): 12 | m.weight.data.fill_(1) 13 | m.bias.data.zero_() 14 | 15 | 16 | def xavier_fill(model): 17 | for module in model.modules(): 18 | if hasattr(module, 'weight'): 19 | if not ('BatchNorm' in module.__class__.__name__): 20 | init.xavier_uniform_(module.weight, gain=1) 21 | else: 22 | init.constant(module.weight, 1) 23 | if hasattr(module, 'bias'): 24 | if module.bias is not None: 25 | 
init.constant_(module.bias, 0) 26 | return model 27 | 28 | 29 | def gauss_fill(model, std=0.001): 30 | for module in model.modules(): 31 | if hasattr(module, 'weight'): 32 | if not ('BatchNorm' in module.__class__.__name__): 33 | init.normal_(module.weight, std=std) 34 | else: 35 | init.constant_(module.weight, 1) 36 | if hasattr(module, 'bias'): 37 | if module.bias is not None: 38 | init.constant_(module.bias, 0) 39 | return model 40 | 41 | 42 | def kaiming_init(module, 43 | a=0, 44 | mode='fan_out', 45 | nonlinearity='relu', 46 | bias=0, 47 | distribution='normal'): 48 | assert distribution in ['uniform', 'normal'] 49 | if distribution == 'uniform': 50 | nn.init.kaiming_uniform_( 51 | module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 52 | else: 53 | nn.init.kaiming_normal_( 54 | module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 55 | if hasattr(module, 'bias') and module.bias is not None: 56 | nn.init.constant_(module.bias, bias) 57 | 58 | def constant_init(module, val, bias=0): 59 | nn.init.constant_(module.weight, val) 60 | if hasattr(module, 'bias') and module.bias is not None: 61 | nn.init.constant_(module.bias, bias) 62 | -------------------------------------------------------------------------------- /pysot/models/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | from torch import nn 8 | 9 | import torch 10 | import torch.nn.functional as F 11 | 12 | 13 | def get_cls_loss(pred, label, select): 14 | if len(select.size()) == 0 or \ 15 | select.size() == torch.Size([0]): 16 | return 0 17 | pred = torch.index_select(pred, 0, select) 18 | label = torch.index_select(label, 0, select) 19 | label=label.long() 20 | return F.nll_loss(pred, label) 21 | 22 | def Downsamplingloss(z,z1): 23 | b,c,w,h=z.size() 24 | b,c,w1,h1=z1.size() 25 | z=z.reshape(b*c,w*h) 26 | z1=z1.reshape(b*c,w1*h1) 27 | loss=torch.tensor(0) 28 | for i in range(w1*h1): 29 | loss1=pow(torch.norm((z1[:, i].unsqueeze(dim=1)-z),dim=0),2).cuda() 30 | loss2=torch.min(loss1,dim=0)[0].cuda() 31 | loss=(loss+loss2).cuda() 32 | lossx=(loss/(b*c*w*h)).sum().cuda() 33 | 34 | return lossx 35 | def select_cross_entropy_loss(pred, label): 36 | pred = pred.view(-1, 2) 37 | label = label.view(-1) 38 | pos = label.data.eq(1).nonzero(as_tuple =False).squeeze().cuda() 39 | neg = label.data.eq(0).nonzero(as_tuple =False).squeeze().cuda() 40 | loss_pos = get_cls_loss(pred, label, pos) 41 | loss_neg = get_cls_loss(pred, label, neg) 42 | return loss_pos * 0.5 + loss_neg * 0.5 43 | 44 | def l1loss(pre,label,weight): 45 | loss=(torch.abs((pre-label))*weight).sum()/(weight).sum() 46 | return loss 47 | 48 | def weight_l1_loss(pred_loc, label_loc, loss_weight): 49 | b, _, sh, sw = pred_loc.size() 50 | pred_loc = pred_loc.view(b, 4, -1, sh, sw) 51 | diff = (pred_loc - label_loc).abs() 52 | diff = diff.sum(dim=1).view(b, -1, sh, sw) 53 | loss = diff * loss_weight 54 | return loss.sum().div(b) 55 | 56 | def DISCLE(pred, target, weight): 57 | pred_x = (pred[:,:, 0]+pred[:,:, 2])/2 58 | pred_y = (pred[:,:, 1]+pred[:,:, 3])/2 59 | pred_w = (-pred[:,:, 0]+pred[:,:, 2]) 60 | pred_h = (-pred[:,:, 1]+pred[:,:, 3]) 61 | 62 | 63 | 64 | target_x = (target[:,:, 0]+target[:,:, 2])/2 65 | target_y = (target[:,:, 1]+target[:,:, 3])/2 66 | target_w = (-target[:,:, 0]+target[:,:, 2]) 67 | 
target_h = (-target[:,:, 1]+target[:,:, 3]) 68 | 69 | loss=torch.sqrt(torch.pow((pred_x-target_x),2)/target_w+torch.pow((pred_y-target_y),2)/target_h) 70 | 71 | #utile4 torch.sqrt(torch.pow((pred_x-target_x),2)/target_w+torch.pow((pred_y-target_y),2)/target_h) 72 | #testloss torch.sqrt(torch.pow((pred_x-target_x)/target_w,2)+torch.pow((pred_y-target_y)/target_h,2))\ 73 | 74 | weight=weight.view(loss.size()) 75 | 76 | return (loss * weight).sum() / (weight.sum()+1e-6) 77 | 78 | class IOULoss(nn.Module): 79 | def forward(self, pred, target, weight=None): 80 | 81 | pred_left = pred[:,:, 0] 82 | pred_top = pred[:,:, 1] 83 | pred_right = pred[:,:, 2] 84 | pred_bottom = pred[:,:, 3] 85 | 86 | target_left = target[:,:, 0] 87 | target_top = target[:,:, 1] 88 | target_right = target[:,:, 2] 89 | target_bottom = target[:,:, 3] 90 | 91 | target_aera = (target_right-target_left ) * \ 92 | (target_bottom-target_top) 93 | pred_aera = (pred_right-pred_left ) * \ 94 | (pred_bottom-pred_top) 95 | 96 | w_intersect = torch.min(pred_right, target_right)-torch.max(pred_left, target_left) 97 | w_intersect=w_intersect.clamp(min=0) 98 | h_intersect = torch.min(pred_bottom, target_bottom) -torch.max(pred_top, target_top) 99 | h_intersect=h_intersect.clamp(min=0) 100 | area_intersect = w_intersect * h_intersect 101 | area_union = target_aera + pred_aera - area_intersect 102 | ious=((area_intersect ) / (area_union +1e-6)).clamp(min=0)+1e-6 103 | 104 | losses = -torch.log(ious) 105 | weight=weight.view(losses.size()) 106 | 107 | 108 | return (losses * weight).sum() / (weight.sum()+1e-6) 109 | 110 | class dIOULoss(nn.Module): 111 | def forward(self, pred, target, weight=None): 112 | 113 | pred_left = pred[:,:, 0] 114 | pred_top = pred[:,:, 1] 115 | pred_right = pred[:,:, 2] 116 | pred_bottom = pred[:,:, 3] 117 | 118 | target_left = target[:,:, 0] 119 | target_top = target[:,:, 1] 120 | target_right = target[:,:, 2] 121 | target_bottom = target[:,:, 3] 122 | 123 | prx=((pred_left+pred_right)/2) 124 | pry=((pred_top+pred_bottom)/2) 125 | tax=((target_left+target_right)/2) 126 | tay=((target_top+target_bottom)/2) 127 | 128 | 129 | 130 | target_aera = (target_right-target_left ) * \ 131 | (target_bottom-target_top) 132 | pred_aera = (pred_right-pred_left ) * \ 133 | (pred_bottom-pred_top) 134 | 135 | w_intersect = torch.min(pred_right, target_right)-torch.max(pred_left, target_left) 136 | w_intersect=w_intersect.clamp(min=0) 137 | h_intersect = torch.min(pred_bottom, target_bottom) -torch.max(pred_top, target_top) 138 | h_intersect=h_intersect.clamp(min=0) 139 | area_intersect = w_intersect * h_intersect 140 | area_union = target_aera + pred_aera - area_intersect 141 | ious=((area_intersect ) / (area_union +1e-6)).clamp(min=0)+1e-6 142 | 143 | losses = -torch.log(ious)+(((prx-tax)**2+(tay-pry)**2)**0.5)*0.2 144 | 145 | weight=weight.view(losses.size()) 146 | if weight.sum()>0: 147 | 148 | return (losses * weight).sum() / (weight.sum()+1e-6) 149 | else: 150 | return (losses *weight).sum() 151 | 152 | class gIOULoss(nn.Module): 153 | def forward(self, pred, target, weight=None): 154 | 155 | pred_left = pred[:,:, 0] 156 | pred_top = pred[:,:, 1] 157 | pred_right = pred[:,:, 2] 158 | pred_bottom = pred[:,:, 3] 159 | 160 | target_left = target[:,:, 0] 161 | target_top = target[:,:, 1] 162 | target_right = target[:,:, 2] 163 | target_bottom = target[:,:, 3] 164 | 165 | x1 = torch.min(pred_left, pred_right) 166 | y1 = torch.min(pred_top, pred_bottom) 167 | x2 = torch.max(pred_left, pred_right) 168 | y2 = torch.max(pred_top, 
pred_bottom) 169 | 170 | xkis1 = torch.max(x1, target_left) 171 | ykis1 = torch.max(y1, target_top) 172 | xkis2 = torch.min(x2, target_right) 173 | ykis2 = torch.min(y2, target_bottom) 174 | 175 | xc1 = torch.min(x1, target_left) 176 | yc1 = torch.min(y1, target_top) 177 | xc2 = torch.max(x2, target_right) 178 | yc2 = torch.max(y2, target_bottom) 179 | 180 | intsctk = torch.zeros(x1.size()).cuda() 181 | 182 | mask = (ykis2 > ykis1) * (xkis2 > xkis1) 183 | intsctk[mask] = (xkis2[mask] - xkis1[mask]) * (ykis2[mask] - ykis1[mask]) 184 | unionk = (x2 - x1) * (y2 - y1) + (target_right - target_left) * (target_bottom - target_top) - intsctk + 1e-7 185 | iouk = intsctk / unionk 186 | 187 | area_c = (xc2 - xc1) * (yc2 - yc1) + 1e-7 188 | miouk = iouk - ((area_c - unionk) / area_c) 189 | 190 | # iouk = ((1 - iouk) * iou_weights).sum(0) / batch_size 191 | # miouk = ((1 - miouk) * iou_weights).sum(0) / batch_size 192 | 193 | 194 | losses = 1-miouk 195 | weight=weight.view(losses.size()) 196 | if weight.sum()>0: 197 | 198 | return (losses * weight).sum() / (weight.sum()+1e-6) 199 | else: 200 | return (losses *weight).sum() 201 | -------------------------------------------------------------------------------- /pysot/models/model_builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | import torch as t 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | from pysot.core.config import cfg 11 | from pysot.models.loss import select_cross_entropy_loss, weight_l1_loss,l1loss,IOULoss,Downsamplingloss 12 | from pysot.models.backbone.alexnet import AlexNet 13 | from pysot.models.utile.utile import hiftmodule,Adadownsamplingnet_tem 14 | import numpy as np 15 | 16 | 17 | 18 | class ModelBuilder(nn.Module): 19 | def __init__(self): 20 | super(ModelBuilder, self).__init__() 21 | self.backbone = AlexNet().cuda() 22 | self.downsampling=Adadownsamplingnet_tem().cuda() 23 | 24 | self.grader=hiftmodule(cfg).cuda() 25 | self.cls2loss=nn.BCEWithLogitsLoss() 26 | self.IOULoss=IOULoss() 27 | 28 | def template(self, z): 29 | with t.no_grad(): 30 | zf = self.backbone(z) 31 | zff=self.downsampling(zf) 32 | self.zf=zff 33 | self.z=zf 34 | # self.zf1=zf1 35 | 36 | 37 | def track(self, x): 38 | with t.no_grad(): 39 | 40 | xf = self.backbone(x) 41 | xff=self.downsampling(xf) 42 | loc,cls1,cls2=self.grader(xff,self.zf,xf,self.z) 43 | 44 | return { 45 | 46 | 'cls1': cls1, 47 | 'cls2': cls2, 48 | 'loc': loc 49 | } 50 | 51 | def log_softmax(self, cls): 52 | b, a2, h, w = cls.size() 53 | cls = cls.view(b, 2, a2//2, h, w) 54 | cls = cls.permute(0, 2, 3, 4, 1).contiguous() 55 | cls = F.log_softmax(cls, dim=4) 56 | 57 | return cls 58 | 59 | 60 | def getcentercuda(self,mapp): 61 | 62 | 63 | def dcon(x): 64 | x[t.where(x<=-1)]=-0.99 65 | x[t.where(x>=1)]=0.99 66 | return (t.log(1+x)-t.log(1-x))/2 67 | 68 | size=mapp.size()[3] 69 | #location 70 | x=t.Tensor(np.tile((16*(np.linspace(0,size-1,size))+63)-cfg.TRAIN.SEARCH_SIZE//2,size).reshape(-1)).cuda() 71 | y=t.Tensor(np.tile((16*(np.linspace(0,size-1,size))+63).reshape(-1,1)-cfg.TRAIN.SEARCH_SIZE//2,size).reshape(-1)).cuda() 72 | 73 | shap=dcon(mapp)*(cfg.TRAIN.SEARCH_SIZE//2) 74 | 75 | xx=np.int16(np.tile(np.linspace(0,size-1,size),size).reshape(-1)) 76 | 
yy=np.int16(np.tile(np.linspace(0,size-1,size).reshape(-1,1),size).reshape(-1)) 77 | 78 | 79 | w=shap[:,0,yy,xx]+shap[:,1,yy,xx] 80 | h=shap[:,2,yy,xx]+shap[:,3,yy,xx] 81 | x=x-shap[:,0,yy,xx]+w/2+cfg.TRAIN.SEARCH_SIZE//2 82 | y=y-shap[:,2,yy,xx]+h/2+cfg.TRAIN.SEARCH_SIZE//2 83 | 84 | anchor=t.zeros((cfg.TRAIN.BATCH_SIZE//cfg.TRAIN.NUM_GPU,size**2,4)).cuda() 85 | 86 | anchor[:,:,0]=x-w/2 87 | anchor[:,:,1]=y-h/2 88 | anchor[:,:,2]=x+w/2 89 | anchor[:,:,3]=y+h/2 90 | return anchor 91 | 92 | 93 | def forward(self,data): 94 | """ only used in training 95 | """ 96 | 97 | template = data['template'].cuda() 98 | search =data['search'].cuda() 99 | bbox=data['bbox'].cuda() 100 | labelcls1=data['label_cls1'].cuda() 101 | labelxff=data['labelxff'].cuda() 102 | labelcls2=data['labelcls2'].cuda() 103 | weightxff=data['weightxff'].cuda() 104 | 105 | 106 | 107 | zf = self.backbone(template) 108 | zff=self.downsampling(zf) 109 | xf = self.backbone(search) 110 | xff=self.downsampling(xf) 111 | loc,cls1,cls2=self.grader(xff,zff,xf,zf) 112 | 113 | 114 | cls1 = self.log_softmax(cls1) 115 | 116 | 117 | 118 | cls_loss1 = select_cross_entropy_loss(cls1, labelcls1) 119 | cls_loss2 = self.cls2loss(cls2, labelcls2) 120 | 121 | pre_bbox=self.getcentercuda(loc) 122 | bbo=self.getcentercuda(labelxff) 123 | 124 | loc_loss=self.IOULoss(pre_bbox,bbo,weightxff) 125 | 126 | pd_loss=cfg.TRAIN.w6*Downsamplingloss(zf,zff) 127 | +cfg.TRAIN.w7*Downsamplingloss(xf,xff) 128 | cls_loss=cfg.TRAIN.w4*cls_loss1+cfg.TRAIN.w5*cls_loss2 129 | 130 | 131 | 132 | outputs = {} 133 | outputs['total_loss'] =\ 134 | cfg.TRAIN.LOC_WEIGHT*loc_loss\ 135 | +cfg.TRAIN.CLS_WEIGHT*cls_loss+cfg.TRAIN.PD_WEIGHT*pd_loss 136 | outputs['cls_loss'] = cls_loss 137 | outputs['loc_loss'] = loc_loss 138 | outputs['pd_loss'] = pd_loss 139 | 140 | return outputs 141 | -------------------------------------------------------------------------------- /pysot/models/utile/__pycache__/tran.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/utile/__pycache__/tran.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/utile/__pycache__/utile.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/models/utile/__pycache__/utile.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/utile/utile.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | import torch as t 4 | import math 5 | from pysot.models.utile.tran import Transformer 6 | class Adadownsamplingnet_tem(nn.Module): 7 | 8 | def __init__(self): 9 | super(Adadownsamplingnet_tem, self).__init__() 10 | 11 | channel=256 12 | 13 | self.downsampling1=nn.Sequential( 14 | nn.Conv2d(256, 256, kernel_size=2, stride=2), 15 | nn.BatchNorm2d(256), 16 | nn.ReLU(inplace=True), 17 | 18 | ) 19 | self.downsampling2= nn.Sequential( 20 | nn.Conv2d(128, 128, kernel_size=2, stride=2), 21 | nn.BatchNorm2d(128), 22 | nn.ReLU(inplace=True), 23 | ) 24 | 25 | 26 | self.conv1= nn.Sequential( 27 | nn.Conv2d(256, 128, kernel_size=1, stride=1), 28 | nn.BatchNorm2d(128), 29 | nn.ReLU(inplace=True), 30 | nn.Conv2d(128, 128, kernel_size=(1,3), 
stride=1,padding=(0,1)), 31 | nn.BatchNorm2d(128), 32 | nn.ReLU(inplace=True), 33 | nn.Conv2d(128, 128, kernel_size=(3,1), stride=1,padding=(1,0)), 34 | ) 35 | self.conv2 = nn.Sequential( 36 | nn.Conv2d(256, 128, kernel_size=1, stride=1), 37 | nn.BatchNorm2d(128), 38 | nn.ReLU(inplace=True), 39 | nn.Conv2d(128, 128, kernel_size=(1,3), stride=1,padding=(0,1)), 40 | nn.BatchNorm2d(128), 41 | nn.ReLU(inplace=True), 42 | nn.Conv2d(128, 128, kernel_size=(3,1), stride=1,padding=(1,0)), 43 | ) 44 | 45 | self.conv3 = nn.Sequential( 46 | nn.ConvTranspose2d(128*2, 256, kernel_size=1, stride=1), 47 | nn.BatchNorm2d(256), 48 | nn.ReLU(inplace=True), 49 | ) 50 | 51 | 52 | for modules in [self.conv1,self.conv2,self.conv3,self.downsampling1,self.downsampling2]: 53 | for l in modules.modules(): 54 | if isinstance(l, nn.Conv2d): 55 | t.nn.init.normal_(l.weight, std=0.01) 56 | t.nn.init.constant_(l.bias, 0) 57 | 58 | def forward(self, z): 59 | 60 | b, c, w, h=z.size() 61 | 62 | 63 | z1=self.downsampling1(z) 64 | z2=self.conv1(z1) 65 | 66 | z3=self.conv2(z) 67 | z4=self.downsampling2(z3) 68 | 69 | z5=self.conv3(t.cat((z2,z4),1)) 70 | 71 | return z5 72 | 73 | class hiftmodule(nn.Module): 74 | 75 | def __init__(self,cfg): 76 | super(hiftmodule, self).__init__() 77 | 78 | 79 | channel=256 80 | self.conv1=nn.Sequential( 81 | nn.Conv2d(256, 256, kernel_size=3, stride=2,padding=1), 82 | nn.BatchNorm2d(256), 83 | nn.ReLU(inplace=True), 84 | ) 85 | self.conv2 = nn.Sequential( 86 | nn.ConvTranspose2d(256*2, 256, kernel_size=1, stride=1), 87 | nn.BatchNorm2d(256), 88 | nn.ReLU(inplace=True), 89 | ) 90 | self.conv3 = nn.Sequential( 91 | nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2), 92 | nn.BatchNorm2d(256), 93 | nn.ReLU(inplace=True), 94 | nn.Conv2d(channel, channel, kernel_size=3, stride=1,padding=1), 95 | 96 | 97 | 98 | ) 99 | 100 | self.convloc = nn.Sequential( 101 | nn.Conv2d(channel, channel, kernel_size=2, stride=2), 102 | nn.BatchNorm2d(channel), 103 | nn.ReLU(inplace=True), 104 | nn.Conv2d(channel, channel, kernel_size=3, stride=1,padding=1), 105 | nn.BatchNorm2d(channel), 106 | nn.ReLU(inplace=True), 107 | nn.Conv2d(channel, channel, kernel_size=3, stride=1,padding=1), 108 | nn.BatchNorm2d(channel), 109 | nn.ReLU(inplace=True), 110 | nn.Conv2d(channel, channel, kernel_size=3, stride=1,padding=1), 111 | nn.BatchNorm2d(channel), 112 | nn.ReLU(inplace=True), 113 | nn.Conv2d(channel, channel, kernel_size=3, stride=1,padding=1), 114 | nn.BatchNorm2d(channel), 115 | nn.ReLU(inplace=True), 116 | nn.Conv2d(channel, 4, kernel_size=3, stride=1,padding=1), 117 | ) 118 | 119 | self.convcls = nn.Sequential( 120 | nn.Conv2d(channel, channel, kernel_size=2, stride=2), 121 | nn.BatchNorm2d(channel), 122 | nn.ReLU(inplace=True), 123 | nn.Conv2d(channel, channel, kernel_size=3, stride=1,padding=1), 124 | nn.BatchNorm2d(channel), 125 | nn.ReLU(inplace=True), 126 | nn.Conv2d(channel, channel, kernel_size=3, stride=1,padding=1), 127 | nn.BatchNorm2d(channel), 128 | nn.ReLU(inplace=True), 129 | nn.Conv2d(channel, channel, kernel_size=3, stride=1,padding=1), 130 | nn.BatchNorm2d(channel), 131 | nn.ReLU(inplace=True), 132 | nn.Conv2d(channel, channel, kernel_size=3, stride=1,padding=1), 133 | nn.BatchNorm2d(channel), 134 | nn.ReLU(inplace=True), 135 | ) 136 | 137 | self.row_embed1 = nn.Embedding(50, 256//2) 138 | self.col_embed1 = nn.Embedding(50, 256//2) 139 | self.row_embed2 = nn.Embedding(50, 256//2) 140 | self.col_embed2 = nn.Embedding(50, 256//2) 141 | self.reset_parameters() 142 | 143 | self.trans = Transformer(256, 
8,1,1) 144 | 145 | self.cls1=nn.Conv2d(channel, 2, kernel_size=3, stride=1,padding=1) 146 | self.cls2=nn.Conv2d(channel, 1, kernel_size=3, stride=1,padding=1) 147 | for modules in [self.conv1,self.conv2,self.convloc, self.convcls,self.conv3, 148 | self.cls1, self.cls2]: 149 | for l in modules.modules(): 150 | if isinstance(l, nn.Conv2d): 151 | t.nn.init.normal_(l.weight, std=0.01) 152 | t.nn.init.constant_(l.bias, 0) 153 | 154 | 155 | def reset_parameters(self): 156 | nn.init.uniform_(self.row_embed1.weight) 157 | nn.init.uniform_(self.col_embed1.weight) 158 | nn.init.uniform_(self.row_embed2.weight) 159 | nn.init.uniform_(self.col_embed2.weight) 160 | 161 | def xcorr_depthwise(self,x, kernel): 162 | """depthwise cross correlation 163 | """ 164 | batch = kernel.size(0) 165 | channel = kernel.size(1) 166 | x = x.view(1, batch*channel, x.size(2), x.size(3)) 167 | kernel = kernel.view(batch*channel, 1, kernel.size(2), kernel.size(3)) 168 | out = F.conv2d(x, kernel, groups=batch*channel) 169 | out = out.view(batch, channel, out.size(2), out.size(3)) 170 | return out 171 | 172 | def forward(self,x,z,xf,zf): 173 | 174 | resx=self.xcorr_depthwise(x, z) 175 | resd=self.xcorr_depthwise(xf, zf) 176 | resd=self.conv1(resd) 177 | res=self.conv2(t.cat((resx,resd),1)) 178 | h1, w1 = 11, 11 179 | i1 = t.arange(w1).cuda() 180 | j1 = t.arange(h1).cuda() 181 | x_emb1 = self.col_embed1(i1) 182 | y_emb1 = self.row_embed1(j1) 183 | 184 | pos1 = t.cat([ 185 | x_emb1.unsqueeze(0).repeat(h1, 1, 1), 186 | y_emb1.unsqueeze(1).repeat(1, w1, 1), 187 | ], dim=-1).permute(2, 0, 1).unsqueeze(0).repeat(res.shape[0], 1, 1, 1) 188 | 189 | h2, w2 = 22, 22 190 | i2 = t.arange(w2).cuda() 191 | j2 = t.arange(h2).cuda() 192 | x_emb2 = self.col_embed2(i2) 193 | y_emb2 = self.row_embed2(j2) 194 | 195 | pos2 = t.cat([ 196 | x_emb2.unsqueeze(0).repeat(h2, 1, 1), 197 | y_emb2.unsqueeze(1).repeat(1, w2, 1), 198 | ], dim=-1).permute(2, 0, 1).unsqueeze(0).repeat(res.shape[0], 1, 1, 1) 199 | 200 | 201 | 202 | b, c, w, h=res.size() 203 | res1=self.conv3(res) 204 | res2=self.trans((pos1+res).view(b,256,-1).permute(2, 0, 1),\ 205 | (pos2+res1).view(b,256,-1).permute(2, 0, 1)) 206 | 207 | 208 | 209 | 210 | res2=res2.permute(1,2,0).view(b,256,22,22) 211 | loc=self.convloc(res2) 212 | acls=self.convcls(res2) 213 | 214 | cls1=self.cls1(acls) 215 | cls2=self.cls2(acls) 216 | 217 | return loc,cls1,cls2 218 | 219 | 220 | 221 | 222 | 223 | -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/base_tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/tracker/__pycache__/base_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/siamapn_tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/tracker/__pycache__/siamapn_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/base_tracker.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
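# ---------------------------------------------------------------------------
# A minimal driver-loop sketch for the init()/track() interface defined
# below, assuming the SiamAPNTracker defined later in this package, a
# checkpoint at snapshot/AFRTmodel.pth (as in the README), and an OpenCV
# video source. The video path and first-frame box are placeholders, and the
# experiment config (experiments/config.yaml) is assumed to have been merged
# into pysot.core.config.cfg beforehand.
#
#   import cv2
#   from pysot.models.model_builder import ModelBuilder
#   from pysot.tracker.siamapn_tracker import SiamAPNTracker
#   from pysot.utils.model_load import load_pretrain
#
#   model = load_pretrain(ModelBuilder(), 'snapshot/AFRTmodel.pth').cuda().eval()
#   tracker = SiamAPNTracker(model)
#
#   cap = cv2.VideoCapture('video.mp4')           # placeholder input video
#   ok, frame = cap.read()                        # BGR image, as init() expects
#   tracker.init(frame, [100, 120, 40, 60])       # 0-based [x, y, width, height]
#   while True:
#       ok, frame = cap.read()
#       if not ok:
#           break
#       out = tracker.track(frame)                # {'bbox': [x, y, w, h], 'best_score': s}
#       x, y, w, h = out['bbox']
# ---------------------------------------------------------------------------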
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import cv2 9 | import numpy as np 10 | import torch 11 | 12 | from pysot.core.config import cfg 13 | 14 | 15 | class BaseTracker(object): 16 | """ Base tracker of single objec tracking 17 | """ 18 | def init(self, img, bbox): 19 | """ 20 | args: 21 | img(np.ndarray): BGR image 22 | bbox(list): [x, y, width, height] 23 | x, y need to be 0-based 24 | """ 25 | raise NotImplementedError 26 | 27 | def track(self, img): 28 | """ 29 | args: 30 | img(np.ndarray): BGR image 31 | return: 32 | bbox(list):[x, y, width, height] 33 | """ 34 | raise NotImplementedError 35 | 36 | 37 | class SiameseTracker(BaseTracker): 38 | def get_subwindow(self, im, pos, model_sz, original_sz, avg_chans): 39 | """ 40 | args: 41 | im: bgr based image 42 | pos: center position 43 | model_sz: exemplar size 44 | s_z: original size 45 | avg_chans: channel average 46 | """ 47 | if isinstance(pos, float): 48 | pos = [pos, pos] 49 | sz = original_sz 50 | im_sz = im.shape 51 | c = (original_sz + 1) / 2 52 | # context_xmin = round(pos[0] - c) # py2 and py3 round 53 | context_xmin = np.floor(pos[0] - c + 0.5) 54 | context_xmax = context_xmin + sz - 1 55 | # context_ymin = round(pos[1] - c) 56 | context_ymin = np.floor(pos[1] - c + 0.5) 57 | context_ymax = context_ymin + sz - 1 58 | left_pad = int(max(0., -context_xmin)) 59 | top_pad = int(max(0., -context_ymin)) 60 | right_pad = int(max(0., context_xmax - im_sz[1] + 1)) 61 | bottom_pad = int(max(0., context_ymax - im_sz[0] + 1)) 62 | 63 | context_xmin = context_xmin + left_pad 64 | context_xmax = context_xmax + left_pad 65 | context_ymin = context_ymin + top_pad 66 | context_ymax = context_ymax + top_pad 67 | 68 | r, c, k = im.shape 69 | if any([top_pad, bottom_pad, left_pad, right_pad]): 70 | size = (r + top_pad + bottom_pad, c + left_pad + right_pad, k) 71 | te_im = np.zeros(size, np.uint8) 72 | te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im 73 | if top_pad: 74 | te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans 75 | if bottom_pad: 76 | te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans 77 | if left_pad: 78 | te_im[:, 0:left_pad, :] = avg_chans 79 | if right_pad: 80 | te_im[:, c + left_pad:, :] = avg_chans 81 | im_patch = te_im[int(context_ymin):int(context_ymax + 1), 82 | int(context_xmin):int(context_xmax + 1), :] 83 | else: 84 | im_patch = im[int(context_ymin):int(context_ymax + 1), 85 | int(context_xmin):int(context_xmax + 1), :] 86 | 87 | if not np.array_equal(model_sz, original_sz): 88 | im_patch = cv2.resize(im_patch, (model_sz, model_sz)) 89 | im_patch = im_patch.transpose(2, 0, 1) 90 | im_patch = im_patch[np.newaxis, :, :, :] 91 | im_patch = im_patch.astype(np.float32) 92 | im_patch = torch.from_numpy(im_patch) 93 | if cfg.CUDA: 94 | im_patch = im_patch.cuda() 95 | return im_patch 96 | -------------------------------------------------------------------------------- /pysot/tracker/siamapn_tracker.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | import numpy as np 6 | import torch.nn.functional as F 7 | from pysot.core.config import cfg 8 | from pysot.tracker.base_tracker import SiameseTracker 9 | 10 | class SiamAPNTracker(SiameseTracker): 11 | def __init__(self, model): 12 
| super(SiamAPNTracker, self).__init__() 13 | #self.score_size = (cfg.TRACK.INSTANCE_SIZE - cfg.TRACK.EXEMPLAR_SIZE) // \ 14 | # cfg.ANCHOR.STRIDE + 1 15 | self.score_size=cfg.TRAIN.OUTPUT_SIZE 16 | self.anchor_num=1 17 | hanning = np.hanning(self.score_size) 18 | window = np.outer(hanning, hanning) 19 | self.window = np.tile(window.flatten(), self.anchor_num) 20 | self.model = model 21 | self.model.eval() 22 | 23 | def generate_anchor(self,mapp): 24 | def dcon(x): 25 | x[np.where(x<=-1)]=-0.99 26 | x[np.where(x>=1)]=0.99 27 | return (np.log(1+x)-np.log(1-x))/2 28 | 29 | 30 | size=cfg.TRAIN.OUTPUT_SIZE 31 | x=np.tile((cfg.ANCHOR.STRIDE*(np.linspace(0,size-1,size))+63)-cfg.TRAIN.SEARCH_SIZE//2,size).reshape(-1) 32 | y=np.tile((cfg.ANCHOR.STRIDE*(np.linspace(0,size-1,size))+63).reshape(-1,1)-cfg.TRAIN.SEARCH_SIZE//2,size).reshape(-1) 33 | shap=(dcon(mapp[0].cpu().detach().numpy()))*143 34 | xx=np.int16(np.tile(np.linspace(0,size-1,size),size).reshape(-1)) 35 | yy=np.int16(np.tile(np.linspace(0,size-1,size).reshape(-1,1),size).reshape(-1)) 36 | w=shap[0,yy,xx]+shap[1,yy,xx] 37 | h=shap[2,yy,xx]+shap[3,yy,xx] 38 | x=x-shap[0,yy,xx]+w/2 39 | y=y-shap[2,yy,xx]+h/2 40 | 41 | anchor=np.zeros((size**2,4)) 42 | 43 | anchor[:,0]=x 44 | anchor[:,1]=y 45 | anchor[:,2]=np.maximum(1,w) 46 | anchor[:,3]=np.maximum(1,h) 47 | return anchor 48 | 49 | 50 | def _convert_bbox(self, delta, anchor): 51 | delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1) 52 | delta = delta.data.cpu().numpy() 53 | 54 | delta[0, :] = delta[0, :] * anchor[:, 2] + anchor[:, 0] 55 | delta[1, :] = delta[1, :] * anchor[:, 3] + anchor[:, 1] 56 | delta[2, :] = np.exp(delta[2, :]) * anchor[:, 2] 57 | delta[3, :] = np.exp(delta[3, :]) * anchor[:, 3] 58 | return delta 59 | 60 | def _convert_score(self, score): 61 | score = score.permute(1, 2, 3, 0).contiguous().view(2, -1).permute(1, 0) 62 | score = F.softmax(score, dim=1).data[:, 1].cpu().numpy() 63 | return score 64 | 65 | def _bbox_clip(self, cx, cy, width, height, boundary): 66 | cx = max(0, min(cx, boundary[1])) 67 | cy = max(0, min(cy, boundary[0])) 68 | width = max(10, min(width, boundary[1])) 69 | height = max(10, min(height, boundary[0])) 70 | return cx, cy, width, height 71 | 72 | def init(self, img, bbox): 73 | """ 74 | args: 75 | img(np.ndarray): BGR image 76 | bbox: (x, y, w, h) bbox 77 | """ 78 | self.image=img 79 | 80 | 81 | self.center_pos = np.array([bbox[0]+(bbox[2]-1)/2, 82 | bbox[1]+(bbox[3]-1)/2]) 83 | 84 | 85 | self.size = np.array([bbox[2], bbox[3]]) 86 | #self.firstbbox=np.concatenate((self.center_pos,self.size)) 87 | # calculate z crop size 88 | w_z = self.size[0] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size) 89 | h_z = self.size[1] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size) 90 | s_z = round(np.sqrt(w_z * h_z)) 91 | self.scaleaa=s_z 92 | 93 | # calculate channle average 94 | self.channel_average = np.mean(img, axis=(0, 1)) 95 | 96 | # get crop 97 | z_crop = self.get_subwindow(img, self.center_pos, 98 | cfg.TRACK.EXEMPLAR_SIZE, 99 | s_z, self.channel_average) 100 | self.template=z_crop 101 | 102 | 103 | 104 | 105 | 106 | self.model.template(z_crop) 107 | 108 | def con(self, x): 109 | return x*(cfg.TRAIN.SEARCH_SIZE//2) 110 | 111 | def track(self, img): 112 | """ 113 | args: 114 | img(np.ndarray): BGR image 115 | return: 116 | bbox(list):[x, y, width, height] 117 | """ 118 | 119 | w_z = self.size[0] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size) 120 | h_z = self.size[1] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size) 121 | s_z = np.sqrt(w_z * h_z) 122 | if 
self.size[0]*self.size[1] >0.5*img.shape[0]*img.shape[1]: 123 | s_z=self.scaleaa 124 | 125 | scale_z = cfg.TRAIN.EXEMPLAR_SIZE / s_z 126 | 127 | s_x = s_z * (cfg.TRACK.INSTANCE_SIZE / cfg.TRACK.EXEMPLAR_SIZE) 128 | 129 | x_crop = self.get_subwindow(img, self.center_pos, 130 | cfg.TRACK.INSTANCE_SIZE, 131 | round(s_x), self.channel_average) 132 | 133 | 134 | outputs = self.model.track(x_crop) 135 | pred_bbox=self.generate_anchor(outputs['loc']).transpose() 136 | # self.anchors = self.generate_anchor() 137 | # score1 = self._convert_score(outputs['cls1'])*cfg.TRACK.w1 138 | score2 = self._convert_score(outputs['cls1'])*cfg.TRACK.w2 139 | score3=(outputs['cls2']).view(-1).cpu().detach().numpy()*cfg.TRACK.w3 140 | score=(score2+score3)/2 #0.5 0.7 141 | 142 | # pred_bbox = self._convert_bbox(outputs['loc'], self.anchors) 143 | 144 | 145 | def change(r): 146 | 147 | return np.maximum(r, 1. / (r+1e-5)) 148 | 149 | def sz(w, h): 150 | pad = (w + h) * 0.5 151 | return np.sqrt((w + pad) * (h + pad)) 152 | 153 | # scale penalty 154 | s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) / 155 | (sz(self.size[0]*scale_z, self.size[1]*scale_z))) 156 | 157 | # aspect ratio penalty 158 | r_c = change((self.size[0]/(self.size[1]+1e-5)) / 159 | (pred_bbox[2, :]/(pred_bbox[3, :]+1e-5))) 160 | penalty = np.exp(-(r_c * s_c - 1) * cfg.TRACK.PENALTY_K) 161 | pscore = penalty * score 162 | 163 | # window penalty 164 | pscore = pscore * (1 - cfg.TRACK.WINDOW_INFLUENCE) + \ 165 | self.window * cfg.TRACK.WINDOW_INFLUENCE 166 | best_idx = np.argmax(pscore) 167 | 168 | bbox = pred_bbox[:, best_idx] / scale_z 169 | 170 | lr = penalty[best_idx] * score[best_idx] * cfg.TRACK.LR 171 | 172 | cx = bbox[0] + self.center_pos[0] 173 | cy = bbox[1] + self.center_pos[1] 174 | 175 | # smooth bbox 176 | width = self.size[0] * (1 - lr) + bbox[2] * lr 177 | height = self.size[1] * (1 - lr) + bbox[3] * lr 178 | 179 | # clip boundary 180 | cx, cy, width, height = self._bbox_clip(cx, cy, width, 181 | height, img.shape[:2]) 182 | 183 | # udpate state 184 | 185 | self.center_pos = np.array([cx, cy]) 186 | self.size = np.array([width, height]) 187 | 188 | bbox = [cx - width / 2, 189 | cy - height / 2, 190 | width, 191 | height] 192 | 193 | best_score = score[best_idx] 194 | 195 | return { 196 | 'bbox': bbox, 197 | 'best_score': best_score, 198 | } 199 | -------------------------------------------------------------------------------- /pysot/utils/__pycache__/average_meter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/utils/__pycache__/average_meter.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/bbox.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/utils/__pycache__/bbox.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/distributed.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/utils/__pycache__/distributed.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/log_helper.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/utils/__pycache__/log_helper.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/lr_scheduler.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/utils/__pycache__/lr_scheduler.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/utils/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/model_load.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/pysot/utils/__pycache__/model_load.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/average_meter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | 9 | class Meter(object): 10 | def __init__(self, name, val, avg): 11 | self.name = name 12 | self.val = val 13 | self.avg = avg 14 | 15 | def __repr__(self): 16 | return "{name}: {val:.6f} ({avg:.6f})".format( 17 | name=self.name, val=self.val, avg=self.avg 18 | ) 19 | 20 | def __format__(self, *tuples, **kwargs): 21 | return self.__repr__() 22 | 23 | 24 | class AverageMeter: 25 | """Computes and stores the average and current value""" 26 | def __init__(self, num=100): 27 | self.num = num 28 | self.reset() 29 | 30 | def reset(self): 31 | self.val = {} 32 | self.sum = {} 33 | self.count = {} 34 | self.history = {} 35 | 36 | def update(self, batch=1, **kwargs): 37 | val = {} 38 | for k in kwargs: 39 | val[k] = kwargs[k] / float(batch) 40 | self.val.update(val) 41 | for k in kwargs: 42 | if k not in self.sum: 43 | self.sum[k] = 0 44 | self.count[k] = 0 45 | self.history[k] = [] 46 | self.sum[k] += kwargs[k] 47 | self.count[k] += batch 48 | for _ in range(batch): 49 | self.history[k].append(val[k]) 50 | 51 | if self.num <= 0: 52 | # < 0, average all 53 | self.history[k] = [] 54 | 55 | # == 0: no average 56 | if self.num == 0: 57 | self.sum[k] = self.val[k] 58 | self.count[k] = 1 59 | 60 | elif len(self.history[k]) > self.num: 61 | pop_num = len(self.history[k]) - self.num 62 | for _ in range(pop_num): 63 | self.sum[k] -= self.history[k][0] 64 | del self.history[k][0] 65 | self.count[k] -= 1 66 | 67 | def __repr__(self): 68 | s = '' 69 | for k in self.sum: 70 | s += self.format_str(k) 71 | return s 72 | 73 | def format_str(self, attr): 74 | return "{name}: {val:.6f} ({avg:.6f}) ".format( 75 | name=attr, 76 | val=float(self.val[attr]), 77 | avg=float(self.sum[attr]) / self.count[attr]) 78 | 79 | def __getattr__(self, attr): 80 | if attr in self.__dict__: 81 | return super(AverageMeter, 
self).__getattr__(attr) 82 | if attr not in self.sum: 83 | print("invalid key '{}'".format(attr)) 84 | return Meter(attr, 0, 0) 85 | return Meter(attr, self.val[attr], self.avg(attr)) 86 | 87 | def avg(self, attr): 88 | return float(self.sum[attr]) / self.count[attr] 89 | 90 | 91 | if __name__ == '__main__': 92 | avg1 = AverageMeter(10) 93 | avg2 = AverageMeter(0) 94 | avg3 = AverageMeter(-1) 95 | 96 | for i in range(20): 97 | avg1.update(s=i) 98 | avg2.update(s=i) 99 | avg3.update(s=i) 100 | 101 | print('iter {}'.format(i)) 102 | print(avg1.s) 103 | print(avg2.s) 104 | print(avg3.s) 105 | -------------------------------------------------------------------------------- /pysot/utils/bbox.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from collections import namedtuple 9 | 10 | import numpy as np 11 | 12 | 13 | Corner = namedtuple('Corner', 'x1 y1 x2 y2') 14 | # alias 15 | BBox = Corner 16 | Center = namedtuple('Center', 'x y w h') 17 | 18 | 19 | def corner2center(corner): 20 | """ convert (x1, y1, x2, y2) to (cx, cy, w, h) 21 | Args: 22 | conrner: Corner or np.array (4*N) 23 | Return: 24 | Center or np.array (4 * N) 25 | """ 26 | if isinstance(corner, Corner): 27 | x1, y1, x2, y2 = corner 28 | return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, (x2 - x1), (y2 - y1)) 29 | else: 30 | x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3] 31 | x = (x1 + x2) * 0.5 32 | y = (y1 + y2) * 0.5 33 | w = x2 - x1 34 | h = y2 - y1 35 | return x, y, w, h 36 | 37 | 38 | def center2corner(center): 39 | """ convert (cx, cy, w, h) to (x1, y1, x2, y2) 40 | Args: 41 | center: Center or np.array (4 * N) 42 | Return: 43 | center or np.array (4 * N) 44 | """ 45 | if isinstance(center, Center): 46 | x, y, w, h = center 47 | return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5) 48 | else: 49 | x, y, w, h = center[0], center[1], center[2], center[3] 50 | x1 = x - w * 0.5 51 | y1 = y - h * 0.5 52 | x2 = x + w * 0.5 53 | y2 = y + h * 0.5 54 | return x1, y1, x2, y2 55 | 56 | 57 | def IoU(rect1, rect2): 58 | """ caculate interection over union 59 | Args: 60 | rect1: (x1, y1, x2, y2) 61 | rect2: (x1, y1, x2, y2) 62 | Returns: 63 | iou 64 | """ 65 | # overlap 66 | x1, y1, x2, y2 = rect1[0], rect1[1], rect1[2], rect1[3] 67 | tx1, ty1, tx2, ty2 = rect2[0], rect2[1], rect2[2], rect2[3] ##rect2[0], rect2[1], rect2[2], rect2[3] 68 | 69 | xx1 = np.maximum(tx1, x1) 70 | yy1 = np.maximum(ty1, y1) 71 | xx2 = np.minimum(tx2, x2) 72 | yy2 = np.minimum(ty2, y2) 73 | 74 | ww = np.maximum(0, xx2 - xx1) 75 | hh = np.maximum(0, yy2 - yy1) 76 | 77 | area = (x2-x1) * (y2-y1) 78 | target_a = (tx2-tx1) * (ty2 - ty1) 79 | inter = ww * hh 80 | iou = inter / (area + target_a - inter) 81 | return iou 82 | 83 | 84 | def cxy_wh_2_rect(pos, sz): 85 | """ convert (cx, cy, w, h) to (x1, y1, w, h), 0-index 86 | """ 87 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) 88 | 89 | 90 | def rect_2_cxy_wh(rect): 91 | """ convert (x1, y1, w, h) to (cx, cy, w, h), 0-index 92 | """ 93 | return np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2]), \ 94 | np.array([rect[2], rect[3]]) 95 | 96 | 97 | def cxy_wh_2_rect1(pos, sz): 98 | """ convert (cx, cy, w, h) to (x1, y1, w, h), 1-index 99 | """ 100 | return np.array([pos[0]-sz[0]/2+1, pos[1]-sz[1]/2+1, sz[0], sz[1]]) 101 | 102 
| 103 | def rect1_2_cxy_wh(rect): 104 | """ convert (x1, y1, w, h) to (cx, cy, w, h), 1-index 105 | """ 106 | return np.array([rect[0]+rect[2]/2-1, rect[1]+rect[3]/2-1]), \ 107 | np.array([rect[2], rect[3]]) 108 | 109 | 110 | def get_axis_aligned_bbox(region): 111 | """ convert region to (cx, cy, w, h) that represent by axis aligned box 112 | """ 113 | nv = region.size 114 | if nv == 8: 115 | cx = np.mean(region[0::2]) 116 | cy = np.mean(region[1::2]) 117 | x1 = min(region[0::2]) 118 | x2 = max(region[0::2]) 119 | y1 = min(region[1::2]) 120 | y2 = max(region[1::2]) 121 | A1 = np.linalg.norm(region[0:2] - region[2:4]) * \ 122 | np.linalg.norm(region[2:4] - region[4:6]) 123 | A2 = (x2 - x1) * (y2 - y1) 124 | s = np.sqrt(A1 / A2) 125 | w = s * (x2 - x1) + 1 126 | h = s * (y2 - y1) + 1 127 | else: 128 | x = region[0] 129 | y = region[1] 130 | w = region[2] 131 | h = region[3] 132 | cx = x+w/2 133 | cy = y+h/2 134 | return cx, cy, w, h 135 | 136 | 137 | def get_min_max_bbox(region): 138 | """ convert region to (cx, cy, w, h) that represent by mim-max box 139 | """ 140 | nv = region.size 141 | if nv == 8: 142 | cx = np.mean(region[0::2]) 143 | cy = np.mean(region[1::2]) 144 | x1 = min(region[0::2]) 145 | x2 = max(region[0::2]) 146 | y1 = min(region[1::2]) 147 | y2 = max(region[1::2]) 148 | w = x2 - x1 149 | h = y2 - y1 150 | else: 151 | x = region[0] 152 | y = region[1] 153 | w = region[2] 154 | h = region[3] 155 | cx = x+w/2 156 | cy = y+h/2 157 | return cx, cy, w, h 158 | -------------------------------------------------------------------------------- /pysot/utils/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import os 9 | import socket 10 | import logging 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.distributed as dist 15 | 16 | from pysot.utils.log_helper import log_once 17 | 18 | logger = logging.getLogger('global') 19 | 20 | 21 | def average_reduce(v): 22 | if get_world_size() == 1: 23 | return v 24 | tensor = torch.cuda.FloatTensor(1) 25 | tensor[0] = v 26 | dist.all_reduce(tensor) 27 | v = tensor[0] / get_world_size() 28 | return v 29 | 30 | 31 | class DistModule(nn.Module): 32 | def __init__(self, module, bn_method=0): 33 | super(DistModule, self).__init__() 34 | self.module = module 35 | self.bn_method = bn_method 36 | if get_world_size() > 1: 37 | broadcast_params(self.module) 38 | else: 39 | self.bn_method = 0 # single proccess 40 | 41 | def forward(self, *args, **kwargs): 42 | broadcast_buffers(self.module, self.bn_method) 43 | return self.module(*args, **kwargs) 44 | 45 | def train(self, mode=True): 46 | super(DistModule, self).train(mode) 47 | self.module.train(mode) 48 | return self 49 | 50 | 51 | def broadcast_params(model): 52 | """ broadcast model parameters """ 53 | for p in model.state_dict().values(): 54 | dist.broadcast(p, 0) 55 | 56 | 57 | def broadcast_buffers(model, method=0): 58 | """ broadcast model buffers """ 59 | if method == 0: 60 | return 61 | 62 | world_size = get_world_size() 63 | 64 | for b in model._all_buffers(): 65 | if method == 1: # broadcast from main proccess 66 | dist.broadcast(b, 0) 67 | elif method == 2: # average 68 | dist.all_reduce(b) 69 | b /= world_size 70 | else: 71 | raise Exception('Invalid buffer broadcast code {}'.format(method)) 72 | 73 | 74 
| inited = False 75 | 76 | 77 | def _dist_init(): 78 | ''' 79 | if guess right: 80 | ntasks: world_size (process num) 81 | proc_id: rank 82 | ''' 83 | # rank = int(os.environ['RANK']) 84 | rank = 0 85 | num_gpus = torch.cuda.device_count() 86 | torch.cuda.set_device(rank % num_gpus) 87 | dist.init_process_group(backend='nccl') 88 | world_size = dist.get_world_size() 89 | return rank, world_size 90 | 91 | 92 | def _get_local_ip(): 93 | try: 94 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 95 | s.connect(('8.8.8.8', 80)) 96 | ip = s.getsockname()[0] 97 | finally: 98 | s.close() 99 | return ip 100 | 101 | 102 | def dist_init(): 103 | global rank, world_size, inited 104 | # try: 105 | # rank, world_size = _dist_init() 106 | # except RuntimeError as e: 107 | # if 'public' in e.args[0]: 108 | # logger.info(e) 109 | # logger.info('Warning: use single process') 110 | # rank, world_size = 0, 1 111 | # else: 112 | # raise RuntimeError(*e.args) 113 | rank, world_size = 0, 1 114 | inited = True 115 | return rank, world_size 116 | 117 | 118 | def get_rank(): 119 | if not inited: 120 | raise(Exception('dist not inited')) 121 | return rank 122 | 123 | 124 | def get_world_size(): 125 | if not inited: 126 | raise(Exception('dist not inited')) 127 | return world_size 128 | 129 | 130 | def reduce_gradients(model, _type='sum'): 131 | types = ['sum', 'avg'] 132 | assert _type in types, 'gradients method must be in "{}"'.format(types) 133 | log_once("gradients method is {}".format(_type)) 134 | if get_world_size() > 1: 135 | for param in model.parameters(): 136 | if param.requires_grad: 137 | dist.all_reduce(param.grad.data) 138 | if _type == 'avg': 139 | param.grad.data /= get_world_size() 140 | else: 141 | return None 142 | -------------------------------------------------------------------------------- /pysot/utils/location_grid.py: -------------------------------------------------------------------------------- 1 | import torch 2 | def compute_locations(features,stride): 3 | h, w = features.size()[-2:] 4 | locations_per_level = compute_locations_per_level( 5 | h, w, stride, 6 | features.device 7 | ) 8 | return locations_per_level 9 | 10 | 11 | def compute_locations_per_level(h, w, stride, device): 12 | shifts_x = torch.arange( 13 | 0, w * stride, step=stride, 14 | dtype=torch.float32, device=device 15 | ) 16 | shifts_y = torch.arange( 17 | 0, h * stride, step=stride, 18 | dtype=torch.float32, device=device 19 | ) 20 | shift_y, shift_x = torch.meshgrid((shifts_y, shifts_x)) 21 | shift_x = shift_x.reshape(-1) 22 | shift_y = shift_y.reshape(-1) 23 | # locations = torch.stack((shift_x, shift_y), dim=1) + stride + 3*stride # (size_z-1)/2*size_z 28 24 | # locations = torch.stack((shift_x, shift_y), dim=1) + stride 25 | locations = torch.stack((shift_x, shift_y), dim=1) + 32 #alex:48 // 32 26 | return locations 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /pysot/utils/log_helper.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
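# ---------------------------------------------------------------------------
# A minimal usage sketch for the logging helpers defined below (init_log,
# add_file_handler, print_speed, log_once). The log-file path is a
# placeholder; 'global' is the logger name used throughout this codebase.
#
#   import logging
#   from pysot.utils.log_helper import init_log, add_file_handler, print_speed
#
#   init_log('global', logging.INFO)               # cached per (name, level)
#   add_file_handler('global', 'train.log', logging.INFO)
#   logger = logging.getLogger('global')
#   logger.info('start training')
#   print_speed(10, 0.35, 1000)                    # iter 10/1000 at 0.35 s/iter -> logs ETA
# ---------------------------------------------------------------------------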
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import os 9 | import logging 10 | import math 11 | import sys 12 | 13 | 14 | if hasattr(sys, 'frozen'): # support for py2exe 15 | _srcfile = "logging%s__init__%s" % (os.sep, __file__[-4:]) 16 | elif __file__[-4:].lower() in ['.pyc', '.pyo']: 17 | _srcfile = __file__[:-4] + '.py' 18 | else: 19 | _srcfile = __file__ 20 | _srcfile = os.path.normcase(_srcfile) 21 | 22 | logs = set() 23 | 24 | 25 | class Filter: 26 | def __init__(self, flag): 27 | self.flag = flag 28 | 29 | def filter(self, x): 30 | return self.flag 31 | 32 | 33 | class Dummy: 34 | def __init__(self, *arg, **kwargs): 35 | pass 36 | 37 | def __getattr__(self, arg): 38 | def dummy(*args, **kwargs): pass 39 | return dummy 40 | 41 | 42 | def get_format(logger, level): 43 | if 'RANK' in os.environ: 44 | rank = int(os.environ['RANK']) 45 | 46 | if level == logging.INFO: 47 | logger.addFilter(Filter(rank == 0)) 48 | else: 49 | rank = 0 50 | format_str = '[%(asctime)s-rk{}-%(filename)s#%(lineno)3d] %(message)s'.format(rank) 51 | formatter = logging.Formatter(format_str) 52 | return formatter 53 | 54 | 55 | def get_format_custom(logger, level): 56 | if 'RANK' in os.environ: 57 | rank = int(os.environ['RANK']) 58 | if level == logging.INFO: 59 | logger.addFilter(Filter(rank == 0)) 60 | else: 61 | rank = 0 62 | format_str = '[%(asctime)s-rk{}-%(message)s'.format(rank) 63 | formatter = logging.Formatter(format_str) 64 | return formatter 65 | 66 | 67 | def init_log(name, level=logging.INFO, format_func=get_format): 68 | if (name, level) in logs: 69 | return 70 | logs.add((name, level)) 71 | logger = logging.getLogger(name) 72 | logger.setLevel(level) 73 | ch = logging.StreamHandler() 74 | ch.setLevel(level) 75 | formatter = format_func(logger, level) 76 | ch.setFormatter(formatter) 77 | logger.addHandler(ch) 78 | return logger 79 | 80 | 81 | def add_file_handler(name, log_file, level=logging.INFO): 82 | logger = logging.getLogger(name) 83 | fh = logging.FileHandler(log_file) 84 | fh.setFormatter(get_format(logger, level)) 85 | logger.addHandler(fh) 86 | 87 | 88 | init_log('global') 89 | 90 | 91 | def print_speed(i, i_time, n): 92 | """print_speed(index, index_time, total_iteration)""" 93 | logger = logging.getLogger('global') 94 | average_time = i_time 95 | remaining_time = (n - i) * average_time 96 | remaining_day = math.floor(remaining_time / 86400) 97 | remaining_hour = math.floor(remaining_time / 3600 - 98 | remaining_day * 24) 99 | remaining_min = math.floor(remaining_time / 60 - 100 | remaining_day * 1440 - 101 | remaining_hour * 60) 102 | logger.info('Progress: %d / %d [%d%%], Speed: %.3f s/iter, ETA %d:%02d:%02d (D:H:M)\n' % 103 | (i, n, i / n * 100, 104 | average_time, 105 | remaining_day, remaining_hour, remaining_min)) 106 | 107 | 108 | def find_caller(): 109 | def current_frame(): 110 | try: 111 | raise Exception 112 | except: 113 | return sys.exc_info()[2].tb_frame.f_back 114 | 115 | f = current_frame() 116 | if f is not None: 117 | f = f.f_back 118 | rv = "(unknown file)", 0, "(unknown function)" 119 | while hasattr(f, "f_code"): 120 | co = f.f_code 121 | filename = os.path.normcase(co.co_filename) 122 | rv = (co.co_filename, f.f_lineno, co.co_name) 123 | if filename == _srcfile: 124 | f = f.f_back 125 | continue 126 | break 127 | rv = list(rv) 128 | rv[0] = os.path.basename(rv[0]) 129 | return rv 130 | 131 | 132 | class LogOnce: 133 | def 
__init__(self): 134 | self.logged = set() 135 | self.logger = init_log('log_once', format_func=get_format_custom) 136 | 137 | def log(self, strings): 138 | fn, lineno, caller = find_caller() 139 | key = (fn, lineno, caller, strings) 140 | if key in self.logged: 141 | return 142 | self.logged.add(key) 143 | message = "{filename:s}<{caller}>#{lineno:3d}] {strings}".format( 144 | filename=fn, lineno=lineno, strings=strings, caller=caller) 145 | self.logger.info(message) 146 | 147 | 148 | once_logger = LogOnce() 149 | 150 | 151 | def log_once(strings): 152 | once_logger.log(strings) 153 | 154 | 155 | def main(): 156 | for i, lvl in enumerate([logging.DEBUG, logging.INFO, 157 | logging.WARNING, logging.ERROR, 158 | logging.CRITICAL]): 159 | log_name = str(lvl) 160 | init_log(log_name, lvl) 161 | logger = logging.getLogger(log_name) 162 | print('****cur lvl:{}'.format(lvl)) 163 | logger.debug('debug') 164 | logger.info('info') 165 | logger.warning('warning') 166 | logger.error('error') 167 | logger.critical('critiacal') 168 | 169 | 170 | if __name__ == '__main__': 171 | main() 172 | for i in range(10): 173 | log_once('xxx') 174 | -------------------------------------------------------------------------------- /pysot/utils/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import math 9 | 10 | import numpy as np 11 | from torch.optim.lr_scheduler import _LRScheduler 12 | 13 | from pysot.core.config import cfg 14 | 15 | 16 | class LRScheduler(_LRScheduler): 17 | def __init__(self, optimizer, last_epoch=-1): 18 | if 'lr_spaces' not in self.__dict__: 19 | raise Exception('lr_spaces must be set in "LRSchduler"') 20 | super(LRScheduler, self).__init__(optimizer, last_epoch) 21 | 22 | def get_cur_lr(self): 23 | return self.lr_spaces[self.last_epoch] 24 | 25 | def get_lr(self): 26 | epoch = self.last_epoch 27 | return [self.lr_spaces[epoch] * pg['initial_lr'] / self.start_lr 28 | for pg in self.optimizer.param_groups] 29 | 30 | def __repr__(self): 31 | return "({}) lr spaces: \n{}".format(self.__class__.__name__, 32 | self.lr_spaces) 33 | 34 | 35 | class LogScheduler(LRScheduler): 36 | def __init__(self, optimizer, start_lr=0.03, end_lr=5e-4, 37 | epochs=50, last_epoch=-1, **kwargs): 38 | self.start_lr = start_lr 39 | self.end_lr = end_lr 40 | self.epochs = epochs 41 | self.lr_spaces = np.logspace(math.log10(start_lr), 42 | math.log10(end_lr), 43 | epochs) 44 | 45 | super(LogScheduler, self).__init__(optimizer, last_epoch) 46 | 47 | 48 | class StepScheduler(LRScheduler): 49 | def __init__(self, optimizer, start_lr=0.01, end_lr=None, 50 | step=10, mult=0.1, epochs=50, last_epoch=-1, **kwargs): 51 | if end_lr is not None: 52 | if start_lr is None: 53 | start_lr = end_lr / (mult ** (epochs // step)) 54 | else: # for warm up policy 55 | mult = math.pow(end_lr/start_lr, 1. 
/ (epochs // step)) 56 | self.start_lr = start_lr 57 | self.lr_spaces = self.start_lr * (mult**(np.arange(epochs) // step)) 58 | self.mult = mult 59 | self._step = step 60 | 61 | super(StepScheduler, self).__init__(optimizer, last_epoch) 62 | 63 | 64 | class MultiStepScheduler(LRScheduler): 65 | def __init__(self, optimizer, start_lr=0.01, end_lr=None, 66 | steps=[10, 20, 30, 40], mult=0.5, epochs=50, 67 | last_epoch=-1, **kwargs): 68 | if end_lr is not None: 69 | if start_lr is None: 70 | start_lr = end_lr / (mult ** (len(steps))) 71 | else: 72 | mult = math.pow(end_lr/start_lr, 1. / len(steps)) 73 | self.start_lr = start_lr 74 | self.lr_spaces = self._build_lr(start_lr, steps, mult, epochs) 75 | self.mult = mult 76 | self.steps = steps 77 | 78 | super(MultiStepScheduler, self).__init__(optimizer, last_epoch) 79 | 80 | def _build_lr(self, start_lr, steps, mult, epochs): 81 | lr = [0] * epochs 82 | lr[0] = start_lr 83 | for i in range(1, epochs): 84 | lr[i] = lr[i-1] 85 | if i in steps: 86 | lr[i] *= mult 87 | return np.array(lr, dtype=np.float32) 88 | 89 | 90 | class LinearStepScheduler(LRScheduler): 91 | def __init__(self, optimizer, start_lr=0.01, end_lr=0.005, 92 | epochs=50, last_epoch=-1, **kwargs): 93 | self.start_lr = start_lr 94 | self.end_lr = end_lr 95 | self.lr_spaces = np.linspace(start_lr, end_lr, epochs) 96 | super(LinearStepScheduler, self).__init__(optimizer, last_epoch) 97 | 98 | 99 | class CosStepScheduler(LRScheduler): 100 | def __init__(self, optimizer, start_lr=0.01, end_lr=0.005, 101 | epochs=50, last_epoch=-1, **kwargs): 102 | self.start_lr = start_lr 103 | self.end_lr = end_lr 104 | self.lr_spaces = self._build_lr(start_lr, end_lr, epochs) 105 | 106 | super(CosStepScheduler, self).__init__(optimizer, last_epoch) 107 | 108 | def _build_lr(self, start_lr, end_lr, epochs): 109 | index = np.arange(epochs).astype(np.float32) 110 | lr = end_lr + (start_lr - end_lr) * \ 111 | (1. 
+ np.cos(index * np.pi / epochs)) * 0.5 112 | return lr.astype(np.float32) 113 | 114 | 115 | class WarmUPScheduler(LRScheduler): 116 | def __init__(self, optimizer, warmup, normal, epochs=50, last_epoch=-1): 117 | warmup = warmup.lr_spaces # [::-1] 118 | normal = normal.lr_spaces 119 | self.lr_spaces = np.concatenate([warmup, normal]) 120 | self.start_lr = normal[0] 121 | 122 | super(WarmUPScheduler, self).__init__(optimizer, last_epoch) 123 | 124 | 125 | LRs = { 126 | 'log': LogScheduler, 127 | 'step': StepScheduler, 128 | 'multi-step': MultiStepScheduler, 129 | 'linear': LinearStepScheduler, 130 | 'cos': CosStepScheduler} 131 | 132 | 133 | def _build_lr_scheduler(optimizer, config, epochs=50, last_epoch=-1): 134 | return LRs[config.TYPE](optimizer, last_epoch=last_epoch, 135 | epochs=epochs, **config.KWARGS) 136 | 137 | 138 | def _build_warm_up_scheduler(optimizer, epochs=50, last_epoch=-1): 139 | warmup_epoch = cfg.TRAIN.LR_WARMUP.EPOCH 140 | sc1 = _build_lr_scheduler(optimizer, cfg.TRAIN.LR_WARMUP, 141 | warmup_epoch, last_epoch) 142 | sc2 = _build_lr_scheduler(optimizer, cfg.TRAIN.LR, 143 | epochs - warmup_epoch, last_epoch) 144 | return WarmUPScheduler(optimizer, sc1, sc2, epochs, last_epoch) 145 | 146 | 147 | def build_lr_scheduler(optimizer, epochs=50, last_epoch=-1): 148 | if cfg.TRAIN.LR_WARMUP.WARMUP: 149 | return _build_warm_up_scheduler(optimizer, epochs, last_epoch) 150 | else: 151 | return _build_lr_scheduler(optimizer, cfg.TRAIN.LR, 152 | epochs, last_epoch) 153 | 154 | 155 | if __name__ == '__main__': 156 | import torch.nn as nn 157 | from torch.optim import SGD 158 | 159 | class Net(nn.Module): 160 | def __init__(self): 161 | super(Net, self).__init__() 162 | self.conv = nn.Conv2d(10, 10, kernel_size=3) 163 | net = Net().parameters() 164 | optimizer = SGD(net, lr=0.01) 165 | 166 | # test1 167 | step = { 168 | 'type': 'step', 169 | 'start_lr': 0.01, 170 | 'step': 10, 171 | 'mult': 0.1 172 | } 173 | lr = build_lr_scheduler(optimizer, step) 174 | print(lr) 175 | 176 | log = { 177 | 'type': 'log', 178 | 'start_lr': 0.03, 179 | 'end_lr': 5e-4, 180 | } 181 | lr = build_lr_scheduler(optimizer, log) 182 | 183 | print(lr) 184 | 185 | log = { 186 | 'type': 'multi-step', 187 | "start_lr": 0.01, 188 | "mult": 0.1, 189 | "steps": [10, 15, 20] 190 | } 191 | lr = build_lr_scheduler(optimizer, log) 192 | print(lr) 193 | 194 | cos = { 195 | "type": 'cos', 196 | 'start_lr': 0.01, 197 | 'end_lr': 0.0005, 198 | } 199 | lr = build_lr_scheduler(optimizer, cos) 200 | print(lr) 201 | 202 | step = { 203 | 'type': 'step', 204 | 'start_lr': 0.001, 205 | 'end_lr': 0.03, 206 | 'step': 1, 207 | } 208 | 209 | warmup = log.copy() 210 | warmup['warmup'] = step 211 | warmup['warmup']['epoch'] = 5 212 | lr = build_lr_scheduler(optimizer, warmup, epochs=55) 213 | print(lr) 214 | 215 | lr.step() 216 | print(lr.last_epoch) 217 | 218 | lr.step(5) 219 | print(lr.last_epoch) 220 | -------------------------------------------------------------------------------- /pysot/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
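# ---------------------------------------------------------------------------
# The schedulers in lr_scheduler.py above all precompute a per-epoch table
# (lr_spaces); warm-up simply concatenates two such tables. A short NumPy
# sketch of the cosine rule from CosStepScheduler._build_lr, with
# illustrative (not repository-default) rates:
#
#   import numpy as np
#
#   def cos_lr_table(start_lr=0.01, end_lr=0.0005, epochs=50):
#       idx = np.arange(epochs, dtype=np.float32)
#       return end_lr + (start_lr - end_lr) * (1.0 + np.cos(idx * np.pi / epochs)) * 0.5
#
#   warmup = np.linspace(0.001, 0.01, 5)              # e.g. a 5-epoch linear warm-up
#   table = np.concatenate([warmup, cos_lr_table()])  # what WarmUPScheduler builds
#   # table[epoch] is the base LR for that epoch, scaled per param group in get_lr().
# ---------------------------------------------------------------------------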
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import os 9 | import numpy as np 10 | import torch 11 | 12 | from colorama import Fore, Style 13 | 14 | 15 | __all__ = ['commit', 'describe'] 16 | 17 | 18 | def _exec(cmd): 19 | f = os.popen(cmd, 'r', 1) 20 | return f.read().strip() 21 | 22 | 23 | def _bold(s): 24 | return "\033[1m%s\033[0m" % s 25 | 26 | 27 | def _color(s): 28 | # return f'{Fore.RED}{s}{Style.RESET_ALL}' 29 | return "{}{}{}".format(Fore.RED,s,Style.RESET_ALL) 30 | 31 | 32 | def _describe(model, lines=None, spaces=0): 33 | head = " " * spaces 34 | for name, p in model.named_parameters(): 35 | if '.' in name: 36 | continue 37 | if p.requires_grad: 38 | name = _color(name) 39 | line = "{head}- {name}".format(head=head, name=name) 40 | lines.append(line) 41 | 42 | for name, m in model.named_children(): 43 | space_num = len(name) + spaces + 1 44 | if m.training: 45 | name = _color(name) 46 | line = "{head}.{name} ({type})".format( 47 | head=head, 48 | name=name, 49 | type=m.__class__.__name__) 50 | lines.append(line) 51 | _describe(m, lines, space_num) 52 | 53 | 54 | def commit(): 55 | root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')) 56 | cmd = "cd {}; git log | head -n1 | awk '{{print $2}}'".format(root) 57 | commit = _exec(cmd) 58 | cmd = "cd {}; git log --oneline | head -n1".format(root) 59 | commit_log = _exec(cmd) 60 | return "commit : {}\n log : {}".format(commit, commit_log) 61 | 62 | 63 | def describe(net, name=None): 64 | num = 0 65 | lines = [] 66 | if name is not None: 67 | lines.append(name) 68 | num = len(name) 69 | _describe(net, lines, num) 70 | return "\n".join(lines) 71 | 72 | 73 | def bbox_clip(x, min_value, max_value): 74 | new_x = max(min_value, min(x, max_value)) 75 | return new_x 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /pysot/utils/model_load.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
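# ---------------------------------------------------------------------------
# A minimal sketch of how the checkpoint helpers defined below are typically
# used: load_pretrain() strips a leading 'module.' prefix, warns about
# missing/unused keys, and loads non-strictly; restore_from() also restores
# the optimizer and returns the stored epoch. The resume path and optimizer
# settings here are placeholders.
#
#   import torch
#   from pysot.models.model_builder import ModelBuilder
#   from pysot.utils.model_load import load_pretrain, restore_from
#
#   model = ModelBuilder()
#   model = load_pretrain(model, 'snapshot/AFRTmodel.pth')       # init for test / fine-tune
#
#   optimizer = torch.optim.SGD(model.parameters(), lr=0.005, momentum=0.9)
#   model, optimizer, start_epoch = restore_from(
#       model, optimizer, 'snapshot/checkpoint_e10.pth')         # resume training
# ---------------------------------------------------------------------------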
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import logging 9 | 10 | import torch 11 | 12 | 13 | logger = logging.getLogger('global') 14 | 15 | 16 | def check_keys(model, pretrained_state_dict): 17 | ckpt_keys = set(pretrained_state_dict.keys()) 18 | model_keys = set(model.state_dict().keys()) 19 | used_pretrained_keys = model_keys & ckpt_keys 20 | unused_pretrained_keys = ckpt_keys - model_keys 21 | missing_keys = model_keys - ckpt_keys 22 | # filter 'num_batches_tracked' 23 | missing_keys = [x for x in missing_keys 24 | if not x.endswith('num_batches_tracked')] 25 | if len(missing_keys) > 0: 26 | logger.info('[Warning] missing keys: {}'.format(missing_keys)) 27 | logger.info('missing keys:{}'.format(len(missing_keys))) 28 | if len(unused_pretrained_keys) > 0: 29 | logger.info('[Warning] unused_pretrained_keys: {}'.format( 30 | unused_pretrained_keys)) 31 | logger.info('unused checkpoint keys:{}'.format( 32 | len(unused_pretrained_keys))) 33 | logger.info('used keys:{}'.format(len(used_pretrained_keys))) 34 | assert len(used_pretrained_keys) > 0, \ 35 | 'load NONE from pretrained checkpoint' 36 | return True 37 | 38 | 39 | def remove_prefix(state_dict, prefix): 40 | ''' Old style model is stored with all names of parameters 41 | share common prefix 'module.' ''' 42 | logger.info('remove prefix \'{}\''.format(prefix)) 43 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 44 | return {f(key): value for key, value in state_dict.items()} 45 | 46 | 47 | def load_pretrain(model, pretrained_path): 48 | logger.info('load pretrained model from {}'.format(pretrained_path)) 49 | device = torch.cuda.current_device() 50 | pretrained_dict = torch.load(pretrained_path, 51 | map_location=lambda storage, loc: storage.cuda(device)) 52 | if "state_dict" in pretrained_dict.keys(): 53 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 54 | 'module.') 55 | else: 56 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 57 | 58 | try: 59 | check_keys(model, pretrained_dict) 60 | except: 61 | logger.info('[Warning]: using pretrain as features.\ 62 | Adding "features." as prefix') 63 | new_dict = {} 64 | for k, v in pretrained_dict.items(): 65 | k = 'features.' + k 66 | new_dict[k] = v 67 | pretrained_dict = new_dict 68 | check_keys(model, pretrained_dict) 69 | model.load_state_dict(pretrained_dict, strict=False) 70 | return model 71 | 72 | 73 | def restore_from(model, optimizer, ckpt_path): 74 | device = torch.cuda.current_device() 75 | ckpt = torch.load(ckpt_path, 76 | map_location=lambda storage, loc: storage.cuda(device)) 77 | epoch = ckpt['epoch'] 78 | 79 | ckpt_model_dict = remove_prefix(ckpt['state_dict'], 'module.') 80 | check_keys(model, ckpt_model_dict) 81 | model.load_state_dict(ckpt_model_dict, strict=False) 82 | 83 | check_keys(optimizer, ckpt['optimizer']) 84 | optimizer.load_state_dict(ckpt['optimizer']) 85 | return model, optimizer, epoch 86 | -------------------------------------------------------------------------------- /pysot/utils/xcorr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
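# ---------------------------------------------------------------------------
# Shape contract of xcorr_depthwise() defined below: each channel of the
# search feature is correlated with the matching template channel via
# grouped conv2d (stride 1, no padding), so the output is
# (B, C, Hx - Hk + 1, Wx - Wk + 1). A self-contained check with arbitrary,
# illustrative sizes:
#
#   import torch
#   from pysot.utils.xcorr import xcorr_depthwise
#
#   x = torch.randn(2, 256, 22, 22)        # search features   (B, C, Hx, Wx)
#   z = torch.randn(2, 256, 6, 6)          # template features (B, C, Hk, Wk)
#   out = xcorr_depthwise(x, z)
#   assert out.shape == (2, 256, 17, 17)   # 22 - 6 + 1 = 17
# ---------------------------------------------------------------------------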
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import torch 9 | import torch.nn.functional as F 10 | 11 | 12 | def xcorr_slow(x, kernel): 13 | """for loop to calculate cross correlation, slow version 14 | """ 15 | batch = x.size()[0] 16 | out = [] 17 | for i in range(batch): 18 | px = x[i] 19 | pk = kernel[i] 20 | px = px.view(1, px.size()[0], px.size()[1], px.size()[2]) 21 | pk = pk.view(-1, px.size()[1], pk.size()[1], pk.size()[2]) 22 | po = F.conv2d(px, pk) 23 | out.append(po) 24 | out = torch.cat(out, 0) 25 | return out 26 | 27 | 28 | def xcorr_fast(x, kernel): 29 | """group conv2d to calculate cross correlation, fast version 30 | """ 31 | batch = kernel.size()[0] 32 | pk = kernel.view(-1, x.size()[1], kernel.size()[2], kernel.size()[3]) 33 | px = x.view(1, -1, x.size()[2], x.size()[3]) 34 | po = F.conv2d(px, pk, groups=batch) 35 | po = po.view(batch, -1, po.size()[2], po.size()[3]) 36 | return po 37 | 38 | 39 | def xcorr_depthwise(x, kernel): 40 | """depthwise cross correlation 41 | """ 42 | batch = kernel.size(0) 43 | channel = kernel.size(1) 44 | x = x.view(1, batch*channel, x.size(2), x.size(3)) 45 | kernel = kernel.view(batch*channel, 1, kernel.size(2), kernel.size(3)) 46 | out = F.conv2d(x, kernel, groups=batch*channel) 47 | out = out.view(batch, channel, out.size(2), out.size(3)) 48 | return out 49 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | opencv-python 3 | pyyaml 4 | yacs 5 | tqdm 6 | colorama 7 | matplotlib 8 | cython 9 | tensorboardX 10 | -------------------------------------------------------------------------------- /toolkit/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .uavtrack112_l import UAVTrack112lDataset 2 | from .uav10fps import UAV10Dataset 3 | from .uav20l import UAV20Dataset 4 | from .dtb import DTBDataset 5 | from .uavdt import UAVDTDataset 6 | from .visdrone1 import VISDRONED2018Dataset 7 | from .v4r import V4RDataset 8 | from .uav import UAVDataset 9 | from .uav1231 import UAV123lDataset 10 | from .dtb import DTBDataset 11 | from .testreal import V4RtestDataset 12 | class DatasetFactory(object): 13 | @staticmethod 14 | def create_dataset(**kwargs): 15 | 16 | 17 | assert 'name' in kwargs, "should provide dataset name" 18 | name = kwargs['name'] 19 | 20 | if 'UAV10' == name: 21 | dataset = UAV10Dataset(**kwargs) 22 | elif 'UAV20' in name: 23 | dataset = UAV20Dataset(**kwargs) 24 | 25 | elif 'VISDRONED2018' in name: 26 | dataset = VISDRONED2018Dataset(**kwargs) 27 | elif'UAV101' in name : 28 | dataset = UAV123lDataset(**kwargs) 29 | elif 'UAVTrack112_l' in name: 30 | dataset = UAVTrack112lDataset(**kwargs) 31 | elif 'UAVTrack112' in name: 32 | dataset = V4RDataset(**kwargs) 33 | elif 'UAV123' in name: 34 | dataset = UAVDataset(**kwargs) 35 | elif 'DTB70' in name: 36 | dataset = DTBDataset(**kwargs) 37 | elif 'UAVDT' in name: 38 | dataset = UAVDTDataset(**kwargs) 39 | elif 'testreal' in name: 40 | dataset = V4RtestDataset(**kwargs) 41 | else: 42 | raise Exception("unknow dataset {}".format(kwargs['name'])) 43 | return dataset 44 | 45 | -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/__init__.cpython-36.pyc: 
https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/toolkit/datasets/__pycache__/uavtrack112_l.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/v4r.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/toolkit/datasets/__pycache__/v4r.cpython-36.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/v4r.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/toolkit/datasets/__pycache__/v4r.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/video.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/toolkit/datasets/__pycache__/video.cpython-36.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/video.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/toolkit/datasets/__pycache__/video.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/visdrone1.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/toolkit/datasets/__pycache__/visdrone1.cpython-36.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/visdrone1.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/toolkit/datasets/__pycache__/visdrone1.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/dataset.py: -------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | 3 | class Dataset(object): 4 | def __init__(self, name, dataset_root): 5 | self.name = name 6 | self.dataset_root = dataset_root 7 | self.videos = None 8 | 9 | def __getitem__(self, idx): 10 | if isinstance(idx, str): 11 | return self.videos[idx] 12 | elif isinstance(idx, int): 13 | return self.videos[sorted(list(self.videos.keys()))[idx]] 14 | 15 | def __len__(self): 16 | return len(self.videos) 17 | 18 | def __iter__(self): 19 | keys = sorted(list(self.videos.keys())) 20 | for key in keys: 21 | yield self.videos[key] 22 | 23 | def set_tracker(self, path, tracker_names): 24 | """ 25 | Args: 26 | path: path to tracker results, 27 | tracker_names: list of tracker name 28 | """ 29 | self.tracker_path = path 30 | self.tracker_names = tracker_names 31 | # for video in tqdm(self.videos.values(), 32 | # desc='loading tacker result', ncols=100): 33 | # video.load_tracker(path, tracker_names) 34 | -------------------------------------------------------------------------------- /toolkit/datasets/dtb.py: 
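`Dataset` above is the thin container the evaluation code relies on: a concrete subclass (such as `DTBDataset` below) only has to fill `self.videos`, after which sequences can be fetched by name, by sorted index, or by iteration. A minimal usage sketch, assuming the repository root is on `PYTHONPATH` and a dataset with its `UAV123.json` annotation file sits under a local `test_dataset/UAV123` directory (an assumed path, following the README layout):

    from toolkit.datasets import DatasetFactory

    dataset = DatasetFactory.create_dataset(name='UAV123',
                                            dataset_root='test_dataset/UAV123',
                                            load_img=False)
    print(len(dataset))               # number of sequences
    video = dataset[0]                # sequences are sorted by name, so [0] is the first
    same_video = dataset[video.name]  # ...or look a sequence up directly by name
    for v in dataset:                 # ordered iteration, as used by the OPE benchmark
        print(v.name, len(v))         # sequence name and frame count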
-------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | class DTBVideo(Video): 14 | """ 15 | Args: 16 | name: video name 17 | root: dataset root 18 | video_dir: video directory 19 | init_rect: init rectangle 20 | img_names: image names 21 | gt_rect: groundtruth rectangle 22 | attr: attribute of video 23 | """ 24 | def __init__(self, name, root, video_dir, init_rect, img_names, 25 | gt_rect, attr, load_img=False): 26 | super(DTBVideo, self).__init__(name, root, video_dir, 27 | init_rect, img_names, gt_rect, attr, load_img) 28 | 29 | def load_tracker(self, path, tracker_names=None, store=True): 30 | """ 31 | Args: 32 | path(str): path to result 33 | tracker_name(list): name of tracker 34 | """ 35 | pred_traj=[] 36 | 37 | if not tracker_names: 38 | tracker_names = [x.split('/')[-1] for x in glob(path) 39 | if os.path.isdir(x)] 40 | if isinstance(tracker_names, str): 41 | tracker_names = [tracker_names] 42 | for name in tracker_names: 43 | traj_file = os.path.join(path, name, self.name+'.txt') 44 | if not os.path.exists(traj_file): 45 | if self.name == 'FleetFace': 46 | txt_name = 'fleetface.txt' 47 | elif self.name == 'Jogging-1': 48 | txt_name = 'jogging_1.txt' 49 | elif self.name == 'Jogging-2': 50 | txt_name = 'jogging_2.txt' 51 | elif self.name == 'Skating2-1': 52 | txt_name = 'skating2_1.txt' 53 | elif self.name == 'Skating2-2': 54 | txt_name = 'skating2_2.txt' 55 | elif self.name == 'FaceOcc1': 56 | txt_name = 'faceocc1.txt' 57 | elif self.name == 'FaceOcc2': 58 | txt_name = 'faceocc2.txt' 59 | elif self.name == 'Human4-2': 60 | txt_name = 'human4_2.txt' 61 | else: 62 | txt_name = self.name[0].lower()+self.name[1:]+'.txt' 63 | traj_file = os.path.join(path, name, txt_name) 64 | if os.path.exists(traj_file): 65 | with open(traj_file, 'r') as f : 66 | 67 | for x in f.readlines(): 68 | pred_traj.append(list(map(float, x.strip().split(',')))) 69 | 70 | 71 | if len(pred_traj) != len(self.gt_traj): 72 | print(name, len(pred_traj), len(self.gt_traj), self.name) 73 | if store: 74 | self.pred_trajs[name] = pred_traj 75 | else: 76 | return pred_traj 77 | else: 78 | print(traj_file) 79 | self.tracker_names = list(self.pred_trajs.keys()) 80 | def ca(): 81 | 82 | 83 | 84 | path='/home/tj-v4r/Dataset/DTB70' 85 | txt='groundtruth_rect.txt' 86 | name_list=os.listdir(path) 87 | name_list.sort() 88 | a=1 89 | b=[] 90 | for i in range(a): 91 | b.append(name_list[i]) 92 | c=[] 93 | 94 | for jj in range(a): 95 | imgs=path+'/'+str(name_list[jj])+'/img' 96 | txt=path+'/'+str(name_list[jj])+'/groundtruth_rect.txt' 97 | bbox=[] 98 | f = open(txt) # 返回一个文件对象 99 | file= f.readlines() 100 | li=os.listdir(imgs) 101 | li.sort() 102 | for ii in range(len(file)): 103 | li[ii]=name_list[jj]+'/img/'+li[ii] 104 | 105 | line = file[ii].strip('\n').split(',') 106 | 107 | try: 108 | line[0]=int(line[0]) 109 | except: 110 | line[0]=float(line[0]) 111 | try: 112 | line[1]=int(line[1]) 113 | except: 114 | line[1]=float(line[1]) 115 | try: 116 | line[2]=int(line[2]) 117 | except: 118 | line[2]=float(line[2]) 119 | try: 120 | line[3]=int(line[3]) 121 | except: 122 | line[3]=float(line[3]) 123 | bbox.append(line) 124 | 125 | if len(bbox)!=len(li): 126 | print (jj) 127 | f.close() 128 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 129 | 130 
| d=dict(zip(b,c)) 131 | 132 | return d 133 | class DTBDataset(Dataset): 134 | """ 135 | Args: 136 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50' 137 | dataset_root: dataset root 138 | load_img: wether to load all imgs 139 | """ 140 | def __init__(self, name, dataset_root, load_img=False): 141 | super(DTBDataset, self).__init__(name, dataset_root) 142 | # with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 143 | # meta_data = json.load(f) 144 | meta_data=ca() 145 | # load videos 146 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 147 | self.videos = {} 148 | for video in pbar: 149 | pbar.set_postfix_str(video) 150 | self.videos[video] = DTBVideo(video, 151 | dataset_root, 152 | meta_data[video]['video_dir'], 153 | meta_data[video]['init_rect'], 154 | meta_data[video]['img_names'], 155 | meta_data[video]['gt_rect'], 156 | meta_data[video]['attr'], 157 | load_img) 158 | 159 | # set attr 160 | attr = [] 161 | for x in self.videos.values(): 162 | attr += x.attr 163 | attr = set(attr) 164 | self.attr = {} 165 | self.attr['ALL'] = list(self.videos.keys()) 166 | for x in attr: 167 | self.attr[x] = [] 168 | for k, v in self.videos.items(): 169 | for attr_ in v.attr: 170 | self.attr[attr_].append(k) 171 | -------------------------------------------------------------------------------- /toolkit/datasets/uav.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from tqdm import tqdm 5 | from glob import glob 6 | 7 | from .dataset import Dataset 8 | from .video import Video 9 | 10 | class UAVVideo(Video): 11 | """ 12 | Args: 13 | name: video name 14 | root: dataset root 15 | video_dir: video directory 16 | init_rect: init rectangle 17 | img_names: image names 18 | gt_rect: groundtruth rectangle 19 | attr: attribute of video 20 | """ 21 | def __init__(self, name, root, video_dir, init_rect, img_names, 22 | gt_rect, attr, load_img=False): 23 | super(UAVVideo, self).__init__(name, root, video_dir, 24 | init_rect, img_names, gt_rect, attr, load_img) 25 | 26 | 27 | class UAVDataset(Dataset): 28 | """ 29 | Args: 30 | name: dataset name, should be 'UAV123', 'UAV20L' 31 | dataset_root: dataset root 32 | load_img: wether to load all imgs 33 | """ 34 | def __init__(self, name, dataset_root, load_img=False): 35 | super(UAVDataset, self).__init__(name, dataset_root) 36 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 37 | meta_data = json.load(f) 38 | 39 | # load videos 40 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 41 | self.videos = {} 42 | for video in pbar: 43 | pbar.set_postfix_str(video) 44 | self.videos[video] = UAVVideo(video, 45 | dataset_root, 46 | meta_data[video]['video_dir'], 47 | meta_data[video]['init_rect'], 48 | meta_data[video]['img_names'], 49 | meta_data[video]['gt_rect'], 50 | meta_data[video]['attr']) 51 | 52 | # set attr 53 | attr = [] 54 | for x in self.videos.values(): 55 | attr += x.attr 56 | attr = set(attr) 57 | self.attr = {} 58 | self.attr['ALL'] = list(self.videos.keys()) 59 | for x in attr: 60 | self.attr[x] = [] 61 | for k, v in self.videos.items(): 62 | for attr_ in v.attr: 63 | self.attr[attr_].append(k) 64 | 65 | -------------------------------------------------------------------------------- /toolkit/datasets/uav10fps.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import 
glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | def ca(): 13 | path='/home/tj-v4r/Dataset/UAV123_10fps' 14 | 15 | name_list=os.listdir(path+'/data_seq') 16 | name_list.sort() 17 | a=123 18 | b=[] 19 | for i in range(a): 20 | b.append(name_list[i]) 21 | c=[] 22 | 23 | for jj in range(a): 24 | imgs=path+'/data_seq/'+str(name_list[jj]) 25 | txt=path+'/anno/'+str(name_list[jj])+'.txt' 26 | bbox=[] 27 | f = open(txt) # 返回一个文件对象 28 | file= f.readlines() 29 | li=os.listdir(imgs) 30 | li.sort() 31 | for ii in range(len(file)): 32 | li[ii]=name_list[jj]+'/'+li[ii] 33 | 34 | line = file[ii].strip('\n').split(',') 35 | 36 | try: 37 | line[0]=int(line[0]) 38 | except: 39 | line[0]=float(line[0]) 40 | try: 41 | line[1]=int(line[1]) 42 | except: 43 | line[1]=float(line[1]) 44 | try: 45 | line[2]=int(line[2]) 46 | except: 47 | line[2]=float(line[2]) 48 | try: 49 | line[3]=int(line[3]) 50 | except: 51 | line[3]=float(line[3]) 52 | bbox.append(line) 53 | 54 | if len(bbox)!=len(li): 55 | print (jj) 56 | f.close() 57 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 58 | 59 | d=dict(zip(b,c)) 60 | 61 | return d 62 | 63 | class UAVVideo(Video): 64 | """ 65 | Args: 66 | name: video name 67 | root: dataset root 68 | video_dir: video directory 69 | init_rect: init rectangle 70 | img_names: image names 71 | gt_rect: groundtruth rectangle 72 | attr: attribute of video 73 | """ 74 | def __init__(self, name, root, video_dir, init_rect, img_names, 75 | gt_rect, attr, load_img=False): 76 | super(UAVVideo, self).__init__(name, root, video_dir, 77 | init_rect, img_names, gt_rect, attr, load_img) 78 | 79 | 80 | class UAV10Dataset(Dataset): 81 | """ 82 | Args: 83 | name: dataset name, should be 'UAV123', 'UAV20L' 84 | dataset_root: dataset root 85 | load_img: wether to load all imgs 86 | """ 87 | def __init__(self, name, dataset_root, load_img=False): 88 | super(UAV10Dataset, self).__init__(name, dataset_root) 89 | meta_data = ca() 90 | 91 | # load videos 92 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 93 | self.videos = {} 94 | for video in pbar: 95 | pbar.set_postfix_str(video) 96 | self.videos[video] = UAVVideo(video, 97 | dataset_root+'/data_seq', 98 | meta_data[video]['video_dir'], 99 | meta_data[video]['init_rect'], 100 | meta_data[video]['img_names'], 101 | meta_data[video]['gt_rect'], 102 | meta_data[video]['attr']) 103 | 104 | # set attr 105 | attr = [] 106 | for x in self.videos.values(): 107 | attr += x.attr 108 | attr = set(attr) 109 | self.attr = {} 110 | self.attr['ALL'] = list(self.videos.keys()) 111 | for x in attr: 112 | self.attr[x] = [] 113 | for k, v in self.videos.items(): 114 | for attr_ in v.attr: 115 | self.attr[attr_].append(k) 116 | 117 | -------------------------------------------------------------------------------- /toolkit/datasets/uav20l.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | def loaddata(): 14 | 15 | path='/home/mist/dataset/UAV123_20L' 16 | 17 | name_list=os.listdir(path+'/data_seq') 18 | name_list.sort() 19 | 20 | b=[] 21 | for i in range(len(name_list)): 22 | b.append(name_list[i]) 23 | c=[] 24 | 25 | for jj in range(len(name_list)): 26 | imgs=path+'/data_seq/'+str(name_list[jj]) 27 | txt=path+'/anno/'+str(name_list[jj])+'.txt' 
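# The loop below pairs every frame file in data_seq/<seq> with the matching
# x,y,w,h row of anno/<seq>.txt, casting each value to int when possible and
# to float otherwise. This is the same pattern as ca() in uav10fps.py above;
# a consolidated sketch of it appears after uavtrack112_l.py below.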
28 | bbox=[] 29 | f = open(txt) # 返回一个文件对象 30 | file= f.readlines() 31 | li=os.listdir(imgs) 32 | li.sort() 33 | for ii in range(len(file)): 34 | li[ii]=name_list[jj]+'/'+li[ii] 35 | 36 | line = file[ii].strip('\n').split(',') 37 | 38 | try: 39 | line[0]=int(line[0]) 40 | except: 41 | line[0]=float(line[0]) 42 | try: 43 | line[1]=int(line[1]) 44 | except: 45 | line[1]=float(line[1]) 46 | try: 47 | line[2]=int(line[2]) 48 | except: 49 | line[2]=float(line[2]) 50 | try: 51 | line[3]=int(line[3]) 52 | except: 53 | line[3]=float(line[3]) 54 | bbox.append(line) 55 | 56 | if len(bbox)!=len(li): 57 | print (jj) 58 | f.close() 59 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 60 | 61 | d=dict(zip(b,c)) 62 | 63 | return d 64 | 65 | class UAVVideo(Video): 66 | """ 67 | Args: 68 | name: video name 69 | root: dataset root 70 | video_dir: video directory 71 | init_rect: init rectangle 72 | img_names: image names 73 | gt_rect: groundtruth rectangle 74 | attr: attribute of video 75 | """ 76 | def __init__(self, name, root, video_dir, init_rect, img_names, 77 | gt_rect, attr, load_img=False): 78 | super(UAVVideo, self).__init__(name, root, video_dir, 79 | init_rect, img_names, gt_rect, attr, load_img) 80 | 81 | 82 | class UAV20Dataset(Dataset): 83 | """ 84 | Args: 85 | name: dataset name, should be 'UAV123', 'UAV20L' 86 | dataset_root: dataset root 87 | load_img: wether to load all imgs 88 | """ 89 | def __init__(self, name, dataset_root, load_img=False): 90 | super(UAV20Dataset, self).__init__(name, dataset_root) 91 | meta_data = loaddata() 92 | 93 | # load videos 94 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 95 | self.videos = {} 96 | for video in pbar: 97 | pbar.set_postfix_str(video) 98 | self.videos[video] = UAVVideo(video, 99 | dataset_root+'/data_seq', 100 | meta_data[video]['video_dir'], 101 | meta_data[video]['init_rect'], 102 | meta_data[video]['img_names'], 103 | meta_data[video]['gt_rect'], 104 | meta_data[video]['attr']) 105 | 106 | # set attr 107 | attr = [] 108 | for x in self.videos.values(): 109 | attr += x.attr 110 | attr = set(attr) 111 | self.attr = {} 112 | self.attr['ALL'] = list(self.videos.keys()) 113 | for x in attr: 114 | self.attr[x] = [] 115 | for k, v in self.videos.items(): 116 | for attr_ in v.attr: 117 | self.attr[attr_].append(k) 118 | 119 | -------------------------------------------------------------------------------- /toolkit/datasets/uavdt.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | class UVADTVideo(Video): 14 | """ 15 | Args: 16 | name: video name 17 | root: dataset root 18 | video_dir: video directory 19 | init_rect: init rectangle 20 | img_names: image names 21 | gt_rect: groundtruth rectangle 22 | attr: attribute of video 23 | """ 24 | def __init__(self, name, root, video_dir, init_rect, img_names, 25 | gt_rect, attr, load_img=False): 26 | super(UVADTVideo, self).__init__(name, root, video_dir, 27 | init_rect, img_names, gt_rect, attr, load_img) 28 | 29 | def load_tracker(self, path, tracker_names=None, store=True): 30 | """ 31 | Args: 32 | path(str): path to result 33 | tracker_name(list): name of tracker 34 | """ 35 | if not tracker_names: 36 | tracker_names = [x.split('/')[-1] for x in glob(path) 37 | if os.path.isdir(x)] 38 | if 
isinstance(tracker_names, str): 39 | tracker_names = [tracker_names] 40 | for name in tracker_names: 41 | traj_file = os.path.join(path, name, self.name+'.txt') 42 | if not os.path.exists(traj_file): 43 | if self.name == 'FleetFace': 44 | txt_name = 'fleetface.txt' 45 | elif self.name == 'Jogging-1': 46 | txt_name = 'jogging_1.txt' 47 | elif self.name == 'Jogging-2': 48 | txt_name = 'jogging_2.txt' 49 | elif self.name == 'Skating2-1': 50 | txt_name = 'skating2_1.txt' 51 | elif self.name == 'Skating2-2': 52 | txt_name = 'skating2_2.txt' 53 | elif self.name == 'FaceOcc1': 54 | txt_name = 'faceocc1.txt' 55 | elif self.name == 'FaceOcc2': 56 | txt_name = 'faceocc2.txt' 57 | elif self.name == 'Human4-2': 58 | txt_name = 'human4_2.txt' 59 | else: 60 | txt_name = self.name[0].lower()+self.name[1:]+'.txt' 61 | traj_file = os.path.join(path, name, txt_name) 62 | if os.path.exists(traj_file): 63 | with open(traj_file, 'r') as f : 64 | pred_traj = [list(map(float, x.strip().split(','))) 65 | for x in f.readlines()] 66 | if len(pred_traj) != len(self.gt_traj): 67 | print(name, len(pred_traj), len(self.gt_traj), self.name) 68 | if store: 69 | self.pred_trajs[name] = pred_traj 70 | else: 71 | return pred_traj 72 | else: 73 | print(traj_file) 74 | self.tracker_names = list(self.pred_trajs.keys()) 75 | def ca(): 76 | 77 | 78 | 79 | 80 | path='/home/mist/dataset/UAVDT' 81 | 82 | name_list=os.listdir(path+'/data_seq') 83 | name_list.sort() 84 | a=50 85 | b=[] 86 | for i in range(a): 87 | b.append(name_list[i]) 88 | c=[] 89 | 90 | for jj in range(a): 91 | imgs=path+'/data_seq/'+str(name_list[jj]) 92 | txt=path+'/anno/'+str(name_list[jj])+'_gt.txt' 93 | bbox=[] 94 | f = open(txt) # 返回一个文件对象 95 | file= f.readlines() 96 | li=os.listdir(imgs) 97 | li.sort() 98 | for ii in range(len(file)): 99 | li[ii]='data_seq/'+name_list[jj]+'/'+li[ii] 100 | 101 | line = file[ii].strip('\n').split(',') 102 | 103 | try: 104 | line[0]=int(line[0]) 105 | except: 106 | line[0]=float(line[0]) 107 | try: 108 | line[1]=int(line[1]) 109 | except: 110 | line[1]=float(line[1]) 111 | try: 112 | line[2]=int(line[2]) 113 | except: 114 | line[2]=float(line[2]) 115 | try: 116 | line[3]=int(line[3]) 117 | except: 118 | line[3]=float(line[3]) 119 | bbox.append(line) 120 | 121 | if len(bbox)!=len(li): 122 | print (jj) 123 | f.close() 124 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 125 | 126 | d=dict(zip(b,c)) 127 | 128 | return d 129 | class UAVDTDataset(Dataset): 130 | """ 131 | Args: 132 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50' 133 | dataset_root: dataset root 134 | load_img: wether to load all imgs 135 | """ 136 | def __init__(self, name, dataset_root, load_img=False): 137 | super(UAVDTDataset, self).__init__(name, dataset_root) 138 | # with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 139 | # meta_data = json.load(f) 140 | meta_data=ca() 141 | # load videos 142 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 143 | self.videos = {} 144 | for video in pbar: 145 | pbar.set_postfix_str(video) 146 | self.videos[video] = UVADTVideo(video, 147 | dataset_root, 148 | meta_data[video]['video_dir'], 149 | meta_data[video]['init_rect'], 150 | meta_data[video]['img_names'], 151 | meta_data[video]['gt_rect'], 152 | meta_data[video]['attr'], 153 | load_img) 154 | 155 | # set attr 156 | attr = [] 157 | for x in self.videos.values(): 158 | attr += x.attr 159 | attr = set(attr) 160 | self.attr = {} 161 | self.attr['ALL'] = list(self.videos.keys()) 162 | 
for x in attr: 163 | self.attr[x] = [] 164 | for k, v in self.videos.items(): 165 | for attr_ in v.attr: 166 | self.attr[attr_].append(k) 167 | -------------------------------------------------------------------------------- /toolkit/datasets/uavtrack112_l.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | def ca(): 13 | path='/home/tj-v4r/Dataset/UAVTrack112' 14 | namefile=os.listdir(path+'/anno_li') 15 | namefile.sort() 16 | 17 | name_list=[] 18 | for seqname in namefile: 19 | name_list.append(seqname[:-4]) 20 | 21 | 22 | 23 | # name_list=os.listdir(path+'/data_seq') 24 | # name_list.sort() 25 | a=len(name_list) 26 | b=[] 27 | for i in range(a): 28 | b.append(name_list[i]) 29 | c=[] 30 | 31 | for jj in range(a): 32 | 33 | 34 | 35 | imgs=path+'/data_seq/'+str(name_list[jj]) 36 | txt=path+'/anno/'+str(name_list[jj])+'.txt' 37 | bbox=[] 38 | f = open(txt) # 返回一个文件对象 39 | file= f.readlines() 40 | li=os.listdir(imgs) 41 | li.sort() 42 | for ii in range(len(file)): 43 | li[ii]=name_list[jj]+'/'+li[ii] 44 | 45 | line = file[ii].strip('\n').split(',') 46 | if len(line)!=4: 47 | line = file[ii].strip('\n').split(' ') 48 | 49 | try: 50 | line[0]=int(line[0]) 51 | except: 52 | line[0]=float(line[0]) 53 | try: 54 | line[1]=int(line[1]) 55 | except: 56 | line[1]=float(line[1]) 57 | try: 58 | line[2]=int(line[2]) 59 | except: 60 | line[2]=float(line[2]) 61 | try: 62 | line[3]=int(line[3]) 63 | except: 64 | line[3]=float(line[3]) 65 | bbox.append(line) 66 | 67 | if len(bbox)!=len(li): 68 | print (jj) 69 | f.close() 70 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 71 | 72 | d=dict(zip(b,c)) 73 | 74 | return d 75 | 76 | class UAVVideo(Video): 77 | """ 78 | Args: 79 | name: video name 80 | root: dataset root 81 | video_dir: video directory 82 | init_rect: init rectangle 83 | img_names: image names 84 | gt_rect: groundtruth rectangle 85 | attr: attribute of video 86 | """ 87 | def __init__(self, name, root, video_dir, init_rect, img_names, 88 | gt_rect, attr, load_img=False): 89 | super(UAVVideo, self).__init__(name, root, video_dir, 90 | init_rect, img_names, gt_rect, attr, load_img) 91 | 92 | 93 | class UAVTrack112lDataset(Dataset): 94 | """ 95 | Args: 96 | name: dataset name, should be 'UAV123', 'UAV20L' 97 | dataset_root: dataset root 98 | load_img: wether to load all imgs 99 | """ 100 | def __init__(self, name, dataset_root, load_img=False): 101 | super(UAVTrack112lDataset, self).__init__(name, dataset_root) 102 | meta_data = ca() 103 | 104 | # load videos 105 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 106 | self.videos = {} 107 | for video in pbar: 108 | pbar.set_postfix_str(video) 109 | self.videos[video] = UAVVideo(video, 110 | dataset_root, 111 | meta_data[video]['video_dir'], 112 | meta_data[video]['init_rect'], 113 | meta_data[video]['img_names'], 114 | meta_data[video]['gt_rect'], 115 | meta_data[video]['attr']) 116 | 117 | # set attr 118 | attr = [] 119 | for x in self.videos.values(): 120 | attr += x.attr 121 | attr = set(attr) 122 | self.attr = {} 123 | self.attr['ALL'] = list(self.videos.keys()) 124 | for x in attr: 125 | self.attr[x] = [] 126 | for k, v in self.videos.items(): 127 | for attr_ in v.attr: 128 | self.attr[attr_].append(k) 129 | 130 | 
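The per-dataset helpers above (`ca()` in dtb.py, uav10fps.py, uavdt.py and uavtrack112_l.py, `loaddata()` in uav20l.py) all hard-code an absolute dataset root such as `/home/tj-v4r/Dataset/...` and repeat the same frame/ground-truth pairing loop. A consolidated sketch of that shared pattern, with hypothetical names (`parse_rect`, `build_meta`) and the root passed in as an argument instead of being hard-coded:

    import os

    def _num(value):
        # The original loaders cast each coordinate to int when possible, else float.
        try:
            return int(value)
        except ValueError:
            return float(value)

    def parse_rect(line):
        # Ground-truth rows are x,y,w,h separated by ',', ' ' or '\t' depending on the dataset.
        for sep in (',', ' ', '\t'):
            fields = line.strip('\n').split(sep)
            if len(fields) == 4:
                return [_num(v) for v in fields]
        raise ValueError('cannot parse ground-truth line: %r' % line)

    def build_meta(data_root, seq_names, img_dir='data_seq', anno_dir='anno'):
        # Build the {name: {'attr', 'gt_rect', 'img_names', 'init_rect', 'video_dir'}}
        # dict that the Dataset subclasses above expect.
        meta = {}
        for name in seq_names:
            frames = sorted(os.listdir(os.path.join(data_root, img_dir, name)))
            with open(os.path.join(data_root, anno_dir, name + '.txt')) as f:
                rects = [parse_rect(row) for row in f]
            if len(rects) != len(frames):
                print('frame/annotation count mismatch in', name)
            meta[name] = {'attr': [],
                          'gt_rect': rects,
                          'img_names': [name + '/' + fr for fr in frames],
                          'init_rect': rects[0],
                          'video_dir': name}
        return meta

With such a helper, each dataset class would differ only in its directory layout and file naming, and the machine-specific `/home/...` paths baked into the current loaders would no longer be needed.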
-------------------------------------------------------------------------------- /toolkit/datasets/v4r.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | def ca(): 13 | path='/home/mist/dataset/UAVTrack112/' 14 | 15 | name_list=os.listdir(path+'/data_seq') 16 | name_list.sort() 17 | a=len(name_list) 18 | b=[] 19 | for i in range(a): 20 | b.append(name_list[i]) 21 | c=[] 22 | 23 | for jj in range(a): 24 | imgs=path+'/data_seq/'+str(name_list[jj]) 25 | txt=path+'/anno/'+str(name_list[jj])+'.txt' 26 | bbox=[] 27 | f = open(txt) # 返回一个文件对象 28 | file= f.readlines() 29 | li=os.listdir(imgs) 30 | li.sort() 31 | for ii in range(len(file)): 32 | try: 33 | li[ii]=name_list[jj]+'/'+li[ii] 34 | except: 35 | a=1 36 | 37 | line = file[ii].strip('\n').split(' ') 38 | 39 | 40 | if len(line)!=4: 41 | line = file[ii].strip('\n').split(',') 42 | if len(line)!=4: 43 | line = file[ii].strip('\n').split('\t') 44 | 45 | try: 46 | line[0]=int(line[0]) 47 | except: 48 | line[0]=float(line[0]) 49 | try: 50 | line[1]=int(line[1]) 51 | except: 52 | line[1]=float(line[1]) 53 | try: 54 | line[2]=int(line[2]) 55 | except: 56 | line[2]=float(line[2]) 57 | try: 58 | line[3]=int(line[3]) 59 | except: 60 | line[3]=float(line[3]) 61 | bbox.append(line) 62 | 63 | if len(bbox)!=len(li): 64 | print (jj) 65 | f.close() 66 | c.append({'attr':[],'gt_rect':bbox,'img_names':li,'init_rect':bbox[0],'video_dir':name_list[jj]}) 67 | 68 | d=dict(zip(b,c)) 69 | 70 | return d 71 | 72 | class UAVVideo(Video): 73 | """ 74 | Args: 75 | name: video name 76 | root: dataset root 77 | video_dir: video directory 78 | init_rect: init rectangle 79 | img_names: image names 80 | gt_rect: groundtruth rectangle 81 | attr: attribute of video 82 | """ 83 | def __init__(self, name, root, video_dir, init_rect, img_names, 84 | gt_rect, attr, load_img=False): 85 | super(UAVVideo, self).__init__(name, root, video_dir, 86 | init_rect, img_names, gt_rect, attr, load_img) 87 | 88 | 89 | class V4RDataset(Dataset): 90 | """ 91 | Args: 92 | name: dataset name, should be 'UAV123', 'UAV20L' 93 | dataset_root: dataset root 94 | load_img: wether to load all imgs 95 | """ 96 | def __init__(self, name, dataset_root, load_img=False): 97 | super(V4RDataset, self).__init__(name, dataset_root) 98 | meta_data = ca() 99 | 100 | # load videos 101 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 102 | self.videos = {} 103 | for video in pbar: 104 | pbar.set_postfix_str(video) 105 | self.videos[video] = UAVVideo(video, 106 | dataset_root, 107 | meta_data[video]['video_dir'], 108 | meta_data[video]['init_rect'], 109 | meta_data[video]['img_names'], 110 | meta_data[video]['gt_rect'], 111 | meta_data[video]['attr']) 112 | 113 | # set attr 114 | attr = [] 115 | for x in self.videos.values(): 116 | attr += x.attr 117 | attr = set(attr) 118 | self.attr = {} 119 | self.attr['ALL'] = list(self.videos.keys()) 120 | for x in attr: 121 | self.attr[x] = [] 122 | for k, v in self.videos.items(): 123 | for attr_ in v.attr: 124 | self.attr[attr_].append(k) 125 | 126 | -------------------------------------------------------------------------------- /toolkit/datasets/video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import re 4 | import numpy as np 5 | import json 6 | 
7 | from glob import glob 8 | 9 | class Video(object): 10 | def __init__(self, name, root, video_dir, init_rect, img_names, 11 | gt_rect, attr, load_img=False): 12 | self.name = name 13 | self.video_dir = video_dir 14 | self.init_rect = init_rect 15 | self.gt_traj = gt_rect 16 | self.attr = attr 17 | self.pred_trajs = {} 18 | self.img_names = [os.path.join(root, x) for x in img_names] 19 | self.imgs = None 20 | 21 | if load_img: 22 | self.imgs = [cv2.imread(x) for x in self.img_names] 23 | self.width = self.imgs[0].shape[1] 24 | self.height = self.imgs[0].shape[0] 25 | else: 26 | img = cv2.imread(self.img_names[0]) 27 | assert img is not None, self.img_names[0] 28 | self.width = img.shape[1] 29 | self.height = img.shape[0] 30 | 31 | def load_tracker(self, path, tracker_names=None, store=True): 32 | """ 33 | Args: 34 | path(str): path to result 35 | tracker_name(list): name of tracker 36 | """ 37 | if not tracker_names: 38 | tracker_names = [x.split('/')[-1] for x in glob(path) 39 | if os.path.isdir(x)] 40 | if isinstance(tracker_names, str): 41 | tracker_names = [tracker_names] 42 | for name in tracker_names: 43 | traj_file = os.path.join(path, name, self.name+'.txt') 44 | if os.path.exists(traj_file): 45 | with open(traj_file, 'r') as f : 46 | pred_traj = [list(map(float, x.strip().split(','))) 47 | for x in f.readlines()] 48 | if len(pred_traj) != len(self.gt_traj): 49 | print(name, len(pred_traj), len(self.gt_traj), self.name) 50 | if store: 51 | self.pred_trajs[name] = pred_traj 52 | else: 53 | return pred_traj 54 | else: 55 | print(traj_file) 56 | self.tracker_names = list(self.pred_trajs.keys()) 57 | 58 | def load_img(self): 59 | if self.imgs is None: 60 | self.imgs = [cv2.imread(x) for x in self.img_names] 61 | self.width = self.imgs[0].shape[1] 62 | self.height = self.imgs[0].shape[0] 63 | 64 | def free_img(self): 65 | self.imgs = None 66 | 67 | def __len__(self): 68 | return len(self.img_names) 69 | 70 | def __getitem__(self, idx): 71 | if self.imgs is None: 72 | return cv2.imread(self.img_names[idx]), self.gt_traj[idx] 73 | else: 74 | return self.imgs[idx], self.gt_traj[idx] 75 | 76 | def __iter__(self): 77 | for i in range(len(self.img_names)): 78 | if self.imgs is not None: 79 | yield self.imgs[i], self.gt_traj[i] 80 | else: 81 | yield cv2.imread(self.img_names[i]), self.gt_traj[i] 82 | 83 | def draw_box(self, roi, img, linewidth, color, name=None): 84 | """ 85 | roi: rectangle or polygon 86 | img: numpy array img 87 | linewith: line width of the bbox 88 | """ 89 | if len(roi) > 6 and len(roi) % 2 == 0: 90 | pts = np.array(roi, np.int32).reshape(-1, 1, 2) 91 | color = tuple(map(int, color)) 92 | img = cv2.polylines(img, [pts], True, color, linewidth) 93 | pt = (pts[0, 0, 0], pts[0, 0, 1]-5) 94 | if name: 95 | img = cv2.putText(img, name, pt, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 1) 96 | elif len(roi) == 4: 97 | if not np.isnan(roi[0]): 98 | roi = list(map(int, roi)) 99 | color = tuple(map(int, color)) 100 | img = cv2.rectangle(img, (roi[0], roi[1]), (roi[0]+roi[2], roi[1]+roi[3]), 101 | color, linewidth) 102 | if name: 103 | img = cv2.putText(img, name, (roi[0], roi[1]-5), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 1) 104 | return img 105 | 106 | def show(self, pred_trajs={}, linewidth=2, show_name=False): 107 | """ 108 | pred_trajs: dict of pred_traj, {'tracker_name': list of traj} 109 | pred_traj should contain polygon or rectangle(x, y, width, height) 110 | linewith: line width of the bbox 111 | """ 112 | assert self.imgs is not None 113 | video = [] 114 | 
cv2.namedWindow(self.name, cv2.WINDOW_NORMAL) 115 | colors = {} 116 | if len(pred_trajs) == 0 and len(self.pred_trajs) > 0: 117 | pred_trajs = self.pred_trajs 118 | for i, (roi, img) in enumerate(zip(self.gt_traj, 119 | self.imgs[self.start_frame:self.end_frame+1])): 120 | img = img.copy() 121 | if len(img.shape) == 2: 122 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 123 | else: 124 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 125 | img = self.draw_box(roi, img, linewidth, (0, 255, 0), 126 | 'gt' if show_name else None) 127 | for name, trajs in pred_trajs.items(): 128 | if name not in colors: 129 | color = tuple(np.random.randint(0, 256, 3)) 130 | colors[name] = color 131 | else: 132 | color = colors[name] 133 | img = self.draw_box(trajs[0][i], img, linewidth, color, 134 | name if show_name else None) 135 | cv2.putText(img, str(i+self.start_frame), (5, 20), 136 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 0), 2) 137 | cv2.imshow(self.name, img) 138 | cv2.waitKey(40) 139 | video.append(img.copy()) 140 | return video 141 | -------------------------------------------------------------------------------- /toolkit/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .ope_benchmark import OPEBenchmark 2 | -------------------------------------------------------------------------------- /toolkit/evaluation/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/toolkit/evaluation/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/evaluation/__pycache__/ope_benchmark.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/toolkit/evaluation/__pycache__/ope_benchmark.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/evaluation/ope_benchmark.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from colorama import Style, Fore 4 | 5 | from ..utils.statistics import success_overlap, success_error 6 | 7 | class OPEBenchmark: 8 | """ 9 | Args: 10 | result_path: result path of your tracker 11 | should the same format like VOT 12 | """ 13 | def __init__(self, dataset): 14 | self.dataset = dataset 15 | 16 | def convert_bb_to_center(self, bboxes): 17 | return np.array([(bboxes[:, 0] + (bboxes[:, 2] - 1) / 2), 18 | (bboxes[:, 1] + (bboxes[:, 3] - 1) / 2)]).T 19 | 20 | def convert_bb_to_norm_center(self, bboxes, gt_wh): 21 | return self.convert_bb_to_center(bboxes) / (gt_wh+1e-16) 22 | 23 | def eval_success(self, eval_trackers=None): 24 | """ 25 | Args: 26 | eval_trackers: list of tracker name or single tracker name 27 | Return: 28 | res: dict of results 29 | """ 30 | if eval_trackers is None: 31 | eval_trackers = self.dataset.tracker_names 32 | if isinstance(eval_trackers, str): 33 | eval_trackers = [eval_trackers] 34 | 35 | success_ret = {} 36 | for tracker_name in eval_trackers: 37 | success_ret_ = {} 38 | for video in self.dataset: 39 | gt_traj = np.array(video.gt_traj) 40 | if tracker_name not in video.pred_trajs: 41 | tracker_traj = video.load_tracker(self.dataset.tracker_path, 42 | tracker_name, False) 43 | tracker_traj = np.array(tracker_traj) 44 | else: 45 | 
tracker_traj = np.array(video.pred_trajs[tracker_name]) 46 | n_frame = len(gt_traj) 47 | if hasattr(video, 'absent'): 48 | gt_traj = gt_traj[video.absent == 1] 49 | tracker_traj = tracker_traj[video.absent == 1] 50 | success_ret_[video.name] = success_overlap(gt_traj, tracker_traj, n_frame) 51 | success_ret[tracker_name] = success_ret_ 52 | return success_ret 53 | 54 | def eval_precision(self, eval_trackers=None): 55 | """ 56 | Args: 57 | eval_trackers: list of tracker name or single tracker name 58 | Return: 59 | res: dict of results 60 | """ 61 | if eval_trackers is None: 62 | eval_trackers = self.dataset.tracker_names 63 | if isinstance(eval_trackers, str): 64 | eval_trackers = [eval_trackers] 65 | 66 | precision_ret = {} 67 | for tracker_name in eval_trackers: 68 | precision_ret_ = {} 69 | for video in self.dataset: 70 | gt_traj = np.array(video.gt_traj) 71 | if tracker_name not in video.pred_trajs: 72 | tracker_traj = video.load_tracker(self.dataset.tracker_path, 73 | tracker_name, False) 74 | tracker_traj = np.array(tracker_traj) 75 | else: 76 | tracker_traj = np.array(video.pred_trajs[tracker_name]) 77 | n_frame = len(gt_traj) 78 | if hasattr(video, 'absent'): 79 | gt_traj = gt_traj[video.absent == 1] 80 | tracker_traj = tracker_traj[video.absent == 1] 81 | gt_center = self.convert_bb_to_center(gt_traj) 82 | tracker_center = self.convert_bb_to_center(tracker_traj) 83 | thresholds = np.arange(0, 51, 1) 84 | precision_ret_[video.name] = success_error(gt_center, tracker_center, 85 | thresholds, n_frame) 86 | precision_ret[tracker_name] = precision_ret_ 87 | return precision_ret 88 | 89 | def eval_norm_precision(self, eval_trackers=None): 90 | """ 91 | Args: 92 | eval_trackers: list of tracker name or single tracker name 93 | Return: 94 | res: dict of results 95 | """ 96 | if eval_trackers is None: 97 | eval_trackers = self.dataset.tracker_names 98 | if isinstance(eval_trackers, str): 99 | eval_trackers = [eval_trackers] 100 | 101 | norm_precision_ret = {} 102 | for tracker_name in eval_trackers: 103 | norm_precision_ret_ = {} 104 | for video in self.dataset: 105 | gt_traj = np.array(video.gt_traj) 106 | if tracker_name not in video.pred_trajs: 107 | tracker_traj = video.load_tracker(self.dataset.tracker_path, 108 | tracker_name, False) 109 | tracker_traj = np.array(tracker_traj) 110 | else: 111 | tracker_traj = np.array(video.pred_trajs[tracker_name]) 112 | n_frame = len(gt_traj) 113 | if hasattr(video, 'absent'): 114 | gt_traj = gt_traj[video.absent == 1] 115 | tracker_traj = tracker_traj[video.absent == 1] 116 | gt_center_norm = self.convert_bb_to_norm_center(gt_traj, gt_traj[:, 2:4]) 117 | tracker_center_norm = self.convert_bb_to_norm_center(tracker_traj, gt_traj[:, 2:4]) 118 | thresholds = np.arange(0, 51, 1) / 100 119 | norm_precision_ret_[video.name] = success_error(gt_center_norm, 120 | tracker_center_norm, thresholds, n_frame) 121 | norm_precision_ret[tracker_name] = norm_precision_ret_ 122 | return norm_precision_ret 123 | 124 | def show_result(self, success_ret, precision_ret=None, 125 | norm_precision_ret=None, show_video_level=False, helight_threshold=0.6): 126 | """pretty print result 127 | Args: 128 | result: returned dict from function eval 129 | """ 130 | # sort tracker 131 | tracker_auc = {} 132 | for tracker_name in success_ret.keys(): 133 | auc = np.mean(list(success_ret[tracker_name].values())) 134 | tracker_auc[tracker_name] = auc 135 | tracker_auc_ = sorted(tracker_auc.items(), 136 | key=lambda x:x[1], 137 | reverse=True)[:20] 138 | tracker_names = [x[0] for x in 
tracker_auc_] 139 | 140 | 141 | tracker_name_len = max((max([len(x) for x in success_ret.keys()])+2), 12) 142 | header = ("|{:^"+str(tracker_name_len)+"}|{:^9}|{:^16}|{:^11}|").format( 143 | "Tracker name", "Success", "Norm Precision", "Precision") 144 | formatter = "|{:^"+str(tracker_name_len)+"}|{:^9.3f}|{:^16.3f}|{:^11.3f}|" 145 | print('-'*len(header)) 146 | print(header) 147 | print('-'*len(header)) 148 | for tracker_name in tracker_names: 149 | # success = np.mean(list(success_ret[tracker_name].values())) 150 | success = tracker_auc[tracker_name] 151 | if precision_ret is not None: 152 | precision = np.mean(list(precision_ret[tracker_name].values()), axis=0)[20] 153 | else: 154 | precision = 0 155 | if norm_precision_ret is not None: 156 | norm_precision = np.mean(list(norm_precision_ret[tracker_name].values()), 157 | axis=0)[20] 158 | else: 159 | norm_precision = 0 160 | print(formatter.format(tracker_name, success, norm_precision, precision)) 161 | print('-'*len(header)) 162 | 163 | if show_video_level and len(success_ret) < 10 \ 164 | and precision_ret is not None \ 165 | and len(precision_ret) < 10: 166 | print("\n\n") 167 | header1 = "|{:^21}|".format("Tracker name") 168 | header2 = "|{:^21}|".format("Video name") 169 | for tracker_name in success_ret.keys(): 170 | # col_len = max(20, len(tracker_name)) 171 | header1 += ("{:^21}|").format(tracker_name) 172 | header2 += "{:^9}|{:^11}|".format("success", "precision") 173 | print('-'*len(header1)) 174 | print(header1) 175 | print('-'*len(header1)) 176 | print(header2) 177 | print('-'*len(header1)) 178 | videos = list(success_ret[tracker_name].keys()) 179 | for video in videos: 180 | row = "|{:^21}|".format(video) 181 | for tracker_name in success_ret.keys(): 182 | success = np.mean(success_ret[tracker_name][video]) 183 | precision = np.mean(precision_ret[tracker_name][video]) 184 | success_str = "{:^9.3f}".format(success) 185 | if success < helight_threshold: 186 | row += Fore.RED+success_str+Style.RESET_ALL+'|' 187 | else: 188 | row += success_str+'|' 189 | precision_str = "{:^11.3f}".format(precision) 190 | if precision < helight_threshold: 191 | row += Fore.RED+precision_str+Style.RESET_ALL+'|' 192 | else: 193 | row += precision_str+'|' 194 | print(row) 195 | print('-'*len(header1)) 196 | -------------------------------------------------------------------------------- /toolkit/utils/__pycache__/statistics.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/toolkit/utils/__pycache__/statistics.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/utils/region.pyx: -------------------------------------------------------------------------------- 1 | """ 2 | @author fangyi.zhang@vipl.ict.ac.cn 3 | """ 4 | # distutils: sources = src/region.c 5 | # distutils: include_dirs = src/ 6 | 7 | from libc.stdlib cimport malloc, free 8 | from libc.stdio cimport sprintf 9 | from libc.string cimport strlen 10 | 11 | cimport c_region 12 | 13 | cpdef enum RegionType: 14 | EMTPY 15 | SPECIAL 16 | RECTANGEL 17 | POLYGON 18 | MASK 19 | 20 | cdef class RegionBounds: 21 | cdef c_region.region_bounds* _c_region_bounds 22 | 23 | def __cinit__(self): 24 | self._c_region_bounds = malloc( 25 | sizeof(c_region.region_bounds)) 26 | if not self._c_region_bounds: 27 | self._c_region_bounds = NULL 28 | raise MemoryError() 29 | 30 | def __init__(self, top, bottom, left, 
right): 31 | self.set(top, bottom, left, right) 32 | 33 | def __dealloc__(self): 34 | if self._c_region_bounds is not NULL: 35 | free(self._c_region_bounds) 36 | self._c_region_bounds = NULL 37 | 38 | def __str__(self): 39 | return "top: {:.3f} bottom: {:.3f} left: {:.3f} reight: {:.3f}".format( 40 | self._c_region_bounds.top, 41 | self._c_region_bounds.bottom, 42 | self._c_region_bounds.left, 43 | self._c_region_bounds.right) 44 | 45 | def get(self): 46 | return (self._c_region_bounds.top, 47 | self._c_region_bounds.bottom, 48 | self._c_region_bounds.left, 49 | self._c_region_bounds.right) 50 | 51 | def set(self, top, bottom, left, right): 52 | self._c_region_bounds.top = top 53 | self._c_region_bounds.bottom = bottom 54 | self._c_region_bounds.left = left 55 | self._c_region_bounds.right = right 56 | 57 | cdef class Rectangle: 58 | cdef c_region.region_rectangle* _c_region_rectangle 59 | 60 | def __cinit__(self): 61 | self._c_region_rectangle = malloc( 62 | sizeof(c_region.region_rectangle)) 63 | if not self._c_region_rectangle: 64 | self._c_region_rectangle = NULL 65 | raise MemoryError() 66 | 67 | def __init__(self, x, y, width, height): 68 | self.set(x, y, width, height) 69 | 70 | def __dealloc__(self): 71 | if self._c_region_rectangle is not NULL: 72 | free(self._c_region_rectangle) 73 | self._c_region_rectangle = NULL 74 | 75 | def __str__(self): 76 | return "x: {:.3f} y: {:.3f} width: {:.3f} height: {:.3f}".format( 77 | self._c_region_rectangle.x, 78 | self._c_region_rectangle.y, 79 | self._c_region_rectangle.width, 80 | self._c_region_rectangle.height) 81 | 82 | def set(self, x, y, width, height): 83 | self._c_region_rectangle.x = x 84 | self._c_region_rectangle.y = y 85 | self._c_region_rectangle.width = width 86 | self._c_region_rectangle.height = height 87 | 88 | def get(self): 89 | """ 90 | return: 91 | (x, y, width, height) 92 | """ 93 | return (self._c_region_rectangle.x, 94 | self._c_region_rectangle.y, 95 | self._c_region_rectangle.width, 96 | self._c_region_rectangle.height) 97 | 98 | cdef class Polygon: 99 | cdef c_region.region_polygon* _c_region_polygon 100 | 101 | def __cinit__(self, points): 102 | """ 103 | args: 104 | points: tuple of point 105 | points = ((1, 1), (10, 10)) 106 | """ 107 | num = len(points) // 2 108 | self._c_region_polygon = malloc( 109 | sizeof(c_region.region_polygon)) 110 | if not self._c_region_polygon: 111 | self._c_region_polygon = NULL 112 | raise MemoryError() 113 | self._c_region_polygon.count = num 114 | self._c_region_polygon.x = malloc(sizeof(float) * num) 115 | if not self._c_region_polygon.x: 116 | raise MemoryError() 117 | self._c_region_polygon.y = malloc(sizeof(float) * num) 118 | if not self._c_region_polygon.y: 119 | raise MemoryError() 120 | 121 | for i in range(num): 122 | self._c_region_polygon.x[i] = points[i*2] 123 | self._c_region_polygon.y[i] = points[i*2+1] 124 | 125 | def __dealloc__(self): 126 | if self._c_region_polygon is not NULL: 127 | if self._c_region_polygon.x is not NULL: 128 | free(self._c_region_polygon.x) 129 | self._c_region_polygon.x = NULL 130 | if self._c_region_polygon.y is not NULL: 131 | free(self._c_region_polygon.y) 132 | self._c_region_polygon.y = NULL 133 | free(self._c_region_polygon) 134 | self._c_region_polygon = NULL 135 | 136 | def __str__(self): 137 | ret = "" 138 | for i in range(self._c_region_polygon.count-1): 139 | ret += "({:.3f} {:.3f}) ".format(self._c_region_polygon.x[i], 140 | self._c_region_polygon.y[i]) 141 | ret += "({:.3f} {:.3f})".format(self._c_region_polygon.x[i], 142 | 
self._c_region_polygon.y[i]) 143 | return ret 144 | 145 | def vot_overlap(polygon1, polygon2, bounds=None): 146 | """ computing overlap between two polygon 147 | Args: 148 | polygon1: polygon tuple of points 149 | polygon2: polygon tuple of points 150 | bounds: tuple of (left, top, right, bottom) or tuple of (width height) 151 | Return: 152 | overlap: overlap between two polygons 153 | """ 154 | if len(polygon1) == 1 or len(polygon2) == 1: 155 | return float("nan") 156 | 157 | if len(polygon1) == 4: 158 | polygon1_ = Polygon([polygon1[0], polygon1[1], 159 | polygon1[0]+polygon1[2], polygon1[1], 160 | polygon1[0]+polygon1[2], polygon1[1]+polygon1[3], 161 | polygon1[0], polygon1[1]+polygon1[3]]) 162 | else: 163 | polygon1_ = Polygon(polygon1) 164 | 165 | if len(polygon2) == 4: 166 | polygon2_ = Polygon([polygon2[0], polygon2[1], 167 | polygon2[0]+polygon2[2], polygon2[1], 168 | polygon2[0]+polygon2[2], polygon2[1]+polygon2[3], 169 | polygon2[0], polygon2[1]+polygon2[3]]) 170 | else: 171 | polygon2_ = Polygon(polygon2) 172 | 173 | if bounds is not None and len(bounds) == 4: 174 | pno_bounds = RegionBounds(bounds[0], bounds[1], bounds[2], bounds[3]) 175 | elif bounds is not None and len(bounds) == 2: 176 | pno_bounds = RegionBounds(0, bounds[1], 0, bounds[0]) 177 | else: 178 | pno_bounds = RegionBounds(-float("inf"), float("inf"), 179 | -float("inf"), float("inf")) 180 | cdef float only1 = 0 181 | cdef float only2 = 0 182 | cdef c_region.region_polygon* c_polygon1 = polygon1_._c_region_polygon 183 | cdef c_region.region_polygon* c_polygon2 = polygon2_._c_region_polygon 184 | cdef c_region.region_bounds no_bounds = pno_bounds._c_region_bounds[0] # deference 185 | return c_region.compute_polygon_overlap(c_polygon1, 186 | c_polygon2, 187 | &only1, 188 | &only2, 189 | no_bounds) 190 | 191 | def vot_overlap_traj(polygons1, polygons2, bounds=None): 192 | """ computing overlap between two trajectory 193 | Args: 194 | polygons1: list of polygon 195 | polygons2: list of polygon 196 | bounds: tuple of (left, top, right, bottom) or tuple of (width height) 197 | Return: 198 | overlaps: overlaps between all pair of polygons 199 | """ 200 | assert len(polygons1) == len(polygons2) 201 | overlaps = [] 202 | for i in range(len(polygons1)): 203 | overlap = vot_overlap(polygons1[i], polygons2[i], bounds=bounds) 204 | overlaps.append(overlap) 205 | return overlaps 206 | 207 | 208 | def vot_float2str(template, float value): 209 | """ 210 | Args: 211 | tempate: like "%.3f" in C syntax 212 | value: float value 213 | """ 214 | cdef bytes ptemplate = template.encode() 215 | cdef const char* ctemplate = ptemplate 216 | cdef char* output = malloc(sizeof(char) * 100) 217 | if not output: 218 | raise MemoryError() 219 | sprintf(output, ctemplate, value) 220 | try: 221 | ret = output[:strlen(output)].decode() 222 | finally: 223 | free(output) 224 | return ret 225 | -------------------------------------------------------------------------------- /toolkit/utils/statistics.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author fangyi.zhang@vipl.ict.ac.cn 3 | """ 4 | import numpy as np 5 | 6 | def overlap_ratio(rect1, rect2): 7 | '''Compute overlap ratio between two rects 8 | Args 9 | rect:2d array of N x [x,y,w,h] 10 | Return: 11 | iou 12 | ''' 13 | # if rect1.ndim==1: 14 | # rect1 = rect1[np.newaxis, :] 15 | # if rect2.ndim==1: 16 | # rect2 = rect2[np.newaxis, :] 17 | left = np.maximum(rect1[:,0], rect2[:,0]) 18 | right = np.minimum(rect1[:,0]+rect1[:,2], 
rect2[:,0]+rect2[:,2]) 19 | top = np.maximum(rect1[:,1], rect2[:,1]) 20 | bottom = np.minimum(rect1[:,1]+rect1[:,3], rect2[:,1]+rect2[:,3]) 21 | 22 | intersect = np.maximum(0,right - left) * np.maximum(0,bottom - top) 23 | union = rect1[:,2]*rect1[:,3] + rect2[:,2]*rect2[:,3] - intersect 24 | iou = intersect / union 25 | iou = np.maximum(np.minimum(1, iou), 0) 26 | return iou 27 | 28 | def success_overlap(gt_bb, result_bb, n_frame): 29 | thresholds_overlap = np.arange(0, 1.05, 0.05) 30 | success = np.zeros(len(thresholds_overlap)) 31 | iou = np.ones(len(gt_bb)) * (-1) 32 | # mask = np.sum(gt_bb > 0, axis=1) == 4 #TODO check all dataset 33 | mask = np.sum(gt_bb[:, 2:] > 0, axis=1) == 2 34 | iou[mask] = overlap_ratio(gt_bb[mask], result_bb[mask]) 35 | for i in range(len(thresholds_overlap)): 36 | success[i] = np.sum(iou > thresholds_overlap[i]) / float(n_frame) 37 | return success 38 | 39 | def success_error(gt_center, result_center, thresholds, n_frame): 40 | # n_frame = len(gt_center) 41 | success = np.zeros(len(thresholds)) 42 | dist = np.ones(len(gt_center)) * (-1) 43 | mask = np.sum(gt_center > 0, axis=1) == 2 44 | dist[mask] = np.sqrt(np.sum( 45 | np.power(gt_center[mask] - result_center[mask], 2), axis=1)) 46 | for i in range(len(thresholds)): 47 | success[i] = np.sum(dist <= thresholds[i]) / float(n_frame) 48 | return success 49 | 50 | 51 | -------------------------------------------------------------------------------- /toolkit/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .draw_success_precision import draw_success_precision 2 | -------------------------------------------------------------------------------- /toolkit/visualization/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/toolkit/visualization/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/visualization/__pycache__/draw_success_precision.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/toolkit/visualization/__pycache__/draw_success_precision.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/visualization/__pycache__/draw_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/vision4robotics/FDNT/ed34d59e98ee5d6b2718432f84381f3d6d496e40/toolkit/visualization/__pycache__/draw_utils.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/visualization/draw_success_precision.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from .draw_utils import COLOR, LINE_STYLE 5 | 6 | def draw_success_precision(success_ret, name, videos, attr, precision_ret=None, 7 | norm_precision_ret=None, bold_name=None, axis=[0, 1]): 8 | # success plot 9 | fig, ax = plt.subplots() 10 | ax.grid(b=True) 11 | ax.set_aspect(1) 12 | plt.xlabel('Overlap threshold') 13 | plt.ylabel('Success rate') 14 | if attr == 'ALL': 15 | plt.title(r'\textbf{Success plots of OPE on %s}' % (name)) 16 | else: 17 | plt.title(r'\textbf{Success 
plots of OPE - %s}' % (attr)) 18 | plt.axis([0, 1]+axis) 19 | success = {} 20 | thresholds = np.arange(0, 1.05, 0.05) 21 | for tracker_name in success_ret.keys(): 22 | value = [v for k, v in success_ret[tracker_name].items() if k in videos] 23 | success[tracker_name] = np.mean(value) 24 | for idx, (tracker_name, auc) in \ 25 | enumerate(sorted(success.items(), key=lambda x:x[1], reverse=True)): 26 | if tracker_name == bold_name: 27 | label = r"\textbf{[%.3f] %s}" % (auc, tracker_name) 28 | else: 29 | label = "[%.3f] " % (auc) + tracker_name 30 | value = [v for k, v in success_ret[tracker_name].items() if k in videos] 31 | plt.plot(thresholds, np.mean(value, axis=0), 32 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 33 | ax.legend(loc='lower left', labelspacing=0.2) 34 | ax.autoscale(enable=True, axis='both', tight=True) 35 | xmin, xmax, ymin, ymax = plt.axis() 36 | ax.autoscale(enable=False) 37 | ymax += 0.03 38 | plt.axis([xmin, xmax, ymin, ymax]) 39 | plt.xticks(np.arange(xmin, xmax+0.01, 0.1)) 40 | plt.yticks(np.arange(ymin, ymax, 0.1)) 41 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 42 | plt.show() 43 | 44 | if precision_ret: 45 | # norm precision plot 46 | fig, ax = plt.subplots() 47 | ax.grid(b=True) 48 | ax.set_aspect(50) 49 | plt.xlabel('Location error threshold') 50 | plt.ylabel('Precision') 51 | if attr == 'ALL': 52 | plt.title(r'\textbf{Precision plots of OPE on %s}' % (name)) 53 | else: 54 | plt.title(r'\textbf{Precision plots of OPE - %s}' % (attr)) 55 | plt.axis([0, 50]+axis) 56 | precision = {} 57 | thresholds = np.arange(0, 51, 1) 58 | for tracker_name in precision_ret.keys(): 59 | value = [v for k, v in precision_ret[tracker_name].items() if k in videos] 60 | precision[tracker_name] = np.mean(value, axis=0)[20] 61 | for idx, (tracker_name, pre) in \ 62 | enumerate(sorted(precision.items(), key=lambda x:x[1], reverse=True)): 63 | if tracker_name == bold_name: 64 | label = r"\textbf{[%.3f] %s}" % (pre, tracker_name) 65 | else: 66 | label = "[%.3f] " % (pre) + tracker_name 67 | value = [v for k, v in precision_ret[tracker_name].items() if k in videos] 68 | plt.plot(thresholds, np.mean(value, axis=0), 69 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 70 | ax.legend(loc='lower right', labelspacing=0.2) 71 | ax.autoscale(enable=True, axis='both', tight=True) 72 | xmin, xmax, ymin, ymax = plt.axis() 73 | ax.autoscale(enable=False) 74 | ymax += 0.03 75 | plt.axis([xmin, xmax, ymin, ymax]) 76 | plt.xticks(np.arange(xmin, xmax+0.01, 5)) 77 | plt.yticks(np.arange(ymin, ymax, 0.1)) 78 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 79 | plt.show() 80 | 81 | # norm precision plot 82 | if norm_precision_ret: 83 | fig, ax = plt.subplots() 84 | ax.grid(b=True) 85 | plt.xlabel('Location error threshold') 86 | plt.ylabel('Precision') 87 | if attr == 'ALL': 88 | plt.title(r'\textbf{Normalized Precision plots of OPE on %s}' % (name)) 89 | else: 90 | plt.title(r'\textbf{Normalized Precision plots of OPE - %s}' % (attr)) 91 | norm_precision = {} 92 | thresholds = np.arange(0, 51, 1) / 100 93 | for tracker_name in precision_ret.keys(): 94 | value = [v for k, v in norm_precision_ret[tracker_name].items() if k in videos] 95 | norm_precision[tracker_name] = np.mean(value, axis=0)[20] 96 | for idx, (tracker_name, pre) in \ 97 | enumerate(sorted(norm_precision.items(), key=lambda x:x[1], reverse=True)): 98 | if tracker_name == bold_name: 99 | label = r"\textbf{[%.3f] %s}" % (pre, tracker_name) 100 | else: 101 | label = "[%.3f] " % (pre) + tracker_name 102 | 
value = [v for k, v in norm_precision_ret[tracker_name].items() if k in videos] 103 | plt.plot(thresholds, np.mean(value, axis=0), 104 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 105 | ax.legend(loc='lower right', labelspacing=0.2) 106 | ax.autoscale(enable=True, axis='both', tight=True) 107 | xmin, xmax, ymin, ymax = plt.axis() 108 | ax.autoscale(enable=False) 109 | ymax += 0.03 110 | plt.axis([xmin, xmax, ymin, ymax]) 111 | plt.xticks(np.arange(xmin, xmax+0.01, 0.05)) 112 | plt.yticks(np.arange(ymin, ymax, 0.1)) 113 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 114 | plt.show() 115 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_utils.py: -------------------------------------------------------------------------------- 1 | 2 | COLOR = ((1, 0, 0), 3 | (0, 1, 0), 4 | (1, 0, 1), 5 | (1, 1, 0), 6 | (0 , 162/255, 232/255), 7 | (0.5, 0.5, 0.5), 8 | (0, 0, 1), 9 | (0, 1, 1), 10 | (136/255, 0 , 21/255), 11 | (255/255, 127/255, 39/255), 12 | (0, 0, 0)) 13 | 14 | LINE_STYLE = ['-', '--', ':', '-', '--', ':', '-', '--', ':', '-'] 15 | 16 | MARKER_STYLE = ['o', 'v', '<', '*', 'D', 'x', '.', 'x', '<', '.'] 17 | -------------------------------------------------------------------------------- /tools/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | import sys 8 | sys.path.append('../') 9 | 10 | import argparse 11 | import cv2 12 | import torch 13 | from glob import glob 14 | 15 | from pysot.core.config import cfg 16 | from pysot.models.model_builder import ModelBuilder 17 | from pysot.tracker.siamapn_tracker import SiamAPNTracker 18 | from pysot.utils.model_load import load_pretrain 19 | 20 | 21 | torch.set_num_threads(1) 22 | 23 | parser = argparse.ArgumentParser(description='AFRT demo') 24 | parser.add_argument('--config', type=str, default='../experiments/config.yaml', help='config file') 25 | parser.add_argument('--snapshot', type=str, default='./snapshot/general_model.pth', help='model name') 26 | parser.add_argument('--video_name', default='../test_dataset/sequence_name', type=str, help='videos or image files') 27 | args = parser.parse_args() 28 | 29 | 30 | def get_frames(video_name): 31 | if not video_name: 32 | cap = cv2.VideoCapture(0) 33 | 34 | # warmup 35 | for i in range(5): 36 | cap.read() 37 | while True: 38 | ret, frame = cap.read() 39 | if ret: 40 | yield frame 41 | else: 42 | break 43 | elif video_name.endswith('avi') or \ 44 | video_name.endswith('mp4'): 45 | cap = cv2.VideoCapture(video_name) 46 | while True: 47 | ret, frame = cap.read() 48 | if ret: 49 | yield frame 50 | else: 51 | break 52 | else: 53 | images = sorted(glob(os.path.join(video_name, 'img', '*.jp*'))) 54 | for img in images: 55 | frame = cv2.imread(img) 56 | yield frame 57 | 58 | 59 | def main(): 60 | # load config 61 | cfg.merge_from_file(args.config) 62 | cfg.CUDA = torch.cuda.is_available() 63 | device = torch.device('cuda' if cfg.CUDA else 'cpu') 64 | 65 | # create model 66 | model = ModelBuilder() 67 | 68 | # load model 69 | model = load_pretrain(model, args.snapshot).eval().to(device) 70 | 71 | # build tracker 72 | tracker = SiamAPNTracker(model) 73 | 74 | first_frame = True 75 | if args.video_name: 76 | video_name = args.video_name.split('/')[-1].split('.')[0] 77 | else: 78 | video_name = 'webcam' 79 | 
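# demo loop: select the target on the first frame, then track it and draw the predicted box on every following frame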
cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN) 80 | for frame in get_frames(args.video_name): 81 | if first_frame: 82 | try: 83 | init_rect = cv2.selectROI(video_name, frame, False, False) 84 | except: 85 | exit() 86 | tracker.init(frame, init_rect) 87 | first_frame = False 88 | else: 89 | outputs = tracker.track(frame) 90 | bbox = list(map(int, outputs['bbox'])) 91 | cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (0, 255, 0), 3) 92 | cv2.imshow(video_name, frame) 93 | cv2.waitKey(40) 94 | 95 | 96 | if __name__ == '__main__': 97 | main() 98 | -------------------------------------------------------------------------------- /tools/eval.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import argparse 5 | import functools 6 | sys.path.append("./") 7 | 8 | from glob import glob 9 | from tqdm import tqdm 10 | from multiprocessing import Pool 11 | from toolkit.datasets import UAV10Dataset,UAV20Dataset,DTBDataset,UAVDataset 12 | from toolkit.evaluation import OPEBenchmark 13 | from toolkit.visualization import draw_success_precision 14 | 15 | if __name__ == '__main__': 16 | 17 | 18 | parser = argparse.ArgumentParser(description='Single Object Tracking Evaluation') 19 | parser.add_argument('--dataset_dir', default='',type=str, help='dataset root directory') 20 | parser.add_argument('--dataset', default='UAV10',type=str, help='dataset name') 21 | parser.add_argument('--tracker_result_dir',default='', type=str, help='tracker result root') 22 | parser.add_argument('--trackers',default='general_model', nargs='+') 23 | parser.add_argument('--vis', dest='vis', action='store_true') 24 | parser.add_argument('--show_video_level', dest='show_video_level', action='store_true') 25 | parser.add_argument('--num', default=1, type=int, help='number of processes to eval') 26 | args = parser.parse_args() 27 | 28 | tracker_dir = os.path.join(args.tracker_result_dir, args.dataset) 29 | trackers = args.trackers 30 | if isinstance(trackers, str): 31 | trackers = [trackers] 32 | 33 | 34 | 35 | root = os.path.realpath(os.path.join(os.path.dirname(__file__), 36 | '../test_dataset')) 37 | root = os.path.join(root, args.dataset) 38 | 39 | 40 | 41 | 42 | assert len(trackers) > 0 43 | args.num = min(args.num, len(trackers)) 44 | 45 | if 'UAV10' in args.dataset: 46 | dataset = UAV10Dataset(args.dataset, root) 47 | dataset.set_tracker(tracker_dir, trackers) 48 | benchmark = OPEBenchmark(dataset) 49 | success_ret = {} 50 | with Pool(processes=args.num) as pool: 51 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success, 52 | trackers), desc='eval success', total=len(trackers), ncols=18): 53 | success_ret.update(ret) 54 | precision_ret = {} 55 | with Pool(processes=args.num) as pool: 56 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision, 57 | trackers), desc='eval precision', total=len(trackers), ncols=18): 58 | precision_ret.update(ret) 59 | benchmark.show_result(success_ret, precision_ret, 60 | show_video_level=args.show_video_level) 61 | if args.vis: 62 | for attr, videos in dataset.attr.items(): 63 | draw_success_precision(success_ret, 64 | name=dataset.name, 65 | videos=videos, 66 | attr=attr, 67 | precision_ret=precision_ret) 68 | elif 'UAV20' in args.dataset: 69 | dataset = UAV20Dataset(args.dataset, root) 70 | dataset.set_tracker(tracker_dir, trackers) 71 | benchmark = 
OPEBenchmark(dataset) 72 | success_ret = {} 73 | with Pool(processes=args.num) as pool: 74 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success, 75 | trackers), desc='eval success', total=len(trackers), ncols=18): 76 | success_ret.update(ret) 77 | precision_ret = {} 78 | with Pool(processes=args.num) as pool: 79 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision, 80 | trackers), desc='eval precision', total=len(trackers), ncols=18): 81 | precision_ret.update(ret) 82 | benchmark.show_result(success_ret, precision_ret, 83 | show_video_level=args.show_video_level) 84 | if args.vis: 85 | for attr, videos in dataset.attr.items(): 86 | draw_success_precision(success_ret, 87 | name=dataset.name, 88 | videos=videos, 89 | attr=attr, 90 | precision_ret=precision_ret) 91 | elif 'DTB70' in args.dataset: 92 | dataset = DTBDataset(args.dataset, root) 93 | dataset.set_tracker(tracker_dir, trackers) 94 | benchmark = OPEBenchmark(dataset) 95 | success_ret = {} 96 | with Pool(processes=args.num) as pool: 97 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success, 98 | trackers), desc='eval success', total=len(trackers), ncols=18): 99 | success_ret.update(ret) 100 | precision_ret = {} 101 | with Pool(processes=args.num) as pool: 102 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision, 103 | trackers), desc='eval precision', total=len(trackers), ncols=18): 104 | precision_ret.update(ret) 105 | benchmark.show_result(success_ret, precision_ret, 106 | show_video_level=args.show_video_level) 107 | if args.vis: 108 | for attr, videos in dataset.attr.items(): 109 | draw_success_precision(success_ret, 110 | name=dataset.name, 111 | videos=videos, 112 | attr=attr, 113 | precision_ret=precision_ret) 114 | elif 'UAV123' in args.dataset: 115 | dataset = UAVDataset(args.dataset, root) 116 | dataset.set_tracker(tracker_dir, trackers) 117 | benchmark = OPEBenchmark(dataset) 118 | success_ret = {} 119 | with Pool(processes=args.num) as pool: 120 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success, 121 | trackers), desc='eval success', total=len(trackers), ncols=18): 122 | success_ret.update(ret) 123 | precision_ret = {} 124 | with Pool(processes=args.num) as pool: 125 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision, 126 | trackers), desc='eval precision', total=len(trackers), ncols=18): 127 | precision_ret.update(ret) 128 | benchmark.show_result(success_ret, precision_ret, 129 | show_video_level=args.show_video_level) 130 | if args.vis: 131 | for attr, videos in dataset.attr.items(): 132 | draw_success_precision(success_ret, 133 | name=dataset.name, 134 | videos=videos, 135 | attr=attr, 136 | precision_ret=precision_ret) 137 | else: 138 | print('dataset error') 139 | 140 | 141 | 142 | 143 | 144 | 145 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
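# test.py runs the tracker over every sequence of the chosen benchmark and writes the per-frame bounding boxes to results/<dataset>/<model_name>/<video>.txt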
2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import argparse 9 | import os 10 | import cv2 11 | import torch 12 | import numpy as np 13 | 14 | from pysot.core.config import cfg 15 | from pysot.models.model_builder import ModelBuilder 16 | from pysot.tracker.siamapn_tracker import SiamAPNTracker 17 | from pysot.utils.bbox import get_axis_aligned_bbox 18 | from pysot.utils.model_load import load_pretrain 19 | from toolkit.datasets import DatasetFactory 20 | 21 | def main(): 22 | parser = argparse.ArgumentParser(description='AFRT tracking') 23 | parser.add_argument('--dataset', default='UAV123',type=str, 24 | help='datasets') 25 | parser.add_argument('--config', default='./../experiments/config.yaml', type=str, 26 | help='config file') 27 | parser.add_argument('--snapshot', default='./snapshot/AFRT.pth', type=str, 28 | help='snapshot of models to eval') 29 | parser.add_argument('--video', default='', type=str, 30 | help='eval one special video') 31 | parser.add_argument('--vis', action='store_true', 32 | help='whether to visualize results') 33 | args = parser.parse_args() 34 | 35 | torch.set_num_threads(1) 36 | 37 | cfg.merge_from_file(args.config) 38 | 39 | cur_dir = os.path.dirname(os.path.realpath(__file__))  # return the directory of this file's absolute path 40 | dataset_root = os.path.join(cur_dir, '../test_dataset', args.dataset) 41 | 42 | model = ModelBuilder() 43 | 44 | model = load_pretrain(model, args.snapshot).cuda().eval() 45 | 46 | tracker = SiamAPNTracker(model) 47 | 48 | dataset = DatasetFactory.create_dataset(name=args.dataset, 49 | dataset_root=dataset_root, 50 | load_img=False) 51 | 52 | model_name = args.snapshot.split('/')[-1].split('.')[0]+str(cfg.TRACK.w1) 53 | 54 | for v_idx, video in enumerate(dataset): 55 | 56 | if args.video != '': 57 | # test one special video 58 | if video.name != args.video: 59 | continue 60 | toc = 0 61 | pred_bboxes = [] 62 | scores = [] 63 | track_times = [] 64 | for idx, (img, gt_bbox) in enumerate(video): 65 | tic = cv2.getTickCount() 66 | if idx == 0: 67 | cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox)) 68 | gt_bbox_ = [cx-(w-1)/2, cy-(h-1)/2, w, h] 69 | tracker.init(img, gt_bbox_) 70 | pred_bbox = gt_bbox_ 71 | scores.append(None) 72 | if 'VOT2018-LT' == args.dataset: 73 | pred_bboxes.append([1]) 74 | else: 75 | pred_bboxes.append(pred_bbox) 76 | else: 77 | outputs = tracker.track(img) 78 | pred_bbox = outputs['bbox'] 79 | pred_bboxes.append(pred_bbox) 80 | scores.append(outputs['best_score']) 81 | toc += cv2.getTickCount() - tic 82 | track_times.append((cv2.getTickCount() - tic)/cv2.getTickFrequency()) 83 | if idx == 0: 84 | cv2.destroyAllWindows() 85 | if args.vis and idx > 0: 86 | try: 87 | gt_bbox = list(map(int, gt_bbox)) 88 | except: 89 | gt_bbox=[0,0,0,0] 90 | pred_bbox = list(map(int, pred_bbox)) 91 | cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]), 92 | (gt_bbox[0]+gt_bbox[2], gt_bbox[1]+gt_bbox[3]), (0, 255, 0), 3) 93 | cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]), 94 | (pred_bbox[0]+pred_bbox[2], pred_bbox[1]+pred_bbox[3]), (0, 255, 255), 3) 95 | cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2) 96 | cv2.imshow(video.name, img) 97 | cv2.waitKey(1) 98 | toc /= cv2.getTickFrequency() 99 | # save results 100 | 101 | model_path = os.path.join('results', args.dataset, model_name) 102 | if not os.path.isdir(model_path): 103 | os.makedirs(model_path) 104 | result_path = 
os.path.join(model_path, '{}.txt'.format(video.name)) 105 | with open(result_path, 'w') as f: 106 | for x in pred_bboxes: 107 | f.write(','.join([str(i) for i in x])+'\n') 108 | print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format( 109 | v_idx+1, video.name, toc, idx / toc)) 110 | 111 | if __name__ == '__main__': 112 | main() 113 | -------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | import gc 8 | import argparse 9 | import logging 10 | import os 11 | import time 12 | import math 13 | import json 14 | import random 15 | import numpy as np 16 | 17 | import torch 18 | import torch.nn as nn 19 | from torch.utils.data import DataLoader 20 | from tensorboardX import SummaryWriter 21 | from torch.nn.utils import clip_grad_norm_ 22 | from torch.utils.data.distributed import DistributedSampler 23 | 24 | from pysot.utils.lr_scheduler import build_lr_scheduler 25 | from pysot.utils.log_helper import init_log, print_speed, add_file_handler 26 | from pysot.utils.distributed import dist_init, reduce_gradients,\ 27 | average_reduce, get_rank, get_world_size 28 | from pysot.utils.model_load import load_pretrain, restore_from 29 | from pysot.utils.average_meter import AverageMeter 30 | from pysot.utils.misc import describe, commit 31 | from pysot.models.model_builder import ModelBuilder 32 | 33 | from pysot.datasets.dataset import TrkDataset 34 | from pysot.core.config import cfg 35 | 36 | 37 | logger = logging.getLogger('global') 38 | parser = argparse.ArgumentParser(description='HiFT tracking') 39 | parser.add_argument('--cfg', type=str, default='experiments/config.yaml', 40 | help='configuration of tracking') 41 | parser.add_argument('--seed', type=int, default=123456, 42 | help='random seed') 43 | parser.add_argument('--local_rank', type=int, default=0, 44 | help='compulsory for pytorch launcer') 45 | args = parser.parse_args() 46 | 47 | 48 | def seed_torch(seed=0): 49 | random.seed(seed) 50 | os.environ['PYTHONHASHSEED'] = str(seed) 51 | np.random.seed(seed) 52 | torch.manual_seed(seed) 53 | torch.cuda.manual_seed(seed) 54 | torch.backends.cudnn.benchmark = False 55 | torch.backends.cudnn.deterministic = True 56 | 57 | 58 | def build_data_loader(): 59 | logger.info("build train dataset") 60 | # train_dataset 61 | train_dataset = TrkDataset() 62 | logger.info("build dataset done") 63 | 64 | train_sampler = None 65 | if get_world_size() > 1: 66 | train_sampler = DistributedSampler(train_dataset) 67 | train_loader = DataLoader(train_dataset, 68 | batch_size=cfg.TRAIN.BATCH_SIZE, 69 | num_workers=cfg.TRAIN.NUM_WORKERS, 70 | pin_memory=True, 71 | sampler=train_sampler) 72 | return train_loader 73 | 74 | 75 | def build_opt_lr(model, current_epoch=0): 76 | if current_epoch >= cfg.BACKBONE.TRAIN_EPOCH: 77 | for layer in cfg.BACKBONE.TRAIN_LAYERS: 78 | for param in getattr(model.backbone, layer).parameters(): 79 | param.requires_grad = True 80 | for m in getattr(model.backbone, layer).modules(): 81 | if isinstance(m, nn.BatchNorm2d): 82 | m.train() 83 | else: 84 | for param in model.backbone.parameters(): 85 | param.requires_grad = False 86 | for m in model.backbone.modules(): 87 | if isinstance(m, nn.BatchNorm2d): 88 | m.eval() 89 | 90 | 
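# gather the parameter groups to optimize: backbone layers at a scaled learning rate, the grader head at the base learning rate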
trainable_params = [] 91 | trainable_params += [{'params': filter(lambda x: x.requires_grad, 92 | model.backbone.parameters()), 93 | 'lr': cfg.BACKBONE.LAYERS_LR * cfg.TRAIN.BASE_LR}] 94 | 95 | trainable_params += [{'params': model.grader.parameters(), 96 | 'lr': cfg.TRAIN.BASE_LR}] 97 | 98 | optimizer = torch.optim.SGD(trainable_params, 99 | momentum=cfg.TRAIN.MOMENTUM, 100 | weight_decay=cfg.TRAIN.WEIGHT_DECAY) 101 | 102 | lr_scheduler = build_lr_scheduler(optimizer, epochs=cfg.TRAIN.EPOCH) 103 | lr_scheduler.step(cfg.TRAIN.START_EPOCH) 104 | return optimizer, lr_scheduler 105 | 106 | 107 | def train(train_loader, model, optimizer, lr_scheduler, tb_writer): 108 | cur_lr = lr_scheduler.get_cur_lr() 109 | rank = get_rank() 110 | 111 | average_meter = AverageMeter() 112 | 113 | def is_valid_number(x): 114 | return not(math.isnan(x) or math.isinf(x) or x > 1e4) 115 | 116 | world_size = get_world_size() 117 | num_per_epoch = len(train_loader.dataset) // \ 118 | cfg.TRAIN.EPOCH // (cfg.TRAIN.BATCH_SIZE * world_size) 119 | start_epoch = cfg.TRAIN.START_EPOCH 120 | epoch = start_epoch 121 | 122 | if not os.path.exists(cfg.TRAIN.SNAPSHOT_DIR) and \ 123 | get_rank() == 0: 124 | os.makedirs(cfg.TRAIN.SNAPSHOT_DIR) 125 | 126 | logger.info("model\n{}".format(describe(model.module))) 127 | end = time.time() 128 | for idx, data in enumerate(train_loader): 129 | if epoch != idx // num_per_epoch + start_epoch: 130 | epoch = idx // num_per_epoch + start_epoch 131 | 132 | if get_rank() == 0: 133 | torch.save( 134 | {'epoch': epoch, 135 | 'state_dict': model.module.state_dict(), 136 | 'optimizer': optimizer.state_dict()}, 137 | cfg.TRAIN.SNAPSHOT_DIR+'/checkpoint00_e%d.pth' % (epoch)) 138 | 139 | if epoch == cfg.TRAIN.EPOCH: 140 | 141 | return 142 | 143 | if cfg.BACKBONE.TRAIN_EPOCH == epoch: 144 | logger.info('start training backbone.') 145 | optimizer, lr_scheduler = build_opt_lr(model.module, epoch) 146 | logger.info("model\n{}".format(describe(model.module))) 147 | 148 | lr_scheduler.step(epoch) 149 | cur_lr = lr_scheduler.get_cur_lr() 150 | logger.info('epoch: {}'.format(epoch+1)) 151 | 152 | tb_idx = idx 153 | if idx % num_per_epoch == 0 and idx != 0: 154 | for idx, pg in enumerate(optimizer.param_groups): 155 | logger.info('epoch {} lr {}'.format(epoch+1, pg['lr'])) 156 | if rank == 0: 157 | tb_writer.add_scalar('lr/group{}'.format(idx+1), 158 | pg['lr'], tb_idx) 159 | 160 | data_time = average_reduce(time.time() - end) 161 | if rank == 0: 162 | tb_writer.add_scalar('time/data', data_time, tb_idx) 163 | 164 | outputs = model(data) 165 | loss = outputs['total_loss'].mean() 166 | if is_valid_number(loss.data.item()): 167 | optimizer.zero_grad() 168 | loss.backward() 169 | reduce_gradients(model) 170 | 171 | # clip gradient 172 | clip_grad_norm_(model.parameters(), cfg.TRAIN.GRAD_CLIP) 173 | optimizer.step() 174 | 175 | batch_time = time.time() - end 176 | batch_info = {} 177 | batch_info['batch_time'] = average_reduce(batch_time) 178 | batch_info['data_time'] = average_reduce(data_time) 179 | for k, v in sorted(outputs.items()): 180 | batch_info[k] = average_reduce(v.mean().data.item()) 181 | 182 | average_meter.update(**batch_info) 183 | 184 | if rank == 0: 185 | for k, v in batch_info.items(): 186 | tb_writer.add_scalar(k, v, tb_idx) 187 | 188 | if (idx+1) % cfg.TRAIN.PRINT_FREQ == 0: 189 | info = "Epoch: [{}][{}/{}] lr: {:.6f}\n".format( 190 | epoch+1, (idx+1) % num_per_epoch, 191 | num_per_epoch, cur_lr) 192 | for cc, (k, v) in enumerate(batch_info.items()): 193 | if cc % 2 == 0: 194 | info += 
("\t{:s}\t").format( 195 | getattr(average_meter, k)) 196 | else: 197 | info += ("{:s}\n").format( 198 | getattr(average_meter, k)) 199 | logger.info(info) 200 | print_speed(idx+1+start_epoch*num_per_epoch, 201 | average_meter.batch_time.avg, 202 | cfg.TRAIN.EPOCH * num_per_epoch) 203 | end = time.time() 204 | 205 | 206 | 207 | 208 | def main(): 209 | rank, world_size = dist_init() 210 | # rank = 0 211 | logger.info("init done") 212 | 213 | # load cfg 214 | cfg.merge_from_file(args.cfg) 215 | 216 | if rank == 0: 217 | if not os.path.exists(cfg.TRAIN.LOG_DIR): 218 | os.makedirs(cfg.TRAIN.LOG_DIR) 219 | init_log('global', logging.INFO) 220 | if cfg.TRAIN.LOG_DIR: 221 | add_file_handler('global', 222 | os.path.join(cfg.TRAIN.LOG_DIR, 'logs.txt'), 223 | logging.INFO) 224 | 225 | logger.info("Version Information: \n{}\n".format(commit())) 226 | logger.info("config \n{}".format(json.dumps(cfg, indent=4))) 227 | 228 | # create model 229 | model = ModelBuilder().train() 230 | dist_model = nn.DataParallel(model).cuda() 231 | 232 | # load pretrained backbone weights 233 | if cfg.BACKBONE.PRETRAINED: 234 | cur_path = os.path.dirname(os.path.realpath(__file__)) 235 | backbone_path = os.path.join(cur_path, 'pretrained_models/', cfg.BACKBONE.PRETRAINED) 236 | load_pretrain(model.backbone, backbone_path) 237 | 238 | # create tensorboard writer 239 | if rank == 0 and cfg.TRAIN.LOG_DIR: 240 | tb_writer = SummaryWriter(cfg.TRAIN.LOG_DIR) 241 | else: 242 | tb_writer = None 243 | 244 | # build dataset loader 245 | train_loader = build_data_loader() 246 | 247 | # build optimizer and lr_scheduler 248 | optimizer, lr_scheduler = build_opt_lr(dist_model.module, 249 | cfg.TRAIN.START_EPOCH) 250 | 251 | # resume training 252 | if cfg.TRAIN.RESUME: 253 | logger.info("resume from {}".format(cfg.TRAIN.RESUME)) 254 | assert os.path.isfile(cfg.TRAIN.RESUME), \ 255 | '{} is not a valid file.'.format(cfg.TRAIN.RESUME) 256 | model, optimizer, cfg.TRAIN.START_EPOCH = \ 257 | restore_from(model, optimizer, cfg.TRAIN.RESUME) 258 | # load pretrain 259 | elif cfg.TRAIN.PRETRAINED: 260 | cur_path = os.path.dirname(os.path.realpath(__file__)) 261 | backbone_path = os.path.join(cur_path, '/pretrained_models/') 262 | load_pretrain(model, backbone_path+'') 263 | 264 | dist_model = nn.DataParallel(model) 265 | 266 | logger.info(lr_scheduler) 267 | logger.info("model prepare done") 268 | 269 | # start training 270 | train(train_loader, dist_model, optimizer, lr_scheduler, tb_writer) 271 | 272 | 273 | if __name__ == '__main__': 274 | seed_torch(args.seed) 275 | main() 276 | --------------------------------------------------------------------------------