├── LICENSE
├── README.md
├── experiments
│   ├── TCTrack++
│   │   ├── config.yaml
│   │   └── config_online.yaml
│   └── TCTrack
│       ├── config.yaml
│       └── config_l.yaml
├── images
│   └── workflow.jpg
├── pretrained_models
│   ├── temporalalexnet.pth
│   └── temporalalexnet_v2.pth
├── pysot
│   ├── __init__.py
│   ├── core
│   │   ├── __init__.py
│   │   └── config.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── anchortarget.py
│   │   ├── augmentation.py
│   │   ├── augmentationsear.py
│   │   └── dataset.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── alexnet.py
│   │   │   ├── mobile_v2.py
│   │   │   ├── newalexnet.py
│   │   │   ├── resnet_atrous.py
│   │   │   ├── temporalbackbone.py
│   │   │   └── temporalbackbonev2.py
│   │   ├── init_weight.py
│   │   ├── utile_tctrack
│   │   │   ├── loss.py
│   │   │   ├── model_builder.py
│   │   │   ├── trantime.py
│   │   │   ├── utile.py
│   │   │   └── utiletest.py
│   │   └── utile_tctrackplus
│   │       ├── loss.py
│   │       ├── model_builder.py
│   │       ├── trantime.py
│   │       ├── utile.py
│   │       └── utiletest.py
│   ├── tracker
│   │   ├── __init__.py
│   │   ├── base_tracker.py
│   │   ├── tctrack_tracker.py
│   │   └── tctrackplus_tracker.py
│   └── utils
│       ├── __init__.py
│       ├── average_meter.py
│       ├── bbox.py
│       ├── distributed.py
│       ├── location_grid.py
│       ├── log_helper.py
│       ├── lr_scheduler.py
│       ├── misc.py
│       ├── model_load.py
│       └── xcorr.py
├── requirement.txt
├── toolkit
│   ├── __init__.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── dataset.py
│   │   ├── dtb.py
│   │   ├── got10k.py
│   │   ├── lasot.py
│   │   ├── otb.py
│   │   ├── uav.py
│   │   ├── uav10fps.py
│   │   ├── uavtrack112.py
│   │   ├── uavtrack112_l.py
│   │   ├── video.py
│   │   └── visdrone.py
│   ├── evaluation
│   │   ├── __init__.py
│   │   └── ope_benchmark.py
│   ├── utils
│   │   ├── region.pyx
│   │   └── statistics.py
│   └── visualization
│       ├── __init__.py
│       ├── draw_success_precision.py
│       └── draw_utils.py
├── tools
│   ├── demo.py
│   ├── eval.py
│   ├── rt_eva.py
│   ├── test.py
│   ├── test_rt.py
│   ├── train_tctrack.py
│   └── train_tctrackpp.py
└── train_dataset
    ├── got10k
    │   ├── gen_json.py
    │   ├── par_crop.py
    │   └── readme.md
    ├── lasot
    │   ├── gen_json.py
    │   ├── par_crop.py
    │   ├── readme.md
    │   └── train_id.txt
    └── vid
        ├── gen_json.py
        ├── par_crop.py
        ├── parse_vid.py
        ├── readme.md
        └── visual.py

/experiments/TCTrack++/config.yaml:
----------------------------------------
META_ARC: "TCTrack++_alexnet"

BACKBONE:
    TYPE: "alexnet"
    PRETRAINED: 'temporalalexnet_v2.pth'
    TRAIN_LAYERS: ['block3','temporalconv1','b_f1','temporalconv2','b_f2']
    Tempor_TRAIN_LAYERS: ['block3','temporalconv1','b_f1','temporalconv2','b_f2']
    TRAIN_EPOCH: 20
    LAYERS_LR: 0.1

TRACK:
    TYPE: 'TCTrack++_alexnet'
    EXEMPLAR_SIZE: 127
    INSTANCE_SIZE: 287
    CONTEXT_AMOUNT: 0.5
    STRIDE: 8

    w2: 1.0
    w3: 1.0

TRAIN:
    EPOCH: 100
    START_EPOCH: 0
    BATCH_SIZE: 140  #140
    videorangemax: 3
    SEARCH_SIZE: 287
    NUM_GPU: 2
    BASE_LR: 0.005
    RESUME: ''
    WEIGHT_DECAY: 0.0001
    PRETRAINED: 0
    OUTPUT_SIZE: 11
    NUM_WORKERS: 6
    LOC_WEIGHT: 1.2
    CLS_WEIGHT: 1.0
    w1: 1.0
    w2: 0.5
    w3: 0.5
    w4: 1.0
    w5: 1.0

    POS_NUM: 16
    TOTAL_NUM: 64
    NEG_NUM: 16
    LARGER: 1.0
    range: 1.0
    LR:
        TYPE: 'log'
        KWARGS:
            start_lr: 0.01
            end_lr: 0.00025

    LR_WARMUP:
        TYPE: 'step'
        EPOCH: 10
        KWARGS:
            start_lr: 0.005
            end_lr: 0.01
            step: 1

DATASET:
    NAMES:
        - 'VID'
        - 'COCO'
        - 'GOT'
        - 'LaSOT'

    TEMPLATE:
        SHIFT: 4
        SCALE: 0.05
        BLUR: 0.0
        FLIP: 0.0
        COLOR: 1.0

    SEARCH:
        SHIFT: 64
        SCALE: 0.18
        BLUR: 0.2
        FLIP: 0.0
        COLOR: 1.0

    NEG: 0.0  #0.05
    GRAY: 0.0
----------------------------------------
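Note: a minimal usage sketch (not a file in this repo) of how an experiment config is applied, assuming pysot/core/config.py exposes a yacs-style cfg with merge_from_file, as in upstream pysot:

    from pysot.core.config import cfg

    cfg.merge_from_file('experiments/TCTrack++/config.yaml')
    print(cfg.TRACK.EXEMPLAR_SIZE)  # 127
    print(cfg.TRAIN.OUTPUT_SIZE)    # 11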
/experiments/TCTrack++/config_online.yaml:
----------------------------------------
META_ARC: "TCTrack++_alexnet"

BACKBONE:
    TYPE: "alexnet"
    PRETRAINED: 'temporalalexnet_v2.pth'
    TRAIN_LAYERS: ['block3','temporalconv1','b_f1','temporalconv2','b_f2']
    Tempor_TRAIN_LAYERS: ['block3','temporalconv1','b_f1','temporalconv2','b_f2']
    TRAIN_EPOCH: 20
    LAYERS_LR: 0.1

TRACK:
    TYPE: 'TCTrack++_alexnet'
    EXEMPLAR_SIZE: 127
    INSTANCE_SIZE: 287
    CONTEXT_AMOUNT: 0.5
    STRIDE: 8

    w2: 1.0
    w3: 1.0

TRAIN:
    EPOCH: 100
    START_EPOCH: 0
    BATCH_SIZE: 140  #140
    videorangemax: 3
    SEARCH_SIZE: 287
    NUM_GPU: 2
    BASE_LR: 0.005
    RESUME: ''
    WEIGHT_DECAY: 0.0001
    PRETRAINED: 0
    OUTPUT_SIZE: 11
    NUM_WORKERS: 6
    LOC_WEIGHT: 1.2
    CLS_WEIGHT: 1.0
    w1: 1.0
    w2: 0.5
    w3: 0.5
    w4: 1.0
    w5: 1.0

    POS_NUM: 16
    TOTAL_NUM: 64
    NEG_NUM: 16
    LARGER: 1.0
    range: 1.0
    LR:
        TYPE: 'log'
        KWARGS:
            start_lr: 0.01
            end_lr: 0.00025

    LR_WARMUP:
        TYPE: 'step'
        EPOCH: 10
        KWARGS:
            start_lr: 0.005
            end_lr: 0.01
            step: 1

DATASET:
    NAMES:
        - 'VID'
        - 'COCO'
        - 'GOT'
        - 'LaSOT'

    TEMPLATE:
        SHIFT: 4
        SCALE: 0.05
        BLUR: 0.0
        FLIP: 0.0
        COLOR: 1.0

    SEARCH:
        SHIFT: 64
        SCALE: 0.18
        BLUR: 0.2
        FLIP: 0.0
        COLOR: 1.0

    NEG: 0.0  #0.05
    GRAY: 0.0
----------------------------------------
/experiments/TCTrack/config.yaml:
----------------------------------------
META_ARC: "TCTrack_alexnet"

BACKBONE:
    TYPE: "temporalalexnet"
    PRETRAINED: 'temporalalexnet.pth'
    TRAIN_LAYERS: ['block3','temporalconv1','b_f1','temporalconv2','b_f2']
    Tempor_TRAIN_LAYERS: ['block3','temporalconv1','b_f1','temporalconv2','b_f2']
    TRAIN_EPOCH: 10
    LAYERS_LR: 0.1

TRACK:
    TYPE: 'TCTracktracker'
    EXEMPLAR_SIZE: 127
    INSTANCE_SIZE: 287
    CONTEXT_AMOUNT: 0.5
    STRIDE: 8
    PENALTY_K: 0.0595
    WINDOW_INFLUENCE: 0.43985
    LR: 0.3148

    w2: 1.0
    w3: 1.0

TRAIN:
    EPOCH: 100
    START_EPOCH: 0
    BATCH_SIZE: 140
    videorange: 3
    SEARCH_SIZE: 287
    NUM_GPU: 2
    BASE_LR: 0.005
    RESUME: ''
    WEIGHT_DECAY: 0.0001
    PRETRAINED: 0
    OUTPUT_SIZE: 11
    NUM_WORKERS: 8
    LOC_WEIGHT: 3.0
    CLS_WEIGHT: 1.2
    w2: 1.0
    w3: 1.0
    w4: 1.0
    w5: 1.0

    POS_NUM: 16
    TOTAL_NUM: 64
    NEG_NUM: 16
    LARGER: 1.0
    range: 1.0
    LR:
        TYPE: 'log'
        KWARGS:
            start_lr: 0.01
            end_lr: 0.00005

    LR_WARMUP:
        TYPE: 'step'
        EPOCH: 5
        KWARGS:
            start_lr: 0.005
            end_lr: 0.01
            step: 1

DATASET:
    NAMES:
        - 'VID'
        - 'GOT'
        - 'LaSOT'

    TEMPLATE:
        SHIFT: 4
        SCALE: 0.05
        BLUR: 0.0
        FLIP: 0.0
        COLOR: 1.0

    SEARCH:
        SHIFT: 64
        SCALE: 0.18
        BLUR: 0.2
        FLIP: 0.0
        COLOR: 1.0

    NEG: 0.0  #0.05
    GRAY: 0.0
----------------------------------------
/experiments/TCTrack/config_l.yaml:
----------------------------------------
META_ARC: "TCTrack_alexnet"

BACKBONE:
    TYPE: "temporalalexnet"
    PRETRAINED: 'temporalalexnet.pth'
    TRAIN_LAYERS: ['block3','temporalconv1','b_f1','temporalconv2','b_f2']
    Tempor_TRAIN_LAYERS: ['block3','temporalconv1','b_f1','temporalconv2','b_f2']
    TRAIN_EPOCH: 10
    LAYERS_LR: 0.1

TRACK:
    TYPE: 'TCTracktracker'
    EXEMPLAR_SIZE: 127
    INSTANCE_SIZE: 287
    CONTEXT_AMOUNT: 0.5
    STRIDE: 8
    PENALTY_K: 0.0604
    WINDOW_INFLUENCE: 0.4401
    LR: 0.303

    w2: 1.0
    w3: 1.0

TRAIN:
    EPOCH: 100
    START_EPOCH: 0
    BATCH_SIZE: 140
    videorange: 3
    SEARCH_SIZE: 287
    NUM_GPU: 2
    BASE_LR: 0.005
    RESUME: ''
    WEIGHT_DECAY: 0.0001
    PRETRAINED: 0
    OUTPUT_SIZE: 11
    NUM_WORKERS: 8
    LOC_WEIGHT: 3.0
    CLS_WEIGHT: 1.2
    w2: 1.0
    w3: 1.0
    w4: 1.0
    w5: 1.0

    POS_NUM: 16
    TOTAL_NUM: 64
    NEG_NUM: 16
    LARGER: 1.0
    range: 1.0
    LR:
        TYPE: 'log'
        KWARGS:
            start_lr: 0.01
            end_lr: 0.00005

    LR_WARMUP:
        TYPE: 'step'
        EPOCH: 5
        KWARGS:
            start_lr: 0.005
            end_lr: 0.01
            step: 1

DATASET:
    NAMES:
        - 'VID'
        - 'GOT'
        - 'LaSOT'

    TEMPLATE:
        SHIFT: 4
        SCALE: 0.05
        BLUR: 0.0
        FLIP: 0.0
        COLOR: 1.0

    SEARCH:
        SHIFT: 64
        SCALE: 0.18
        BLUR: 0.2
        FLIP: 0.0
        COLOR: 1.0

    NEG: 0.0  #0.05
    GRAY: 0.0
----------------------------------------
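Note: an illustrative sketch (not a repo file) of how TRACK.CONTEXT_AMOUNT, EXEMPLAR_SIZE and INSTANCE_SIZE interact, assuming the standard pysot/SiamFC cropping convention that these trackers inherit:

    import math

    def crop_sizes(w, h, context_amount=0.5, exemplar_size=127, instance_size=287):
        # pad the target with context before cropping the square template region
        wc = w + context_amount * (w + h)
        hc = h + context_amount * (w + h)
        s_z = math.sqrt(wc * hc)                    # template crop side in the image
        s_x = s_z * instance_size / exemplar_size   # search crop side, same scale
        return s_z, s_x

    print(crop_sizes(60, 40))  # a 60x40 target -> (~99.5, ~224.9)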
/images/workflow.jpg:
----------------------------------------
https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/images/workflow.jpg
----------------------------------------
/pretrained_models/temporalalexnet.pth:
----------------------------------------
https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pretrained_models/temporalalexnet.pth
----------------------------------------
/pretrained_models/temporalalexnet_v2.pth:
----------------------------------------
https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pretrained_models/temporalalexnet_v2.pth
----------------------------------------
/pysot/__init__.py:
----------------------------------------
https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/__init__.py
----------------------------------------
/pysot/core/__init__.py:
----------------------------------------
https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/core/__init__.py
----------------------------------------
/pysot/datasets/__init__.py:
----------------------------------------
https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/datasets/__init__.py
----------------------------------------
/pysot/datasets/anchortarget.py:
----------------------------------------
import numpy as np
import paddle
from pysot.core.config import cfg
from pysot.utils.bbox import IoU


class AnchorTarget:
    def __init__(self):
        pass

    def select(self, position, keep_num=16):
        num = position.shape[0]
        if num <= keep_num:
            return position, num
        slt = np.arange(num)
        np.random.shuffle(slt)
        slt = slt[:keep_num]
        return position[slt], keep_num

    def get(self, bbox, size):
        labelcls2 = paddle.zeros((1, size, size))
        # response-map locations mapped back to search-image coordinates
        pre = (16 * paddle.linspace(0, size - 1, size) + 63).reshape([-1, 1])
        pr = paddle.zeros((size ** 2, 2))
        # paddle tensors have no torch-style .repeat(); use paddle.tile instead,
        # and paddle exposes paddle.clip rather than clip_min
        pr[:, 0] = paddle.clip(paddle.tile(pre, [size, 1]).T.reshape([-1]), min=0)
        pr[:, 1] = paddle.clip(paddle.tile(pre, [1, size]).reshape([-1]), min=0)
        labelxff = paddle.zeros((4, size, size), dtype='float32')
        labelcls3 = paddle.zeros((1, size, size))
        weightxff = paddle.zeros((1, size, size))

        target = paddle.to_tensor([bbox.x1, bbox.y1, bbox.x2, bbox.y2])
        index2 = paddle.clip((target - 63) / 16, min=0, max=size - 1).astype('int32')
        w = index2[2] - index2[0] + 1
        h = index2[3] - index2[1] + 1

        for ii in range(size):
            for jj in range(size):
                weightxff[0, ii, jj] = ((ii - (index2[1] + index2[3]) / 2) / (h / 2)) ** 2 + \
                    ((jj - (index2[0] + index2[2]) / 2) / (w / 2)) ** 2

        weightxff[weightxff <= 1] = 1 - weightxff[weightxff <= 1]
        weightxff[(weightxff > 1) | (weightxff < 0.5)] = 0

        pos = paddle.nonzero((weightxff.squeeze() < 0.8) & (weightxff.squeeze() >= 0.5))
        num = len(pos)
        pos, _ = self.select(pos, int(num / 1.2))
        weightxff[:, pos[:, 0], pos[:, 1]] = 0

        index = paddle.clip((target - 63) / 16, min=0, max=size - 1).astype('int32')
        w = index[2] - index[0] + 1
        h = index[3] - index[1] + 1

        for ii in range(size):
            for jj in range(size):
                labelcls3[0, ii, jj] = ((ii - (index2[1] + index2[3]) / 2) / (h / 2)) ** 2 + \
                    ((jj - (index2[0] + index2[2]) / 2) / (w / 2)) ** 2

        labelcls3[labelcls3 <= 1] = 1 - labelcls3[labelcls3 <= 1]
        labelcls3[labelcls3 > 1] = 0

        def con(x):
            # tanh written out explicitly
            return (paddle.exp(x) - paddle.exp(-x)) / (paddle.exp(x) + paddle.exp(-x))

        labelxff[0, :, :] = (pr[:, 0] - target[0]).reshape([cfg.TRAIN.OUTPUT_SIZE, cfg.TRAIN.OUTPUT_SIZE])
        labelxff[1, :, :] = (target[2] - pr[:, 0]).reshape([cfg.TRAIN.OUTPUT_SIZE, cfg.TRAIN.OUTPUT_SIZE])
        labelxff[2, :, :] = (pr[:, 1] - target[1]).reshape([cfg.TRAIN.OUTPUT_SIZE, cfg.TRAIN.OUTPUT_SIZE])
        labelxff[3, :, :] = (target[3] - pr[:, 1]).reshape([cfg.TRAIN.OUTPUT_SIZE, cfg.TRAIN.OUTPUT_SIZE])
        labelxff = con(labelxff / 143)

        for ii in range(size):
            for jj in range(size):
                labelcls2[0, ii, jj] = ((ii - (index2[1] + index2[3]) / 2) / (h / 2)) ** 2 + \
                    ((jj - (index2[0] + index2[2]) / 2) / (w / 2)) ** 2

        labelcls2[(labelcls2 > 1)] = -2
        labelcls2[((labelcls2 <= 1) & (labelcls2 >= 0))] = 1 - labelcls2[((labelcls2 <= 1) & (labelcls2 >= 0))]
        labelcls2[((labelcls2 > 0.3) & (labelcls2 < 0.78))] = -1
        labelcls2[((labelcls2 > 0) & (labelcls2 <= 0.3))] = -2

        neg2 = paddle.nonzero(labelcls2.squeeze() == -2)
        # keep roughly twice as many negatives as there are positive locations
        neg2, _ = self.select(neg2, int(paddle.nonzero(labelcls2 > 0).shape[0] * 2))
        labelcls2[:, neg2[:, 0], neg2[:, 1]] = 0

        return labelcls2, labelxff, labelcls3, weightxff
----------------------------------------
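Note: a hypothetical call (assuming cfg.TRAIN.OUTPUT_SIZE is 11, as in the configs above) showing the four label maps AnchorTarget.get returns for a ground-truth box:

    from pysot.utils.bbox import Corner

    target_maker = AnchorTarget()
    box = Corner(100, 90, 190, 200)  # x1, y1, x2, y2 in search-image coordinates
    labelcls2, labelxff, labelcls3, weightxff = target_maker.get(box, 11)
    print(labelcls2.shape, labelxff.shape, labelcls3.shape, weightxff.shape)
    # [1, 11, 11] [4, 11, 11] [1, 11, 11] [1, 11, 11]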
/pysot/datasets/augmentation.py:
----------------------------------------
import numpy as np
import cv2
import paddle

from pysot.utils.bbox import corner2center, Center, center2corner, Corner


class Augmentation:
    def __init__(self, shift, scale, blur, flip, color):
        self.shift = shift
        self.scale = scale
        self.blur = blur
        self.flip = flip
        self.color = color
        self.rgbVar = paddle.to_tensor(
            [[-0.55919361, 0.98062831, -0.41940627],
             [1.72091413, 0.19879334, -1.82968581],
             [4.64467907, 4.73710203, 4.88324118]], dtype='float32')

    @staticmethod
    def random():
        return np.random.random() * 2 - 1.0

    def _crop_roi(self, image, bbox, out_sz, padding=(0, 0, 0)):
        bbox = [float(x) for x in bbox]
        a = (out_sz - 1) / (bbox[2] - bbox[0])
        b = (out_sz - 1) / (bbox[3] - bbox[1])
        c = -a * bbox[0]
        d = -b * bbox[1]
        mapping = paddle.to_tensor([[a, 0, c],
                                    [0, b, d]], dtype='float32')
        crop = cv2.warpAffine(image, mapping.numpy(), (out_sz, out_sz),
                              borderMode=cv2.BORDER_CONSTANT,
                              borderValue=padding)
        return crop

    def _blur_aug(self, image):
        def rand_kernel():
            sizes = np.arange(5, 46, 2)
            size = np.random.choice(sizes)
            kernel = np.zeros((size, size))
            c = int(size / 2)
            wx = np.random.random()
            kernel[:, c] += 1. / size * wx
            kernel[c, :] += 1. / size * (1 - wx)
            return kernel
        kernel = rand_kernel()
        image = cv2.filter2D(image, -1, kernel)
        return image

    def _color_aug(self, image):
        offset = paddle.matmul(self.rgbVar, paddle.randn([3, 1]))
        offset = paddle.flip(offset, [0])  # bgr 2 rgb
        offset = offset.flatten()
        image = image - offset.numpy()
        return image

    def _gray_aug(self, image):
        grayed = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.cvtColor(grayed, cv2.COLOR_GRAY2BGR)
        return image

    def _shift_scale_aug(self, image, bbox, crop_bbox, size):
        im_h, im_w = image.shape[:2]

        # adjust crop bounding box
        crop_bbox_center = corner2center(crop_bbox)
        if self.scale:
            scale_x = (1.0 + Augmentation.random() * self.scale)
            scale_y = (1.0 + Augmentation.random() * self.scale)
            h, w = crop_bbox_center.h, crop_bbox_center.w
            scale_x = min(scale_x, float(im_w) / w)
            scale_y = min(scale_y, float(im_h) / h)
            crop_bbox_center = Center(crop_bbox_center.x,
                                      crop_bbox_center.y,
                                      crop_bbox_center.w * scale_x,
                                      crop_bbox_center.h * scale_y)

        crop_bbox = center2corner(crop_bbox_center)
        if self.shift:
            sx = Augmentation.random() * self.shift
            sy = Augmentation.random() * self.shift

            x1, y1, x2, y2 = crop_bbox

            sx = max(-x1, min(im_w - 1 - x2, sx))
            sy = max(-y1, min(im_h - 1 - y2, sy))

            crop_bbox = Corner(x1 + sx, y1 + sy, x2 + sx, y2 + sy)

        # adjust target bounding box
        x1, y1 = crop_bbox.x1, crop_bbox.y1
        bbox = Corner(bbox.x1 - x1, bbox.y1 - y1,
                      bbox.x2 - x1, bbox.y2 - y1)

        if self.scale:
            bbox = Corner(bbox.x1 / scale_x, bbox.y1 / scale_y,
                          bbox.x2 / scale_x, bbox.y2 / scale_y)

        image = self._crop_roi(image, crop_bbox, size)
        return image, bbox

    def _flip_aug(self, image, bbox):
        image = cv2.flip(image, 1)
        width = image.shape[1]
        bbox = Corner(width - 1 - bbox.x2, bbox.y1,
                      width - 1 - bbox.x1, bbox.y2)
        return image, bbox

    def __call__(self, image, bbox, size, gray=False):
        shape = image.shape
        crop_bbox = center2corner(Center(shape[0] // 2, shape[1] // 2,
                                         size - 1, size - 1))
        # gray augmentation
        if gray:
            image = self._gray_aug(image)

        # shift scale augmentation
        image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size)

        # color augmentation
        if self.color > np.random.random():
            image = self._color_aug(image)

        # blur augmentation
        if self.blur > np.random.random():
            image = self._blur_aug(image)

        # flip augmentation
        if self.flip and self.flip > np.random.random():
            image, bbox = self._flip_aug(image, bbox)
        return image, bbox
----------------------------------------
/pysot/datasets/augmentationsear.py:
----------------------------------------
import numpy as np
import cv2
import paddle

from pysot.utils.bbox import corner2center, \
    Center, center2corner, Corner


class Augmentations:
    def __init__(self, shift, scale, blur, flip, color):
        self.shift = shift
        self.scale = scale
        self.blur = blur
        self.flip = flip
        self.color = color
        self.rgbVar = np.array(
            [[-0.55919361, 0.98062831, -0.41940627],
             [1.72091413, 0.19879334, -1.82968581],
             [4.64467907, 4.73710203, 4.88324118]], dtype=np.float32)

    # @staticmethod
    # def random():
    #     return np.random.random() * 2 - 1.0

    def _crop_roi(self, image, bbox, out_sz, padding=(0, 0, 0)):
        bbox = [float(x) for x in bbox]
        a = (out_sz - 1) / (bbox[2] - bbox[0])
        b = (out_sz - 1) / (bbox[3] - bbox[1])
        c = -a * bbox[0]
        d = -b * bbox[1]
        # np.float was removed in recent NumPy; use an explicit dtype
        mapping = np.array([[a, 0, c],
                            [0, b, d]]).astype(np.float32)
        crop = cv2.warpAffine(image, mapping, (out_sz, out_sz),
                              borderMode=cv2.BORDER_CONSTANT,
                              borderValue=padding)
        return crop

    def _blur_aug(self, image):
        def rand_kernel():
            sizes = np.arange(5, 46, 2)
            size = np.random.choice(sizes)
            kernel = np.zeros((size, size))
            c = int(size / 2)
            wx = np.random.random()
            kernel[:, c] += 1. / size * wx
            kernel[c, :] += 1. / size * (1 - wx)
            return kernel
        kernel = rand_kernel()
        image = cv2.filter2D(image, -1, kernel)
        return image

    def _color_aug(self, image):
        offset = np.dot(self.rgbVar, np.random.randn(3, 1))
        offset = offset[::-1]  # bgr 2 rgb
        offset = offset.reshape(3)
        image = image - offset
        return image

    def _gray_aug(self, image):
        grayed = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.cvtColor(grayed, cv2.COLOR_GRAY2BGR)
        return image

    def _shift_scale_aug(self, image, bbox, crop_bbox, size, rand):
        im_h, im_w = image.shape[:2]

        # adjust crop bounding box
        crop_bbox_center = corner2center(crop_bbox)
        if self.scale:
            scale_x = (1.0 + rand[0] * self.scale)
            scale_y = (1.0 + rand[1] * self.scale)
            h, w = crop_bbox_center.h, crop_bbox_center.w
            scale_x = min(scale_x, float(im_w) / w)
            scale_y = min(scale_y, float(im_h) / h)
            crop_bbox_center = Center(crop_bbox_center.x,
                                      crop_bbox_center.y,
                                      crop_bbox_center.w * scale_x,
                                      crop_bbox_center.h * scale_y)

        crop_bbox = center2corner(crop_bbox_center)
        if self.shift:
            sx = rand[2] * self.shift
            sy = rand[3] * self.shift

            x1, y1, x2, y2 = crop_bbox

            sx = max(-x1, min(im_w - 1 - x2, sx))
            sy = max(-y1, min(im_h - 1 - y2, sy))

            crop_bbox = Corner(x1 + sx, y1 + sy, x2 + sx, y2 + sy)

        # adjust target bounding box
        x1, y1 = crop_bbox.x1, crop_bbox.y1
        bbox = Corner(bbox.x1 - x1, bbox.y1 - y1,
                      bbox.x2 - x1, bbox.y2 - y1)

        if self.scale:
            bbox = Corner(bbox.x1 / scale_x, bbox.y1 / scale_y,
                          bbox.x2 / scale_x, bbox.y2 / scale_y)

        image = self._crop_roi(image, crop_bbox, size)
        return image, bbox

    def _flip_aug(self, image, bbox):
        image = cv2.flip(image, 1)
        width = image.shape[1]
        bbox = Corner(width - 1 - bbox.x2, bbox.y1,
                      width - 1 - bbox.x1, bbox.y2)
        return image, bbox

    def __call__(self, image, bbox, size, rand, gray=False):
        shape = image.shape
        crop_bbox = center2corner(Center(shape[0] // 2, shape[1] // 2,
                                         size - 1, size - 1))

        # shift scale augmentation
        image, bbox = self._shift_scale_aug(image, bbox, crop_bbox, size, rand)

        # color augmentation
        if self.color > np.random.random():
            image = self._color_aug(image)

        # blur augmentation
        if self.blur > np.random.random():
            image = self._blur_aug(image)

        return image, bbox
----------------------------------------
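Note: a minimal sketch of driving Augmentation with the SEARCH values from the configs above (the exact wiring presumably lives in dataset.py, whose source is not shown here):

    import numpy as np
    from pysot.utils.bbox import Corner

    aug = Augmentation(shift=64, scale=0.18, blur=0.2, flip=0.0, color=1.0)
    image = np.random.randint(0, 255, (287, 287, 3)).astype(np.float32)
    box = Corner(100.0, 100.0, 180.0, 160.0)
    image, box = aug(image, box, size=287)
    print(image.shape, box)  # (287, 287, 3) and the box shifted into crop coordinates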
/pysot/models/__init__.py:
----------------------------------------
https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/models/__init__.py
----------------------------------------
/pysot/models/backbone/__init__.py:
----------------------------------------
# Copyright (c) SenseTime. All Rights Reserved.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from pysot.models.backbone.alexnet import alexnetlegacy, alexnet
from pysot.models.backbone.mobile_v2 import mobilenetv2
from pysot.models.backbone.resnet_atrous import resnet18, resnet34, resnet50

BACKBONES = {
    'alexnetlegacy': alexnetlegacy,
    'mobilenetv2': mobilenetv2,
    'resnet18': resnet18,
    'resnet34': resnet34,
    'resnet50': resnet50,
    'alexnet': alexnet,
}


def get_backbone(name, **kwargs):
    return BACKBONES[name](**kwargs)
----------------------------------------
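Note: usage sketch for the registry above:

    import paddle
    from pysot.models.backbone import get_backbone

    backbone = get_backbone('alexnet', width_mult=1)
    backbone.eval()
    feat = backbone(paddle.rand([1, 3, 127, 127]))
    print(feat.shape)  # [1, 256, 6, 6]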
/pysot/models/backbone/alexnet.py:
----------------------------------------
import paddle.nn as nn


class AlexNetLegacy(nn.Layer):
    configs = [3, 96, 256, 384, 384, 256]

    def __init__(self, width_mult=1):
        configs = list(map(lambda x: 3 if x == 3 else int(x * width_mult),
                           AlexNetLegacy.configs))
        super(AlexNetLegacy, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2D(configs[0], configs[1], kernel_size=11, stride=2),
            nn.BatchNorm2D(configs[1]),
            nn.MaxPool2D(kernel_size=3, stride=2),
            nn.ReLU(),
            nn.Conv2D(configs[1], configs[2], kernel_size=5),
            nn.BatchNorm2D(configs[2]),
            nn.MaxPool2D(kernel_size=3, stride=2),
            nn.ReLU(),
            nn.Conv2D(configs[2], configs[3], kernel_size=3),
            nn.BatchNorm2D(configs[3]),
            nn.ReLU(),
            nn.Conv2D(configs[3], configs[4], kernel_size=3),
            nn.BatchNorm2D(configs[4]),
            nn.ReLU(),
            nn.Conv2D(configs[4], configs[5], kernel_size=3),
            nn.BatchNorm2D(configs[5]),
        )
        self.feature_size = configs[5]

    def forward(self, x):
        x = self.features(x)
        return x


class AlexNet(nn.Layer):
    configs = [3, 96, 256, 384, 384, 256]

    def __init__(self, width_mult=1):
        configs = list(map(lambda x: 3 if x == 3 else int(x * width_mult),
                           AlexNet.configs))
        super(AlexNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2D(configs[0], configs[1], kernel_size=11, stride=2),
            nn.BatchNorm2D(configs[1]),
            nn.MaxPool2D(kernel_size=3, stride=2),
            nn.ReLU(),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2D(configs[1], configs[2], kernel_size=5),
            nn.BatchNorm2D(configs[2]),
            nn.MaxPool2D(kernel_size=3, stride=2),
            nn.ReLU(),
        )
        self.layer3 = nn.Sequential(
            nn.Conv2D(configs[2], configs[3], kernel_size=3),
            nn.BatchNorm2D(configs[3]),
            nn.ReLU(),
        )
        self.layer4 = nn.Sequential(
            nn.Conv2D(configs[3], configs[4], kernel_size=3),
            nn.BatchNorm2D(configs[4]),
            nn.ReLU(),
        )

        self.layer5 = nn.Sequential(
            nn.Conv2D(configs[4], configs[5], kernel_size=3),
            nn.BatchNorm2D(configs[5]),
        )
        self.feature_size = configs[5]

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        return x


def alexnetlegacy(**kwargs):
    return AlexNetLegacy(**kwargs)


def alexnet(**kwargs):
    return AlexNet(**kwargs)
----------------------------------------
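Note: a quick shape check (sketch) of the stride-8 layout above: a 127x127 template gives 6x6 features and a 287x287 search region gives 26x26, so depth-wise correlation yields a 21x21 map, which the stride-2 conv in the TCT head later reduces to the configured OUTPUT_SIZE of 11:

    import paddle
    from pysot.models.backbone.alexnet import alexnet

    net = alexnet()
    net.eval()
    print(net(paddle.rand([1, 3, 127, 127])).shape)  # [1, 256, 6, 6]
    print(net(paddle.rand([1, 3, 287, 287])).shape)  # [1, 256, 26, 26]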
/pysot/models/backbone/mobile_v2.py:
----------------------------------------
import paddle
import paddle.nn as nn


def conv_bn(inp, oup, stride, padding=1):
    return nn.Sequential(
        nn.Conv2D(inp, oup, 3, stride, padding, bias_attr=False),
        nn.BatchNorm2D(oup),
        nn.ReLU6()  # paddle's nn.ReLU6 takes no inplace flag
    )


def conv_1x1_bn(inp, oup):
    return nn.Sequential(
        nn.Conv2D(inp, oup, 1, 1, 0, bias_attr=False),
        nn.BatchNorm2D(oup),
        nn.ReLU6()
    )


class InvertedResidual(nn.Layer):
    def __init__(self, inp, oup, stride, expand_ratio, dilation=1):
        super(InvertedResidual, self).__init__()
        self.stride = stride

        self.use_res_connect = self.stride == 1 and inp == oup

        padding = 2 - stride
        if dilation > 1:
            padding = dilation

        self.conv = nn.Sequential(
            # pw
            nn.Conv2D(inp, inp * expand_ratio, 1, 1, 0, bias_attr=False),
            nn.BatchNorm2D(inp * expand_ratio),
            nn.ReLU6(),
            # dw
            nn.Conv2D(inp * expand_ratio, inp * expand_ratio, 3,
                      stride, padding, dilation=dilation,
                      groups=inp * expand_ratio, bias_attr=False),
            nn.BatchNorm2D(inp * expand_ratio),
            nn.ReLU6(),
            # pw-linear
            nn.Conv2D(inp * expand_ratio, oup, 1, 1, 0, bias_attr=False),
            nn.BatchNorm2D(oup),
        )

    def forward(self, x):
        if self.use_res_connect:
            return x + self.conv(x)
        else:
            return self.conv(x)


class MobileNetV2(nn.Layer):
    def __init__(self, width_mult=1.0, used_layers=[3, 5, 7]):
        super(MobileNetV2, self).__init__()

        self.interverted_residual_setting = [
            # t, c, n, s, d
            [1, 16, 1, 1, 1],
            [6, 24, 2, 2, 1],
            [6, 32, 3, 2, 1],
            [6, 64, 4, 1, 2],
            [6, 96, 3, 1, 2],
            [6, 160, 3, 1, 4],
            [6, 320, 1, 1, 4],
        ]

        self.channels = [24, 32, 96, 320]
        self.channels = [int(c * width_mult) for c in self.channels]

        input_channel = int(32 * width_mult)
        self.last_channel = int(1280 * width_mult) if width_mult > 1.0 else 1280

        self.add_sublayer('layer0', conv_bn(3, input_channel, 2, 0))

        last_dilation = 1

        self.used_layers = used_layers

        for idx, (t, c, n, s, d) in enumerate(self.interverted_residual_setting, start=1):
            output_channel = int(c * width_mult)

            layers = []

            for i in range(n):
                if i == 0:
                    if d == last_dilation:
                        dd = d
                    else:
                        dd = max(d // 2, 1)
                    layers.append(InvertedResidual(input_channel, output_channel, s, t, dd))
                else:
                    layers.append(InvertedResidual(input_channel, output_channel, 1, t, d))
                input_channel = output_channel

            last_dilation = d

            self.add_sublayer('layer%d' % (idx), nn.Sequential(*layers))

    def forward(self, x):
        outputs = []
        for idx in range(8):
            name = "layer%d" % idx
            x = getattr(self, name)(x)
            outputs.append(x)
        p0, p1, p2, p3, p4 = [outputs[i] for i in [1, 2, 3, 5, 7]]
        out = [outputs[i] for i in self.used_layers]
        return out


def mobilenetv2(**kwargs):
    model = MobileNetV2(**kwargs)
    return model


if __name__ == '__main__':
    paddle.set_device('gpu')  # paddle layers have no .cuda(); pick the device globally
    net = mobilenetv2()
    print(net)

    tensor = paddle.rand([1, 3, 255, 255])
    out = net(tensor)

    for i, p in enumerate(out):
        print(i, p.shape)
----------------------------------------
/pysot/models/backbone/newalexnet.py:
----------------------------------------
import paddle
import paddle.nn as nn


class AlexNet(nn.Layer):
    configs = [3, 96, 256, 384, 384, 256]

    def __init__(self, width_mult=1):
        configs = list(map(lambda x: 3 if x == 3 else int(x * width_mult),
                           AlexNet.configs))
        super(AlexNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2D(configs[0], configs[1], kernel_size=11, stride=2),
            nn.BatchNorm2D(configs[1]),
            nn.MaxPool2D(kernel_size=3, stride=2),
            nn.ReLU(),  # paddle's nn.ReLU takes no inplace flag
        )
        self.layer2 = nn.Sequential(
            nn.Conv2D(configs[1], configs[2], kernel_size=5),
            nn.BatchNorm2D(configs[2]),
            nn.MaxPool2D(kernel_size=3, stride=2),
            nn.ReLU(),
        )
        self.layer3 = nn.Sequential(
            nn.Conv2D(configs[2], configs[3], kernel_size=3),
            nn.BatchNorm2D(configs[3]),
            nn.ReLU(),
        )
        self.layer4 = nn.Sequential(
            nn.Conv2D(configs[3], configs[4], kernel_size=3),
            nn.BatchNorm2D(configs[4]),
            nn.ReLU(),
        )

        self.layer5 = nn.Sequential(
            nn.Conv2D(configs[4], configs[5], kernel_size=3),
            nn.BatchNorm2D(configs[5]),
        )
        self.feature_size = configs[5]
        # the first two stages stay frozen during fine-tuning
        for param in self.layer1.parameters():
            param.trainable = False
        for param in self.layer2.parameters():
            param.trainable = False

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        return x
----------------------------------------
/pysot/models/init_weight.py:
----------------------------------------
import paddle.nn as nn


def init_weights(model):
    for m in model.sublayers():
        if isinstance(m, nn.Conv2D):
            # paddle's KaimingNormal takes no torch-style `mode` argument
            nn.initializer.KaimingNormal(nonlinearity='relu')(m.weight)
            if m.bias is not None:
                nn.initializer.Constant(value=0.0)(m.bias)
        elif isinstance(m, nn.BatchNorm2D):
            nn.initializer.Constant(value=1.0)(m.weight)
            nn.initializer.Constant(value=0.0)(m.bias)
----------------------------------------
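Note: sketch of applying the initializer to a freshly built backbone:

    from pysot.models.backbone.alexnet import alexnet
    from pysot.models.init_weight import init_weights

    model = alexnet()
    init_weights(model)  # Kaiming-normal conv weights, zero biases, BN gamma=1 / beta=0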
/pysot/models/utile_tctrack/loss.py:
----------------------------------------
# Copyright (c) SenseTime. All Rights Reserved.

from paddle import nn
import paddle
import paddle.nn.functional as F


def get_cls_loss(pred, label, select):
    # no selected samples: this branch contributes nothing to the loss
    if select.ndim == 0 or select.shape[0] == 0:
        return 0
    # paddle.index_select takes (x, index, axis), not torch's (x, dim, index)
    pred = paddle.index_select(pred, select, axis=0)
    label = paddle.index_select(label, select, axis=0)
    label = paddle.cast(label, 'int64')
    return F.cross_entropy(pred, label)


def select_cross_entropy_loss(pred, label):
    pred = pred.reshape([-1, 2])
    label = label.reshape([-1])
    pos = paddle.nonzero(label == 1).squeeze()
    neg = paddle.nonzero(label == 0).squeeze()
    loss_pos = get_cls_loss(pred, label, pos)
    loss_neg = get_cls_loss(pred, label, neg)
    return loss_pos * 0.5 + loss_neg * 0.5


def DISCLE(pred, target, weight):
    # centre/size decomposition of corner-format boxes
    pred_x = (pred[:, :, 0] + pred[:, :, 2]) / 2
    pred_y = (pred[:, :, 1] + pred[:, :, 3]) / 2
    pred_w = (-pred[:, :, 0] + pred[:, :, 2])
    pred_h = (-pred[:, :, 1] + pred[:, :, 3])

    target_x = (target[:, :, 0] + target[:, :, 2]) / 2
    target_y = (target[:, :, 1] + target[:, :, 3]) / 2
    target_w = (-target[:, :, 0] + target[:, :, 2])
    target_h = (-target[:, :, 1] + target[:, :, 3])

    # centre distance normalised by the target size
    loss = paddle.sqrt(paddle.pow((pred_x - target_x), 2) / target_w +
                       paddle.pow((pred_y - target_y), 2) / target_h)

    weight = weight.reshape(loss.shape)

    return (loss * weight).sum() / (weight.sum() + 1e-6)


class IOULoss(nn.Layer):
    def forward(self, pred, target, weight=None):
        pred_left = pred[:, :, 0]
        pred_top = pred[:, :, 1]
        pred_right = pred[:, :, 2]
        pred_bottom = pred[:, :, 3]

        target_left = target[:, :, 0]
        target_top = target[:, :, 1]
        target_right = target[:, :, 2]
        target_bottom = target[:, :, 3]

        target_aera = (target_right - target_left) * \
                      (target_bottom - target_top)
        pred_aera = (pred_right - pred_left) * \
                    (pred_bottom - pred_top)

        w_intersect = paddle.minimum(pred_right, target_right) - \
                      paddle.maximum(pred_left, target_left)
        w_intersect = w_intersect.clip(min=0)
        h_intersect = paddle.minimum(pred_bottom, target_bottom) - \
                      paddle.maximum(pred_top, target_top)
        h_intersect = h_intersect.clip(min=0)
        area_intersect = w_intersect * h_intersect
        area_union = target_aera + pred_aera - area_intersect
        ious = ((area_intersect) / (area_union + 1e-6)).clip(min=0) + 1e-6

        losses = -paddle.log(ious)
        weight = weight.reshape(losses.shape)

        return (losses * weight).sum() / (weight.sum() + 1e-6)
----------------------------------------
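Note: a worked example (sketch) of the two regression losses on a single box pair; shapes are [batch, N, 4] corners, with weight > 0 marking supervised locations:

    import paddle

    pred = paddle.to_tensor([[[10., 10., 50., 50.]]])    # 40x40 box centred at (30, 30)
    target = paddle.to_tensor([[[20., 20., 60., 60.]]])  # 40x40 box centred at (40, 40)
    weight = paddle.ones([1, 1])

    print(IOULoss()(pred, target, weight))  # IoU = 900/2300, so -log(IoU) ~ 0.938
    print(DISCLE(pred, target, weight))     # sqrt(100/40 + 100/40) = sqrt(5) ~ 2.236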
size).reshape(-1, 1), size).reshape(-1))
92 | 
93 |         w = shap[:, 0, yy, xx] + shap[:, 1, yy, xx]
94 |         h = shap[:, 2, yy, xx] + shap[:, 3, yy, xx]
95 |         x = x - shap[:, 0, yy, xx] + w / 2 + cfg.TRAIN.SEARCH_SIZE // 2
96 |         y = y - shap[:, 2, yy, xx] + h / 2 + cfg.TRAIN.SEARCH_SIZE // 2
97 | 
98 |         anchor = paddle.zeros([cfg.TRAIN.BATCH_SIZE // cfg.TRAIN.NUM_GPU, size ** 2, 4])
99 | 
100 |         anchor[:, :, 0] = x - w / 2
101 |         anchor[:, :, 1] = y - h / 2
102 |         anchor[:, :, 2] = x + w / 2
103 |         anchor[:, :, 3] = y + h / 2
104 | 
105 |         return anchor
106 | 
107 |     def forward(self, data):
108 |         presearch = data['pre_search']
109 |         template = data['template']
110 |         search = data['search']
111 |         bbox = data['bbox']
112 |         labelcls2 = data['label_cls2']
113 |         labelxff = data['labelxff']
114 |         labelcls3 = data['labelcls3']
115 |         weightxff = data['weightxff']
116 | 
117 |         presearch = paddle.concat([presearch, search.unsqueeze(1)], 1)
118 | 
119 |         zf = self.backbone(template.unsqueeze(1))
120 | 
121 |         xf = self.backbone(presearch)
122 |         xf = xf.reshape([cfg.TRAIN.BATCH_SIZE // cfg.TRAIN.NUM_GPU, cfg.TRAIN.videorange + 1, xf.shape[2], xf.shape[3], xf.shape[4]])
123 | 
124 |         loc, cls2, cls3 = self.grader(xf[:, -1, :, :, :], zf, xf[:, :-1, :, :, :].transpose([1, 0, 2, 3, 4]))
125 | 
126 |         cls2 = self.log_softmax(cls2)
127 | 
128 |         cls_loss2 = select_cross_entropy_loss(cls2, labelcls2)
129 |         cls_loss3 = self.cls3loss(cls3, labelcls3)
130 | 
131 |         pre_bbox = self.getcentercuda(loc)
132 |         bbo = self.getcentercuda(labelxff)
133 | 
134 |         loc_loss1 = self.IOULOSS(pre_bbox, bbo, weightxff)
135 |         loc_loss2 = DISCLE(pre_bbox, bbo, weightxff)
136 |         loc_loss = cfg.TRAIN.w2 * loc_loss1 + cfg.TRAIN.w3 * loc_loss2
137 | 
138 |         cls_loss = cfg.TRAIN.w4 * cls_loss2 + cfg.TRAIN.w5 * cls_loss3
139 | 
140 |         outputs = {}
141 |         outputs['total_loss'] = \
142 |             cfg.TRAIN.LOC_WEIGHT * loc_loss \
143 |             + cfg.TRAIN.CLS_WEIGHT * cls_loss
144 | 
145 |         outputs['cls_loss'] = cls_loss
146 |         outputs['loc_loss1'] = loc_loss1
147 |         outputs['loc_loss2'] = loc_loss2
148 | 
149 |         return outputs
150 | 
--------------------------------------------------------------------------------
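With the training-time forward pass above in view, the inference entry points are template() and track(): template() caches the template features, the initial transformer memory, and the temporal backbone state, and each subsequent track() call consumes one frame and refreshes that state. A rough driver loop, assuming the temporal AlexNet backbone and transformer modules import and build cleanly; the random tensors stand in for preprocessed 127/287 crops:

    import paddle
    from pysot.models.utile_tctrack.model_builder import ModelBuilder_tctrack

    model = ModelBuilder_tctrack('test')   # 'test' selects the TCTtest head with cached memory
    model.eval()

    z = paddle.rand([1, 3, 127, 127])      # template crop (exemplar)
    x0 = paddle.rand([1, 3, 287, 287])     # search crop of the first frame
    model.template(z, x0)                  # caches zf, memory, featset1/featset2

    for _ in range(3):                     # later frames
        x = paddle.rand([1, 3, 287, 287])
        out = model.track(x)               # dict with 'cls2', 'cls3', 'loc'
        print(out['loc'].shape)

In a real run the crops come from SiameseTracker.get_subwindow further below, and the tctrack_tracker module post-processes the raw maps into a box.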
/pysot/models/utile_tctrack/utile.py: --------------------------------------------------------------------------------
1 | import paddle
2 | import paddle.nn as nn
3 | import paddle.nn.functional as F
4 | from paddle.nn.initializer import Normal
5 | 
6 | from pysot.models.utile_tctrack.trantime import Transformertime
7 | 
8 | class TCT(nn.Layer):
9 | 
10 |     def __init__(self, cfg):
11 |         super(TCT, self).__init__()
12 | 
13 |         self.conv1 = nn.Sequential(
14 |             nn.Conv2D(256, 192, kernel_size=3, bias_attr=False, stride=2, padding=1),
15 |             nn.BatchNorm2D(192),
16 |             nn.ReLU(),
17 |         )
18 |         self.conv2 = nn.Sequential(
19 |             nn.Conv2D(256, 192, kernel_size=3, bias_attr=False, stride=2, padding=1),
20 |             nn.BatchNorm2D(192),
21 |             nn.ReLU(),
22 |         )
23 | 
24 |         channel = 192
25 | 
26 |         self.convloc = nn.Sequential(
27 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
28 |             nn.BatchNorm2D(channel),
29 |             nn.ReLU(),
30 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
31 |             nn.BatchNorm2D(channel),
32 |             nn.ReLU(),
33 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
34 |             nn.BatchNorm2D(channel),
35 |             nn.ReLU(),
36 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
37 |             nn.BatchNorm2D(channel),
38 |             nn.ReLU(),
39 |             nn.Conv2D(channel, 4, kernel_size=3, stride=1, padding=1),
40 |         )
41 | 
42 |         self.convcls = nn.Sequential(
43 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
44 |             nn.BatchNorm2D(channel),
45 |             nn.ReLU(),
46 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
47 |             nn.BatchNorm2D(channel),
48 |             nn.ReLU(),
49 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
50 |             nn.BatchNorm2D(channel),
51 |             nn.ReLU(),
52 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
53 |             nn.BatchNorm2D(channel),
54 |             nn.ReLU(),
55 |         )
56 | 
57 |         self.transformer = Transformertime(channel, 6, 1, 2)
58 | 
59 |         self.cls1 = nn.Conv2D(channel, 2, kernel_size=3, stride=1, padding=1)
60 |         self.cls2 = nn.Conv2D(channel, 1, kernel_size=3, stride=1, padding=1)
61 | 
62 |         self.reset_parameters()
63 | 
64 |     def reset_parameters(self):
65 |         for modules in [self.conv1, self.conv2, self.convloc, self.convcls, self.cls1, self.cls2]:
66 |             for l in modules.sublayers(include_self=True):
67 |                 if isinstance(l, nn.Conv2D):
68 |                     l.weight.set_value(paddle.normal(mean=0.0, std=0.01, shape=l.weight.shape))
69 | 
70 |     def xcorr_depthwise(self, x, kernel):
71 |         """depthwise cross correlation"""
72 |         batch = kernel.shape[0]
73 |         channel = kernel.shape[1]
74 |         x = x.reshape([1, batch*channel, x.shape[2], x.shape[3]])
75 |         kernel = kernel.reshape([batch*channel, 1, kernel.shape[2], kernel.shape[3]])
76 |         out = F.conv2d(x, kernel, groups=batch*channel)
77 |         out = out.reshape([batch, channel, out.shape[2], out.shape[3]])
78 |         return out
79 | 
80 |     def forward(self, x, z, px):
81 | 
82 |         ppres = self.conv1(self.xcorr_depthwise(px[0], z))
83 | 
84 |         for i in range(len(px)):
85 |             res3 = self.conv2(self.xcorr_depthwise(px[i], z))
86 | 
87 |             b, c, w, h = res3.shape
88 |             memory = self.transformer.encoder(res3.reshape([b, c, -1]).transpose([2, 0, 1]), \
89 |                                               ppres.reshape([b, c, -1]).transpose([2, 0, 1]))
90 |             ppres = memory.transpose([1, 2, 0]).reshape([b, c, w, h])
91 | 
92 |         res3 = self.conv2(self.xcorr_depthwise(x, z))
93 |         _, res = self.transformer(res3.reshape([b, c, -1]).transpose([2, 0, 1]), \
94 |                                   ppres.reshape([b, c, -1]).transpose([2, 0, 1]), \
95 |                                   res3.reshape([b, c, -1]).transpose([2, 0, 1]))
96 |         res = res.transpose([1, 2, 0]).reshape([b, c, w, h])
97 | 
98 |         loc = self.convloc(res)
99 |         acls = self.convcls(res)
100 | 
101 |         cls1 = self.cls1(acls)
102 |         cls2 = self.cls2(acls)
103 | 
104 |         return loc, cls1, cls2
105 | 
--------------------------------------------------------------------------------
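Both the head above and the test-time head that follows shuttle features between the convolutional [b, c, w, h] layout and the transformer's sequence layout of w*h spatial tokens. A minimal round-trip check of that reshape/transpose pair (192 channels and an 11×11 map, matching the head above; values are illustrative):

    import paddle

    feat = paddle.rand([2, 192, 11, 11])                     # [b, c, w, h]
    b, c, w, h = feat.shape
    tokens = feat.reshape([b, c, -1]).transpose([2, 0, 1])   # -> [w*h, b, c] for the transformer
    print(tokens.shape)                                      # [121, 2, 192]
    back = tokens.transpose([1, 2, 0]).reshape([b, c, w, h])
    print(paddle.allclose(feat, back).item())                # True: the mapping is lossless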
/pysot/models/utile_tctrack/utiletest.py: --------------------------------------------------------------------------------
1 | import paddle
2 | import paddle.nn as nn
3 | import paddle.nn.functional as F
4 | from paddle.nn.initializer import Normal
5 | 
6 | from pysot.models.utile_tctrack.trantime import Transformertime
7 | 
8 | class TCTtest(nn.Layer):
9 | 
10 |     def __init__(self, cfg):
11 |         super(TCTtest, self).__init__()
12 | 
13 |         self.conv1 = nn.Sequential(
14 |             nn.Conv2D(256, 192, kernel_size=3, bias_attr=False, stride=2, padding=1),
15 |             nn.BatchNorm2D(192),
16 |             nn.ReLU(),
17 |         )
18 |         self.conv2 = nn.Sequential(
19 |             nn.Conv2D(256, 192, kernel_size=3, bias_attr=False, stride=2, padding=1),
20 |             nn.BatchNorm2D(192),
21 |             nn.ReLU(),
22 |         )
23 | 
24 |         channel = 192
25 | 
26 |         self.convloc = nn.Sequential(
27 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
28 |             nn.BatchNorm2D(channel),
29 |             nn.ReLU(),
30 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
31 |             nn.BatchNorm2D(channel),
32 |             nn.ReLU(),
33 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
34 |             nn.BatchNorm2D(channel),
35 |             nn.ReLU(),
36 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
37 |             nn.BatchNorm2D(channel),
38 |             nn.ReLU(),
39 |             nn.Conv2D(channel, 4, kernel_size=3, stride=1, padding=1),
40 |         )
41 | 
42 |         self.convcls = nn.Sequential(
43 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
44 |             nn.BatchNorm2D(channel),
45 |             nn.ReLU(),
46 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
47 |             nn.BatchNorm2D(channel),
48 |             nn.ReLU(),
49 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
50 |             nn.BatchNorm2D(channel),
51 |             nn.ReLU(),
52 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
53 |             nn.BatchNorm2D(channel),
54 |             nn.ReLU(),
55 |         )
56 | 
57 |         self.transformer = Transformertime(channel, 6, 1, 2)
58 |         self.cls1 = nn.Conv2D(channel, 2, kernel_size=3, stride=1, padding=1)
59 |         self.cls2 = nn.Conv2D(channel, 1, kernel_size=3, stride=1, padding=1)
60 |         for modules in [self.conv1, self.conv2, self.convloc, self.convcls, self.cls1, self.cls2]:
61 |             for l in modules.sublayers(include_self=True):
62 |                 if isinstance(l, nn.Conv2D):
63 |                     l.weight.set_value(paddle.normal(mean=0.0, std=0.01, shape=l.weight.shape))
64 | 
65 |     def xcorr_depthwise(self, x, kernel):
66 |         """depthwise cross correlation"""
67 |         batch = kernel.shape[0]
68 |         channel = kernel.shape[1]
69 |         x = x.reshape([1, batch*channel, x.shape[2], x.shape[3]])
70 |         kernel = kernel.reshape([batch*channel, 1, kernel.shape[2], kernel.shape[3]])
71 |         out = F.conv2d(x, kernel, groups=batch*channel)
72 |         out = out.reshape([batch, channel, out.shape[2], out.shape[3]])
73 |         return out
74 | 
75 |     def forward(self, x, z, ppres):
76 | 
77 |         res3 = self.conv2(self.xcorr_depthwise(x, z))
78 | 
79 |         b, c, w, h = res3.shape
80 |         memory, res = self.transformer(res3.reshape([b, c, -1]).transpose([2, 0, 1]), \
81 |                                        ppres.reshape([b, c, -1]).transpose([2, 0, 1]), \
82 |                                        res3.reshape([b, c, -1]).transpose([2, 0, 1]))
83 |         res = res.transpose([1, 2, 0]).reshape([b, c, w, h])
84 | 
85 |         loc = self.convloc(res)
86 |         acls = self.convcls(res)
87 | 
88 |         cls1 = self.cls1(acls)
89 |         cls2 = self.cls2(acls)
90 | 
91 |         return loc, cls1, cls2, memory
92 | 
--------------------------------------------------------------------------------
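The two heads above differ only in how temporal context arrives: TCT (training) receives the whole stack of past search features and runs the encoder loop itself, while TCTtest (inference) takes the cached transformer memory from the previous frame and returns the updated one. A shape-level smoke test for the training head, assuming the Transformertime module in trantime.py builds correctly; the 26×26 and 6×6 feature maps mimic backbone outputs of 287/127 crops:

    import paddle
    from pysot.core.config import cfg
    from pysot.models.utile_tctrack.utile import TCT

    z = paddle.rand([1, 256, 6, 6])         # template features
    x = paddle.rand([1, 256, 26, 26])       # current-frame search features
    px = paddle.rand([2, 1, 256, 26, 26])   # two past frames, layout [T, B, C, W, H]

    head = TCT(cfg)
    loc, cls1, cls2 = head(x, z, px)
    # expected: loc [1, 4, 11, 11], cls1 [1, 2, 11, 11], cls2 [1, 1, 11, 11]
    print(loc.shape, cls1.shape, cls2.shape)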
/pysot/models/utile_tctrackplus/__pycache__/loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/models/utile_tctrackplus/__pycache__/loss.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/utile_tctrackplus/__pycache__/model_builder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/models/utile_tctrackplus/__pycache__/model_builder.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/utile_tctrackplus/__pycache__/tran.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/models/utile_tctrackplus/__pycache__/tran.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/utile_tctrackplus/__pycache__/trancls.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/models/utile_tctrackplus/__pycache__/trancls.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/utile_tctrackplus/__pycache__/tranloc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/models/utile_tctrackplus/__pycache__/tranloc.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/utile_tctrackplus/__pycache__/trantime.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/models/utile_tctrackplus/__pycache__/trantime.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/utile_tctrackplus/__pycache__/utile.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/models/utile_tctrackplus/__pycache__/utile.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/utile_tctrackplus/__pycache__/utiletest.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/models/utile_tctrackplus/__pycache__/utiletest.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/models/utile_tctrackplus/model_builder.py: --------------------------------------------------------------------------------
1 | import paddle
2 | import paddle.nn as nn
3 | import paddle.nn.functional as F
4 | from pysot.core.config import cfg
5 | from pysot.models.utile_tctrackplus.loss import select_cross_entropy_loss, weight_l1_loss, l1loss, IOULoss, DISCLE
6 | from pysot.models.backbone.temporalbackbonev2 import TemporalAlexNet
7 | from pysot.models.utile_tctrackplus.utile import APN
8 | from pysot.models.utile_tctrackplus.utiletest import APNtest
9 | 
10 | import numpy as np
11 | 
12 | 
13 | class ModelBuilder_tctrackplus(nn.Layer):
14 |     def __init__(self, label):
15 |         super(ModelBuilder_tctrackplus, self).__init__()
16 | 
17 |         self.backbone = TemporalAlexNet()
18 | 
19 |         if label == 'test':
20 |             self.grader = APNtest(cfg)
21 |         else:
22 |             self.grader = APN(cfg)
23 |         self.cls3loss = nn.BCEWithLogitsLoss()
24 |         self.IOULOSS = IOULoss()
25 | 
26 |     def template(self, z, x):
27 |         with paddle.no_grad():
28 |             zf, _, _ = self.backbone.init(paddle.to_tensor(z))
29 |             self.zf = zf
30 | 
31 |             xf, xfeat1, xfeat2 = self.backbone.init(paddle.to_tensor(x))
32 | 
33 |             ppres = self.grader.conv1(self.xcorr_depthwise(xf, zf))
34 | 
35 |             self.memory = ppres
36 |             self.featset1 = xfeat1
37 |             self.featset2 = xfeat2
38 | 
39 |     def xcorr_depthwise(self, x, kernel):
40 |         """depthwise cross correlation
41 |         """
42 |         batch = kernel.shape[0]
43 |         channel = kernel.shape[1]
44 |         x = x.reshape([1, batch * channel, x.shape[2], x.shape[3]])
45 |         kernel = kernel.reshape([batch * channel, 1, kernel.shape[2], kernel.shape[3]])
46 |         out = F.conv2d(x, kernel, groups=batch * channel)
47 |         out = out.reshape([batch, channel, out.shape[2], out.shape[3]])
48 |         return out
49 | 
50 |     def track(self, x):
51 |         with paddle.no_grad():
52 | 
53 |             xf, xfeat1, xfeat2 = self.backbone.eachtest(x, self.featset1, self.featset2)
54 | 
55 |             loc, cls2, cls3, memory = self.grader(xf, self.zf, self.memory)
56 | 
57 |             self.memory = memory
58 |             self.featset1 = xfeat1
59 |             self.featset2 = xfeat2
60 | 
61 |         return {
62 |             'cls2': cls2,
63 |             'cls3': cls3,
64 |             'loc': loc
65 |         }
66 | 
67 |     def log_softmax(self, cls):
68 |         b, a2, h, w = cls.shape
69 |         cls = cls.reshape([b, 2, a2 // 2, h, w])
70 |         cls = cls.transpose([0, 2, 3, 4, 1])
71 |         cls = F.log_softmax(cls, axis=4)
72 | 
73 |         return cls
74 | 
75 |     def getcentercuda(self, mapp):
76 | 
77 |         def dcon(x):
78 |             x = paddle.where(x <= -1, paddle.full_like(x, -0.99), x)
79 |             x = paddle.where(x >= 1, paddle.full_like(x, 0.99), x)
80 |             return (paddle.log(1 + x) - paddle.log(1 - x)) / 2
81 | 
82 |         size = mapp.shape[3]
83 |         # location
84 |         x = paddle.to_tensor(np.tile((16 * (np.linspace(0, size - 1, size)) + 63) - 287 // 2, size).reshape(-1))
85 |         y = paddle.to_tensor(
86 |             np.tile((16 * (np.linspace(0, size - 1, size)) + 63).reshape(-1, 1) - 287 // 2, size).reshape(-1))
87 | 
88 |         shap = dcon(mapp) * 143
89 | 
90 |         xx = np.int16(np.tile(np.linspace(0, size - 1, size), size).reshape(-1))
91 |         yy = np.int16(np.tile(np.linspace(0, size - 1, size).reshape(-1, 1), size).reshape(-1))
92 | 
93 |         w = shap[:, 0, yy, xx] + shap[:, 1, yy, xx]
94 |         h = shap[:, 2, yy, xx] + shap[:, 3, yy, xx]
95 |         x = x - shap[:, 0, yy, xx] + w / 2 + 287 // 2
96 |         y = y - shap[:, 2, yy, xx] + h / 2 + 287 // 2
97 | 
98 |         anchor = paddle.zeros((cfg.TRAIN.BATCH_SIZE // cfg.TRAIN.NUM_GPU, size ** 2, 4))
99 | 
100 |         anchor[:, :, 0] = x - w / 2
101 |         anchor[:, :, 1] = y - h / 2
102 |         anchor[:, :, 2] = x + w / 2
103 |         anchor[:, :, 3] = y + h / 2
104 |         return anchor
105 | 
106 |     def forward(self, data, videorange):
107 |         """ only used in training
108 |         """
109 | 
110 |         presearch = data['pre_search']
111 |         template = data['template']
112 |         search = data['search']
113 |         bbox = data['bbox']
114 |         labelcls2 = data['label_cls2']
115 |         labelxff = data['labelxff']
116 |         labelcls3 = data['labelcls3']
117 |         weightxff = data['weightxff']
118 | 
119 |         presearch = paddle.concat((presearch[:, cfg.TRAIN.videorangemax - videorange:, :, :, :], search.unsqueeze(1)), 1)
120 | 
121 |         zf = self.backbone(template.unsqueeze(1))
122 | 
123 |         xf = self.backbone(presearch)  ###b l c w h
124 |         xf = xf.reshape([cfg.TRAIN.BATCH_SIZE // cfg.TRAIN.NUM_GPU, videorange + 1, xf.shape[-3], xf.shape[-2],
125 |                          xf.shape[-1]])
126 | 
127 |         loc, cls2, cls3 = self.grader(xf[:, -1, :, :, :], zf, xf[:, :-1, :, :, :].transpose([1, 0, 2, 3, 4]))
128 | 
129 |         cls2 = self.log_softmax(cls2)
130 | 
131 |         cls_loss1 = select_cross_entropy_loss(cls2, labelcls2)
132 |         cls_loss2 = self.cls3loss(cls3, labelcls3)
133 | 
134 |         pre_bbox = self.getcentercuda(loc)
135 |         bbo = self.getcentercuda(labelxff)
136 | 
137 |         loc_loss1 = self.IOULOSS(pre_bbox, bbo, weightxff)
138 |         loc_loss2 = weight_l1_loss(loc, labelxff, weightxff)
139 |         loc_loss3 = DISCLE(pre_bbox, bbo, weightxff)
140 |         loc_loss = cfg.TRAIN.w1 * loc_loss1 + cfg.TRAIN.w2 * loc_loss2 + cfg.TRAIN.w3 * loc_loss3
141 | 
142 |         cls_loss = cfg.TRAIN.w4 * cls_loss1 + cfg.TRAIN.w5 * cls_loss2
143 | 
144 |         outputs = {}
145 |         outputs['total_loss'] = \
146 |             cfg.TRAIN.LOC_WEIGHT * loc_loss \
147 |             + cfg.TRAIN.CLS_WEIGHT * cls_loss
148 | 
149 |         outputs['cls_loss'] = cls_loss
150 |         outputs['loc_loss1'] = loc_loss1
151 |         outputs['loc_loss2'] = loc_loss2
152 |         outputs['loc_loss3'] = loc_loss3
153 | 
154 |         return outputs
155 | 
--------------------------------------------------------------------------------
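The dcon helper above is the inverse of tanh: regressed offsets are first clamped into (-1, 1), then mapped through arctanh and scaled by half of the 287-pixel search region (143). A tiny numeric sanity check with made-up values:

    import paddle

    v = paddle.to_tensor([-0.5, 0.0, 0.5])
    decoded = (paddle.log(1 + v) - paddle.log(1 - v)) / 2   # dcon without the clamping step
    print(paddle.allclose(paddle.tanh(decoded), v).item())  # True -> dcon is arctanh
    print((decoded * 143).numpy())                          # offsets in search-region pixels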
/pysot/models/utile_tctrackplus/utile.py: --------------------------------------------------------------------------------
1 | import paddle
2 | import paddle.nn as nn
3 | import paddle.nn.functional as F
4 | import paddle.tensor as tensor
5 | from paddle.nn.initializer import Normal, Uniform
6 | from pysot.models.utile_tctrackplus.trantime import Transformertime
7 | 
8 | 
9 | class APN(nn.Layer):
10 | 
11 |     def __init__(self, cfg):
12 |         super(APN, self).__init__()
13 | 
14 |         self.conv = nn.Sequential(
15 |             nn.Conv2D(384, 192, kernel_size=3, bias_attr=False, stride=2, padding=1),
16 |             nn.BatchNorm2D(192),
17 |             nn.ReLU(),
18 |         )
19 |         self.conv3 = nn.Sequential(
20 |             nn.Conv2D(384, 192, kernel_size=3, bias_attr=False, stride=2, padding=1),
21 |             nn.BatchNorm2D(192),
22 |             nn.ReLU(),
23 |         )
24 |         self.conv2 = nn.Sequential(
25 |             nn.Conv2D(256, 192, kernel_size=3, bias_attr=False, stride=2, padding=1),
26 |             nn.BatchNorm2D(192),
27 |             nn.ReLU(),
28 |         )
29 |         self.conv1 = nn.Sequential(
30 |             nn.Conv2D(256, 192, kernel_size=3, bias_attr=False, stride=2, padding=1),
31 |             nn.BatchNorm2D(192),
32 |             nn.ReLU(),
33 |         )
34 | 
35 |         channel = 192
36 | 
37 |         self.convloc = nn.Sequential(
38 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
39 |             nn.BatchNorm2D(channel),
40 |             nn.ReLU(),
41 |             nn.Conv2D(channel, channel // 2, kernel_size=3, stride=1, padding=1),
42 |             nn.BatchNorm2D(channel // 2),
43 |             nn.ReLU(),
44 |             nn.Conv2D(channel // 2, channel // 4, kernel_size=3, stride=1, padding=1),
45 |             nn.BatchNorm2D(channel // 4),
46 |             nn.ReLU(),
47 |             nn.Conv2D(channel // 4, channel // 8, kernel_size=3, stride=1, padding=1),
48 |             nn.BatchNorm2D(channel // 8),
49 |             nn.ReLU(),
50 |             nn.Conv2D(channel // 8, 4, kernel_size=3, stride=1, padding=1),
51 |         )
52 | 
53 |         self.convcls = nn.Sequential(
54 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
55 |             nn.BatchNorm2D(channel),
56 |             nn.ReLU(),
57 |             nn.Conv2D(channel, channel // 2, kernel_size=3, stride=1, padding=1),
58 |             nn.BatchNorm2D(channel // 2),
59 |             nn.ReLU(),
60 |             nn.Conv2D(channel // 2, channel // 4, kernel_size=3, stride=1, padding=1),
61 |             nn.BatchNorm2D(channel // 4),
62 |             nn.ReLU(),
63 |             nn.Conv2D(channel // 4, channel // 8, kernel_size=3, stride=1, padding=1),
64 |             nn.BatchNorm2D(channel // 8),
65 |             nn.ReLU(),
66 |         )
67 | 
68 |         self.transformer = Transformertime(channel, 6, 1, 2)
69 | 
70 |         self.cls1 = nn.Conv2D(channel // 8, 2, kernel_size=3, stride=1, padding=1)
71 |         self.cls2 = nn.Conv2D(channel // 8, 1, kernel_size=3, stride=1, padding=1)
72 | 
73 |         self._initialize_weights()
74 | 
75 |     def _initialize_weights(self):
76 |         for module in [self.conv1, self.conv2, self.conv3, self.convloc, self.convcls, self.cls1, self.cls2]:
77 |             for layer in module.sublayers(include_self=True):
78 |                 if isinstance(layer, nn.Conv2D):
79 |                     layer.weight.set_value(paddle.uniform(layer.weight.shape, min=0.0, max=0.01))
80 | 
81 |     def xcorr_depthwise(self, x, kernel):
82 |         batch = kernel.shape[0]
83 |         channel = kernel.shape[1]
84 |         x = x.reshape([1, batch * channel, x.shape[2], x.shape[3]])
85 |         kernel = kernel.reshape([batch * channel, 1, kernel.shape[2], kernel.shape[3]])
86 |         out = F.conv2d(x, kernel, groups=batch * channel)
87 |         out = out.reshape([batch, channel, out.shape[2], out.shape[3]])
88 |         return out
89 | 
90 |     def forward(self, x, z, px):
91 |         ppres = self.conv1(self.xcorr_depthwise(px[0], z))
92 | 
93 |         for i in range(len(px)):
94 |             res3 = self.conv2(self.xcorr_depthwise(px[i], z))
95 |             b, c, w, h = res3.shape
96 |             memory = self.transformer.encoder(res3.reshape([b, c, -1]).transpose([2, 0, 1]),
97 |                                               ppres.reshape([b, c, -1]).transpose([2, 0, 1]))
98 |             ppres = memory.transpose([1, 2, 0]).reshape([b, c, w, h])
99 | 
100 |         res3 = self.conv2(self.xcorr_depthwise(x, z))
101 |         _, res = self.transformer(res3.reshape([b, c, -1]).transpose([2, 0, 1]),
102 |                                   ppres.reshape([b, c, -1]).transpose([2, 0, 1]),
103 |                                   res3.reshape([b, c, -1]).transpose([2, 0, 1]))
104 |         res = res.transpose([1, 2, 0]).reshape([b, c, w, h])
105 | 
106 |         loc = self.convloc(res)
107 |         acls = self.convcls(res)
108 | 
109 |         cls1 = self.cls1(acls)
110 |         cls2 = self.cls2(acls)
111 | 
112 |         return loc, cls1, cls2
113 | 
--------------------------------------------------------------------------------
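APN.forward above expects px as a time-major stack of past search features; ModelBuilder_tctrackplus builds it from the backbone output by splitting off the current frame and swapping the batch and time axes. A shape-only illustration (sizes are arbitrary stand-ins for real backbone maps):

    import paddle

    xf = paddle.rand([2, 4, 256, 26, 26])         # backbone output: [B, T+1, C, W, H]
    px = xf[:, :-1].transpose([1, 0, 2, 3, 4])    # past frames, time-major: [T, B, C, W, H]
    x_cur = xf[:, -1]                             # current frame: [B, C, W, H]
    print(px.shape, x_cur.shape)                  # [3, 2, 256, 26, 26] [2, 256, 26, 26]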
/pysot/models/utile_tctrackplus/utiletest.py: --------------------------------------------------------------------------------
1 | import paddle
2 | import paddle.nn as nn
3 | import paddle.nn.functional as F
4 | import paddle.tensor as tensor
5 | from paddle.nn.initializer import Normal, Uniform
6 | from pysot.models.utile_tctrackplus.trantime import Transformertime
7 | 
8 | 
9 | class APNtest(nn.Layer):
10 | 
11 |     def __init__(self, cfg):
12 |         super(APNtest, self).__init__()
13 | 
14 |         self.conv = nn.Sequential(
15 |             nn.Conv2D(384, 192, kernel_size=3, bias_attr=False, stride=2, padding=1),
16 |             nn.BatchNorm2D(192),
17 |             nn.ReLU(),
18 |         )
19 |         self.conv3 = nn.Sequential(
20 |             nn.Conv2D(384, 192, kernel_size=3, bias_attr=False, stride=2, padding=1),
21 |             nn.BatchNorm2D(192),
22 |             nn.ReLU(),
23 |         )
24 |         self.conv2 = nn.Sequential(
25 |             nn.Conv2D(256, 192, kernel_size=3, bias_attr=False, stride=2, padding=1),
26 |             nn.BatchNorm2D(192),
27 |             nn.ReLU(),
28 |         )
29 |         self.conv1 = nn.Sequential(
30 |             nn.Conv2D(256, 192, kernel_size=3, bias_attr=False, stride=2, padding=1),
31 |             nn.BatchNorm2D(192),
32 |             nn.ReLU(),
33 |         )
34 | 
35 |         channel = 192
36 | 
37 |         self.convloc = nn.Sequential(
38 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
39 |             nn.BatchNorm2D(channel),
40 |             nn.ReLU(),
41 |             nn.Conv2D(channel, channel // 2, kernel_size=3, stride=1, padding=1),
42 |             nn.BatchNorm2D(channel // 2),
43 |             nn.ReLU(),
44 |             nn.Conv2D(channel // 2, channel // 4, kernel_size=3, stride=1, padding=1),
45 |             nn.BatchNorm2D(channel // 4),
46 |             nn.ReLU(),
47 |             nn.Conv2D(channel // 4, channel // 8, kernel_size=3, stride=1, padding=1),
48 |             nn.BatchNorm2D(channel // 8),
49 |             nn.ReLU(),
50 |             nn.Conv2D(channel // 8, 4, kernel_size=3, stride=1, padding=1),
51 |         )
52 | 
53 |         self.convcls = nn.Sequential(
54 |             nn.Conv2D(channel, channel, kernel_size=3, stride=1, padding=1),
55 |             nn.BatchNorm2D(channel),
56 |             nn.ReLU(),
57 |             nn.Conv2D(channel, channel // 2, kernel_size=3, stride=1, padding=1),
58 |             nn.BatchNorm2D(channel // 2),
59 |             nn.ReLU(),
60 |             nn.Conv2D(channel // 2, channel // 4, kernel_size=3, stride=1, padding=1),
61 |             nn.BatchNorm2D(channel // 4),
62 |             nn.ReLU(),
63 |             nn.Conv2D(channel // 4, channel // 8, kernel_size=3, stride=1, padding=1),
64 |             nn.BatchNorm2D(channel // 8),
65 |             nn.ReLU(),
66 |         )
67 | 
68 |         self.transformer = Transformertime(channel, 6, 1, 2)
69 |         self.cls1 = nn.Conv2D(channel // 8, 2, kernel_size=3, stride=1, padding=1)
70 |         self.cls2 = nn.Conv2D(channel // 8, 1, kernel_size=3, stride=1, padding=1)
71 | 
72 |         self._initialize_weights()
73 | 
74 |     def _initialize_weights(self):
75 |         for module in [self.conv1, self.conv2, self.conv3, self.convloc, self.convcls, self.cls1, self.cls2]:
76 |             for layer in module.sublayers(include_self=True):
77 |                 if isinstance(layer, nn.Conv2D):
78 |                     layer.weight.set_value(paddle.uniform(layer.weight.shape, min=0.0, max=0.01))
79 | 
80 |     def xcorr_depthwise(self, x, kernel):
81 |         batch = kernel.shape[0]
82 |         channel = kernel.shape[1]
83 |         x = x.reshape([1, batch * channel, x.shape[2], x.shape[3]])
84 |         kernel = kernel.reshape([batch * channel, 1, kernel.shape[2], kernel.shape[3]])
85 |         out = F.conv2d(x, kernel, groups=batch * channel)
86 |         out = out.reshape([batch, channel, out.shape[2], out.shape[3]])
87 |         return out
88 | 
89 |     def forward(self, x, z, ppres):
90 |         res3 = self.conv2(self.xcorr_depthwise(x, z))
91 | 
92 |         b, c, w, h = res3.shape
93 |         memory, res = self.transformer(res3.reshape([b, c, -1]).transpose([2, 0, 1]),
94 |                                        ppres.reshape([b, c, -1]).transpose([2, 0, 1]),
95 |                                        res3.reshape([b, c, -1]).transpose([2, 0, 1]))
96 | 
97 |         res = res.transpose([1, 2, 0]).reshape([b, c, w, h])
98 | 
99 |         loc = self.convloc(res)
100 |         acls = self.convcls(res)
101 | 
102 |         cls1 = self.cls1(acls)
103 |         cls2 = self.cls2(acls)
104 | 
105 |         return loc, cls1, cls2, memory
106 | 
--------------------------------------------------------------------------------
/pysot/tracker/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/tracker/__init__.py -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/tracker/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/tracker/__pycache__/__init__.cpython-38.pyc --------------------------------------------------------------------------------
/pysot/tracker/__pycache__/base_tracker.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/tracker/__pycache__/base_tracker.cpython-37.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/base_tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/tracker/__pycache__/base_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/dsiamrpn_tracker.cpython-38.pyc:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/tracker/__pycache__/dsiamrpn_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/siamapn_tracker.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/tracker/__pycache__/siamapn_tracker.cpython-37.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/siamapn_tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/tracker/__pycache__/siamapn_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/siamapn_tracker_loop.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/tracker/__pycache__/siamapn_tracker_loop.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/tctrack_tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/tracker/__pycache__/tctrack_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/__pycache__/tctrackplus_tracker.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/tracker/__pycache__/tctrackplus_tracker.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/tracker/base_tracker.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 
2 | 3 | import cv2 4 | import numpy as np 5 | import paddle 6 | 7 | from pysot.core.config import cfg 8 | 9 | 10 | class BaseTracker(object): 11 | """ Base tracker of single object tracking 12 | """ 13 | def init(self, img, bbox): 14 | """ 15 | args: 16 | img(np.ndarray): BGR image 17 | bbox(list): [x, y, width, height] 18 | x, y need to be 0-based 19 | """ 20 | raise NotImplementedError 21 | 22 | def track(self, img): 23 | """ 24 | args: 25 | img(np.ndarray): BGR image 26 | return: 27 | bbox(list):[x, y, width, height] 28 | """ 29 | raise NotImplementedError 30 | 31 | 32 | class SiameseTracker(BaseTracker): 33 | def get_subwindow(self, im, pos, model_sz, original_sz, avg_chans): 34 | """ 35 | args: 36 | im: bgr based image 37 | pos: center position 38 | model_sz: exemplar size 39 | original_sz: original size 40 | avg_chans: channel average 41 | """ 42 | if isinstance(pos, float): 43 | pos = [pos, pos] 44 | sz = original_sz 45 | im_sz = im.shape 46 | c = (original_sz + 1) / 2 47 | # context_xmin = round(pos[0] - c) # py2 and py3 round 48 | context_xmin = np.floor(pos[0] - c + 0.5) 49 | context_xmax = context_xmin + sz - 1 50 | # context_ymin = round(pos[1] - c) 51 | context_ymin = np.floor(pos[1] - c + 0.5) 52 | context_ymax = context_ymin + sz - 1 53 | left_pad = int(max(0., -context_xmin)) 54 | top_pad = int(max(0., -context_ymin)) 55 | right_pad = int(max(0., context_xmax - im_sz[1] + 1)) 56 | bottom_pad = int(max(0., context_ymax - im_sz[0] + 1)) 57 | 58 | context_xmin = context_xmin + left_pad 59 | context_xmax = context_xmax + left_pad 60 | context_ymin = context_ymin + top_pad 61 | context_ymax = context_ymax + top_pad 62 | 63 | r, c, k = im.shape 64 | if any([top_pad, bottom_pad, left_pad, right_pad]): 65 | size = (r + top_pad + bottom_pad, c + left_pad + right_pad, k) 66 | te_im = np.zeros(size, np.uint8) 67 | te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im 68 | if top_pad: 69 | te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans 70 | if bottom_pad: 71 | te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans 72 | if left_pad: 73 | te_im[:, 0:left_pad, :] = avg_chans 74 | if right_pad: 75 | te_im[:, c + left_pad:, :] = avg_chans 76 | im_patch = te_im[int(context_ymin):int(context_ymax + 1), 77 | int(context_xmin):int(context_xmax + 1), :] 78 | else: 79 | im_patch = im[int(context_ymin):int(context_ymax + 1), 80 | int(context_xmin):int(context_xmax + 1), :] 81 | 82 | if not np.array_equal(model_sz, original_sz): 83 | im_patch = cv2.resize(im_patch, (model_sz, model_sz)) 84 | im_patch = im_patch.transpose(2, 0, 1) 85 | im_patch = im_patch[np.newaxis, :, :, :] 86 | im_patch = im_patch.astype(np.float32) 87 | im_patch = paddle.to_tensor(im_patch) 88 | if cfg.CUDA: 89 | im_patch = im_patch.cuda() 90 | return im_patch 91 | -------------------------------------------------------------------------------- /pysot/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__init__.py -------------------------------------------------------------------------------- /pysot/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- 
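SiameseTracker.get_subwindow in base_tracker.py above crops a square, context-padded patch around a centre point, fills out-of-frame pixels with the channel average, resizes to the network input size, and returns a [1, 3, model_sz, model_sz] float32 paddle tensor. A rough usage sketch with a dummy frame (all values here are made up; with cfg.CUDA set, the patch is additionally moved to GPU):

    import numpy as np
    from pysot.tracker.base_tracker import SiameseTracker

    frame = np.random.randint(0, 255, (720, 1280, 3), dtype=np.uint8)  # dummy BGR frame
    avg_chans = np.mean(frame, axis=(0, 1))                            # padding colour

    tracker = SiameseTracker()
    z_crop = tracker.get_subwindow(frame,
                                   pos=[640.0, 360.0],  # crop centre (x, y)
                                   model_sz=127,        # network exemplar size
                                   original_sz=200,     # side of the square context crop
                                   avg_chans=avg_chans)
    print(z_crop.shape)  # [1, 3, 127, 127]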
/pysot/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/average_meter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__pycache__/average_meter.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/bbox.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__pycache__/bbox.cpython-37.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/bbox.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__pycache__/bbox.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/distributed.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__pycache__/distributed.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/location_grid.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__pycache__/location_grid.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/log_helper.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__pycache__/log_helper.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/lr_scheduler.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__pycache__/lr_scheduler.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/model_load.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__pycache__/model_load.cpython-37.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/model_load.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__pycache__/model_load.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/__pycache__/xcorr.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/pysot/utils/__pycache__/xcorr.cpython-38.pyc -------------------------------------------------------------------------------- /pysot/utils/average_meter.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | class Meter(object): 7 | def __init__(self, name, val, avg): 8 | self.name = name 9 | self.val = val 10 | self.avg = avg 11 | 12 | def __repr__(self): 13 | return "{name}: {val:.6f} ({avg:.6f})".format( 14 | name=self.name, val=self.val, avg=self.avg 15 | ) 16 | 17 | def __format__(self, *tuples, **kwargs): 18 | return self.__repr__() 19 | 20 | 21 | class AverageMeter: 22 | """Computes and stores the average and current value""" 23 | def __init__(self, num=100): 24 | self.num = num 25 | self.reset() 26 | 27 | def reset(self): 28 | self.val = {} 29 | self.sum = {} 30 | self.count = {} 31 | self.history = {} 32 | 33 | def update(self, batch=1, **kwargs): 34 | val = {} 35 | for k in kwargs: 36 | val[k] = kwargs[k] / float(batch) 37 | self.val.update(val) 38 | for k in kwargs: 39 | if k not in self.sum: 40 | self.sum[k] = 0 41 | self.count[k] = 0 42 | self.history[k] = [] 43 | self.sum[k] += kwargs[k] 44 | self.count[k] += batch 45 | for _ in range(batch): 46 | self.history[k].append(val[k]) 47 | 48 | if self.num <= 0: 49 | # < 0, average all 50 | self.history[k] = [] 51 | 52 | # == 0: no average 53 | if self.num == 0: 54 | self.sum[k] = self.val[k] 55 | self.count[k] = 1 56 | 57 | elif len(self.history[k]) > self.num: 58 | pop_num = len(self.history[k]) - self.num 59 | for _ in range(pop_num): 60 | self.sum[k] -= self.history[k][0] 61 | del self.history[k][0] 62 | self.count[k] -= 1 63 | 64 | def __repr__(self): 65 | s = '' 66 | for k in self.sum: 67 | s += self.format_str(k) 68 | return s 69 | 70 | def format_str(self, attr): 71 | return "{name}: {val:.6f} ({avg:.6f}) ".format( 72 | name=attr, 73 | val=float(self.val[attr]), 74 | avg=float(self.sum[attr]) / self.count[attr]) 75 | 76 | def __getattr__(self, attr): 77 | if attr in self.__dict__: 78 | return super(AverageMeter, self).__getattr__(attr) 79 | if attr not in self.sum: 80 | print("invalid key '{}'".format(attr)) 81 | return Meter(attr, 0, 0) 82 | return Meter(attr, self.val[attr], self.avg(attr)) 83 | 84 | def avg(self, attr): 85 | return float(self.sum[attr]) / self.count[attr] 86 | 87 | 88 | if __name__ == '__main__': 89 | avg1 = AverageMeter(10) 90 | avg2 = AverageMeter(0) 91 | avg3 = AverageMeter(-1) 92 | 93 | for i in range(20): 94 | avg1.update(s=i) 95 | avg2.update(s=i) 96 | avg3.update(s=i) 97 | 98 | 
print('iter {}'.format(i)) 99 | print(avg1.s) 100 | print(avg2.s) 101 | print(avg3.s) 102 | -------------------------------------------------------------------------------- /pysot/utils/bbox.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from collections import namedtuple 7 | 8 | import numpy as np 9 | 10 | 11 | Corner = namedtuple('Corner', 'x1 y1 x2 y2') 12 | # alias 13 | BBox = Corner 14 | Center = namedtuple('Center', 'x y w h') 15 | 16 | 17 | def corner2center(corner): 18 | """ convert (x1, y1, x2, y2) to (cx, cy, w, h) 19 | Args: 20 | conrner: Corner or np.array (4*N) 21 | Return: 22 | Center or np.array (4 * N) 23 | """ 24 | if isinstance(corner, Corner): 25 | x1, y1, x2, y2 = corner 26 | return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, (x2 - x1), (y2 - y1)) 27 | else: 28 | x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3] 29 | x = (x1 + x2) * 0.5 30 | y = (y1 + y2) * 0.5 31 | w = x2 - x1 32 | h = y2 - y1 33 | return x, y, w, h 34 | 35 | 36 | def center2corner(center): 37 | """ convert (cx, cy, w, h) to (x1, y1, x2, y2) 38 | Args: 39 | center: Center or np.array (4 * N) 40 | Return: 41 | center or np.array (4 * N) 42 | """ 43 | if isinstance(center, Center): 44 | x, y, w, h = center 45 | return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5) 46 | else: 47 | x, y, w, h = center[0], center[1], center[2], center[3] 48 | x1 = x - w * 0.5 49 | y1 = y - h * 0.5 50 | x2 = x + w * 0.5 51 | y2 = y + h * 0.5 52 | return x1, y1, x2, y2 53 | 54 | 55 | def IoU(rect1, rect2): 56 | """ caculate interection over union 57 | Args: 58 | rect1: (x1, y1, x2, y2) 59 | rect2: (x1, y1, x2, y2) 60 | Returns: 61 | iou 62 | """ 63 | # overlap 64 | x1, y1, x2, y2 = rect1[0], rect1[1], rect1[2], rect1[3] 65 | tx1, ty1, tx2, ty2 = rect2[0], rect2[1], rect2[2], rect2[3] ##rect2[0], rect2[1], rect2[2], rect2[3] 66 | 67 | xx1 = np.maximum(tx1, x1) 68 | yy1 = np.maximum(ty1, y1) 69 | xx2 = np.minimum(tx2, x2) 70 | yy2 = np.minimum(ty2, y2) 71 | 72 | ww = np.maximum(0, xx2 - xx1) 73 | hh = np.maximum(0, yy2 - yy1) 74 | 75 | area = (x2-x1) * (y2-y1) 76 | target_a = (tx2-tx1) * (ty2 - ty1) 77 | inter = ww * hh 78 | iou = inter / (area + target_a - inter) 79 | return iou 80 | 81 | 82 | def cxy_wh_2_rect(pos, sz): 83 | """ convert (cx, cy, w, h) to (x1, y1, w, h), 0-index 84 | """ 85 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) 86 | 87 | 88 | def rect_2_cxy_wh(rect): 89 | """ convert (x1, y1, w, h) to (cx, cy, w, h), 0-index 90 | """ 91 | return np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2]), \ 92 | np.array([rect[2], rect[3]]) 93 | 94 | 95 | def cxy_wh_2_rect1(pos, sz): 96 | """ convert (cx, cy, w, h) to (x1, y1, w, h), 1-index 97 | """ 98 | return np.array([pos[0]-sz[0]/2+1, pos[1]-sz[1]/2+1, sz[0], sz[1]]) 99 | 100 | 101 | def rect1_2_cxy_wh(rect): 102 | """ convert (x1, y1, w, h) to (cx, cy, w, h), 1-index 103 | """ 104 | return np.array([rect[0]+rect[2]/2-1, rect[1]+rect[3]/2-1]), \ 105 | np.array([rect[2], rect[3]]) 106 | 107 | 108 | def get_axis_aligned_bbox(region): 109 | """ convert region to (cx, cy, w, h) that represent by axis aligned box 110 | """ 111 | nv = region.size 112 | if nv == 8: 113 | cx = np.mean(region[0::2]) 114 | cy = np.mean(region[1::2]) 115 | x1 = min(region[0::2]) 116 | x2 = max(region[0::2]) 117 | y1 = min(region[1::2]) 118 | y2 = 
max(region[1::2])
119 |         A1 = np.linalg.norm(region[0:2] - region[2:4]) * \
120 |             np.linalg.norm(region[2:4] - region[4:6])
121 |         A2 = (x2 - x1) * (y2 - y1)
122 |         s = np.sqrt(A1 / A2)
123 |         w = s * (x2 - x1) + 1
124 |         h = s * (y2 - y1) + 1
125 |     else:
126 |         x = region[0]
127 |         y = region[1]
128 |         w = region[2]
129 |         h = region[3]
130 |         cx = x+w/2
131 |         cy = y+h/2
132 |     return cx, cy, w, h
133 | 
134 | 
135 | def get_min_max_bbox(region):
136 |     """ convert region to (cx, cy, w, h) that represent by min-max box
137 |     """
138 |     nv = region.size
139 |     if nv == 8:
140 |         cx = np.mean(region[0::2])
141 |         cy = np.mean(region[1::2])
142 |         x1 = min(region[0::2])
143 |         x2 = max(region[0::2])
144 |         y1 = min(region[1::2])
145 |         y2 = max(region[1::2])
146 |         w = x2 - x1
147 |         h = y2 - y1
148 |     else:
149 |         x = region[0]
150 |         y = region[1]
151 |         w = region[2]
152 |         h = region[3]
153 |         cx = x+w/2
154 |         cy = y+h/2
155 |     return cx, cy, w, h
156 | 
-------------------------------------------------------------------------------- /pysot/utils/distributed.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | from __future__ import unicode_literals
5 | 
6 | import os
7 | import socket
8 | import logging
9 | import paddle
10 | import paddle.distributed as dist
11 | import paddle.nn as nn
12 | 
13 | from pysot.utils.log_helper import log_once
14 | 
15 | logger = logging.getLogger('global')
16 | 
17 | 
18 | def average_reduce(v):
19 |     if get_world_size() == 1:
20 |         return v
21 |     tensor = paddle.to_tensor([v], dtype='float32')
22 |     dist.all_reduce(tensor)
23 |     v = tensor.numpy()[0] / get_world_size()
24 |     return v
25 | 
26 | 
27 | class DistModule(nn.Layer):
28 |     def __init__(self, module, bn_method=0):
29 |         super(DistModule, self).__init__()
30 |         self.module = module
31 |         self.bn_method = bn_method
32 |         if get_world_size() > 1:
33 |             broadcast_params(self.module)
34 |         else:
35 |             self.bn_method = 0  # single process
36 | 
37 |     def forward(self, *args, **kwargs):
38 |         broadcast_buffers(self.module, self.bn_method)
39 |         return self.module(*args, **kwargs)
40 | 
41 |     def train(self, mode=True):
42 |         super(DistModule, self).train(mode)
43 |         self.module.train(mode)
44 |         return self
45 | 
46 | 
47 | def broadcast_params(model):
48 |     """ broadcast model parameters """
49 |     for p in model.parameters():
50 |         dist.broadcast(p, src=0)
51 | 
52 | 
53 | def broadcast_buffers(model, method=0):
54 |     """ broadcast model buffers """
55 |     if method == 0:
56 |         return
57 | 
58 |     world_size = get_world_size()
59 | 
60 |     for b in model.buffers():
61 |         if method == 1:  # broadcast from main process
62 |             dist.broadcast(b, src=0)
63 |         elif method == 2:  # average
64 |             dist.all_reduce(b)
65 |             b /= world_size
66 |         else:
67 |             raise Exception('Invalid buffer broadcast code {}'.format(method))
68 | 
69 | 
70 | inited = False
71 | 
72 | 
73 | def _dist_init():
74 |     '''
75 |     if guess right:
76 |         ntasks: world_size (process num)
77 |         proc_id: rank
78 |     '''
79 |     # rank = int(os.environ['RANK'])
80 |     rank = 0
81 |     num_gpus = paddle.distributed.get_world_size()
82 |     paddle.device.set_device('gpu:{}'.format(rank % num_gpus))
83 |     dist.init_parallel_env()
84 |     world_size = paddle.distributed.get_world_size()
85 |     return rank, world_size
86 | 
87 | 
88 | def _get_local_ip():
89 |     try:
90 |         s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
91 |         s.connect(('8.8.8.8', 80))
92 |         ip = s.getsockname()[0]
93 |     finally:
94 |         s.close()
95 |     return ip
96 | 
97 | 
98 | def dist_init():
99 |     global rank, world_size, inited
100 |     # try:
101 |     #     rank, world_size = _dist_init()
102 |     # except RuntimeError as e:
103 |     #     if 'public' in e.args[0]:
104 |     #         logger.info(e)
105 |     #         logger.info('Warning: use single process')
106 |     #         rank, world_size = 0, 1
107 |     #     else:
108 |     #         raise RuntimeError(*e.args)
109 |     rank, world_size = 0, 1
110 |     inited = True
111 |     return rank, world_size
112 | 
113 | 
114 | def get_rank():
115 |     if not inited:
116 |         raise(Exception('dist not inited'))
117 |     return rank
118 | 
119 | 
120 | def get_world_size():
121 |     if not inited:
122 |         raise(Exception('dist not inited'))
123 |     return world_size
124 | 
125 | 
126 | def reduce_gradients(model, _type='sum'):
127 |     types = ['sum', 'avg']
128 |     assert _type in types, 'gradients method must be in "{}"'.format(types)
129 |     log_once("gradients method is {}".format(_type))
130 |     if get_world_size() > 1:
131 |         for param in model.parameters():
132 |             if param.trainable:
133 |                 dist.all_reduce(param.grad)
134 |                 if _type == 'avg':
135 |                     param.grad /= get_world_size()
136 |     else:
137 |         return None
138 | 
--------------------------------------------------------------------------------
/pysot/utils/location_grid.py: --------------------------------------------------------------------------------
1 | import paddle
2 | def compute_locations(features, stride):
3 |     h, w = features.shape[-2:]
4 |     locations_per_level = compute_locations_per_level(
5 |         h, w, stride,
6 |         features.place
7 |     )
8 |     return locations_per_level
9 | 
10 | 
11 | def compute_locations_per_level(h, w, stride, place):
12 |     shifts_x = paddle.arange(
13 |         0, w * stride, step=stride,
14 |         dtype='float32'  # paddle.arange has no place argument; tensors follow the global device
15 |     )
16 |     shifts_y = paddle.arange(
17 |         0, h * stride, step=stride,
18 |         dtype='float32'
19 |     )
20 |     shift_y, shift_x = paddle.meshgrid((shifts_y, shifts_x))
21 |     shift_x = shift_x.reshape([-1])
22 |     shift_y = shift_y.reshape([-1])
23 |     # locations = paddle.stack((shift_x, shift_y), axis=1) + stride + 3*stride  # (size_z-1)/2*size_z 28
24 |     # locations = paddle.stack((shift_x, shift_y), axis=1) + stride
25 |     locations = paddle.stack((shift_x, shift_y), axis=1) + 32  # alex: 48 // 32
26 |     return locations
27 | 
-------------------------------------------------------------------------------- /pysot/utils/log_helper.py: --------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import math
4 | import sys
5 | 
6 | 
7 | if hasattr(sys, 'frozen'):  # support for py2exe
8 |     _srcfile = "logging%s__init__%s" % (os.sep, __file__[-4:])
9 | elif __file__[-4:].lower() in ['.pyc', '.pyo']:
10 |     _srcfile = __file__[:-4] + '.py'
11 | else:
12 |     _srcfile = __file__
13 | _srcfile = os.path.normcase(_srcfile)
14 | 
15 | logs = set()
16 | 
17 | 
18 | class Filter:
19 |     def __init__(self, flag):
20 |         self.flag = flag
21 | 
22 |     def filter(self, x):
23 |         return self.flag
24 | 
25 | 
26 | class Dummy:
27 |     def __init__(self, *arg, **kwargs):
28 |         pass
29 | 
30 |     def __getattr__(self, arg):
31 |         def dummy(*args, **kwargs): pass
32 |         return dummy
33 | 
34 | 
35 | def get_format(logger, level):
36 |     if 'RANK' in os.environ:
37 |         rank = int(os.environ['RANK'])
38 | 
39 |         if level == logging.INFO:
40 |             logger.addFilter(Filter(rank == 0))
41 |     else:
42 |         rank = 0
43 |     format_str = '[%(asctime)s-rk{}-%(filename)s#%(lineno)3d] %(message)s'.format(rank)
44 |     formatter = logging.Formatter(format_str)
45 |     return formatter
46 | 
47 | 
48 | def get_format_custom(logger, level):
49 |     if 'RANK' in os.environ:
50 |         rank = 
int(os.environ['RANK']) 51 | if level == logging.INFO: 52 | logger.addFilter(Filter(rank == 0)) 53 | else: 54 | rank = 0 55 | format_str = '[%(asctime)s-rk{}-%(message)s'.format(rank) 56 | formatter = logging.Formatter(format_str) 57 | return formatter 58 | 59 | 60 | def init_log(name, level=logging.INFO, format_func=get_format): 61 | if (name, level) in logs: 62 | return 63 | logs.add((name, level)) 64 | logger = logging.getLogger(name) 65 | logger.setLevel(level) 66 | ch = logging.StreamHandler() 67 | ch.setLevel(level) 68 | formatter = format_func(logger, level) 69 | ch.setFormatter(formatter) 70 | logger.addHandler(ch) 71 | return logger 72 | 73 | 74 | def add_file_handler(name, log_file, level=logging.INFO): 75 | logger = logging.getLogger(name) 76 | fh = logging.FileHandler(log_file) 77 | fh.setFormatter(get_format(logger, level)) 78 | logger.addHandler(fh) 79 | 80 | 81 | init_log('global') 82 | 83 | 84 | def print_speed(i, i_time, n): 85 | """print_speed(index, index_time, total_iteration)""" 86 | logger = logging.getLogger('global') 87 | average_time = i_time 88 | remaining_time = (n - i) * average_time 89 | remaining_day = math.floor(remaining_time / 86400) 90 | remaining_hour = math.floor(remaining_time / 3600 - 91 | remaining_day * 24) 92 | remaining_min = math.floor(remaining_time / 60 - 93 | remaining_day * 1440 - 94 | remaining_hour * 60) 95 | logger.info('Progress: %d / %d [%d%%], Speed: %.3f s/iter, ETA %d:%02d:%02d (D:H:M)\n' % 96 | (i, n, i / n * 100, 97 | average_time, 98 | remaining_day, remaining_hour, remaining_min)) 99 | 100 | 101 | def find_caller(): 102 | def current_frame(): 103 | try: 104 | raise Exception 105 | except: 106 | return sys.exc_info()[2].tb_frame.f_back 107 | 108 | f = current_frame() 109 | if f is not None: 110 | f = f.f_back 111 | rv = "(unknown file)", 0, "(unknown function)" 112 | while hasattr(f, "f_code"): 113 | co = f.f_code 114 | filename = os.path.normcase(co.co_filename) 115 | rv = (co.co_filename, f.f_lineno, co.co_name) 116 | if filename == _srcfile: 117 | f = f.f_back 118 | continue 119 | break 120 | rv = list(rv) 121 | rv[0] = os.path.basename(rv[0]) 122 | return rv 123 | 124 | 125 | class LogOnce: 126 | def __init__(self): 127 | self.logged = set() 128 | self.logger = init_log('log_once', format_func=get_format_custom) 129 | 130 | def log(self, strings): 131 | fn, lineno, caller = find_caller() 132 | key = (fn, lineno, caller, strings) 133 | if key in self.logged: 134 | return 135 | self.logged.add(key) 136 | message = "{filename:s}<{caller}>#{lineno:3d}] {strings}".format( 137 | filename=fn, lineno=lineno, strings=strings, caller=caller) 138 | self.logger.info(message) 139 | 140 | 141 | once_logger = LogOnce() 142 | 143 | 144 | def log_once(strings): 145 | once_logger.log(strings) 146 | 147 | 148 | def main(): 149 | for i, lvl in enumerate([logging.DEBUG, logging.INFO, 150 | logging.WARNING, logging.ERROR, 151 | logging.CRITICAL]): 152 | log_name = str(lvl) 153 | init_log(log_name, lvl) 154 | logger = logging.getLogger(log_name) 155 | print('****cur lvl:{}'.format(lvl)) 156 | logger.debug('debug') 157 | logger.info('info') 158 | logger.warning('warning') 159 | logger.error('error') 160 | logger.critical('critiacal') 161 | 162 | 163 | if __name__ == '__main__': 164 | main() 165 | for i in range(10): 166 | log_once('xxx') 167 | -------------------------------------------------------------------------------- /pysot/utils/misc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 
SenseTime. All Rights Reserved. 2 | 3 | import os 4 | import numpy as np 5 | import paddle 6 | 7 | from colorama import Fore, Style 8 | 9 | 10 | __all__ = ['commit', 'describe'] 11 | 12 | 13 | def _exec(cmd): 14 | f = os.popen(cmd, 'r', 1) 15 | return f.read().strip() 16 | 17 | 18 | def _bold(s): 19 | return "\033[1m%s\033[0m" % s 20 | 21 | 22 | def _color(s): 23 | # return f'{Fore.RED}{s}{Style.RESET_ALL}' 24 | return "{}{}{}".format(Fore.RED,s,Style.RESET_ALL) 25 | 26 | 27 | def _describe(model, lines=None, spaces=0): 28 | head = " " * spaces 29 | for name, p in model.named_parameters(): 30 | if '.' in name: 31 | continue 32 | if p.trainable: 33 | name = _color(name) 34 | line = "{head}- {name}".format(head=head, name=name) 35 | lines.append(line) 36 | 37 | for name, m in model.named_sublayers(): 38 | space_num = len(name) + spaces + 1 39 | if m.training: 40 | name = _color(name) 41 | line = "{head}.{name} ({type})".format( 42 | head=head, 43 | name=name, 44 | type=m.__class__.__name__) 45 | lines.append(line) 46 | _describe(m, lines, space_num) 47 | 48 | 49 | def commit(): 50 | root = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')) 51 | cmd = "cd {}; git log | head -n1 | awk '{{print $2}}'".format(root) 52 | commit = _exec(cmd) 53 | cmd = "cd {}; git log --oneline | head -n1".format(root) 54 | commit_log = _exec(cmd) 55 | return "commit : {}\n log : {}".format(commit, commit_log) 56 | 57 | 58 | def describe(net, name=None): 59 | num = 0 60 | lines = [] 61 | if name is not None: 62 | lines.append(name) 63 | num = len(name) 64 | _describe(net, lines, num) 65 | return "\n".join(lines) 66 | 67 | 68 | def bbox_clip(x, min_value, max_value): 69 | new_x = max(min_value, min(x, max_value)) 70 | return new_x 71 | -------------------------------------------------------------------------------- /pysot/utils/model_load.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | import logging 4 | 5 | import paddle 6 | 7 | 8 | logger = logging.getLogger('global') 9 | 10 | 11 | def check_keys(model, pretrained_state_dict): 12 | ckpt_keys = set(pretrained_state_dict.keys()) 13 | model_keys = set(model.state_dict().keys()) 14 | used_pretrained_keys = model_keys & ckpt_keys 15 | unused_pretrained_keys = ckpt_keys - model_keys 16 | missing_keys = model_keys - ckpt_keys 17 | # filter 'num_batches_tracked' 18 | missing_keys = [x for x in missing_keys 19 | if not x.endswith('num_batches_tracked')] 20 | if len(missing_keys) > 0: 21 | logger.info('[Warning] missing keys: {}'.format(missing_keys)) 22 | logger.info('missing keys:{}'.format(len(missing_keys))) 23 | if len(unused_pretrained_keys) > 0: 24 | logger.info('[Warning] unused_pretrained_keys: {}'.format( 25 | unused_pretrained_keys)) 26 | logger.info('unused checkpoint keys:{}'.format( 27 | len(unused_pretrained_keys))) 28 | logger.info('used keys:{}'.format(len(used_pretrained_keys))) 29 | assert len(used_pretrained_keys) > 0, \ 30 | 'load NONE from pretrained checkpoint' 31 | return True 32 | 33 | 34 | def remove_prefix(state_dict, prefix): 35 | ''' Old style model is stored with all names of parameters 36 | share common prefix 'module.' 
''' 37 | logger.info('remove prefix \'{}\''.format(prefix)) 38 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 39 | return {f(key): value for key, value in state_dict.items()} 40 | 41 | 42 | def load_pretrain(model, pretrained_path): 43 | logger.info('load pretrained model from {}'.format(pretrained_path)) 44 | device = paddle.get_device() 45 | pretrained_dict = paddle.load(pretrained_path) 46 | if "state_dict" in pretrained_dict.keys(): 47 | pretrained_dict = remove_prefix(pretrained_dict['state_dict'], 48 | 'module.') 49 | else: 50 | pretrained_dict = remove_prefix(pretrained_dict, 'module.') 51 | 52 | try: 53 | check_keys(model, pretrained_dict) 54 | except Exception: 55 | logger.info('[Warning]: using pretrain as features.\ 56 | Adding "features." as prefix') 57 | new_dict = {} 58 | for k, v in pretrained_dict.items(): 59 | k = 'features.' + k 60 | new_dict[k] = v 61 | pretrained_dict = new_dict 62 | check_keys(model, pretrained_dict) 63 | model.set_state_dict(pretrained_dict) 64 | return model 65 | 66 | 67 | def restore_from(model, optimizer, ckpt_path): 68 | device = paddle.get_device() 69 | ckpt = paddle.load(ckpt_path) 70 | epoch = ckpt['epoch'] 71 | 72 | ckpt_model_dict = remove_prefix(ckpt['state_dict'], 'module.') 73 | check_keys(model, ckpt_model_dict) 74 | model.set_state_dict(ckpt_model_dict) 75 | 76 | check_keys(optimizer, ckpt['optimizer']) 77 | optimizer.set_state_dict(ckpt['optimizer']) 78 | return model, optimizer, epoch 79 | -------------------------------------------------------------------------------- /pysot/utils/xcorr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | import paddle 4 | import paddle.nn.functional as F 5 | 6 | 7 | def xcorr_slow(x, kernel): 8 | """for loop to calculate cross correlation, slow version 9 | """ 10 | batch = x.shape[0] 11 | out = [] 12 | for i in range(batch): 13 | px = x[i] 14 | pk = kernel[i] 15 | px = px.unsqueeze(0) 16 | pk = pk.unsqueeze(0) 17 | po = F.conv2d(px, pk) 18 | out.append(po) 19 | out = paddle.concat(out, axis=0) 20 | return out 21 | 22 | 23 | def xcorr_fast(x, kernel): 24 | """group conv2d to calculate cross correlation, fast version 25 | """ 26 | batch = kernel.shape[0] 27 | pk = kernel.reshape([-1, x.shape[1], kernel.shape[2], kernel.shape[3]]) 28 | px = x.reshape([1, -1, x.shape[2], x.shape[3]]) 29 | po = F.conv2d(px, pk, groups=batch) 30 | po = po.reshape([batch, -1, po.shape[2], po.shape[3]]) 31 | return po 32 | 33 | 34 | def xcorr_depthwise(x, kernel): 35 | """depthwise cross correlation 36 | """ 37 | batch = kernel.shape[0] 38 | channel = kernel.shape[1] 39 | x = x.reshape([1, batch*channel, x.shape[2], x.shape[3]]) 40 | kernel = kernel.reshape([batch*channel, 1, kernel.shape[2], kernel.shape[3]]) 41 | out = F.conv2d(x, kernel, groups=batch*channel) 42 | out = out.reshape([batch, channel, out.shape[2], out.shape[3]]) 43 | return out 44 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | opencv-python 3 | pyyaml 4 | yacs 5 | tqdm 6 | colorama 7 | matplotlib 8 | cython 9 | tensorboardX 10 | 11 | 12 |
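A quick shape sanity-check for the cross-correlation helpers in pysot/utils/xcorr.py above (editor's sketch, not a repo file; the feature sizes are illustrative):

import paddle
from pysot.utils.xcorr import xcorr_fast, xcorr_depthwise

x = paddle.randn([8, 256, 26, 26])    # batched search-region features
z = paddle.randn([8, 256, 6, 6])      # batched template features used as kernels
print(xcorr_fast(x, z).shape)         # [8, 1, 21, 21]   (26 - 6 + 1 = 21)
print(xcorr_depthwise(x, z).shape)    # [8, 256, 21, 21] one response map per channel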
-------------------------------------------------------------------------------- /toolkit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/__init__.py -------------------------------------------------------------------------------- /toolkit/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /toolkit/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .uav import UAVDataset 2 | from .dtb import DTBDataset 3 | from .uav10fps import UAV10Dataset 4 | from .uavtrack112_l import UAVTrack112lDataset 5 | from .uavtrack112 import UAVTrack112Dataset 6 | from .otb import OTBDataset 7 | from .lasot import LaSOTDataset 8 | from .got10k import GOT10kDataset 9 | from .visdrone import VISDRONED2018Dataset 10 | 11 | class DatasetFactory(object): 12 | @staticmethod 13 | def create_dataset(**kwargs): 14 | """ 15 | Args: 16 | name: dataset name, one of 'OTB100', 'LaSOT', 'GOT', 'DTB70', 17 | 'UAV123', 'UAV123_10fps', 'UAVTrack112', 'UAVTrack112_l', 'Visdrone2018' 18 | dataset_root: dataset root 19 | load_img: whether to load image 20 | Return: 21 | dataset 22 | """ 23 | assert 'name' in kwargs, "should provide dataset name" 24 | name = kwargs['name'] 25 | if 'OTB100' in name: 26 | dataset = OTBDataset(**kwargs) 27 | elif 'LaSOT' == name: 28 | dataset = LaSOTDataset(**kwargs) 29 | elif 'GOT' == name: 30 | dataset = GOT10kDataset(**kwargs) 31 | elif 'DTB70' in name: 32 | dataset = DTBDataset(**kwargs) 33 | elif 'UAV123_10fps' in name: 34 | dataset = UAV10Dataset(**kwargs) 35 | elif 'UAVTrack112_l' in name: 36 | dataset = UAVTrack112lDataset(**kwargs) 37 | elif 'UAVTrack112' in name: 38 | dataset = UAVTrack112Dataset(**kwargs) 39 | elif 'UAV123' in name: 40 | dataset = UAVDataset(**kwargs) 41 | elif 'Visdrone2018' in name: 42 | dataset = VISDRONED2018Dataset(**kwargs) 43 | else: 44 | raise Exception("unknown dataset {}".format(kwargs['name'])) 45 | return dataset 46 | -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/dataset.cpython-37.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/dataset.cpython-37.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/dataset.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/dataset.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/dtb.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/dtb.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/got10k.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/got10k.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/lasot.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/lasot.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/otb.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/otb.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/uav.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/uav.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/uav10fps.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/uav10fps.cpython-37.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/uav10fps.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/uav10fps.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/uav20l.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/uav20l.cpython-37.pyc 
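A minimal way to drive the DatasetFactory defined in toolkit/datasets/__init__.py above (editor's sketch, not a repo file; the ./test_dataset/OTB100 root is an assumed layout with an OTB100.json beside the sequences):

from toolkit.datasets import DatasetFactory

dataset = DatasetFactory.create_dataset(name='OTB100',
                                        dataset_root='./test_dataset/OTB100',
                                        load_img=False)
for video in dataset:                  # videos are yielded in sorted-name order
    for frame, gt_bbox in video:       # frame is a BGR ndarray, gt_bbox is [x, y, w, h]
        pass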
-------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/uav20l.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/uav20l.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/uavdt.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/uavdt.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/uavtrack112.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/uavtrack112.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/uavtrack112_l.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/uavtrack112_l.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/video.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/video.cpython-37.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/video.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/video.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/visdrone.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/visdrone.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/visdrone1.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/visdrone1.cpython-37.pyc -------------------------------------------------------------------------------- /toolkit/datasets/__pycache__/visdrone1.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/datasets/__pycache__/visdrone1.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/datasets/dataset.py: 
-------------------------------------------------------------------------------- 1 | from tqdm import tqdm 2 | 3 | class Dataset(object): 4 | def __init__(self, name, dataset_root): 5 | self.name = name 6 | self.dataset_root = dataset_root 7 | self.videos = None 8 | 9 | def __getitem__(self, idx): 10 | if isinstance(idx, str): 11 | return self.videos[idx] 12 | elif isinstance(idx, int): 13 | return self.videos[sorted(list(self.videos.keys()))[idx]] 14 | 15 | def __len__(self): 16 | return len(self.videos) 17 | 18 | def __iter__(self): 19 | keys = sorted(list(self.videos.keys())) 20 | for key in keys: 21 | yield self.videos[key] 22 | 23 | def set_tracker(self, path, tracker_names): 24 | """ 25 | Args: 26 | path: path to tracker results, 27 | tracker_names: list of tracker name 28 | """ 29 | self.tracker_path = path 30 | self.tracker_names = tracker_names 31 | for video in tqdm(self.videos.values(), 32 | desc='loading tracker result', ncols=100): 33 | video.load_tracker(path, tracker_names) 34 | -------------------------------------------------------------------------------- /toolkit/datasets/lasot.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | from glob import glob 7 | 8 | from .dataset import Dataset 9 | from .video import Video 10 | 11 | class LaSOTVideo(Video): 12 | """ 13 | Args: 14 | name: video name 15 | root: dataset root 16 | video_dir: video directory 17 | init_rect: init rectangle 18 | img_names: image names 19 | gt_rect: groundtruth rectangle 20 | attr: attribute of video 21 | """ 22 | def __init__(self, name, root, video_dir, init_rect, img_names, 23 | gt_rect, attr, absent, load_img=False): 24 | super(LaSOTVideo, self).__init__(name, root, video_dir, 25 | init_rect, img_names, gt_rect, attr, load_img) 26 | self.absent = np.array(absent, np.int8) 27 | 28 | def load_tracker(self, path, tracker_names=None, store=True): 29 | """ 30 | Args: 31 | path(str): path to result 32 | tracker_name(list): name of tracker 33 | """ 34 | if not tracker_names: 35 | tracker_names = [x.split('/')[-1] for x in glob(path) 36 | if os.path.isdir(x)] 37 | if isinstance(tracker_names, str): 38 | tracker_names = [tracker_names] 39 | for name in tracker_names: 40 | traj_file = os.path.join(path, name, self.name+'.txt') 41 | if os.path.exists(traj_file): 42 | with open(traj_file, 'r') as f : 43 | pred_traj = [list(map(float, x.strip().split(','))) 44 | for x in f.readlines()] 45 | else: 46 | print("File not exists: ", traj_file) 47 | if self.name == 'monkey-17': 48 | pred_traj = pred_traj[:len(self.gt_traj)] 49 | if store: 50 | self.pred_trajs[name] = pred_traj 51 | else: 52 | return pred_traj 53 | self.tracker_names = list(self.pred_trajs.keys()) 54 | 55 | class LaSOTDataset(Dataset): 56 | """ 57 | Args: 58 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50' 59 | dataset_root: dataset root 60 | load_img: wether to load all imgs 61 | """ 62 | def __init__(self, name, dataset_root, load_img=False): 63 | super(LaSOTDataset, self).__init__(name, dataset_root) 64 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 65 | meta_data = json.load(f) 66 | 67 | # load videos 68 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 69 | self.videos = {} 70 | for video in pbar: 71 | pbar.set_postfix_str(video) 72 | self.videos[video] = LaSOTVideo(video, 73 | dataset_root, 74 | meta_data[video]['video_dir'], 75 | meta_data[video]['init_rect'], 76 | 
meta_data[video]['img_names'], 77 | meta_data[video]['gt_rect'], 78 | meta_data[video]['attr'], 79 | meta_data[video]['absent']) 80 | 81 | # set attr 82 | attr = [] 83 | for x in self.videos.values(): 84 | attr += x.attr 85 | attr = set(attr) 86 | self.attr = {} 87 | self.attr['ALL'] = list(self.videos.keys()) 88 | for x in attr: 89 | self.attr[x] = [] 90 | for k, v in self.videos.items(): 91 | for attr_ in v.attr: 92 | self.attr[attr_].append(k) 93 | -------------------------------------------------------------------------------- /toolkit/datasets/otb.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | class OTBVideo(Video): 14 | """ 15 | Args: 16 | name: video name 17 | root: dataset root 18 | video_dir: video directory 19 | init_rect: init rectangle 20 | img_names: image names 21 | gt_rect: groundtruth rectangle 22 | attr: attribute of video 23 | """ 24 | def __init__(self, name, root, video_dir, init_rect, img_names, 25 | gt_rect, attr, load_img=False): 26 | super(OTBVideo, self).__init__(name, root, video_dir, 27 | init_rect, img_names, gt_rect, attr, load_img) 28 | 29 | def load_tracker(self, path, tracker_names=None, store=True): 30 | """ 31 | Args: 32 | path(str): path to result 33 | tracker_name(list): name of tracker 34 | """ 35 | if not tracker_names: 36 | tracker_names = [x.split('/')[-1] for x in glob(path) 37 | if os.path.isdir(x)] 38 | if isinstance(tracker_names, str): 39 | tracker_names = [tracker_names] 40 | for name in tracker_names: 41 | traj_file = os.path.join(path, name, self.name+'.txt') 42 | if not os.path.exists(traj_file): 43 | if self.name == 'FleetFace': 44 | txt_name = 'fleetface.txt' 45 | elif self.name == 'Jogging-1': 46 | txt_name = 'jogging_1.txt' 47 | elif self.name == 'Jogging-2': 48 | txt_name = 'jogging_2.txt' 49 | elif self.name == 'Skating2-1': 50 | txt_name = 'skating2_1.txt' 51 | elif self.name == 'Skating2-2': 52 | txt_name = 'skating2_2.txt' 53 | elif self.name == 'FaceOcc1': 54 | txt_name = 'faceocc1.txt' 55 | elif self.name == 'FaceOcc2': 56 | txt_name = 'faceocc2.txt' 57 | elif self.name == 'Human4-2': 58 | txt_name = 'human4_2.txt' 59 | else: 60 | txt_name = self.name[0].lower()+self.name[1:]+'.txt' 61 | traj_file = os.path.join(path, name, txt_name) 62 | if os.path.exists(traj_file): 63 | with open(traj_file, 'r') as f : 64 | pred_traj = [list(map(float, x.strip().split(','))) 65 | for x in f.readlines()] 66 | if self.name=='Tiger1': 67 | pred_traj=pred_traj[5:] 68 | if len(pred_traj) != len(self.gt_traj): 69 | print(name, len(pred_traj), len(self.gt_traj), self.name) 70 | if store: 71 | self.pred_trajs[name] = pred_traj 72 | else: 73 | return pred_traj 74 | else: 75 | print(traj_file) 76 | self.tracker_names = list(self.pred_trajs.keys()) 77 | 78 | 79 | 80 | class OTBDataset(Dataset): 81 | """ 82 | Args: 83 | name: dataset name, should be 'OTB100', 'CVPR13', 'OTB50' 84 | dataset_root: dataset root 85 | load_img: wether to load all imgs 86 | """ 87 | def __init__(self, name, dataset_root, load_img=False): 88 | super(OTBDataset, self).__init__(name, dataset_root) 89 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 90 | meta_data = json.load(f) 91 | 92 | # load videos 93 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 94 | self.videos = {} 95 | for video in pbar: 96 
| pbar.set_postfix_str(video) 97 | self.videos[video] = OTBVideo(video, 98 | dataset_root, 99 | meta_data[video]['video_dir'], 100 | meta_data[video]['init_rect'], 101 | meta_data[video]['img_names'], 102 | meta_data[video]['gt_rect'], 103 | meta_data[video]['attr'], 104 | load_img) 105 | 106 | # set attr 107 | attr = [] 108 | for x in self.videos.values(): 109 | attr += x.attr 110 | attr = set(attr) 111 | self.attr = {} 112 | self.attr['ALL'] = list(self.videos.keys()) 113 | for x in attr: 114 | self.attr[x] = [] 115 | for k, v in self.videos.items(): 116 | for attr_ in v.attr: 117 | self.attr[attr_].append(k) 118 | -------------------------------------------------------------------------------- /toolkit/datasets/uav.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | 4 | from tqdm import tqdm 5 | from glob import glob 6 | 7 | from .dataset import Dataset 8 | from .video import Video 9 | 10 | class UAVVideo(Video): 11 | """ 12 | Args: 13 | name: video name 14 | root: dataset root 15 | video_dir: video directory 16 | init_rect: init rectangle 17 | img_names: image names 18 | gt_rect: groundtruth rectangle 19 | attr: attribute of video 20 | """ 21 | def __init__(self, name, root, video_dir, init_rect, img_names, 22 | gt_rect, attr, load_img=False): 23 | super(UAVVideo, self).__init__(name, root, video_dir, 24 | init_rect, img_names, gt_rect, attr, load_img) 25 | 26 | 27 | class UAVDataset(Dataset): 28 | """ 29 | Args: 30 | name: dataset name, should be 'UAV123', 'UAV20L' 31 | dataset_root: dataset root 32 | load_img: whether to load all imgs 33 | """ 34 | def __init__(self, name, dataset_root, load_img=False): 35 | super(UAVDataset, self).__init__(name, dataset_root) 36 | with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 37 | meta_data = json.load(f) 38 | 39 | # load videos 40 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 41 | self.videos = {} 42 | for video in pbar: 43 | pbar.set_postfix_str(video) 44 | self.videos[video] = UAVVideo(video, 45 | dataset_root, 46 | meta_data[video]['video_dir'], 47 | meta_data[video]['init_rect'], 48 | meta_data[video]['img_names'], 49 | meta_data[video]['gt_rect'], 50 | meta_data[video]['attr']) 51 | 52 | # set attr 53 | attr = [] 54 | for x in self.videos.values(): 55 | attr += x.attr 56 | attr = set(attr) 57 | self.attr = {} 58 | self.attr['ALL'] = list(self.videos.keys()) 59 | for x in attr: 60 | self.attr[x] = [] 61 | for k, v in self.videos.items(): 62 | for attr_ in v.attr: 63 | self.attr[attr_].append(k) 64 | -------------------------------------------------------------------------------- /toolkit/datasets/uav10fps.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | def ca(): 13 | path = './test_dataset/UAV123_10fps' 14 | 15 | name_list = os.listdir(os.path.join(path, 'data_seq')) 16 | name_list.sort() 17 | a = len(name_list) 18 | b = [] 19 | for i in range(a): 20 | b.append(name_list[i]) 21 | c = [] 22 | 23 | for jj in range(a): 24 | imgs = os.path.join(path, 'data_seq', str(name_list[jj])) 25 | txt = os.path.join(path, 'anno', str(name_list[jj]) + '.txt') 26 | bbox = [] 27 | with open(txt, 'r') as f: 28 | file = f.readlines() 29 | li = os.listdir(imgs) 30 | li.sort() 31 | for ii in range(len(file)): 32 | li[ii] 
= os.path.join(name_list[jj], li[ii]) 33 | 34 | line = file[ii].strip('\n').split(',') 35 | 36 | try: 37 | line[0] = int(line[0]) 38 | except: 39 | line[0] = float(line[0]) 40 | try: 41 | line[1] = int(line[1]) 42 | except: 43 | line[1] = float(line[1]) 44 | try: 45 | line[2] = int(line[2]) 46 | except: 47 | line[2] = float(line[2]) 48 | try: 49 | line[3] = int(line[3]) 50 | except: 51 | line[3] = float(line[3]) 52 | bbox.append(line) 53 | 54 | if len(bbox) != len(li): 55 | print(jj) 56 | c.append({'attr': [], 'gt_rect': bbox, 'img_names': li, 'init_rect': bbox[0], 'video_dir': name_list[jj]}) 57 | 58 | d = dict(zip(b, c)) 59 | 60 | return d 61 | 62 | class UAVVideo(Video): 63 | """ 64 | Args: 65 | name: video name 66 | root: dataset root 67 | video_dir: video directory 68 | init_rect: init rectangle 69 | img_names: image names 70 | gt_rect: groundtruth rectangle 71 | attr: attribute of video 72 | """ 73 | def __init__(self, name, root, video_dir, init_rect, img_names, 74 | gt_rect, attr, load_img=False): 75 | super(UAVVideo, self).__init__(name, root, video_dir, 76 | init_rect, img_names, gt_rect, attr, load_img) 77 | 78 | 79 | class UAV10Dataset(Dataset): 80 | """ 81 | Args: 82 | name: dataset name, should be 'UAV123', 'UAV20L' 83 | dataset_root: dataset root 84 | load_img: whether to load all imgs 85 | """ 86 | def __init__(self, name, dataset_root, load_img=False): 87 | super(UAV10Dataset, self).__init__(name, dataset_root) 88 | meta_data = ca() 89 | 90 | # load videos 91 | pbar = tqdm(meta_data.keys(), desc='loading ' + name, ncols=100) 92 | self.videos = {} 93 | for video in pbar: 94 | pbar.set_postfix_str(video) 95 | self.videos[video] = UAVVideo(video, 96 | dataset_root, 97 | meta_data[video]['video_dir'], 98 | meta_data[video]['init_rect'], 99 | meta_data[video]['img_names'], 100 | meta_data[video]['gt_rect'], 101 | meta_data[video]['attr']) 102 | 103 | # set attr 104 | attr = [] 105 | for x in self.videos.values(): 106 | attr += x.attr 107 | attr = set(attr) 108 | self.attr = {} 109 | self.attr['ALL'] = list(self.videos.keys()) 110 | for x in attr: 111 | self.attr[x] = [] 112 | for k, v in self.videos.items(): 113 | for attr_ in v.attr: 114 | self.attr[attr_].append(k) 115 | -------------------------------------------------------------------------------- /toolkit/datasets/uavtrack112.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | def ca(): 13 | path = '/data/Test_dataset/UAVTrack112' 14 | 15 | name_list = os.listdir(os.path.join(path, 'data_seq')) 16 | name_list.sort() 17 | a = 112 18 | b = [] 19 | for i in range(a): 20 | b.append(name_list[i]) 21 | c = [] 22 | 23 | for jj in range(a): 24 | imgs = os.path.join(path, 'data_seq', str(name_list[jj])) 25 | txt = os.path.join(path, 'anno', str(name_list[jj]) + '.txt') 26 | bbox = [] 27 | with open(txt, 'r') as f: 28 | file = f.readlines() 29 | li = os.listdir(imgs) 30 | li.sort() 31 | for ii in range(len(file)): 32 | li[ii] = os.path.join(name_list[jj], li[ii]) 33 | 34 | line = file[ii].strip('\n').split(',') 35 | if len(line) != 4: 36 | line = file[ii].strip('\n').split(' ') 37 | 38 | try: 39 | line[0] = int(line[0]) 40 | except: 41 | line[0] = float(line[0]) 42 | try: 43 | line[1] = int(line[1]) 44 | except: 45 | line[1] = float(line[1]) 46 | try: 47 | line[2] = int(line[2]) 48 | except: 
49 | line[2] = float(line[2]) 50 | try: 51 | line[3] = int(line[3]) 52 | except: 53 | line[3] = float(line[3]) 54 | bbox.append(line) 55 | 56 | if len(bbox) != len(li): 57 | print(jj) 58 | c.append({'attr': [], 'gt_rect': bbox, 'img_names': li, 'init_rect': bbox[0], 'video_dir': name_list[jj]}) 59 | 60 | d = dict(zip(b, c)) 61 | 62 | return d 63 | 64 | class UAVVideo(Video): 65 | """ 66 | Args: 67 | name: video name 68 | root: dataset root 69 | video_dir: video directory 70 | init_rect: init rectangle 71 | img_names: image names 72 | gt_rect: groundtruth rectangle 73 | attr: attribute of video 74 | """ 75 | def __init__(self, name, root, video_dir, init_rect, img_names, 76 | gt_rect, attr, load_img=False): 77 | super(UAVVideo, self).__init__(name, root, video_dir, 78 | init_rect, img_names, gt_rect, attr, load_img) 79 | 80 | 81 | class UAVTrack112Dataset(Dataset): 82 | """ 83 | Args: 84 | name: dataset name, should be 'UAV123', 'UAV20L' 85 | dataset_root: dataset root 86 | load_img: whether to load all imgs 87 | """ 88 | def __init__(self, name, dataset_root, load_img=False): 89 | super(UAVTrack112Dataset, self).__init__(name, dataset_root) 90 | meta_data = ca() 91 | 92 | # load videos 93 | pbar = tqdm(meta_data.keys(), desc='loading ' + name, ncols=100) 94 | self.videos = {} 95 | for video in pbar: 96 | pbar.set_postfix_str(video) 97 | self.videos[video] = UAVVideo(video, 98 | dataset_root, 99 | meta_data[video]['video_dir'], 100 | meta_data[video]['init_rect'], 101 | meta_data[video]['img_names'], 102 | meta_data[video]['gt_rect'], 103 | meta_data[video]['attr']) 104 | 105 | # set attr 106 | attr = [] 107 | for x in self.videos.values(): 108 | attr += x.attr 109 | attr = set(attr) 110 | self.attr = {} 111 | self.attr['ALL'] = list(self.videos.keys()) 112 | for x in attr: 113 | self.attr[x] = [] 114 | for k, v in self.videos.items(): 115 | for attr_ in v.attr: 116 | self.attr[attr_].append(k) 117 | -------------------------------------------------------------------------------- /toolkit/datasets/uavtrack112_l.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | def ca(): 13 | path = './test_dataset/UAVTrack112_l' 14 | namefile = os.listdir(os.path.join(path, 'anno_l')) 15 | namefile.sort() 16 | 17 | name_list = [] 18 | for seqname in namefile: 19 | name_list.append(seqname[:-4]) 20 | 21 | a = len(name_list) 22 | b = [] 23 | for i in range(a): 24 | b.append(name_list[i]) 25 | c = [] 26 | 27 | for jj in range(a): 28 | 29 | imgs = os.path.join(path, 'data_seq', str(name_list[jj])) 30 | txt = os.path.join(path, 'anno', str(name_list[jj]) + '.txt') 31 | bbox = [] 32 | with open(txt, 'r') as f: 33 | file = f.readlines() 34 | li = os.listdir(imgs) 35 | li.sort() 36 | for ii in range(len(file)): 37 | li[ii] = os.path.join(name_list[jj], li[ii]) 38 | 39 | line = file[ii].strip('\n').split(',') 40 | if len(line) != 4: 41 | line = file[ii].strip('\n').split(' ') 42 | 43 | try: 44 | line[0] = int(line[0]) 45 | except: 46 | line[0] = float(line[0]) 47 | try: 48 | line[1] = int(line[1]) 49 | except: 50 | line[1] = float(line[1]) 51 | try: 52 | line[2] = int(line[2]) 53 | except: 54 | line[2] = float(line[2]) 55 | try: 56 | line[3] = int(line[3]) 57 | except: 58 | line[3] = float(line[3]) 59 | bbox.append(line) 60 | 61 | if len(bbox) != len(li): 62 | print(jj) 63 
| c.append({'attr': [], 'gt_rect': bbox, 'img_names': li, 'init_rect': bbox[0], 'video_dir': name_list[jj]}) 64 | 65 | d = dict(zip(b, c)) 66 | 67 | return d 68 | 69 | class UAVVideo(Video): 70 | """ 71 | Args: 72 | name: video name 73 | root: dataset root 74 | video_dir: video directory 75 | init_rect: init rectangle 76 | img_names: image names 77 | gt_rect: groundtruth rectangle 78 | attr: attribute of video 79 | """ 80 | def __init__(self, name, root, video_dir, init_rect, img_names, 81 | gt_rect, attr, load_img=False): 82 | super(UAVVideo, self).__init__(name, root, video_dir, 83 | init_rect, img_names, gt_rect, attr, load_img) 84 | 85 | 86 | class UAVTrack112lDataset(Dataset): 87 | """ 88 | Args: 89 | name: dataset name, should be 'UAV123', 'UAV20L' 90 | dataset_root: dataset root 91 | load_img: whether to load all imgs 92 | """ 93 | def __init__(self, name, dataset_root, load_img=False): 94 | super(UAVTrack112lDataset, self).__init__(name, dataset_root) 95 | meta_data = ca() 96 | 97 | # load videos 98 | pbar = tqdm(meta_data.keys(), desc='loading ' + name, ncols=100) 99 | self.videos = {} 100 | for video in pbar: 101 | pbar.set_postfix_str(video) 102 | self.videos[video] = UAVVideo(video, 103 | dataset_root, 104 | meta_data[video]['video_dir'], 105 | meta_data[video]['init_rect'], 106 | meta_data[video]['img_names'], 107 | meta_data[video]['gt_rect'], 108 | meta_data[video]['attr']) 109 | 110 | # set attr 111 | attr = [] 112 | for x in self.videos.values(): 113 | attr += x.attr 114 | attr = set(attr) 115 | self.attr = {} 116 | self.attr['ALL'] = list(self.videos.keys()) 117 | for x in attr: 118 | self.attr[x] = [] 119 | for k, v in self.videos.items(): 120 | for attr_ in v.attr: 121 | self.attr[attr_].append(k) 122 | -------------------------------------------------------------------------------- /toolkit/datasets/video.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import re 4 | import numpy as np 5 | import json 6 | 7 | from glob import glob 8 | 9 | class Video(object): 10 | def __init__(self, name, root, video_dir, init_rect, img_names, 11 | gt_rect, attr, load_img=False): 12 | self.name = name 13 | self.video_dir = video_dir 14 | self.init_rect = init_rect 15 | self.gt_traj = gt_rect 16 | self.attr = attr 17 | self.pred_trajs = {} 18 | self.img_names = [os.path.join(root, x) for x in img_names] 19 | self.imgs = None 20 | 21 | if load_img: 22 | self.load_img() 23 | else: 24 | img = cv2.imread(self.img_names[0]) 25 | assert img is not None, self.img_names[0] 26 | self.width = img.shape[1] 27 | self.height = img.shape[0] 28 | 29 | def load_tracker(self, path, tracker_names=None, store=True): 30 | """ 31 | Args: 32 | path(str): path to result 33 | tracker_name(list): name of tracker 34 | """ 35 | if not tracker_names: 36 | tracker_names = [x.split('/')[-1] for x in glob(path) 37 | if os.path.isdir(x)] 38 | if isinstance(tracker_names, str): 39 | tracker_names = [tracker_names] 40 | for name in tracker_names: 41 | traj_file = os.path.join(path, name, self.name+'.txt') 42 | if os.path.exists(traj_file): 43 | with open(traj_file, 'r') as f : 44 | pred_traj = [list(map(float, x.strip().split(','))) 45 | for x in f.readlines()] 46 | if len(pred_traj) != len(self.gt_traj): 47 | print(name, len(pred_traj), len(self.gt_traj), self.name) 48 | if store: 49 | self.pred_trajs[name] = pred_traj 50 | else: 51 | return pred_traj 52 | else: 53 | print(traj_file) 54 | self.tracker_names = list(self.pred_trajs.keys()) 55 | 56 | def 
load_img(self): 57 | if self.imgs is None: 58 | self.imgs = [cv2.imread(x) for x in self.img_names] 59 | self.width = self.imgs[0].shape[1] 60 | self.height = self.imgs[0].shape[0] 61 | 62 | def free_img(self): 63 | self.imgs = None 64 | 65 | def __len__(self): 66 | return len(self.img_names) 67 | 68 | def __getitem__(self, idx): 69 | if self.imgs is None: 70 | return cv2.imread(self.img_names[idx]), self.gt_traj[idx] 71 | else: 72 | return self.imgs[idx], self.gt_traj[idx] 73 | 74 | def __iter__(self): 75 | for i in range(len(self.img_names)): 76 | if self.imgs is not None: 77 | yield self.imgs[i], self.gt_traj[i] 78 | else: 79 | yield cv2.imread(self.img_names[i]), self.gt_traj[i] 80 | 81 | def draw_box(self, roi, img, linewidth, color, name=None): 82 | """ 83 | roi: rectangle or polygon 84 | img: numpy array img 85 | linewith: line width of the bbox 86 | """ 87 | if len(roi) > 6 and len(roi) % 2 == 0: 88 | pts = np.array(roi, np.int32).reshape(-1, 1, 2) 89 | color = tuple(map(int, color)) 90 | img = cv2.polylines(img, [pts], True, color, linewidth) 91 | pt = (pts[0, 0, 0], pts[0, 0, 1]-5) 92 | if name: 93 | img = cv2.putText(img, name, pt, cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 1) 94 | elif len(roi) == 4: 95 | if not np.isnan(roi[0]): 96 | roi = list(map(int, roi)) 97 | color = tuple(map(int, color)) 98 | img = cv2.rectangle(img, (roi[0], roi[1]), (roi[0]+roi[2], roi[1]+roi[3]), 99 | color, linewidth) 100 | if name: 101 | img = cv2.putText(img, name, (roi[0], roi[1]-5), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, color, 1) 102 | return img 103 | 104 | def show(self, pred_trajs={}, linewidth=2, show_name=False): 105 | """ 106 | pred_trajs: dict of pred_traj, {'tracker_name': list of traj} 107 | pred_traj should contain polygon or rectangle(x, y, width, height) 108 | linewith: line width of the bbox 109 | """ 110 | assert self.imgs is not None 111 | video = [] 112 | cv2.namedWindow(self.name, cv2.WINDOW_NORMAL) 113 | colors = {} 114 | if len(pred_trajs) == 0 and len(self.pred_trajs) > 0: 115 | pred_trajs = self.pred_trajs 116 | for i, (roi, img) in enumerate(zip(self.gt_traj, 117 | self.imgs[self.start_frame:self.end_frame+1])): 118 | img = img.copy() 119 | if len(img.shape) == 2: 120 | img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 121 | else: 122 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 123 | img = self.draw_box(roi, img, linewidth, (0, 255, 0), 124 | 'gt' if show_name else None) 125 | for name, trajs in pred_trajs.items(): 126 | if name not in colors: 127 | color = tuple(np.random.randint(0, 256, 3)) 128 | colors[name] = color 129 | else: 130 | color = colors[name] 131 | img = self.draw_box(trajs[0][i], img, linewidth, color, 132 | name if show_name else None) 133 | cv2.putText(img, str(i+self.start_frame), (5, 20), 134 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (255, 255, 0), 2) 135 | cv2.imshow(self.name, img) 136 | cv2.waitKey(40) 137 | video.append(img.copy()) 138 | return video 139 | -------------------------------------------------------------------------------- /toolkit/datasets/visdrone.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import numpy as np 4 | 5 | from PIL import Image 6 | from tqdm import tqdm 7 | from glob import glob 8 | 9 | from .dataset import Dataset 10 | from .video import Video 11 | 12 | 13 | class UVADTVideo(Video): 14 | """ 15 | Args: 16 | name: video name 17 | root: dataset root 18 | video_dir: video directory 19 | init_rect: init rectangle 20 | img_names: image names 21 | gt_rect: groundtruth 
rectangle 22 | attr: attribute of video 23 | """ 24 | def __init__(self, name, root, video_dir, init_rect, img_names, 25 | gt_rect, attr, load_img=False): 26 | super(UVADTVideo, self).__init__(name, root, video_dir, 27 | init_rect, img_names, gt_rect, attr, load_img) 28 | 29 | def ca(): 30 | path = '/data1/Test_dataset/VisDrone2018-SOT-test' 31 | 32 | name_list = os.listdir(path + '/sequences') 33 | name_list.sort() 34 | 35 | b = [] 36 | for i in range(len(name_list)): 37 | b.append(name_list[i]) 38 | c = [] 39 | 40 | for jj in range(len(name_list)): 41 | imgs = path + '/sequences/' + str(name_list[jj]) 42 | txt = path + '/annotations/' + str(name_list[jj]) + '.txt' 43 | bbox = [] 44 | f = open(txt) # open the annotation file 45 | file = f.readlines() 46 | li = os.listdir(imgs) 47 | li.sort() 48 | for ii in range(len(file)): 49 | li[ii] = name_list[jj] + '/' + li[ii] 50 | 51 | line = file[ii].strip('\n').split(',') 52 | 53 | try: 54 | line[0] = int(line[0]) 55 | except: 56 | line[0] = float(line[0]) 57 | try: 58 | line[1] = int(line[1]) 59 | except: 60 | line[1] = float(line[1]) 61 | try: 62 | line[2] = int(line[2]) 63 | except: 64 | line[2] = float(line[2]) 65 | try: 66 | line[3] = int(line[3]) 67 | except: 68 | line[3] = float(line[3]) 69 | bbox.append(line) 70 | 71 | if len(bbox) != len(li): 72 | print(jj) 73 | f.close() 74 | c.append({'attr': [], 'gt_rect': bbox, 'img_names': li, 'init_rect': bbox[0], 'video_dir': name_list[jj]}) 75 | 76 | d = dict(zip(b, c)) 77 | 78 | return d 79 | 80 | class VISDRONED2018Dataset(Dataset): 81 | """ 82 | Args: 83 | name: dataset name, should be 'Visdrone2018' 84 | dataset_root: dataset root 85 | load_img: whether to load all imgs 86 | """ 87 | def __init__(self, name, dataset_root, load_img=False): 88 | super(VISDRONED2018Dataset, self).__init__(name, dataset_root) 89 | # with open(os.path.join(dataset_root, name+'.json'), 'r') as f: 90 | # meta_data = json.load(f) 91 | meta_data = ca() 92 | # load videos 93 | pbar = tqdm(meta_data.keys(), desc='loading '+name, ncols=100) 94 | self.videos = {} 95 | for video in pbar: 96 | pbar.set_postfix_str(video) 97 | self.videos[video] = UVADTVideo(video, 98 | dataset_root, 99 | meta_data[video]['video_dir'], 100 | meta_data[video]['init_rect'], 101 | meta_data[video]['img_names'], 102 | meta_data[video]['gt_rect'], 103 | meta_data[video]['attr'], 104 | load_img) 105 | 106 | # set attr 107 | attr = [] 108 | for x in self.videos.values(): 109 | attr += x.attr 110 | attr = set(attr) 111 | self.attr = {} 112 | self.attr['ALL'] = list(self.videos.keys()) 113 | for x in attr: 114 | self.attr[x] = [] 115 | for k, v in self.videos.items(): 116 | for attr_ in v.attr: 117 | self.attr[attr_].append(k) 118 | -------------------------------------------------------------------------------- /toolkit/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .ope_benchmark import OPEBenchmark 2 | -------------------------------------------------------------------------------- /toolkit/evaluation/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/evaluation/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/evaluation/__pycache__/ope_benchmark.cpython-38.pyc:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/evaluation/__pycache__/ope_benchmark.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/utils/__pycache__/statistics.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/utils/__pycache__/statistics.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/utils/statistics.py: -------------------------------------------------------------------------------- 1 | """ 2 | @author fangyi.zhang@vipl.ict.ac.cn 3 | """ 4 | import numpy as np 5 | 6 | def overlap_ratio(rect1, rect2): 7 | '''Compute overlap ratio between two rects 8 | Args 9 | rect:2d array of N x [x,y,w,h] 10 | Return: 11 | iou 12 | ''' 13 | # if rect1.ndim==1: 14 | # rect1 = rect1[np.newaxis, :] 15 | # if rect2.ndim==1: 16 | # rect2 = rect2[np.newaxis, :] 17 | left = np.maximum(rect1[:,0], rect2[:,0]) 18 | right = np.minimum(rect1[:,0]+rect1[:,2], rect2[:,0]+rect2[:,2]) 19 | top = np.maximum(rect1[:,1], rect2[:,1]) 20 | bottom = np.minimum(rect1[:,1]+rect1[:,3], rect2[:,1]+rect2[:,3]) 21 | 22 | intersect = np.maximum(0,right - left) * np.maximum(0,bottom - top) 23 | union = rect1[:,2]*rect1[:,3] + rect2[:,2]*rect2[:,3] - intersect 24 | iou = intersect / union 25 | iou = np.maximum(np.minimum(1, iou), 0) 26 | return iou 27 | 28 | def success_overlap(gt_bb, result_bb, n_frame): 29 | thresholds_overlap = np.arange(0, 1.05, 0.05) 30 | success = np.zeros(len(thresholds_overlap)) 31 | iou = np.ones(len(gt_bb)) * (-1) 32 | # mask = np.sum(gt_bb > 0, axis=1) == 4 #TODO check all dataset 33 | mask = np.sum(gt_bb[:, 2:] > 0, axis=1) == 2 34 | iou[mask] = overlap_ratio(gt_bb[mask], result_bb[mask]) 35 | for i in range(len(thresholds_overlap)): 36 | success[i] = np.sum(iou > thresholds_overlap[i]) / float(n_frame) 37 | return success 38 | 39 | def success_error(gt_center, result_center, thresholds, n_frame): 40 | # n_frame = len(gt_center) 41 | success = np.zeros(len(thresholds)) 42 | dist = np.ones(len(gt_center)) * (-1) 43 | mask = np.sum(gt_center > 0, axis=1) == 2 44 | dist[mask] = np.sqrt(np.sum( 45 | np.power(gt_center[mask] - result_center[mask], 2), axis=1)) 46 | for i in range(len(thresholds)): 47 | success[i] = np.sum(dist <= thresholds[i]) / float(n_frame) 48 | return success 49 | -------------------------------------------------------------------------------- /toolkit/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from .draw_success_precision import draw_success_precision 2 | -------------------------------------------------------------------------------- /toolkit/visualization/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/visualization/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/visualization/__pycache__/draw_success_precision.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/visualization/__pycache__/draw_success_precision.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/visualization/__pycache__/draw_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IzuiZero/TCTrack-main-paddle/87df07a27415b1a0478081874297c0a0486b681a/toolkit/visualization/__pycache__/draw_utils.cpython-38.pyc -------------------------------------------------------------------------------- /toolkit/visualization/draw_success_precision.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | 4 | from .draw_utils import COLOR, LINE_STYLE 5 | 6 | def draw_success_precision(success_ret, name, videos, attr, precision_ret=None, 7 | norm_precision_ret=None, bold_name=None, axis=[0, 1]): 8 | # success plot 9 | fig, ax = plt.subplots() 10 | ax.grid(True) 11 | ax.set_aspect(1) 12 | plt.xlabel('Overlap threshold') 13 | plt.ylabel('Success rate') 14 | if attr == 'ALL': 15 | plt.title(r'\textbf{Success plots of OPE on %s}' % (name)) 16 | else: 17 | plt.title(r'\textbf{Success plots of OPE - %s}' % (attr)) 18 | plt.axis([0, 1]+axis) 19 | success = {} 20 | thresholds = np.arange(0, 1.05, 0.05) 21 | for tracker_name in success_ret.keys(): 22 | value = [v for k, v in success_ret[tracker_name].items() if k in videos] 23 | success[tracker_name] = np.mean(value) 24 | for idx, (tracker_name, auc) in \ 25 | enumerate(sorted(success.items(), key=lambda x:x[1], reverse=True)): 26 | if tracker_name == bold_name: 27 | label = r"\textbf{[%.3f] %s}" % (auc, tracker_name) 28 | else: 29 | label = "[%.3f] " % (auc) + tracker_name 30 | value = [v for k, v in success_ret[tracker_name].items() if k in videos] 31 | plt.plot(thresholds, np.mean(value, axis=0), 32 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 33 | ax.legend(loc='lower left', labelspacing=0.2) 34 | ax.autoscale(enable=True, axis='both', tight=True) 35 | xmin, xmax, ymin, ymax = plt.axis() 36 | ax.autoscale(enable=False) 37 | ymax += 0.03 38 | plt.axis([xmin, xmax, ymin, ymax]) 39 | plt.xticks(np.arange(xmin, xmax+0.01, 0.1)) 40 | plt.yticks(np.arange(ymin, ymax, 0.1)) 41 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 42 | plt.show() 43 | 44 | if precision_ret: 45 | # precision plot 46 | fig, ax = plt.subplots() 47 | ax.grid(True) 48 | ax.set_aspect(50) 49 | plt.xlabel('Location error threshold') 50 | plt.ylabel('Precision') 51 | if attr == 'ALL': 52 | plt.title(r'\textbf{Precision plots of OPE on %s}' % (name)) 53 | else: 54 | plt.title(r'\textbf{Precision plots of OPE - %s}' % (attr)) 55 | plt.axis([0, 50]+axis) 56 | precision = {} 57 | thresholds = np.arange(0, 51, 1) 58 | for tracker_name in precision_ret.keys(): 59 | value = [v for k, v in precision_ret[tracker_name].items() if k in videos] 60 | precision[tracker_name] = np.mean(value, axis=0)[20] 61 | for idx, (tracker_name, pre) in \ 62 | enumerate(sorted(precision.items(), key=lambda x:x[1], reverse=True)): 63 | if tracker_name == bold_name: 64 | label = r"\textbf{[%.3f] %s}" % (pre, tracker_name) 65 | else: 66 | label = "[%.3f] " % (pre) + tracker_name 67 | value = [v for k, v in precision_ret[tracker_name].items() if k in videos] 68 | plt.plot(thresholds, np.mean(value, axis=0), 69 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2)
70 | ax.legend(loc='lower right', labelspacing=0.2) 71 | ax.autoscale(enable=True, axis='both', tight=True) 72 | xmin, xmax, ymin, ymax = plt.axis() 73 | ax.autoscale(enable=False) 74 | ymax += 0.03 75 | plt.axis([xmin, xmax, ymin, ymax]) 76 | plt.xticks(np.arange(xmin, xmax+0.01, 5)) 77 | plt.yticks(np.arange(ymin, ymax, 0.1)) 78 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 79 | plt.show() 80 | 81 | # norm precision plot 82 | if norm_precision_ret: 83 | fig, ax = plt.subplots() 84 | ax.grid(True) 85 | plt.xlabel('Location error threshold') 86 | plt.ylabel('Precision') 87 | if attr == 'ALL': 88 | plt.title(r'\textbf{Normalized Precision plots of OPE on %s}' % (name)) 89 | else: 90 | plt.title(r'\textbf{Normalized Precision plots of OPE - %s}' % (attr)) 91 | norm_precision = {} 92 | thresholds = np.arange(0, 51, 1) / 100 93 | for tracker_name in norm_precision_ret.keys(): 94 | value = [v for k, v in norm_precision_ret[tracker_name].items() if k in videos] 95 | norm_precision[tracker_name] = np.mean(value, axis=0)[20] 96 | for idx, (tracker_name, pre) in \ 97 | enumerate(sorted(norm_precision.items(), key=lambda x:x[1], reverse=True)): 98 | if tracker_name == bold_name: 99 | label = r"\textbf{[%.3f] %s}" % (pre, tracker_name) 100 | else: 101 | label = "[%.3f] " % (pre) + tracker_name 102 | value = [v for k, v in norm_precision_ret[tracker_name].items() if k in videos] 103 | plt.plot(thresholds, np.mean(value, axis=0), 104 | color=COLOR[idx], linestyle=LINE_STYLE[idx],label=label, linewidth=2) 105 | ax.legend(loc='lower right', labelspacing=0.2) 106 | ax.autoscale(enable=True, axis='both', tight=True) 107 | xmin, xmax, ymin, ymax = plt.axis() 108 | ax.autoscale(enable=False) 109 | ymax += 0.03 110 | plt.axis([xmin, xmax, ymin, ymax]) 111 | plt.xticks(np.arange(xmin, xmax+0.01, 0.05)) 112 | plt.yticks(np.arange(ymin, ymax, 0.1)) 113 | ax.set_aspect((xmax - xmin)/(ymax-ymin)) 114 | plt.show() 115 | -------------------------------------------------------------------------------- /toolkit/visualization/draw_utils.py: -------------------------------------------------------------------------------- 1 | 2 | COLOR = ((1, 0, 0), 3 | (0, 1, 0), 4 | (1, 0, 1), 5 | (1, 1, 0), 6 | (0 , 162/255, 232/255), 7 | (0.5, 0.5, 0.5), 8 | (0, 0, 1), 9 | (0, 1, 1), 10 | (136/255, 0 , 21/255), 11 | (255/255, 127/255, 39/255), 12 | (0, 0, 0)) 13 | 14 | LINE_STYLE = ['-', '--', ':', '-', '--', ':', '-', '--', ':', '-'] 15 | 16 | MARKER_STYLE = ['o', 'v', '<', '*', 'D', 'x', '.', 'x', '<', '.'] 17 |
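The plotting entry point above is what tools/eval.py invokes when --vis is set; here is a self-contained sketch that exercises it with fabricated curves (editor's addition, not a repo file; the tracker and video names are made up):

import numpy as np
from toolkit.visualization import draw_success_precision

# One tracker, two videos, 21-point success curves shaped like OPEBenchmark output.
success_ret = {'TCTrack': {'seq1': np.linspace(1.0, 0.0, 21),
                           'seq2': np.linspace(0.9, 0.0, 21)}}
draw_success_precision(success_ret, name='UAV123',
                       videos=['seq1', 'seq2'], attr='ALL')

-------------------------------------------------------------------------------- /tools/demo.py: --------------------------------------------------------------------------------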
1 | import os 2 | import cv2 3 | import paddle 4 | from glob import glob 5 | 6 | 7 | from pysot.core.config import cfg 8 | from pysot.models.utile_tctrack.model_builder import ModelBuilder_tctrack 9 | from pysot.tracker.tctrack_tracker import TCTrackTracker 10 | from pysot.utils.model_load import load_pretrain 11 | 12 | paddle.set_device('cpu') 13 | 14 | # Mock argparse.ArgumentParser for demo 15 | class Args: 16 | def __init__(self): 17 | self.config = '../experiments/TCTrack/config.yaml' 18 | self.snapshot = './snapshot/tctrack.pdparams' 19 | self.video_name = '../test_dataset/sequence_name' 20 | 21 | args = Args() 22 | 23 | def get_frames(video_name): 24 | if not video_name: 25 | cap = cv2.VideoCapture(0) 26 | 27 | # warmup 28 | for i in range(5): 29 | cap.read() 30 | while True: 31 | ret, frame = cap.read() 32 | if ret: 33 | yield frame 34 | else: 35 | break 36 | elif video_name.endswith('avi') or \ 37 | video_name.endswith('mp4'): 38 | cap = cv2.VideoCapture(video_name) 39 | while True: 40 | ret, frame = cap.read() 41 | if ret: 42 | yield frame 43 | else: 44 | break 45 | else: 46 | images = sorted(glob(os.path.join(video_name, 'img', '*.jp*'))) 47 | for img in images: 48 | frame = cv2.imread(img) 49 | yield frame 50 | 51 | def main(): 52 | # load config 53 | cfg.merge_from_file(args.config) 54 | cfg.CUDA = paddle.device.is_compiled_with_cuda() 55 | device = paddle.device.get_device() 56 | 57 | # create model 58 | model = ModelBuilder_tctrack('test') 59 | 60 | # load model 61 | model = load_pretrain(model, args.snapshot) 62 | model.eval() 63 | # build tracker 64 | tracker = TCTrackTracker(model) 65 | hp = [cfg.TRACK.PENALTY_K, cfg.TRACK.WINDOW_INFLUENCE, cfg.TRACK.LR] 66 | 67 | first_frame = True 68 | if args.video_name: 69 | video_name = args.video_name.split('/')[-1].split('.')[0] 70 | else: 71 | video_name = 'webcam' 72 | cv2.namedWindow(video_name, cv2.WND_PROP_FULLSCREEN) 73 | for frame in get_frames(args.video_name): 74 | if first_frame: 75 | try: 76 | init_rect = cv2.selectROI(video_name, frame, False, False) 77 | except Exception: 78 | exit() 79 | tracker.init(frame, init_rect) 80 | first_frame = False 81 | else: 82 | outputs = tracker.track(frame, hp) 83 | bbox = list(map(int, outputs['bbox'])) 84 | cv2.rectangle(frame, (bbox[0], bbox[1]), 85 | (bbox[0]+bbox[2], bbox[1]+bbox[3]), 86 | (0, 255, 0), 3) 87 | cv2.imshow(video_name, frame) 88 | cv2.waitKey(40) 89 | 90 | if __name__ == '__main__': 91 | main() 92 |
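The hard-coded Args mock above stands in for a CLI; an equivalent argparse front end would look like this (editor's sketch, not a repo file; the defaults simply mirror the mock and are placeholders):

import argparse

parser = argparse.ArgumentParser(description='TCTrack demo')
parser.add_argument('--config', default='../experiments/TCTrack/config.yaml',
                    help='tracker config file')
parser.add_argument('--snapshot', default='./snapshot/tctrack.pdparams',
                    help='trained weights to load')
parser.add_argument('--video_name', default='',
                    help='video file or image folder; empty string falls back to the webcam')
args = parser.parse_args()

-------------------------------------------------------------------------------- /tools/eval.py: --------------------------------------------------------------------------------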
1 | import os 2 | import sys 3 | import time 4 | import argparse 5 | from glob import glob 6 | from tqdm import tqdm 7 | from multiprocessing import Pool 8 | from toolkit.datasets import OTBDataset, LaSOTDataset, UAVTrack112Dataset, UAV10Dataset, UAVDataset, DTBDataset, UAVTrack112lDataset 9 | from toolkit.evaluation import OPEBenchmark 10 | from toolkit.visualization import draw_success_precision 11 | 12 | if __name__ == '__main__': 13 | parser = argparse.ArgumentParser(description='Single Object Tracking Evaluation') 14 | parser.add_argument('--dataset_dir', default='', type=str, help='dataset root directory') 15 | parser.add_argument('--dataset', default='OTB100', type=str, help='dataset name') 16 | parser.add_argument('--tracker_result_dir', default='', type=str, help='tracker result root') 17 | parser.add_argument('--trackers', default='general_model', nargs='+') 18 | parser.add_argument('--vis', default=False, dest='vis', action='store_true') 19 | parser.add_argument('--show_video_level', default=False, dest='show_video_level', action='store_true') 20 | parser.add_argument('--num', default=1, type=int, help='number of processes to eval') 21 | args = parser.parse_args() 22 | 23 | tracker_dir = os.path.join(args.tracker_result_dir, args.dataset) 24 | trackers = glob(os.path.join(args.tracker_result_dir, 25 | args.dataset, 26 | '*')) 27 | trackers = [x.split('/')[-1] for x in trackers] 28 | 29 | root = args.dataset_dir or os.path.realpath(os.path.join(os.path.dirname(__file__), '../test_dataset')) 30 | root = os.path.join(root, args.dataset) 31 | 32 | trackers = [args.trackers] if isinstance(args.trackers, str) else args.trackers 33 | 34 | assert len(trackers) > 0 35 | args.num = min(args.num, len(trackers)) 36 | 37 | if 'UAV123_10fps' in args.dataset: 38 | dataset = UAV10Dataset(args.dataset, root) 39 | elif 'UAV123' in args.dataset: 40 | dataset = UAVDataset(args.dataset, root) 41 | elif 'OTB100' in args.dataset: 42 | dataset = OTBDataset(args.dataset, root) 43 | elif 'LaSOT' in args.dataset: 44 | dataset = LaSOTDataset(args.dataset, root) 45 | elif 'DTB70' in args.dataset: 46 | dataset = DTBDataset(args.dataset, root) 47 | elif 'UAVTrack112_l' in args.dataset: 48 | dataset = UAVTrack112lDataset(args.dataset, root) 49 | elif 'UAVTrack112' in args.dataset: 50 | dataset = UAVTrack112Dataset(args.dataset, root) 51 | else: 52 | print('benchmark error') 53 | exit() 54 | 55 | dataset.set_tracker(tracker_dir, trackers) 56 | benchmark = OPEBenchmark(dataset) 57 | success_ret = {} 58 | precision_ret = {} 59 | 60 | with Pool(processes=args.num) as pool: 61 | for ret in tqdm(pool.imap_unordered(benchmark.eval_success, trackers), desc='eval success', total=len(trackers), ncols=18): 62 | success_ret.update(ret) 63 | 64 | with Pool(processes=args.num) as pool: 65 | for ret in tqdm(pool.imap_unordered(benchmark.eval_precision, trackers), desc='eval precision', total=len(trackers), ncols=18): 66 | precision_ret.update(ret) 67 | 68 | benchmark.show_result(success_ret, precision_ret, show_video_level=args.show_video_level) 69 | 70 | if args.vis: 71 | for attr, videos in dataset.attr.items(): 72 | draw_success_precision(success_ret, 73 | name=dataset.name, 74 | videos=videos, 75 | attr=attr, 76 | precision_ret=precision_ret) 77 |
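For interactive use, the benchmark behind eval.py can also be driven directly, mirroring how the Pool above calls it with one tracker name at a time (editor's sketch, not a repo file; it assumes results stored as <result_dir>/<dataset>/<tracker>/<video>.txt and a UAV123.json in the dataset root):

from toolkit.datasets import DatasetFactory
from toolkit.evaluation import OPEBenchmark

dataset = DatasetFactory.create_dataset(name='UAV123',
                                        dataset_root='./test_dataset/UAV123')
dataset.set_tracker('./results/UAV123', ['TCTrack'])
benchmark = OPEBenchmark(dataset)
success = benchmark.eval_success('TCTrack')      # {tracker: {video: 21-point curve}}
precision = benchmark.eval_precision('TCTrack')  # {tracker: {video: 51-point curve}}

-------------------------------------------------------------------------------- /tools/rt_eva.py: --------------------------------------------------------------------------------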
name = video.split('/')[-2] 57 | name_rt = name 58 | print('Pairing {:s} output with the ground truth ({:d}/{:d}): {:s}'.format(tracker, gt_idx + 1, len(gt_list), name)) 59 | results = pickle.load(open(join(ra_path, name + '.pkl'), 'rb')) 60 | gtlen = len(open(video).readlines()) 61 | results_raw = results.get('results_raw', None) 62 | timestamps = results['timestamps'] 63 | timestamps[0] = 0 64 | input_fidx = results['input_fidx'] 65 | run_time = results['runtime'] 66 | fps_a.append(len(run_time) / sum(run_time)) 67 | tidx_p1 = 0 68 | pred_bboxes = [] 69 | for idx in range(gtlen): 70 | t = (idx - args.eta) / args.fps 71 | while tidx_p1 < len(timestamps) and timestamps[tidx_p1] <= t: 72 | tidx_p1 += 1 73 | tidx = tidx_p1 - 1 74 | ifidx = input_fidx[tidx] 75 | mismatch += idx - ifidx 76 | pred_bboxes.append(results_raw[tidx]) 77 | if not os.path.isdir(ou_path): 78 | os.makedirs(ou_path) 79 | result_path = join(ou_path, '{}.txt'.format(name)) 80 | with open(result_path, 'w') as f: 81 | for x in pred_bboxes: 82 | f.write(','.join([str(i) for i in x]) + '\n') 83 | fps_path = join(ou_path, '{}.txt'.format('Speed')) 84 | with open(fps_path, 'w') as f: 85 | f.write(str(sum(fps_a) / len(fps_a))) 86 | 87 | if __name__ == '__main__': 88 | main() 89 | -------------------------------------------------------------------------------- /tools/test.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved. 2 | 3 | import argparse 4 | import os 5 | import cv2 6 | import numpy as np 7 | import paddle 8 | 9 | 10 | from pysot.core.config import cfg 11 | from pysot.models.utile_tctrack.model_builder import ModelBuilder_tctrack 12 | from pysot.models.utile_tctrackplus.model_builder import ModelBuilder_tctrackplus 13 | from pysot.tracker.tctrack_tracker import TCTrackTracker 14 | from pysot.tracker.tctrackplus_tracker import TCTrackplusTracker 15 | from pysot.utils.bbox import get_axis_aligned_bbox 16 | from pysot.utils.model_load import load_pretrain 17 | from toolkit.datasets import DatasetFactory 18 | 19 | parser = argparse.ArgumentParser(description='TCTrack tracking') 20 | parser.add_argument('--dataset', default='OTB100', type=str, 21 | help='datasets') 22 | parser.add_argument('--tracker_name', default='TCTrack', type=str, 23 | help='tracker name') 24 | parser.add_argument('--snapshot', default='./tools/snapshot/checkpoint00_e88.pth', type=str, 25 | help='snapshot of models to eval') 26 | parser.add_argument('--video', default='', type=str, 27 | help='eval one special video') 28 | parser.add_argument('--vis', default=False, action='store_true', 29 | help='whether visualize result') 30 | args = parser.parse_args() 31 | 32 | def main(): 33 | paddle.set_device('gpu') 34 | # load config 35 | if args.tracker_name == "TCTrack": 36 | if args.dataset in ['UAV123', 'UAV123_10fps', 'DTB70']: 37 | cfg.merge_from_file(os.path.join('./experiments', args.tracker_name, 'config.yaml')) 38 | else: 39 | cfg.merge_from_file(os.path.join('./experiments', args.tracker_name, 'config_l.yaml')) 40 | # create model 41 | model = ModelBuilder_tctrack('test') 42 | 43 | # load model 44 | model = load_pretrain(model, args.snapshot) 45 | model.eval() 46 | # build tracker 47 | tracker = TCTrackTracker(model) 48 | hp = [cfg.TRACK.PENALTY_K, cfg.TRACK.WINDOW_INFLUENCE, cfg.TRACK.LR] 49 | 50 | elif args.tracker_name == "TCTrack++": 51 | cfg.merge_from_file(os.path.join('./experiments', args.tracker_name, 'config.yaml'))
52 | # create model 53 | model = ModelBuilder_tctrackplus('test') 54 | 55 | # load model 56 | model = load_pretrain(model, args.snapshot) 57 | model.eval() 58 | # build tracker 59 | tracker = TCTrackplusTracker(model) 60 | hp = getattr(cfg.HP_SEARCH_TCTrackpp_offline, args.dataset) 61 | 62 | else: 63 | print('No such tracker') 64 | return 65 | cur_dir = os.path.dirname(os.path.realpath(__file__)) 66 | 67 | dataset_root = os.path.join(cur_dir, '../test_dataset', args.dataset) 68 | 69 | # create dataset 70 | dataset = DatasetFactory.create_dataset(name=args.dataset, 71 | dataset_root=dataset_root, 72 | load_img=False) 73 | 74 | model_name = args.tracker_name 75 | 76 | for v_idx, video in enumerate(dataset): 77 | if args.video != '': 78 | # test one special video 79 | if video.name != args.video: 80 | continue 81 | toc = 0 82 | pred_bboxes = [] 83 | scores = [] 84 | track_times = [] 85 | for idx, (img, gt_bbox) in enumerate(video): 86 | tic = cv2.getTickCount() 87 | if idx == 0: 88 | cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox)) 89 | gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h] 90 | tracker.init(img, gt_bbox_) 91 | pred_bbox = gt_bbox_ 92 | scores.append(None) 93 | if 'VOT2018-LT' == args.dataset: 94 | pred_bboxes.append([1]) 95 | else: 96 | pred_bboxes.append(pred_bbox) 97 | else: 98 | outputs = tracker.track(img, hp) 99 | pred_bbox = outputs['bbox'] 100 | pred_bboxes.append(pred_bbox) 101 | scores.append(outputs['best_score']) 102 | toc += cv2.getTickCount() - tic 103 | track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency()) 104 | if idx == 0: 105 | cv2.destroyAllWindows() 106 | if args.vis and idx > 0: 107 | gt_bbox = list(map(int, gt_bbox)) 108 | pred_bbox = list(map(int, pred_bbox)) 109 | cv2.rectangle(img, (gt_bbox[0], gt_bbox[1]), 110 | (gt_bbox[0] + gt_bbox[2], gt_bbox[1] + gt_bbox[3]), (0, 255, 0), 3) 111 | cv2.rectangle(img, (pred_bbox[0], pred_bbox[1]), 112 | (pred_bbox[0] + pred_bbox[2], pred_bbox[1] + pred_bbox[3]), (0, 255, 255), 3) 113 | cv2.putText(img, str(idx), (40, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2) 114 | cv2.imshow(video.name, img) 115 | cv2.waitKey(1) 116 | toc /= cv2.getTickFrequency() 117 | # save results 118 | 119 | model_path = os.path.join('results', args.dataset, model_name) 120 | if not os.path.isdir(model_path): 121 | os.makedirs(model_path) 122 | result_path = os.path.join(model_path, '{}.txt'.format(video.name)) 123 | with open(result_path, 'w') as f: 124 | for x in pred_bboxes: 125 | f.write(','.join([str(i) for i in x]) + '\n') 126 | print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format( 127 | v_idx + 1, video.name, toc, idx / toc)) 128 | 129 | if __name__ == '__main__': 130 | main() 131 | -------------------------------------------------------------------------------- /tools/test_rt.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) SenseTime. All Rights Reserved.
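# Latency-aware ("real-time") evaluation: frames are assumed to arrive at a fixed rate
# (--fps). The loop below always processes the newest frame that has arrived by
# wall-clock time and silently drops frames that come in while the tracker is still
# busy. Per-prediction timestamps, input frame indices, and runtimes are pickled so
# that tools/rt_eva.py can later pair every ground-truth frame with the latest
# prediction available at its arrival time.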
2 | 3 | import argparse 4 | import os 5 | import cv2 6 | import numpy as np 7 | import paddle 8 | 9 | import pickle 10 | from time import perf_counter 11 | 12 | from pysot.core.config import cfg 13 | from pysot.models.utile_tctrackplus.model_builder import ModelBuilder_tctrackplus 14 | from pysot.tracker.tctrackplus_tracker import TCTrackplusTracker 15 | from pysot.utils.bbox import get_axis_aligned_bbox 16 | from pysot.utils.model_load import load_pretrain 17 | from toolkit.datasets import DatasetFactory 18 | 19 | parser = argparse.ArgumentParser(description='TCTrack++ real-time tracking') 20 | parser.add_argument('--dataset', default='OTB100', type=str, 21 | help='datasets') 22 | parser.add_argument('--fps', default=30, type=int, 23 | help='input frame rate') 24 | parser.add_argument('--snapshot', default='./tools/snapshot/checkpoint00_e88.pth', type=str, 25 | help='snapshot of models to eval') 26 | parser.add_argument('--tracker_name', default='TCTrack++', type=str, 27 | help='tracker name') 28 | parser.add_argument('--video', default='', type=str, 29 | help='eval one special video') 30 | parser.add_argument('--vis', default=False, action='store_true', 31 | help='whether visualize result') 32 | parser.add_argument('--overwrite', default=False, action='store_true', 33 | help='whether to overwrite existing results') 34 | args = parser.parse_args() 35 | 36 | def main(): 37 | paddle.set_device('gpu') 38 | # load config 39 | cfg.merge_from_file(os.path.join('./experiments', args.tracker_name, 'config.yaml')) 40 | # create model 41 | model = ModelBuilder_tctrackplus('test') 42 | 43 | # load model 44 | model = load_pretrain(model, args.snapshot) 45 | model.eval() 46 | # build tracker 47 | tracker = TCTrackplusTracker(model) 48 | hp = getattr(cfg.HP_SEARCH_TCTrackpp_offline, args.dataset) 49 | 50 | cur_dir = os.path.dirname(os.path.realpath(__file__)) 51 | 52 | dataset_root = os.path.join(cur_dir, '../test_dataset', args.dataset) 53 | 54 | # create dataset 55 | dataset = DatasetFactory.create_dataset(name=args.dataset, 56 | dataset_root=dataset_root, 57 | load_img=False) 58 | 59 | model_name = args.tracker_name 60 | paddle.device.cuda.synchronize() 61 | 62 | # OPE tracking 63 | for v_idx, video in enumerate(dataset): 64 | 65 | 66 | if args.video != '': 67 | # test one special video 68 | if video.name != args.video: 69 | continue 70 | o_path = os.path.join('results_rt_raw', args.dataset, model_name) 71 | if not os.path.isdir(o_path): 72 | os.makedirs(o_path) 73 | out_path = os.path.join('results_rt_raw', args.dataset, model_name, video.name + '.pkl') 74 | if os.path.isfile(out_path) and not args.overwrite: 75 | continue 76 | toc = 0 77 | pred_bboxes = [] 78 | video.load_img() 79 | scores = [] 80 | track_times = [] 81 | input_fidx = [] 82 | runtime = [] 83 | timestamps = [] 84 | last_fidx = None 85 | n_frame = len(video) 86 | t_total = n_frame / args.fps 87 | t_start = perf_counter() 88 | while 1: 89 | t1 = perf_counter() 90 | t_elapsed = t1 - t_start 91 | if t_elapsed > t_total: 92 | break 93 | # identify latest available frame 94 | fidx_continuous = t_elapsed * args.fps 95 | fidx = int(np.floor(fidx_continuous)) 96 | # if the tracker finishes current frame before next frame comes, continue 97 | if fidx == last_fidx: 98 | continue 99 | last_fidx = fidx 100 | tic = cv2.getTickCount() 101 | img, gt_bbox = video[fidx] 102 | if fidx == 0: 103 | cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox)) 104 | gt_bbox_ = [cx - (w - 1) / 2, cy - (h - 1) / 2, w, h] 105 |
tracker.init(img, gt_bbox_) 106 | paddle.device.cuda.synchronize() 107 | t2 = perf_counter() 108 | t_elapsed = t2 - t_start 109 | timestamps.append(t_elapsed) 110 | runtime.append(t2 - t1) 111 | pred_bbox = gt_bbox_ 112 | scores.append(None) 113 | pred_bboxes.append(pred_bbox) 114 | input_fidx.append(fidx) 115 | else: 116 | outputs = tracker.track(img, hp) 117 | paddle.device.cuda.synchronize() 118 | t2 = perf_counter() 119 | t_elapsed = t2 - t_start 120 | timestamps.append(t_elapsed) 121 | runtime.append(t2 - t1) 122 | pred_bbox = outputs['bbox'] 123 | pred_bboxes.append(pred_bbox) 124 | scores.append(outputs['best_score']) 125 | input_fidx.append(fidx) 126 | if t_elapsed > t_total: 127 | break 128 | toc += cv2.getTickCount() - tic 129 | track_times.append((cv2.getTickCount() - tic) / cv2.getTickFrequency()) 130 | 131 | # save results and run time 132 | if args.overwrite or not os.path.isfile(out_path): 133 | pickle.dump({ 134 | 'results_raw': pred_bboxes, 135 | 'timestamps': timestamps, 136 | 'input_fidx': input_fidx, 137 | 'runtime': runtime, 138 | }, open(out_path, 'wb')) 139 | toc /= cv2.getTickFrequency() 140 | video.free_img() 141 | 142 | print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format( 143 | v_idx + 1, video.name, toc, len(runtime) / sum(runtime))) 144 | 145 | 146 | if __name__ == '__main__': 147 | main() 148 | -------------------------------------------------------------------------------- /train_dataset/got10k/gen_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | from os.path import join, exists 3 | import os 4 | 5 | 6 | dataset_path = 'data' 7 | train_sets = ['GOT-10k_Train_split_01','GOT-10k_Train_split_02','GOT-10k_Train_split_03','GOT-10k_Train_split_04', 8 | 'GOT-10k_Train_split_05','GOT-10k_Train_split_06','GOT-10k_Train_split_07','GOT-10k_Train_split_08', 9 | 'GOT-10k_Train_split_09','GOT-10k_Train_split_10','GOT-10k_Train_split_11','GOT-10k_Train_split_12', 10 | 'GOT-10k_Train_split_13','GOT-10k_Train_split_14','GOT-10k_Train_split_15','GOT-10k_Train_split_16', 11 | 'GOT-10k_Train_split_17','GOT-10k_Train_split_18','GOT-10k_Train_split_19'] 12 | val_set = ['val'] 13 | d_sets = {'videos_val':val_set,'videos_train':train_sets} 14 | 15 | 16 | def parse_and_sched(dl_dir='.'): 17 | js = {} 18 | for d_set in d_sets: 19 | for dataset in d_sets[d_set]: 20 | videos = os.listdir(os.path.join(dataset_path,dataset)) 21 | for video in videos: 22 | if video == 'list.txt': 23 | continue 24 | video = dataset+'/'+video 25 | gt_path = join(dataset_path, video, 'groundtruth.txt') 26 | with open(gt_path, 'r') as f: 27 | groundtruth = f.readlines() 28 | for idx, gt_line in enumerate(groundtruth): 29 | gt_image = gt_line.strip().split(',') 30 | frame = '%06d' % (int(idx)) 31 | obj = '%02d' % (int(0)) 32 | bbox = [int(float(gt_image[0])), int(float(gt_image[1])), 33 | int(float(gt_image[0])) + int(float(gt_image[2])), 34 | int(float(gt_image[1])) + int(float(gt_image[3]))] # xmin,ymin,xmax,ymax 35 | 36 | if video not in js: 37 | js[video] = {} 38 | if obj not in js[video]: 39 | js[video][obj] = {} 40 | js[video][obj][frame] = bbox 41 | if 'videos_val' == d_set: 42 | with open('val.json', 'w') as json_file: 43 | json.dump(js, json_file, indent=4, sort_keys=True) 44 | else: 45 | with open('train.json', 'w') as json_file: 46 | json.dump(js, json_file, indent=4, sort_keys=True) 47 | js = {} 48 | 49 | print(d_set + ': json file saved') 50 | 51 | 52 | if __name__ == '__main__': 53 |
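# A minimal sketch (assumption: train.json has already been generated in the current
# directory) of how the annotations written above can be read back:
#
#   import json
#   with open('train.json') as f:
#       anno = json.load(f)
#   for video, objects in anno.items():
#       for obj_id, frames in objects.items():
#           for frame_id, (xmin, ymin, xmax, ymax) in sorted(frames.items()):
#               pass  # one axis-aligned box per (video, object, frame)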
parse_and_sched() 54 | -------------------------------------------------------------------------------- /train_dataset/got10k/par_crop.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | import os 5 | from concurrent import futures 6 | import sys 7 | import time 8 | 9 | dataset_path = './data' 10 | sub_sets = ['GOT-10k_Train_split_01','GOT-10k_Train_split_02','GOT-10k_Train_split_03','GOT-10k_Train_split_04', 11 | 'GOT-10k_Train_split_05','GOT-10k_Train_split_06','GOT-10k_Train_split_07','GOT-10k_Train_split_08', 12 | 'GOT-10k_Train_split_09','GOT-10k_Train_split_10','GOT-10k_Train_split_11','GOT-10k_Train_split_12', 13 | 'GOT-10k_Train_split_13','GOT-10k_Train_split_14','GOT-10k_Train_split_15','GOT-10k_Train_split_16', 14 | 'GOT-10k_Train_split_17','GOT-10k_Train_split_18','GOT-10k_Train_split_19','val'] 15 | 16 | def printProgress(iteration, total, prefix='', suffix='', decimals=1, barLength=100): 17 | formatStr = "{0:." + str(decimals) + "f}" 18 | percents = formatStr.format(100 * (iteration / float(total))) 19 | filledLength = int(round(barLength * iteration / float(total))) 20 | bar = '█' * filledLength + '-' * (barLength - filledLength) 21 | sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)) 22 | if iteration == total: 23 | sys.stdout.write('\x1b[2K\r') 24 | sys.stdout.flush() 25 | 26 | def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)): 27 | a = (out_sz-1) / (bbox[2]-bbox[0]) 28 | b = (out_sz-1) / (bbox[3]-bbox[1]) 29 | c = -a * bbox[0] 30 | d = -b * bbox[1] 31 | mapping = np.array([[a, 0, c], 32 | [0, b, d]]).astype(np.float32) 33 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), borderMode=cv2.BORDER_CONSTANT, borderValue=padding) 34 | return crop 35 | 36 | def pos_s_2_bbox(pos, s): 37 | return [pos[0]-s/2, pos[1]-s/2, pos[0]+s/2, pos[1]+s/2] 38 | 39 | def crop_like_SiamFC(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)): 40 | target_pos = [(bbox[2]+bbox[0])/2., (bbox[3]+bbox[1])/2.]
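# SiamFC-style context: for a target of size (w, h) and context_amount c = 0.5,
# the exemplar crop side in the original image is
#     s_z = sqrt((w + c*(w + h)) * (h + c*(w + h))),
# e.g. a 100x50 target gives s_z = sqrt(175 * 125) ~ 147.9 px. s_x below then
# widens s_z so that, once the exemplar region is resized to exemplar_size
# (127 px), the search region maps to instanc_size pixels.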
41 | target_size = [bbox[2]-bbox[0], bbox[3]-bbox[1]] 42 | wc_z = target_size[1] + context_amount * sum(target_size) 43 | hc_z = target_size[0] + context_amount * sum(target_size) 44 | s_z = np.sqrt(wc_z * hc_z) 45 | scale_z = exemplar_size / s_z 46 | d_search = (instanc_size - exemplar_size) / 2 47 | pad = d_search / scale_z 48 | s_x = s_z + 2 * pad 49 | 50 | z = crop_hwc(image, pos_s_2_bbox(target_pos, s_z), exemplar_size, padding) 51 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding) 52 | return z, x 53 | 54 | def crop_video(video, d_set, crop_path, instanc_size): 55 | if video != 'list.txt': 56 | video_crop_base_path = os.path.join(crop_path, video) 57 | if not os.path.isdir(video_crop_base_path): os.makedirs(video_crop_base_path) 58 | gt_path = os.path.join(dataset_path, d_set, video, 'groundtruth.txt') 59 | images_path = os.path.join(dataset_path, d_set, video) 60 | with open(gt_path, 'r') as f: 61 | groundtruth = f.readlines() 62 | for idx, gt_line in enumerate(groundtruth): 63 | gt_image = gt_line.strip().split(',') 64 | bbox = [int(float(gt_image[0])),int(float(gt_image[1])),int(float(gt_image[0]))+int(float(gt_image[2])),int(float(gt_image[1]))+int(float(gt_image[3]))]#xmin,ymin,xmax,ymax 65 | 66 | im = cv2.imread(os.path.join(images_path,str(idx+1).zfill(8)+'.jpg')) 67 | avg_chans = np.mean(im, axis=(0, 1)) 68 | 69 | z, x = crop_like_SiamFC(im, bbox, instanc_size=instanc_size, padding=avg_chans) 70 | cv2.imwrite(os.path.join(video_crop_base_path, '{:06d}.{:02d}.z.jpg'.format(int(idx), int(0))), z) 71 | cv2.imwrite(os.path.join(video_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(int(idx), int(0))), x) 72 | 73 | def main(instanc_size=511, num_threads=24): 74 | crop_path = './crop{:d}'.format(instanc_size) 75 | 76 | if not os.path.isdir(crop_path): os.mkdir(crop_path) 77 | for d_set in sub_sets: 78 | save_path = os.path.join(crop_path, d_set) 79 | videos = os.listdir(os.path.join(dataset_path,d_set)) 80 | if not os.path.isdir(save_path): os.mkdir(save_path) 81 | 82 | n_videos = len(videos) 83 | with futures.ProcessPoolExecutor(max_workers=num_threads) as executor: 84 | fs = [executor.submit(crop_video, video, d_set, save_path, instanc_size) for video in videos] 85 | for i, f in enumerate(futures.as_completed(fs)): 86 | # Write progress to error so that it can be seen 87 | printProgress(i, n_videos, prefix='train', suffix='Done ', barLength=40) 88 | 89 | if __name__ == '__main__': 90 | since = time.time() 91 | main() 92 | time_elapsed = time.time() - since 93 | print('Total complete in {:.0f}m {:.0f}s'.format( 94 | time_elapsed // 60, time_elapsed % 60)) 95 | -------------------------------------------------------------------------------- /train_dataset/got10k/readme.md: -------------------------------------------------------------------------------- 1 | # Preprocessing GOT-10K 2 | A Large High-Diversity Benchmark for Generic Object Tracking in the Wild 3 | 4 | ### Prepare dataset 5 | 6 | After downloading the dataset, please unzip it in the *train_dataset/got10k* directory 7 | ````shell 8 | mkdir data 9 | unzip full_data/train_data/*.zip -d ./data 10 | ```` 11 | 12 | ### Crop & Generate data info 13 | 14 | ````shell 15 | #python par_crop.py [crop_size] [num_threads] 16 | python par_crop.py 511 12 17 | python gen_json.py 18 | ```` 19 | -------------------------------------------------------------------------------- /train_dataset/lasot/gen_json.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | 4 |
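# Builds train.json for the LaSOT sequences listed in train_id.txt: each ground-truth
# line (x, y, w, h) is converted to [xmin, ymin, xmax, ymax], and boxes with a negative
# origin or a non-positive width/height are skipped.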
dataset_path = './data' 5 | 6 | def parse_and_sched(dl_dir='.'): 7 | # build annotations for every training video listed in train_id.txt 8 | with open('./train_id.txt', 'r') as f: 9 | videos = f.readlines() 10 | n_videos = len(videos) 11 | js = {} 12 | for idx, video in enumerate(videos): 13 | print('{}/{}'.format(idx, n_videos)) 14 | video = video.strip() 15 | class_name = video.split('-')[0] 16 | class_path = os.path.join(dataset_path, class_name) 17 | gt_path = os.path.join(class_path, video, 'groundtruth.txt') 18 | with open(gt_path, 'r') as f: 19 | groundtruth = f.readlines() 20 | for idx, gt_line in enumerate(groundtruth): 21 | gt_image = gt_line.strip().split(',') 22 | frame = '%06d' % (int(idx)) 23 | obj = '%02d' % (int(0)) 24 | bbox = [int(float(gt_image[0])), int(float(gt_image[1])), 25 | int(float(gt_image[0])) + int(float(gt_image[2])), 26 | int(float(gt_image[1])) + int(float(gt_image[3]))] # xmin,ymin,xmax,ymax 27 | x1 = bbox[0] 28 | y1 = bbox[1] 29 | w = bbox[2] - bbox[0] 30 | h = bbox[3] - bbox[1] 31 | if x1 < 0 or y1 < 0 or w <= 0 or h <= 0: 32 | continue 33 | 34 | if video not in js: 35 | js[video] = {} 36 | if obj not in js[video]: 37 | js[video][obj] = {} 38 | js[video][obj][frame] = bbox 39 | with open('train.json', 'w') as f: 40 | json.dump(js, f, indent=4, sort_keys=True) 41 | js = {} 42 | with open('val.json', 'w') as f: 43 | json.dump(js, f, indent=4, sort_keys=True) 44 | print('done') 45 | 46 | if __name__ == '__main__': 47 | parse_and_sched() 48 | -------------------------------------------------------------------------------- /train_dataset/lasot/par_crop.py: -------------------------------------------------------------------------------- 1 | from os.path import join, isdir 2 | from os import mkdir 3 | import cv2 4 | import numpy as np 5 | from concurrent import futures 6 | import sys 7 | import time 8 | 9 | dataset_path = './data' 10 | 11 | def printProgress(iteration, total, prefix='', suffix='', decimals=1, barLength=100): 12 | formatStr = "{0:." + str(decimals) + "f}" 13 | percents = formatStr.format(100 * (iteration / float(total))) 14 | filledLength = int(round(barLength * iteration / float(total))) 15 | bar = '-' * filledLength + ' ' * (barLength - filledLength) 16 | sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)) 17 | if iteration == total: 18 | sys.stdout.write('\x1b[2K\r') 19 | sys.stdout.flush() 20 | 21 | def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)): 22 | a = (out_sz-1) / (bbox[2]-bbox[0]) 23 | b = (out_sz-1) / (bbox[3]-bbox[1]) 24 | c = -a * bbox[0] 25 | d = -b * bbox[1] 26 | mapping = np.array([[a, 0, c], 27 | [0, b, d]]).astype(np.float32) 28 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), borderMode=cv2.BORDER_CONSTANT, borderValue=padding) 29 | return crop 30 | 31 | def pos_s_2_bbox(pos, s): 32 | return [pos[0]-s/2, pos[1]-s/2, pos[0]+s/2, pos[1]+s/2] 33 | 34 | def crop_like_SiamFC(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)): 35 | target_pos = [(bbox[2]+bbox[0])/2., (bbox[3]+bbox[1])/2.]
36 | target_size = [bbox[2]-bbox[0], bbox[3]-bbox[1]] 37 | wc_z = target_size[1] + context_amount * sum(target_size) 38 | hc_z = target_size[0] + context_amount * sum(target_size) 39 | s_z = np.sqrt(wc_z * hc_z) 40 | scale_z = exemplar_size / s_z 41 | d_search = (instanc_size - exemplar_size) / 2 42 | pad = d_search / scale_z 43 | s_x = s_z + 2 * pad 44 | 45 | z = crop_hwc(image, pos_s_2_bbox(target_pos, s_z), exemplar_size, padding) 46 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding) 47 | return z, x 48 | 49 | def crop_video(video, crop_path, instanc_size): 50 | video = video.strip() 51 | class_name = video.split('-')[0] 52 | class_path = join(dataset_path, class_name) 53 | video_crop_base_path = join(crop_path, video) 54 | if not isdir(video_crop_base_path): 55 | mkdir(video_crop_base_path) 56 | gt_path = join(class_path, video, 'groundtruth.txt') 57 | images_path = join(class_path, video, 'img') 58 | with open(gt_path, 'r') as f: 59 | groundtruth = f.readlines() 60 | for idx, gt_line in enumerate(groundtruth): 61 | gt_image = gt_line.strip().split(',') 62 | bbox = [int(float(gt_image[0])),int(float(gt_image[1])),int(float(gt_image[0]))+int(float(gt_image[2])),int(float(gt_image[1]))+int(float(gt_image[3]))]#xmin,ymin,xmax,ymax 63 | 64 | im = cv2.imread(join(images_path, str(idx+1).zfill(8) + '.jpg')) 65 | avg_chans = np.mean(im, axis=(0, 1)) 66 | 67 | z, x = crop_like_SiamFC(im, bbox, instanc_size=instanc_size, padding=avg_chans) 68 | cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.z.jpg'.format(int(idx), int(0))), z) 69 | cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(int(idx), int(0))), x) 70 | 71 | def main(instanc_size=511, num_threads=24): 72 | crop_path = './crop{:d}'.format(instanc_size) 73 | with open('./train_id.txt', 'r') as f: 74 | videos = f.readlines() 75 | if not isdir(crop_path): 76 | mkdir(crop_path) 77 | 78 | n_videos = len(videos) 79 | with futures.ProcessPoolExecutor(max_workers=num_threads) as executor: 80 | fs = [executor.submit(crop_video, video, crop_path, instanc_size) for video in videos] 81 | for i, f in enumerate(futures.as_completed(fs)): 82 | # Write progress to error so that it can be seen 83 | printProgress(i, n_videos, prefix='train', suffix='Done ', barLength=40) 84 | 85 | if __name__ == '__main__': 86 | since = time.time() 87 | main() 88 | time_elapsed = time.time() - since 89 | print('Total complete in {:.0f}m {:.0f}s'.format( 90 | time_elapsed // 60, time_elapsed % 60)) 91 | -------------------------------------------------------------------------------- /train_dataset/lasot/readme.md: -------------------------------------------------------------------------------- 1 | # Preprocessing LaSOT 2 | Large-scale Single Object Tracking 3 | 4 | ### Prepare dataset 5 | 6 | After downloading the dataset, please unzip it in the *train_dataset/lasot* directory 7 | ````shell 8 | mkdir data 9 | unzip LaSOT/zip/*.zip -d ./data 10 | ```` 11 | 12 | ### Crop & Generate data info 13 | 14 | ````shell 15 | #python par_crop.py [crop_size] [num_threads] 16 | python par_crop.py 511 12 17 | python gen_json.py 18 | ```` 19 | -------------------------------------------------------------------------------- /train_dataset/vid/gen_json.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | import json 3 | import numpy as np 4 | 5 | print('load json (raw vid info), please wait 20 seconds~') 6 | vid = json.load(open('vid.json', 'r')) 7 |
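# vid.json (written by parse_vid.py) is a list of sub-sets, each a list of video dicts:
# {'base_path': ..., 'frame': [{'frame_sz': [w, h], 'img_path': ...,
#  'objs': [{'c': class_name, 'bbox': [xmin, ymin, xmax, ymax], 'trackid': id, 'occ': occluded}]}]}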
8 | 9 | def check_size(frame_sz, bbox): 10 | min_ratio = 0.1 11 | max_ratio = 0.75 12 | area_ratio = np.sqrt((bbox[2]-bbox[0])*(bbox[3]-bbox[1])/float(np.prod(frame_sz))) 13 | ok = (area_ratio > min_ratio) and (area_ratio < max_ratio) 14 | return ok 15 | 16 | 17 | def check_borders(frame_sz, bbox): 18 | dist_from_border = 0.05 * (bbox[2] - bbox[0] + bbox[3] - bbox[1])/2 19 | ok = (bbox[0] > dist_from_border) and (bbox[1] > dist_from_border) and \ 20 | ((frame_sz[0] - bbox[2]) > dist_from_border) and \ 21 | ((frame_sz[1] - bbox[3]) > dist_from_border) 22 | return ok 23 | 24 | 25 | snippets = dict() 26 | n_snippets = 0 27 | n_videos = 0 28 | for subset in vid: 29 | for video in subset: 30 | n_videos += 1 31 | frames = video['frame'] 32 | id_set = [] 33 | id_frames = [[] for _ in range(60)] # at most 60 objects 34 | for f, frame in enumerate(frames): 35 | objs = frame['objs'] 36 | frame_sz = frame['frame_sz'] 37 | for obj in objs: 38 | trackid = obj['trackid'] 39 | occluded = obj['occ'] 40 | bbox = obj['bbox'] 41 | 42 | if trackid not in id_set: 43 | id_set.append(trackid) 44 | id_frames[trackid] = [] 45 | id_frames[trackid].append(f) 46 | if len(id_set) > 0: 47 | snippets[video['base_path']] = dict() 48 | for selected in id_set: 49 | frame_ids = sorted(id_frames[selected]) 50 | sequences = np.split(frame_ids, np.array(np.where(np.diff(frame_ids) > 1)[0]) + 1) 51 | sequences = [s for s in sequences if len(s) > 1] # remove isolated frame. 52 | for seq in sequences: 53 | snippet = dict() 54 | for frame_id in seq: 55 | frame = frames[frame_id] 56 | for obj in frame['objs']: 57 | if obj['trackid'] == selected: 58 | o = obj 59 | break 60 | snippet[frame['img_path'].split('.')[0]] = o['bbox'] 61 | snippets[video['base_path']]['{:02d}'.format(selected)] = snippet 62 | n_snippets += 1 63 | print('video: {:d} snippets_num: {:d}'.format(n_videos, n_snippets)) 64 | 65 | train = {k:v for (k,v) in snippets.items() if 'train' in k} 66 | val = {k:v for (k,v) in snippets.items() if 'val' in k} 67 | 68 | with open('train.json', 'w') as train_file: 69 | json.dump(train, train_file, indent=4, sort_keys=True) 70 | 71 | with open('val.json', 'w') as val_file: 72 | json.dump(val, val_file, indent=4, sort_keys=True) 73 | 74 | print('done!') 75 | -------------------------------------------------------------------------------- /train_dataset/vid/par_crop.py: -------------------------------------------------------------------------------- 1 | from os.path import join, isdir 2 | from os import listdir, mkdir, makedirs 3 | import cv2 4 | import numpy as np 5 | import glob 6 | import xml.etree.ElementTree as ET 7 | from concurrent import futures 8 | import sys 9 | import time 10 | 11 | VID_base_path = './ILSVRC2015' 12 | ann_base_path = join(VID_base_path, 'Annotations/VID/train/') 13 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e'}) 14 | 15 | 16 | # Print iterations progress (thanks StackOverflow) 17 | def printProgress(iteration, total, prefix='', suffix='', decimals=1, barLength=100): 18 |
19 | formatStr = "{0:." + str(decimals) + "f}" 20 | percents = formatStr.format(100 * (iteration / float(total))) 21 | filledLength = int(round(barLength * iteration / float(total))) 22 | bar = '█' * filledLength + '-' * (barLength - filledLength) 23 | sys.stdout.write('\r%s |%s| %s%s %s' % (prefix, bar, percents, '%', suffix)) 24 | if iteration == total: 25 | sys.stdout.write('\x1b[2K\r') 26 | sys.stdout.flush() 27 | 28 | 29 | def crop_hwc(image, bbox, out_sz, padding=(0, 0, 0)): 30 | a = (out_sz-1) / (bbox[2]-bbox[0]) 31 | b = (out_sz-1) / (bbox[3]-bbox[1]) 32 | c = -a * bbox[0] 33 | d = -b * bbox[1] 34 | mapping = np.array([[a, 0, c], 35 | [0, b, d]]).astype(np.float32) 36 | crop = cv2.warpAffine(image, mapping, (out_sz, out_sz), borderMode=cv2.BORDER_CONSTANT, borderValue=padding) 37 | return crop 38 | 39 | 40 | def pos_s_2_bbox(pos, s): 41 | return [pos[0]-s/2, pos[1]-s/2, pos[0]+s/2, pos[1]+s/2] 42 | 43 | 44 | def crop_like_SiamFC(image, bbox, context_amount=0.5, exemplar_size=127, instanc_size=255, padding=(0, 0, 0)): 45 | target_pos = [(bbox[2]+bbox[0])/2., (bbox[3]+bbox[1])/2.] 46 | target_size = [bbox[2]-bbox[0], bbox[3]-bbox[1]] 47 | wc_z = target_size[1] + context_amount * sum(target_size) 48 | hc_z = target_size[0] + context_amount * sum(target_size) 49 | s_z = np.sqrt(wc_z * hc_z) 50 | scale_z = exemplar_size / s_z 51 | d_search = (instanc_size - exemplar_size) / 2 52 | pad = d_search / scale_z 53 | s_x = s_z + 2 * pad 54 | 55 | z = crop_hwc(image, pos_s_2_bbox(target_pos, s_z), exemplar_size, padding) 56 | x = crop_hwc(image, pos_s_2_bbox(target_pos, s_x), instanc_size, padding) 57 | return z, x 58 | 59 | 60 | def crop_video(sub_set, video, crop_path, instanc_size): 61 | video_crop_base_path = join(crop_path, sub_set, video) 62 | if not isdir(video_crop_base_path): makedirs(video_crop_base_path) 63 | 64 | sub_set_base_path = join(ann_base_path, sub_set) 65 | xmls = sorted(glob.glob(join(sub_set_base_path, video, '*.xml'))) 66 | for xml in xmls: 67 | xmltree = ET.parse(xml) 68 | # size = xmltree.findall('size')[0] 69 | # frame_sz = [int(it.text) for it in size] 70 | objects = xmltree.findall('object') 71 | objs = [] 72 | filename = xmltree.findall('filename')[0].text 73 | 74 | im = cv2.imread(xml.replace('xml', 'JPEG').replace('Annotations', 'Data')) 75 | avg_chans = np.mean(im, axis=(0, 1)) 76 | for object_iter in objects: 77 | trackid = int(object_iter.find('trackid').text) 78 | # name = (object_iter.find('name')).text 79 | bndbox = object_iter.find('bndbox') 80 | # occluded = int(object_iter.find('occluded').text) 81 | 82 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text), 83 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)] 84 | z, x = crop_like_SiamFC(im, bbox, instanc_size=instanc_size, padding=avg_chans) 85 | cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.z.jpg'.format(int(filename), trackid)), z) 86 | cv2.imwrite(join(video_crop_base_path, '{:06d}.{:02d}.x.jpg'.format(int(filename), trackid)), x) 87 | 88 | 89 | def main(instanc_size=511, num_threads=24): 90 | crop_path = './crop{:d}'.format(instanc_size) 91 | if not isdir(crop_path): mkdir(crop_path) 92 | 93 | for sub_set in sub_sets: 94 | sub_set_base_path = join(ann_base_path, sub_set) 95 | videos = sorted(listdir(sub_set_base_path)) 96 | n_videos = len(videos) 97 | with futures.ProcessPoolExecutor(max_workers=num_threads) as executor: 98 | fs = [executor.submit(crop_video, sub_set, video, crop_path, instanc_size) for video in videos] 99 | for i, f in enumerate(futures.as_completed(fs)): 100 | #
Write progress to error so that it can be seen 101 | printProgress(i, n_videos, prefix=sub_set, suffix='Done ', barLength=40) 102 | 103 | 104 | if __name__ == '__main__': 105 | since = time.time() 106 | main(int(sys.argv[1]), int(sys.argv[2])) 107 | time_elapsed = time.time() - since 108 | print('Total complete in {:.0f}m {:.0f}s'.format( 109 | time_elapsed // 60, time_elapsed % 60)) 110 | -------------------------------------------------------------------------------- /train_dataset/vid/parse_vid.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | from os import listdir 3 | import json 4 | import glob 5 | import xml.etree.ElementTree as ET 6 | 7 | VID_base_path = './ILSVRC2015' 8 | ann_base_path = join(VID_base_path, 'Annotations/VID/train/') 9 | img_base_path = join(VID_base_path, 'Data/VID/train/') 10 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e'}) 11 | 12 | vid = [] 13 | for sub_set in sub_sets: 14 | sub_set_base_path = join(ann_base_path, sub_set) 15 | videos = sorted(listdir(sub_set_base_path)) 16 | s = [] 17 | for vi, video in enumerate(videos): 18 | print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos))) 19 | v = dict() 20 | v['base_path'] = join(sub_set, video) 21 | v['frame'] = [] 22 | video_base_path = join(sub_set_base_path, video) 23 | xmls = sorted(glob.glob(join(video_base_path, '*.xml'))) 24 | for xml in xmls: 25 | f = dict() 26 | xmltree = ET.parse(xml) 27 | size = xmltree.findall('size')[0] 28 | frame_sz = [int(it.text) for it in size] 29 | objects = xmltree.findall('object') 30 | objs = [] 31 | for object_iter in objects: 32 | trackid = int(object_iter.find('trackid').text) 33 | name = (object_iter.find('name')).text 34 | bndbox = object_iter.find('bndbox') 35 | occluded = int(object_iter.find('occluded').text) 36 | o = dict() 37 | o['c'] = name 38 | o['bbox'] = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text), 39 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)] 40 | o['trackid'] = trackid 41 | o['occ'] = occluded 42 | objs.append(o) 43 | f['frame_sz'] = frame_sz 44 | f['img_path'] = xml.split('/')[-1].replace('xml', 'JPEG') 45 | f['objs'] = objs 46 | v['frame'].append(f) 47 | s.append(v) 48 | vid.append(s) 49 | print('save json (raw vid info), please wait 1 min~') 50 | json.dump(vid, open('vid.json', 'w'), indent=4, sort_keys=True) 51 | print('done!') 52 | -------------------------------------------------------------------------------- /train_dataset/vid/readme.md: -------------------------------------------------------------------------------- 1 | # Preprocessing VID(Object detection from video) 2 | Large Scale Visual Recognition Challenge 2015 (ILSVRC2015) 3 | 4 | ### Download dataset (86GB) 5 | 6 | ````shell 7 | wget http://bvisionweb1.cs.unc.edu/ilsvrc2015/ILSVRC2015_VID.tar.gz 8 | tar -xzvf ./ILSVRC2015_VID.tar.gz 9 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0000 ILSVRC2015/Annotations/VID/train/a 10 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0001 ILSVRC2015/Annotations/VID/train/b 11 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0002 ILSVRC2015/Annotations/VID/train/c 12 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/train/ILSVRC2015_VID_train_0003 ILSVRC2015/Annotations/VID/train/d 13 | ln -sfb $PWD/ILSVRC2015/Annotations/VID/val ILSVRC2015/Annotations/VID/train/e 14 | 15 | ln -sfb $PWD/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0000 ILSVRC2015/Data/VID/train/a 16 | ln -sfb 
$PWD/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0001 ILSVRC2015/Data/VID/train/b 17 | ln -sfb $PWD/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0002 ILSVRC2015/Data/VID/train/c 18 | ln -sfb $PWD/ILSVRC2015/Data/VID/train/ILSVRC2015_VID_train_0003 ILSVRC2015/Data/VID/train/d 19 | ln -sfb $PWD/ILSVRC2015/Data/VID/val ILSVRC2015/Data/VID/train/e 20 | ```` 21 | 22 | ### Crop & Generate data info (20 min) 23 | 24 | ````shell 25 | python parse_vid.py 26 | 27 | #python par_crop.py [crop_size] [num_threads] 28 | python par_crop.py 511 12 29 | python gen_json.py 30 | ```` 31 | -------------------------------------------------------------------------------- /train_dataset/vid/visual.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | from os import listdir 3 | import cv2 4 | import numpy as np 5 | import glob 6 | import xml.etree.ElementTree as ET 7 | 8 | visual = False 9 | color_bar = np.random.randint(0, 255, (90, 3)) 10 | 11 | VID_base_path = './ILSVRC2015' 12 | ann_base_path = join(VID_base_path, 'Annotations/VID/train/') 13 | img_base_path = join(VID_base_path, 'Data/VID/train/') 14 | sub_sets = sorted({'a', 'b', 'c', 'd', 'e'}) 15 | for sub_set in sub_sets: 16 | sub_set_base_path = join(ann_base_path, sub_set) 17 | videos = sorted(listdir(sub_set_base_path)) 18 | for vi, video in enumerate(videos): 19 | print('subset: {} video id: {:04d} / {:04d}'.format(sub_set, vi, len(videos))) 20 | 21 | video_base_path = join(sub_set_base_path, video) 22 | xmls = sorted(glob.glob(join(video_base_path, '*.xml'))) 23 | for xml in xmls: 24 | 25 | xmltree = ET.parse(xml) 26 | size = xmltree.findall('size')[0] 27 | frame_sz = [int(it.text) for it in size] 28 | objects = xmltree.findall('object') 29 | if visual: 30 | im = cv2.imread(xml.replace('xml', 'JPEG').replace('Annotations', 'Data')) 31 | for object_iter in objects: 32 | trackid = int(object_iter.find('trackid').text) 33 | bndbox = object_iter.find('bndbox') 34 | bbox = [int(bndbox.find('xmin').text), int(bndbox.find('ymin').text), 35 | int(bndbox.find('xmax').text), int(bndbox.find('ymax').text)] 36 | if visual: 37 | pt1 = (int(bbox[0]), int(bbox[1])) 38 | pt2 = (int(bbox[2]), int(bbox[3])) 39 | cv2.rectangle(im, pt1, pt2, tuple(int(c) for c in color_bar[trackid]), 3) 40 | if visual: 41 | cv2.imshow('img', im) 42 | cv2.waitKey(1) 43 | 44 | print('done!') 45 | --------------------------------------------------------------------------------