├── img ├── 100.png ├── xywh.png ├── SiamRPN.png ├── WeChat.jpg ├── centr.png ├── coord.png ├── corners.png ├── error.png └── new_file.png ├── train ├── __pycache__ │ └── net.cpython-37.pyc ├── experiments │ └── default │ │ └── parameters.json ├── test.py ├── config.py ├── network.py ├── custom_transforms.py ├── net.py ├── loss.py ├── train_siamrpn.py ├── util.py └── data.py ├── tracking ├── experiments │ └── default │ │ └── parameters.json ├── config.py ├── run_tracking.py ├── network.py ├── data_loader.py ├── util.py ├── custom_transforms.py └── siamRPNBIG.py ├── .gitignore ├── README.md └── fixed.py /img/100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbitularov/SiamRPN-PyTorch/HEAD/img/100.png -------------------------------------------------------------------------------- /img/xywh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbitularov/SiamRPN-PyTorch/HEAD/img/xywh.png -------------------------------------------------------------------------------- /img/SiamRPN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbitularov/SiamRPN-PyTorch/HEAD/img/SiamRPN.png -------------------------------------------------------------------------------- /img/WeChat.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbitularov/SiamRPN-PyTorch/HEAD/img/WeChat.jpg -------------------------------------------------------------------------------- /img/centr.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbitularov/SiamRPN-PyTorch/HEAD/img/centr.png -------------------------------------------------------------------------------- /img/coord.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbitularov/SiamRPN-PyTorch/HEAD/img/coord.png -------------------------------------------------------------------------------- /img/corners.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbitularov/SiamRPN-PyTorch/HEAD/img/corners.png -------------------------------------------------------------------------------- /img/error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbitularov/SiamRPN-PyTorch/HEAD/img/error.png -------------------------------------------------------------------------------- /img/new_file.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbitularov/SiamRPN-PyTorch/HEAD/img/new_file.png -------------------------------------------------------------------------------- /train/__pycache__/net.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arbitularov/SiamRPN-PyTorch/HEAD/train/__pycache__/net.cpython-37.pyc -------------------------------------------------------------------------------- /train/experiments/default/parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "template_img_size": 127, 3 | "detection_img_size": 271, 4 | "stride": 8, 5 | "lr": 1e-5, 6 | "epoches": 200, 7 | "weight_decay": 0.0005, 8 | "momentum": 0.9, 9 | 
"context": 0.5, 10 | "ratios": [0.33, 0.5, 1, 2, 3], 11 | "scales": [8], 12 | "penalty_k": 0.055, 13 | "window_influence": 0.42 14 | } 15 | -------------------------------------------------------------------------------- /tracking/experiments/default/parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "template_img_size": 127, 3 | "detection_img_size": 271, 4 | "stride": 8, 5 | "lr": 0.001, 6 | "epoches": 20, 7 | "weight_decay": 0.00005, 8 | "momentum": 0.9, 9 | "context": 0.5, 10 | "ratios": [0.33, 0.5, 1, 2, 3], 11 | "scales": [8], 12 | "penalty_k": 0.055, 13 | "window_influence": 0.42 14 | } 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | test/data 2 | youtube_BB 3 | youtube-bb.py 4 | experiments 5 | test/__pycache__ 6 | __pycache__ 7 | test/results 8 | tracking/results 9 | model 10 | model_e1.pth 11 | model_e2.pth 12 | model_e3.pth 13 | model_e4.pth 14 | weights-000.pth.tar 15 | test 16 | OTBreports 17 | OTBresults 18 | results 19 | siamrpn_7.pth 20 | siamrpn_50.pth 21 | siamrpn_25.pth 22 | model_e6.pth 23 | model_e9.pth 24 | model_e11.pth 25 | model_e12.pth 26 | model_e10.pth 27 | model_e25.pth 28 | model_e74.pth 29 | model_got.pth 30 | model.pth 31 | test.png 32 | test1.png 33 | weights-0690000.pth.tar 34 | cache 35 | -------------------------------------------------------------------------------- /train/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | foto = cv2.imread('detection_img.png') 5 | 6 | f = open('text.txt', 'r') 7 | boxes = [] 8 | for line in f: 9 | a = line.replace('.', ',') 10 | a = a.replace('[', '') 11 | a = a.replace(']', '') 12 | a = a.replace("\n", '') 13 | a = a.split(',') 14 | 15 | 16 | a = np.asarray(a) 17 | a = np.asarray(a) 18 | a = np.asarray([int(a[0]), int(a[1]), int(a[2]), int(a[3])]) 19 | boxes.append(a) 20 | f.close() 21 | 22 | coint = 0 23 | for box in boxes: 24 | print(box) 25 | cx , cy, w, h = box 26 | cx_big = 255/2 + (cx/0.16) 27 | cy_big = 255/2 + (cy/0.16) 28 | 29 | x1 = int(cx_big - w/2) 30 | x2 = int(cx_big + w/2) 31 | 32 | y1 = int(cy_big - h/2) 33 | y2 = int(cy_big + h/2) 34 | 35 | r = int(np.random.choice(range(250))) 36 | g = int(np.random.choice(range(250))) 37 | b = int(np.random.choice(range(250))) 38 | coint += 1 39 | '''if coint >= 3: 40 | coint = 1''' 41 | 42 | frame = cv2.rectangle(foto, (x1,y1), (x2,y2), (r, g, b), coint) 43 | 44 | cv2.imwrite('detection_img1.png',frame) 45 | -------------------------------------------------------------------------------- /tracking/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class TrackerConfig(object): 4 | # These are the default hyper-params for DaSiamRPN 0.3827 5 | windowing = 'cosine' # to penalize large displacements [cosine/uniform] 6 | # Params from the network architecture, have to be consistent with the training 7 | template_img_size = 127 # input z size 8 | detection_img_size = 271 # input x size (search region) 9 | total_stride = 8 10 | valid_scope = int((detection_img_size - template_img_size) / total_stride / 2) 11 | score_size = int((detection_img_size - template_img_size)/total_stride+1) 12 | context_amount = 0.5 # context amount for the exemplar 13 | ratios = [0.33, 0.5, 1, 2, 3] 14 | scales = [8, ] 15 | anchor_num = len(ratios) * 
len(scales) 16 | anchor = [] 17 | penalty_k = 0.055 18 | window_influence = 0.42 19 | lr = 0.295 20 | lr_box = 0.30 21 | 22 | min_scale = 0.1 23 | max_scale = 10 24 | 25 | anchor_base_size = 8 26 | anchor_scales = np.array([8, ]) 27 | anchor_ratios = np.array([0.33, 0.5, 1, 2, 3]) 28 | size = anchor_num * score_size * score_size 29 | 30 | config = TrackerConfig() 31 | -------------------------------------------------------------------------------- /train/config.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Config(object): 4 | 5 | '''config for train_siamrpn.py''' 6 | template_img_size = 127 7 | detection_img_size = 271 8 | epoches = 200 9 | train_epoch_size = 1000 10 | val_epoch_size = 100 11 | 12 | train_batch_size = 32 # training batch size 13 | valid_batch_size = 8 # validation batch size 14 | train_num_workers = 16 # number of workers of train dataloader 15 | valid_num_workers = 16 16 | 17 | start_lr = 3e-3 18 | end_lr = 1e-5 19 | warm_lr = 1e-3 20 | warm_scale = warm_lr/start_lr 21 | lr = np.logspace(np.log10(start_lr), np.log10(end_lr), num=epoches)[0] 22 | gamma = np.logspace(np.log10(start_lr), np.log10(end_lr), num=epoches)[1] / \ 23 | np.logspace(np.log10(start_lr), np.log10(end_lr), num=epoches)[0] 24 | momentum = 0.9 25 | weight_decay = 0.0005 26 | 27 | clip = 100 # grad clip 28 | 29 | anchor_scales = np.array([8, ]) 30 | anchor_ratios = np.array([0.33, 0.5, 1, 2, 3]) 31 | anchor_num = len(anchor_scales) * len(anchor_ratios) # 5 32 | score_size = int((detection_img_size - template_img_size) / 8 + 1) 33 | size = anchor_num * score_size * score_size 34 | 35 | '''config for data.py''' 36 | 37 | out_feature = 19 38 | max_inter = 80 39 | fix_former_3_layers = True 40 | pretrained_model = '/home/arbi/Загрузки/alexnet.pth' # '/home/arbi/desktop/alexnet.pth' # # '/Users/arbi/Desktop/alexnet.pth' 41 | 42 | total_stride = 8 43 | anchor_total_stride = total_stride 44 | anchor_base_size = 8 45 | anchor_scales = np.array([8, ]) 46 | anchor_ratios = np.array([0.33, 0.5, 1, 2, 3]) 47 | 48 | valid_scope = int((detection_img_size - template_img_size) / total_stride / 2) 49 | anchor_valid_scope = 2 * valid_scope + 1 50 | pos_threshold = 0.6 51 | neg_threshold = 0.3 52 | 53 | context = 0.5 54 | penalty_k = 0.055 55 | window_influence = 0.42 56 | eps = 0.01 57 | 58 | max_translate = 12 59 | scale_resize = 0.15 60 | gray_ratio = 0.25 61 | exem_stretch = False 62 | 63 | '''config for net.py''' 64 | num_pos = 16 65 | num_neg = 48 66 | lamb = 5 67 | 68 | ohem_pos = False 69 | ohem_neg = False 70 | ohem_reg = False 71 | 72 | 73 | config = Config() 74 | -------------------------------------------------------------------------------- /tracking/run_tracking.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from got10k.experiments import * 3 | from siamRPNBIG import TrackerSiamRPNBIG 4 | import argparse 5 | import os 6 | import json 7 | 8 | parser = argparse.ArgumentParser(description='PyTorch SiameseRPN Tracking') 9 | 10 | parser.add_argument('--tracker_path', default='/home/arbi/desktop/data', metavar='DIR',help='path to dataset') 11 | parser.add_argument('--experiment_name', default='default', metavar='DIR',help='path to weight') 12 | parser.add_argument('--net_path', default='../train/experiments/default/model/model_e17.pth', metavar='DIR',help='path to weight') 13 | # ../train/experiments/default/model/model_e1.pth # ../model.pth 
#../siamrpn_7.pth 14 | # /Users/arbi/Desktop # /home/arbi/desktop/GOT-10k 15 | # /media/arbi/9132EE0B9756C987/dataset/GOT-10k/full_data 16 | parser.add_argument('--visualize', default=True, help='visualize') 17 | 18 | args = parser.parse_args() 19 | 20 | if __name__ == '__main__': 21 | 22 | """Load the parameters from json file""" 23 | json_path = os.path.join('experiments/{}'.format(args.experiment_name), 'parameters.json') 24 | assert os.path.isfile(json_path), ("No json configuration file found at {}".format(json_path)) 25 | with open(json_path) as data_file: 26 | params = json.load(data_file) 27 | 28 | '''setup tracker''' 29 | tracker = TrackerSiamRPNBIG(params, args.net_path) 30 | 31 | '''setup experiments''' 32 | # 7 datasets with different versions 33 | ''' 34 | experiments = ExperimentGOT10k('data/GOT-10k', subset='test'), 35 | ExperimentOTB('data/OTB', version=2015), 36 | ExperimentOTB('data/OTB', version=2013), 37 | ExperimentVOT('data/vot2018', version=2018), 38 | ExperimentUAV123('data/UAV123', version='UAV123'), 39 | ExperimentUAV123('data/UAV123', version='UAV20L'), 40 | ExperimentDTB70('data/DTB70'), 41 | ExperimentTColor128('data/Temple-color-128'), 42 | ExperimentNfS('data/nfs', fps=30), 43 | ExperimentNfS('data/nfs', fps=240), 44 | ] 45 | 46 | for e in experiments: 47 | e.run(tracker, visualize=True) 48 | e.report([tracker.name]) 49 | ''' 50 | 51 | ''' 52 | experiments = ExperimentGOT10k(args.tracker_path, subset='val', 53 | result_dir='experiments/{}/results'.format(args.experiment_name), 54 | report_dir='experiments/{}/reports'.format(args.experiment_name)) 55 | 56 | ''' 57 | experiments = ExperimentOTB('/home/arbi/desktop/data', version=2015, 58 | result_dir='experiments/{}/OTB2015resultsGOT-10k_42'.format(args.experiment_name), 59 | report_dir='experiments/{}/OTB2015reportsGOT-10k_42'.format(args.experiment_name)) 60 | 61 | 62 | '''run experiments''' 63 | experiments.run(tracker, visualize = args.visualize) 64 | experiments.report([tracker.name]) 65 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # I got on OTB2015 result 8.41 and 0.625 without RPN. In the [SiamFusion project](https://github.com/arbitularov/SiamFusion) 2 | ![result](https://github.com/arbitularov/SiamFusion/blob/master/img/results_for_31.jpg) 3 | # SiamRPN-PyTorch 4 | Implementation SiamRPN on PyTorch with GOT-10k dataset 5 | 6 |
7 |
8 | 9 |
10 |
11 |
12 |
13 | 
 14 | ## How to run Training
 15 | 1. Download the GOT-10k dataset from http://got-10k.aitestunion.com/downloads
 16 | 2. Run the train_siamrpn.py script:
 17 | ```
 18 | cd train
 19 | 
 20 | python3 train_siamrpn.py --train_path=/path/to/dataset/GOT-10k/train
 21 | ```
 22 | 
 23 | ## How to run Tracking
 24 | [Coming Soon]
 25 | 
 26 | 
 27 | ## pip install
 28 | ```
 29 | pip3 install shapely
 30 | ```
 31 | 
 32 | ## How to fix GOT-10k dataset
 33 | 
 34 | 
35 |
36 | 37 |
38 |
39 |
40 |
41 | 
 42 | 1. First, you need to delete these four videos:
 43 | ```
 44 | GOT-10k_Train_008628
 45 | GOT-10k_Train_008630
 46 | GOT-10k_Train_009058
 47 | GOT-10k_Train_009059
 48 | ```
 49 | They have to be removed because their ymin and xmin values are greater than the size of the image.
 50 | 
 51 | 2. Run the fixed.py script (a sketch of the correction it applies is shown below):
 52 | ```
 53 | python3 fixed.py --dataset_path=/path/to/dataset/GOT-10k/train
 54 | ```
 55 | 
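For illustration, this is roughly the correction fixed.py applies to one bounding box. It is a minimal sketch with made-up numbers; the real script reads the image resolution from each sequence's meta_info.ini and rewrites groundtruth.txt in place.
```
# Minimal sketch of the clamping rule used by fixed.py (illustrative values only).
img_w, img_h = 1280, 720                  # resolution listed in meta_info.ini
xmin, ymin, w, h = 1200, 650, 120, 100    # one box from groundtruth.txt

if xmin + w > img_w:                      # xmax sticks out on the right
    w -= (xmin + w) - img_w               # 120 - (1320 - 1280) = 80
if ymin + h > img_h:                      # ymax sticks out at the bottom
    h -= (ymin + h) - img_h               # 100 - (750 - 720) = 70

print('{}.0000,{}.0000,{}.0000,{}.0000'.format(xmin, ymin, w, h))
# -> 1200.0000,650.0000,80.0000,70.0000
```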
56 |
57 | 58 |
59 |
60 |
61 |
62 | 
 63 | After running the script you will have a new_file.txt file. It contains detailed information about where the errors were found; an example line is shown below.
 64 | 
 65 | 
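Each entry in new_file.txt comes from the format strings in fixed.py and records which sequence and frame went out of bounds and by how much. A line looks roughly like this (the sequence name and numbers here are illustrative):
```
just w GOT-10k_Train_000123, img: 17, img_size: 1280 < xmax = 1200 + 120 = 1320
```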
66 |
67 | 68 |
69 |
70 |
71 |
72 | 
 73 | You do not need to change anything yourself; the fixed.py script will do it for you.
 74 | 
 75 | ## My contacts
 76 | 
 77 | E-mail: arbi.tularov@yandex.ru
 78 | 
 79 | WeChat: tularov_arbi
 80 | 
 81 | 
82 |
83 | 84 |
85 |
86 |
87 |
88 | 89 | ## Authors 90 | 91 | * `Bo Li` - paper - [Siamese-RPN](http://openaccess.thecvf.com/content_cvpr_2018/papers/Li_High_Performance_Visual_CVPR_2018_paper.pdf) 92 | * `De jiasong` - code - [Siamese-RPN-pytorch](https://github.com/songdejia/Siamese-RPN-pytorch) 93 | * `Makalo` - code - [Siamese-RPN-tensorflow](https://github.com/makalo/Siamese-RPN-tensorflow) 94 | 95 | ## Citation 96 | ``` 97 | Paper: @InProceedings{Li_2018_CVPR, 98 | author = {Li, Bo and Yan, Junjie and Wu, Wei and Zhu, Zheng and Hu, Xiaolin}, 99 | title = {High Performance Visual Tracking With Siamese Region Proposal Network}, 100 | booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)}, 101 | month = {June}, 102 | year = {2018} 103 | } 104 | ``` 105 | -------------------------------------------------------------------------------- /fixed.py: -------------------------------------------------------------------------------- 1 | import re 2 | import os 3 | from tqdm import tqdm 4 | import argparse 5 | 6 | parser = argparse.ArgumentParser(description='Fixed GOT-10k Dataset') 7 | parser.add_argument('--dataset_path', default='/Users/arbi/Desktop/val', metavar='DIR',help='path to dataset') 8 | param = parser.parse_args() 9 | 10 | sub_class_dir = [sub_class_dir for sub_class_dir in os.listdir(param.dataset_path) if os.path.isdir(os.path.join(param.dataset_path, sub_class_dir))] 11 | 12 | array_error = [] 13 | 14 | for name_dir in tqdm(sub_class_dir): 15 | 16 | meta = open("{}/{}/meta_info.ini".format(param.dataset_path, name_dir), "r") 17 | 18 | read_meta = meta.readlines() 19 | 20 | w_and_h = re.findall(r'\d+', read_meta[10]) 21 | meta.close() 22 | 23 | groundtruth = open("{}/{}/groundtruth.txt".format(param.dataset_path, name_dir), "r") 24 | read_groundtruth = groundtruth.readlines() 25 | count_gt = len(read_groundtruth) 26 | groundtruth.close() 27 | 28 | groundtruth_write = open("{}/{}/groundtruth.txt".format(param.dataset_path, name_dir), "w") 29 | 30 | groundtruth_array = [] 31 | for i2, name_gt in enumerate(read_groundtruth): 32 | 33 | gt = [abs(int(float(i))) for i in name_gt.strip('\n').split(',')] 34 | w = gt[0]+gt[2] 35 | h = gt[1]+gt[3] 36 | 37 | if w > int(w_and_h[0]) and h > int(w_and_h[1]): 38 | print('i2', i2+1,'w and h') 39 | info = 'w and h {}, img: {}, img_size_h: {} < ymax = {} + {} = {} and img_size_w: {} < xmax = {} + {} = {} '.format(name_dir, i2+1, w_and_h[0], gt[0], gt[2], w, w_and_h[1], gt[1], gt[3], h ) 40 | array_error.append(info) 41 | w_fixed = gt[2] - (w - int(w_and_h[0])) 42 | h_fixed = gt[3] - (h - int(w_and_h[1])) 43 | gt_fixed = '{}.0000,{}.0000,{}.0000,{}.0000'.format(gt[0], gt[1], w_fixed, h_fixed) 44 | groundtruth_array.append(gt_fixed) 45 | 46 | elif w > int(w_and_h[0]): 47 | #print('i2', i2+1,'just w') 48 | info = 'just w {}, img: {}, img_size: {} < xmax = {} + {} = {}'.format(name_dir, i2+1, w_and_h[0], gt[0], gt[2], w) 49 | array_error.append(info) 50 | w_fixed = gt[2] - (w - int(w_and_h[0])) 51 | gt_fixed = '{}.0000,{}.0000,{}.0000,{}.0000'.format(gt[0], gt[1], w_fixed, gt[3]) 52 | groundtruth_array.append(gt_fixed) 53 | 54 | elif h > int(w_and_h[1]): 55 | #print('i2', i2+1,'just h') 56 | info = 'just w {}, img: {}, img_size: {} < ymax = {} + {} = {}'.format(name_dir, i2+1, w_and_h[1], gt[1], gt[3], h) 57 | array_error.append(info) 58 | h_fixed = gt[3] - (h - int(w_and_h[1])) 59 | gt_fixed = '{}.0000,{}.0000,{}.0000,{}.0000'.format(gt[0], gt[1], gt[2], h_fixed) 60 | groundtruth_array.append(gt_fixed) 61 | 62 | else: 63 | #print('i2', i2+1,'all it\'s 
ok') 64 | gt_fixed = '{}.0000,{}.0000,{}.0000,{}.0000'.format(gt[0], gt[1], gt[2], gt[3]) 65 | groundtruth_array.append(gt_fixed) 66 | try: 67 | for l in groundtruth_array: 68 | groundtruth_write.write('{}\n'.format(l)) 69 | finally: 70 | groundtruth_write.close() 71 | 72 | new_file = open("new_file.txt", "w") 73 | try: 74 | for i in array_error: 75 | new_file.write('{}\n'.format(i)) 76 | finally: 77 | new_file.close() 78 | -------------------------------------------------------------------------------- /train/network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn.functional as F 4 | import torchvision.transforms as transforms 5 | 6 | from torchvision.models import alexnet 7 | from torch.autograd import Variable 8 | from torch import nn 9 | 10 | from config import config 11 | 12 | 13 | class SiameseAlexNet(nn.Module): 14 | def __init__(self, ): 15 | super(SiameseAlexNet, self).__init__() 16 | self.featureExtract = nn.Sequential( 17 | nn.Conv2d(3, 96, 11, stride=2), 18 | nn.BatchNorm2d(96), 19 | nn.MaxPool2d(3, stride=2), 20 | nn.ReLU(inplace=True), 21 | nn.Conv2d(96, 256, 5), 22 | nn.BatchNorm2d(256), 23 | nn.MaxPool2d(3, stride=2), 24 | nn.ReLU(inplace=True), 25 | nn.Conv2d(256, 384, 3), 26 | nn.BatchNorm2d(384), 27 | nn.ReLU(inplace=True), 28 | nn.Conv2d(384, 384, 3), 29 | nn.BatchNorm2d(384), 30 | nn.ReLU(inplace=True), 31 | nn.Conv2d(384, 256, 3), 32 | nn.BatchNorm2d(256), 33 | ) 34 | self.anchor_num = config.anchor_num 35 | self.input_size = config.detection_img_size 36 | self.score_displacement = int((self.input_size - config.template_img_size) / config.total_stride) 37 | self.conv_cls1 = nn.Conv2d(256, 256 * 2 * self.anchor_num, kernel_size=3, stride=1, padding=0) 38 | self.conv_r1 = nn.Conv2d(256, 256 * 4 * self.anchor_num, kernel_size=3, stride=1, padding=0) 39 | 40 | self.conv_cls2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=0) 41 | self.conv_r2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=0) 42 | self.regress_adjust = nn.Conv2d(4 * self.anchor_num, 4 * self.anchor_num, 1) 43 | 44 | def init_weights(self): 45 | for m in self.modules(): 46 | if isinstance(m, nn.Conv2d): 47 | nn.init.normal_(m.weight.data, std= 0.0005) 48 | nn.init.normal_(m.bias.data, std= 0.0005) 49 | elif isinstance(m, nn.BatchNorm2d): 50 | m.weight.data.fill_(1) 51 | m.bias.data.zero_() 52 | 53 | def forward(self, template, detection): 54 | N = template.size(0) 55 | template_feature = self.featureExtract(template) 56 | detection_feature = self.featureExtract(detection) 57 | 58 | kernel_score = self.conv_cls1(template_feature).view(N, 2 * self.anchor_num, 256, 4, 4) 59 | kernel_regression = self.conv_r1(template_feature).view(N, 4 * self.anchor_num, 256, 4, 4) 60 | conv_score = self.conv_cls2(detection_feature) 61 | conv_regression = self.conv_r2(detection_feature) 62 | 63 | conv_scores = conv_score.reshape(1, -1, self.score_displacement + 4, self.score_displacement + 4) 64 | score_filters = kernel_score.reshape(-1, 256, 4, 4) 65 | pred_score = F.conv2d(conv_scores, score_filters, groups=N).reshape(N, 10, self.score_displacement + 1, 66 | self.score_displacement + 1) 67 | 68 | conv_reg = conv_regression.reshape(1, -1, self.score_displacement + 4, self.score_displacement + 4) 69 | reg_filters = kernel_regression.reshape(-1, 256, 4, 4) 70 | pred_regression = self.regress_adjust( 71 | F.conv2d(conv_reg, reg_filters, groups=N).reshape(N, 20, self.score_displacement + 1, 72 | self.score_displacement 
+ 1)) 73 | return pred_score, pred_regression 74 | 75 | def track_init(self, template): 76 | N = template.size(0) 77 | template_feature = self.featureExtract(template) 78 | 79 | kernel_score = self.conv_cls1(template_feature).view(N, 2 * self.anchor_num, 256, 4, 4) 80 | kernel_regression = self.conv_r1(template_feature).view(N, 4 * self.anchor_num, 256, 4, 4) 81 | self.score_filters = kernel_score.reshape(-1, 256, 4, 4) 82 | self.reg_filters = kernel_regression.reshape(-1, 256, 4, 4) 83 | 84 | def track(self, detection): 85 | N = detection.size(0) 86 | detection_feature = self.featureExtract(detection) 87 | 88 | conv_score = self.conv_cls2(detection_feature) 89 | conv_regression = self.conv_r2(detection_feature) 90 | 91 | conv_scores = conv_score.reshape(1, -1, self.score_displacement + 4, self.score_displacement + 4) 92 | pred_score = F.conv2d(conv_scores, self.score_filters, groups=N).reshape(N, 10, self.score_displacement + 1, 93 | self.score_displacement + 1) 94 | conv_reg = conv_regression.reshape(1, -1, self.score_displacement + 4, self.score_displacement + 4) 95 | pred_regression = self.regress_adjust( 96 | F.conv2d(conv_reg, self.reg_filters, groups=N).reshape(N, 20, self.score_displacement + 1, 97 | self.score_displacement + 1)) 98 | return pred_score, pred_regression 99 | -------------------------------------------------------------------------------- /tracking/network.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import torch.nn.functional as F 4 | import torchvision.transforms as transforms 5 | from custom_transforms import ToTensor 6 | 7 | from torchvision.models import alexnet 8 | from torch.autograd import Variable 9 | from torch import nn 10 | 11 | 12 | from config import config 13 | 14 | 15 | class SiameseAlexNet(nn.Module): 16 | def __init__(self, ): 17 | super(SiameseAlexNet, self).__init__() 18 | self.featureExtract = nn.Sequential( 19 | nn.Conv2d(3, 96, 11, stride=2), 20 | nn.BatchNorm2d(96), 21 | nn.MaxPool2d(3, stride=2), 22 | nn.ReLU(inplace=True), 23 | nn.Conv2d(96, 256, 5), 24 | nn.BatchNorm2d(256), 25 | nn.MaxPool2d(3, stride=2), 26 | nn.ReLU(inplace=True), 27 | nn.Conv2d(256, 384, 3), 28 | nn.BatchNorm2d(384), 29 | nn.ReLU(inplace=True), 30 | nn.Conv2d(384, 384, 3), 31 | nn.BatchNorm2d(384), 32 | nn.ReLU(inplace=True), 33 | nn.Conv2d(384, 256, 3), 34 | nn.BatchNorm2d(256), 35 | ) 36 | self.anchor_num = config.anchor_num 37 | self.input_size = config.detection_img_size 38 | self.score_displacement = int((self.input_size - config.template_img_size) / config.total_stride) 39 | self.conv_cls1 = nn.Conv2d(256, 256 * 2 * self.anchor_num, kernel_size=3, stride=1, padding=0) 40 | self.conv_r1 = nn.Conv2d(256, 256 * 4 * self.anchor_num, kernel_size=3, stride=1, padding=0) 41 | 42 | self.conv_cls2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=0) 43 | self.conv_r2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=0) 44 | self.regress_adjust = nn.Conv2d(4 * self.anchor_num, 4 * self.anchor_num, 1) 45 | 46 | def init_weights(self): 47 | for m in self.modules(): 48 | if isinstance(m, nn.Conv2d): 49 | # nn.init.kaiming_normal_(m.weight.data, mode='fan_out', nonlinearity='relu') 50 | nn.init.normal_(m.weight.data, std=0.0005) 51 | nn.init.normal_(m.bias.data, std=0.0005) 52 | elif isinstance(m, nn.BatchNorm2d): 53 | m.weight.data.fill_(1) 54 | m.bias.data.zero_() 55 | 56 | def forward(self, template, detection): 57 | N = template.size(0) 58 | template_feature = 
self.featureExtract(template) 59 | detection_feature = self.featureExtract(detection) 60 | 61 | kernel_score = self.conv_cls1(template_feature).view(N, 2 * self.anchor_num, 256, 4, 4) 62 | kernel_regression = self.conv_r1(template_feature).view(N, 4 * self.anchor_num, 256, 4, 4) 63 | conv_score = self.conv_cls2(detection_feature) 64 | conv_regression = self.conv_r2(detection_feature) 65 | 66 | conv_scores = conv_score.reshape(1, -1, self.score_displacement + 4, self.score_displacement + 4) 67 | score_filters = kernel_score.reshape(-1, 256, 4, 4) 68 | pred_score = F.conv2d(conv_scores, score_filters, groups=N).reshape(N, 10, self.score_displacement + 1, 69 | self.score_displacement + 1) 70 | 71 | conv_reg = conv_regression.reshape(1, -1, self.score_displacement + 4, self.score_displacement + 4) 72 | reg_filters = kernel_regression.reshape(-1, 256, 4, 4) 73 | pred_regression = self.regress_adjust( 74 | F.conv2d(conv_reg, reg_filters, groups=N).reshape(N, 20, self.score_displacement + 1, 75 | self.score_displacement + 1)) 76 | return pred_score, pred_regression 77 | 78 | def track_init(self, template): 79 | N = template.size(0) 80 | template_feature = self.featureExtract(template) 81 | 82 | kernel_score = self.conv_cls1(template_feature).view(N, 2 * self.anchor_num, 256, 4, 4) 83 | kernel_regression = self.conv_r1(template_feature).view(N, 4 * self.anchor_num, 256, 4, 4) 84 | self.score_filters = kernel_score.reshape(-1, 256, 4, 4) 85 | self.reg_filters = kernel_regression.reshape(-1, 256, 4, 4) 86 | 87 | def track(self, detection): 88 | N = detection.size(0) 89 | detection_feature = self.featureExtract(detection) 90 | 91 | conv_score = self.conv_cls2(detection_feature) 92 | conv_regression = self.conv_r2(detection_feature) 93 | 94 | conv_scores = conv_score.reshape(1, -1, self.score_displacement + 4, self.score_displacement + 4) 95 | pred_score = F.conv2d(conv_scores, self.score_filters, groups=N).reshape(N, 10, self.score_displacement + 1, 96 | self.score_displacement + 1) 97 | conv_reg = conv_regression.reshape(1, -1, self.score_displacement + 4, self.score_displacement + 4) 98 | pred_regression = self.regress_adjust( 99 | F.conv2d(conv_reg, self.reg_filters, groups=N).reshape(N, 20, self.score_displacement + 1, 100 | self.score_displacement + 1)) 101 | return pred_score, pred_regression 102 | -------------------------------------------------------------------------------- /tracking/data_loader.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import cv2 5 | import torch 6 | import random 7 | import numpy as np 8 | from torchvision import datasets, transforms, utils 9 | 10 | class TrackerDataLoader(object): 11 | 12 | def get_instance_image(self, img, bbox, size_z, size_x, context_amount, img_mean=None): 13 | 14 | cx, cy, w, h = bbox # float type 15 | wc_z = w + 0.5 * (w + h) 16 | hc_z = h + 0.5 * (w + h) 17 | s_z = np.sqrt(wc_z * hc_z) # the width of the crop box 18 | scale_z = size_z / s_z 19 | 20 | s_x = s_z * size_x / size_z 21 | instance_img, scale_x = self.crop_and_pad(img, cx, cy, size_x, s_x, img_mean) 22 | w_x = w * scale_x 23 | h_x = h * scale_x 24 | # point_1 = (size_x + 1) / 2 - w_x / 2, (size_x + 1) / 2 - h_x / 2 25 | # point_2 = (size_x + 1) / 2 + w_x / 2, (size_x + 1) / 2 + h_x / 2 26 | # frame = cv2.rectangle(instance_img, (int(point_1[0]),int(point_1[1])), (int(point_2[0]),int(point_2[1])), (0, 255, 0), 2) 27 | # cv2.imwrite('1.jpg', frame) 28 | return instance_img, w_x, h_x, scale_x 29 | 
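    # Note on the crop geometry above (the numbers are illustrative, not from the repo):
    # with a context amount of 0.5 and a target of w = 100, h = 50,
    #   wc_z = 100 + 0.5 * 150 = 175,  hc_z = 50 + 0.5 * 150 = 125,
    #   s_z  = sqrt(175 * 125) ~= 147.9  ->  scale_z = 127 / 147.9 ~= 0.86,
    #   s_x  = s_z * 271 / 127 ~= 315.6,
    # i.e. a ~316 px square centred on the target is cropped (padded with the
    # image mean where it leaves the frame) and resized to the 271 x 271 search
    # region; the 127 x 127 exemplar is cut the same way from s_z.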
30 | def get_exemplar_image(self, img, bbox, size_z, context_amount, img_mean=None): 31 | cx, cy, w, h = bbox 32 | wc_z = w + context_amount * (w + h) 33 | hc_z = h + context_amount * (w + h) 34 | s_z = np.sqrt(wc_z * hc_z) 35 | scale_z = size_z / s_z 36 | exemplar_img, _ = self.crop_and_pad(img, cx, cy, size_z, s_z, img_mean) 37 | return exemplar_img, scale_z, s_z 38 | 39 | def crop_and_pad(self, img, cx, cy, model_sz, original_sz, img_mean=None): 40 | 41 | def round_up(value): 42 | # 替换内置round函数,实现保留2位小数的精确四舍五入 43 | return round(value + 1e-6 + 1000) - 1000 44 | im_h, im_w, _ = img.shape 45 | 46 | xmin = cx - (original_sz - 1) / 2 47 | xmax = xmin + original_sz - 1 48 | ymin = cy - (original_sz - 1) / 2 49 | ymax = ymin + original_sz - 1 50 | 51 | left = int(round_up(max(0., -xmin))) 52 | top = int(round_up(max(0., -ymin))) 53 | right = int(round_up(max(0., xmax - im_w + 1))) 54 | bottom = int(round_up(max(0., ymax - im_h + 1))) 55 | 56 | xmin = int(round_up(xmin + left)) 57 | xmax = int(round_up(xmax + left)) 58 | ymin = int(round_up(ymin + top)) 59 | ymax = int(round_up(ymax + top)) 60 | r, c, k = img.shape 61 | if any([top, bottom, left, right]): 62 | te_im = np.zeros((r + top + bottom, c + left + right, k), np.uint8) # 0 is better than 1 initialization 63 | te_im[top:top + r, left:left + c, :] = img 64 | if top: 65 | te_im[0:top, left:left + c, :] = img_mean 66 | if bottom: 67 | te_im[r + top:, left:left + c, :] = img_mean 68 | if left: 69 | te_im[:, 0:left, :] = img_mean 70 | if right: 71 | te_im[:, c + left:, :] = img_mean 72 | im_patch_original = te_im[int(ymin):int(ymax + 1), int(xmin):int(xmax + 1), :] 73 | else: 74 | im_patch_original = img[int(ymin):int(ymax + 1), int(xmin):int(xmax + 1), :] 75 | if not np.array_equal(model_sz, original_sz): 76 | im_patch = cv2.resize(im_patch_original, (model_sz, model_sz)) # zzp: use cv to get a better speed 77 | else: 78 | im_patch = im_patch_original 79 | scale = model_sz / im_patch_original.shape[0] 80 | return im_patch, scale 81 | 82 | def box_transform_inv(self, anchors, offset): 83 | anchor_xctr = anchors[:, :1] 84 | anchor_yctr = anchors[:, 1:2] 85 | anchor_w = anchors[:, 2:3] 86 | anchor_h = anchors[:, 3:] 87 | offset_x, offset_y, offset_w, offset_h = offset[:, :1], offset[:, 1:2], offset[:, 2:3], offset[:, 3:], 88 | 89 | box_cx = anchor_w * offset_x + anchor_xctr 90 | box_cy = anchor_h * offset_y + anchor_yctr 91 | box_w = anchor_w * np.exp(offset_w) 92 | box_h = anchor_h * np.exp(offset_h) 93 | box = np.hstack([box_cx, box_cy, box_w, box_h]) 94 | return box 95 | 96 | def generate_anchors(self, total_stride, base_size, scales, ratios, score_size): 97 | anchor_num = len(ratios) * len(scales) 98 | anchor = np.zeros((anchor_num, 4), dtype=np.float32) 99 | size = base_size * base_size 100 | count = 0 101 | for ratio in ratios: 102 | # ws = int(np.sqrt(size * 1.0 / ratio)) 103 | ws = int(np.sqrt(size / ratio)) 104 | hs = int(ws * ratio) 105 | for scale in scales: 106 | wws = ws * scale 107 | hhs = hs * scale 108 | anchor[count, 0] = 0 109 | anchor[count, 1] = 0 110 | anchor[count, 2] = wws 111 | anchor[count, 3] = hhs 112 | count += 1 113 | 114 | anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4)) 115 | # (5,4x225) to (225x5,4) 116 | ori = - (score_size // 2) * total_stride 117 | # the left displacement 118 | xx, yy = np.meshgrid([ori + total_stride * dx for dx in range(score_size)], 119 | [ori + total_stride * dy for dy in range(score_size)]) 120 | # (15,15) 121 | xx, yy = np.tile(xx.flatten(), (anchor_num, 1)).flatten(), 
\ 122 | np.tile(yy.flatten(), (anchor_num, 1)).flatten() 123 | # (15,15) to (225,1) to (5,225) to (225x5,1) 124 | anchor[:, 0], anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32) 125 | return anchor 126 | -------------------------------------------------------------------------------- /tracking/util.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import numpy as np 4 | 5 | class Util(object): 6 | 7 | def generate_anchors(self, total_stride, base_size, scales, ratios, score_size): 8 | anchor_num = len(ratios) * len(scales) 9 | anchor = np.zeros((anchor_num, 4), dtype=np.float32) 10 | size = base_size * base_size 11 | count = 0 12 | for ratio in ratios: 13 | # ws = int(np.sqrt(size * 1.0 / ratio)) 14 | ws = int(np.sqrt(size / ratio)) 15 | hs = int(ws * ratio) 16 | for scale in scales: 17 | wws = ws * scale 18 | hhs = hs * scale 19 | anchor[count, 0] = 0 20 | anchor[count, 1] = 0 21 | anchor[count, 2] = wws 22 | anchor[count, 3] = hhs 23 | count += 1 24 | 25 | anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4)) 26 | # (5,4x225) to (225x5,4) 27 | ori = - (score_size // 2) * total_stride 28 | # the left displacement 29 | xx, yy = np.meshgrid([ori + total_stride * dx for dx in range(score_size)], 30 | [ori + total_stride * dy for dy in range(score_size)]) 31 | # (15,15) 32 | xx, yy = np.tile(xx.flatten(), (anchor_num, 1)).flatten(), \ 33 | np.tile(yy.flatten(), (anchor_num, 1)).flatten() 34 | # (15,15) to (225,1) to (5,225) to (225x5,1) 35 | anchor[:, 0], anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32) 36 | return anchor 37 | 38 | def get_subwindow_tracking(self, im, pos, model_sz, original_sz, avg_chans, out_mode='torch'): 39 | 40 | # im (720, 1280, 3) 41 | # pos [406. 
377.5] 42 | # model_sz 127 43 | # original_sz 768.0 44 | # avg_chans [115.18894748 111.79296549 109.10407878] 45 | 46 | if isinstance(pos, float): 47 | pos = [pos, pos] 48 | sz = original_sz # original_sz 768.0 49 | im_sz = im.shape # im (720, 1280, 3) 50 | c = (original_sz+1) / 2 # 384.5 51 | context_xmin = round(pos[0] - c) # floor(pos(2) - sz(2) / 2); 52 | context_xmax = context_xmin + sz - 1 53 | context_ymin = round(pos[1] - c) # floor(pos(1) - sz(1) / 2); 54 | context_ymax = context_ymin + sz - 1 55 | left_pad = int(max(0., -context_xmin)) 56 | top_pad = int(max(0., -context_ymin)) 57 | right_pad = int(max(0., context_xmax - im_sz[1] + 1)) 58 | bottom_pad = int(max(0., context_ymax - im_sz[0] + 1)) 59 | 60 | context_xmin = context_xmin + left_pad 61 | context_xmax = context_xmax + left_pad 62 | context_ymin = context_ymin + top_pad 63 | context_ymax = context_ymax + top_pad 64 | 65 | # zzp: a more easy speed version 66 | r, c, k = im.shape 67 | if any([top_pad, bottom_pad, left_pad, right_pad]): 68 | te_im = np.zeros((r + top_pad + bottom_pad, c + left_pad + right_pad, k), np.uint8) # 0 is better than 1 initialization 69 | te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im 70 | if top_pad: 71 | te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans 72 | if bottom_pad: 73 | te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans 74 | if left_pad: 75 | te_im[:, 0:left_pad, :] = avg_chans 76 | if right_pad: 77 | te_im[:, c + left_pad:, :] = avg_chans 78 | im_patch_original = te_im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :] 79 | else: 80 | im_patch_original = im[int(context_ymin):int(context_ymax + 1), int(context_xmin):int(context_xmax + 1), :] 81 | 82 | if not np.array_equal(model_sz, original_sz): 83 | im_patch = cv2.resize(im_patch_original, (model_sz, model_sz)) 84 | else: 85 | im_patch = im_patch_original 86 | 87 | cv2.imshow('foto', im_patch) 88 | 89 | def im_to_torch(img): 90 | 91 | def to_torch(ndarray): 92 | if type(ndarray).__module__ == 'numpy': 93 | return torch.from_numpy(ndarray) 94 | elif not torch.is_tensor(ndarray): 95 | raise ValueError("Cannot convert {} to torch tensor".format(type(ndarray))) 96 | return ndarray 97 | img = np.transpose(img, (2, 0, 1)) # C*H*W 98 | img = to_torch(img).float() 99 | return img 100 | 101 | return im_to_torch(im_patch) if out_mode in 'torch' else im_patch 102 | 103 | def cxy_wh_2_rect(self, pos, sz): 104 | return np.array([pos[0]-sz[0]/2, pos[1]-sz[1]/2, sz[0], sz[1]]) # 0-index 105 | 106 | def x1y1_wh_to_xy_wh(self, rect): 107 | return np.array([rect[0]+rect[2]/2, rect[1]+rect[3]/2]), np.array([rect[2], rect[3]]) # 0-index 108 | 109 | def box_transform_inv(self, anchors, offset): 110 | anchor_xctr = anchors[:, :1] 111 | anchor_yctr = anchors[:, 1:2] 112 | anchor_w = anchors[:, 2:3] 113 | anchor_h = anchors[:, 3:] 114 | offset_x, offset_y, offset_w, offset_h = offset[:, :1], offset[:, 1:2], offset[:, 2:3], offset[:, 3:], 115 | 116 | box_cx = anchor_w * offset_x + anchor_xctr 117 | box_cy = anchor_h * offset_y + anchor_yctr 118 | box_w = anchor_w * np.exp(offset_w) 119 | box_h = anchor_h * np.exp(offset_h) 120 | box = np.hstack([box_cx, box_cy, box_w, box_h]) 121 | return box 122 | 123 | def change(self, r): 124 | return np.maximum(r, 1. 
/ r) 125 | 126 | def sz(self, w, h): 127 | pad = (w + h) * 0.5 128 | sz2 = (w + pad) * (h + pad) 129 | return np.sqrt(sz2) 130 | 131 | def sz_wh(self, wh): 132 | pad = (wh[0] + wh[1]) * 0.5 133 | sz2 = (wh[0] + pad) * (wh[1] + pad) 134 | return np.sqrt(sz2) 135 | 136 | util = Util() 137 | -------------------------------------------------------------------------------- /train/custom_transforms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | class RandomStretch(object): 7 | def __init__(self, max_stretch=0.05): 8 | """Random resize image according to the stretch 9 | Args: 10 | max_stretch(float): 0 to 1 value 11 | """ 12 | self.max_stretch = max_stretch 13 | 14 | def __call__(self, sample): 15 | """ 16 | Args: 17 | sample(numpy array): 3 or 1 dim image 18 | """ 19 | scale_h = 1.0 + np.random.uniform(-self.max_stretch, self.max_stretch) 20 | scale_w = 1.0 + np.random.uniform(-self.max_stretch, self.max_stretch) 21 | h, w = sample.shape[:2] 22 | shape = int(w * scale_w), int(h * scale_h) 23 | return cv2.resize(sample, shape, cv2.INTER_LINEAR) 24 | 25 | 26 | class CenterCrop(object): 27 | def __init__(self, size): 28 | """Crop the image in the center according the given size 29 | if size greater than image size, zero padding will adpot 30 | Args: 31 | size (tuple): desired size 32 | """ 33 | self.size = size 34 | 35 | def __call__(self, sample): 36 | """ 37 | Args: 38 | sample(numpy array): 3 or 1 dim image 39 | """ 40 | shape = sample.shape[:2] 41 | cy, cx = (shape[0] - 1) // 2, (shape[1] - 1) // 2 42 | ymin, xmin = cy - self.size[0] // 2, cx - self.size[1] // 2 43 | ymax, xmax = cy + self.size[0] // 2 + self.size[0] % 2, \ 44 | cx + self.size[1] // 2 + self.size[1] % 2 45 | left = right = top = bottom = 0 46 | im_h, im_w = shape 47 | if xmin < 0: 48 | left = int(abs(xmin)) 49 | if xmax > im_w: 50 | right = int(xmax - im_w) 51 | if ymin < 0: 52 | top = int(abs(ymin)) 53 | if ymax > im_h: 54 | bottom = int(ymax - im_h) 55 | 56 | xmin = int(max(0, xmin)) 57 | xmax = int(min(im_w, xmax)) 58 | ymin = int(max(0, ymin)) 59 | ymax = int(min(im_h, ymax)) 60 | im_patch = sample[ymin:ymax, xmin:xmax] 61 | if left != 0 or right != 0 or top != 0 or bottom != 0: 62 | im_patch = cv2.copyMakeBorder(im_patch, top, bottom, left, right, 63 | cv2.BORDER_CONSTANT, value=0) 64 | return im_patch 65 | 66 | 67 | class RandomCrop(object): 68 | def __init__(self, size, max_translate): 69 | """Crop the image in the center according the given size 70 | if size greater than image size, zero padding will adpot 71 | Args: 72 | size (tuple): desired size 73 | max_translate: max translate of random shift 74 | """ 75 | self.size = size 76 | self.max_translate = max_translate 77 | 78 | def __call__(self, sample): 79 | """ 80 | Args: 81 | sample(numpy array): 3 or 1 dim image 82 | """ 83 | shape = sample.shape[:2] 84 | cy_o = (shape[0] - 1) // 2 85 | cx_o = (shape[1] - 1) // 2 86 | cy = np.random.randint(cy_o - self.max_translate, 87 | cy_o + self.max_translate + 1) 88 | cx = np.random.randint(cx_o - self.max_translate, 89 | cx_o + self.max_translate + 1) 90 | assert abs(cy - cy_o) <= self.max_translate and \ 91 | abs(cx - cx_o) <= self.max_translate 92 | ymin = cy - self.size[0] // 2 93 | xmin = cx - self.size[1] // 2 94 | ymax = cy + self.size[0] // 2 + self.size[0] % 2 95 | xmax = cx + self.size[1] // 2 + self.size[1] % 2 96 | left = right = top = bottom = 0 97 | im_h, im_w = shape 98 | if xmin < 0: 99 | left = 
int(abs(xmin)) 100 | if xmax > im_w: 101 | right = int(xmax - im_w) 102 | if ymin < 0: 103 | top = int(abs(ymin)) 104 | if ymax > im_h: 105 | bottom = int(ymax - im_h) 106 | 107 | xmin = int(max(0, xmin)) 108 | xmax = int(min(im_w, xmax)) 109 | ymin = int(max(0, ymin)) 110 | ymax = int(min(im_h, ymax)) 111 | im_patch = sample[ymin:ymax, xmin:xmax] 112 | if left != 0 or right != 0 or top != 0 or bottom != 0: 113 | im_patch = cv2.copyMakeBorder(im_patch, top, bottom, left, right, 114 | cv2.BORDER_CONSTANT, value=0) 115 | return im_patch 116 | 117 | 118 | class ColorAug(object): 119 | def __init__(self, type_in='z'): 120 | if type_in == 'z': 121 | rgb_var = np.array([[3.2586416e+03, 2.8992207e+03, 2.6392236e+03], 122 | [2.8992207e+03, 3.0958174e+03, 2.9321748e+03], 123 | [2.6392236e+03, 2.9321748e+03, 3.4533721e+03]]) 124 | if type_in == 'x': 125 | rgb_var = np.array([[2.4847285e+03, 2.1796064e+03, 1.9766885e+03], 126 | [2.1796064e+03, 2.3441289e+03, 2.2357402e+03], 127 | [1.9766885e+03, 2.2357402e+03, 2.7369697e+03]]) 128 | self.v, _ = np.linalg.eig(rgb_var) 129 | self.v = np.sqrt(self.v) 130 | 131 | def __call__(self, sample): 132 | return sample + 0.1 * self.v * np.random.randn(3) 133 | 134 | 135 | class RandomBlur(object): 136 | def __init__(self, ratio): 137 | self.ratio = ratio 138 | 139 | def __call__(self, sample): 140 | if np.random.rand(1) < self.ratio: 141 | # random kernel size 142 | kernel_size = np.random.choice([3, 5, 7]) 143 | # random gaussian sigma 144 | sigma = np.random.rand() * 5 145 | return cv2.GaussianBlur(sample, (kernel_size, kernel_size), sigma) 146 | else: 147 | return sample 148 | 149 | 150 | class Normalize(object): 151 | def __init__(self): 152 | self.mean = np.array([0.485, 0.456, 0.406], dtype=np.float32) 153 | self.std = np.array([0.229, 0.224, 0.225], dtype=np.float32) 154 | 155 | def __call__(self, sample): 156 | return (sample / 255. 
- self.mean) / self.std 157 | 158 | 159 | class ToTensor(object): 160 | def __call__(self, sample): 161 | sample = sample.transpose(2, 0, 1) 162 | return torch.from_numpy(sample.astype(np.float32)) 163 | -------------------------------------------------------------------------------- /tracking/custom_transforms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import cv2 4 | 5 | 6 | class RandomStretch(object): 7 | def __init__(self, max_stretch=0.05): 8 | """Random resize image according to the stretch 9 | Args: 10 | max_stretch(float): 0 to 1 value 11 | """ 12 | self.max_stretch = max_stretch 13 | 14 | def __call__(self, sample): 15 | """ 16 | Args: 17 | sample(numpy array): 3 or 1 dim image 18 | """ 19 | scale_h = 1.0 + np.random.uniform(-self.max_stretch, self.max_stretch) 20 | scale_w = 1.0 + np.random.uniform(-self.max_stretch, self.max_stretch) 21 | h, w = sample.shape[:2] 22 | shape = int(w * scale_w), int(h * scale_h) 23 | return cv2.resize(sample, shape, cv2.INTER_LINEAR) 24 | 25 | 26 | class CenterCrop(object): 27 | def __init__(self, size): 28 | """Crop the image in the center according the given size 29 | if size greater than image size, zero padding will adpot 30 | Args: 31 | size (tuple): desired size 32 | """ 33 | self.size = size 34 | 35 | def __call__(self, sample): 36 | """ 37 | Args: 38 | sample(numpy array): 3 or 1 dim image 39 | """ 40 | shape = sample.shape[:2] 41 | cy, cx = (shape[0] - 1) // 2, (shape[1] - 1) // 2 42 | ymin, xmin = cy - self.size[0] // 2, cx - self.size[1] // 2 43 | ymax, xmax = cy + self.size[0] // 2 + self.size[0] % 2, \ 44 | cx + self.size[1] // 2 + self.size[1] % 2 45 | left = right = top = bottom = 0 46 | im_h, im_w = shape 47 | if xmin < 0: 48 | left = int(abs(xmin)) 49 | if xmax > im_w: 50 | right = int(xmax - im_w) 51 | if ymin < 0: 52 | top = int(abs(ymin)) 53 | if ymax > im_h: 54 | bottom = int(ymax - im_h) 55 | 56 | xmin = int(max(0, xmin)) 57 | xmax = int(min(im_w, xmax)) 58 | ymin = int(max(0, ymin)) 59 | ymax = int(min(im_h, ymax)) 60 | im_patch = sample[ymin:ymax, xmin:xmax] 61 | if left != 0 or right != 0 or top != 0 or bottom != 0: 62 | im_patch = cv2.copyMakeBorder(im_patch, top, bottom, left, right, 63 | cv2.BORDER_CONSTANT, value=0) 64 | return im_patch 65 | 66 | 67 | class RandomCrop(object): 68 | def __init__(self, size, max_translate): 69 | """Crop the image in the center according the given size 70 | if size greater than image size, zero padding will adpot 71 | Args: 72 | size (tuple): desired size 73 | max_translate: max translate of random shift 74 | """ 75 | self.size = size 76 | self.max_translate = max_translate 77 | 78 | def __call__(self, sample): 79 | """ 80 | Args: 81 | sample(numpy array): 3 or 1 dim image 82 | """ 83 | shape = sample.shape[:2] 84 | cy_o = (shape[0] - 1) // 2 85 | cx_o = (shape[1] - 1) // 2 86 | cy = np.random.randint(cy_o - self.max_translate, 87 | cy_o + self.max_translate + 1) 88 | cx = np.random.randint(cx_o - self.max_translate, 89 | cx_o + self.max_translate + 1) 90 | assert abs(cy - cy_o) <= self.max_translate and \ 91 | abs(cx - cx_o) <= self.max_translate 92 | ymin = cy - self.size[0] // 2 93 | xmin = cx - self.size[1] // 2 94 | ymax = cy + self.size[0] // 2 + self.size[0] % 2 95 | xmax = cx + self.size[1] // 2 + self.size[1] % 2 96 | left = right = top = bottom = 0 97 | im_h, im_w = shape 98 | if xmin < 0: 99 | left = int(abs(xmin)) 100 | if xmax > im_w: 101 | right = int(xmax - im_w) 102 | if ymin < 0: 103 | 
top = int(abs(ymin)) 104 | if ymax > im_h: 105 | bottom = int(ymax - im_h) 106 | 107 | xmin = int(max(0, xmin)) 108 | xmax = int(min(im_w, xmax)) 109 | ymin = int(max(0, ymin)) 110 | ymax = int(min(im_h, ymax)) 111 | im_patch = sample[ymin:ymax, xmin:xmax] 112 | if left != 0 or right != 0 or top != 0 or bottom != 0: 113 | im_patch = cv2.copyMakeBorder(im_patch, top, bottom, left, right, 114 | cv2.BORDER_CONSTANT, value=0) 115 | return im_patch 116 | 117 | 118 | class ColorAug(object): 119 | def __init__(self, type_in='z'): 120 | if type_in == 'z': 121 | rgb_var = np.array([[3.2586416e+03, 2.8992207e+03, 2.6392236e+03], 122 | [2.8992207e+03, 3.0958174e+03, 2.9321748e+03], 123 | [2.6392236e+03, 2.9321748e+03, 3.4533721e+03]]) 124 | if type_in == 'x': 125 | rgb_var = np.array([[2.4847285e+03, 2.1796064e+03, 1.9766885e+03], 126 | [2.1796064e+03, 2.3441289e+03, 2.2357402e+03], 127 | [1.9766885e+03, 2.2357402e+03, 2.7369697e+03]]) 128 | self.v, _ = np.linalg.eig(rgb_var) 129 | self.v = np.sqrt(self.v) 130 | 131 | def __call__(self, sample): 132 | return sample + 0.1 * self.v * np.random.randn(3) 133 | 134 | 135 | class RandomBlur(object): 136 | def __init__(self, ratio): 137 | self.ratio = ratio 138 | 139 | def __call__(self, sample): 140 | if np.random.rand(1) < self.ratio: 141 | # random kernel size 142 | kernel_size = np.random.choice([3, 5, 7]) 143 | # random gaussian sigma 144 | sigma = np.random.rand() * 5 145 | return cv2.GaussianBlur(sample, (kernel_size, kernel_size), sigma) 146 | else: 147 | return sample 148 | 149 | 150 | class Normalize(object): 151 | def __init__(self): 152 | self.mean = np.array([0.485, 0.456, 0.406], dtype=np.float32) 153 | self.std = np.array([0.229, 0.224, 0.225], dtype=np.float32) 154 | 155 | def __call__(self, sample): 156 | return (sample / 255. 
- self.mean) / self.std 157 | 158 | 159 | class ToTensor(object): 160 | def __call__(self, sample): 161 | sample = sample.transpose(2, 0, 1) 162 | return torch.from_numpy(sample.astype(np.float32)) 163 | -------------------------------------------------------------------------------- /train/net.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import os 4 | import cv2 5 | import torch 6 | import random 7 | import numpy as np 8 | import torch.nn as nn 9 | from util import util 10 | import matplotlib.pyplot as plt 11 | import torch.nn.functional as F 12 | from config import config 13 | from got10k.trackers import Tracker 14 | from network import SiameseAlexNet 15 | from loss import rpn_smoothL1, rpn_cross_entropy_balance 16 | 17 | class TrackerSiamRPN(Tracker): 18 | 19 | def __init__(self, net_path=None, **kargs): 20 | super(TrackerSiamRPN, self).__init__( 21 | name='SiamRPN', is_deterministic=True) 22 | 23 | '''setup GPU device if available''' 24 | self.cuda = torch.cuda.is_available() 25 | self.device = torch.device('cuda:0' if self.cuda else 'cpu') 26 | 27 | '''setup model''' 28 | self.net = SiameseAlexNet() 29 | #self.net.init_weights() 30 | 31 | if net_path is not None: 32 | self.net.load_state_dict(torch.load( 33 | net_path, map_location = lambda storage, loc: storage )) 34 | if self.cuda: 35 | self.net = self.net.to(self.device) 36 | 37 | '''setup optimizer''' 38 | self.optimizer = torch.optim.SGD( 39 | self.net.parameters(), 40 | lr = config.lr, 41 | momentum = config.momentum, 42 | weight_decay = config.weight_decay) 43 | 44 | def step(self, epoch, dataset, anchors, i = 0, train=True): 45 | 46 | if train: 47 | self.net.train() 48 | else: 49 | self.net.eval() 50 | 51 | template, detection, regression_target, conf_target = dataset 52 | 53 | if self.cuda: 54 | template, detection = template.cuda(), detection.cuda() 55 | regression_target, conf_target = regression_target.cuda(), conf_target.cuda() 56 | 57 | pred_score, pred_regression = self.net(template, detection) 58 | 59 | pred_conf = pred_score.reshape(-1, 2, config.size).permute(0, 2, 1) 60 | 61 | pred_offset = pred_regression.reshape(-1, 4, config.size).permute(0, 2, 1) 62 | 63 | cls_loss = rpn_cross_entropy_balance( pred_conf, 64 | conf_target, 65 | config.num_pos, 66 | config.num_neg, 67 | anchors, 68 | ohem_pos=config.ohem_pos, 69 | ohem_neg=config.ohem_neg) 70 | 71 | reg_loss = rpn_smoothL1(pred_offset, 72 | regression_target, 73 | conf_target, 74 | config.num_pos, 75 | ohem=config.ohem_reg) 76 | 77 | loss = cls_loss + config.lamb * reg_loss 78 | 79 | '''anchors_show = anchors 80 | exem_img = template[0].cpu().numpy().transpose(1, 2, 0) # (127, 127, 3) 81 | #cv2.imwrite('exem_img.png', exem_img) 82 | 83 | inst_img = detection[0].cpu().numpy().transpose(1, 2, 0) # (255, 255, 3) 84 | #cv2.imwrite('inst_img.png', inst_img) 85 | 86 | 87 | 88 | topk = 1 89 | cls_pred = F.softmax(pred_conf, dim=2)[0, :, 1] 90 | 91 | topk_box = util.get_topk_box(cls_pred, pred_offset[0], anchors_show, topk=topk) 92 | img_box = util.add_box_img(inst_img, topk_box, color=(0, 0, 255)) 93 | 94 | cv2.imwrite('pred_inst.png', img_box) 95 | 96 | cls_pred = conf_target[0] 97 | gt_box = util.get_topk_box(cls_pred, regression_target[0], anchors_show) 98 | #print('gt_box', gt_box) 99 | img_box = util.add_box_img(img_box, gt_box, color=(255, 0, 0), x = 1, y = 1) 100 | #print('gt_box', gt_box) 101 | cv2.imwrite('pred_inst_gt.png', img_box)''' 102 | 103 | if train: 104 | 
self.optimizer.zero_grad() 105 | loss.backward() 106 | torch.nn.utils.clip_grad_norm_(self.net.parameters(), config.clip) 107 | self.optimizer.step() 108 | 109 | return cls_loss, reg_loss, loss 110 | 111 | '''save model''' 112 | def save(self,model, exp_name_dir, epoch): 113 | util.adjust_learning_rate(self.optimizer, config.gamma) 114 | 115 | model_save_dir_pth = '{}/model'.format(exp_name_dir) 116 | if not os.path.exists(model_save_dir_pth): 117 | os.makedirs(model_save_dir_pth) 118 | net_path = os.path.join(model_save_dir_pth, 'model_e%d.pth' % (epoch + 1)) 119 | torch.save(model.net.state_dict(), net_path) 120 | 121 | '''class SiamRPN(nn.Module): 122 | 123 | def __init__(self, anchor_num = 5): 124 | super(SiamRPN, self).__init__() 125 | 126 | self.anchor_num = anchor_num 127 | self.feature = nn.Sequential( 128 | # conv1 129 | nn.Conv2d(3, 64, kernel_size = 11, stride = 2), 130 | nn.BatchNorm2d(64), 131 | nn.ReLU(inplace = True), 132 | nn.MaxPool2d(kernel_size = 3, stride = 2), 133 | # conv2 134 | nn.Conv2d(64, 192, kernel_size = 5), 135 | nn.BatchNorm2d(192), 136 | nn.ReLU(inplace=True), 137 | nn.MaxPool2d(kernel_size = 3, stride = 2), 138 | # conv3 139 | nn.Conv2d(192, 384, kernel_size = 3), 140 | nn.BatchNorm2d(384), 141 | nn.ReLU(inplace = True), 142 | # conv4 143 | nn.Conv2d(384, 256, kernel_size = 3), 144 | nn.BatchNorm2d(256), 145 | nn.ReLU(inplace = True), 146 | # conv5 147 | nn.Conv2d(256, 256, kernel_size = 3), 148 | nn.BatchNorm2d(256)) 149 | 150 | self.conv_reg_z = nn.Conv2d(256, 256 * 4 * self.anchor_num, 3, 1) 151 | self.conv_reg_x = nn.Conv2d(256, 256, 3) 152 | self.conv_cls_z = nn.Conv2d(256, 256 * 2 * anchor_num, 3, 1) 153 | self.conv_cls_x = nn.Conv2d(256, 256, 3) 154 | self.adjust_reg = nn.Conv2d(4 * anchor_num, 4 * anchor_num*1, 1) 155 | 156 | def forward(self, z, x): 157 | return self.inference(x, *self.learn(z)) 158 | 159 | def learn(self, z): 160 | z = self.feature(z) 161 | kernel_reg = self.conv_reg_z(z) 162 | kernel_cls = self.conv_cls_z(z) 163 | 164 | k = kernel_reg.size()[-1] 165 | kernel_reg = kernel_reg.view(4 * self.anchor_num, 256, k, k) 166 | kernel_cls = kernel_cls.view(2 * self.anchor_num, 256, k, k) 167 | 168 | return kernel_reg, kernel_cls 169 | 170 | def inference(self, x, kernel_reg, kernel_cls): 171 | x = self.feature(x) 172 | x_reg = self.conv_reg_x(x) 173 | x_cls = self.conv_cls_x(x) 174 | 175 | out_reg = self.adjust_reg(F.conv2d(x_reg, kernel_reg)) 176 | out_cls = F.conv2d(x_cls, kernel_cls) 177 | 178 | return out_reg, out_cls''' 179 | -------------------------------------------------------------------------------- /train/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from util import util 7 | 8 | def rpn_cross_entropy_old(input, target): 9 | mask_ignore = target == -1 10 | mask_calcu = 1 - mask_ignore 11 | loss = F.cross_entropy(input=input[mask_calcu], target=target[mask_calcu]) 12 | return loss 13 | 14 | 15 | def rpn_cross_entropy_balance_old(input, target, num_pos, num_neg): 16 | cal_index_pos = np.array([], dtype=np.int64) 17 | cal_index_neg = np.array([], dtype=np.int64) 18 | for batch_id in range(target.shape[0]): 19 | print(target[batch_id]) 20 | pos_index = np.random.choice(np.where(target[batch_id].cpu() == 1)[0], num_pos) 21 | neg_index = np.random.choice(np.where(target[batch_id].cpu() == 0)[0], num_neg) 22 | cal_index_pos = np.append(cal_index_pos, batch_id * 
target.shape[1] + pos_index) 23 | cal_index_neg = np.append(cal_index_neg, batch_id * target.shape[1] + neg_index) 24 | pos_loss = F.cross_entropy(input=input.reshape(-1, 2)[cal_index_pos], target=target.flatten()[cal_index_pos], 25 | reduction='sum') / cal_index_pos.shape[0] 26 | neg_loss = F.cross_entropy(input=input.reshape(-1, 2)[cal_index_neg], target=target.flatten()[cal_index_neg], 27 | reduction='sum') / cal_index_neg.shape[0] 28 | loss = (pos_loss + neg_loss) / 2 29 | # loss = F.cross_entropy(input=input.reshape(-1, 2)[cal_index], target=target.flatten()[cal_index]) 30 | return loss 31 | 32 | def rpn_smoothL1_old(input, target, label): 33 | pos_index = np.where(label.cpu() == 1) 34 | loss = F.smooth_l1_loss(input[pos_index], target[pos_index]) 35 | return loss 36 | 37 | def rpn_cross_entropy_balance(input, target, num_pos, num_neg, anchors, ohem_pos=None, ohem_neg=None): 38 | cuda = torch.cuda.is_available() 39 | loss_all = [] 40 | for batch_id in range(target.shape[0]): 41 | min_pos = min(len(np.where(target[batch_id].cpu() == 1)[0]), num_pos) 42 | min_neg = int(min(len(np.where(target[batch_id].cpu() == 1)[0]) * num_neg / num_pos, num_neg)) 43 | 44 | pos_index = np.where(target[batch_id].cpu() == 1)[0].tolist() 45 | neg_index = np.where(target[batch_id].cpu() == 0)[0].tolist() 46 | 47 | if ohem_pos: 48 | if len(pos_index) > 0: 49 | pos_loss_bid = F.cross_entropy(input=input[batch_id][pos_index], 50 | target=target[batch_id][pos_index], reduction='none') 51 | selected_pos_index = util.nms(anchors[pos_index], pos_loss_bid.cpu().detach().numpy(), min_pos) 52 | pos_loss_bid_final = pos_loss_bid[selected_pos_index] 53 | else: 54 | if cuda: 55 | pos_loss_bid = torch.FloatTensor([0]).cuda() 56 | else: 57 | pos_loss_bid = torch.FloatTensor([0]) 58 | pos_loss_bid_final = pos_loss_bid 59 | else: 60 | pos_index_random = random.sample(pos_index, min_pos) 61 | if len(pos_index) > 0: 62 | pos_loss_bid_final = F.cross_entropy(input=input[batch_id][pos_index_random], 63 | target=target[batch_id][pos_index_random], reduction='none') 64 | else: 65 | if cuda: 66 | pos_loss_bid_final = torch.FloatTensor([0]).cuda() 67 | else: 68 | pos_loss_bid_final = torch.FloatTensor([0]) 69 | 70 | if ohem_neg: 71 | if len(pos_index) > 0: 72 | neg_loss_bid = F.cross_entropy(input=input[batch_id][neg_index], 73 | target=target[batch_id][neg_index], reduction='none') 74 | selected_neg_index = util.nms(anchors[neg_index], neg_loss_bid.cpu().detach().numpy(), min_neg) 75 | neg_loss_bid_final = neg_loss_bid[selected_neg_index] 76 | else: 77 | neg_loss_bid = F.cross_entropy(input=input[batch_id][neg_index], 78 | target=target[batch_id][neg_index], reduction='none') 79 | selected_neg_index = util.nms(anchors[neg_index], neg_loss_bid.cpu().detach().numpy(), num_neg) 80 | neg_loss_bid_final = neg_loss_bid[selected_neg_index] 81 | else: 82 | if len(pos_index) > 0: 83 | neg_index_random = random.sample(np.where(target[batch_id].cpu() == 0)[0].tolist(), min_neg) 84 | #neg_index_random = np.where(target[batch_id].cpu() == 0)[0].tolist() 85 | neg_loss_bid_final = F.cross_entropy(input=input[batch_id][neg_index_random], 86 | target=target[batch_id][neg_index_random], reduction='none') 87 | else: 88 | neg_index_random = random.sample(np.where(target[batch_id].cpu() == 0)[0].tolist(), num_neg) 89 | neg_loss_bid_final = F.cross_entropy(input=input[batch_id][neg_index_random], 90 | target=target[batch_id][neg_index_random], reduction='none') 91 | loss_bid = (pos_loss_bid_final.mean() + neg_loss_bid_final.mean()) / 2 92 | 
loss_all.append(loss_bid) 93 | final_loss = torch.stack(loss_all).mean() 94 | return final_loss 95 | 96 | 97 | def rpn_smoothL1(input, target, label, num_pos=16, ohem=None): 98 | cuda = torch.cuda.is_available() 99 | loss_all = [] 100 | for batch_id in range(target.shape[0]): 101 | min_pos = min(len(np.where(label[batch_id].cpu() == 1)[0]), num_pos) 102 | if ohem: 103 | pos_index = np.where(label[batch_id].cpu() == 1)[0] 104 | if len(pos_index) > 0: 105 | loss_bid = F.smooth_l1_loss(input[batch_id][pos_index], target[batch_id][pos_index], reduction='none') 106 | sort_index = torch.argsort(loss_bid.mean(1)) 107 | loss_bid_ohem = loss_bid[sort_index[-num_pos:]] 108 | else: 109 | if cuda: 110 | loss_bid_ohem = torch.FloatTensor([0]).cuda()[0] 111 | else: 112 | loss_bid_ohem = torch.FloatTensor([0])[0] 113 | 114 | loss_all.append(loss_bid_ohem.mean()) 115 | else: 116 | pos_index = np.where(label[batch_id].cpu() == 1)[0] 117 | pos_index = random.sample(pos_index.tolist(), min_pos) 118 | if len(pos_index) > 0: 119 | loss_bid = F.smooth_l1_loss(input[batch_id][pos_index], target[batch_id][pos_index]) 120 | else: 121 | if cuda: 122 | loss_bid = torch.FloatTensor([0]).cuda()[0] 123 | else: 124 | loss_bid = torch.FloatTensor([0])[0] 125 | 126 | loss_all.append(loss_bid.mean()) 127 | final_loss = torch.stack(loss_all).mean() 128 | return final_loss 129 | -------------------------------------------------------------------------------- /train/train_siamrpn.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | import json 5 | import torch 6 | import random 7 | import logging 8 | import argparse 9 | import numpy as np 10 | from tqdm import tqdm 11 | from torch.nn import init 12 | from config import config 13 | from net import TrackerSiamRPN 14 | from data import TrainDataLoader 15 | from torch.utils.data import DataLoader 16 | from util import util, AverageMeter, SavePlot 17 | from got10k.datasets import ImageNetVID, GOT10k 18 | from torchvision import datasets, transforms, utils 19 | from custom_transforms import Normalize, ToTensor, RandomStretch, \ 20 | RandomCrop, CenterCrop, RandomBlur, ColorAug 21 | 22 | torch.manual_seed(1234) # config.seed 23 | 24 | 25 | parser = argparse.ArgumentParser(description='PyTorch SiameseRPN Training') 26 | 27 | parser.add_argument('--train_path', default='/home/arbi/desktop/GOT-10k', metavar='DIR',help='path to dataset') 28 | parser.add_argument('--experiment_name', default='default', metavar='DIR',help='path to weight') 29 | parser.add_argument('--checkpoint_path', default=None, help='resume') 30 | # /home/arbi/desktop/GOT-10k # /Users/arbi/Desktop # /home/arbi/desktop/ILSVRC 31 | # 'experiments/default/model/model_e1.pth' 32 | def main(): 33 | 34 | '''parameter initialization''' 35 | args = parser.parse_args() 36 | exp_name_dir = util.experiment_name_dir(args.experiment_name) 37 | 38 | '''model on gpu''' 39 | model = TrackerSiamRPN() 40 | 41 | '''setup train data loader''' 42 | name = 'GOT-10k' 43 | assert name in ['VID', 'GOT-10k', 'All'] 44 | if name == 'GOT-10k': 45 | root_dir = args.train_path 46 | seq_dataset = GOT10k(root_dir, subset='train') 47 | elif name == 'VID': 48 | root_dir = '/home/arbi/desktop/ILSVRC' 49 | seq_dataset = ImageNetVID(root_dir, subset=('train')) 50 | elif name == 'All': 51 | root_dir_vid = '/home/arbi/desktop/ILSVRC' 52 | seq_datasetVID = ImageNetVID(root_dir_vid, subset=('train')) 53 | root_dir_got = args.train_path 54 | seq_datasetGOT = 
GOT10k(root_dir_got, subset='train') 55 | seq_dataset = util.data_split(seq_datasetVID, seq_datasetGOT) 56 | print('seq_dataset', len(seq_dataset)) 57 | 58 | train_z_transforms = transforms.Compose([ 59 | ToTensor() 60 | ]) 61 | train_x_transforms = transforms.Compose([ 62 | ToTensor() 63 | ]) 64 | 65 | train_data = TrainDataLoader(seq_dataset, train_z_transforms, train_x_transforms, name) 66 | anchors = train_data.anchors 67 | train_loader = DataLoader( dataset = train_data, 68 | batch_size = config.train_batch_size, 69 | shuffle = True, 70 | num_workers= config.train_num_workers, 71 | pin_memory = True) 72 | 73 | '''setup val data loader''' 74 | name = 'GOT-10k' 75 | assert name in ['VID', 'GOT-10k', 'All'] 76 | if name == 'GOT-10k': 77 | root_dir = args.train_path 78 | seq_dataset_val = GOT10k(root_dir, subset='val') 79 | elif name == 'VID': 80 | root_dir = '/home/arbi/desktop/ILSVRC' 81 | seq_dataset_val = ImageNetVID(root_dir, subset=('val')) 82 | elif name == 'All': 83 | root_dir_vid = '/home/arbi/desktop/ILSVRC' 84 | seq_datasetVID = ImageNetVID(root_dir_vid, subset=('val')) 85 | root_dir_got = args.train_path 86 | seq_datasetGOT = GOT10k(root_dir_got, subset='val') 87 | seq_dataset_val = util.data_split(seq_datasetVID, seq_datasetGOT) 88 | print('seq_dataset_val', len(seq_dataset_val)) 89 | 90 | valid_z_transforms = transforms.Compose([ 91 | ToTensor() 92 | ]) 93 | valid_x_transforms = transforms.Compose([ 94 | ToTensor() 95 | ]) 96 | 97 | val_data = TrainDataLoader(seq_dataset_val, valid_z_transforms, valid_x_transforms, name) 98 | val_loader = DataLoader( dataset = val_data, 99 | batch_size = config.valid_batch_size, 100 | shuffle = False, 101 | num_workers= config.valid_num_workers, 102 | pin_memory = True) 103 | 104 | '''load weights''' 105 | 106 | if not args.checkpoint_path == None: 107 | assert os.path.isfile(args.checkpoint_path), '{} is not valid checkpoint_path'.format(args.checkpoint_path) 108 | checkpoint = torch.load(args.checkpoint_path, map_location='cpu') 109 | if 'model' in checkpoint.keys(): 110 | model.net.load_state_dict(torch.load(args.checkpoint_path, map_location='cpu')['model']) 111 | else: 112 | model.net.load_state_dict(torch.load(args.checkpoint_path, map_location='cpu')) 113 | torch.cuda.empty_cache() 114 | print('You are loading the model.load_state_dict') 115 | 116 | elif config.pretrained_model: 117 | checkpoint = torch.load(config.pretrained_model) 118 | # change name and load parameters 119 | checkpoint = {k.replace('features.features', 'featureExtract'): v for k, v in checkpoint.items()} 120 | model_dict = model.net.state_dict() 121 | model_dict.update(checkpoint) 122 | model.net.load_state_dict(model_dict) 123 | #torch.cuda.empty_cache() 124 | 125 | '''train phase''' 126 | train_closses, train_rlosses, train_tlosses = AverageMeter(), AverageMeter(), AverageMeter() 127 | val_closses, val_rlosses, val_tlosses = AverageMeter(), AverageMeter(), AverageMeter() 128 | 129 | train_val_plot = SavePlot(exp_name_dir, 'train_val_plot') 130 | 131 | for epoch in range(config.epoches): 132 | model.net.train() 133 | if config.fix_former_3_layers: 134 | util.freeze_layers(model.net) 135 | print('Train epoch {}/{}'.format(epoch+1, config.epoches)) 136 | train_loss = [] 137 | with tqdm(total=config.train_epoch_size) as progbar: 138 | for i, dataset in enumerate(train_loader): 139 | 140 | closs, rloss, loss = model.step(epoch, dataset,anchors, i, train=True) 141 | 142 | closs_ = closs.cpu().item() 143 | 144 | if np.isnan(closs_): 145 | sys.exit(0) 146 | 147 | 
train_closses.update(closs.cpu().item()) 148 | train_rlosses.update(rloss.cpu().item()) 149 | train_tlosses.update(loss.cpu().item()) 150 | 151 | progbar.set_postfix(closs='{:05.3f}'.format(train_closses.avg), 152 | rloss='{:05.5f}'.format(train_rlosses.avg), 153 | tloss='{:05.3f}'.format(train_tlosses.avg)) 154 | 155 | progbar.update() 156 | train_loss.append(train_tlosses.avg) 157 | 158 | if i >= config.train_epoch_size - 1: 159 | 160 | '''save model''' 161 | model.save(model, exp_name_dir, epoch) 162 | 163 | break 164 | 165 | train_loss = np.mean(train_loss) 166 | 167 | '''val phase''' 168 | val_loss = [] 169 | with tqdm(total=config.val_epoch_size) as progbar: 170 | print('Val epoch {}/{}'.format(epoch+1, config.epoches)) 171 | for i, dataset in enumerate(val_loader): 172 | 173 | val_closs, val_rloss, val_tloss = model.step(epoch, dataset, anchors, train=False) 174 | 175 | closs_ = val_closs.cpu().item() 176 | 177 | if np.isnan(closs_): 178 | sys.exit(0) 179 | 180 | val_closses.update(val_closs.cpu().item()) 181 | val_rlosses.update(val_rloss.cpu().item()) 182 | val_tlosses.update(val_tloss.cpu().item()) 183 | 184 | progbar.set_postfix(closs='{:05.3f}'.format(val_closses.avg), 185 | rloss='{:05.5f}'.format(val_rlosses.avg), 186 | tloss='{:05.3f}'.format(val_tlosses.avg)) 187 | 188 | progbar.update() 189 | 190 | val_loss.append(val_tlosses.avg) 191 | 192 | if i >= config.val_epoch_size - 1: 193 | break 194 | 195 | val_loss = np.mean(val_loss) 196 | train_val_plot.update(train_loss, val_loss) 197 | print ('Train loss: {}, val loss: {}'.format(train_loss, val_loss)) 198 | 199 | 200 | if __name__ == '__main__': 201 | main() 202 | -------------------------------------------------------------------------------- /train/util.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import numpy as np 3 | import torch.nn as nn 4 | import torch 5 | import cv2 6 | 7 | class Util(object): 8 | 9 | def add_box_img(self, img, boxes, color=(0, 255, 0), x = 1, y = 1): 10 | # boxes (x,y,w,h) 11 | if boxes.ndim == 1: 12 | boxes = boxes[None, :] 13 | img = img.copy() 14 | img_ctx = (img.shape[1] - 1) / 2 15 | img_cty = (img.shape[0] - 1) / 2 16 | for box in boxes: 17 | point_1 = [img_ctx - box[2] / 2 + (box[0]/x) + 0.5, img_cty - box[3] / 2 + (box[1]/y) + 0.5] 18 | point_2 = [img_ctx + box[2] / 2 + (box[0]/x) - 0.5, img_cty + box[3] / 2 + (box[1]/y) - 0.5] 19 | point_1[0] = np.clip(point_1[0], 0, img.shape[1]) 20 | point_2[0] = np.clip(point_2[0], 0, img.shape[1]) 21 | point_1[1] = np.clip(point_1[1], 0, img.shape[0]) 22 | point_2[1] = np.clip(point_2[1], 0, img.shape[0]) 23 | img = cv2.rectangle(img, (int(point_1[0]), int(point_1[1])), (int(point_2[0]), int(point_2[1])), 24 | color, 2) 25 | return img 26 | 27 | def get_topk_box(self, cls_score, pred_regression, anchors, topk=10): 28 | # anchors xc,yc,w,h 29 | regress_offset = pred_regression.cpu().detach().numpy() 30 | 31 | scores, index = torch.topk(cls_score, topk, ) 32 | index = index.view(-1).cpu().detach().numpy() 33 | 34 | topk_offset = regress_offset[index, :] 35 | anchors = anchors[index, :] 36 | pred_box = self.box_transform_inv(anchors, topk_offset) 37 | return pred_box 38 | 39 | def box_transform_inv(self, anchors, offset): 40 | anchor_xctr = anchors[:, :1] 41 | anchor_yctr = anchors[:, 1:2] 42 | anchor_w = anchors[:, 2:3] 43 | anchor_h = anchors[:, 3:] 44 | offset_x, offset_y, offset_w, offset_h = offset[:, :1], offset[:, 1:2], offset[:, 2:3], offset[:, 3:], 45 | 46 | box_cx = 
anchor_w * offset_x + anchor_xctr 47 | box_cy = anchor_h * offset_y + anchor_yctr 48 | box_w = anchor_w * np.exp(offset_w) 49 | box_h = anchor_h * np.exp(offset_h) 50 | box = np.hstack([box_cx, box_cy, box_w, box_h]) 51 | return box 52 | 53 | def data_split(self, seq_datasetVID, seq_datasetGOT): 54 | seq_dataset = [] 55 | for i in seq_datasetVID: 56 | seq_dataset.append(i) 57 | 58 | for i, data in enumerate(seq_datasetGOT): 59 | seq_dataset.append(data) 60 | if i >= 8600: 61 | break 62 | return seq_dataset 63 | 64 | def generate_anchors(self, total_stride, base_size, scales, ratios, score_size): 65 | anchor_num = len(ratios) * len(scales) # 5 66 | anchor = np.zeros((anchor_num, 4), dtype=np.float32) 67 | size = base_size * base_size 68 | count = 0 69 | for ratio in ratios: 70 | # ws = int(np.sqrt(size * 1.0 / ratio)) 71 | ws = int(np.sqrt(size / ratio)) 72 | hs = int(ws * ratio) 73 | for scale in scales: 74 | wws = ws * scale 75 | hhs = hs * scale 76 | anchor[count, 0] = 0 77 | anchor[count, 1] = 0 78 | anchor[count, 2] = wws 79 | anchor[count, 3] = hhs 80 | count += 1 81 | 82 | anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4)) 83 | ori = - (score_size // 2) * total_stride 84 | # the left displacement 85 | xx, yy = np.meshgrid([ori + total_stride * dx for dx in range(score_size)], 86 | [ori + total_stride * dy for dy in range(score_size)]) 87 | 88 | xx, yy = np.tile(xx.flatten(), (anchor_num, 1)).flatten(), \ 89 | np.tile(yy.flatten(), (anchor_num, 1)).flatten() 90 | anchor[:, 0], anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32) 91 | return anchor 92 | 93 | # freeze layers 94 | def freeze_layers(self, model): 95 | for layer in model.featureExtract[:10]: 96 | if isinstance(layer, nn.BatchNorm2d): 97 | layer.eval() 98 | for k, v in layer.named_parameters(): 99 | v.requires_grad = False 100 | elif isinstance(layer, nn.Conv2d): 101 | for k, v in layer.named_parameters(): 102 | v.requires_grad = False 103 | elif isinstance(layer, nn.MaxPool2d): 104 | continue 105 | elif isinstance(layer, nn.ReLU): 106 | continue 107 | else: 108 | raise KeyError('error in fixing former 3 layers') 109 | 110 | def experiment_name_dir(self, experiment_name): 111 | experiment_name_dir = 'experiments/{}'.format(experiment_name) 112 | if experiment_name == 'default': 113 | print('You are using "default" experiment, my advice to you is: Copy "default" change folder name and change settings in file "parameters.json"') 114 | else: 115 | print('You are using "{}" experiment'.format(experiment_name)) 116 | return experiment_name_dir 117 | 118 | def adjust_learning_rate(self, optimizer, decay=0.1): 119 | """Sets the learning rate to the initial LR decayed by 0.5 every 20 epochs""" 120 | for param_group in optimizer.param_groups: 121 | param_group['lr'] = decay * param_group['lr'] 122 | 123 | def nms(self, bboxes, scores, num, threshold=0.7): 124 | print('scores', scores) 125 | sort_index = np.argsort(scores)[::-1] 126 | print('sort_index', sort_index) 127 | sort_boxes = bboxes[sort_index] 128 | selected_bbox = [sort_boxes[0]] 129 | selected_index = [sort_index[0]] 130 | for i, bbox in enumerate(sort_boxes): 131 | iou = compute_iou(selected_bbox, bbox) 132 | print(iou, bbox, selected_bbox) 133 | if np.max(iou) < threshold: 134 | selected_bbox.append(bbox) 135 | selected_index.append(sort_index[i]) 136 | if len(selected_bbox) >= num: 137 | break 138 | return selected_index 139 | 140 | util = Util() 141 | 142 | class AverageMeter(object): 143 | '''Computes and stores the average and current value''' 
144 | def __init__(self): 145 | self.reset() 146 | 147 | def reset(self): 148 | self.val = 0 149 | self.avg = 0 150 | self.sum = 0 151 | self.count = 0 152 | 153 | def update(self, val, n=1): 154 | self.val = val 155 | self.sum += val * n 156 | self.count += n 157 | self.avg = self.sum / self.count 158 | 159 | class SavePlot(object): 160 | def __init__(self, exp_name_dir, 161 | name = 'plot', 162 | title = 'Siamese RPN', 163 | ylabel = 'loss', 164 | xlabel = 'epoch', 165 | show = False): 166 | 167 | self.step = 0 168 | self.exp_name_dir = exp_name_dir 169 | self.steps_array = [] 170 | self.train_array = [] 171 | self.val_array = [] 172 | self.name = name 173 | self.title = title 174 | self.ylabel = ylabel 175 | self.xlabel = xlabel 176 | self.show = show 177 | 178 | self.plot( self.exp_name_dir, 179 | self.steps_array, 180 | self.train_array, 181 | self.val_array, 182 | self.name, 183 | self.title, 184 | self.ylabel, 185 | self.xlabel, 186 | self.show) 187 | 188 | self.plt.legend() 189 | 190 | def update(self, train, 191 | val, 192 | train_label = 'train loss', 193 | val_label = 'val loss', 194 | count_step=1): 195 | 196 | self.step += count_step 197 | self.steps_array.append(self.step) 198 | self.train_array.append(train) 199 | self.val_array.append(val) 200 | 201 | self.plot(exp_name_dir = self.exp_name_dir, 202 | step = self.steps_array, 203 | train = self.train_array, 204 | val = self.val_array, 205 | name = self.name, 206 | title = self.title, 207 | ylabel = self.ylabel, 208 | xlabel = self.xlabel, 209 | show = self.show, 210 | train_label = train_label, 211 | val_label = val_label) 212 | 213 | def plot(self, exp_name_dir, 214 | step, 215 | train, 216 | val, 217 | name, 218 | title, 219 | ylabel, 220 | xlabel, 221 | show, 222 | train_label = 'train loss', 223 | val_label = 'val loss'): 224 | self.plt = plt 225 | self.plt.plot(step, train, 'r', label = train_label, color = 'red') 226 | self.plt.plot(step, val, 'r', label = val_label, color='black') 227 | 228 | self.plt.title(title) 229 | self.plt.ylabel(ylabel) 230 | self.plt.xlabel(xlabel) 231 | 232 | '''save plot''' 233 | self.plt.savefig("{}/{}.png".format(exp_name_dir, name)) 234 | if show: 235 | self.plt.show() 236 | -------------------------------------------------------------------------------- /tracking/siamRPNBIG.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import torch 3 | import numpy as np 4 | import torch.nn as nn 5 | from util import util 6 | import torch.nn.functional as F 7 | from config import TrackerConfig 8 | import torchvision.transforms as transforms 9 | from custom_transforms import ToTensor 10 | from config import config 11 | from torch.autograd import Variable 12 | from got10k.trackers import Tracker 13 | from network import SiameseAlexNet 14 | from data_loader import TrackerDataLoader 15 | from PIL import Image, ImageOps, ImageStat, ImageDraw 16 | 17 | class SiamRPN(nn.Module): 18 | 19 | def __init__(self, anchor_num = 5): 20 | super(SiamRPN, self).__init__() 21 | 22 | self.anchor_num = anchor_num 23 | self.feature = nn.Sequential( 24 | # conv1 25 | nn.Conv2d(3, 64, kernel_size = 11, stride = 2), 26 | nn.BatchNorm2d(64), 27 | nn.ReLU(inplace = True), 28 | nn.MaxPool2d(kernel_size = 3, stride = 2), 29 | # conv2 30 | nn.Conv2d(64, 192, kernel_size = 5), 31 | nn.BatchNorm2d(192), 32 | nn.ReLU(inplace=True), 33 | nn.MaxPool2d(kernel_size = 3, stride = 2), 34 | # conv3 35 | nn.Conv2d(192, 384, kernel_size = 3), 36 | nn.BatchNorm2d(384), 37 | nn.ReLU(inplace 
= True), 38 | # conv4 39 | nn.Conv2d(384, 256, kernel_size = 3), 40 | nn.BatchNorm2d(256), 41 | nn.ReLU(inplace = True), 42 | # conv5 43 | nn.Conv2d(256, 256, kernel_size = 3), 44 | nn.BatchNorm2d(256)) 45 | 46 | self.conv_reg_z = nn.Conv2d(256, 256 * 4 * self.anchor_num, 3, 1) 47 | self.conv_reg_x = nn.Conv2d(256, 256, 3) 48 | self.conv_cls_z = nn.Conv2d(256, 256 * 2 * anchor_num, 3, 1) 49 | self.conv_cls_x = nn.Conv2d(256, 256, 3) 50 | self.adjust_reg = nn.Conv2d(4 * anchor_num, 4 * anchor_num*1, 1) 51 | 52 | def forward(self, z, x): 53 | return self.inference(x, *self.learn(z)) 54 | 55 | def learn(self, z): 56 | z = self.feature(z) 57 | kernel_reg = self.conv_reg_z(z) 58 | kernel_cls = self.conv_cls_z(z) 59 | 60 | k = kernel_reg.size()[-1] 61 | kernel_reg = kernel_reg.view(4 * self.anchor_num, 256, k, k) 62 | kernel_cls = kernel_cls.view(2 * self.anchor_num, 256, k, k) 63 | 64 | return kernel_reg, kernel_cls 65 | 66 | def inference(self, x, kernel_reg, kernel_cls): 67 | x = self.feature(x) 68 | x_reg = self.conv_reg_x(x) 69 | x_cls = self.conv_cls_x(x) 70 | 71 | out_reg = self.adjust_reg(F.conv2d(x_reg, kernel_reg)) 72 | out_cls = F.conv2d(x_cls, kernel_cls) 73 | 74 | return out_reg, out_cls 75 | 76 | class TrackerSiamRPNBIG(Tracker): 77 | def __init__(self, params, model_path = None, **kargs): 78 | super(TrackerSiamRPNBIG, self).__init__(name='SiamRPN', is_deterministic=True) 79 | 80 | self.model = SiameseAlexNet() 81 | 82 | self.cuda = torch.cuda.is_available() 83 | self.device = torch.device('cuda:0' if self.cuda else 'cpu') 84 | 85 | checkpoint = torch.load(model_path, map_location = self.device) 86 | #print("1") 87 | if 'model' in checkpoint.keys(): 88 | self.model.load_state_dict(torch.load(model_path, map_location = self.device)['model']) 89 | else: 90 | self.model.load_state_dict(torch.load(model_path, map_location = self.device)) 91 | 92 | 93 | if self.cuda: 94 | self.model = self.model.cuda() 95 | self.model.eval() 96 | self.transforms = transforms.Compose([ 97 | ToTensor() 98 | ]) 99 | 100 | valid_scope = 2 * config.valid_scope + 1 101 | self.anchors = util.generate_anchors( config.total_stride, 102 | config.anchor_base_size, 103 | config.anchor_scales, 104 | config.anchor_ratios, 105 | valid_scope) 106 | self.window = np.tile(np.outer(np.hanning(config.score_size), np.hanning(config.score_size))[None, :], 107 | [config.anchor_num, 1, 1]).flatten() 108 | 109 | self.data_loader = TrackerDataLoader() 110 | 111 | def _cosine_window(self, size): 112 | """ 113 | get the cosine window 114 | """ 115 | cos_window = np.hanning(int(size[0]))[:, np.newaxis].dot(np.hanning(int(size[1]))[np.newaxis, :]) 116 | cos_window = cos_window.astype(np.float32) 117 | cos_window /= np.sum(cos_window) 118 | return cos_window 119 | 120 | def init(self, frame, bbox): 121 | 122 | """ initialize siamfc tracker 123 | Args: 124 | frame: an RGB image 125 | bbox: one-based bounding box [x, y, width, height] 126 | """ 127 | frame = np.asarray(frame) 128 | '''bbox[0] = bbox[0] + bbox[2]/2 129 | bbox[1] = bbox[1] + bbox[3]/2''' 130 | 131 | self.pos = np.array([bbox[0] + bbox[2] / 2 - 1 / 2, bbox[1] + bbox[3] / 2 - 1 / 2]) # center x, center y, zero based 132 | #self.pos = np.array([bbox[0], bbox[1]]) # center x, center y, zero based 133 | 134 | self.target_sz = np.array([bbox[2], bbox[3]]) # width, height 135 | self.bbox = np.array([bbox[0] + bbox[2] / 2 - 1 / 2, bbox[1] + bbox[3] / 2 - 1 / 2, bbox[2], bbox[3]]) 136 | #self.bbox = np.array([bbox[0], bbox[1], bbox[2], bbox[3]]) 137 | 138 | self.origin_target_sz 
= np.array([bbox[2], bbox[3]]) 139 | # get exemplar img 140 | self.img_mean = np.mean(frame, axis=(0, 1)) 141 | 142 | exemplar_img, _, _ = self.data_loader.get_exemplar_image( frame, 143 | self.bbox, 144 | config.template_img_size, 145 | config.context_amount, 146 | self.img_mean) 147 | 148 | #cv2.imshow('exemplar_img', exemplar_img) 149 | # get exemplar feature 150 | exemplar_img = self.transforms(exemplar_img)[None, :, :, :] 151 | if self.cuda: 152 | self.model.track_init(exemplar_img.cuda()) 153 | else: 154 | self.model.track_init(exemplar_img) 155 | 156 | def update(self, frame): 157 | """track object based on the previous frame 158 | Args: 159 | frame: an RGB image 160 | 161 | Returns: 162 | bbox: tuple of 1-based bounding box(xmin, ymin, xmax, ymax) 163 | """ 164 | frame = np.asarray(frame) 165 | 166 | instance_img, _, _, scale_x = self.data_loader.get_instance_image( frame, 167 | self.bbox, 168 | config.template_img_size, 169 | config.detection_img_size, 170 | config.context_amount, 171 | self.img_mean) 172 | #cv2.imshow('instance_img', instance_img) 173 | 174 | instance_img = self.transforms(instance_img)[None, :, :, :] 175 | if self.cuda: 176 | pred_score, pred_regression = self.model.track(instance_img.cuda()) 177 | else: 178 | pred_score, pred_regression = self.model.track(instance_img) 179 | 180 | pred_conf = pred_score.reshape(-1, 2, config.size ).permute(0, 2, 1) 181 | pred_offset = pred_regression.reshape(-1, 4, config.size ).permute(0, 2, 1) 182 | 183 | delta = pred_offset[0].cpu().detach().numpy() 184 | box_pred = util.box_transform_inv(self.anchors, delta) 185 | score_pred = F.softmax(pred_conf, dim=2)[0, :, 1].cpu().detach().numpy() 186 | 187 | s_c = util.change(util.sz(box_pred[:, 2], box_pred[:, 3]) / (util.sz_wh(self.target_sz * scale_x))) # scale penalty 188 | r_c = util.change((self.target_sz[0] / self.target_sz[1]) / (box_pred[:, 2] / box_pred[:, 3])) # ratio penalty 189 | penalty = np.exp(-(r_c * s_c - 1.) 
* config.penalty_k) 190 | pscore = penalty * score_pred 191 | pscore = pscore * (1 - config.window_influence) + self.window * config.window_influence 192 | best_pscore_id = np.argmax(pscore) 193 | target = box_pred[best_pscore_id, :] / scale_x 194 | 195 | lr = penalty[best_pscore_id] * score_pred[best_pscore_id] * config.lr_box 196 | 197 | res_x = np.clip(target[0] + self.pos[0], 0, frame.shape[1]) 198 | res_y = np.clip(target[1] + self.pos[1], 0, frame.shape[0]) 199 | 200 | res_w = np.clip(self.target_sz[0] * (1 - lr) + target[2] * lr, config.min_scale * self.origin_target_sz[0], 201 | config.max_scale * self.origin_target_sz[0]) 202 | res_h = np.clip(self.target_sz[1] * (1 - lr) + target[3] * lr, config.min_scale * self.origin_target_sz[1], 203 | config.max_scale * self.origin_target_sz[1]) 204 | 205 | self.pos = np.array([res_x, res_y]) 206 | self.target_sz = np.array([res_w, res_h]) 207 | 208 | bbox = np.array([res_x, res_y, res_w, res_h]) 209 | #print('bbox', bbox) 210 | self.bbox = ( 211 | np.clip(bbox[0], 0, frame.shape[1]).astype(np.float64), 212 | np.clip(bbox[1], 0, frame.shape[0]).astype(np.float64), 213 | np.clip(bbox[2], 10, frame.shape[1]).astype(np.float64), 214 | np.clip(bbox[3], 10, frame.shape[0]).astype(np.float64)) 215 | 216 | res_x = res_x - res_w/2 # x -> x1 217 | res_y = res_y - res_h/2 # y -> y1 218 | bbox = np.array([res_x, res_y, res_w, res_h]) 219 | return bbox 220 | -------------------------------------------------------------------------------- /train/data.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import sys 4 | import cv2 5 | import time 6 | import torch 7 | import random 8 | import numpy as np 9 | import os.path as osp 10 | from util import util 11 | from PIL import Image 12 | from config import config 13 | from torch.utils.data import Dataset 14 | from got10k.datasets import ImageNetVID, GOT10k 15 | from torchvision import datasets, transforms, utils 16 | from got10k.datasets import ImageNetVID, GOT10k 17 | from custom_transforms import Normalize, ToTensor, RandomStretch, \ 18 | RandomCrop, CenterCrop, RandomBlur, ColorAug 19 | 20 | class TrainDataLoader(Dataset): 21 | def __init__(self, seq_dataset, z_transforms, x_transforms, name = 'GOT-10k'): 22 | 23 | self.max_inter = config.max_inter 24 | self.z_transforms = z_transforms 25 | self.x_transforms = x_transforms 26 | self.sub_class_dir = seq_dataset 27 | self.ret = {} 28 | self.count = 0 29 | self.index = 3000 30 | self.name = name 31 | self.anchors = util.generate_anchors( config.anchor_total_stride, 32 | config.anchor_base_size, 33 | config.anchor_scales, 34 | config.anchor_ratios, 35 | config.score_size) 36 | 37 | 38 | def _pick_img_pairs(self, index_of_subclass): 39 | 40 | assert index_of_subclass < len(self.sub_class_dir), 'index_of_subclass should less than total classes' 41 | 42 | video_name = self.sub_class_dir[index_of_subclass][0] 43 | 44 | video_num = len(video_name) 45 | video_gt = self.sub_class_dir[index_of_subclass][1] 46 | 47 | status = True 48 | while status: 49 | if self.max_inter >= video_num-1: 50 | self.max_inter = video_num//2 51 | 52 | template_index = np.clip(random.choice(range(0, max(1, video_num - self.max_inter))), 0, video_num-1) 53 | 54 | detection_index= np.clip(random.choice(range(1, max(2, self.max_inter))) + template_index, 0, video_num-1) 55 | 56 | template_img_path, detection_img_path = video_name[template_index], video_name[detection_index] 57 | 58 | template_gt = 
video_gt[template_index] 59 | 60 | detection_gt = video_gt[detection_index] 61 | 62 | if template_gt[2]*template_gt[3]*detection_gt[2]*detection_gt[3] != 0: 63 | status = False 64 | else: 65 | #print('Warning : Encounter object missing, reinitializing ...') 66 | print( 'index_of_subclass:', index_of_subclass, '\n', 67 | 'template_index:', template_index, '\n', 68 | 'template_gt:', template_gt, '\n', 69 | 'detection_index:', detection_index, '\n', 70 | 'detection_gt:', detection_gt, '\n') 71 | 72 | 73 | # load infomation of template and detection 74 | self.ret['template_img_path'] = template_img_path 75 | self.ret['detection_img_path'] = detection_img_path 76 | self.ret['template_target_x1y1wh'] = template_gt 77 | self.ret['detection_target_x1y1wh']= detection_gt 78 | t1, t2 = self.ret['template_target_x1y1wh'].copy(), self.ret['detection_target_x1y1wh'].copy() 79 | self.ret['template_target_xywh'] = np.array([t1[0]+t1[2]//2, t1[1]+t1[3]//2, t1[2], t1[3]], np.float32) 80 | self.ret['detection_target_xywh'] = np.array([t2[0]+t2[2]//2, t2[1]+t2[3]//2, t2[2], t2[3]], np.float32) 81 | self.ret['anchors'] = self.anchors 82 | #self._average() 83 | 84 | def open(self): 85 | 86 | '''template''' 87 | #template_img = cv2.imread(self.ret['template_img_path']) if you use cv2.imread you can not open .JPEG format 88 | template_img = Image.open(self.ret['template_img_path']) 89 | template_img = np.array(template_img) 90 | 91 | detection_img = Image.open(self.ret['detection_img_path']) 92 | detection_img = np.array(detection_img) 93 | 94 | if np.random.rand(1) < config.gray_ratio: 95 | 96 | template_img = cv2.cvtColor(template_img, cv2.COLOR_RGB2GRAY) 97 | template_img = cv2.cvtColor(template_img, cv2.COLOR_GRAY2RGB) 98 | detection_img = cv2.cvtColor(detection_img, cv2.COLOR_RGB2GRAY) 99 | detection_img = cv2.cvtColor(detection_img, cv2.COLOR_GRAY2RGB) 100 | 101 | img_mean = np.mean(template_img, axis=(0, 1)) 102 | #img_mean = tuple(map(int, template_img.mean(axis=(0, 1)))) 103 | 104 | exemplar_img, scale_z, s_z, w_x, h_x = self.get_exemplar_image( template_img, 105 | self.ret['template_target_xywh'], 106 | config.template_img_size, 107 | config.context, img_mean ) 108 | 109 | size_x = config.template_img_size 110 | x1, y1 = int((size_x + 1) / 2 - w_x / 2), int((size_x + 1) / 2 - h_x / 2) 111 | x2, y2 = int((size_x + 1) / 2 + w_x / 2), int((size_x + 1) / 2 + h_x / 2) 112 | #frame = cv2.rectangle(exemplar_img, (x1,y1), (x2,y2), (0, 255, 0), 1) 113 | #cv2.imwrite('exemplar_img.png',frame) 114 | #cv2.waitKey(0) 115 | 116 | self.ret['exemplar_img'] = exemplar_img 117 | 118 | '''detection''' 119 | #detection_img = cv2.imread(self.ret['detection_img_path']) 120 | d = self.ret['detection_target_xywh'] 121 | cx, cy, w, h = d # float type 122 | 123 | wc_z = w + 0.5 * (w + h) 124 | hc_z = h + 0.5 * (w + h) 125 | s_z = np.sqrt(wc_z * hc_z) 126 | 127 | s_x = s_z / (config.detection_img_size//2) 128 | img_mean_d = tuple(map(int, detection_img.mean(axis=(0, 1)))) 129 | 130 | a_x_ = np.random.choice(range(-12,12)) 131 | a_x = a_x_ * s_x 132 | 133 | b_y_ = np.random.choice(range(-12,12)) 134 | b_y = b_y_ * s_x 135 | 136 | instance_img, a_x, b_y, w_x, h_x, scale_x = self.get_instance_image( detection_img, d, 137 | config.template_img_size, # 127 138 | config.detection_img_size,# 255 139 | config.context, # 0.5 140 | a_x, b_y, 141 | img_mean_d ) 142 | 143 | size_x = config.detection_img_size 144 | 145 | x1, y1 = int((size_x + 1) / 2 - w_x / 2), int((size_x + 1) / 2 - h_x / 2) 146 | x2, y2 = int((size_x + 1) / 2 + w_x / 2), 
int((size_x + 1) / 2 + h_x / 2) 147 | 148 | #frame_d = cv2.rectangle(instance_img, (int(x1+(a_x*scale_x)),int(y1+(b_y*scale_x))), (int(x2+(a_x*scale_x)),int(y2+(b_y*scale_x))), (0, 255, 0), 1) 149 | #cv2.imwrite('detection_img_ori.png',frame_d) 150 | 151 | w = x2 - x1 152 | h = y2 - y1 153 | cx = x1 + w/2 154 | cy = y1 + h/2 155 | 156 | #print('[a_x_, b_y_, w, h]', [int(a_x_), int(b_y_), w, h]) 157 | 158 | self.ret['instance_img'] = instance_img 159 | #self.ret['cx, cy, w, h'] = [int(a_x_*0.16), int(b_y_*0.16), w, h] 160 | self.ret['cx, cy, w, h'] = [int(a_x_), int(b_y_), w, h] 161 | 162 | def get_exemplar_image(self, img, bbox, size_z, context_amount, img_mean=None): 163 | cx, cy, w, h = bbox 164 | 165 | wc_z = w + context_amount * (w + h) 166 | hc_z = h + context_amount * (w + h) 167 | s_z = np.sqrt(wc_z * hc_z) 168 | scale_z = size_z / s_z 169 | 170 | exemplar_img, scale_x = self.crop_and_pad_old(img, cx, cy, size_z, s_z, img_mean) 171 | 172 | w_x = w * scale_x 173 | h_x = h * scale_x 174 | 175 | return exemplar_img, scale_z, s_z, w_x, h_x 176 | 177 | def get_instance_image(self, img, bbox, size_z, size_x, context_amount, a_x, b_y, img_mean=None): 178 | 179 | cx, cy, w, h = bbox # float type 180 | 181 | #cx, cy = cx - a_x , cy - b_y 182 | wc_z = w + context_amount * (w + h) 183 | hc_z = h + context_amount * (w + h) 184 | s_z = np.sqrt(wc_z * hc_z) # the width of the crop box 185 | 186 | scale_z = size_z / s_z 187 | 188 | s_x = s_z * size_x / size_z 189 | instance_img, gt_w, gt_h, scale_x, scale_h, scale_w = self.crop_and_pad(img, cx, cy, w, h, a_x, b_y, size_x, s_x, img_mean) 190 | w_x = gt_w #* scale_x #w * scale_x 191 | h_x = gt_h #* scale_x #h * scale_x 192 | 193 | #cx, cy = cx/ scale_w *scale_x, cy/ scale_h *scale_x 194 | #cx, cy = cx/ scale_w, cy/ scale_h 195 | a_x, b_y = a_x*scale_w, b_y*scale_h 196 | x1, y1 = int((size_x + 1) / 2 - w_x / 2), int((size_x + 1) / 2 - h_x / 2) 197 | x2, y2 = int((size_x + 1) / 2 + w_x / 2), int((size_x + 1) / 2 + h_x / 2) 198 | '''frame = cv2.rectangle(instance_img, ( int(x1+(a_x*scale_x)), 199 | int(y1+(b_y*scale_x))), 200 | (int(x2+(a_x*scale_x)), 201 | int(y2+(b_y*scale_x))), 202 | (0, 255, 0), 1)''' 203 | #cv2.imwrite('1.jpg', frame) 204 | return instance_img, a_x, b_y, w_x, h_x, scale_x 205 | 206 | def crop_and_pad(self, img, cx, cy, gt_w, gt_h, a_x, b_y, model_sz, original_sz, img_mean=None): 207 | 208 | #random = np.random.uniform(-0.15, 0.15) 209 | scale_h = 1.0 + np.random.uniform(-0.15, 0.15) 210 | scale_w = 1.0 + np.random.uniform(-0.15, 0.15) 211 | 212 | im_h, im_w, _ = img.shape 213 | 214 | xmin = (cx-a_x) - ((original_sz - 1) / 2)* scale_w 215 | xmax = (cx-a_x) + ((original_sz - 1) / 2)* scale_w 216 | 217 | ymin = (cy-b_y) - ((original_sz - 1) / 2)* scale_h 218 | ymax = (cy-b_y) + ((original_sz - 1) / 2)* scale_h 219 | 220 | #print('xmin, xmax, ymin, ymax', xmin, xmax, ymin, ymax) 221 | 222 | left = int(self.round_up(max(0., -xmin))) 223 | top = int(self.round_up(max(0., -ymin))) 224 | right = int(self.round_up(max(0., xmax - im_w + 1))) 225 | bottom = int(self.round_up(max(0., ymax - im_h + 1))) 226 | 227 | xmin = int(self.round_up(xmin + left)) 228 | xmax = int(self.round_up(xmax + left)) 229 | ymin = int(self.round_up(ymin + top)) 230 | ymax = int(self.round_up(ymax + top)) 231 | 232 | r, c, k = img.shape 233 | if any([top, bottom, left, right]): 234 | te_im_ = np.zeros((int((r + top + bottom)), int((c + left + right)), k), np.uint8) # 0 is better than 1 initialization 235 | te_im = np.zeros((int((r + top + bottom)), int((c + left + 
right)), k), np.uint8) # 0 is better than 1 initialization 236 | 237 | #cv2.imwrite('te_im1.jpg', te_im) 238 | te_im[:, :, :] = img_mean 239 | #cv2.imwrite('te_im2_1.jpg', te_im) 240 | te_im[top:top + r, left:left + c, :] = img 241 | #cv2.imwrite('te_im2.jpg', te_im) 242 | 243 | if top: 244 | te_im[0:top, left:left + c, :] = img_mean 245 | if bottom: 246 | te_im[r + top:, left:left + c, :] = img_mean 247 | if left: 248 | te_im[:, 0:left, :] = img_mean 249 | if right: 250 | te_im[:, c + left:, :] = img_mean 251 | 252 | im_patch_original = te_im[int(ymin):int(ymax + 1), int(xmin):int(xmax + 1), :] 253 | 254 | #cv2.imwrite('te_im3.jpg', im_patch_original) 255 | 256 | else: 257 | im_patch_original = img[int(ymin):int((ymax) + 1), int(xmin):int((xmax) + 1), :] 258 | 259 | #cv2.imwrite('te_im4.jpg', im_patch_original) 260 | 261 | if not np.array_equal(model_sz, original_sz): 262 | 263 | h, w, _ = im_patch_original.shape 264 | 265 | 266 | if h < w: 267 | scale_h_ = 1 268 | scale_w_ = h/w 269 | scale = config.detection_img_size/h 270 | elif h > w: 271 | scale_h_ = w/h 272 | scale_w_ = 1 273 | scale = config.detection_img_size/w 274 | elif h == w: 275 | scale_h_ = 1 276 | scale_w_ = 1 277 | scale = config.detection_img_size/w 278 | 279 | gt_w = gt_w * scale_w_ 280 | gt_h = gt_h * scale_h_ 281 | 282 | gt_w = gt_w * scale 283 | gt_h = gt_h * scale 284 | 285 | #im_patch = cv2.resize(im_patch_original_, (shape)) # zzp: use cv to get a better speed 286 | #cv2.imwrite('te_im8.jpg', im_patch) 287 | 288 | im_patch = cv2.resize(im_patch_original, (model_sz, model_sz)) # zzp: use cv to get a better speed 289 | #cv2.imwrite('te_im9.jpg', im_patch) 290 | 291 | 292 | else: 293 | im_patch = im_patch_original 294 | #scale = model_sz / im_patch_original.shape[0] 295 | return im_patch, gt_w, gt_h, scale, scale_h_, scale_w_ 296 | 297 | 298 | 299 | 300 | def crop_and_pad_old(self, img, cx, cy, model_sz, original_sz, img_mean=None): 301 | im_h, im_w, _ = img.shape 302 | 303 | xmin = cx - (original_sz - 1) / 2 304 | xmax = xmin + original_sz - 1 305 | ymin = cy - (original_sz - 1) / 2 306 | ymax = ymin + original_sz - 1 307 | 308 | left = int(self.round_up(max(0., -xmin))) 309 | top = int(self.round_up(max(0., -ymin))) 310 | right = int(self.round_up(max(0., xmax - im_w + 1))) 311 | bottom = int(self.round_up(max(0., ymax - im_h + 1))) 312 | 313 | xmin = int(self.round_up(xmin + left)) 314 | xmax = int(self.round_up(xmax + left)) 315 | ymin = int(self.round_up(ymin + top)) 316 | ymax = int(self.round_up(ymax + top)) 317 | r, c, k = img.shape 318 | if any([top, bottom, left, right]): 319 | te_im = np.zeros((r + top + bottom, c + left + right, k), np.uint8) # 0 is better than 1 initialization 320 | te_im[top:top + r, left:left + c, :] = img 321 | if top: 322 | te_im[0:top, left:left + c, :] = img_mean 323 | if bottom: 324 | te_im[r + top:, left:left + c, :] = img_mean 325 | if left: 326 | te_im[:, 0:left, :] = img_mean 327 | if right: 328 | te_im[:, c + left:, :] = img_mean 329 | im_patch_original = te_im[int(ymin):int(ymax + 1), int(xmin):int(xmax + 1), :] 330 | else: 331 | im_patch_original = img[int(ymin):int(ymax + 1), int(xmin):int(xmax + 1), :] 332 | if not np.array_equal(model_sz, original_sz): 333 | 334 | im_patch = cv2.resize(im_patch_original, (model_sz, model_sz)) # zzp: use cv to get a better speed 335 | else: 336 | im_patch = im_patch_original 337 | scale = model_sz / im_patch_original.shape[0] 338 | return im_patch, scale 339 | 340 | def round_up(self, value): 341 | return round(value + 1e-6 + 1000) - 1000 
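    # Note (added comment, not in the original source): _target() below encodes the ground-truth
    # box against every anchor. compute_target() labels an anchor 1 (positive) when its IoU with
    # the box exceeds config.pos_threshold, 0 (negative) when it is below config.neg_threshold,
    # and -1 (ignored) otherwise; the regression target is the (dx, dy, log dw, log dh) offset
    # produced by box_transform(), which rpn_smoothL1 in train/loss.py later regresses on the
    # positive anchors only.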
342 | 343 | def _target(self): 344 | 345 | regression_target, conf_target = self.compute_target(self.anchors, 346 | np.array(list(map(round, self.ret['cx, cy, w, h'])))) 347 | 348 | return regression_target, conf_target 349 | 350 | def compute_target(self, anchors, box): 351 | #box = [-(box[0]), -(box[1]), box[2], box[3]] 352 | regression_target = self.box_transform(anchors, box) 353 | 354 | iou = self.compute_iou(anchors, box).flatten() 355 | #print(np.max(iou)) 356 | pos_index = np.where(iou > config.pos_threshold)[0] 357 | neg_index = np.where(iou < config.neg_threshold)[0] 358 | label = np.ones_like(iou) * -1 359 | 360 | label[pos_index] = 1 361 | label[neg_index] = 0 362 | '''print(len(neg_index)) 363 | for i, neg_ind in enumerate(neg_index): 364 | if i % 40 == 0: 365 | label[neg_ind] = 0''' 366 | 367 | 368 | 369 | #max_index = np.argsort(iou.flatten())[-20:] 370 | 371 | return regression_target, label 372 | 373 | def box_transform(self, anchors, gt_box): 374 | anchor_xctr = anchors[:, :1] 375 | anchor_yctr = anchors[:, 1:2] 376 | anchor_w = anchors[:, 2:3] 377 | anchor_h = anchors[:, 3:] 378 | gt_cx, gt_cy, gt_w, gt_h = gt_box 379 | 380 | target_x = (gt_cx - anchor_xctr) / anchor_w 381 | target_y = (gt_cy - anchor_yctr) / anchor_h 382 | target_w = np.log(gt_w / anchor_w) 383 | target_h = np.log(gt_h / anchor_h) 384 | regression_target = np.hstack((target_x, target_y, target_w, target_h)) 385 | return regression_target 386 | 387 | def compute_iou(self, anchors, box): 388 | if np.array(anchors).ndim == 1: 389 | anchors = np.array(anchors)[None, :] 390 | else: 391 | anchors = np.array(anchors) 392 | if np.array(box).ndim == 1: 393 | box = np.array(box)[None, :] 394 | else: 395 | box = np.array(box) 396 | gt_box = np.tile(box.reshape(1, -1), (anchors.shape[0], 1)) 397 | 398 | anchor_x1 = anchors[:, :1] - anchors[:, 2:3] / 2 + 0.5 399 | anchor_x2 = anchors[:, :1] + anchors[:, 2:3] / 2 - 0.5 400 | anchor_y1 = anchors[:, 1:2] - anchors[:, 3:] / 2 + 0.5 401 | anchor_y2 = anchors[:, 1:2] + anchors[:, 3:] / 2 - 0.5 402 | 403 | gt_x1 = gt_box[:, :1] - gt_box[:, 2:3] / 2 + 0.5 404 | gt_x2 = gt_box[:, :1] + gt_box[:, 2:3] / 2 - 0.5 405 | gt_y1 = gt_box[:, 1:2] - gt_box[:, 3:] / 2 + 0.5 406 | gt_y2 = gt_box[:, 1:2] + gt_box[:, 3:] / 2 - 0.5 407 | 408 | xx1 = np.max([anchor_x1, gt_x1], axis=0) 409 | xx2 = np.min([anchor_x2, gt_x2], axis=0) 410 | yy1 = np.max([anchor_y1, gt_y1], axis=0) 411 | yy2 = np.min([anchor_y2, gt_y2], axis=0) 412 | 413 | inter_area = np.max([xx2 - xx1, np.zeros(xx1.shape)], axis=0) * np.max([yy2 - yy1, np.zeros(xx1.shape)], 414 | axis=0) 415 | area_anchor = (anchor_x2 - anchor_x1) * (anchor_y2 - anchor_y1) 416 | area_gt = (gt_x2 - gt_x1) * (gt_y2 - gt_y1) 417 | iou = inter_area / (area_anchor + area_gt - inter_area + 1e-6) 418 | return iou 419 | 420 | def _tranform(self): 421 | 422 | self.ret['train_x_transforms'] = self.x_transforms(self.ret['instance_img']) 423 | self.ret['train_z_transforms'] = self.z_transforms(self.ret['exemplar_img']) 424 | 425 | def __getitem__(self, index): 426 | index = random.choice(range(len(self.sub_class_dir))) 427 | '''if len(self.sub_class_dir) > 180: 428 | index = self.index 429 | self.index += 1 430 | 431 | if self.index >= 8000: 432 | self.index = 3000 433 | 434 | index = random.choice(range(3000, 8000)) 435 | 436 | if index in self.index: 437 | index = random.choice(range(3000, 8000)) 438 | print("index in self.index") 439 | 440 | if not index in self.index: 441 | self.index.append(index) 442 | if len(self.index) >= 3000: 443 | self.index = 
[] 444 | else: 445 | index = random.choice(range(len(self.sub_class_dir)))''' 446 | 447 | if self.name == 'GOT-10k': 448 | if index == 4418 or index == 8627 or index == 8629 or index == 9057 or index == 9058: 449 | index += 3 450 | self._pick_img_pairs(index) 451 | self.open() 452 | self._tranform() 453 | regression_target, conf_target = self._target() 454 | self.count += 1 455 | 456 | return self.ret['train_z_transforms'], self.ret['train_x_transforms'], regression_target, conf_target.astype(np.int64) 457 | 458 | def __len__(self): 459 | return config.train_epoch_size*64 460 | 461 | if __name__ == "__main__": 462 | 463 | root_dir = '/Users/arbi/Desktop' 464 | seq_dataset = GOT10k(root_dir, subset='val') 465 | train_data = TrainDataLoader(seq_dataset, transforms.Compose([ToTensor()]), transforms.Compose([ToTensor()])) 466 | train_data.__getitem__(180) 467 | --------------------------------------------------------------------------------
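A quick sanity check on the box parameterization used throughout this repo: the offsets encoded by TrainDataLoader.box_transform (train/data.py) and decoded by Util.box_transform_inv (train/util.py) are exact inverses of each other. The standalone numpy sketch below (not part of the repo; the anchor and box values are made up for illustration) re-implements both formulas and verifies the round trip.

import numpy as np

# Anchors and boxes are stored as (cx, cy, w, h), matching train/data.py.
anchors = np.array([[0.,  0., 104., 32.],
                    [8., 16.,  64., 64.]], dtype=np.float32)
gt_box = np.array([12., 20., 80., 48.], dtype=np.float32)

def encode(anchors, gt):
    # Same formulas as TrainDataLoader.box_transform: center offsets are
    # normalized by anchor size, width/height ratios are log-encoded.
    dx = (gt[0] - anchors[:, 0]) / anchors[:, 2]
    dy = (gt[1] - anchors[:, 1]) / anchors[:, 3]
    dw = np.log(gt[2] / anchors[:, 2])
    dh = np.log(gt[3] / anchors[:, 3])
    return np.stack([dx, dy, dw, dh], axis=1)

def decode(anchors, offsets):
    # Same formulas as Util.box_transform_inv.
    cx = anchors[:, 2] * offsets[:, 0] + anchors[:, 0]
    cy = anchors[:, 3] * offsets[:, 1] + anchors[:, 1]
    w = anchors[:, 2] * np.exp(offsets[:, 2])
    h = anchors[:, 3] * np.exp(offsets[:, 3])
    return np.stack([cx, cy, w, h], axis=1)

offsets = encode(anchors, gt_box)
recovered = decode(anchors, offsets)
print(offsets)
print(recovered)  # every row equals gt_box again
assert np.allclose(recovered, np.tile(gt_box, (2, 1)), atol=1e-4)

These (dx, dy, log dw, log dh) offsets are what rpn_smoothL1 in train/loss.py regresses on positive anchors during training, and what TrackerSiamRPNBIG.update() decodes (via util.box_transform_inv) before applying the scale/ratio penalty and cosine window at tracking time.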