├── core
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── config.cpython-37.pyc
│   │   └── __init__.cpython-37.pyc
│   └── config.py
├── models
│   ├── __init__.py
│   ├── __pycache__
│   │   └── __init__.cpython-37.pyc
│   ├── config
│   │   └── config.yaml
│   └── rknnlite_rk3588_tracker.py
├── utils
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── bbox.cpython-37.pyc
│   │   └── __init__.cpython-37.pyc
│   └── bbox.py
├── weights
│   ├── head.rknn
│   ├── track_backbone_T.rknn
│   └── track_backbone_X.rknn
├── README.md
└── main.py

/core/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/weights/head.rknn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Try2ChangeX/NanoTrack_RK3588_python/HEAD/weights/head.rknn
--------------------------------------------------------------------------------
/weights/track_backbone_T.rknn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Try2ChangeX/NanoTrack_RK3588_python/HEAD/weights/track_backbone_T.rknn
--------------------------------------------------------------------------------
/weights/track_backbone_X.rknn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Try2ChangeX/NanoTrack_RK3588_python/HEAD/weights/track_backbone_X.rknn
--------------------------------------------------------------------------------
/core/__pycache__/config.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Try2ChangeX/NanoTrack_RK3588_python/HEAD/core/__pycache__/config.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/bbox.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Try2ChangeX/NanoTrack_RK3588_python/HEAD/utils/__pycache__/bbox.cpython-37.pyc
--------------------------------------------------------------------------------
/core/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Try2ChangeX/NanoTrack_RK3588_python/HEAD/core/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Try2ChangeX/NanoTrack_RK3588_python/HEAD/utils/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/models/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Try2ChangeX/NanoTrack_RK3588_python/HEAD/models/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# NanoTrack_RK3588_python

The NanoTrack tracking algorithm on the Rockchip RK3588 NPU; runs on RK3588 development boards at up to 120 FPS.


## dependencies

```
numpy
opencv
rknn_toolkit_lite2 == 1.3
```

For the official rknn_toolkit_lite2 library and documentation for the RK3588, see [rknn-toolkit2](https://github.com/rockchip-linux/rknn-toolkit2).


## demo

The models must first be converted to the .rknn format with rknn-toolkit2.

```
python3 main.py
```

- video_name is the path of the target video
- init_rect is the initial detection bbox (see the sketch below)
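Both are currently hard-coded in `main.py`; a minimal sketch of the edit (the video path below is a placeholder, not a file shipped with the repo):

```python
# in main(): point video_name at your clip and set the first-frame target box
video_name = './data/test.mp4'    # placeholder path
init_rect = [280, 472, 70, 47]    # (x, y, w, h) of the target in the first frame
```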

## reference

[rknn-toolkit2](https://github.com/rockchip-linux/rknn-toolkit2)
[SiamTracker](https://github.com/HonglinChu/SiamTrackers)
--------------------------------------------------------------------------------
/models/config/config.yaml:
--------------------------------------------------------------------------------
META_ARC: "nanotrack"

BACKBONE:
  TYPE: "mobilenetv3_small"
  KWARGS:
    used_layers: [4]
  PRETRAINED: './models/pretrained/mobilenetv3_small_1.0.pth'
  TRAIN_LAYERS: ['features']
  TRAIN_EPOCH: 10
  LAYERS_LR: 0.1

ADJUST:
  ADJUST: True
  TYPE: 'AdjustLayer'
  KWARGS:
    in_channels: 64
    out_channels: 64

BAN:
  BAN: True
  TYPE: DepthwiseBAN
  KWARGS:
    in_channels: 64
    out_channels: 64

CUDA: True

POINT:
  STRIDE: 16

TRACK:
  TYPE: 'NanoTracker'
  WINDOW_INFLUENCE: 0.455
  PENALTY_K: 0.15
  LR: 0.37
  EXEMPLAR_SIZE: 127
  INSTANCE_SIZE: 255
  BASE_SIZE: 7
  CONTEXT_AMOUNT: 0.5

TRAIN:
  EPOCH: 50
  START_EPOCH: 0
  BATCH_SIZE: 32
  NUM_WORKERS: 8
  BASE_LR: 0.005
  CLS_WEIGHT: 1.0
  LOC_WEIGHT: 1.0
  NUM_CONVS: 4
  BASE_SIZE: 7
  OUTPUT_SIZE: 16
  RESUME: ''
  PRETRAINED: ''
  SNAPSHOT_DIR: './models/snapshot'

  LR:
    TYPE: 'log'
    KWARGS:
      start_lr: 0.005
      end_lr: 0.0005
  LR_WARMUP:
    TYPE: 'step'
    EPOCH: 5
    KWARGS:
      start_lr: 0.001
      end_lr: 0.005
      step: 1

DATASET:
  NAMES:
    - 'GOT'

  VIDEOS_PER_EPOCH: 100000

  TEMPLATE:
    SHIFT: 4
    SCALE: 0.05
    BLUR: 0.0
    FLIP: 0.0
    COLOR: 1.0

  SEARCH:
    SHIFT: 64
    SCALE: 0.18
    BLUR: 0.2
    FLIP: 0.0
    COLOR: 1.0

  NEG: 0.2
  GRAY: 0.0
--------------------------------------------------------------------------------
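These values do not stand alone: at startup `main.py` merges them over the defaults declared in `core/config.py` (further below) through yacs. A minimal sketch of that mechanism, run from the repo root:

```python
from core.config import cfg  # yacs CfgNode holding the library defaults

cfg.merge_from_file('./models/config/config.yaml')
print(cfg.POINT.STRIDE)      # 16 (the default in core/config.py is 8)
print(cfg.TRACK.BASE_SIZE)   # 7  (the default is 8)
```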
/main.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import os
import time
import argparse

import cv2
import numpy as np
from glob import glob

import sys

sys.path.append(os.getcwd())
from core.config import cfg

from models.rknnlite_rk3588_tracker import NanoTracker_RKNNLite

parser = argparse.ArgumentParser(description='tracking demo')

parser.add_argument('--config', default='./models/config/config.yaml', type=str, help='config file')

parser.add_argument('--save', action='store_true', help='whether to save the visualized result')

args = parser.parse_args()


def get_frames(video_name):
    if not video_name:
        cap = cv2.VideoCapture(0)
        # warmup
        for i in range(5):
            cap.read()
        while True:
            ret, frame = cap.read()
            if ret:
                yield frame
            else:
                break

    elif video_name.endswith('avi') or \
            video_name.endswith('mp4') or \
            video_name.endswith('mov'):
        cap = cv2.VideoCapture(video_name)

        # warmup
        for i in range(50):
            cap.read()

        while True:
            ret, frame = cap.read()
            if ret:
                yield frame
            else:
                break
    else:
        images = glob(os.path.join(video_name, '*.jp*'))
        images = sorted(images,
                        key=lambda x: int(x.split('/')[-1].split('.')[0]))
        for img in images:
            frame = cv2.imread(img)
            yield frame


def main():
    # load config
    cfg.merge_from_file(args.config)

    # load weights
    Tback_weight = './weights/track_backbone_T.rknn'
    Xback_weight = './weights/track_backbone_X.rknn'
    Head_weight = './weights/head.rknn'

    video_name = './data/{your_video}'
    tracker = NanoTracker_RKNNLite(Tback_weight, Xback_weight, Head_weight)
    first_frame = True

    # img_savedir = './data/debug_img/'
    # count = 0

    for frame in get_frames(video_name):
        if first_frame:
            # build video writer

            init_rect = [280, 472, 70, 47]
            tracker.init(frame, init_rect)
            first_frame = False
        else:
            t1 = time.time()
            outputs = tracker.track(frame)
            print('fps:', 1. / (time.time() - t1))
            # The polygon/mask branch never triggers here: the RKNN tracker
            # returns only 'bbox' and 'best_score', so the else branch runs.
            if 'polygon' in outputs:
                polygon = np.array(outputs['polygon']).astype(np.int32)
                cv2.polylines(frame, [polygon.reshape((-1, 1, 2))],
                              True, (0, 255, 0), 3)
                mask = ((outputs['mask'] > cfg.TRACK.MASK_THERSHOLD) * 255)
                mask = mask.astype(np.uint8)
                mask = np.stack([mask, mask * 255, mask]).transpose(1, 2, 0)
                frame = cv2.addWeighted(frame, 0.77, mask, 0.23, -1)
            else:
                bbox = list(map(int, outputs['bbox']))
                cv2.rectangle(frame, (bbox[0], bbox[1]),
                              (bbox[0] + bbox[2], bbox[1] + bbox[3]),
                              (0, 255, 0), 3)
            # cv2.imshow(video_name, frame)
            # cv2.waitKey(30)
            # cv2.imwrite(os.path.join(img_savedir, '%03d.jpg'%count), frame)
            # count += 1


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
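Note that the `--save` flag above is parsed but never consumed; only the commented-out `cv2.imwrite` lines hint at saving output. A minimal sketch of honoring it with OpenCV's `VideoWriter` (the output path, codec, and frame rate are assumptions, not part of the repo):

```python
import cv2

def make_writer(first_frame, path='./data/result.mp4', fps=30.0):
    """Create an mp4 writer sized to the incoming frames (assumed codec/path)."""
    h, w = first_frame.shape[:2]
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    return cv2.VideoWriter(path, fourcc, fps, (w, h))

# inside main(): writer = make_writer(frame) if args.save else None,
# call writer.write(frame) after drawing each bbox, then writer.release().
```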
/utils/bbox.py:
--------------------------------------------------------------------------------
# Copyright (c) SenseTime. All Rights Reserved.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from collections import namedtuple

import numpy as np


Corner = namedtuple('Corner', 'x1 y1 x2 y2')
# alias
BBox = Corner
Center = namedtuple('Center', 'x y w h')


def corner2center(corner):
    """ convert (x1, y1, x2, y2) to (cx, cy, w, h)
    Args:
        corner: Corner or np.array (4 * N)
    Return:
        Center or np.array (4 * N)
    """
    if isinstance(corner, Corner):
        x1, y1, x2, y2 = corner
        return Center((x1 + x2) * 0.5, (y1 + y2) * 0.5, (x2 - x1), (y2 - y1))
    else:
        x1, y1, x2, y2 = corner[0], corner[1], corner[2], corner[3]
        x = (x1 + x2) * 0.5
        y = (y1 + y2) * 0.5
        w = x2 - x1
        h = y2 - y1
        return x, y, w, h


def center2corner(center):
    """ convert (cx, cy, w, h) to (x1, y1, x2, y2)
    Args:
        center: Center or np.array (4 * N)
    Return:
        Corner or np.array (4 * N)
    """
    if isinstance(center, Center):
        x, y, w, h = center
        return Corner(x - w * 0.5, y - h * 0.5, x + w * 0.5, y + h * 0.5)
    else:
        x, y, w, h = center[0], center[1], center[2], center[3]
        x1 = x - w * 0.5
        y1 = y - h * 0.5
        x2 = x + w * 0.5
        y2 = y + h * 0.5
        return x1, y1, x2, y2


def IoU(rect1, rect2):
    """ calculate intersection over union
    Args:
        rect1: (x1, y1, x2, y2)
        rect2: (x1, y1, x2, y2)
    Returns:
        iou
    """
    # overlap
    x1, y1, x2, y2 = rect1[0], rect1[1], rect1[2], rect1[3]
    tx1, ty1, tx2, ty2 = rect2[0], rect2[1], rect2[2], rect2[3]

    xx1 = np.maximum(tx1, x1)
    yy1 = np.maximum(ty1, y1)
    xx2 = np.minimum(tx2, x2)
    yy2 = np.minimum(ty2, y2)

    ww = np.maximum(0, xx2 - xx1)
    hh = np.maximum(0, yy2 - yy1)

    area = (x2 - x1) * (y2 - y1)
    target_a = (tx2 - tx1) * (ty2 - ty1)
    inter = ww * hh
    iou = inter / (area + target_a - inter)
    return iou
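
# Worked example (values chosen for illustration, not from the repo):
# rect1 = (0, 0, 10, 10) and rect2 = (5, 5, 15, 15) overlap in a 5 x 5 square,
# so IoU = 25 / (100 + 100 - 25) ~= 0.143:
#
#     >>> IoU((0, 0, 10, 10), (5, 5, 15, 15))
#     0.14285714285714285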

def cxy_wh_2_rect(pos, sz):
    """ convert (cx, cy, w, h) to (x1, y1, w, h), 0-index
    """
    return np.array([pos[0] - sz[0] / 2, pos[1] - sz[1] / 2, sz[0], sz[1]])


def rect_2_cxy_wh(rect):
    """ convert (x1, y1, w, h) to (cx, cy, w, h), 0-index
    """
    return np.array([rect[0] + rect[2] / 2, rect[1] + rect[3] / 2]), \
        np.array([rect[2], rect[3]])


def cxy_wh_2_rect1(pos, sz):
    """ convert (cx, cy, w, h) to (x1, y1, w, h), 1-index
    """
    return np.array([pos[0] - sz[0] / 2 + 1, pos[1] - sz[1] / 2 + 1, sz[0], sz[1]])


def rect1_2_cxy_wh(rect):
    """ convert (x1, y1, w, h) to (cx, cy, w, h), 1-index
    """
    return np.array([rect[0] + rect[2] / 2 - 1, rect[1] + rect[3] / 2 - 1]), \
        np.array([rect[2], rect[3]])


def get_axis_aligned_bbox(region):
    """ convert region to (cx, cy, w, h) represented by an axis-aligned box
    """
    nv = region.size
    if nv == 8:
        cx = np.mean(region[0::2])
        cy = np.mean(region[1::2])
        x1 = min(region[0::2])
        x2 = max(region[0::2])
        y1 = min(region[1::2])
        y2 = max(region[1::2])
        A1 = np.linalg.norm(region[0:2] - region[2:4]) * \
            np.linalg.norm(region[2:4] - region[4:6])
        A2 = (x2 - x1) * (y2 - y1)
        s = np.sqrt(A1 / A2)
        w = s * (x2 - x1) + 1
        h = s * (y2 - y1) + 1
    else:
        x = region[0]
        y = region[1]
        w = region[2]
        h = region[3]
        cx = x + w / 2
        cy = y + h / 2
    return cx, cy, w, h


def get_min_max_bbox(region):
    """ convert region to (cx, cy, w, h) represented by the min-max box
    """
    nv = region.size
    if nv == 8:
        cx = np.mean(region[0::2])
        cy = np.mean(region[1::2])
        x1 = min(region[0::2])
        x2 = max(region[0::2])
        y1 = min(region[1::2])
        y2 = max(region[1::2])
        w = x2 - x1
        h = y2 - y1
    else:
        x = region[0]
        y = region[1]
        w = region[2]
        h = region[3]
        cx = x + w / 2
        cy = y + h / 2
    return cx, cy, w, h
--------------------------------------------------------------------------------
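A note on the 8-value branch of `get_axis_aligned_bbox`: it handles VOT-style polygon annotations (x1, y1, ..., x4, y4) and rescales the axis-aligned hull by the polygon's area. A quick usage sketch with a made-up rotated box:

```python
import numpy as np
from utils.bbox import get_axis_aligned_bbox

region = np.array([10., 20., 50., 10., 60., 50., 20., 60.])  # hypothetical corners
cx, cy, w, h = get_axis_aligned_bbox(region)  # axis-aligned (cx, cy, w, h)
```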
/core/config.py:
--------------------------------------------------------------------------------
# Copyright (c) SenseTime. All Rights Reserved.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from yacs.config import CfgNode as CN

__C = CN()

cfg = __C

__C.META_ARC = ""

__C.CUDA = True

# ------------------------------------------------------------------------ #
# Training options
# ------------------------------------------------------------------------ #
__C.TRAIN = CN()

# Number of negatives
__C.TRAIN.NEG_NUM = 16

# Number of positives
__C.TRAIN.POS_NUM = 16

# Number of anchors per image
__C.TRAIN.TOTAL_NUM = 64


__C.TRAIN.EXEMPLAR_SIZE = 127

__C.TRAIN.SEARCH_SIZE = 255

__C.TRAIN.BASE_SIZE = 8

__C.TRAIN.OUTPUT_SIZE = 25

__C.TRAIN.RESUME = ''

__C.TRAIN.PRETRAINED = ''

__C.TRAIN.LOG_DIR = './logs'

__C.TRAIN.SNAPSHOT_DIR = './snapshot'

__C.TRAIN.EPOCH = 20

__C.TRAIN.START_EPOCH = 0

__C.TRAIN.NUM_CONVS = 4

__C.TRAIN.BATCH_SIZE = 32

__C.TRAIN.NUM_WORKERS = 8

__C.TRAIN.MOMENTUM = 0.9

__C.TRAIN.WEIGHT_DECAY = 0.0001

__C.TRAIN.CLS_WEIGHT = 1.0

__C.TRAIN.LOC_WEIGHT = 1.0

__C.TRAIN.PRINT_FREQ = 20

__C.TRAIN.LOG_GRADS = False

__C.TRAIN.GRAD_CLIP = 10.0

__C.TRAIN.BASE_LR = 0.005

__C.TRAIN.LR = CN()

__C.TRAIN.LR.TYPE = 'log'

__C.TRAIN.LR.KWARGS = CN(new_allowed=True)

__C.TRAIN.LR_WARMUP = CN()

__C.TRAIN.LR_WARMUP.WARMUP = True

__C.TRAIN.LR_WARMUP.TYPE = 'step'

__C.TRAIN.LR_WARMUP.EPOCH = 5

__C.TRAIN.LR_WARMUP.KWARGS = CN(new_allowed=True)

__C.MASK = CN()

__C.MASK.MASK = False

# ------------------------------------------------------------------------ #
# Dataset options
# ------------------------------------------------------------------------ #
__C.DATASET = CN(new_allowed=True)

# Augmentation
# for template
__C.DATASET.TEMPLATE = CN()

# for detail discussion
__C.DATASET.TEMPLATE.SHIFT = 4

__C.DATASET.TEMPLATE.SCALE = 0.05

__C.DATASET.TEMPLATE.BLUR = 0.0

__C.DATASET.TEMPLATE.FLIP = 0.0

__C.DATASET.TEMPLATE.COLOR = 1.0

__C.DATASET.SEARCH = CN()

__C.DATASET.SEARCH.SHIFT = 64

__C.DATASET.SEARCH.SCALE = 0.18

__C.DATASET.SEARCH.BLUR = 0.0

__C.DATASET.SEARCH.FLIP = 0.0

__C.DATASET.SEARCH.COLOR = 1.0

# for detail discussion
__C.DATASET.NEG = 0.2

# improve tracking performance for otb100
__C.DATASET.GRAY = 0.0

__C.DATASET.NAMES = ('VID', 'YOUTUBEBB', 'DET', 'COCO', 'GOT', 'LASOT')

__C.DATASET.VID = CN()
__C.DATASET.VID.ROOT = ''  # VID dataset path
__C.DATASET.VID.ANNO = ''
__C.DATASET.VID.FRAME_RANGE = 100
__C.DATASET.VID.NUM_USE = 100000

__C.DATASET.YOUTUBEBB = CN()
__C.DATASET.YOUTUBEBB.ROOT = ''
__C.DATASET.YOUTUBEBB.ANNO = ''
__C.DATASET.YOUTUBEBB.FRAME_RANGE = 3
__C.DATASET.YOUTUBEBB.NUM_USE = 100000

__C.DATASET.COCO = CN()
__C.DATASET.COCO.ROOT = ''
__C.DATASET.COCO.ANNO = ''
__C.DATASET.COCO.FRAME_RANGE = 1
__C.DATASET.COCO.NUM_USE = 100000

__C.DATASET.DET = CN()
__C.DATASET.DET.ROOT = ''
__C.DATASET.DET.ANNO = ''
__C.DATASET.DET.FRAME_RANGE = 1
__C.DATASET.DET.NUM_USE = 100000

__C.DATASET.GOT = CN()
__C.DATASET.GOT.ROOT = 'data/GOT-10k/crop511'
__C.DATASET.GOT.ANNO = 'data/GOT-10k/train.json'
__C.DATASET.GOT.FRAME_RANGE = 100
__C.DATASET.GOT.NUM_USE = 100000

__C.DATASET.LASOT = CN()
__C.DATASET.LASOT.ROOT = ''
__C.DATASET.LASOT.ANNO = ''
__C.DATASET.LASOT.FRAME_RANGE = 100
__C.DATASET.LASOT.NUM_USE = 100000

__C.DATASET.VIDEOS_PER_EPOCH = 600000

# ------------------------------------------------------------------------ #
# Backbone options
# ------------------------------------------------------------------------ #
__C.BACKBONE = CN()

__C.BACKBONE.TYPE = 'res50'

__C.BACKBONE.KWARGS = CN(new_allowed=True)

# Pretrained backbone weights
__C.BACKBONE.PRETRAINED = ''

# Train layers
__C.BACKBONE.TRAIN_LAYERS = []

# Layer LR
__C.BACKBONE.LAYERS_LR = 0.1

# Switch to train layer
__C.BACKBONE.TRAIN_EPOCH = 10

# ------------------------------------------------------------------------ #
# Adjust layer options
# ------------------------------------------------------------------------ #
__C.ADJUST = CN()

# Adjust layer
__C.ADJUST.ADJUST = True

__C.ADJUST.KWARGS = CN(new_allowed=True)

# Adjust layer type
__C.ADJUST.TYPE = "AdjustAllLayer"

# ------------------------------------------------------------------------ #
# BAN options
# ------------------------------------------------------------------------ #
__C.BAN = CN()

# Whether to use ban head
__C.BAN.BAN = False

# BAN type
__C.BAN.TYPE = 'MultiBAN'

__C.BAN.KWARGS = CN(new_allowed=True)

# ------------------------------------------------------------------------ #
# Point options
# ------------------------------------------------------------------------ #
__C.POINT = CN()

# Point stride
__C.POINT.STRIDE = 8

# ------------------------------------------------------------------------ #
# Tracker options
# ------------------------------------------------------------------------ #
__C.TRACK = CN()

__C.TRACK.TYPE = 'NanoTracker'

# Scale penalty
__C.TRACK.PENALTY_K = 0.16

# Window influence
__C.TRACK.WINDOW_INFLUENCE = 0.46

# Interpolation learning rate
__C.TRACK.LR = 0.34

# Exemplar size
__C.TRACK.EXEMPLAR_SIZE = 127

# Instance size
__C.TRACK.INSTANCE_SIZE = 255

# Base size
__C.TRACK.BASE_SIZE = 8

# Context amount
__C.TRACK.CONTEXT_AMOUNT = 0.5
--------------------------------------------------------------------------------
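One derived quantity is worth spelling out before the tracker code: the side length of the square response map, as computed in the tracker's `__init__`. A sketch of the arithmetic using the merged YAML values (INSTANCE_SIZE 255, EXEMPLAR_SIZE 127, STRIDE 16, BASE_SIZE 7) rather than the defaults above:

```python
# (INSTANCE_SIZE - EXEMPLAR_SIZE) // STRIDE + 1 + BASE_SIZE
score_size = (255 - 127) // 16 + 1 + 7   # = 16, matching TRAIN.OUTPUT_SIZE in config.yaml
```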
/models/rknnlite_rk3588_tracker.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import numpy as np
import cv2

from core.config import cfg
from utils.bbox import corner2center

from rknnlite.api import RKNNLite


class NanoTracker_RKNNLite(object):
    def __init__(self, Tback_weight, Xback_weight, Head_weight):

        self.score_size = (cfg.TRACK.INSTANCE_SIZE - cfg.TRACK.EXEMPLAR_SIZE) // \
            cfg.POINT.STRIDE + 1 + cfg.TRACK.BASE_SIZE
        hanning = np.hanning(self.score_size)
        window = np.outer(hanning, hanning)
        self.cls_out_channels = 2
        self.window = window.flatten()

        self.points = self.generate_points(cfg.POINT.STRIDE, self.score_size)

        # --------------------------------------------------------#
        # --------------modify environment------------------------#
        # 1. T init
        self.rknn_Tback = RKNNLite()

        # load RKNN model
        print('--> Load RKNN model')
        ret = self.rknn_Tback.load_rknn(Tback_weight)
        if ret != 0:
            print('Load RKNN model failed')
            exit(ret)
        print('done')

        # init runtime environment
        print('--> Init runtime environment')

        ret = self.rknn_Tback.init_runtime(core_mask=RKNNLite.NPU_CORE_0)
        if ret != 0:
            print('Init runtime environment failed')
            exit(ret)
        print('done')

        # 2. X init
        self.rknn_Xback = RKNNLite()

        # Load model
        print('--> rknn_Xback: Loading model')
        ret = self.rknn_Xback.load_rknn(Xback_weight)
        if ret != 0:
            print('rknn_Xback: Load model failed!')
            exit(ret)
        print('rknn_Xback: done')

        # Init runtime environment
        print('--> Init runtime environment')
        ret = self.rknn_Xback.init_runtime(core_mask=RKNNLite.NPU_CORE_1)
        if ret != 0:
            print('Init runtime environment failed!')
            exit(ret)
        print('done')

        # 3. Head init
        self.rknn_Head = RKNNLite()

        # Load model
        print('--> rknn_Head: Loading model')
        ret = self.rknn_Head.load_rknn(Head_weight)
        if ret != 0:
            print('rknn_Head: Load model failed!')
            exit(ret)
        print('rknn_Head: done')

        # Init runtime environment
        print('--> Init runtime environment')
        ret = self.rknn_Head.init_runtime(core_mask=RKNNLite.NPU_CORE_2)
        if ret != 0:
            print('Init runtime environment failed!')
            exit(ret)
        print('done')
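
    # Note: the three graphs are deliberately pinned to different cores of the
    # RK3588's tri-core NPU (NPU_CORE_0/1/2 above), so the template backbone,
    # search backbone and head do not contend for the same core.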

    def generate_points(self, stride, size):
        ori = - (size // 2) * stride
        x, y = np.meshgrid([ori + stride * dx for dx in np.arange(0, size)],
                           [ori + stride * dy for dy in np.arange(0, size)])
        points = np.zeros((size * size, 2), dtype=np.float32)
        points[:, 0], points[:, 1] = x.astype(np.float32).flatten(), y.astype(np.float32).flatten()

        return points

    # _convert_bbox/_convert_score below are the original PyTorch versions;
    # the RKNN pipeline uses the *_numpy variants instead.
    def _convert_bbox(self, delta, point):
        delta = delta.permute(1, 2, 3, 0).contiguous().view(4, -1)
        delta = delta.detach().cpu().numpy()

        delta[0, :] = point[:, 0] - delta[0, :]  # x1
        delta[1, :] = point[:, 1] - delta[1, :]  # y1
        delta[2, :] = point[:, 0] + delta[2, :]  # x2
        delta[3, :] = point[:, 1] + delta[3, :]  # y2
        delta[0, :], delta[1, :], delta[2, :], delta[3, :] = corner2center(delta)
        return delta

    def _convert_score(self, score):
        if self.cls_out_channels == 1:
            score = score.permute(1, 2, 3, 0).contiguous().view(-1)
            score = score.sigmoid().detach().cpu().numpy()
        else:
            score = score.permute(1, 2, 3, 0).contiguous().view(self.cls_out_channels, -1).permute(1, 0)
            score = score.softmax(1).detach()[:, 1].cpu().numpy()
        return score

    def _convert_bbox_numpy(self, delta, point):
        delta = delta.transpose((1, 2, 3, 0)).reshape(4, -1)

        delta[0, :] = point[:, 0] - delta[0, :]  # x1
        delta[1, :] = point[:, 1] - delta[1, :]  # y1
        delta[2, :] = point[:, 0] + delta[2, :]  # x2
        delta[3, :] = point[:, 1] + delta[3, :]  # y2
        delta[0, :], delta[1, :], delta[2, :], delta[3, :] = corner2center(delta)
        return delta

    def _convert_score_numpy(self, score):
        def softmax(logits):
            e_x = np.exp(logits)
            probs = e_x / np.sum(e_x, axis=-1, keepdims=True)
            return probs

        score = score.transpose((1, 2, 3, 0)).reshape(self.cls_out_channels, -1).transpose((1, 0))
        score = softmax(score)[:, 1]

        return score

    def _bbox_clip(self, cx, cy, width, height, boundary):
        cx = max(0, min(cx, boundary[1]))
        cy = max(0, min(cy, boundary[0]))
        width = max(10, min(width, boundary[1]))
        height = max(10, min(height, boundary[0]))
        return cx, cy, width, height

    def get_subwindow(self, im, pos, model_sz, original_sz, avg_chans):
        """
        args:
            im: bgr based image
            pos: center position
            model_sz: exemplar size
            original_sz: original size
            avg_chans: channel average
        """
        if isinstance(pos, float):
            pos = [pos, pos]
        sz = original_sz
        im_sz = im.shape
        c = (original_sz + 1) / 2
        # context_xmin = round(pos[0] - c)  # py2 and py3 round
        context_xmin = np.floor(pos[0] - c + 0.5)
        context_xmax = context_xmin + sz - 1
        # context_ymin = round(pos[1] - c)
        context_ymin = np.floor(pos[1] - c + 0.5)
        context_ymax = context_ymin + sz - 1
        left_pad = int(max(0., -context_xmin))
        top_pad = int(max(0., -context_ymin))
        right_pad = int(max(0., context_xmax - im_sz[1] + 1))
        bottom_pad = int(max(0., context_ymax - im_sz[0] + 1))

        context_xmin = context_xmin + left_pad
        context_xmax = context_xmax + left_pad
        context_ymin = context_ymin + top_pad
        context_ymax = context_ymax + top_pad

        r, c, k = im.shape
        if any([top_pad, bottom_pad, left_pad, right_pad]):
            size = (r + top_pad + bottom_pad, c + left_pad + right_pad, k)
            te_im = np.zeros(size, np.uint8)
            te_im[top_pad:top_pad + r, left_pad:left_pad + c, :] = im
            if top_pad:
                te_im[0:top_pad, left_pad:left_pad + c, :] = avg_chans
            if bottom_pad:
                te_im[r + top_pad:, left_pad:left_pad + c, :] = avg_chans
            if left_pad:
                te_im[:, 0:left_pad, :] = avg_chans
            if right_pad:
                te_im[:, c + left_pad:, :] = avg_chans
            im_patch = te_im[int(context_ymin):int(context_ymax + 1),
                             int(context_xmin):int(context_xmax + 1), :]
        else:
            im_patch = im[int(context_ymin):int(context_ymax + 1),
                          int(context_xmin):int(context_xmax + 1), :]

        if not np.array_equal(model_sz, original_sz):
            im_patch = cv2.resize(im_patch, (model_sz, model_sz))
        im_patch = im_patch.transpose(2, 0, 1)
        im_patch = im_patch[np.newaxis, :, :, :]
        im_patch = im_patch.astype(np.float32)

        return im_patch

    def init(self, img, bbox):
        """
        args:
            img(np.ndarray): BGR image
            bbox: (x, y, w, h) bbox
        """
        self.center_pos = np.array([bbox[0] + (bbox[2] - 1) / 2,
                                    bbox[1] + (bbox[3] - 1) / 2])
        self.size = np.array([bbox[2], bbox[3]])

        # calculate z crop size
        w_z = self.size[0] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
        h_z = self.size[1] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
        s_z = round(np.sqrt(w_z * h_z))

        # calculate channel average
        self.channel_average = np.mean(img, axis=(0, 1))

        # get crop
        z_crop = self.get_subwindow(img, self.center_pos,
                                    cfg.TRACK.EXEMPLAR_SIZE,
                                    s_z, self.channel_average)

        back_T_in = z_crop.transpose((0, 2, 3, 1))

        # self.Toutput = self.rknn_Tback.inference(inputs=[z_crop], data_format='nchw')
        self.Toutput = self.rknn_Tback.inference(inputs=[back_T_in])

        self.rknn_Tback.release()
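
    # The template features (self.Toutput) are computed once in init() and
    # reused for every frame, which is why rknn_Tback is released immediately
    # above; only the search backbone and head run per frame.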

    def track(self, img):
        """
        args:
            img(np.ndarray): BGR image
        return:
            bbox(list): [x, y, width, height]
        """
        w_z = self.size[0] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
        h_z = self.size[1] + cfg.TRACK.CONTEXT_AMOUNT * np.sum(self.size)
        s_z = np.sqrt(w_z * h_z)
        scale_z = cfg.TRACK.EXEMPLAR_SIZE / s_z
        s_x = s_z * (cfg.TRACK.INSTANCE_SIZE / cfg.TRACK.EXEMPLAR_SIZE)
        x_crop = self.get_subwindow(img, self.center_pos,
                                    cfg.TRACK.INSTANCE_SIZE,
                                    round(s_x), self.channel_average)

        # predict
        back_X_in = x_crop.transpose((0, 2, 3, 1))
        # self.Xoutput = self.rknn_Xback.inference(inputs=[x_crop], data_format='nchw')
        self.Xoutput = self.rknn_Xback.inference(inputs=[back_X_in])

        head_T_in = self.Toutput[0].transpose((0, 2, 3, 1))
        head_X_in = self.Xoutput[0].transpose((0, 2, 3, 1))

        # outputs = self.rknn_Head.inference(inputs=[self.Toutput[0], self.Xoutput[0]], data_format='nchw')
        outputs = self.rknn_Head.inference(inputs=[head_T_in, head_X_in])

        score = self._convert_score_numpy(outputs[0])
        pred_bbox = self._convert_bbox_numpy(outputs[1], self.points)

        # score = self._convert_score(outputs['cls'])
        # pred_bbox = self._convert_bbox(outputs['loc'], self.points)

        def change(r):
            return np.maximum(r, 1. / r)

        def sz(w, h):
            pad = (w + h) * 0.5
            return np.sqrt((w + pad) * (h + pad))

        # scale penalty
        s_c = change(sz(pred_bbox[2, :], pred_bbox[3, :]) /
                     (sz(self.size[0] * scale_z, self.size[1] * scale_z)))

        # aspect ratio penalty
        r_c = change((self.size[0] / self.size[1]) /
                     (pred_bbox[2, :] / pred_bbox[3, :]))
        penalty = np.exp(-(r_c * s_c - 1) * cfg.TRACK.PENALTY_K)

        # score
        pscore = penalty * score

        # window penalty
        pscore = pscore * (1 - cfg.TRACK.WINDOW_INFLUENCE) + \
            self.window * cfg.TRACK.WINDOW_INFLUENCE

        best_idx = np.argmax(pscore)

        bbox = pred_bbox[:, best_idx] / scale_z

        lr = penalty[best_idx] * score[best_idx] * cfg.TRACK.LR

        cx = bbox[0] + self.center_pos[0]
        cy = bbox[1] + self.center_pos[1]

        # smooth bbox
        width = self.size[0] * (1 - lr) + bbox[2] * lr
        height = self.size[1] * (1 - lr) + bbox[3] * lr

        # clip boundary
        cx, cy, width, height = self._bbox_clip(cx, cy, width,
                                                height, img.shape[:2])

        # update state
        self.center_pos = np.array([cx, cy])
        self.size = np.array([width, height])

        bbox = [cx - width / 2,
                cy - height / 2,
                width,
                height]

        best_score = score[best_idx]
        return {
            'bbox': bbox,
            'best_score': best_score
        }
--------------------------------------------------------------------------------