├── yolov7 ├── __init__.py ├── models │ ├── __init__.py │ ├── experimental.py │ ├── common.py │ └── yolo.py └── utils │ ├── __init__.py │ ├── autoanchor.py │ ├── torch_utils.py │ └── general.py ├── superglue ├── __init__.py ├── README.md ├── matching.py ├── superpoint.py ├── superglue.py └── utils.py ├── __init__.py ├── README.md ├── utils ├── args_utils.py ├── json_utils.py └── model_utils.py └── task1.py /yolov7/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /superglue/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /yolov7/models/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /yolov7/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .task1 import Task1 2 | -------------------------------------------------------------------------------- /superglue/README.md: -------------------------------------------------------------------------------- 1 | ## SuperGlue Dependencies 2 | - Python 3 >= 3.5 3 | - PyTorch >= 1.1 4 | - OpenCV >= 3.4 (4.1.2.30 recommended for best GUI keyboard interaction) 5 | - Matplotlib >= 3.1 6 | - NumPy >= 1.18 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AGC2022_Task1_Detection 2 | ### Pre-trained Model Download 3 | - SuperGlue: https://drive.google.com/file/d/1ACYKMSg8GCb5qEgvfO5m0LTCJvhnOMWm/view?usp=sharing 4 | - YOLOv7: https://drive.google.com/file/d/1-eCIYzgr9eXp2ANBp3R4ZQor7Vl8YQwO/view?usp=share_link 5 | 6 | ## SuperGlue Dependencies 7 | - Python >= 3.5 8 | - PyTorch >= 1.1 9 | - OpenCV >= 3.4 (4.1.2.30 recommended for best GUI keyboard interaction) 10 | - Matplotlib >= 3.1 11 | - NumPy >= 1.18 12 | 13 | ### Make Checkpoint Folder 14 | ``` 15 | cd superglue 16 | mkdir weights 17 | cd weights 18 | mv {SUPERGLUE_PRETRAINED_MODEL} .
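# Note (inference from superglue/superpoint.py and superglue/superglue.py in this repo):
# both models load their checkpoints relative to the package, so this weights/ folder is
# expected to end up containing superpoint_v1.pth and superglue_indoor.pth
# (or superglue_outdoor.pth, depending on the 'weights' config).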
19 | ``` 20 | 21 | ### Run 22 | ``` 23 | python task1.py 24 | --clue_path={CLUE_PATH} 25 | --yolo_path={YOLO_PRETRAINED_MODEL_PATH} 26 | --img_conf_th 0.1 27 | --img_kp_th 150 28 | --txt_th 0.3 29 | --od_th 0.3 30 | ``` 31 | -------------------------------------------------------------------------------- /utils/args_utils.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def parse_args(): 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument('--video_path', default=None, help='video path') 6 | parser.add_argument('--clue_path', default=None, help='clue(img, txt) path') 7 | parser.add_argument('--json_output_path', default='.', help='json output path') 8 | 9 | parser.add_argument('--task1_debug', action="store_true", help='(optional)debug mode') 10 | parser.add_argument('--debug_input_path', default=None, help='debugging input image path') 11 | parser.add_argument('--debug_output_path', default=None, help='debugging output image path') 12 | 13 | parser.add_argument('--yolo_path', default='.', help='yolo task1 checkpoint path') 14 | parser.add_argument('--img_conf_th', type=float, default=0.6, help='img threshold') # NOTE: determine best confidence threshold value 15 | parser.add_argument('--img_kp_th', type=float, default=50, help='img threshold') # NOTE: determine best keypoint threshold value 16 | parser.add_argument('--txt_th', type=float, default=0.8, help='txt threshold') # NOTE: determine value 17 | parser.add_argument('--od_th', type=float, default=0.5, help='OD threshold') # NOTE: determine value 18 | parser.add_argument('--total_th', type=float, default=0.9, help='img+txt threshold') # NOTE: determine value 19 | args = parser.parse_args() 20 | 21 | return args -------------------------------------------------------------------------------- /yolov7/models/experimental.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torch 3 | import torch.nn as nn 4 | import sys 5 | #sys.path.append('/home/eulrang/workspace/git/Drone_Challenge/task1/yolov7/') 6 | sys.path.append(os.path.abspath(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))) 7 | from .common import Conv 8 | 9 | class Ensemble(nn.ModuleList): 10 | # Ensemble of models 11 | def __init__(self): 12 | super(Ensemble, self).__init__() 13 | 14 | def forward(self, x, augment=False): 15 | y = [] 16 | for module in self: 17 | y.append(module(x, augment)[0]) 18 | # y = torch.stack(y).max(0)[0] # max ensemble 19 | # y = torch.stack(y).mean(0) # mean ensemble 20 | y = torch.cat(y, 1) # nms ensemble 21 | return y, None # inference, train output 22 | 23 | 24 | def attempt_load(weights, map_location=None): 25 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 26 | model = Ensemble() 27 | for w in weights if isinstance(weights, list) else [weights]: 28 | # attempt_download(w) 29 | ckpt = torch.load(w, map_location=map_location) # load 30 | model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model 31 | 32 | # Compatibility updates 33 | for m in model.modules(): 34 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: 35 | m.inplace = True # pytorch 1.7.0 compatibility 36 | elif type(m) is nn.Upsample: 37 | m.recompute_scale_factor = None # torch 1.11.0 compatibility 38 | elif type(m) is Conv: 39 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 40 | 41 | if len(model) == 1: 42 | return 
model[-1] # return model 43 | else: 44 | print('Ensemble created with %s\n' % weights) 45 | for k in ['names', 'stride']: 46 | setattr(model, k, getattr(model[-1], k)) 47 | return model # return ensemble 48 | 49 | 50 | -------------------------------------------------------------------------------- /superglue/matching.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 
32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # 39 | # %AUTHORS_END% 40 | # --------------------------------------------------------------------*/ 41 | # %BANNER_END% 42 | 43 | import torch 44 | 45 | from .superpoint import SuperPoint 46 | from .superglue import SuperGlue 47 | 48 | 49 | class Matching(torch.nn.Module): 50 | """ Image Matching Frontend (SuperPoint + SuperGlue) """ 51 | def __init__(self, config={}): 52 | super().__init__() 53 | self.superpoint = SuperPoint(config.get('superpoint', {})) 54 | self.superglue = SuperGlue(config.get('superglue', {})) 55 | 56 | def forward(self, data): 57 | """ Run SuperPoint (optionally) and SuperGlue 58 | SuperPoint is skipped if ['keypoints0', 'keypoints1'] exist in input 59 | Args: 60 | data: dictionary with minimal keys: ['image0', 'image1'] 61 | """ 62 | pred = {} 63 | 64 | # Extract SuperPoint (keypoints, scores, descriptors) if not provided 65 | if 'keypoints0' not in data: 66 | pred0 = self.superpoint({'image': data['image0']}) 67 | pred = {**pred, **{k+'0': v for k, v in pred0.items()}} 68 | 69 | if 'keypoints1' not in data: 70 | pred1 = self.superpoint({'image': data['image1']}) 71 | pred = {**pred, **{k+'1': v for k, v in pred1.items()}} 72 | 73 | # Batch all features 74 | # We should either have i) one image per batch, or 75 | # ii) the same number of local features for all images in the batch. 76 | data = {**data, **pred} 77 | 78 | for k in data: 79 | if isinstance(data[k], (list, tuple)): 80 | data[k] = torch.stack(data[k]) 81 | 82 | # Perform the matching 83 | pred = {**pred, **self.superglue(data)} 84 | 85 | return pred 86 | -------------------------------------------------------------------------------- /utils/json_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def json_preprocess(data_folder = './data_toy/'): 4 | text_list = data_folder 5 | 6 | objects_dict = { 7 | '책상': ['desk'], 8 | '칠판': ['whiteboard'], 9 | '의자': ['chair'], 10 | '캐비닛': ['cabinet'], 11 | '모니터': ['monitor'], 12 | '상자': ['box'], 13 | '쓰레기통': ['trash bin'], 14 | '바구니': ['bakset'], 15 | '컴퓨터': ['computer'], 16 | '책장': ['bookshelf'], 17 | '프린터': ['printer'], 18 | '노트북': ['laptop'], 19 | '현수막': ['banner'], 20 | '거울': ['mirror'], 21 | '계단': ['stairs'], 22 | '장난감': ['toy'], 23 | '소화기': ['fire extinguisher'], 24 | '포스터': ['poster'], 25 | '세면대': ['sink'], 26 | '운동기구': ['exercise tool'], 27 | '스피커': ['speaker'], 28 | } 29 | 30 | people_dict = { 31 | '아이': 'person_child', 32 | '아내': 'person_woman', 33 | '남편': 'person_man', 34 | '엄마': 'person_woman', 35 | '아빠': 'person_man' 36 | } 37 | 38 | top_dict = { 39 | '빨강': 'up_red', 40 | '주황': 'up_orange', 41 | '노랑': 'up_yellow', 42 | '초록': 'up_green', 43 | '파랑': 'up_blue', 44 | '보라': 'up_purple', 45 | '흰색': 'up_white', 46 | '검정': 'up_black', 47 | '회색': 'up_gray' 48 | } 49 | 50 | low_dic = { 51 | '빨강': 'low_red', 52 | '주황': 'low_orange', 53 | '노랑': 'low_yellow', 54 | '초록': 'low_green', 55 | '파랑': 'low_blue', 56 | '보라': 'low_purple', 57 | '흰색': 'low_white', 58 | '검정': 'low_black', 59 | '회색': 'low_gray' 60 | 61 | } 62 | 63 | query = {} 64 | 65 | with open(text_list, 'r') as f: 66 | data = json.load(f) # text data 67 | 68 | # (1) json data parsing 69 | num = data.get('no') # later used when making answer sheet file (json) 70 | objects = data.get('주변사물') 71 | people = data.get('일행') 72 | top = data.get('상의') 73 | low = 
data.get('하의') 74 | 75 | # (2) making query 76 | query[num] = [] 77 | 78 | ## i. objects 79 | for obj in objects: 80 | for obj_query in objects_dict[obj]: 81 | query[num].append(obj_query) 82 | 83 | ## ii. people & clothes 84 | ### rule-based female/male/child classification 85 | if people is not None: 86 | if '아내' in people: 87 | # 요구조자 = male 88 | if top is not None: 89 | shirt = top_dict[top] 90 | query[num].append(shirt) 91 | if low is not None: 92 | pants = low_dic[low] 93 | query[num].append(pants) 94 | # 요구조자 본인 95 | query[num].append('person_man') 96 | elif '남편' in people: 97 | # 요구조자 = female 98 | if top is not None: 99 | shirt = top_dict[top] 100 | query[num].append(shirt) 101 | if low is not None: 102 | skirt = low_dic[low] 103 | query[num].append(skirt) 104 | # 요구조자 본인 105 | query[num].append('person_woman') 106 | elif ('엄마' in people) or ('아빠' in people): 107 | # 요구조자 = child 108 | # female or male 109 | if top is not None: 110 | shirt = top_dict[top] 111 | query[num].append(shirt) 112 | if low is not None: 113 | pants = low_dic[low] 114 | query[num].append(pants) 115 | # 요구조자 본인 116 | query[num].append('person_child') 117 | for person in people: 118 | query[num].append(people_dict[person]) 119 | 120 | return query 121 | 122 | def json_postprocess(clues_num, data): 123 | # json skeleton 124 | json_object = { 125 | 'answer_sheet': { 126 | 'room_id': None, 127 | 'mission': "1", 128 | 'answer': { 129 | 'person_id': { 130 | } 131 | } 132 | } 133 | } 134 | 135 | person_id_list = [] 136 | for i in range(0, len(data)): 137 | if data[i] < 500: 138 | person_id_list.append(str(data[i])) 139 | json_object['answer_sheet']['answer']['person_id'].update({clues_num:person_id_list}) 140 | 141 | return json_object 142 | -------------------------------------------------------------------------------- /utils/model_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import matplotlib.pyplot as plt 4 | import matplotlib 5 | import random 6 | 7 | # ----------------------------------------- 8 | # Superglue utils 9 | # ----------------------------------------- 10 | def matching(data, superpoint, superglue): 11 | """ Run SuperPoint (optionally) and SuperGlue 12 | SuperPoint is skipped if ['keypoints0', 'keypoints1'] exist in input 13 | Args: 14 | data: dictionary with minimal keys: ['image0', 'image1'] 15 | """ 16 | 17 | torch.set_grad_enabled(False) 18 | 19 | pred = {} 20 | 21 | # Extract SuperPoint (keypoints, scores, descriptors) if not provided 22 | if 'keypoints0' not in data: 23 | pred0 = superpoint({'image': data['image0']}) 24 | pred = {**pred, **{k+'0': v for k, v in pred0.items()}} 25 | if 'keypoints1' not in data: 26 | pred1 = superpoint({'image': data['image1']}) 27 | pred = {**pred, **{k+'1': v for k, v in pred1.items()}} 28 | 29 | data = {**data, **pred} 30 | 31 | for k in data: 32 | if isinstance(data[k], (list, tuple)): 33 | data[k] = torch.stack(data[k]) 34 | 35 | # Perform the matching 36 | pred = {**pred, **superglue(data)} 37 | pred = {k: v[0].cpu().numpy() for k, v in pred.items()} 38 | matches, conf = pred['matches0'], pred['matching_scores0'] 39 | 40 | return pred, matches, conf 41 | 42 | 43 | def process_resize(w, h, resize): 44 | assert(len(resize) > 0 and len(resize) <= 2) 45 | if len(resize) == 1 and resize[0] > -1: 46 | scale = resize[0] / max(h, w) 47 | w_new, h_new = int(round(w*scale)), int(round(h*scale)) 48 | elif len(resize) == 1 and resize[0] == -1: 49 | w_new, h_new = w, h 50 | else: # 
len(resize) == 2: 51 | w_new, h_new = resize[0], resize[1] 52 | 53 | # Issue warning if resolution is too small or too large. 54 | if max(w_new, h_new) < 160: 55 | print('Warning: input resolution is very small, results may vary') 56 | elif max(w_new, h_new) > 2000: 57 | print('Warning: input resolution is very large, results may vary') 58 | 59 | return w_new, h_new 60 | 61 | 62 | def frame2tensor(frame, device): 63 | return torch.from_numpy(frame/255.).float()[None, None].to(device) 64 | 65 | 66 | def read_image(img, resize, device): 67 | image = img 68 | if image is None: 69 | return None, None, None 70 | w, h = image.shape[1], image.shape[0] 71 | w_new, h_new = process_resize(w, h, resize) 72 | scales = (float(w) / float(w_new), float(h) / float(h_new)) 73 | 74 | # resize 75 | image = cv2.resize(image, (w_new, h_new)).astype('float32') 76 | 77 | inp = frame2tensor(image, device) 78 | return image, inp, scales 79 | 80 | 81 | def plot_image_pair(imgs, dpi=100, size=6, pad=.5): 82 | n = len(imgs) 83 | assert n == 2, 'number of images must be two' 84 | figsize = (size*n, size*3/4) if size is not None else None 85 | _, ax = plt.subplots(1, n, figsize=figsize, dpi=dpi) 86 | for i in range(n): 87 | ax[i].imshow(imgs[i], cmap=plt.get_cmap('gray'), vmin=0, vmax=255) 88 | ax[i].get_yaxis().set_ticks([]) 89 | ax[i].get_xaxis().set_ticks([]) 90 | for spine in ax[i].spines.values(): # remove frame 91 | spine.set_visible(False) 92 | plt.tight_layout(pad=pad) 93 | 94 | 95 | def plot_matches(kpts0, kpts1, color, lw=1.5, ps=4): 96 | fig = plt.gcf() 97 | ax = fig.axes 98 | fig.canvas.draw() 99 | 100 | transFigure = fig.transFigure.inverted() 101 | fkpts0 = transFigure.transform(ax[0].transData.transform(kpts0)) 102 | fkpts1 = transFigure.transform(ax[1].transData.transform(kpts1)) 103 | 104 | fig.lines = [matplotlib.lines.Line2D( 105 | (fkpts0[i, 0], fkpts1[i, 0]), (fkpts0[i, 1], fkpts1[i, 1]), zorder=1, 106 | transform=fig.transFigure, c=color[i], linewidth=lw) 107 | for i in range(len(kpts0))] 108 | ax[0].scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps) 109 | ax[1].scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps) 110 | 111 | 112 | def make_matching_plot(image0, image1, mkpts0, mkpts1, 113 | color, text, path, small_text=[]): 114 | 115 | plot_image_pair([image0, image1]) 116 | plot_matches(mkpts0, mkpts1, color) 117 | 118 | fig = plt.gcf() 119 | txt_color = 'k' if image0[:100, :150].mean() > 200 else 'w' 120 | fig.text( 121 | 0.01, 0.99, '\n'.join(text), transform=fig.axes[0].transAxes, 122 | fontsize=15, va='top', ha='left', color=txt_color) 123 | 124 | txt_color = 'k' if image0[-100:, :150].mean() > 200 else 'w' 125 | fig.text( 126 | 0.01, 0.01, '\n'.join(small_text), transform=fig.axes[0].transAxes, 127 | fontsize=5, va='bottom', ha='left', color=txt_color) 128 | 129 | plt.savefig(str(path), bbox_inches='tight', pad_inches=0) 130 | plt.close() 131 | 132 | # ----------------------------------------- 133 | # YOLO utils 134 | # ----------------------------------------- 135 | def plot_one_box(x, img, color=None, label=None, line_thickness=3): 136 | # Plots one bounding box on image img 137 | tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness 138 | color = color or [random.randint(0, 128) for _ in range(3)] 139 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 140 | cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) 141 | if label: 142 | tf = max(tl - 1, 1) # font thickness 143 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, 
thickness=tf)[0] 144 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 145 | cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled 146 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [127, 127, 127], thickness=tf, lineType=cv2.LINE_AA) -------------------------------------------------------------------------------- /yolov7/utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # Auto-anchor utils 2 | import os.path 3 | 4 | import numpy as np 5 | import torch 6 | import yaml 7 | from scipy.cluster.vq import kmeans 8 | from tqdm import tqdm 9 | 10 | import sys 11 | #sys.path.append('/home/eulrang/workspace/git/Drone_Challenge/task1/yolov7/utils') 12 | sys.path.append(os.path.abspath(os.path.dirname(__file__))) 13 | from general import colorstr 14 | 15 | 16 | def check_anchor_order(m): 17 | # Check anchor order against stride order for YOLO Detect() module m, and correct if necessary 18 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 19 | da = a[-1] - a[0] # delta a 20 | ds = m.stride[-1] - m.stride[0] # delta s 21 | if da.sign() != ds.sign(): # same order 22 | print('Reversing anchor order') 23 | m.anchors[:] = m.anchors.flip(0) 24 | m.anchor_grid[:] = m.anchor_grid.flip(0) 25 | 26 | 27 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 28 | # Check anchor fit to data, recompute if necessary 29 | prefix = colorstr('autoanchor: ') 30 | print(f'\n{prefix}Analyzing anchors... ', end='') 31 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 32 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 33 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 34 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 35 | 36 | def metric(k): # compute metric 37 | r = wh[:, None] / k[None] 38 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 39 | best = x.max(1)[0] # best_x 40 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 41 | bpr = (best > 1. / thr).float().mean() # best possible recall 42 | return bpr, aat 43 | 44 | anchors = m.anchor_grid.clone().cpu().view(-1, 2) # current anchors 45 | bpr, aat = metric(anchors) 46 | print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') 47 | if bpr < 0.98: # threshold to recompute 48 | print('. Attempting to improve anchors, please wait...') 49 | na = m.anchor_grid.numel() // 2 # number of anchors 50 | try: 51 | anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 52 | except Exception as e: 53 | print(f'{prefix}ERROR: {e}') 54 | new_bpr = metric(anchors)[0] 55 | if new_bpr > bpr: # replace anchors 56 | anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) 57 | m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid) # for inference 58 | m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 59 | check_anchor_order(m) 60 | print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.') 61 | else: 62 | print(f'{prefix}Original anchors better than new anchors. 
Proceeding with original anchors.') 63 | print('') # newline 64 | 65 | 66 | def kmean_anchors(path='./data/coco.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 67 | """ Creates kmeans-evolved anchors from training dataset 68 | 69 | Arguments: 70 | path: path to dataset *.yaml, or a loaded dataset 71 | n: number of anchors 72 | img_size: image size used for training 73 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 74 | gen: generations to evolve anchors using genetic algorithm 75 | verbose: print all results 76 | 77 | Return: 78 | k: kmeans evolved anchors 79 | 80 | Usage: 81 | from utils.autoanchor import *; _ = kmean_anchors() 82 | """ 83 | thr = 1. / thr 84 | prefix = colorstr('autoanchor: ') 85 | 86 | def metric(k, wh): # compute metrics 87 | r = wh[:, None] / k[None] 88 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 89 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 90 | return x, x.max(1)[0] # x, best_x 91 | 92 | def anchor_fitness(k): # mutation fitness 93 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 94 | return (best * (best > thr).float()).mean() # fitness 95 | 96 | def print_results(k): 97 | k = k[np.argsort(k.prod(1))] # sort small to large 98 | x, best = metric(k, wh0) 99 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 100 | print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr') 101 | print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' 102 | f'past_thr={x[x > thr].mean():.3f}-mean: ', end='') 103 | for i, x in enumerate(k): 104 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 105 | return k 106 | 107 | if isinstance(path, str): # *.yaml file 108 | with open(path) as f: 109 | data_dict = yaml.load(f, Loader=yaml.SafeLoader) # model dict 110 | from utils.datasets import LoadImagesAndLabels 111 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 112 | else: 113 | dataset = path # dataset 114 | 115 | # Get label wh 116 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 117 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 118 | 119 | # Filter 120 | i = (wh0 < 3.0).any(1).sum() 121 | if i: 122 | print(f'{prefix}WARNING: Extremely small objects found. 
{i} of {len(wh0)} labels are < 3 pixels in size.') 123 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 124 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 125 | 126 | # Kmeans calculation 127 | print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') 128 | s = wh.std(0) # sigmas for whitening 129 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 130 | assert len(k) == n, print(f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}') 131 | k *= s 132 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 133 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 134 | k = print_results(k) 135 | 136 | # Plot 137 | # k, d = [None] * 20, [None] * 20 138 | # for i in tqdm(range(1, 21)): 139 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 140 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 141 | # ax = ax.ravel() 142 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 143 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 144 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 145 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 146 | # fig.savefig('wh.png', dpi=200) 147 | 148 | # Evolve 149 | npr = np.random 150 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 151 | pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar 152 | for _ in pbar: 153 | v = np.ones(sh) 154 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 155 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 156 | kg = (k.copy() * v).clip(min=2.0) 157 | fg = anchor_fitness(kg) 158 | if fg > f: 159 | f, k = fg, kg.copy() 160 | pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 161 | if verbose: 162 | print_results(k) 163 | 164 | return print_results(k) 165 | -------------------------------------------------------------------------------- /superglue/superpoint.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. 
ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # 39 | # %AUTHORS_END% 40 | # --------------------------------------------------------------------*/ 41 | # %BANNER_END% 42 | 43 | from pathlib import Path 44 | import torch 45 | from torch import nn 46 | 47 | def simple_nms(scores, nms_radius: int): 48 | """ Fast Non-maximum suppression to remove nearby points """ 49 | assert(nms_radius >= 0) 50 | 51 | def max_pool(x): 52 | return torch.nn.functional.max_pool2d( 53 | x, kernel_size=nms_radius*2+1, stride=1, padding=nms_radius) 54 | 55 | zeros = torch.zeros_like(scores) 56 | max_mask = scores == max_pool(scores) 57 | for _ in range(2): 58 | supp_mask = max_pool(max_mask.float()) > 0 59 | supp_scores = torch.where(supp_mask, zeros, scores) 60 | new_max_mask = supp_scores == max_pool(supp_scores) 61 | max_mask = max_mask | (new_max_mask & (~supp_mask)) 62 | return torch.where(max_mask, scores, zeros) 63 | 64 | 65 | def remove_borders(keypoints, scores, border: int, height: int, width: int): 66 | """ Removes keypoints too close to the border """ 67 | mask_h = (keypoints[:, 0] >= border) & (keypoints[:, 0] < (height - border)) 68 | mask_w = (keypoints[:, 1] >= border) & (keypoints[:, 1] < (width - border)) 69 | mask = mask_h & mask_w 70 | return keypoints[mask], scores[mask] 71 | 72 | 73 | def top_k_keypoints(keypoints, scores, k: int): 74 | if k >= len(keypoints): 75 | return keypoints, scores 76 | scores, indices = torch.topk(scores, k, dim=0) 77 | return keypoints[indices], scores 78 | 79 | 80 | def sample_descriptors(keypoints, descriptors, s: int = 8): 81 | """ Interpolate descriptors at keypoint locations """ 82 | b, c, h, w = descriptors.shape 83 | keypoints = keypoints - s / 2 + 0.5 84 | keypoints /= torch.tensor([(w*s - s/2 - 0.5), (h*s - s/2 - 0.5)], 85 | ).to(keypoints)[None] 86 | keypoints = keypoints*2 - 1 # normalize to (-1, 1) 87 | args = {'align_corners': True} if int(torch.__version__[2]) > 2 else {} 88 | descriptors = torch.nn.functional.grid_sample( 89 | descriptors, keypoints.view(b, 1, -1, 2), mode='bilinear', **args) 90 | descriptors = torch.nn.functional.normalize( 91 | descriptors.reshape(b, c, -1), p=2, dim=1) 92 | return descriptors 93 | 94 | 95 | class SuperPoint(nn.Module): 96 | """SuperPoint Convolutional Detector and Descriptor 97 | 98 | SuperPoint: Self-Supervised Interest Point Detection and 99 | Description. Daniel DeTone, Tomasz Malisiewicz, and Andrew 100 | Rabinovich. In CVPRW, 2019. 
https://arxiv.org/abs/1712.07629 101 | 102 | """ 103 | default_config = { 104 | 'descriptor_dim': 256, 105 | 'nms_radius': 4, 106 | 'keypoint_threshold': 0.005, 107 | 'max_keypoints': -1, 108 | 'remove_borders': 4, 109 | } 110 | 111 | def __init__(self, config): 112 | super().__init__() 113 | self.config = {**self.default_config, **config} 114 | 115 | self.relu = nn.ReLU(inplace=True) 116 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 117 | c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256 118 | 119 | self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1) 120 | self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1) 121 | self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) 122 | self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1) 123 | self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1) 124 | self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1) 125 | self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1) 126 | self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1) 127 | 128 | self.convPa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 129 | self.convPb = nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0) 130 | 131 | self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 132 | self.convDb = nn.Conv2d( 133 | c5, self.config['descriptor_dim'], 134 | kernel_size=1, stride=1, padding=0) 135 | 136 | path = Path(__file__).parent / 'weights/superpoint_v1.pth' 137 | self.load_state_dict(torch.load(str(path))) 138 | 139 | mk = self.config['max_keypoints'] 140 | if mk == 0 or mk < -1: 141 | raise ValueError('\"max_keypoints\" must be positive or \"-1\"') 142 | 143 | # print('Loaded SuperPoint model') 144 | 145 | def forward(self, data): 146 | """ Compute keypoints, scores, descriptors for image """ 147 | # Shared Encoder 148 | x = self.relu(self.conv1a(data['image'])) 149 | x = self.relu(self.conv1b(x)) 150 | x = self.pool(x) 151 | x = self.relu(self.conv2a(x)) 152 | x = self.relu(self.conv2b(x)) 153 | x = self.pool(x) 154 | x = self.relu(self.conv3a(x)) 155 | x = self.relu(self.conv3b(x)) 156 | x = self.pool(x) 157 | x = self.relu(self.conv4a(x)) 158 | x = self.relu(self.conv4b(x)) 159 | 160 | # Compute the dense keypoint scores 161 | cPa = self.relu(self.convPa(x)) 162 | scores = self.convPb(cPa) 163 | scores = torch.nn.functional.softmax(scores, 1)[:, :-1] 164 | b, _, h, w = scores.shape 165 | scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8) 166 | scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h*8, w*8) 167 | scores = simple_nms(scores, self.config['nms_radius']) 168 | 169 | # Extract keypoints 170 | keypoints = [ 171 | torch.nonzero(s > self.config['keypoint_threshold']) 172 | for s in scores] 173 | scores = [s[tuple(k.t())] for s, k in zip(scores, keypoints)] 174 | 175 | # Discard keypoints near the image borders 176 | keypoints, scores = list(zip(*[ 177 | remove_borders(k, s, self.config['remove_borders'], h*8, w*8) 178 | for k, s in zip(keypoints, scores)])) 179 | 180 | # Keep the k keypoints with highest score 181 | if self.config['max_keypoints'] >= 0: 182 | keypoints, scores = list(zip(*[ 183 | top_k_keypoints(k, s, self.config['max_keypoints']) 184 | for k, s in zip(keypoints, scores)])) 185 | 186 | # Convert (h, w) to (x, y) 187 | keypoints = [torch.flip(k, [1]).float() for k in keypoints] 188 | 189 | # Compute the dense descriptors 190 | cDa = self.relu(self.convDa(x)) 191 | descriptors = self.convDb(cDa) 192 | descriptors = 
torch.nn.functional.normalize(descriptors, p=2, dim=1) 193 | 194 | # Extract descriptors 195 | descriptors = [sample_descriptors(k[None], d[None], 8)[0] 196 | for k, d in zip(keypoints, descriptors)] 197 | 198 | return { 199 | 'keypoints': keypoints, 200 | 'scores': scores, 201 | 'descriptors': descriptors, 202 | } 203 | -------------------------------------------------------------------------------- /superglue/superglue.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 
32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # 39 | # %AUTHORS_END% 40 | # --------------------------------------------------------------------*/ 41 | # %BANNER_END% 42 | 43 | from copy import deepcopy 44 | from pathlib import Path 45 | from typing import List, Tuple 46 | 47 | import torch 48 | from torch import nn 49 | 50 | 51 | def MLP(channels: List[int], do_bn: bool = True) -> nn.Module: 52 | """ Multi-layer perceptron """ 53 | n = len(channels) 54 | layers = [] 55 | for i in range(1, n): 56 | layers.append( 57 | nn.Conv1d(channels[i - 1], channels[i], kernel_size=1, bias=True)) 58 | if i < (n-1): 59 | if do_bn: 60 | layers.append(nn.BatchNorm1d(channels[i])) 61 | layers.append(nn.ReLU()) 62 | return nn.Sequential(*layers) 63 | 64 | 65 | def normalize_keypoints(kpts, image_shape): 66 | """ Normalize keypoints locations based on image image_shape""" 67 | _, _, height, width = image_shape 68 | one = kpts.new_tensor(1) 69 | size = torch.stack([one*width, one*height])[None] 70 | center = size / 2 71 | scaling = size.max(1, keepdim=True).values * 0.7 72 | return (kpts - center[:, None, :]) / scaling[:, None, :] 73 | 74 | 75 | class KeypointEncoder(nn.Module): 76 | """ Joint encoding of visual appearance and location using MLPs""" 77 | def __init__(self, feature_dim: int, layers: List[int]) -> None: 78 | super().__init__() 79 | self.encoder = MLP([3] + layers + [feature_dim]) 80 | nn.init.constant_(self.encoder[-1].bias, 0.0) 81 | 82 | def forward(self, kpts, scores): 83 | inputs = [kpts.transpose(1, 2), scores.unsqueeze(1)] 84 | return self.encoder(torch.cat(inputs, dim=1)) 85 | 86 | 87 | def attention(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> Tuple[torch.Tensor,torch.Tensor]: 88 | dim = query.shape[1] 89 | scores = torch.einsum('bdhn,bdhm->bhnm', query, key) / dim**.5 90 | prob = torch.nn.functional.softmax(scores, dim=-1) 91 | return torch.einsum('bhnm,bdhm->bdhn', prob, value), prob 92 | 93 | 94 | class MultiHeadedAttention(nn.Module): 95 | """ Multi-head attention to increase model expressivitiy """ 96 | def __init__(self, num_heads: int, d_model: int): 97 | super().__init__() 98 | assert d_model % num_heads == 0 99 | self.dim = d_model // num_heads 100 | self.num_heads = num_heads 101 | self.merge = nn.Conv1d(d_model, d_model, kernel_size=1) 102 | self.proj = nn.ModuleList([deepcopy(self.merge) for _ in range(3)]) 103 | 104 | def forward(self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> torch.Tensor: 105 | batch_dim = query.size(0) 106 | query, key, value = [l(x).view(batch_dim, self.dim, self.num_heads, -1) 107 | for l, x in zip(self.proj, (query, key, value))] 108 | x, _ = attention(query, key, value) 109 | return self.merge(x.contiguous().view(batch_dim, self.dim*self.num_heads, -1)) 110 | 111 | 112 | class AttentionalPropagation(nn.Module): 113 | def __init__(self, feature_dim: int, num_heads: int): 114 | super().__init__() 115 | self.attn = MultiHeadedAttention(num_heads, feature_dim) 116 | self.mlp = MLP([feature_dim*2, feature_dim*2, feature_dim]) 117 | nn.init.constant_(self.mlp[-1].bias, 0.0) 118 | 119 | def forward(self, x: torch.Tensor, source: torch.Tensor) -> torch.Tensor: 120 | message = self.attn(x, source, source) 121 | return self.mlp(torch.cat([x, message], dim=1)) 122 | 123 | 124 | class AttentionalGNN(nn.Module): 125 | def __init__(self, feature_dim: int, layer_names: List[str]) -> 
None: 126 | super().__init__() 127 | self.layers = nn.ModuleList([ 128 | AttentionalPropagation(feature_dim, 4) 129 | for _ in range(len(layer_names))]) 130 | self.names = layer_names 131 | 132 | def forward(self, desc0: torch.Tensor, desc1: torch.Tensor) -> Tuple[torch.Tensor,torch.Tensor]: 133 | for layer, name in zip(self.layers, self.names): 134 | if name == 'cross': 135 | src0, src1 = desc1, desc0 136 | else: # if name == 'self': 137 | src0, src1 = desc0, desc1 138 | delta0, delta1 = layer(desc0, src0), layer(desc1, src1) 139 | desc0, desc1 = (desc0 + delta0), (desc1 + delta1) 140 | return desc0, desc1 141 | 142 | 143 | def log_sinkhorn_iterations(Z: torch.Tensor, log_mu: torch.Tensor, log_nu: torch.Tensor, iters: int) -> torch.Tensor: 144 | """ Perform Sinkhorn Normalization in Log-space for stability""" 145 | u, v = torch.zeros_like(log_mu), torch.zeros_like(log_nu) 146 | for _ in range(iters): 147 | u = log_mu - torch.logsumexp(Z + v.unsqueeze(1), dim=2) 148 | v = log_nu - torch.logsumexp(Z + u.unsqueeze(2), dim=1) 149 | return Z + u.unsqueeze(2) + v.unsqueeze(1) 150 | 151 | 152 | def log_optimal_transport(scores: torch.Tensor, alpha: torch.Tensor, iters: int) -> torch.Tensor: 153 | """ Perform Differentiable Optimal Transport in Log-space for stability""" 154 | b, m, n = scores.shape 155 | one = scores.new_tensor(1) 156 | ms, ns = (m*one).to(scores), (n*one).to(scores) 157 | 158 | bins0 = alpha.expand(b, m, 1) 159 | bins1 = alpha.expand(b, 1, n) 160 | alpha = alpha.expand(b, 1, 1) 161 | 162 | couplings = torch.cat([torch.cat([scores, bins0], -1), 163 | torch.cat([bins1, alpha], -1)], 1) 164 | 165 | norm = - (ms + ns).log() 166 | log_mu = torch.cat([norm.expand(m), ns.log()[None] + norm]) 167 | log_nu = torch.cat([norm.expand(n), ms.log()[None] + norm]) 168 | log_mu, log_nu = log_mu[None].expand(b, -1), log_nu[None].expand(b, -1) 169 | 170 | Z = log_sinkhorn_iterations(couplings, log_mu, log_nu, iters) 171 | Z = Z - norm # multiply probabilities by M+N 172 | return Z 173 | 174 | 175 | def arange_like(x, dim: int): 176 | return x.new_ones(x.shape[dim]).cumsum(0) - 1 # traceable in 1.1 177 | 178 | 179 | class SuperGlue(nn.Module): 180 | """SuperGlue feature matching middle-end 181 | 182 | Given two sets of keypoints and locations, we determine the 183 | correspondences by: 184 | 1. Keypoint Encoding (normalization + visual feature and location fusion) 185 | 2. Graph Neural Network with multiple self and cross-attention layers 186 | 3. Final projection layer 187 | 4. Optimal Transport Layer (a differentiable Hungarian matching algorithm) 188 | 5. Thresholding matrix based on mutual exclusivity and a match_threshold 189 | 190 | The correspondence ids use -1 to indicate non-matching points. 191 | 192 | Paul-Edouard Sarlin, Daniel DeTone, Tomasz Malisiewicz, and Andrew 193 | Rabinovich. SuperGlue: Learning Feature Matching with Graph Neural 194 | Networks. In CVPR, 2020. 
https://arxiv.org/abs/1911.11763 195 | 196 | """ 197 | default_config = { 198 | 'descriptor_dim': 256, 199 | 'weights': 'indoor', 200 | 'keypoint_encoder': [32, 64, 128, 256], 201 | 'GNN_layers': ['self', 'cross'] * 9, 202 | 'sinkhorn_iterations': 100, 203 | 'match_threshold': 0.2, 204 | } 205 | 206 | def __init__(self, config): 207 | super().__init__() 208 | self.config = {**self.default_config, **config} 209 | 210 | self.kenc = KeypointEncoder( 211 | self.config['descriptor_dim'], self.config['keypoint_encoder']) 212 | 213 | self.gnn = AttentionalGNN( 214 | feature_dim=self.config['descriptor_dim'], layer_names=self.config['GNN_layers']) 215 | 216 | self.final_proj = nn.Conv1d( 217 | self.config['descriptor_dim'], self.config['descriptor_dim'], 218 | kernel_size=1, bias=True) 219 | 220 | bin_score = torch.nn.Parameter(torch.tensor(1.)) 221 | self.register_parameter('bin_score', bin_score) 222 | 223 | assert self.config['weights'] in ['indoor', 'outdoor'] 224 | path = Path(__file__).parent 225 | path = path / 'weights/superglue_{}.pth'.format(self.config['weights']) 226 | self.load_state_dict(torch.load(str(path))) 227 | # print('Loaded SuperGlue model (\"{}\" weights)'.format( 228 | # self.config['weights'])) 229 | 230 | def forward(self, data): 231 | """Run SuperGlue on a pair of keypoints and descriptors""" 232 | desc0, desc1 = data['descriptors0'], data['descriptors1'] 233 | kpts0, kpts1 = data['keypoints0'], data['keypoints1'] 234 | 235 | if kpts0.shape[1] == 0 or kpts1.shape[1] == 0: # no keypoints 236 | shape0, shape1 = kpts0.shape[:-1], kpts1.shape[:-1] 237 | return { 238 | 'matches0': kpts0.new_full(shape0, -1, dtype=torch.int), 239 | 'matches1': kpts1.new_full(shape1, -1, dtype=torch.int), 240 | 'matching_scores0': kpts0.new_zeros(shape0), 241 | 'matching_scores1': kpts1.new_zeros(shape1), 242 | } 243 | 244 | # Keypoint normalization. 245 | kpts0 = normalize_keypoints(kpts0, data['image0'].shape) 246 | kpts1 = normalize_keypoints(kpts1, data['image1'].shape) 247 | 248 | # Keypoint MLP encoder. 249 | desc0 = desc0 + self.kenc(kpts0, data['scores0']) 250 | desc1 = desc1 + self.kenc(kpts1, data['scores1']) 251 | 252 | # Multi-layer Transformer network. 253 | desc0, desc1 = self.gnn(desc0, desc1) 254 | 255 | # Final MLP projection. 256 | mdesc0, mdesc1 = self.final_proj(desc0), self.final_proj(desc1) 257 | 258 | # Compute matching descriptor distance. 259 | scores = torch.einsum('bdn,bdm->bnm', mdesc0, mdesc1) 260 | scores = scores / self.config['descriptor_dim']**.5 261 | 262 | # Run the optimal transport. 263 | scores = log_optimal_transport( 264 | scores, self.bin_score, 265 | iters=self.config['sinkhorn_iterations']) 266 | 267 | # Get the matches with score above "match_threshold". 
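# `scores` holds log-assignment probabilities from the Sinkhorn iterations; the extra last
# row/column is the "dustbin" for unmatched keypoints and is dropped below. The max over dim 2
# proposes, for each image0 keypoint, its best image1 candidate, and the max over dim 1 does
# the reverse. Only mutual nearest neighbours are kept, their confidence is recovered with
# exp() (scores are in log-space), and matches whose confidence does not exceed
# match_threshold are marked invalid with -1.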
268 | max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1) 269 | indices0, indices1 = max0.indices, max1.indices 270 | mutual0 = arange_like(indices0, 1)[None] == indices1.gather(1, indices0) 271 | mutual1 = arange_like(indices1, 1)[None] == indices0.gather(1, indices1) 272 | zero = scores.new_tensor(0) 273 | mscores0 = torch.where(mutual0, max0.values.exp(), zero) 274 | mscores1 = torch.where(mutual1, mscores0.gather(1, indices1), zero) 275 | valid0 = mutual0 & (mscores0 > self.config['match_threshold']) 276 | valid1 = mutual1 & valid0.gather(1, indices1) 277 | indices0 = torch.where(valid0, indices0, indices0.new_tensor(-1)) 278 | indices1 = torch.where(valid1, indices1, indices1.new_tensor(-1)) 279 | 280 | return { 281 | 'matches0': indices0, # use -1 for invalid match 282 | 'matches1': indices1, # use -1 for invalid match 283 | 'matching_scores0': mscores0, 284 | 'matching_scores1': mscores1, 285 | } 286 | -------------------------------------------------------------------------------- /yolov7/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | # YOLOR PyTorch utils 2 | 3 | import datetime 4 | import logging 5 | import math 6 | import os 7 | import platform 8 | import subprocess 9 | import time 10 | from contextlib import contextmanager 11 | from copy import deepcopy 12 | from pathlib import Path 13 | 14 | import torch 15 | import torch.backends.cudnn as cudnn 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | import torchvision 19 | 20 | try: 21 | import thop # for FLOPS computation 22 | except ImportError: 23 | thop = None 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | @contextmanager 28 | def torch_distributed_zero_first(local_rank: int): 29 | """ 30 | Decorator to make all processes in distributed training wait for each local_master to do something. 31 | """ 32 | if local_rank not in [-1, 0]: 33 | torch.distributed.barrier() 34 | yield 35 | if local_rank == 0: 36 | torch.distributed.barrier() 37 | 38 | 39 | def init_torch_seeds(seed=0): 40 | # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html 41 | torch.manual_seed(seed) 42 | if seed == 0: # slower, more reproducible 43 | cudnn.benchmark, cudnn.deterministic = False, True 44 | else: # faster, less reproducible 45 | cudnn.benchmark, cudnn.deterministic = True, False 46 | 47 | 48 | def date_modified(path=__file__): 49 | # return human-readable file modification date, i.e. '2021-3-26' 50 | t = datetime.datetime.fromtimestamp(Path(path).stat().st_mtime) 51 | return f'{t.year}-{t.month}-{t.day}' 52 | 53 | 54 | def git_describe(path=Path(__file__).parent): # path must be a directory 55 | # return human-readable git description, i.e. 
v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe 56 | s = f'git -C {path} describe --tags --long --always' 57 | try: 58 | return subprocess.check_output(s, shell=True, stderr=subprocess.STDOUT).decode()[:-1] 59 | except subprocess.CalledProcessError as e: 60 | return '' # not a git repository 61 | 62 | 63 | def select_device(device='', batch_size=None): 64 | # device = 'cpu' or '0' or '0,1,2,3' 65 | s = f'YOLOR 🚀 {git_describe() or date_modified()} torch {torch.__version__} ' # string 66 | cpu = device.lower() == 'cpu' 67 | if cpu: 68 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False 69 | elif device: # non-cpu device requested 70 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 71 | assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability 72 | 73 | cuda = not cpu and torch.cuda.is_available() 74 | if cuda: 75 | n = torch.cuda.device_count() 76 | if n > 1 and batch_size: # check that batch_size is compatible with device_count 77 | assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' 78 | space = ' ' * len(s) 79 | for i, d in enumerate(device.split(',') if device else range(n)): 80 | p = torch.cuda.get_device_properties(i) 81 | s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB 82 | else: 83 | s += 'CPU\n' 84 | 85 | logger.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s) # emoji-safe 86 | return torch.device('cuda:0' if cuda else 'cpu') 87 | 88 | 89 | def time_synchronized(): 90 | # pytorch-accurate time 91 | if torch.cuda.is_available(): 92 | torch.cuda.synchronize() 93 | return time.time() 94 | 95 | 96 | def profile(x, ops, n=100, device=None): 97 | # profile a pytorch module or list of modules. Example usage: 98 | # x = torch.randn(16, 3, 640, 640) # input 99 | # m1 = lambda x: x * torch.sigmoid(x) 100 | # m2 = nn.SiLU() 101 | # profile(x, [m1, m2], n=100) # profile speed over 100 iterations 102 | 103 | device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 104 | x = x.to(device) 105 | x.requires_grad = True 106 | print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') 107 | print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}") 108 | for m in ops if isinstance(ops, list) else [ops]: 109 | m = m.to(device) if hasattr(m, 'to') else m # device 110 | m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type 111 | dtf, dtb, t = 0., 0., [0., 0., 0.] 
# dt forward, backward 112 | try: 113 | flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPS 114 | except: 115 | flops = 0 116 | 117 | for _ in range(n): 118 | t[0] = time_synchronized() 119 | y = m(x) 120 | t[1] = time_synchronized() 121 | try: 122 | _ = y.sum().backward() 123 | t[2] = time_synchronized() 124 | except: # no backward method 125 | t[2] = float('nan') 126 | dtf += (t[1] - t[0]) * 1000 / n # ms per op forward 127 | dtb += (t[2] - t[1]) * 1000 / n # ms per op backward 128 | 129 | s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' 130 | s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list' 131 | p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters 132 | print(f'{p:12}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}') 133 | 134 | 135 | def is_parallel(model): 136 | return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 137 | 138 | 139 | def intersect_dicts(da, db, exclude=()): 140 | # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values 141 | return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} 142 | 143 | 144 | def initialize_weights(model): 145 | for m in model.modules(): 146 | t = type(m) 147 | if t is nn.Conv2d: 148 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 149 | elif t is nn.BatchNorm2d: 150 | m.eps = 1e-3 151 | m.momentum = 0.03 152 | elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 153 | m.inplace = True 154 | 155 | 156 | def find_modules(model, mclass=nn.Conv2d): 157 | # Finds layer indices matching module class 'mclass' 158 | return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] 159 | 160 | 161 | def sparsity(model): 162 | # Return global model sparsity 163 | a, b = 0., 0. 164 | for p in model.parameters(): 165 | a += p.numel() 166 | b += (p == 0).sum() 167 | return b / a 168 | 169 | 170 | def prune(model, amount=0.3): 171 | # Prune model to requested global sparsity 172 | import torch.nn.utils.prune as prune 173 | print('Pruning model... 
', end='') 174 | for name, m in model.named_modules(): 175 | if isinstance(m, nn.Conv2d): 176 | prune.l1_unstructured(m, name='weight', amount=amount) # prune 177 | prune.remove(m, 'weight') # make permanent 178 | print(' %.3g global sparsity' % sparsity(model)) 179 | 180 | 181 | def fuse_conv_and_bn(conv, bn): 182 | # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 183 | fusedconv = nn.Conv2d(conv.in_channels, 184 | conv.out_channels, 185 | kernel_size=conv.kernel_size, 186 | stride=conv.stride, 187 | padding=conv.padding, 188 | groups=conv.groups, 189 | bias=True).requires_grad_(False).to(conv.weight.device) 190 | 191 | # prepare filters 192 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 193 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 194 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) 195 | 196 | # prepare spatial bias 197 | b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias 198 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 199 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 200 | 201 | return fusedconv 202 | 203 | 204 | def model_info(model, verbose=False, img_size=640): 205 | # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320] 206 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 207 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 208 | if verbose: 209 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 210 | for i, (name, p) in enumerate(model.named_parameters()): 211 | name = name.replace('module_list.', '') 212 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 213 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 214 | 215 | try: # FLOPS 216 | from thop import profile 217 | stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32 218 | img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input 219 | flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPS 220 | img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float 221 | fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPS 222 | except (ImportError, Exception): 223 | fs = '' 224 | 225 | logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") 226 | 227 | 228 | def load_classifier(name='resnet101', n=2): 229 | # Loads a pretrained model reshaped to n-class output 230 | model = torchvision.models.__dict__[name](pretrained=True) 231 | 232 | # ResNet model properties 233 | # input_size = [3, 224, 224] 234 | # input_space = 'RGB' 235 | # input_range = [0, 1] 236 | # mean = [0.485, 0.456, 0.406] 237 | # std = [0.229, 0.224, 0.225] 238 | 239 | # Reshape output to n classes 240 | filters = model.fc.weight.shape[1] 241 | model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) 242 | model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) 243 | model.fc.out_features = n 244 | return model 245 | 246 | 247 | def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) 248 | # scales img(bs,3,y,x) by ratio constrained to gs-multiple 249 | if 
ratio == 1.0: 250 | return img 251 | else: 252 | h, w = img.shape[2:] 253 | s = (int(h * ratio), int(w * ratio)) # new size 254 | img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize 255 | if not same_shape: # pad/crop img 256 | h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] 257 | return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean 258 | 259 | 260 | def copy_attr(a, b, include=(), exclude=()): 261 | # Copy attributes from b to a, options to only include [...] and to exclude [...] 262 | for k, v in b.__dict__.items(): 263 | if (len(include) and k not in include) or k.startswith('_') or k in exclude: 264 | continue 265 | else: 266 | setattr(a, k, v) 267 | 268 | 269 | class ModelEMA: 270 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 271 | Keep a moving average of everything in the model state_dict (parameters and buffers). 272 | This is intended to allow functionality like 273 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 274 | A smoothed version of the weights is necessary for some training schemes to perform well. 275 | This class is sensitive where it is initialized in the sequence of model init, 276 | GPU assignment and distributed training wrappers. 277 | """ 278 | 279 | def __init__(self, model, decay=0.9999, updates=0): 280 | # Create EMA 281 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA 282 | # if next(model.parameters()).device.type != 'cpu': 283 | # self.ema.half() # FP16 EMA 284 | self.updates = updates # number of EMA updates 285 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) 286 | for p in self.ema.parameters(): 287 | p.requires_grad_(False) 288 | 289 | def update(self, model): 290 | # Update EMA parameters 291 | with torch.no_grad(): 292 | self.updates += 1 293 | d = self.decay(self.updates) 294 | 295 | msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict 296 | for k, v in self.ema.state_dict().items(): 297 | if v.dtype.is_floating_point: 298 | v *= d 299 | v += (1. - d) * msd[k].detach() 300 | 301 | def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): 302 | # Update EMA attributes 303 | copy_attr(self.ema, model, include, exclude) 304 | 305 | 306 | class BatchNormXd(torch.nn.modules.batchnorm._BatchNorm): 307 | def _check_input_dim(self, input): 308 | # The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc 309 | # is this method that is overwritten by the sub-class 310 | # This original goal of this method was for tensor sanity checks 311 | # If you're ok bypassing those sanity checks (eg. 
if you trust your inference 312 | # to provide the right dimensional inputs), then you can just use this method 313 | # for easy conversion from SyncBatchNorm 314 | # (unfortunately, SyncBatchNorm does not store the original class - if it did 315 | # we could return the one that was originally created) 316 | return 317 | 318 | def revert_sync_batchnorm(module): 319 | # this is very similar to the function that it is trying to revert: 320 | # https://github.com/pytorch/pytorch/blob/c8b3686a3e4ba63dc59e5dcfe5db3430df256833/torch/nn/modules/batchnorm.py#L679 321 | module_output = module 322 | if isinstance(module, torch.nn.modules.batchnorm.SyncBatchNorm): 323 | new_cls = BatchNormXd 324 | module_output = BatchNormXd(module.num_features, 325 | module.eps, module.momentum, 326 | module.affine, 327 | module.track_running_stats) 328 | if module.affine: 329 | with torch.no_grad(): 330 | module_output.weight = module.weight 331 | module_output.bias = module.bias 332 | module_output.running_mean = module.running_mean 333 | module_output.running_var = module.running_var 334 | module_output.num_batches_tracked = module.num_batches_tracked 335 | if hasattr(module, "qconfig"): 336 | module_output.qconfig = module.qconfig 337 | for name, child in module.named_children(): 338 | module_output.add_module(name, revert_sync_batchnorm(child)) 339 | del module 340 | return module_output 341 | 342 | 343 | class TracedModel(nn.Module): 344 | 345 | def __init__(self, model=None, device=None, img_size=(640,640)): 346 | super(TracedModel, self).__init__() 347 | 348 | print(" Convert model to Traced-model... ") 349 | self.stride = model.stride 350 | self.names = model.names 351 | self.model = model 352 | 353 | self.model = revert_sync_batchnorm(self.model) 354 | self.model.to('cpu') 355 | self.model.eval() 356 | 357 | self.detect_layer = self.model.model[-1] 358 | self.model.traced = True 359 | 360 | rand_example = torch.rand(1, 3, img_size, img_size) 361 | 362 | traced_script_module = torch.jit.trace(self.model, rand_example, strict=False) 363 | #traced_script_module = torch.jit.script(self.model) 364 | traced_script_module.save("traced_model.pt") 365 | print(" traced_script_module saved! ") 366 | self.model = traced_script_module 367 | self.model.to(device) 368 | self.detect_layer.to(device) 369 | print(" model is traced! 
\n") 370 | 371 | def forward(self, x, augment=False, profile=False): 372 | out = self.model(x) 373 | out = self.detect_layer(out) 374 | return out -------------------------------------------------------------------------------- /yolov7/models/common.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | def autopad(k, p=None): # kernel, padding 6 | # Pad to 'same' 7 | if p is None: 8 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 9 | return p 10 | 11 | class MP(nn.Module): 12 | def __init__(self, k=2): 13 | super(MP, self).__init__() 14 | self.m = nn.MaxPool2d(kernel_size=k, stride=k) 15 | 16 | def forward(self, x): 17 | return self.m(x) 18 | 19 | class Concat(nn.Module): 20 | def __init__(self, dimension=1): 21 | super(Concat, self).__init__() 22 | self.d = dimension 23 | 24 | def forward(self, x): 25 | return torch.cat(x, self.d) 26 | 27 | class Conv(nn.Module): 28 | # Standard convolution 29 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 30 | super(Conv, self).__init__() 31 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 32 | self.bn = nn.BatchNorm2d(c2) 33 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 34 | 35 | def forward(self, x): 36 | return self.act(self.bn(self.conv(x))) 37 | 38 | def fuseforward(self, x): 39 | return self.act(self.conv(x)) 40 | 41 | class SPPCSPC(nn.Module): 42 | # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks 43 | def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)): 44 | super(SPPCSPC, self).__init__() 45 | c_ = int(2 * c2 * e) # hidden channels 46 | self.cv1 = Conv(c1, c_, 1, 1) 47 | self.cv2 = Conv(c1, c_, 1, 1) 48 | self.cv3 = Conv(c_, c_, 3, 1) 49 | self.cv4 = Conv(c_, c_, 1, 1) 50 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 51 | self.cv5 = Conv(4 * c_, c_, 1, 1) 52 | self.cv6 = Conv(c_, c_, 3, 1) 53 | self.cv7 = Conv(2 * c_, c2, 1, 1) 54 | 55 | def forward(self, x): 56 | x1 = self.cv4(self.cv3(self.cv1(x))) 57 | y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1))) 58 | y2 = self.cv2(x) 59 | return self.cv7(torch.cat((y1, y2), dim=1)) 60 | 61 | class ImplicitA(nn.Module): 62 | def __init__(self, channel, mean=0., std=.02): 63 | super(ImplicitA, self).__init__() 64 | self.channel = channel 65 | self.mean = mean 66 | self.std = std 67 | self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) 68 | nn.init.normal_(self.implicit, mean=self.mean, std=self.std) 69 | 70 | def forward(self, x): 71 | return self.implicit + x 72 | 73 | 74 | class ImplicitM(nn.Module): 75 | def __init__(self, channel, mean=1., std=.02): 76 | super(ImplicitM, self).__init__() 77 | self.channel = channel 78 | self.mean = mean 79 | self.std = std 80 | self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) 81 | # self.implicit = nn.Parameter(torch.ones(1, 3, 1, 1, channel // 3)) 82 | nn.init.normal_(self.implicit, mean=self.mean, std=self.std) 83 | 84 | def forward(self, x): 85 | # print(x.shape) 86 | # print(self.implicit.weight) 87 | # import pdb; pdb.set_trace() 88 | return self.implicit * x 89 | 90 | class RepConv(nn.Module): 91 | # Represented convolution 92 | # https://arxiv.org/abs/2101.03697 93 | 94 | def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=True, deploy=False): 95 | super(RepConv, 
self).__init__() 96 | 97 | self.deploy = deploy 98 | self.groups = g 99 | self.in_channels = c1 100 | self.out_channels = c2 101 | 102 | assert k == 3 103 | assert autopad(k, p) == 1 104 | 105 | padding_11 = autopad(k, p) - k // 2 106 | 107 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 108 | 109 | if deploy: 110 | self.rbr_reparam = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=True) 111 | 112 | else: 113 | self.rbr_identity = (nn.BatchNorm2d(num_features=c1) if c2 == c1 and s == 1 else None) 114 | 115 | self.rbr_dense = nn.Sequential( 116 | nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False), 117 | nn.BatchNorm2d(num_features=c2), 118 | ) 119 | 120 | self.rbr_1x1 = nn.Sequential( 121 | nn.Conv2d( c1, c2, 1, s, padding_11, groups=g, bias=False), 122 | nn.BatchNorm2d(num_features=c2), 123 | ) 124 | 125 | def forward(self, inputs): 126 | if hasattr(self, "rbr_reparam"): 127 | return self.act(self.rbr_reparam(inputs)) 128 | 129 | if self.rbr_identity is None: 130 | id_out = 0 131 | else: 132 | id_out = self.rbr_identity(inputs) 133 | 134 | return self.act(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out) 135 | 136 | def get_equivalent_kernel_bias(self): 137 | kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) 138 | kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) 139 | kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) 140 | return ( 141 | kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, 142 | bias3x3 + bias1x1 + biasid, 143 | ) 144 | 145 | def _pad_1x1_to_3x3_tensor(self, kernel1x1): 146 | if kernel1x1 is None: 147 | return 0 148 | else: 149 | return nn.functional.pad(kernel1x1, [1, 1, 1, 1]) 150 | 151 | def _fuse_bn_tensor(self, branch): 152 | if branch is None: 153 | return 0, 0 154 | if isinstance(branch, nn.Sequential): 155 | kernel = branch[0].weight 156 | running_mean = branch[1].running_mean 157 | running_var = branch[1].running_var 158 | gamma = branch[1].weight 159 | beta = branch[1].bias 160 | eps = branch[1].eps 161 | else: 162 | assert isinstance(branch, nn.BatchNorm2d) 163 | if not hasattr(self, "id_tensor"): 164 | input_dim = self.in_channels // self.groups 165 | kernel_value = np.zeros( 166 | (self.in_channels, input_dim, 3, 3), dtype=np.float32 167 | ) 168 | for i in range(self.in_channels): 169 | kernel_value[i, i % input_dim, 1, 1] = 1 170 | self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) 171 | kernel = self.id_tensor 172 | running_mean = branch.running_mean 173 | running_var = branch.running_var 174 | gamma = branch.weight 175 | beta = branch.bias 176 | eps = branch.eps 177 | std = (running_var + eps).sqrt() 178 | t = (gamma / std).reshape(-1, 1, 1, 1) 179 | return kernel * t, beta - running_mean * gamma / std 180 | 181 | def repvgg_convert(self): 182 | kernel, bias = self.get_equivalent_kernel_bias() 183 | return ( 184 | kernel.detach().cpu().numpy(), 185 | bias.detach().cpu().numpy(), 186 | ) 187 | 188 | def fuse_conv_bn(self, conv, bn): 189 | 190 | std = (bn.running_var + bn.eps).sqrt() 191 | bias = bn.bias - bn.running_mean * bn.weight / std 192 | 193 | t = (bn.weight / std).reshape(-1, 1, 1, 1) 194 | weights = conv.weight * t 195 | 196 | bn = nn.Identity() 197 | conv = nn.Conv2d(in_channels = conv.in_channels, 198 | out_channels = conv.out_channels, 199 | kernel_size = conv.kernel_size, 200 | stride=conv.stride, 201 | padding = conv.padding, 202 | dilation = conv.dilation, 203 | groups = conv.groups, 204 | bias = True, 205 | 
padding_mode = conv.padding_mode) 206 | 207 | conv.weight = torch.nn.Parameter(weights) 208 | conv.bias = torch.nn.Parameter(bias) 209 | return conv 210 | 211 | def fuse_repvgg_block(self): 212 | if self.deploy: 213 | return 214 | print(f"RepConv.fuse_repvgg_block") 215 | 216 | self.rbr_dense = self.fuse_conv_bn(self.rbr_dense[0], self.rbr_dense[1]) 217 | 218 | self.rbr_1x1 = self.fuse_conv_bn(self.rbr_1x1[0], self.rbr_1x1[1]) 219 | rbr_1x1_bias = self.rbr_1x1.bias 220 | weight_1x1_expanded = torch.nn.functional.pad(self.rbr_1x1.weight, [1, 1, 1, 1]) 221 | 222 | # Fuse self.rbr_identity 223 | if (isinstance(self.rbr_identity, nn.BatchNorm2d) or isinstance(self.rbr_identity, nn.modules.batchnorm.SyncBatchNorm)): 224 | # print(f"fuse: rbr_identity == BatchNorm2d or SyncBatchNorm") 225 | identity_conv_1x1 = nn.Conv2d( 226 | in_channels=self.in_channels, 227 | out_channels=self.out_channels, 228 | kernel_size=1, 229 | stride=1, 230 | padding=0, 231 | groups=self.groups, 232 | bias=False) 233 | identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.to(self.rbr_1x1.weight.data.device) 234 | identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.squeeze().squeeze() 235 | # print(f" identity_conv_1x1.weight = {identity_conv_1x1.weight.shape}") 236 | identity_conv_1x1.weight.data.fill_(0.0) 237 | identity_conv_1x1.weight.data.fill_diagonal_(1.0) 238 | identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.unsqueeze(2).unsqueeze(3) 239 | # print(f" identity_conv_1x1.weight = {identity_conv_1x1.weight.shape}") 240 | 241 | identity_conv_1x1 = self.fuse_conv_bn(identity_conv_1x1, self.rbr_identity) 242 | bias_identity_expanded = identity_conv_1x1.bias 243 | weight_identity_expanded = torch.nn.functional.pad(identity_conv_1x1.weight, [1, 1, 1, 1]) 244 | else: 245 | # print(f"fuse: rbr_identity != BatchNorm2d, rbr_identity = {self.rbr_identity}") 246 | bias_identity_expanded = torch.nn.Parameter( torch.zeros_like(rbr_1x1_bias) ) 247 | weight_identity_expanded = torch.nn.Parameter( torch.zeros_like(weight_1x1_expanded) ) 248 | 249 | 250 | #print(f"self.rbr_1x1.weight = {self.rbr_1x1.weight.shape}, ") 251 | #print(f"weight_1x1_expanded = {weight_1x1_expanded.shape}, ") 252 | #print(f"self.rbr_dense.weight = {self.rbr_dense.weight.shape}, ") 253 | 254 | self.rbr_dense.weight = torch.nn.Parameter(self.rbr_dense.weight + weight_1x1_expanded + weight_identity_expanded) 255 | self.rbr_dense.bias = torch.nn.Parameter(self.rbr_dense.bias + rbr_1x1_bias + bias_identity_expanded) 256 | 257 | self.rbr_reparam = self.rbr_dense 258 | self.deploy = True 259 | 260 | if self.rbr_identity is not None: 261 | del self.rbr_identity 262 | self.rbr_identity = None 263 | 264 | if self.rbr_1x1 is not None: 265 | del self.rbr_1x1 266 | self.rbr_1x1 = None 267 | 268 | if self.rbr_dense is not None: 269 | del self.rbr_dense 270 | self.rbr_dense = None 271 | 272 | class RepConv_OREPA(nn.Module): 273 | 274 | def __init__(self, c1, c2, k=3, s=1, padding=1, dilation=1, groups=1, padding_mode='zeros', deploy=False, use_se=False, nonlinear=nn.SiLU()): 275 | super(RepConv_OREPA, self).__init__() 276 | self.deploy = deploy 277 | self.groups = groups 278 | self.in_channels = c1 279 | self.out_channels = c2 280 | 281 | self.padding = padding 282 | self.dilation = dilation 283 | self.groups = groups 284 | 285 | assert k == 3 286 | assert padding == 1 287 | 288 | padding_11 = padding - k // 2 289 | 290 | if nonlinear is None: 291 | self.nonlinearity = nn.Identity() 292 | else: 293 | self.nonlinearity = nonlinear 294 | 295 
| if use_se: 296 | self.se = SEBlock(self.out_channels, internal_neurons=self.out_channels // 16) 297 | else: 298 | self.se = nn.Identity() 299 | 300 | if deploy: 301 | self.rbr_reparam = nn.Conv2d(in_channels=self.in_channels, out_channels=self.out_channels, kernel_size=k, stride=s, 302 | padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode) 303 | 304 | else: 305 | self.rbr_identity = nn.BatchNorm2d(num_features=self.in_channels) if self.out_channels == self.in_channels and s == 1 else None 306 | self.rbr_dense = OREPA_3x3_RepConv(in_channels=self.in_channels, out_channels=self.out_channels, kernel_size=k, stride=s, padding=padding, groups=groups, dilation=1) 307 | self.rbr_1x1 = ConvBN(in_channels=self.in_channels, out_channels=self.out_channels, kernel_size=1, stride=s, padding=padding_11, groups=groups, dilation=1) 308 | print('RepVGG Block, identity = ', self.rbr_identity) 309 | 310 | 311 | def forward(self, inputs): 312 | if hasattr(self, 'rbr_reparam'): 313 | return self.nonlinearity(self.se(self.rbr_reparam(inputs))) 314 | 315 | if self.rbr_identity is None: 316 | id_out = 0 317 | else: 318 | id_out = self.rbr_identity(inputs) 319 | 320 | out1 = self.rbr_dense(inputs) 321 | out2 = self.rbr_1x1(inputs) 322 | out3 = id_out 323 | out = out1 + out2 + out3 324 | 325 | return self.nonlinearity(self.se(out)) 326 | 327 | 328 | # Optional. This improves the accuracy and facilitates quantization. 329 | # 1. Cancel the original weight decay on rbr_dense.conv.weight and rbr_1x1.conv.weight. 330 | # 2. Use like this. 331 | # loss = criterion(....) 332 | # for every RepVGGBlock blk: 333 | # loss += weight_decay_coefficient * 0.5 * blk.get_cust_L2() 334 | # optimizer.zero_grad() 335 | # loss.backward() 336 | 337 | # Not used for OREPA 338 | def get_custom_L2(self): 339 | K3 = self.rbr_dense.weight_gen() 340 | K1 = self.rbr_1x1.conv.weight 341 | t3 = (self.rbr_dense.bn.weight / ((self.rbr_dense.bn.running_var + self.rbr_dense.bn.eps).sqrt())).reshape(-1, 1, 1, 1).detach() 342 | t1 = (self.rbr_1x1.bn.weight / ((self.rbr_1x1.bn.running_var + self.rbr_1x1.bn.eps).sqrt())).reshape(-1, 1, 1, 1).detach() 343 | 344 | l2_loss_circle = (K3 ** 2).sum() - (K3[:, :, 1:2, 1:2] ** 2).sum() # The L2 loss of the "circle" of weights in 3x3 kernel. Use regular L2 on them. 345 | eq_kernel = K3[:, :, 1:2, 1:2] * t3 + K1 * t1 # The equivalent resultant central point of 3x3 kernel. 346 | l2_loss_eq_kernel = (eq_kernel ** 2 / (t3 ** 2 + t1 ** 2)).sum() # Normalize for an L2 coefficient comparable to regular L2. 
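# A hedged sketch of how this penalty could enter a training step, following the
# comment block above (note the method is named get_custom_L2, not get_cust_L2).
# `model`, `criterion`, `optimizer`, `imgs`, `targets` and `weight_decay_coefficient`
# are placeholders, not names defined in this repository:
#
#     loss = criterion(model(imgs), targets)
#     for blk in model.modules():
#         if isinstance(blk, RepConv_OREPA) and not hasattr(blk, 'rbr_reparam'):
#             loss = loss + weight_decay_coefficient * 0.5 * blk.get_custom_L2()
#     optimizer.zero_grad(); loss.backward(); optimizer.step()
#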
347 | return l2_loss_eq_kernel + l2_loss_circle 348 | 349 | def get_equivalent_kernel_bias(self): 350 | kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) 351 | kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) 352 | kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) 353 | return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid 354 | 355 | def _pad_1x1_to_3x3_tensor(self, kernel1x1): 356 | if kernel1x1 is None: 357 | return 0 358 | else: 359 | return torch.nn.functional.pad(kernel1x1, [1,1,1,1]) 360 | 361 | def _fuse_bn_tensor(self, branch): 362 | if branch is None: 363 | return 0, 0 364 | if not isinstance(branch, nn.BatchNorm2d): 365 | if isinstance(branch, OREPA_3x3_RepConv): 366 | kernel = branch.weight_gen() 367 | elif isinstance(branch, ConvBN): 368 | kernel = branch.conv.weight 369 | else: 370 | raise NotImplementedError 371 | running_mean = branch.bn.running_mean 372 | running_var = branch.bn.running_var 373 | gamma = branch.bn.weight 374 | beta = branch.bn.bias 375 | eps = branch.bn.eps 376 | else: 377 | if not hasattr(self, 'id_tensor'): 378 | input_dim = self.in_channels // self.groups 379 | kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32) 380 | for i in range(self.in_channels): 381 | kernel_value[i, i % input_dim, 1, 1] = 1 382 | self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) 383 | kernel = self.id_tensor 384 | running_mean = branch.running_mean 385 | running_var = branch.running_var 386 | gamma = branch.weight 387 | beta = branch.bias 388 | eps = branch.eps 389 | std = (running_var + eps).sqrt() 390 | t = (gamma / std).reshape(-1, 1, 1, 1) 391 | return kernel * t, beta - running_mean * gamma / std 392 | 393 | def switch_to_deploy(self): 394 | if hasattr(self, 'rbr_reparam'): 395 | return 396 | print(f"RepConv_OREPA.switch_to_deploy") 397 | kernel, bias = self.get_equivalent_kernel_bias() 398 | self.rbr_reparam = nn.Conv2d(in_channels=self.rbr_dense.in_channels, out_channels=self.rbr_dense.out_channels, 399 | kernel_size=self.rbr_dense.kernel_size, stride=self.rbr_dense.stride, 400 | padding=self.rbr_dense.padding, dilation=self.rbr_dense.dilation, groups=self.rbr_dense.groups, bias=True) 401 | self.rbr_reparam.weight.data = kernel 402 | self.rbr_reparam.bias.data = bias 403 | for para in self.parameters(): 404 | para.detach_() 405 | self.__delattr__('rbr_dense') 406 | self.__delattr__('rbr_1x1') 407 | if hasattr(self, 'rbr_identity'): 408 | self.__delattr__('rbr_identity') -------------------------------------------------------------------------------- /task1.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import glob 3 | import apriltag 4 | import numpy as np 5 | import matplotlib.cm as cm 6 | from numpy import random 7 | import torch 8 | 9 | from .utils.json_utils import json_preprocess, json_postprocess 10 | from .utils.args_utils import parse_args 11 | from .utils.model_utils import matching, read_image, make_matching_plot, plot_one_box 12 | from .superglue.superpoint import SuperPoint 13 | from .superglue.superglue import SuperGlue 14 | 15 | from .yolov7.models.experimental import attempt_load 16 | from .yolov7.utils.datasets import letterbox, LoadImages 17 | from .yolov7.utils.general import check_img_size, non_max_suppression, scale_coords, cv2 18 | from .yolov7.utils.torch_utils import TracedModel 19 | 20 | 21 | CLASS_MAP = {'person': 0, 'monitor': 1, 'cabinet': 2, 'basket': 3, 'box': 4, 'trash 
bin': 5, 'computer': 6, 'laptop': 7, 'bookshelf': 8, 'chair': 9, 'printer': 10, 'desk': 11, 22 | 'whiteboard': 12, 'banner': 13, 'mirror': 14, 'stairs': 15, 'toy': 16, 'fire extinguisher': 17, 'poster': 18, 'sink': 19, 'exercise tool': 20, 'speaker': 21, 23 | 'up_occluded': 22, 'up_red': 23, 'up_orange': 24, 'up_yellow': 25, 'up_green': 26, 'up_blue': 27, 'up_purple': 28, 'up_white': 29, 'up_gray': 30, 'up_black': 31, 24 | 'low_occluded': 32, 'low_red': 33, 'low_orange': 34, 'low_yellow': 35, 'low_green': 36, 'low_blue': 37, 'low_purple': 38, 'low_white': 39, 'low_gray': 40, 'low_black': 41, 25 | 'person_man': 42, 'person_woman': 43, 'person_child': 44, 'others_lifeguard': 45, 'others_medic': 46} 26 | 27 | class Task1: 28 | def __init__(self, args): 29 | self.clue_path = args.clue_path 30 | self.json_output_path = args.json_output_path 31 | self.task1_debug = args.task1_debug 32 | self.debug_output_path = args.debug_output_path 33 | self.img_conf_th = args.img_conf_th 34 | self.img_kp_th = args.img_kp_th 35 | self.txt_th = args.txt_th 36 | self.od_th = args.od_th 37 | self.total_th = args.total_th 38 | self.show_video = args.show_vid 39 | self.cnt = 0 40 | self.state = 0 41 | self.room_id = None 42 | self.json = {'answer_sheet': { 43 | 'room_id': None, 44 | 'mission': "1", 45 | 'answer': { 46 | 'person_id': {} 47 | } 48 | } 49 | } 50 | self.json_list = [] 51 | self.obj_cls = set() 52 | self.ppl_cls = set() 53 | 54 | # ----------------------------------------- 55 | # image matching model & preprocessing 56 | # ----------------------------------------- 57 | self.img_config = { 58 | 'superpoint': { 59 | 'nms_radius': 4, 60 | 'keypoint_threshold': 0.005, 61 | 'max_keypoints': 1024 62 | }, 63 | 'superglue': { 64 | 'weights': 'indoor', 65 | 'sinkhorn_iterations': 20, 66 | 'match_threshold': 0.2, 67 | } 68 | } 69 | self.superpoint = SuperPoint(self.img_config.get('superpoint', {})).eval().to('cuda') 70 | self.superglue = SuperGlue(self.img_config.get('superglue', {})).eval().to('cuda') 71 | self.match_batch_size = 1 72 | 73 | # ----------------------------------------- 74 | # YOLO model & preprocessing 75 | # ----------------------------------------- 76 | self.imgsz = (640, 640) 77 | self.half = True 78 | self.conf_th = 0.25 79 | self.iou_th = 0.45 80 | self.classes = None 81 | self.cls_agnostic_nms = False 82 | self.yolo_path = args.yolo_path 83 | yolo = attempt_load(self.yolo_path, map_location='cuda').eval() 84 | self.stride = int(yolo.stride.max()) 85 | self.img_size = check_img_size(self.imgsz[0], s=self.stride) 86 | self.names = yolo.names 87 | self.color_list = ['OCC','RED','ORG','YLW','GRN','BLU','PRP','WHT','GRY','BLK'] 88 | self.colors = [[random.randint(0, 255) for _ in range(3)] for _ in self.names] 89 | # if self.half: 90 | # self.yolo = TracedModel(yolo, 'cuda', self.img_size).half() 91 | # else: 92 | # self.yolo = TracedModel(yolo, 'cuda', self.img_size) 93 | self.yolo = yolo.half() 94 | # self.true=1 # NOTE: dummy code for debugging 95 | 96 | self.clue_json_read = False 97 | self.clue_img_read = False 98 | 99 | self.clue_txts = None 100 | self.clue_txt_list = None 101 | 102 | self.clue_img_list = None 103 | self.clue_imgs = None 104 | self.clue_imgs_p = None 105 | self.clue_imgs_scale = None 106 | 107 | def __call__(self, img: np.ndarray, state, frame_for_vis=None): 108 | try: 109 | clue_info = [] 110 | if (state == 0 or state == -1): # NOTE: 복도에서 json, room_id 초기화 111 | self.json = {'answer_sheet': { 112 | 'room_id': None, 113 | 'mission': "1", 114 | 'answer': { 115 | 
'person_id': {} 116 | } 117 | } 118 | } 119 | self.json_list = [] 120 | self.room_id = None 121 | self.obj_cls = set() 122 | self.ppl_cls = set() 123 | 124 | if self.clue_json_read is False: 125 | # ----------------------------------------- 126 | # text clue preprocessing 127 | # ----------------------------------------- 128 | self.clue_txts = glob.glob(self.clue_path+'/*.json', recursive=True) 129 | self.clue_txt_list = ([]) 130 | if len(self.clue_txts) > 0: 131 | self.clue_txts.sort() 132 | for clue_txt_ in self.clue_txts: 133 | clue_txt_key = [] 134 | clue_txt_dict = json_preprocess(clue_txt_) 135 | self.clue_txts_ = list(clue_txt_dict.values())[0] 136 | for i in range(0, len(self.clue_txts_)): 137 | clue_txt_key.append(CLASS_MAP[self.clue_txts_[i]]) 138 | self.clue_txt_list.append(clue_txt_key) 139 | 140 | self.clue_json_read = True 141 | 142 | if self.clue_img_read is False: 143 | # ----------------------------------------- 144 | # image clue preprocessing 145 | # ----------------------------------------- 146 | self.clue_img_list = glob.glob(self.clue_path+'/*.jpg', recursive=True) 147 | self.clue_imgs = [] 148 | self.clue_imgs_p = [] 149 | self.clue_imgs_scales = [] 150 | if len(self.clue_img_list) > 0: 151 | self.clue_img_list.sort() 152 | for clue_img_ in self.clue_img_list: 153 | clue_img_ = cv2.imread(clue_img_, cv2.IMREAD_GRAYSCALE) 154 | image1, inp1, scales1 = read_image(clue_img_, [640, 480], 'cuda') # NOTE: clue image 155 | self.clue_imgs.append(image1) 156 | self.clue_imgs_p.append(inp1) 157 | self.clue_imgs_scales.append(scales1) 158 | 159 | self.clue_img_read = True 160 | 161 | # ----------------------------------------- 162 | # Superglue inference 163 | # ----------------------------------------- 164 | if self.task1_debug: 165 | input_img = cv2.imread(img, cv2.IMREAD_GRAYSCALE) 166 | else: 167 | input_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # NOTE: copy된 frame image 사용 168 | image0, inp0, scales0 = read_image(input_img, [640, 480], 'cuda') # NOTE: video frame image 169 | 170 | if len(self.clue_img_list) > 0: 171 | score_img = [] 172 | for i in range(0, len(self.clue_img_list)): # NOTE: 각 이미지 단서마다 kpts, mean confidence 저장 173 | cv2.putText(frame_for_vis, str(self.clue_txt_list), (80, 300), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 0), 2) 174 | pred, matches, conf = matching({'image0': inp0, 'image1': self.clue_imgs_p[i]}, self.superpoint, self.superglue) 175 | kpts0, kpts1 = pred['keypoints0'], pred['keypoints1'] 176 | valid = matches > -1 177 | mkpts0 = kpts0[valid] 178 | mkpts1 = kpts1[matches[valid]] 179 | mconf = conf[valid] # NOTE: superpoint 개수 180 | score_img.append((mkpts0.shape[0], mconf.mean())) 181 | 182 | if (score_img[i][0] > self.img_kp_th and score_img[i][1] > self.img_conf_th): 183 | im_detections = [] 184 | im_detector = apriltag.Detector() 185 | im_detections.append(im_detector.detect(input_img)) 186 | im_tag_id = [] 187 | 188 | for j in range(0, len(im_detections[0])): 189 | im_tag_id.append(im_detections[0][j].tag_id) 190 | im_json_output = json_postprocess(self.clue_img_list[i][-6:-4], im_tag_id) 191 | self.json_list.append(im_json_output) 192 | 193 | if self.debug_output_path != None: # NOTE: for debugging (superpoint > 50 & confidence > 0.5 일 때만 이미지 저장) 194 | color = cm.jet(mconf) 195 | label = ['SuperGlue', 196 | 'Keypoints: {}:{}'.format(len(kpts0), len(kpts1)), 197 | 'Matches: {}'.format(len(mkpts0)),] 198 | k_thresh = self.img_config['superpoint']['keypoint_threshold'] 199 | m_thresh = self.img_config['superglue']['match_threshold'] 200 | 
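# The two thresholds printed on this debug plot come from self.img_config in
# __init__: 'keypoint_threshold' is SuperPoint's detection-confidence cutoff
# (0.005) and 'match_threshold' is SuperGlue's match-confidence cutoff (0.2).
# They are separate from the clue-acceptance test earlier in this loop, which
# treats clue image i as matched only when both conditions hold:
#
#     score_img[i][0] > self.img_kp_th      # number of matched keypoints (mkpts0)
#     score_img[i][1] > self.img_conf_th    # mean SuperGlue match confidence
#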
small_text = ['Keypoint Threshold: {:.4f}'.format(k_thresh), 201 | 'Match Threshold: {:.2f}'.format(m_thresh),] 202 | make_matching_plot(image0, self.clue_imgs[i], mkpts0, mkpts1, color, label, 203 | self.debug_output_path+'frame'+str(self.cnt)+'_clue'+str(i), small_text) 204 | 205 | clue_info.append(self.clue_img_list[i][-6:-4]) 206 | 207 | # ----------------------------------------- 208 | # YOLO inference 209 | # ----------------------------------------- 210 | if len(self.clue_txt_list) > 0: 211 | score_txt = 0.0 212 | score_bbox = 0.0 213 | for i in range(0, len(self.clue_txt_list)): 214 | self.yolo(torch.zeros(1, 3, self.img_size, self.img_size).to('cuda').type_as(next(self.yolo.parameters()))) 215 | if self.task1_debug: 216 | load_img = LoadImages(img, img_size=self.imgsz, stride=self.stride) 217 | _, yolo_img, im0s, _ = next(iter(load_img)) 218 | else: 219 | im0s = img 220 | # im0s = frame_for_vis # NOTE: video frame image 사용 221 | yolo_img = letterbox(im0s, self.img_size, stride=self.stride)[0] 222 | yolo_img = yolo_img[:, :, ::-1].transpose(2, 0, 1) 223 | yolo_img = np.ascontiguousarray(yolo_img) 224 | yolo_img = torch.from_numpy(yolo_img).to('cuda') 225 | yolo_img = yolo_img.half() if self.half else yolo_img.float() 226 | yolo_img /= 255.0 227 | if len(yolo_img.shape) == 3: 228 | yolo_img = yolo_img.unsqueeze(0) 229 | 230 | pred = self.yolo(yolo_img) 231 | pred = non_max_suppression(pred[0], self.conf_th, self.iou_th, self.classes, self.cls_agnostic_nms, multi_label=False, return_attributes=True)[0] 232 | pred[:, :4] = scale_coords(yolo_img.shape[2:], pred[:, :4], im0s.shape).round() 233 | 234 | if len(pred) > 0: 235 | # NOTE: poster 사람 제거 236 | person_pred = pred[0][pred[0][5] == 0] 237 | not_person_pred = pred[0][pred[0][5] != 0] 238 | poster_pred = pred[0][pred[0][5] == 18] 239 | if len(person_pred) != 0 : 240 | new_person_pred = [] 241 | for pep in person_pred : 242 | flag = False 243 | for pop in poster_pred : 244 | person_loc = pep[:4] 245 | poster_loc = pop[:4] 246 | person_left = person_loc[0] - person_loc[2]/2 247 | person_right = person_loc[0] + person_loc[2]/2 248 | person_top = person_loc[1] - person_loc[3]/2 249 | person_bottom = person_loc[1] + person_loc[3]/2 250 | poster_left = poster_loc[0] - poster_loc[2]/2 251 | poster_right = poster_loc[0] + poster_loc[2]/2 252 | poster_top = poster_loc[1] - poster_loc[3]/2 253 | poster_bottom = poster_loc[1] + poster_loc[3]/2 254 | if (poster_left < person_left) and (poster_top < person_top) and \ 255 | (poster_right > person_right) and (poster_bottom > person_bottom): 256 | # person is in poster 257 | flag = True 258 | break 259 | else : 260 | flag = False 261 | if not flag : 262 | new_person_pred.append(pep) 263 | person_pred = torch.stack(new_person_pred) 264 | pred = [torch.cat([person_pred, not_person_pred])][0] 265 | 266 | # NOTE: pred[0] = [X, Y, W, H, cls_conf, cls, upper_conf, upper_cls, lower_conf, lower_cls, ppl_conf, ppl_cls, oth_conf, oth_cls] 267 | # NOTE: other confidence and other class not used in task1 268 | cls_match_num = 0.0 269 | for j in range(0, len(self.clue_txt_list[i])): 270 | for k in range(0, pred.shape[0]): # NOTE: bbox 여러개 쳐진 경우 271 | if (pred[k][5] == 0 and pred[k][4] >= 0.7): # NOTE: 사람인경우 272 | if pred[k][11] == 0: 273 | name = 42 274 | elif pred[k][11] == 1: 275 | name = 43 276 | else: 277 | name = 44 278 | if name == self.clue_txt_list[i][j]: 279 | score_bbox = score_bbox+pred[k][10] 280 | self.ppl_cls.add(name) 281 | 282 | elif (pred[k][5] == self.clue_txt_list[i][j] and pred[k][4] >= 
self.od_th): # NOTE: 원하는 class (attribute 제외)가 th이상으로 detecting될 때 283 | score_bbox = score_bbox+pred[k][4] # NOTE: bbox마다 score 계산 284 | self.obj_cls.add(pred[k][5]) 285 | 286 | cls_match_num = len(set(self.clue_txt_list[i]).intersection(self.ppl_cls.union(self.obj_cls))) 287 | 288 | cv2.putText(frame_for_vis, str(self.ppl_cls.union(self.obj_cls)), (50, 400), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 0), 2) 289 | 290 | if cls_match_num > self.txt_th: 291 | od_detections = [] 292 | od_detector = apriltag.Detector() 293 | od_detections.append(od_detector.detect(input_img)) 294 | od_tag_id = [] 295 | 296 | for j in range(0, len(od_detections[0])): 297 | od_tag_id.append(od_detections[0][j].tag_id) 298 | od_json_output = json_postprocess(self.clue_txts[i][-7:-5], od_tag_id) 299 | self.json_list.append(od_json_output) 300 | cv2.putText(frame_for_vis, 'TEXT CLUE DETECTED', (50, 450), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 0), 2) 301 | 302 | if self.show_video: 303 | for j in range(0, pred.shape[0]): 304 | bboxes = pred[j][0:4] 305 | confs = pred[j][4] 306 | clss = pred[j][5] 307 | upper_clss = pred[j][7] 308 | lower_clss = pred[j][9] 309 | ppl_clss = pred[j][11] 310 | 311 | if clss == 0: # NOTE: person 312 | if ppl_clss == 0: 313 | name = 'man' 314 | elif ppl_clss == 1: 315 | name = 'woman' 316 | else: 317 | name = 'child' 318 | 319 | upper_color = self.color_list[int(upper_clss.item())] 320 | lower_color = self.color_list[int(lower_clss.item())] 321 | 322 | label = f'{name} {float(confs):.2f} {upper_color} {lower_color}' 323 | else: # NOTE: object 324 | label = f'{self.names[int(clss)]} {float(confs):.2f}' 325 | 326 | plot_one_box(bboxes, frame_for_vis, label=label, color=self.colors[int(clss)], line_thickness=2) 327 | # cv2.imwrite(self.debug_output_path+'frame'+str(self.cnt)+'_text_clue.jpg', frame_for_vis) 328 | 329 | clue_info.append(self.clue_txts[i][-7:-5]) 330 | 331 | # ----------------------------------------- 332 | # Apriltag detection for room id 333 | # ----------------------------------------- 334 | room_detections = [] 335 | room_detector = apriltag.Detector() 336 | room_detections.append(room_detector.detect(input_img)) 337 | room_tag_id = [] 338 | for i in range(0, len(room_detections[0])): 339 | room_tag_id.append(room_detections[0][i].tag_id) 340 | 341 | for i in range(0, len(room_tag_id)): 342 | if room_tag_id[i] >= 500: 343 | self.room_id = room_tag_id[i] 344 | 345 | # ----------------------------------------- 346 | # json update and export 347 | # ----------------------------------------- 348 | # NOTE: json dump에서 정답 json 만들기 (+중복 value 제거) 349 | for i in range(0, len(clue_info)): 350 | ans_list = [] 351 | for j in range(0, len(self.json_list)): 352 | k = list(self.json_list[j]['answer_sheet']['answer']['person_id'].keys()) 353 | for m in range(0, len(k)): 354 | if clue_info[i] == k[m]: 355 | v = self.json_list[j]['answer_sheet']['answer']['person_id'][k[m]] 356 | for n in range(0, len(v)): 357 | ans_list.append(v[n]) 358 | self.json['answer_sheet']['answer']['person_id'][clue_info[i]] = list(set(ans_list)) 359 | 360 | if self.room_id != None: # NOTE: room id 저장 361 | self.json['answer_sheet']['room_id'] = str(self.room_id) 362 | 363 | self.cnt = self.cnt+1 364 | self.state = state 365 | 366 | ans_pair = self.json['answer_sheet']['answer']['person_id'] 367 | ans_keys = list(ans_pair.keys()) 368 | empty_cnt = 0 369 | for i in range(0, len(ans_keys)): 370 | if len(ans_pair[ans_keys[i]]) == 0: 371 | empty_cnt = empty_cnt+1 372 | 
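# Clues whose AprilTag search produced nothing are answered with ["NONE"]; if every
# clue in clue_info came back empty, the block below replaces them all with
# ["UNCLEAR"]. A hypothetical final answer_sheet (clue ids and tag ids below are
# made-up examples, and the value format follows whatever json_postprocess stored):
#
#     {"answer_sheet": {"room_id": "501", "mission": "1",
#                       "answer": {"person_id": {"01": [<tag ids>], "02": ["NONE"]}}}}
#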
self.json['answer_sheet']['answer']['person_id'][ans_keys[i]] = ["NONE"] 373 | if empty_cnt == len(clue_info): # NOTE: value 전부 비어있으면 UNCLEAR로 채움 374 | for i in range(0, len(ans_keys)): 375 | self.json['answer_sheet']['answer']['person_id'][ans_keys[i]] = ["UNCLEAR"] 376 | 377 | # print(self.cnt, self.state) 378 | return self.json 379 | 380 | except: 381 | self.json = {'answer_sheet': { 382 | 'room_id': None, 383 | 'mission': "1", 384 | 'answer': { 385 | 'person_id': 'UNCLEAR' 386 | } 387 | } 388 | } 389 | # print('exception!') 390 | return self.json 391 | 392 | if __name__ == "__main__": 393 | args = parse_args() 394 | task1 = Task1(args) 395 | 396 | if args.task1_debug == None: 397 | frames = None 398 | else: 399 | frames = args.debug_input_path # NOTE: superglue 테스트이미지 (이미지 한장) 400 | 401 | task1(frames) 402 | -------------------------------------------------------------------------------- /superglue/utils.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # Daniel DeTone 39 | # Tomasz Malisiewicz 40 | # 41 | # %AUTHORS_END% 42 | # --------------------------------------------------------------------*/ 43 | # %BANNER_END% 44 | 45 | from pathlib import Path 46 | import time 47 | from collections import OrderedDict 48 | from threading import Thread 49 | import numpy as np 50 | import cv2 51 | import torch 52 | import matplotlib.pyplot as plt 53 | import matplotlib 54 | matplotlib.use('Agg') 55 | 56 | class AverageTimer: 57 | """ Class to help manage printing simple timing of code execution. 
""" 58 | 59 | def __init__(self, smoothing=0.3, newline=False): 60 | self.smoothing = smoothing 61 | self.newline = newline 62 | self.times = OrderedDict() 63 | self.will_print = OrderedDict() 64 | self.reset() 65 | 66 | def reset(self): 67 | now = time.time() 68 | self.start = now 69 | self.last_time = now 70 | for name in self.will_print: 71 | self.will_print[name] = False 72 | 73 | def update(self, name='default'): 74 | now = time.time() 75 | dt = now - self.last_time 76 | if name in self.times: 77 | dt = self.smoothing * dt + (1 - self.smoothing) * self.times[name] 78 | self.times[name] = dt 79 | self.will_print[name] = True 80 | self.last_time = now 81 | 82 | def print(self, text='Timer'): 83 | total = 0. 84 | print('[{}]'.format(text), end=' ') 85 | for key in self.times: 86 | val = self.times[key] 87 | if self.will_print[key]: 88 | print('%s=%.3f' % (key, val), end=' ') 89 | total += val 90 | print('total=%.3f sec {%.1f FPS}' % (total, 1./total), end=' ') 91 | if self.newline: 92 | print(flush=True) 93 | else: 94 | print(end='\r', flush=True) 95 | self.reset() 96 | 97 | 98 | class VideoStreamer: 99 | """ Class to help process image streams. Four types of possible inputs:" 100 | 1.) USB Webcam. 101 | 2.) An IP camera 102 | 3.) A directory of images (files in directory matching 'image_glob'). 103 | 4.) A video file, such as an .mp4 or .avi file. 104 | """ 105 | def __init__(self, basedir, resize, skip, image_glob, max_length=1000000): 106 | self._ip_grabbed = False 107 | self._ip_running = False 108 | self._ip_camera = False 109 | self._ip_image = None 110 | self._ip_index = 0 111 | self.cap = [] 112 | self.camera = True 113 | self.video_file = False 114 | self.listing = [] 115 | self.resize = resize 116 | self.interp = cv2.INTER_AREA 117 | self.i = 0 118 | self.skip = skip 119 | self.max_length = max_length 120 | if isinstance(basedir, int) or basedir.isdigit(): 121 | print('==> Processing USB webcam input: {}'.format(basedir)) 122 | self.cap = cv2.VideoCapture(int(basedir)) 123 | self.listing = range(0, self.max_length) 124 | elif basedir.startswith(('http', 'rtsp')): 125 | print('==> Processing IP camera input: {}'.format(basedir)) 126 | self.cap = cv2.VideoCapture(basedir) 127 | self.start_ip_camera_thread() 128 | self._ip_camera = True 129 | self.listing = range(0, self.max_length) 130 | elif Path(basedir).is_dir(): 131 | print('==> Processing image directory input: {}'.format(basedir)) 132 | self.listing = list(Path(basedir).glob(image_glob[0])) 133 | for j in range(1, len(image_glob)): 134 | image_path = list(Path(basedir).glob(image_glob[j])) 135 | self.listing = self.listing + image_path 136 | self.listing.sort() 137 | self.listing = self.listing[::self.skip] 138 | self.max_length = np.min([self.max_length, len(self.listing)]) 139 | if self.max_length == 0: 140 | raise IOError('No images found (maybe bad \'image_glob\' ?)') 141 | self.listing = self.listing[:self.max_length] 142 | self.camera = False 143 | elif Path(basedir).exists(): 144 | print('==> Processing video input: {}'.format(basedir)) 145 | self.cap = cv2.VideoCapture(basedir) 146 | self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) 147 | num_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) 148 | self.listing = range(0, num_frames) 149 | self.listing = self.listing[::self.skip] 150 | self.video_file = True 151 | self.max_length = np.min([self.max_length, len(self.listing)]) 152 | self.listing = self.listing[:self.max_length] 153 | else: 154 | raise ValueError('VideoStreamer input \"{}\" not recognized.'.format(basedir)) 
155 | if self.camera and not self.cap.isOpened(): 156 | raise IOError('Could not read camera') 157 | 158 | def load_image(self, impath): 159 | """ Read image as grayscale and resize to img_size. 160 | Inputs 161 | impath: Path to input image. 162 | Returns 163 | grayim: uint8 numpy array sized H x W. 164 | """ 165 | grayim = cv2.imread(impath, 0) 166 | if grayim is None: 167 | raise Exception('Error reading image %s' % impath) 168 | w, h = grayim.shape[1], grayim.shape[0] 169 | w_new, h_new = process_resize(w, h, self.resize) 170 | grayim = cv2.resize( 171 | grayim, (w_new, h_new), interpolation=self.interp) 172 | return grayim 173 | 174 | def next_frame(self): 175 | """ Return the next frame, and increment internal counter. 176 | Returns 177 | image: Next H x W image. 178 | status: True or False depending whether image was loaded. 179 | """ 180 | 181 | if self.i == self.max_length: 182 | return (None, False) 183 | if self.camera: 184 | 185 | if self._ip_camera: 186 | #Wait for first image, making sure we haven't exited 187 | while self._ip_grabbed is False and self._ip_exited is False: 188 | time.sleep(.001) 189 | 190 | ret, image = self._ip_grabbed, self._ip_image.copy() 191 | if ret is False: 192 | self._ip_running = False 193 | else: 194 | ret, image = self.cap.read() 195 | if ret is False: 196 | print('VideoStreamer: Cannot get image from camera') 197 | return (None, False) 198 | w, h = image.shape[1], image.shape[0] 199 | if self.video_file: 200 | self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.listing[self.i]) 201 | 202 | w_new, h_new = process_resize(w, h, self.resize) 203 | image = cv2.resize(image, (w_new, h_new), 204 | interpolation=self.interp) 205 | image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) 206 | else: 207 | image_file = str(self.listing[self.i]) 208 | image = self.load_image(image_file) 209 | self.i = self.i + 1 210 | return (image, True) 211 | 212 | def start_ip_camera_thread(self): 213 | self._ip_thread = Thread(target=self.update_ip_camera, args=()) 214 | self._ip_running = True 215 | self._ip_thread.start() 216 | self._ip_exited = False 217 | return self 218 | 219 | def update_ip_camera(self): 220 | while self._ip_running: 221 | ret, img = self.cap.read() 222 | if ret is False: 223 | self._ip_running = False 224 | self._ip_exited = True 225 | self._ip_grabbed = False 226 | return 227 | 228 | self._ip_image = img 229 | self._ip_grabbed = ret 230 | self._ip_index += 1 231 | #print('IPCAMERA THREAD got frame {}'.format(self._ip_index)) 232 | 233 | 234 | def cleanup(self): 235 | self._ip_running = False 236 | 237 | # --- PREPROCESSING --- 238 | 239 | def process_resize(w, h, resize): 240 | assert(len(resize) > 0 and len(resize) <= 2) 241 | if len(resize) == 1 and resize[0] > -1: 242 | scale = resize[0] / max(h, w) 243 | w_new, h_new = int(round(w*scale)), int(round(h*scale)) 244 | elif len(resize) == 1 and resize[0] == -1: 245 | w_new, h_new = w, h 246 | else: # len(resize) == 2: 247 | w_new, h_new = resize[0], resize[1] 248 | 249 | # Issue warning if resolution is too small or too large. 
250 | if max(w_new, h_new) < 160: 251 | print('Warning: input resolution is very small, results may vary') 252 | elif max(w_new, h_new) > 2000: 253 | print('Warning: input resolution is very large, results may vary') 254 | 255 | return w_new, h_new 256 | 257 | 258 | def frame2tensor(frame, device): 259 | return torch.from_numpy(frame/255.).float()[None, None].to(device) 260 | 261 | 262 | def read_image(path, device, resize, rotation, resize_float): 263 | image = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE) 264 | if image is None: 265 | return None, None, None 266 | w, h = image.shape[1], image.shape[0] 267 | w_new, h_new = process_resize(w, h, resize) 268 | scales = (float(w) / float(w_new), float(h) / float(h_new)) 269 | 270 | if resize_float: 271 | image = cv2.resize(image.astype('float32'), (w_new, h_new)) 272 | else: 273 | image = cv2.resize(image, (w_new, h_new)).astype('float32') 274 | 275 | if rotation != 0: 276 | image = np.rot90(image, k=rotation) 277 | if rotation % 2: 278 | scales = scales[::-1] 279 | 280 | inp = frame2tensor(image, device) 281 | return image, inp, scales 282 | 283 | 284 | # --- GEOMETRY --- 285 | 286 | 287 | def estimate_pose(kpts0, kpts1, K0, K1, thresh, conf=0.99999): 288 | if len(kpts0) < 5: 289 | return None 290 | 291 | f_mean = np.mean([K0[0, 0], K1[1, 1], K0[0, 0], K1[1, 1]]) 292 | norm_thresh = thresh / f_mean 293 | 294 | kpts0 = (kpts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None] 295 | kpts1 = (kpts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None] 296 | 297 | E, mask = cv2.findEssentialMat( 298 | kpts0, kpts1, np.eye(3), threshold=norm_thresh, prob=conf, 299 | method=cv2.RANSAC) 300 | 301 | assert E is not None 302 | 303 | best_num_inliers = 0 304 | ret = None 305 | for _E in np.split(E, len(E) / 3): 306 | n, R, t, _ = cv2.recoverPose( 307 | _E, kpts0, kpts1, np.eye(3), 1e9, mask=mask) 308 | if n > best_num_inliers: 309 | best_num_inliers = n 310 | ret = (R, t[:, 0], mask.ravel() > 0) 311 | return ret 312 | 313 | 314 | def rotate_intrinsics(K, image_shape, rot): 315 | """image_shape is the shape of the image after rotation""" 316 | assert rot <= 3 317 | h, w = image_shape[:2][::-1 if (rot % 2) else 1] 318 | fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] 319 | rot = rot % 4 320 | if rot == 1: 321 | return np.array([[fy, 0., cy], 322 | [0., fx, w-1-cx], 323 | [0., 0., 1.]], dtype=K.dtype) 324 | elif rot == 2: 325 | return np.array([[fx, 0., w-1-cx], 326 | [0., fy, h-1-cy], 327 | [0., 0., 1.]], dtype=K.dtype) 328 | else: # if rot == 3: 329 | return np.array([[fy, 0., h-1-cy], 330 | [0., fx, cx], 331 | [0., 0., 1.]], dtype=K.dtype) 332 | 333 | 334 | def rotate_pose_inplane(i_T_w, rot): 335 | rotation_matrices = [ 336 | np.array([[np.cos(r), -np.sin(r), 0., 0.], 337 | [np.sin(r), np.cos(r), 0., 0.], 338 | [0., 0., 1., 0.], 339 | [0., 0., 0., 1.]], dtype=np.float32) 340 | for r in [np.deg2rad(d) for d in (0, 270, 180, 90)] 341 | ] 342 | return np.dot(rotation_matrices[rot], i_T_w) 343 | 344 | 345 | def scale_intrinsics(K, scales): 346 | scales = np.diag([1./scales[0], 1./scales[1], 1.]) 347 | return np.dot(scales, K) 348 | 349 | 350 | def to_homogeneous(points): 351 | return np.concatenate([points, np.ones_like(points[:, :1])], axis=-1) 352 | 353 | 354 | def compute_epipolar_error(kpts0, kpts1, T_0to1, K0, K1): 355 | kpts0 = (kpts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None] 356 | kpts1 = (kpts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None] 357 | kpts0 = to_homogeneous(kpts0) 358 | kpts1 = to_homogeneous(kpts1) 359 | 
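# The lines below evaluate the squared symmetric epipolar distance of each
# normalized, homogeneous correspondence (p0, p1) under the relative pose
# T_0to1 = [R | t]: with the essential matrix E = [t]_x R,
#
#     d_i = (p1_i^T E p0_i)^2 * ( 1 / ||(E p0_i)_xy||^2 + 1 / ||(E^T p1_i)_xy||^2 )
#
# where [t]_x is the skew-symmetric cross-product matrix assembled from t below.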
360 | t0, t1, t2 = T_0to1[:3, 3] 361 | t_skew = np.array([ 362 | [0, -t2, t1], 363 | [t2, 0, -t0], 364 | [-t1, t0, 0] 365 | ]) 366 | E = t_skew @ T_0to1[:3, :3] 367 | 368 | Ep0 = kpts0 @ E.T # N x 3 369 | p1Ep0 = np.sum(kpts1 * Ep0, -1) # N 370 | Etp1 = kpts1 @ E # N x 3 371 | d = p1Ep0**2 * (1.0 / (Ep0[:, 0]**2 + Ep0[:, 1]**2) 372 | + 1.0 / (Etp1[:, 0]**2 + Etp1[:, 1]**2)) 373 | return d 374 | 375 | 376 | def angle_error_mat(R1, R2): 377 | cos = (np.trace(np.dot(R1.T, R2)) - 1) / 2 378 | cos = np.clip(cos, -1., 1.) # numercial errors can make it out of bounds 379 | return np.rad2deg(np.abs(np.arccos(cos))) 380 | 381 | 382 | def angle_error_vec(v1, v2): 383 | n = np.linalg.norm(v1) * np.linalg.norm(v2) 384 | return np.rad2deg(np.arccos(np.clip(np.dot(v1, v2) / n, -1.0, 1.0))) 385 | 386 | 387 | def compute_pose_error(T_0to1, R, t): 388 | R_gt = T_0to1[:3, :3] 389 | t_gt = T_0to1[:3, 3] 390 | error_t = angle_error_vec(t, t_gt) 391 | error_t = np.minimum(error_t, 180 - error_t) # ambiguity of E estimation 392 | error_R = angle_error_mat(R, R_gt) 393 | return error_t, error_R 394 | 395 | 396 | def pose_auc(errors, thresholds): 397 | sort_idx = np.argsort(errors) 398 | errors = np.array(errors.copy())[sort_idx] 399 | recall = (np.arange(len(errors)) + 1) / len(errors) 400 | errors = np.r_[0., errors] 401 | recall = np.r_[0., recall] 402 | aucs = [] 403 | for t in thresholds: 404 | last_index = np.searchsorted(errors, t) 405 | r = np.r_[recall[:last_index], recall[last_index-1]] 406 | e = np.r_[errors[:last_index], t] 407 | aucs.append(np.trapz(r, x=e)/t) 408 | return aucs 409 | 410 | 411 | # --- VISUALIZATION --- 412 | 413 | 414 | def plot_image_pair(imgs, dpi=100, size=6, pad=.5): 415 | n = len(imgs) 416 | assert n == 2, 'number of images must be two' 417 | figsize = (size*n, size*3/4) if size is not None else None 418 | _, ax = plt.subplots(1, n, figsize=figsize, dpi=dpi) 419 | for i in range(n): 420 | ax[i].imshow(imgs[i], cmap=plt.get_cmap('gray'), vmin=0, vmax=255) 421 | ax[i].get_yaxis().set_ticks([]) 422 | ax[i].get_xaxis().set_ticks([]) 423 | for spine in ax[i].spines.values(): # remove frame 424 | spine.set_visible(False) 425 | plt.tight_layout(pad=pad) 426 | 427 | 428 | def plot_keypoints(kpts0, kpts1, color='w', ps=2): 429 | ax = plt.gcf().axes 430 | ax[0].scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps) 431 | ax[1].scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps) 432 | 433 | 434 | def plot_matches(kpts0, kpts1, color, lw=1.5, ps=4): 435 | fig = plt.gcf() 436 | ax = fig.axes 437 | fig.canvas.draw() 438 | 439 | transFigure = fig.transFigure.inverted() 440 | fkpts0 = transFigure.transform(ax[0].transData.transform(kpts0)) 441 | fkpts1 = transFigure.transform(ax[1].transData.transform(kpts1)) 442 | 443 | fig.lines = [matplotlib.lines.Line2D( 444 | (fkpts0[i, 0], fkpts1[i, 0]), (fkpts0[i, 1], fkpts1[i, 1]), zorder=1, 445 | transform=fig.transFigure, c=color[i], linewidth=lw) 446 | for i in range(len(kpts0))] 447 | ax[0].scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps) 448 | ax[1].scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps) 449 | 450 | 451 | def make_matching_plot(image0, image1, kpts0, kpts1, mkpts0, mkpts1, 452 | color, text, path, show_keypoints=False, 453 | fast_viz=False, opencv_display=False, 454 | opencv_title='matches', small_text=[]): 455 | 456 | if fast_viz: 457 | make_matching_plot_fast(image0, image1, kpts0, kpts1, mkpts0, mkpts1, 458 | color, text, path, show_keypoints, 10, 459 | opencv_display, opencv_title, small_text) 460 | return 461 | 462 | plot_image_pair([image0, 
image1]) 463 | if show_keypoints: 464 | plot_keypoints(kpts0, kpts1, color='k', ps=4) 465 | plot_keypoints(kpts0, kpts1, color='w', ps=2) 466 | plot_matches(mkpts0, mkpts1, color) 467 | 468 | fig = plt.gcf() 469 | txt_color = 'k' if image0[:100, :150].mean() > 200 else 'w' 470 | fig.text( 471 | 0.01, 0.99, '\n'.join(text), transform=fig.axes[0].transAxes, 472 | fontsize=15, va='top', ha='left', color=txt_color) 473 | 474 | txt_color = 'k' if image0[-100:, :150].mean() > 200 else 'w' 475 | fig.text( 476 | 0.01, 0.01, '\n'.join(small_text), transform=fig.axes[0].transAxes, 477 | fontsize=5, va='bottom', ha='left', color=txt_color) 478 | 479 | plt.savefig(str(path), bbox_inches='tight', pad_inches=0) 480 | plt.close() 481 | 482 | 483 | def make_matching_plot_fast(image0, image1, kpts0, kpts1, mkpts0, 484 | mkpts1, color, text, path=None, 485 | show_keypoints=False, margin=10, 486 | opencv_display=False, opencv_title='', 487 | small_text=[]): 488 | H0, W0 = image0.shape 489 | H1, W1 = image1.shape 490 | H, W = max(H0, H1), W0 + W1 + margin 491 | 492 | out = 255*np.ones((H, W), np.uint8) 493 | out[:H0, :W0] = image0 494 | out[:H1, W0+margin:] = image1 495 | out = np.stack([out]*3, -1) 496 | 497 | if show_keypoints: 498 | kpts0, kpts1 = np.round(kpts0).astype(int), np.round(kpts1).astype(int) 499 | white = (255, 255, 255) 500 | black = (0, 0, 0) 501 | for x, y in kpts0: 502 | cv2.circle(out, (x, y), 2, black, -1, lineType=cv2.LINE_AA) 503 | cv2.circle(out, (x, y), 1, white, -1, lineType=cv2.LINE_AA) 504 | for x, y in kpts1: 505 | cv2.circle(out, (x + margin + W0, y), 2, black, -1, 506 | lineType=cv2.LINE_AA) 507 | cv2.circle(out, (x + margin + W0, y), 1, white, -1, 508 | lineType=cv2.LINE_AA) 509 | 510 | mkpts0, mkpts1 = np.round(mkpts0).astype(int), np.round(mkpts1).astype(int) 511 | color = (np.array(color[:, :3])*255).astype(int)[:, ::-1] 512 | for (x0, y0), (x1, y1), c in zip(mkpts0, mkpts1, color): 513 | c = c.tolist() 514 | cv2.line(out, (x0, y0), (x1 + margin + W0, y1), 515 | color=c, thickness=1, lineType=cv2.LINE_AA) 516 | # display line end-points as circles 517 | cv2.circle(out, (x0, y0), 2, c, -1, lineType=cv2.LINE_AA) 518 | cv2.circle(out, (x1 + margin + W0, y1), 2, c, -1, 519 | lineType=cv2.LINE_AA) 520 | 521 | # Scale factor for consistent visualization across scales. 522 | sc = min(H / 640., 2.0) 523 | 524 | # Big text. 525 | Ht = int(30 * sc) # text height 526 | txt_color_fg = (255, 255, 255) 527 | txt_color_bg = (0, 0, 0) 528 | for i, t in enumerate(text): 529 | cv2.putText(out, t, (int(8*sc), Ht*(i+1)), cv2.FONT_HERSHEY_DUPLEX, 530 | 1.0*sc, txt_color_bg, 2, cv2.LINE_AA) 531 | cv2.putText(out, t, (int(8*sc), Ht*(i+1)), cv2.FONT_HERSHEY_DUPLEX, 532 | 1.0*sc, txt_color_fg, 1, cv2.LINE_AA) 533 | 534 | # Small text. 
535 | Ht = int(18 * sc) # text height 536 | for i, t in enumerate(reversed(small_text)): 537 | cv2.putText(out, t, (int(8*sc), int(H-Ht*(i+.6))), cv2.FONT_HERSHEY_DUPLEX, 538 | 0.5*sc, txt_color_bg, 2, cv2.LINE_AA) 539 | cv2.putText(out, t, (int(8*sc), int(H-Ht*(i+.6))), cv2.FONT_HERSHEY_DUPLEX, 540 | 0.5*sc, txt_color_fg, 1, cv2.LINE_AA) 541 | 542 | if path is not None: 543 | cv2.imwrite(str(path), out) 544 | 545 | if opencv_display: 546 | cv2.imshow(opencv_title, out) 547 | cv2.waitKey(1) 548 | 549 | return out 550 | 551 | 552 | def error_colormap(x): 553 | return np.clip( 554 | np.stack([2-x*2, x*2, np.zeros_like(x), np.ones_like(x)], -1), 0, 1) 555 | -------------------------------------------------------------------------------- /yolov7/utils/general.py: -------------------------------------------------------------------------------- 1 | # YOLOR general utils 2 | 3 | import glob 4 | import logging 5 | import math 6 | import os 7 | import platform 8 | import random 9 | import re 10 | import subprocess 11 | import time 12 | from pathlib import Path 13 | 14 | import cv2 15 | import numpy as np 16 | import pandas as pd 17 | import torch 18 | import torchvision 19 | 20 | # Settings 21 | torch.set_printoptions(linewidth=320, precision=5, profile='long') 22 | np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5 23 | pd.options.display.max_columns = 10 24 | cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader) 25 | os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(), 8)) # NumExpr max threads 26 | 27 | 28 | def set_logging(rank=-1): 29 | logging.basicConfig( 30 | format="%(message)s", 31 | level=logging.INFO if rank in [-1, 0] else logging.WARN) 32 | 33 | 34 | 35 | def get_latest_run(search_dir='.'): 36 | # Return path to most recent 'last.pt' in /runs (i.e. to --resume from) 37 | last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True) 38 | return max(last_list, key=os.path.getctime) if last_list else '' 39 | 40 | 41 | def isdocker(): 42 | # Is environment a Docker container 43 | return Path('/workspace').exists() # or Path('/.dockerenv').exists() 44 | 45 | 46 | def emojis(str=''): 47 | # Return platform-dependent emoji-safe version of string 48 | return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str 49 | 50 | 51 | def check_online(): 52 | # Check internet connectivity 53 | import socket 54 | try: 55 | socket.create_connection(("1.1.1.1", 443), 5) # check host accesability 56 | return True 57 | except OSError: 58 | return False 59 | 60 | 61 | def check_git_status(): 62 | # Recommend 'git pull' if code is out of date 63 | print(colorstr('github: '), end='') 64 | try: 65 | assert Path('.git').exists(), 'skipping check (not a git repository)' 66 | assert not isdocker(), 'skipping check (Docker image)' 67 | assert check_online(), 'skipping check (offline)' 68 | 69 | cmd = 'git fetch && git config --get remote.origin.url' 70 | url = subprocess.check_output(cmd, shell=True).decode().strip().rstrip('.git') # github repo url 71 | branch = subprocess.check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip() # checked out 72 | n = int(subprocess.check_output(f'git rev-list {branch}..origin/master --count', shell=True)) # commits behind 73 | if n > 0: 74 | s = f"⚠️ WARNING: code is out of date by {n} commit{'s' * (n > 1)}. " \ 75 | f"Use 'git pull' to update or 'git clone {url}' to download latest." 
76 | else: 77 | s = f'up to date with {url} ✅' 78 | print(emojis(s)) # emoji-safe 79 | except Exception as e: 80 | print(e) 81 | 82 | 83 | def check_requirements(requirements='requirements.txt', exclude=()): 84 | # Check installed dependencies meet requirements (pass *.txt file or list of packages) 85 | import pkg_resources as pkg 86 | prefix = colorstr('red', 'bold', 'requirements:') 87 | if isinstance(requirements, (str, Path)): # requirements.txt file 88 | file = Path(requirements) 89 | if not file.exists(): 90 | print(f"{prefix} {file.resolve()} not found, check failed.") 91 | return 92 | requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(file.open()) if x.name not in exclude] 93 | else: # list or tuple of packages 94 | requirements = [x for x in requirements if x not in exclude] 95 | 96 | n = 0 # number of packages updates 97 | for r in requirements: 98 | try: 99 | pkg.require(r) 100 | except Exception as e: # DistributionNotFound or VersionConflict if requirements not met 101 | n += 1 102 | print(f"{prefix} {e.req} not found and is required by YOLOR, attempting auto-update...") 103 | print(subprocess.check_output(f"pip install '{e.req}'", shell=True).decode()) 104 | 105 | if n: # if packages updated 106 | source = file.resolve() if 'file' in locals() else requirements 107 | s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \ 108 | f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n" 109 | print(emojis(s)) # emoji-safe 110 | 111 | 112 | def check_img_size(img_size, s=32): 113 | # Verify img_size is a multiple of stride s 114 | new_size = make_divisible(img_size, int(s)) # ceil gs-multiple 115 | if new_size != img_size: 116 | print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size)) 117 | return new_size 118 | 119 | 120 | def check_imshow(): 121 | # Check if environment supports image displays 122 | try: 123 | assert not isdocker(), 'cv2.imshow() is disabled in Docker environments' 124 | cv2.imshow('test', np.zeros((1, 1, 3))) 125 | cv2.waitKey(1) 126 | cv2.destroyAllWindows() 127 | cv2.waitKey(1) 128 | return True 129 | except Exception as e: 130 | print(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}') 131 | return False 132 | 133 | 134 | def check_file(file): 135 | # Search for file if not found 136 | if Path(file).is_file() or file == '': 137 | return file 138 | else: 139 | files = glob.glob('./**/' + file, recursive=True) # find file 140 | assert len(files), f'File Not Found: {file}' # assert file was found 141 | assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique 142 | return files[0] # return file 143 | 144 | 145 | def check_dataset(dict): 146 | # Download dataset if not found locally 147 | val, s = dict.get('val'), dict.get('download') 148 | if val and len(val): 149 | 150 | val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path 151 | #import pdb; pdb.set_trace() 152 | if not all(x.exists() for x in val): 153 | print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()]) 154 | if s and len(s): # download script 155 | print('Downloading %s ...' 
% s) 156 | if s.startswith('http') and s.endswith('.zip'): # URL 157 | f = Path(s).name # filename 158 | torch.hub.download_url_to_file(s, f) 159 | r = os.system('unzip -q %s -d ../ && rm %s' % (f, f)) # unzip 160 | else: # bash script 161 | r = os.system(s) 162 | print('Dataset autodownload %s\n' % ('success' if r == 0 else 'failure')) # analyze return value 163 | else: 164 | raise Exception('Dataset not found.') 165 | 166 | 167 | def make_divisible(x, divisor): 168 | # Returns x evenly divisible by divisor 169 | return math.ceil(x / divisor) * divisor 170 | 171 | 172 | def clean_str(s): 173 | # Cleans a string by replacing special characters with underscore _ 174 | return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s) 175 | 176 | 177 | def one_cycle(y1=0.0, y2=1.0, steps=100): 178 | # lambda function for sinusoidal ramp from y1 to y2 179 | return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1 180 | 181 | 182 | def colorstr(*input): 183 | # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world') 184 | *args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string 185 | colors = {'black': '\033[30m', # basic colors 186 | 'red': '\033[31m', 187 | 'green': '\033[32m', 188 | 'yellow': '\033[33m', 189 | 'blue': '\033[34m', 190 | 'magenta': '\033[35m', 191 | 'cyan': '\033[36m', 192 | 'white': '\033[37m', 193 | 'bright_black': '\033[90m', # bright colors 194 | 'bright_red': '\033[91m', 195 | 'bright_green': '\033[92m', 196 | 'bright_yellow': '\033[93m', 197 | 'bright_blue': '\033[94m', 198 | 'bright_magenta': '\033[95m', 199 | 'bright_cyan': '\033[96m', 200 | 'bright_white': '\033[97m', 201 | 'end': '\033[0m', # misc 202 | 'bold': '\033[1m', 203 | 'underline': '\033[4m'} 204 | return ''.join(colors[x] for x in args) + f'{string}' + colors['end'] 205 | 206 | 207 | def labels_to_class_weights(labels, nc=80): 208 | # Get class weights (inverse frequency) from training labels 209 | if labels[0] is None: # no labels loaded 210 | return torch.Tensor() 211 | 212 | labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO 213 | classes = labels[:, 0].astype(np.int) # labels = [class xywh] 214 | weights = np.bincount(classes, minlength=nc) # occurrences per class 215 | 216 | # Prepend gridpoint count (for uCE training) 217 | # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image 218 | # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start 219 | 220 | weights[weights == 0] = 1 # replace empty bins with 1 221 | weights = 1 / weights # number of targets per class 222 | weights /= weights.sum() # normalize 223 | return torch.from_numpy(weights) 224 | 225 | 226 | def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)): 227 | # Produces image weights based on class_weights and image contents 228 | class_counts = np.array([np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels]) 229 | image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1) 230 | # index = random.choices(range(n), weights=image_weights, k=1) # weight image sample 231 | return image_weights 232 | 233 | 234 | def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) 235 | # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ 236 | # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n') 237 | # b = 
np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') 238 | # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco 239 | # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet 240 | x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 241 | 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 242 | 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] 243 | return x 244 | 245 | 246 | def xyxy2xywh(x): 247 | # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right 248 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 249 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center 250 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center 251 | y[:, 2] = x[:, 2] - x[:, 0] # width 252 | y[:, 3] = x[:, 3] - x[:, 1] # height 253 | return y 254 | 255 | 256 | def xywh2xyxy(x): 257 | # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 258 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 259 | y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x 260 | y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y 261 | y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x 262 | y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y 263 | return y 264 | 265 | 266 | def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): 267 | # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 268 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 269 | y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x 270 | y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh # top left y 271 | y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw # bottom right x 272 | y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh # bottom right y 273 | return y 274 | 275 | 276 | def xyn2xy(x, w=640, h=640, padw=0, padh=0): 277 | # Convert normalized segments into pixel segments, shape (n,2) 278 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 279 | y[:, 0] = w * x[:, 0] + padw # top left x 280 | y[:, 1] = h * x[:, 1] + padh # top left y 281 | return y 282 | 283 | 284 | def segment2box(segment, width=640, height=640): 285 | # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) 286 | x, y = segment.T # segment xy 287 | inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) 288 | x, y, = x[inside], y[inside] 289 | return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy 290 | 291 | 292 | def segments2boxes(segments): 293 | # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) 
to (cls, xywh) 294 | boxes = [] 295 | for s in segments: 296 | x, y = s.T # segment xy 297 | boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy 298 | return xyxy2xywh(np.array(boxes)) # cls, xywh 299 | 300 | 301 | def resample_segments(segments, n=1000): 302 | # Up-sample an (n,2) segment 303 | for i, s in enumerate(segments): 304 | s = np.concatenate((s, s[0:1, :]), axis=0) 305 | x = np.linspace(0, len(s) - 1, n) 306 | xp = np.arange(len(s)) 307 | segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T # segment xy 308 | return segments 309 | 310 | 311 | def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): 312 | # Rescale coords (xyxy) from img1_shape to img0_shape 313 | if ratio_pad is None: # calculate from img0_shape 314 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new 315 | pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding 316 | else: 317 | gain = ratio_pad[0][0] 318 | pad = ratio_pad[1] 319 | 320 | coords[:, [0, 2]] -= pad[0] # x padding 321 | coords[:, [1, 3]] -= pad[1] # y padding 322 | coords[:, :4] /= gain 323 | clip_coords(coords, img0_shape) 324 | return coords 325 | 326 | 327 | def clip_coords(boxes, img_shape): 328 | # Clip bounding xyxy bounding boxes to image shape (height, width) 329 | boxes[:, 0].clamp_(0, img_shape[1]) # x1 330 | boxes[:, 1].clamp_(0, img_shape[0]) # y1 331 | boxes[:, 2].clamp_(0, img_shape[1]) # x2 332 | boxes[:, 3].clamp_(0, img_shape[0]) # y2 333 | 334 | 335 | def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): 336 | # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4 337 | box2 = box2.T 338 | 339 | # Get the coordinates of bounding boxes 340 | if x1y1x2y2: # x1, y1, x2, y2 = box1 341 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] 342 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] 343 | else: # transform from xywh to xyxy 344 | b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 345 | b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 346 | b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 347 | b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 348 | 349 | # Intersection area 350 | inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ 351 | (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) 352 | 353 | # Union Area 354 | w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps 355 | w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps 356 | union = w1 * h1 + w2 * h2 - inter + eps 357 | 358 | iou = inter / union 359 | 360 | if GIoU or DIoU or CIoU: 361 | cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width 362 | ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height 363 | if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 364 | c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared 365 | rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + 366 | (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared 367 | if DIoU: 368 | return iou - rho2 / c2 # DIoU 369 | elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 370 | v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2) 371 | with torch.no_grad(): 372 | alpha = v / (v - iou + (1 + eps)) 373 | return iou - (rho2 / c2 + v * alpha) # CIoU 374 | else: # 
GIoU https://arxiv.org/pdf/1902.09630.pdf 375 | c_area = cw * ch + eps # convex area 376 | return iou - (c_area - union) / c_area # GIoU 377 | else: 378 | return iou # IoU 379 | 380 | 381 | 382 | 383 | def bbox_alpha_iou(box1, box2, x1y1x2y2=False, GIoU=False, DIoU=False, CIoU=False, alpha=2, eps=1e-9): 384 | # Returns tsqrt_he IoU of box1 to box2. box1 is 4, box2 is nx4 385 | box2 = box2.T 386 | 387 | # Get the coordinates of bounding boxes 388 | if x1y1x2y2: # x1, y1, x2, y2 = box1 389 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] 390 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] 391 | else: # transform from xywh to xyxy 392 | b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 393 | b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 394 | b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 395 | b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 396 | 397 | # Intersection area 398 | inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ 399 | (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) 400 | 401 | # Union Area 402 | w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps 403 | w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps 404 | union = w1 * h1 + w2 * h2 - inter + eps 405 | 406 | # change iou into pow(iou+eps) 407 | # iou = inter / union 408 | iou = torch.pow(inter/union + eps, alpha) 409 | # beta = 2 * alpha 410 | if GIoU or DIoU or CIoU: 411 | cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width 412 | ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height 413 | if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 414 | c2 = (cw ** 2 + ch ** 2) ** alpha + eps # convex diagonal 415 | rho_x = torch.abs(b2_x1 + b2_x2 - b1_x1 - b1_x2) 416 | rho_y = torch.abs(b2_y1 + b2_y2 - b1_y1 - b1_y2) 417 | rho2 = ((rho_x ** 2 + rho_y ** 2) / 4) ** alpha # center distance 418 | if DIoU: 419 | return iou - rho2 / c2 # DIoU 420 | elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 421 | v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) 422 | with torch.no_grad(): 423 | alpha_ciou = v / ((1 + eps) - inter / union + v) 424 | # return iou - (rho2 / c2 + v * alpha_ciou) # CIoU 425 | return iou - (rho2 / c2 + torch.pow(v * alpha_ciou + eps, alpha)) # CIoU 426 | else: # GIoU https://arxiv.org/pdf/1902.09630.pdf 427 | # c_area = cw * ch + eps # convex area 428 | # return iou - (c_area - union) / c_area # GIoU 429 | c_area = torch.max(cw * ch + eps, union) # convex area 430 | return iou - torch.pow((c_area - union) / c_area + eps, alpha) # GIoU 431 | else: 432 | return iou # torch.log(iou+eps) or iou 433 | 434 | 435 | def box_iou(box1, box2): 436 | # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py 437 | """ 438 | Return intersection-over-union (Jaccard index) of boxes. 439 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
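IoU = |A ∩ B| / |A ∪ B|, computed pairwise for all N x M box combinations.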
440 | Arguments: 441 | box1 (Tensor[N, 4]) 442 | box2 (Tensor[M, 4]) 443 | Returns: 444 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 445 | IoU values for every element in boxes1 and boxes2 446 | """ 447 | 448 | def box_area(box): 449 | # box = 4xn 450 | return (box[2] - box[0]) * (box[3] - box[1]) 451 | 452 | area1 = box_area(box1.T) 453 | area2 = box_area(box2.T) 454 | 455 | # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) 456 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 457 | return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) 458 | 459 | 460 | def wh_iou(wh1, wh2): 461 | # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2 462 | wh1 = wh1[:, None] # [N,1,2] 463 | wh2 = wh2[None] # [1,M,2] 464 | inter = torch.min(wh1, wh2).prod(2) # [N,M] 465 | return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) 466 | 467 | 468 | def box_giou(box1, box2): 469 | """ 470 | Return generalized intersection-over-union (Jaccard index) between two sets of boxes. 471 | Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with 472 | ``0 <= x1 < x2`` and ``0 <= y1 < y2``. 473 | Args: 474 | boxes1 (Tensor[N, 4]): first set of boxes 475 | boxes2 (Tensor[M, 4]): second set of boxes 476 | Returns: 477 | Tensor[N, M]: the NxM matrix containing the pairwise generalized IoU values 478 | for every element in boxes1 and boxes2 479 | """ 480 | 481 | def box_area(box): 482 | # box = 4xn 483 | return (box[2] - box[0]) * (box[3] - box[1]) 484 | 485 | area1 = box_area(box1.T) 486 | area2 = box_area(box2.T) 487 | 488 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 489 | union = (area1[:, None] + area2 - inter) 490 | 491 | iou = inter / union 492 | 493 | lti = torch.min(box1[:, None, :2], box2[:, :2]) 494 | rbi = torch.max(box1[:, None, 2:], box2[:, 2:]) 495 | 496 | whi = (rbi - lti).clamp(min=0) # [N,M,2] 497 | areai = whi[:, :, 0] * whi[:, :, 1] 498 | 499 | return iou - (areai - union) / areai 500 | 501 | 502 | def box_ciou(box1, box2, eps: float = 1e-7): 503 | """ 504 | Return complete intersection-over-union (Jaccard index) between two sets of boxes. 505 | Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with 506 | ``0 <= x1 < x2`` and ``0 <= y1 < y2``. 507 | Args: 508 | boxes1 (Tensor[N, 4]): first set of boxes 509 | boxes2 (Tensor[M, 4]): second set of boxes 510 | eps (float, optional): small number to prevent division by zero. 
Default: 1e-7 511 | Returns: 512 | Tensor[N, M]: the NxM matrix containing the pairwise complete IoU values 513 | for every element in boxes1 and boxes2 514 | """ 515 | 516 | def box_area(box): 517 | # box = 4xn 518 | return (box[2] - box[0]) * (box[3] - box[1]) 519 | 520 | area1 = box_area(box1.T) 521 | area2 = box_area(box2.T) 522 | 523 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 524 | union = (area1[:, None] + area2 - inter) 525 | 526 | iou = inter / union 527 | 528 | lti = torch.min(box1[:, None, :2], box2[:, :2]) 529 | rbi = torch.max(box1[:, None, 2:], box2[:, 2:]) 530 | 531 | whi = (rbi - lti).clamp(min=0) # [N,M,2] 532 | diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps 533 | 534 | # centers of boxes 535 | x_p = (box1[:, None, 0] + box1[:, None, 2]) / 2 536 | y_p = (box1[:, None, 1] + box1[:, None, 3]) / 2 537 | x_g = (box2[:, 0] + box2[:, 2]) / 2 538 | y_g = (box2[:, 1] + box2[:, 3]) / 2 539 | # The distance between boxes' centers squared. 540 | centers_distance_squared = (x_p - x_g) ** 2 + (y_p - y_g) ** 2 541 | 542 | w_pred = box1[:, None, 2] - box1[:, None, 0] 543 | h_pred = box1[:, None, 3] - box1[:, None, 1] 544 | 545 | w_gt = box2[:, 2] - box2[:, 0] 546 | h_gt = box2[:, 3] - box2[:, 1] 547 | 548 | v = (4 / (torch.pi ** 2)) * torch.pow((torch.atan(w_gt / h_gt) - torch.atan(w_pred / h_pred)), 2) 549 | with torch.no_grad(): 550 | alpha = v / (1 - iou + v + eps) 551 | return iou - (centers_distance_squared / diagonal_distance_squared) - alpha * v 552 | 553 | 554 | def box_diou(box1, box2, eps: float = 1e-7): 555 | """ 556 | Return distance intersection-over-union (Jaccard index) between two sets of boxes. 557 | Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with 558 | ``0 <= x1 < x2`` and ``0 <= y1 < y2``. 559 | Args: 560 | boxes1 (Tensor[N, 4]): first set of boxes 561 | boxes2 (Tensor[M, 4]): second set of boxes 562 | eps (float, optional): small number to prevent division by zero. Default: 1e-7 563 | Returns: 564 | Tensor[N, M]: the NxM matrix containing the pairwise distance IoU values 565 | for every element in boxes1 and boxes2 566 | """ 567 | 568 | def box_area(box): 569 | # box = 4xn 570 | return (box[2] - box[0]) * (box[3] - box[1]) 571 | 572 | area1 = box_area(box1.T) 573 | area2 = box_area(box2.T) 574 | 575 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 576 | union = (area1[:, None] + area2 - inter) 577 | 578 | iou = inter / union 579 | 580 | lti = torch.min(box1[:, None, :2], box2[:, :2]) 581 | rbi = torch.max(box1[:, None, 2:], box2[:, 2:]) 582 | 583 | whi = (rbi - lti).clamp(min=0) # [N,M,2] 584 | diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps 585 | 586 | # centers of boxes 587 | x_p = (box1[:, None, 0] + box1[:, None, 2]) / 2 588 | y_p = (box1[:, None, 1] + box1[:, None, 3]) / 2 589 | x_g = (box2[:, 0] + box2[:, 2]) / 2 590 | y_g = (box2[:, 1] + box2[:, 3]) / 2 591 | # The distance between boxes' centers squared. 592 | centers_distance_squared = (x_p - x_g) ** 2 + (y_p - y_g) ** 2 593 | 594 | # The distance IoU is the IoU penalized by a normalized 595 | # distance between boxes' centers squared. 
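# DIoU = IoU - d^2 / c^2, where d is the distance between the box centers and c is the diagonal of the smallest box enclosing both. For example, the touching but non-overlapping boxes [0, 0, 2, 2] and [2, 0, 4, 2] give IoU = 0, d^2 = 4 and c^2 = 20 (+ eps), so box_diou returns roughly -0.2.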
596 | return iou - (centers_distance_squared / diagonal_distance_squared) 597 | 598 | 599 | def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, 600 | labels=(), return_attributes=False): 601 | """Runs Non-Maximum Suppression (NMS) on inference results 602 | 603 | Returns: 604 | list of detections, on (n,6) tensor per image [xyxy, conf, cls] 605 | """ 606 | # nc = prediction.shape[2] - 5 # number of classes 607 | ######################################## DC 608 | nc = prediction.shape[2] - 30 609 | ######################################## DC 610 | 611 | xc = prediction[..., 4] > conf_thres # candidates 612 | 613 | # Settings 614 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height 615 | max_det = 300 # maximum number of detections per image 616 | max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() 617 | time_limit = 10.0 # seconds to quit after 618 | redundant = True # require redundant detections 619 | multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) 620 | merge = False # use merge-NMS 621 | 622 | t = time.time() 623 | if return_attributes : 624 | # also return upper color, lower color, people type, other type 625 | output = [torch.zeros((0, 10), device=prediction.device)] * prediction.shape[0] 626 | else: 627 | output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] 628 | for xi, x in enumerate(prediction): # image index, image inference 629 | # Apply constraints 630 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 631 | x = x[xc[xi]] # confidence 632 | # Cat apriori labels if autolabelling 633 | if labels and len(labels[xi]): 634 | l = labels[xi] 635 | v = torch.zeros((len(l), nc + 5), device=x.device) 636 | v[:, :4] = l[:, 1:5] # box 637 | v[:, 4] = 1.0 # conf 638 | v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls 639 | x = torch.cat((x, v), 0) 640 | 641 | # If none remain process next image 642 | if not x.shape[0]: 643 | continue 644 | 645 | # Compute conf 646 | if nc == 1: 647 | x[:, 5:nc+5] = x[:, 4:5] # for models with one class, cls_loss is 0 and cls_conf is always 0.5, 648 | # so there is no need to multiplicate. 
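# For multi-class models the per-class score is obj_conf * cls_conf (else branch below); the extra attribute columns of this custom head (upper/lower colour, person type, other type) are thresholded separately further down.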
649 | else: 650 | x[:, 5:nc+5] *= x[:, 4:5] # conf = obj_conf * cls_conf 651 | 652 | # Box (center x, center y, width, height) to (x1, y1, x2, y2) 653 | box = xywh2xyxy(x[:, :4]) 654 | 655 | # Detections matrix nx6 (xyxy, conf, cls) 656 | ######################################## DC 657 | if multi_label: 658 | i, j = (x[:, 5:nc+5] > conf_thres).nonzero(as_tuple=False).T 659 | if return_attributes: 660 | upcol_i, upcol_j = (x[:, 5:nc+15] > conf_thres).nonzero(as_tuple=False).T 661 | lowcol_i, lowcol_j = (x[:, nc+15:nc+25] > conf_thres).nonzero(as_tuple=False).T 662 | ppl_i, ppl_j = (x[:, nc+25:nc+28] > conf_thres).nonzero(as_tuple=False).T 663 | oth_i, oth_j = (x[:, nc+28:nc+30] > conf_thres).nonzero(as_tuple=False).T 664 | to_cat = [box[i], x[i, j + 5, None], j[:, None].float(), 665 | x[upcol_i, upcol_j + 5, None], upcol_j[:, None].float(), 666 | x[lowcol_i, lowcol_j + 5, None], lowcol_j[:, None].float(), 667 | x[ppl_i, ppl_j + 5, None], ppl_j[:, None].float(), 668 | x[oth_i, oth_j + 5, None], oth_j[:, None].float()] 669 | else : 670 | to_cat = [box[i], x[i, j + 5, None], j[:, None].float()] 671 | x = torch.cat(to_cat, 1) 672 | else: # best class only 673 | conf, j = x[:, 5:nc+5].max(1, keepdim=True) 674 | if return_attributes: 675 | upcol_conf, upcol_j = x[:, nc+5:nc+15].max(1, keepdim=True) 676 | lowcol_conf, lowcol_j = x[:, nc+15:nc+25].max(1, keepdim=True) 677 | ppl_conf, ppl_j = x[:, nc+25:nc+28].max(1, keepdim=True) 678 | oth_conf, oth_j = x[:, nc+28:nc+30].max(1, keepdim=True) 679 | to_cat = [box, conf, j.float(), upcol_conf, upcol_j.float(), 680 | lowcol_conf, lowcol_j.float(), ppl_conf, ppl_j.float(), 681 | oth_conf, oth_j.float()] 682 | else : 683 | to_cat = [box, conf, j.float()] 684 | x = torch.cat(to_cat, 1)[conf.view(-1) > conf_thres] 685 | # x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] 686 | ######################################## DC 687 | 688 | # Filter by class 689 | if classes is not None: 690 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] 691 | 692 | # Apply finite constraint 693 | # if not torch.isfinite(x).all(): 694 | # x = x[torch.isfinite(x).all(1)] 695 | 696 | # Check shape 697 | n = x.shape[0] # number of boxes 698 | if not n: # no boxes 699 | continue 700 | elif n > max_nms: # excess boxes 701 | x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence 702 | 703 | # Batched NMS 704 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes 705 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores 706 | i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS 707 | if i.shape[0] > max_det: # limit detections 708 | i = i[:max_det] 709 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 710 | # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 711 | iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix 712 | weights = iou * scores[None] # box weights 713 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes 714 | if redundant: 715 | i = i[iou.sum(1) > 1] # require redundancy 716 | output[xi] = x[i] 717 | if (time.time() - t) > time_limit: 718 | print(f'WARNING: NMS time limit {time_limit}s exceeded') 719 | break # time limit exceeded 720 | 721 | return output 722 | 723 | 724 | def non_max_suppression_kpt(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, 725 | labels=(), kpt_label=False, nc=None, nkpt=None): 726 | """Runs Non-Maximum Suppression (NMS) on inference results 727 | 
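When kpt_label is True, each kept detection also carries the raw keypoint values taken from column 6 onward (x, y, confidence per keypoint), so the output rows are wider than (n, 6).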
728 | Returns: 729 | list of detections, on (n,6) tensor per image [xyxy, conf, cls] 730 | """ 731 | if nc is None: 732 | nc = prediction.shape[2] - 5 if not kpt_label else prediction.shape[2] - 56 # number of classes 733 | xc = prediction[..., 4] > conf_thres # candidates 734 | 735 | # Settings 736 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height 737 | max_det = 300 # maximum number of detections per image 738 | max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() 739 | time_limit = 10.0 # seconds to quit after 740 | redundant = True # require redundant detections 741 | multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) 742 | merge = False # use merge-NMS 743 | 744 | t = time.time() 745 | output = [torch.zeros((0,6), device=prediction.device)] * prediction.shape[0] 746 | for xi, x in enumerate(prediction): # image index, image inference 747 | # Apply constraints 748 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 749 | x = x[xc[xi]] # confidence 750 | 751 | # Cat apriori labels if autolabelling 752 | if labels and len(labels[xi]): 753 | l = labels[xi] 754 | v = torch.zeros((len(l), nc + 5), device=x.device) 755 | v[:, :4] = l[:, 1:5] # box 756 | v[:, 4] = 1.0 # conf 757 | v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls 758 | x = torch.cat((x, v), 0) 759 | 760 | # If none remain process next image 761 | if not x.shape[0]: 762 | continue 763 | 764 | # Compute conf 765 | x[:, 5:5+nc] *= x[:, 4:5] # conf = obj_conf * cls_conf 766 | 767 | # Box (center x, center y, width, height) to (x1, y1, x2, y2) 768 | box = xywh2xyxy(x[:, :4]) 769 | 770 | # Detections matrix nx6 (xyxy, conf, cls) 771 | if multi_label: 772 | i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T 773 | x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) 774 | else: # best class only 775 | if not kpt_label: 776 | conf, j = x[:, 5:].max(1, keepdim=True) 777 | x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] 778 | else: 779 | kpts = x[:, 6:] 780 | conf, j = x[:, 5:6].max(1, keepdim=True) 781 | x = torch.cat((box, conf, j.float(), kpts), 1)[conf.view(-1) > conf_thres] 782 | 783 | 784 | # Filter by class 785 | if classes is not None: 786 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] 787 | 788 | # Apply finite constraint 789 | # if not torch.isfinite(x).all(): 790 | # x = x[torch.isfinite(x).all(1)] 791 | 792 | # Check shape 793 | n = x.shape[0] # number of boxes 794 | if not n: # no boxes 795 | continue 796 | elif n > max_nms: # excess boxes 797 | x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence 798 | 799 | # Batched NMS 800 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes 801 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores 802 | i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS 803 | if i.shape[0] > max_det: # limit detections 804 | i = i[:max_det] 805 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 806 | # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 807 | iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix 808 | weights = iou * scores[None] # box weights 809 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes 810 | if redundant: 811 | i = i[iou.sum(1) > 1] # require redundancy 812 | 813 | output[xi] = x[i] 814 | if (time.time() - t) > time_limit: 815 | print(f'WARNING: NMS time limit {time_limit}s exceeded') 816 | break # time 
limit exceeded 817 | 818 | return output 819 | 820 | 821 | def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_optimizer() 822 | # Strip optimizer from 'f' to finalize training, optionally save as 's' 823 | x = torch.load(f, map_location=torch.device('cpu')) 824 | if x.get('ema'): 825 | x['model'] = x['ema'] # replace model with ema 826 | for k in 'optimizer', 'training_results', 'wandb_id', 'ema', 'updates': # keys 827 | x[k] = None 828 | x['epoch'] = -1 829 | x['model'].half() # to FP16 830 | for p in x['model'].parameters(): 831 | p.requires_grad = False 832 | torch.save(x, s or f) 833 | mb = os.path.getsize(s or f) / 1E6 # filesize 834 | print(f"Optimizer stripped from {f},{(' saved as %s,' % s) if s else ''} {mb:.1f}MB") 835 | 836 | 837 | def apply_classifier(x, model, img, im0): 838 | # applies a second stage classifier to yolo outputs 839 | im0 = [im0] if isinstance(im0, np.ndarray) else im0 840 | for i, d in enumerate(x): # per image 841 | if d is not None and len(d): 842 | d = d.clone() 843 | 844 | # Reshape and pad cutouts 845 | b = xyxy2xywh(d[:, :4]) # boxes 846 | b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square 847 | b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad 848 | d[:, :4] = xywh2xyxy(b).long() 849 | 850 | # Rescale boxes from img_size to im0 size 851 | scale_coords(img.shape[2:], d[:, :4], im0[i].shape) 852 | 853 | # Classes 854 | pred_cls1 = d[:, 5].long() 855 | ims = [] 856 | for j, a in enumerate(d): # per item 857 | cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])] 858 | im = cv2.resize(cutout, (224, 224)) # BGR 859 | # cv2.imwrite('test%i.jpg' % j, cutout) 860 | 861 | im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 862 | im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32 863 | im /= 255.0 # 0 - 255 to 0.0 - 1.0 864 | ims.append(im) 865 | 866 | pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction 867 | x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections 868 | 869 | return x 870 | 871 | 872 | def increment_path(path, exist_ok=True, sep=''): 873 | # Increment path, i.e. runs/exp --> runs/exp{sep}0, runs/exp{sep}1 etc. 
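# If the path does not yet exist (or exist_ok is set) it is returned unchanged; otherwise the index after the highest existing numbered copy is appended (2 when no numbered copies exist yet).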
874 | path = Path(path) # os-agnostic 875 | if (path.exists() and exist_ok) or (not path.exists()): 876 | return str(path) 877 | else: 878 | dirs = glob.glob(f"{path}{sep}*") # similar paths 879 | matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs] 880 | i = [int(m.groups()[0]) for m in matches if m] # indices 881 | n = max(i) + 1 if i else 2 # increment number 882 | return f"{path}{sep}{n}" # update path 883 | -------------------------------------------------------------------------------- /yolov7/models/yolo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os.path 4 | import sys 5 | from copy import deepcopy 6 | 7 | # sys.path.append('./') # to run '$ python *.py' files in subdirectories 8 | sys.path.append(os.path.abspath(os.path.dirname(__file__))) 9 | logger = logging.getLogger(__name__) 10 | import torch 11 | 12 | from models.common import * 13 | from models.experimental import * 14 | #sys.path.append('/home/eulrang/workspace/git/AGC2022_round3_task1/yolov7/utils') 15 | sys.path.append(os.path.join(os.path.abspath(os.path.dirname(os.path.abspath(os.path.dirname(__file__)))), 'utils')) 16 | from autoanchor import check_anchor_order 17 | from general import make_divisible, check_file, set_logging 18 | from torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ 19 | select_device, copy_attr 20 | from loss import SigmoidBin 21 | 22 | 23 | class Detect(nn.Module): 24 | stride = None # strides computed during build 25 | export = False # onnx export 26 | end2end = False 27 | include_nms = False 28 | concat = False 29 | 30 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 31 | super(Detect, self).__init__() 32 | self.nc = nc # number of classes 33 | self.no = nc + 5 # number of outputs per anchor 34 | self.nl = len(anchors) # number of detection layers 35 | self.na = len(anchors[0]) // 2 # number of anchors 36 | self.grid = [torch.zeros(1)] * self.nl # init grid 37 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 38 | self.register_buffer('anchors', a) # shape(nl,na,2) 39 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 40 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 41 | 42 | def forward(self, x): 43 | # x = x.copy() # for profiling 44 | z = [] # inference output 45 | self.training |= self.export 46 | for i in range(self.nl): 47 | x[i] = self.m[i](x[i]) # conv 48 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 49 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 50 | 51 | if not self.training: # inference 52 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 53 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 54 | y = x[i].sigmoid() 55 | if not torch.onnx.is_in_onnx_export(): 56 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 57 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 58 | else: 59 | xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 60 | xy = xy * (2. 
* self.stride[i]) + (self.stride[i] * (self.grid[i] - 0.5)) # new xy 61 | wh = wh ** 2 * (4 * self.anchor_grid[i].data) # new wh 62 | y = torch.cat((xy, wh, conf), 4) 63 | z.append(y.view(bs, -1, self.no)) 64 | 65 | if self.training: 66 | out = x 67 | elif self.end2end: 68 | out = torch.cat(z, 1) 69 | elif self.include_nms: 70 | z = self.convert(z) 71 | out = (z, ) 72 | elif self.concat: 73 | out = torch.cat(z, 1) 74 | else: 75 | out = (torch.cat(z, 1), x) 76 | 77 | return out 78 | 79 | @staticmethod 80 | def _make_grid(nx=20, ny=20): 81 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 82 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 83 | 84 | def convert(self, z): 85 | z = torch.cat(z, 1) 86 | box = z[:, :, :4] 87 | conf = z[:, :, 4:5] 88 | score = z[:, :, 5:] 89 | score *= conf 90 | convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], 91 | dtype=torch.float32, 92 | device=z.device) 93 | box @= convert_matrix 94 | return (box, score) 95 | 96 | 97 | class IDetect(nn.Module): 98 | stride = None # strides computed during build 99 | export = False # onnx export 100 | end2end = False 101 | include_nms = False 102 | concat = False 103 | 104 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 105 | super(IDetect, self).__init__() 106 | self.nc = nc # number of classes 107 | self.no = nc + 5 # number of outputs per anchor 108 | self.nl = len(anchors) # number of detection layers 109 | self.na = len(anchors[0]) // 2 # number of anchors 110 | self.grid = [torch.zeros(1)] * self.nl # init grid 111 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 112 | self.register_buffer('anchors', a) # shape(nl,na,2) 113 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 114 | # self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 22 x 3 115 | self.m = nn.ModuleList(nn.Conv2d(x, 85 * 3, 1) for x in ch) 116 | # self.m_loc = nn.ModuleList(nn.Conv2d(x, 5 * self.na, 1) for x in ch) 117 | self.m_cls = nn.ModuleList(nn.Conv2d(x, (self.no - 5) * self.na, 1) for x in ch) 118 | 119 | self.ia = nn.ModuleList(ImplicitA(x) for x in ch) 120 | self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch) 121 | 122 | def forward(self, x): 123 | # x = x.copy() # for profiling 124 | z = [] # inference output 125 | self.training |= self.export 126 | for i in range(self.nl): 127 | # x[i] = self.m[i](self.ia[i](x[i])) # conv 128 | x[i] = torch.cat([self.m[i](self.ia[i](x[i]))[:,:5*self.na], self.m_cls[i](self.ia[i](x[i]))], 1) 129 | x[i] = self.im[i](x[i]) 130 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 131 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 132 | 133 | if not self.training: # inference 134 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 135 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 136 | 137 | y = x[i].sigmoid() 138 | y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + self.grid[i]) * self.stride[i] # xy 139 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 140 | z.append(y.view(bs, -1, self.no)) 141 | 142 | return x if self.training else (torch.cat(z, 1), x) 143 | 144 | def fuseforward(self, x): 145 | # x = x.copy() # for profiling 146 | z = [] # inference output 147 | self.training |= self.export 148 | for i in range(self.nl): 149 | x[i] = self.m[i](x[i]) # conv 150 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 151 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 152 | 153 | if not self.training: # inference 154 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 155 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 156 | 157 | y = x[i].sigmoid() 158 | if not torch.onnx.is_in_onnx_export(): 159 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 160 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 161 | else: 162 | xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 163 | xy = xy * (2. * self.stride[i]) + (self.stride[i] * (self.grid[i] - 0.5)) # new xy 164 | wh = wh ** 2 * (4 * self.anchor_grid[i].data) # new wh 165 | y = torch.cat((xy, wh, conf), 4) 166 | z.append(y.view(bs, -1, self.no)) 167 | 168 | if self.training: 169 | out = x 170 | elif self.end2end: 171 | out = torch.cat(z, 1) 172 | elif self.include_nms: 173 | z = self.convert(z) 174 | out = (z, ) 175 | elif self.concat: 176 | out = torch.cat(z, 1) 177 | else: 178 | out = (torch.cat(z, 1), x) 179 | 180 | return out 181 | 182 | def fuse(self): 183 | print("IDetect.fuse") 184 | # fuse ImplicitA and Convolution 185 | for i in range(len(self.m)): 186 | c1,c2,_,_ = self.m[i].weight.shape 187 | c1_,c2_, _,_ = self.ia[i].implicit.shape 188 | self.m[i].bias += torch.matmul(self.m[i].weight.reshape(c1,c2),self.ia[i].implicit.reshape(c2_,c1_)).squeeze(1) 189 | 190 | # fuse ImplicitM and Convolution 191 | for i in range(len(self.m)): 192 | c1,c2, _,_ = self.im[i].implicit.shape 193 | self.m[i].bias *= self.im[i].implicit.reshape(c2) 194 | self.m[i].weight *= self.im[i].implicit.transpose(0,1) 195 | 196 | @staticmethod 197 | def _make_grid(nx=20, ny=20): 198 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 199 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 200 | 201 | def convert(self, z): 202 | z = torch.cat(z, 1) 203 | box = z[:, :, :4] 204 | conf = z[:, :, 4:5] 205 | score = z[:, :, 5:] 206 | score *= conf 207 | convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], 208 | dtype=torch.float32, 209 | device=z.device) 210 | box @= convert_matrix 211 | return (box, score) 212 | 213 | 214 | class IKeypoint(nn.Module): 215 | stride = None # strides computed during build 216 | export = False # onnx export 217 | 218 | def __init__(self, nc=80, anchors=(), nkpt=17, ch=(), inplace=True, dw_conv_kpt=False): # detection layer 219 | super(IKeypoint, self).__init__() 220 | self.nc = nc # number of classes 221 | self.nkpt = nkpt 222 | self.dw_conv_kpt = dw_conv_kpt 223 | self.no_det=(nc + 5) # number of outputs per anchor for box and class 224 | self.no_kpt = 3*self.nkpt ## number of outputs per anchor for keypoints 225 | self.no = self.no_det+self.no_kpt 226 | self.nl = len(anchors) # number of detection layers 227 | self.na = len(anchors[0]) // 2 # number of anchors 228 | self.grid = [torch.zeros(1)] * self.nl # init grid 229 | self.flip_test = False 230 | a = 
torch.tensor(anchors).float().view(self.nl, -1, 2) 231 | self.register_buffer('anchors', a) # shape(nl,na,2) 232 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 233 | self.m = nn.ModuleList(nn.Conv2d(x, self.no_det * self.na, 1) for x in ch) # output conv 234 | 235 | self.ia = nn.ModuleList(ImplicitA(x) for x in ch) 236 | self.im = nn.ModuleList(ImplicitM(self.no_det * self.na) for _ in ch) 237 | 238 | if self.nkpt is not None: 239 | if self.dw_conv_kpt: #keypoint head is slightly more complex 240 | self.m_kpt = nn.ModuleList( 241 | nn.Sequential(DWConv(x, x, k=3), Conv(x,x), 242 | DWConv(x, x, k=3), Conv(x, x), 243 | DWConv(x, x, k=3), Conv(x,x), 244 | DWConv(x, x, k=3), Conv(x, x), 245 | DWConv(x, x, k=3), Conv(x, x), 246 | DWConv(x, x, k=3), nn.Conv2d(x, self.no_kpt * self.na, 1)) for x in ch) 247 | else: #keypoint head is a single convolution 248 | self.m_kpt = nn.ModuleList(nn.Conv2d(x, self.no_kpt * self.na, 1) for x in ch) 249 | 250 | self.inplace = inplace # use in-place ops (e.g. slice assignment) 251 | 252 | def forward(self, x): 253 | # x = x.copy() # for profiling 254 | z = [] # inference output 255 | self.training |= self.export 256 | for i in range(self.nl): 257 | if self.nkpt is None or self.nkpt==0: 258 | x[i] = self.im[i](self.m[i](self.ia[i](x[i]))) # conv 259 | else : 260 | x[i] = torch.cat((self.im[i](self.m[i](self.ia[i](x[i]))), self.m_kpt[i](x[i])), axis=1) 261 | 262 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 263 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 264 | x_det = x[i][..., :6] 265 | x_kpt = x[i][..., 6:] 266 | 267 | if not self.training: # inference 268 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 269 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 270 | kpt_grid_x = self.grid[i][..., 0:1] 271 | kpt_grid_y = self.grid[i][..., 1:2] 272 | 273 | if self.nkpt == 0: 274 | y = x[i].sigmoid() 275 | else: 276 | y = x_det.sigmoid() 277 | 278 | if self.inplace: 279 | xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 280 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh 281 | if self.nkpt != 0: 282 | x_kpt[..., 0::3] = (x_kpt[..., ::3] * 2. - 0.5 + kpt_grid_x.repeat(1,1,1,1,17)) * self.stride[i] # xy 283 | x_kpt[..., 1::3] = (x_kpt[..., 1::3] * 2. - 0.5 + kpt_grid_y.repeat(1,1,1,1,17)) * self.stride[i] # xy 284 | #x_kpt[..., 0::3] = (x_kpt[..., ::3] + kpt_grid_x.repeat(1,1,1,1,17)) * self.stride[i] # xy 285 | #x_kpt[..., 1::3] = (x_kpt[..., 1::3] + kpt_grid_y.repeat(1,1,1,1,17)) * self.stride[i] # xy 286 | #print('=============') 287 | #print(self.anchor_grid[i].shape) 288 | #print(self.anchor_grid[i][...,0].unsqueeze(4).shape) 289 | #print(x_kpt[..., 0::3].shape) 290 | #x_kpt[..., 0::3] = ((x_kpt[..., 0::3].tanh() * 2.) ** 3 * self.anchor_grid[i][...,0].unsqueeze(4).repeat(1,1,1,1,self.nkpt)) + kpt_grid_x.repeat(1,1,1,1,17) * self.stride[i] # xy 291 | #x_kpt[..., 1::3] = ((x_kpt[..., 1::3].tanh() * 2.) ** 3 * self.anchor_grid[i][...,1].unsqueeze(4).repeat(1,1,1,1,self.nkpt)) + kpt_grid_y.repeat(1,1,1,1,17) * self.stride[i] # xy 292 | #x_kpt[..., 0::3] = (((x_kpt[..., 0::3].sigmoid() * 4.) ** 2 - 8.) * self.anchor_grid[i][...,0].unsqueeze(4).repeat(1,1,1,1,self.nkpt)) + kpt_grid_x.repeat(1,1,1,1,17) * self.stride[i] # xy 293 | #x_kpt[..., 1::3] = (((x_kpt[..., 1::3].sigmoid() * 4.) ** 2 - 8.) 
* self.anchor_grid[i][...,1].unsqueeze(4).repeat(1,1,1,1,self.nkpt)) + kpt_grid_y.repeat(1,1,1,1,17) * self.stride[i] # xy 294 | x_kpt[..., 2::3] = x_kpt[..., 2::3].sigmoid() 295 | 296 | y = torch.cat((xy, wh, y[..., 4:], x_kpt), dim = -1) 297 | 298 | else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 299 | xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 300 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 301 | if self.nkpt != 0: 302 | y[..., 6:] = (y[..., 6:] * 2. - 0.5 + self.grid[i].repeat((1,1,1,1,self.nkpt))) * self.stride[i] # xy 303 | y = torch.cat((xy, wh, y[..., 4:]), -1) 304 | 305 | z.append(y.view(bs, -1, self.no)) 306 | 307 | return x if self.training else (torch.cat(z, 1), x) 308 | 309 | @staticmethod 310 | def _make_grid(nx=20, ny=20): 311 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 312 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 313 | 314 | 315 | class IAuxDetect(nn.Module): 316 | stride = None # strides computed during build 317 | export = False # onnx export 318 | end2end = False 319 | include_nms = False 320 | concat = False 321 | 322 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 323 | super(IAuxDetect, self).__init__() 324 | self.nc = nc # number of classes 325 | self.no = nc + 5 # number of outputs per anchor 326 | self.nl = len(anchors) # number of detection layers 327 | self.na = len(anchors[0]) // 2 # number of anchors 328 | self.grid = [torch.zeros(1)] * self.nl # init grid 329 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 330 | self.register_buffer('anchors', a) # shape(nl,na,2) 331 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 332 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch[:self.nl]) # output conv 333 | self.m2 = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch[self.nl:]) # output conv 334 | 335 | self.ia = nn.ModuleList(ImplicitA(x) for x in ch[:self.nl]) 336 | self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch[:self.nl]) 337 | 338 | def forward(self, x): 339 | # x = x.copy() # for profiling 340 | z = [] # inference output 341 | self.training |= self.export 342 | for i in range(self.nl): 343 | x[i] = self.m[i](self.ia[i](x[i])) # conv 344 | x[i] = self.im[i](x[i]) 345 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 346 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 347 | 348 | x[i+self.nl] = self.m2[i](x[i+self.nl]) 349 | x[i+self.nl] = x[i+self.nl].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 350 | 351 | if not self.training: # inference 352 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 353 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 354 | 355 | y = x[i].sigmoid() 356 | if not torch.onnx.is_in_onnx_export(): 357 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 358 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 359 | else: 360 | xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 361 | xy = xy * (2. 
* self.stride[i]) + (self.stride[i] * (self.grid[i] - 0.5)) # new xy 362 | wh = wh ** 2 * (4 * self.anchor_grid[i].data) # new wh 363 | y = torch.cat((xy, wh, conf), 4) 364 | z.append(y.view(bs, -1, self.no)) 365 | 366 | return x if self.training else (torch.cat(z, 1), x[:self.nl]) 367 | 368 | def fuseforward(self, x): 369 | # x = x.copy() # for profiling 370 | z = [] # inference output 371 | self.training |= self.export 372 | for i in range(self.nl): 373 | x[i] = self.m[i](x[i]) # conv 374 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 375 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 376 | 377 | if not self.training: # inference 378 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 379 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 380 | 381 | y = x[i].sigmoid() 382 | if not torch.onnx.is_in_onnx_export(): 383 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 384 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 385 | else: 386 | xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 387 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].data # wh 388 | y = torch.cat((xy, wh, y[..., 4:]), -1) 389 | z.append(y.view(bs, -1, self.no)) 390 | 391 | if self.training: 392 | out = x 393 | elif self.end2end: 394 | out = torch.cat(z, 1) 395 | elif self.include_nms: 396 | z = self.convert(z) 397 | out = (z, ) 398 | elif self.concat: 399 | out = torch.cat(z, 1) 400 | else: 401 | out = (torch.cat(z, 1), x) 402 | 403 | return out 404 | 405 | def fuse(self): 406 | print("IAuxDetect.fuse") 407 | # fuse ImplicitA and Convolution 408 | for i in range(len(self.m)): 409 | c1,c2,_,_ = self.m[i].weight.shape 410 | c1_,c2_, _,_ = self.ia[i].implicit.shape 411 | self.m[i].bias += torch.matmul(self.m[i].weight.reshape(c1,c2),self.ia[i].implicit.reshape(c2_,c1_)).squeeze(1) 412 | 413 | # fuse ImplicitM and Convolution 414 | for i in range(len(self.m)): 415 | c1,c2, _,_ = self.im[i].implicit.shape 416 | self.m[i].bias *= self.im[i].implicit.reshape(c2) 417 | self.m[i].weight *= self.im[i].implicit.transpose(0,1) 418 | 419 | @staticmethod 420 | def _make_grid(nx=20, ny=20): 421 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 422 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 423 | 424 | def convert(self, z): 425 | z = torch.cat(z, 1) 426 | box = z[:, :, :4] 427 | conf = z[:, :, 4:5] 428 | score = z[:, :, 5:] 429 | score *= conf 430 | convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], 431 | dtype=torch.float32, 432 | device=z.device) 433 | box @= convert_matrix 434 | return (box, score) 435 | 436 | 437 | class IBin(nn.Module): 438 | stride = None # strides computed during build 439 | export = False # onnx export 440 | 441 | def __init__(self, nc=80, anchors=(), ch=(), bin_count=21): # detection layer 442 | super(IBin, self).__init__() 443 | self.nc = nc # number of classes 444 | self.bin_count = bin_count 445 | 446 | self.w_bin_sigmoid = SigmoidBin(bin_count=self.bin_count, min=0.0, max=4.0) 447 | self.h_bin_sigmoid = SigmoidBin(bin_count=self.bin_count, min=0.0, max=4.0) 448 | # classes, x,y,obj 449 | self.no = nc + 3 + \ 450 | self.w_bin_sigmoid.get_length() + self.h_bin_sigmoid.get_length() # w-bce, h-bce 451 | # + self.x_bin_sigmoid.get_length() + self.y_bin_sigmoid.get_length() 452 | 453 | self.nl = len(anchors) # number of detection layers 454 | self.na = len(anchors[0]) // 2 # number of anchors 455 | 
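# One lazily built grid per detection layer; the placeholders below are replaced with real coordinate grids the first time forward() runs at a given feature-map size.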
self.grid = [torch.zeros(1)] * self.nl # init grid 456 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 457 | self.register_buffer('anchors', a) # shape(nl,na,2) 458 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 459 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 460 | 461 | self.ia = nn.ModuleList(ImplicitA(x) for x in ch) 462 | self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch) 463 | 464 | def forward(self, x): 465 | 466 | #self.x_bin_sigmoid.use_fw_regression = True 467 | #self.y_bin_sigmoid.use_fw_regression = True 468 | self.w_bin_sigmoid.use_fw_regression = True 469 | self.h_bin_sigmoid.use_fw_regression = True 470 | 471 | # x = x.copy() # for profiling 472 | z = [] # inference output 473 | self.training |= self.export 474 | for i in range(self.nl): 475 | x[i] = self.m[i](self.ia[i](x[i])) # conv 476 | x[i] = self.im[i](x[i]) 477 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 478 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 479 | 480 | if not self.training: # inference 481 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 482 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 483 | 484 | y = x[i].sigmoid() 485 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 486 | #y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 487 | 488 | 489 | #px = (self.x_bin_sigmoid.forward(y[..., 0:12]) + self.grid[i][..., 0]) * self.stride[i] 490 | #py = (self.y_bin_sigmoid.forward(y[..., 12:24]) + self.grid[i][..., 1]) * self.stride[i] 491 | 492 | pw = self.w_bin_sigmoid.forward(y[..., 2:24]) * self.anchor_grid[i][..., 0] 493 | ph = self.h_bin_sigmoid.forward(y[..., 24:46]) * self.anchor_grid[i][..., 1] 494 | 495 | #y[..., 0] = px 496 | #y[..., 1] = py 497 | y[..., 2] = pw 498 | y[..., 3] = ph 499 | 500 | y = torch.cat((y[..., 0:4], y[..., 46:]), dim=-1) 501 | 502 | z.append(y.view(bs, -1, y.shape[-1])) 503 | 504 | return x if self.training else (torch.cat(z, 1), x) 505 | 506 | @staticmethod 507 | def _make_grid(nx=20, ny=20): 508 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 509 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 510 | 511 | 512 | class Model(nn.Module): 513 | def __init__(self, cfg='yolor-csp-c.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes 514 | super(Model, self).__init__() 515 | self.traced = False 516 | if isinstance(cfg, dict): 517 | self.yaml = cfg # model dict 518 | else: # is *.yaml 519 | import yaml # for torch hub 520 | self.yaml_file = Path(cfg).name 521 | with open(cfg) as f: 522 | self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict 523 | 524 | # Define model 525 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 526 | if nc and nc != self.yaml['nc']: 527 | logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") 528 | self.yaml['nc'] = nc # override yaml value 529 | if anchors: 530 | logger.info(f'Overriding model.yaml anchors with anchors={anchors}') 531 | self.yaml['anchors'] = round(anchors) # override yaml value 532 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 533 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 534 | # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 535 | 536 | # Build strides, anchors 537 | m = self.model[-1] # Detect() 538 | if isinstance(m, 
Detect): 539 | s = 256 # 2x min stride 540 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 541 | check_anchor_order(m) 542 | m.anchors /= m.stride.view(-1, 1, 1) 543 | self.stride = m.stride 544 | self._initialize_biases() # only run once 545 | # print('Strides: %s' % m.stride.tolist()) 546 | if isinstance(m, IDetect): 547 | s = 256 # 2x min stride 548 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 549 | check_anchor_order(m) 550 | m.anchors /= m.stride.view(-1, 1, 1) 551 | self.stride = m.stride 552 | self._initialize_biases() # only run once 553 | # print('Strides: %s' % m.stride.tolist()) 554 | if isinstance(m, IAuxDetect): 555 | s = 256 # 2x min stride 556 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[:4]]) # forward 557 | #print(m.stride) 558 | check_anchor_order(m) 559 | m.anchors /= m.stride.view(-1, 1, 1) 560 | self.stride = m.stride 561 | self._initialize_aux_biases() # only run once 562 | # print('Strides: %s' % m.stride.tolist()) 563 | if isinstance(m, IBin): 564 | s = 256 # 2x min stride 565 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 566 | check_anchor_order(m) 567 | m.anchors /= m.stride.view(-1, 1, 1) 568 | self.stride = m.stride 569 | self._initialize_biases_bin() # only run once 570 | # print('Strides: %s' % m.stride.tolist()) 571 | if isinstance(m, IKeypoint): 572 | s = 256 # 2x min stride 573 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 574 | check_anchor_order(m) 575 | m.anchors /= m.stride.view(-1, 1, 1) 576 | self.stride = m.stride 577 | self._initialize_biases_kpt() # only run once 578 | # print('Strides: %s' % m.stride.tolist()) 579 | 580 | # Init weights, biases 581 | initialize_weights(self) 582 | self.info() 583 | logger.info('') 584 | 585 | def forward(self, x, augment=False, profile=False): 586 | if augment: 587 | img_size = x.shape[-2:] # height, width 588 | s = [1, 0.83, 0.67] # scales 589 | f = [None, 3, None] # flips (2-ud, 3-lr) 590 | y = [] # outputs 591 | for si, fi in zip(s, f): 592 | xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) 593 | yi = self.forward_once(xi)[0] # forward 594 | # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 595 | yi[..., :4] /= si # de-scale 596 | if fi == 2: 597 | yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud 598 | elif fi == 3: 599 | yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr 600 | y.append(yi) 601 | return torch.cat(y, 1), None # augmented inference, train 602 | else: 603 | return self.forward_once(x, profile) # single-scale inference, train 604 | 605 | def forward_once(self, x, profile=False): 606 | y, dt = [], [] # outputs 607 | for m in self.model: 608 | if m.f != -1: # if not from previous layer 609 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 610 | 611 | if not hasattr(self, 'traced'): 612 | self.traced=False 613 | 614 | if self.traced: 615 | if isinstance(m, Detect) or isinstance(m, IDetect) or isinstance(m, IAuxDetect) or isinstance(m, IKeypoint): 616 | break 617 | 618 | if profile: 619 | c = isinstance(m, (Detect, IDetect, IAuxDetect, IBin)) 620 | o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS 621 | for _ in range(10): 622 | m(x.copy() if c else x) 623 | t 
= time_synchronized() 624 | for _ in range(10): 625 | m(x.copy() if c else x) 626 | dt.append((time_synchronized() - t) * 100) 627 | print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) 628 | 629 | x = m(x) # run 630 | 631 | y.append(x if m.i in self.save else None) # save output 632 | 633 | if profile: 634 | print('%.1fms total' % sum(dt)) 635 | return x 636 | 637 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 638 | # https://arxiv.org/abs/1708.02002 section 3.3 639 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 640 | m = self.model[-1] # Detect() module 641 | for mi, s in zip(m.m, m.stride): # from 642 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 643 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 644 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 645 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 646 | 647 | def _initialize_aux_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 648 | # https://arxiv.org/abs/1708.02002 section 3.3 649 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 650 | m = self.model[-1] # Detect() module 651 | for mi, mi2, s in zip(m.m, m.m2, m.stride): # from 652 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 653 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 654 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 655 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 656 | b2 = mi2.bias.view(m.na, -1) # conv.bias(255) to (3,85) 657 | b2.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 658 | b2.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 659 | mi2.bias = torch.nn.Parameter(b2.view(-1), requires_grad=True) 660 | 661 | def _initialize_biases_bin(self, cf=None): # initialize biases into Detect(), cf is class frequency 662 | # https://arxiv.org/abs/1708.02002 section 3.3 663 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 664 | m = self.model[-1] # Bin() module 665 | bc = m.bin_count 666 | for mi, s in zip(m.m, m.stride): # from 667 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 668 | old = b[:, (0,1,2,bc+3)].data 669 | obj_idx = 2*bc+4 670 | b[:, :obj_idx].data += math.log(0.6 / (bc + 1 - 0.99)) 671 | b[:, obj_idx].data += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 672 | b[:, (obj_idx+1):].data += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 673 | b[:, (0,1,2,bc+3)].data = old 674 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 675 | 676 | def _initialize_biases_kpt(self, cf=None): # initialize biases into Detect(), cf is class frequency 677 | # https://arxiv.org/abs/1708.02002 section 3.3 678 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 
679 | m = self.model[-1] # Detect() module 680 | for mi, s in zip(m.m, m.stride): # from 681 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 682 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 683 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 684 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 685 | 686 | def _print_biases(self): 687 | m = self.model[-1] # Detect() module 688 | for mi in m.m: # from 689 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 690 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 691 | 692 | # def _print_weights(self): 693 | # for m in self.model.modules(): 694 | # if type(m) is Bottleneck: 695 | # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 696 | 697 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 698 | print('Fusing layers... ') 699 | for m in self.model.modules(): 700 | if isinstance(m, RepConv): 701 | #print(f" fuse_repvgg_block") 702 | m.fuse_repvgg_block() 703 | elif isinstance(m, RepConv_OREPA): 704 | #print(f" switch_to_deploy") 705 | m.switch_to_deploy() 706 | elif type(m) is Conv and hasattr(m, 'bn'): 707 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 708 | delattr(m, 'bn') # remove batchnorm 709 | m.forward = m.fuseforward # update forward 710 | elif isinstance(m, (IDetect, IAuxDetect)): 711 | m.fuse() 712 | m.forward = m.fuseforward 713 | self.info() 714 | return self 715 | 716 | def nms(self, mode=True): # add or remove NMS module 717 | present = type(self.model[-1]) is NMS # last layer is NMS 718 | if mode and not present: 719 | print('Adding NMS... ') 720 | m = NMS() # module 721 | m.f = -1 # from 722 | m.i = self.model[-1].i + 1 # index 723 | self.model.add_module(name='%s' % m.i, module=m) # add 724 | self.eval() 725 | elif not mode and present: 726 | print('Removing NMS... ') 727 | self.model = self.model[:-1] # remove 728 | return self 729 | 730 | def autoshape(self): # add autoShape module 731 | print('Adding autoShape... 
') 732 | m = autoShape(self) # wrap model 733 | copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes 734 | return m 735 | 736 | def info(self, verbose=False, img_size=640): # print model information 737 | model_info(self, verbose, img_size) 738 | 739 | 740 | def parse_model(d, ch): # model_dict, input_channels(3) 741 | logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 742 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 743 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 744 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 745 | 746 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 747 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 748 | m = eval(m) if isinstance(m, str) else m # eval strings 749 | for j, a in enumerate(args): 750 | try: 751 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 752 | except: 753 | pass 754 | 755 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 756 | if m in [nn.Conv2d, Conv, RobustConv, RobustConv2, DWConv, GhostConv, RepConv, RepConv_OREPA, DownC, 757 | SPP, SPPF, SPPCSPC, GhostSPPCSPC, MixConv2d, Focus, Stem, GhostStem, CrossConv, 758 | Bottleneck, BottleneckCSPA, BottleneckCSPB, BottleneckCSPC, 759 | RepBottleneck, RepBottleneckCSPA, RepBottleneckCSPB, RepBottleneckCSPC, 760 | Res, ResCSPA, ResCSPB, ResCSPC, 761 | RepRes, RepResCSPA, RepResCSPB, RepResCSPC, 762 | ResX, ResXCSPA, ResXCSPB, ResXCSPC, 763 | RepResX, RepResXCSPA, RepResXCSPB, RepResXCSPC, 764 | Ghost, GhostCSPA, GhostCSPB, GhostCSPC, 765 | SwinTransformerBlock, STCSPA, STCSPB, STCSPC, 766 | SwinTransformer2Block, ST2CSPA, ST2CSPB, ST2CSPC]: 767 | c1, c2 = ch[f], args[0] 768 | if c2 != no: # if not output 769 | c2 = make_divisible(c2 * gw, 8) 770 | 771 | args = [c1, c2, *args[1:]] 772 | if m in [DownC, SPPCSPC, GhostSPPCSPC, 773 | BottleneckCSPA, BottleneckCSPB, BottleneckCSPC, 774 | RepBottleneckCSPA, RepBottleneckCSPB, RepBottleneckCSPC, 775 | ResCSPA, ResCSPB, ResCSPC, 776 | RepResCSPA, RepResCSPB, RepResCSPC, 777 | ResXCSPA, ResXCSPB, ResXCSPC, 778 | RepResXCSPA, RepResXCSPB, RepResXCSPC, 779 | GhostCSPA, GhostCSPB, GhostCSPC, 780 | STCSPA, STCSPB, STCSPC, 781 | ST2CSPA, ST2CSPB, ST2CSPC]: 782 | args.insert(2, n) # number of repeats 783 | n = 1 784 | elif m is nn.BatchNorm2d: 785 | args = [ch[f]] 786 | elif m is Concat: 787 | c2 = sum([ch[x] for x in f]) 788 | elif m is Chuncat: 789 | c2 = sum([ch[x] for x in f]) 790 | elif m is Shortcut: 791 | c2 = ch[f[0]] 792 | elif m is Foldcut: 793 | c2 = ch[f] // 2 794 | elif m in [Detect, IDetect, IAuxDetect, IBin, IKeypoint]: 795 | args.append([ch[x] for x in f]) 796 | if isinstance(args[1], int): # number of anchors 797 | args[1] = [list(range(args[1] * 2))] * len(f) 798 | elif m is ReOrg: 799 | c2 = ch[f] * 4 800 | elif m is Contract: 801 | c2 = ch[f] * args[0] ** 2 802 | elif m is Expand: 803 | c2 = ch[f] // args[0] ** 2 804 | else: 805 | c2 = ch[f] 806 | 807 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 808 | t = str(m)[8:-2].replace('__main__.', '') # module type 809 | np = sum([x.numel() for x in m_.parameters()]) # number params 810 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 811 | logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print 812 | save.extend(x % i for x 
in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 813 | layers.append(m_) 814 | if i == 0: 815 | ch = [] 816 | ch.append(c2) 817 | return nn.Sequential(*layers), sorted(save) 818 | 819 | 820 | if __name__ == '__main__': 821 | parser = argparse.ArgumentParser() 822 | parser.add_argument('--cfg', type=str, default='yolor-csp-c.yaml', help='model.yaml') 823 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 824 | parser.add_argument('--profile', action='store_true', help='profile model speed') 825 | opt = parser.parse_args() 826 | opt.cfg = check_file(opt.cfg) # check file 827 | set_logging() 828 | device = select_device(opt.device) 829 | 830 | # Create model 831 | model = Model(opt.cfg).to(device) 832 | model.train() 833 | 834 | if opt.profile: 835 | img = torch.rand(1, 3, 640, 640).to(device) 836 | y = model(img, profile=True) 837 | 838 | # Profile 839 | # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) 840 | # y = model(img, profile=True) 841 | 842 | # Tensorboard 843 | # from torch.utils.tensorboard import SummaryWriter 844 | # tb_writer = SummaryWriter() 845 | # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/") 846 | # tb_writer.add_graph(model.model, img) # add model to tensorboard 847 | # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard --------------------------------------------------------------------------------
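
The `Model` class above builds the network from a YAML config via `parse_model`, derives the strides and normalised anchors from a dummy 256×256 forward pass, and initialises the detection-head biases. A minimal usage sketch, following the pattern of the file's own `__main__` block; the import path and the presence of the default `yolor-csp-c.yaml` config on disk are assumptions, and the shapes in the comments are illustrative only:
```
# Illustrative usage only -- config path and input size are placeholders.
import torch
from yolov7.models.yolo import Model   # assumes the repository root is on PYTHONPATH

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = Model('yolor-csp-c.yaml').to(device)   # same default cfg as the __main__ block above
model.eval()                                   # inference path: the head returns (predictions, raw features)

img = torch.zeros(1, 3, 640, 640, device=device)  # dummy image; H and W should be multiples of the max stride
with torch.no_grad():
    pred = model(img)[0]   # (batch, total anchors over all scales, 5 + nc)
print(pred.shape)
```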
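
The detection heads above all decode boxes the same way: the sigmoid-activated xy channels become a centre offset in (-0.5, 1.5) grid cells that is added to the cell index and scaled by the stride, while the wh channels are doubled, squared, and scaled by the anchor size. A standalone restatement of that decoding for illustration only; the function name is not part of the repository:
```
# Illustrative only: the box decoding used by the heads in yolo.py, factored out.
import torch

def decode_xywh(y, grid, anchor_grid, stride):
    """y: sigmoid-activated head output (..., >= 4); returns xy and wh in input-image pixels."""
    xy = (y[..., 0:2] * 2. - 0.5 + grid) * stride   # centre offset in (-0.5, 1.5) cells, plus cell index, times stride
    wh = (y[..., 2:4] * 2) ** 2 * anchor_grid       # (0, 4) times the anchor width/height
    return xy, wh
```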