├── yolov7 ├── __init__.py ├── models │ ├── __init__.py │ ├── experimental.py │ ├── common.py │ └── yolo.py └── utils │ ├── __init__.py │ ├── autoanchor.py │ ├── torch_utils.py │ └── general.py ├── superglue ├── __init__.py ├── README.md ├── matching.py ├── superpoint.py ├── superglue.py └── utils.py ├── __init__.py ├── README.md ├── utils ├── args_utils.py ├── json_utils.py └── model_utils.py └── task1.py /yolov7/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /superglue/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /yolov7/models/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /yolov7/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # init -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .task1 import Task1 2 | -------------------------------------------------------------------------------- /superglue/README.md: -------------------------------------------------------------------------------- 1 | ## SuperGlue Dependencies 2 | - Python 3 >= 3.5 3 | - PyTorch >= 1.1 4 | - OpenCV >= 3.4 (4.1.2.30 recommended for best GUI keyboard interaction) 5 | - Matplotlib >= 3.1 6 | - NumPy >= 1.18 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AGC2022_Task1_Detection 2 | ### Pre-trained Model Download 3 | - SuperGlue: https://drive.google.com/file/d/1ACYKMSg8GCb5qEgvfO5m0LTCJvhnOMWm/view?usp=sharing 4 | - YOLOv7: https://drive.google.com/file/d/1-eCIYzgr9eXp2ANBp3R4ZQor7Vl8YQwO/view?usp=share_link 5 | 6 | ## SuperGlue Dependencies 7 | - Python >= 3.5 8 | - PyTorch >= 1.1 9 | - OpenCV >= 3.4 (4.1.2.30 recommended for best GUI keyboard interaction) 10 | - Matplotlib >= 3.1 11 | - NumPy >= 1.18 12 | 13 | ### Make Checkpoint Folder 14 | ``` 15 | cd superglue 16 | mkdir weights 17 | cd weights 18 | mv {SUPERGLUE_PRETRAINED_MODEL} .
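# Note (inference from superglue/superpoint.py and superglue/superglue.py in this repo):
# both models load their checkpoints relative to the package, so this weights/ folder is
# expected to end up containing superpoint_v1.pth and superglue_indoor.pth
# (or superglue_outdoor.pth, depending on the 'weights' config).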
19 | ``` 20 | 21 | ### Run 22 | ``` 23 | python task1.py 24 | --clue_path={CLUE_PATH} 25 | --yolo_path={YOLO_PRETRAINED_MODEL_PATH} 26 | --img_conf_th 0.1 27 | --img_kp_th 150 28 | --txt_th 0.3 29 | --od_th 0.3 30 | ``` 31 | -------------------------------------------------------------------------------- /utils/args_utils.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def parse_args(): 4 | parser = argparse.ArgumentParser() 5 | parser.add_argument('--video_path', default=None, help='video path') 6 | parser.add_argument('--clue_path', default=None, help='clue(img, txt) path') 7 | parser.add_argument('--json_output_path', default='.', help='json output path') 8 | 9 | parser.add_argument('--task1_debug', action="store_true", help='(optional)debug mode') 10 | parser.add_argument('--debug_input_path', default=None, help='debugging input image path') 11 | parser.add_argument('--debug_output_path', default=None, help='debugging output image path') 12 | 13 | parser.add_argument('--yolo_path', default='.', help='yolo task1 checkpoint path') 14 | parser.add_argument('--img_conf_th', type=float, default=0.6, help='img threshold') # NOTE: determine best confidence threshold value 15 | parser.add_argument('--img_kp_th', type=float, default=50, help='img threshold') # NOTE: determine best keypoint threshold value 16 | parser.add_argument('--txt_th', type=float, default=0.8, help='txt threshold') # NOTE: determine value 17 | parser.add_argument('--od_th', type=float, default=0.5, help='OD threshold') # NOTE: determine value 18 | parser.add_argument('--total_th', type=float, default=0.9, help='img+txt threshold') # NOTE: determine value 19 | args = parser.parse_args() 20 | 21 | return args -------------------------------------------------------------------------------- /yolov7/models/experimental.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import torch 3 | import torch.nn as nn 4 | import sys 5 | #sys.path.append('/home/eulrang/workspace/git/Drone_Challenge/task1/yolov7/') 6 | sys.path.append(os.path.abspath(os.path.dirname(os.path.abspath(os.path.dirname(__file__))))) 7 | from .common import Conv 8 | 9 | class Ensemble(nn.ModuleList): 10 | # Ensemble of models 11 | def __init__(self): 12 | super(Ensemble, self).__init__() 13 | 14 | def forward(self, x, augment=False): 15 | y = [] 16 | for module in self: 17 | y.append(module(x, augment)[0]) 18 | # y = torch.stack(y).max(0)[0] # max ensemble 19 | # y = torch.stack(y).mean(0) # mean ensemble 20 | y = torch.cat(y, 1) # nms ensemble 21 | return y, None # inference, train output 22 | 23 | 24 | def attempt_load(weights, map_location=None): 25 | # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a 26 | model = Ensemble() 27 | for w in weights if isinstance(weights, list) else [weights]: 28 | # attempt_download(w) 29 | ckpt = torch.load(w, map_location=map_location) # load 30 | model.append(ckpt['ema' if ckpt.get('ema') else 'model'].float().fuse().eval()) # FP32 model 31 | 32 | # Compatibility updates 33 | for m in model.modules(): 34 | if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]: 35 | m.inplace = True # pytorch 1.7.0 compatibility 36 | elif type(m) is nn.Upsample: 37 | m.recompute_scale_factor = None # torch 1.11.0 compatibility 38 | elif type(m) is Conv: 39 | m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility 40 | 41 | if len(model) == 1: 42 | return 
model[-1] # return model 43 | else: 44 | print('Ensemble created with %s\n' % weights) 45 | for k in ['names', 'stride']: 46 | setattr(model, k, getattr(model[-1], k)) 47 | return model # return ensemble 48 | 49 | 50 | -------------------------------------------------------------------------------- /superglue/matching.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 
32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # 39 | # %AUTHORS_END% 40 | # --------------------------------------------------------------------*/ 41 | # %BANNER_END% 42 | 43 | import torch 44 | 45 | from .superpoint import SuperPoint 46 | from .superglue import SuperGlue 47 | 48 | 49 | class Matching(torch.nn.Module): 50 | """ Image Matching Frontend (SuperPoint + SuperGlue) """ 51 | def __init__(self, config={}): 52 | super().__init__() 53 | self.superpoint = SuperPoint(config.get('superpoint', {})) 54 | self.superglue = SuperGlue(config.get('superglue', {})) 55 | 56 | def forward(self, data): 57 | """ Run SuperPoint (optionally) and SuperGlue 58 | SuperPoint is skipped if ['keypoints0', 'keypoints1'] exist in input 59 | Args: 60 | data: dictionary with minimal keys: ['image0', 'image1'] 61 | """ 62 | pred = {} 63 | 64 | # Extract SuperPoint (keypoints, scores, descriptors) if not provided 65 | if 'keypoints0' not in data: 66 | pred0 = self.superpoint({'image': data['image0']}) 67 | pred = {**pred, **{k+'0': v for k, v in pred0.items()}} 68 | 69 | if 'keypoints1' not in data: 70 | pred1 = self.superpoint({'image': data['image1']}) 71 | pred = {**pred, **{k+'1': v for k, v in pred1.items()}} 72 | 73 | # Batch all features 74 | # We should either have i) one image per batch, or 75 | # ii) the same number of local features for all images in the batch. 76 | data = {**data, **pred} 77 | 78 | for k in data: 79 | if isinstance(data[k], (list, tuple)): 80 | data[k] = torch.stack(data[k]) 81 | 82 | # Perform the matching 83 | pred = {**pred, **self.superglue(data)} 84 | 85 | return pred 86 | -------------------------------------------------------------------------------- /utils/json_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | def json_preprocess(data_folder = './data_toy/'): 4 | text_list = data_folder 5 | 6 | objects_dict = { 7 | '책상': ['desk'], 8 | '칠판': ['whiteboard'], 9 | '의자': ['chair'], 10 | '캐비닛': ['cabinet'], 11 | '모니터': ['monitor'], 12 | '상자': ['box'], 13 | '쓰레기통': ['trash bin'], 14 | '바구니': ['bakset'], 15 | '컴퓨터': ['computer'], 16 | '책장': ['bookshelf'], 17 | '프린터': ['printer'], 18 | '노트북': ['laptop'], 19 | '현수막': ['banner'], 20 | '거울': ['mirror'], 21 | '계단': ['stairs'], 22 | '장난감': ['toy'], 23 | '소화기': ['fire extinguisher'], 24 | '포스터': ['poster'], 25 | '세면대': ['sink'], 26 | '운동기구': ['exercise tool'], 27 | '스피커': ['speaker'], 28 | } 29 | 30 | people_dict = { 31 | '아이': 'person_child', 32 | '아내': 'person_woman', 33 | '남편': 'person_man', 34 | '엄마': 'person_woman', 35 | '아빠': 'person_man' 36 | } 37 | 38 | top_dict = { 39 | '빨강': 'up_red', 40 | '주황': 'up_orange', 41 | '노랑': 'up_yellow', 42 | '초록': 'up_green', 43 | '파랑': 'up_blue', 44 | '보라': 'up_purple', 45 | '흰색': 'up_white', 46 | '검정': 'up_black', 47 | '회색': 'up_gray' 48 | } 49 | 50 | low_dic = { 51 | '빨강': 'low_red', 52 | '주황': 'low_orange', 53 | '노랑': 'low_yellow', 54 | '초록': 'low_green', 55 | '파랑': 'low_blue', 56 | '보라': 'low_purple', 57 | '흰색': 'low_white', 58 | '검정': 'low_black', 59 | '회색': 'low_gray' 60 | 61 | } 62 | 63 | query = {} 64 | 65 | with open(text_list, 'r') as f: 66 | data = json.load(f) # text data 67 | 68 | # (1) json data parsing 69 | num = data.get('no') # later used when making answer sheet file (json) 70 | objects = data.get('주변사물') 71 | people = data.get('일행') 72 | top = data.get('상의') 73 | low = 
data.get('하의') 74 | 75 | # (2) making query 76 | query[num] = [] 77 | 78 | ## i. objects 79 | for obj in objects: 80 | for obj_query in objects_dict[obj]: 81 | query[num].append(obj_query) 82 | 83 | ## ii. people & clothes 84 | ### rule-based female/male/child classification 85 | if people is not None: 86 | if '아내' in people: 87 | # 요구조자 = male 88 | if top is not None: 89 | shirt = top_dict[top] 90 | query[num].append(shirt) 91 | if low is not None: 92 | pants = low_dic[low] 93 | query[num].append(pants) 94 | # 요구조자 본인 95 | query[num].append('person_man') 96 | elif '남편' in people: 97 | # 요구조자 = female 98 | if top is not None: 99 | shirt = top_dict[top] 100 | query[num].append(shirt) 101 | if low is not None: 102 | skirt = low_dic[low] 103 | query[num].append(skirt) 104 | # 요구조자 본인 105 | query[num].append('person_woman') 106 | elif ('엄마' in people) or ('아빠' in people): 107 | # 요구조자 = child 108 | # female or male 109 | if top is not None: 110 | shirt = top_dict[top] 111 | query[num].append(shirt) 112 | if low is not None: 113 | pants = low_dic[low] 114 | query[num].append(pants) 115 | # 요구조자 본인 116 | query[num].append('person_child') 117 | for person in people: 118 | query[num].append(people_dict[person]) 119 | 120 | return query 121 | 122 | def json_postprocess(clues_num, data): 123 | # json skeleton 124 | json_object = { 125 | 'answer_sheet': { 126 | 'room_id': None, 127 | 'mission': "1", 128 | 'answer': { 129 | 'person_id': { 130 | } 131 | } 132 | } 133 | } 134 | 135 | person_id_list = [] 136 | for i in range(0, len(data)): 137 | if data[i] < 500: 138 | person_id_list.append(str(data[i])) 139 | json_object['answer_sheet']['answer']['person_id'].update({clues_num:person_id_list}) 140 | 141 | return json_object 142 | -------------------------------------------------------------------------------- /utils/model_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import cv2 3 | import matplotlib.pyplot as plt 4 | import matplotlib 5 | import random 6 | 7 | # ----------------------------------------- 8 | # Superglue utils 9 | # ----------------------------------------- 10 | def matching(data, superpoint, superglue): 11 | """ Run SuperPoint (optionally) and SuperGlue 12 | SuperPoint is skipped if ['keypoints0', 'keypoints1'] exist in input 13 | Args: 14 | data: dictionary with minimal keys: ['image0', 'image1'] 15 | """ 16 | 17 | torch.set_grad_enabled(False) 18 | 19 | pred = {} 20 | 21 | # Extract SuperPoint (keypoints, scores, descriptors) if not provided 22 | if 'keypoints0' not in data: 23 | pred0 = superpoint({'image': data['image0']}) 24 | pred = {**pred, **{k+'0': v for k, v in pred0.items()}} 25 | if 'keypoints1' not in data: 26 | pred1 = superpoint({'image': data['image1']}) 27 | pred = {**pred, **{k+'1': v for k, v in pred1.items()}} 28 | 29 | data = {**data, **pred} 30 | 31 | for k in data: 32 | if isinstance(data[k], (list, tuple)): 33 | data[k] = torch.stack(data[k]) 34 | 35 | # Perform the matching 36 | pred = {**pred, **superglue(data)} 37 | pred = {k: v[0].cpu().numpy() for k, v in pred.items()} 38 | matches, conf = pred['matches0'], pred['matching_scores0'] 39 | 40 | return pred, matches, conf 41 | 42 | 43 | def process_resize(w, h, resize): 44 | assert(len(resize) > 0 and len(resize) <= 2) 45 | if len(resize) == 1 and resize[0] > -1: 46 | scale = resize[0] / max(h, w) 47 | w_new, h_new = int(round(w*scale)), int(round(h*scale)) 48 | elif len(resize) == 1 and resize[0] == -1: 49 | w_new, h_new = w, h 50 | else: # 
len(resize) == 2: 51 | w_new, h_new = resize[0], resize[1] 52 | 53 | # Issue warning if resolution is too small or too large. 54 | if max(w_new, h_new) < 160: 55 | print('Warning: input resolution is very small, results may vary') 56 | elif max(w_new, h_new) > 2000: 57 | print('Warning: input resolution is very large, results may vary') 58 | 59 | return w_new, h_new 60 | 61 | 62 | def frame2tensor(frame, device): 63 | return torch.from_numpy(frame/255.).float()[None, None].to(device) 64 | 65 | 66 | def read_image(img, resize, device): 67 | image = img 68 | if image is None: 69 | return None, None, None 70 | w, h = image.shape[1], image.shape[0] 71 | w_new, h_new = process_resize(w, h, resize) 72 | scales = (float(w) / float(w_new), float(h) / float(h_new)) 73 | 74 | # resize 75 | image = cv2.resize(image, (w_new, h_new)).astype('float32') 76 | 77 | inp = frame2tensor(image, device) 78 | return image, inp, scales 79 | 80 | 81 | def plot_image_pair(imgs, dpi=100, size=6, pad=.5): 82 | n = len(imgs) 83 | assert n == 2, 'number of images must be two' 84 | figsize = (size*n, size*3/4) if size is not None else None 85 | _, ax = plt.subplots(1, n, figsize=figsize, dpi=dpi) 86 | for i in range(n): 87 | ax[i].imshow(imgs[i], cmap=plt.get_cmap('gray'), vmin=0, vmax=255) 88 | ax[i].get_yaxis().set_ticks([]) 89 | ax[i].get_xaxis().set_ticks([]) 90 | for spine in ax[i].spines.values(): # remove frame 91 | spine.set_visible(False) 92 | plt.tight_layout(pad=pad) 93 | 94 | 95 | def plot_matches(kpts0, kpts1, color, lw=1.5, ps=4): 96 | fig = plt.gcf() 97 | ax = fig.axes 98 | fig.canvas.draw() 99 | 100 | transFigure = fig.transFigure.inverted() 101 | fkpts0 = transFigure.transform(ax[0].transData.transform(kpts0)) 102 | fkpts1 = transFigure.transform(ax[1].transData.transform(kpts1)) 103 | 104 | fig.lines = [matplotlib.lines.Line2D( 105 | (fkpts0[i, 0], fkpts1[i, 0]), (fkpts0[i, 1], fkpts1[i, 1]), zorder=1, 106 | transform=fig.transFigure, c=color[i], linewidth=lw) 107 | for i in range(len(kpts0))] 108 | ax[0].scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps) 109 | ax[1].scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps) 110 | 111 | 112 | def make_matching_plot(image0, image1, mkpts0, mkpts1, 113 | color, text, path, small_text=[]): 114 | 115 | plot_image_pair([image0, image1]) 116 | plot_matches(mkpts0, mkpts1, color) 117 | 118 | fig = plt.gcf() 119 | txt_color = 'k' if image0[:100, :150].mean() > 200 else 'w' 120 | fig.text( 121 | 0.01, 0.99, '\n'.join(text), transform=fig.axes[0].transAxes, 122 | fontsize=15, va='top', ha='left', color=txt_color) 123 | 124 | txt_color = 'k' if image0[-100:, :150].mean() > 200 else 'w' 125 | fig.text( 126 | 0.01, 0.01, '\n'.join(small_text), transform=fig.axes[0].transAxes, 127 | fontsize=5, va='bottom', ha='left', color=txt_color) 128 | 129 | plt.savefig(str(path), bbox_inches='tight', pad_inches=0) 130 | plt.close() 131 | 132 | # ----------------------------------------- 133 | # YOLO utils 134 | # ----------------------------------------- 135 | def plot_one_box(x, img, color=None, label=None, line_thickness=3): 136 | # Plots one bounding box on image img 137 | tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness 138 | color = color or [random.randint(0, 128) for _ in range(3)] 139 | c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3])) 140 | cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) 141 | if label: 142 | tf = max(tl - 1, 1) # font thickness 143 | t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, 
thickness=tf)[0] 144 | c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 145 | cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA) # filled 146 | cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [127, 127, 127], thickness=tf, lineType=cv2.LINE_AA) -------------------------------------------------------------------------------- /yolov7/utils/autoanchor.py: -------------------------------------------------------------------------------- 1 | # Auto-anchor utils 2 | import os.path 3 | 4 | import numpy as np 5 | import torch 6 | import yaml 7 | from scipy.cluster.vq import kmeans 8 | from tqdm import tqdm 9 | 10 | import sys 11 | #sys.path.append('/home/eulrang/workspace/git/Drone_Challenge/task1/yolov7/utils') 12 | sys.path.append(os.path.abspath(os.path.dirname(__file__))) 13 | from general import colorstr 14 | 15 | 16 | def check_anchor_order(m): 17 | # Check anchor order against stride order for YOLO Detect() module m, and correct if necessary 18 | a = m.anchor_grid.prod(-1).view(-1) # anchor area 19 | da = a[-1] - a[0] # delta a 20 | ds = m.stride[-1] - m.stride[0] # delta s 21 | if da.sign() != ds.sign(): # same order 22 | print('Reversing anchor order') 23 | m.anchors[:] = m.anchors.flip(0) 24 | m.anchor_grid[:] = m.anchor_grid.flip(0) 25 | 26 | 27 | def check_anchors(dataset, model, thr=4.0, imgsz=640): 28 | # Check anchor fit to data, recompute if necessary 29 | prefix = colorstr('autoanchor: ') 30 | print(f'\n{prefix}Analyzing anchors... ', end='') 31 | m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect() 32 | shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True) 33 | scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale 34 | wh = torch.tensor(np.concatenate([l[:, 3:5] * s for s, l in zip(shapes * scale, dataset.labels)])).float() # wh 35 | 36 | def metric(k): # compute metric 37 | r = wh[:, None] / k[None] 38 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 39 | best = x.max(1)[0] # best_x 40 | aat = (x > 1. / thr).float().sum(1).mean() # anchors above threshold 41 | bpr = (best > 1. / thr).float().mean() # best possible recall 42 | return bpr, aat 43 | 44 | anchors = m.anchor_grid.clone().cpu().view(-1, 2) # current anchors 45 | bpr, aat = metric(anchors) 46 | print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='') 47 | if bpr < 0.98: # threshold to recompute 48 | print('. Attempting to improve anchors, please wait...') 49 | na = m.anchor_grid.numel() // 2 # number of anchors 50 | try: 51 | anchors = kmean_anchors(dataset, n=na, img_size=imgsz, thr=thr, gen=1000, verbose=False) 52 | except Exception as e: 53 | print(f'{prefix}ERROR: {e}') 54 | new_bpr = metric(anchors)[0] 55 | if new_bpr > bpr: # replace anchors 56 | anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors) 57 | m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid) # for inference 58 | m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss 59 | check_anchor_order(m) 60 | print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.') 61 | else: 62 | print(f'{prefix}Original anchors better than new anchors. 
Proceeding with original anchors.') 63 | print('') # newline 64 | 65 | 66 | def kmean_anchors(path='./data/coco.yaml', n=9, img_size=640, thr=4.0, gen=1000, verbose=True): 67 | """ Creates kmeans-evolved anchors from training dataset 68 | 69 | Arguments: 70 | path: path to dataset *.yaml, or a loaded dataset 71 | n: number of anchors 72 | img_size: image size used for training 73 | thr: anchor-label wh ratio threshold hyperparameter hyp['anchor_t'] used for training, default=4.0 74 | gen: generations to evolve anchors using genetic algorithm 75 | verbose: print all results 76 | 77 | Return: 78 | k: kmeans evolved anchors 79 | 80 | Usage: 81 | from utils.autoanchor import *; _ = kmean_anchors() 82 | """ 83 | thr = 1. / thr 84 | prefix = colorstr('autoanchor: ') 85 | 86 | def metric(k, wh): # compute metrics 87 | r = wh[:, None] / k[None] 88 | x = torch.min(r, 1. / r).min(2)[0] # ratio metric 89 | # x = wh_iou(wh, torch.tensor(k)) # iou metric 90 | return x, x.max(1)[0] # x, best_x 91 | 92 | def anchor_fitness(k): # mutation fitness 93 | _, best = metric(torch.tensor(k, dtype=torch.float32), wh) 94 | return (best * (best > thr).float()).mean() # fitness 95 | 96 | def print_results(k): 97 | k = k[np.argsort(k.prod(1))] # sort small to large 98 | x, best = metric(k, wh0) 99 | bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr 100 | print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr') 101 | print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, ' 102 | f'past_thr={x[x > thr].mean():.3f}-mean: ', end='') 103 | for i, x in enumerate(k): 104 | print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg 105 | return k 106 | 107 | if isinstance(path, str): # *.yaml file 108 | with open(path) as f: 109 | data_dict = yaml.load(f, Loader=yaml.SafeLoader) # model dict 110 | from utils.datasets import LoadImagesAndLabels 111 | dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True) 112 | else: 113 | dataset = path # dataset 114 | 115 | # Get label wh 116 | shapes = img_size * dataset.shapes / dataset.shapes.max(1, keepdims=True) 117 | wh0 = np.concatenate([l[:, 3:5] * s for s, l in zip(shapes, dataset.labels)]) # wh 118 | 119 | # Filter 120 | i = (wh0 < 3.0).any(1).sum() 121 | if i: 122 | print(f'{prefix}WARNING: Extremely small objects found. 
{i} of {len(wh0)} labels are < 3 pixels in size.') 123 | wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels 124 | # wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1 125 | 126 | # Kmeans calculation 127 | print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...') 128 | s = wh.std(0) # sigmas for whitening 129 | k, dist = kmeans(wh / s, n, iter=30) # points, mean distance 130 | assert len(k) == n, print(f'{prefix}ERROR: scipy.cluster.vq.kmeans requested {n} points but returned only {len(k)}') 131 | k *= s 132 | wh = torch.tensor(wh, dtype=torch.float32) # filtered 133 | wh0 = torch.tensor(wh0, dtype=torch.float32) # unfiltered 134 | k = print_results(k) 135 | 136 | # Plot 137 | # k, d = [None] * 20, [None] * 20 138 | # for i in tqdm(range(1, 21)): 139 | # k[i-1], d[i-1] = kmeans(wh / s, i) # points, mean distance 140 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7), tight_layout=True) 141 | # ax = ax.ravel() 142 | # ax[0].plot(np.arange(1, 21), np.array(d) ** 2, marker='.') 143 | # fig, ax = plt.subplots(1, 2, figsize=(14, 7)) # plot wh 144 | # ax[0].hist(wh[wh[:, 0]<100, 0],400) 145 | # ax[1].hist(wh[wh[:, 1]<100, 1],400) 146 | # fig.savefig('wh.png', dpi=200) 147 | 148 | # Evolve 149 | npr = np.random 150 | f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma 151 | pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar 152 | for _ in pbar: 153 | v = np.ones(sh) 154 | while (v == 1).all(): # mutate until a change occurs (prevent duplicates) 155 | v = ((npr.random(sh) < mp) * npr.random() * npr.randn(*sh) * s + 1).clip(0.3, 3.0) 156 | kg = (k.copy() * v).clip(min=2.0) 157 | fg = anchor_fitness(kg) 158 | if fg > f: 159 | f, k = fg, kg.copy() 160 | pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}' 161 | if verbose: 162 | print_results(k) 163 | 164 | return print_results(k) 165 | -------------------------------------------------------------------------------- /superglue/superpoint.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. 
ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # 39 | # %AUTHORS_END% 40 | # --------------------------------------------------------------------*/ 41 | # %BANNER_END% 42 | 43 | from pathlib import Path 44 | import torch 45 | from torch import nn 46 | 47 | def simple_nms(scores, nms_radius: int): 48 | """ Fast Non-maximum suppression to remove nearby points """ 49 | assert(nms_radius >= 0) 50 | 51 | def max_pool(x): 52 | return torch.nn.functional.max_pool2d( 53 | x, kernel_size=nms_radius*2+1, stride=1, padding=nms_radius) 54 | 55 | zeros = torch.zeros_like(scores) 56 | max_mask = scores == max_pool(scores) 57 | for _ in range(2): 58 | supp_mask = max_pool(max_mask.float()) > 0 59 | supp_scores = torch.where(supp_mask, zeros, scores) 60 | new_max_mask = supp_scores == max_pool(supp_scores) 61 | max_mask = max_mask | (new_max_mask & (~supp_mask)) 62 | return torch.where(max_mask, scores, zeros) 63 | 64 | 65 | def remove_borders(keypoints, scores, border: int, height: int, width: int): 66 | """ Removes keypoints too close to the border """ 67 | mask_h = (keypoints[:, 0] >= border) & (keypoints[:, 0] < (height - border)) 68 | mask_w = (keypoints[:, 1] >= border) & (keypoints[:, 1] < (width - border)) 69 | mask = mask_h & mask_w 70 | return keypoints[mask], scores[mask] 71 | 72 | 73 | def top_k_keypoints(keypoints, scores, k: int): 74 | if k >= len(keypoints): 75 | return keypoints, scores 76 | scores, indices = torch.topk(scores, k, dim=0) 77 | return keypoints[indices], scores 78 | 79 | 80 | def sample_descriptors(keypoints, descriptors, s: int = 8): 81 | """ Interpolate descriptors at keypoint locations """ 82 | b, c, h, w = descriptors.shape 83 | keypoints = keypoints - s / 2 + 0.5 84 | keypoints /= torch.tensor([(w*s - s/2 - 0.5), (h*s - s/2 - 0.5)], 85 | ).to(keypoints)[None] 86 | keypoints = keypoints*2 - 1 # normalize to (-1, 1) 87 | args = {'align_corners': True} if int(torch.__version__[2]) > 2 else {} 88 | descriptors = torch.nn.functional.grid_sample( 89 | descriptors, keypoints.view(b, 1, -1, 2), mode='bilinear', **args) 90 | descriptors = torch.nn.functional.normalize( 91 | descriptors.reshape(b, c, -1), p=2, dim=1) 92 | return descriptors 93 | 94 | 95 | class SuperPoint(nn.Module): 96 | """SuperPoint Convolutional Detector and Descriptor 97 | 98 | SuperPoint: Self-Supervised Interest Point Detection and 99 | Description. Daniel DeTone, Tomasz Malisiewicz, and Andrew 100 | Rabinovich. In CVPRW, 2019. 
https://arxiv.org/abs/1712.07629 101 | 102 | """ 103 | default_config = { 104 | 'descriptor_dim': 256, 105 | 'nms_radius': 4, 106 | 'keypoint_threshold': 0.005, 107 | 'max_keypoints': -1, 108 | 'remove_borders': 4, 109 | } 110 | 111 | def __init__(self, config): 112 | super().__init__() 113 | self.config = {**self.default_config, **config} 114 | 115 | self.relu = nn.ReLU(inplace=True) 116 | self.pool = nn.MaxPool2d(kernel_size=2, stride=2) 117 | c1, c2, c3, c4, c5 = 64, 64, 128, 128, 256 118 | 119 | self.conv1a = nn.Conv2d(1, c1, kernel_size=3, stride=1, padding=1) 120 | self.conv1b = nn.Conv2d(c1, c1, kernel_size=3, stride=1, padding=1) 121 | self.conv2a = nn.Conv2d(c1, c2, kernel_size=3, stride=1, padding=1) 122 | self.conv2b = nn.Conv2d(c2, c2, kernel_size=3, stride=1, padding=1) 123 | self.conv3a = nn.Conv2d(c2, c3, kernel_size=3, stride=1, padding=1) 124 | self.conv3b = nn.Conv2d(c3, c3, kernel_size=3, stride=1, padding=1) 125 | self.conv4a = nn.Conv2d(c3, c4, kernel_size=3, stride=1, padding=1) 126 | self.conv4b = nn.Conv2d(c4, c4, kernel_size=3, stride=1, padding=1) 127 | 128 | self.convPa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 129 | self.convPb = nn.Conv2d(c5, 65, kernel_size=1, stride=1, padding=0) 130 | 131 | self.convDa = nn.Conv2d(c4, c5, kernel_size=3, stride=1, padding=1) 132 | self.convDb = nn.Conv2d( 133 | c5, self.config['descriptor_dim'], 134 | kernel_size=1, stride=1, padding=0) 135 | 136 | path = Path(__file__).parent / 'weights/superpoint_v1.pth' 137 | self.load_state_dict(torch.load(str(path))) 138 | 139 | mk = self.config['max_keypoints'] 140 | if mk == 0 or mk < -1: 141 | raise ValueError('\"max_keypoints\" must be positive or \"-1\"') 142 | 143 | # print('Loaded SuperPoint model') 144 | 145 | def forward(self, data): 146 | """ Compute keypoints, scores, descriptors for image """ 147 | # Shared Encoder 148 | x = self.relu(self.conv1a(data['image'])) 149 | x = self.relu(self.conv1b(x)) 150 | x = self.pool(x) 151 | x = self.relu(self.conv2a(x)) 152 | x = self.relu(self.conv2b(x)) 153 | x = self.pool(x) 154 | x = self.relu(self.conv3a(x)) 155 | x = self.relu(self.conv3b(x)) 156 | x = self.pool(x) 157 | x = self.relu(self.conv4a(x)) 158 | x = self.relu(self.conv4b(x)) 159 | 160 | # Compute the dense keypoint scores 161 | cPa = self.relu(self.convPa(x)) 162 | scores = self.convPb(cPa) 163 | scores = torch.nn.functional.softmax(scores, 1)[:, :-1] 164 | b, _, h, w = scores.shape 165 | scores = scores.permute(0, 2, 3, 1).reshape(b, h, w, 8, 8) 166 | scores = scores.permute(0, 1, 3, 2, 4).reshape(b, h*8, w*8) 167 | scores = simple_nms(scores, self.config['nms_radius']) 168 | 169 | # Extract keypoints 170 | keypoints = [ 171 | torch.nonzero(s > self.config['keypoint_threshold']) 172 | for s in scores] 173 | scores = [s[tuple(k.t())] for s, k in zip(scores, keypoints)] 174 | 175 | # Discard keypoints near the image borders 176 | keypoints, scores = list(zip(*[ 177 | remove_borders(k, s, self.config['remove_borders'], h*8, w*8) 178 | for k, s in zip(keypoints, scores)])) 179 | 180 | # Keep the k keypoints with highest score 181 | if self.config['max_keypoints'] >= 0: 182 | keypoints, scores = list(zip(*[ 183 | top_k_keypoints(k, s, self.config['max_keypoints']) 184 | for k, s in zip(keypoints, scores)])) 185 | 186 | # Convert (h, w) to (x, y) 187 | keypoints = [torch.flip(k, [1]).float() for k in keypoints] 188 | 189 | # Compute the dense descriptors 190 | cDa = self.relu(self.convDa(x)) 191 | descriptors = self.convDb(cDa) 192 | descriptors = 
torch.nn.functional.normalize(descriptors, p=2, dim=1) 193 | 194 | # Extract descriptors 195 | descriptors = [sample_descriptors(k[None], d[None], 8)[0] 196 | for k, d in zip(keypoints, descriptors)] 197 | 198 | return { 199 | 'keypoints': keypoints, 200 | 'scores': scores, 201 | 'descriptors': descriptors, 202 | } 203 | -------------------------------------------------------------------------------- /superglue/superglue.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 
32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # 39 | # %AUTHORS_END% 40 | # --------------------------------------------------------------------*/ 41 | # %BANNER_END% 42 | 43 | from copy import deepcopy 44 | from pathlib import Path 45 | from typing import List, Tuple 46 | 47 | import torch 48 | from torch import nn 49 | 50 | 51 | def MLP(channels: List[int], do_bn: bool = True) -> nn.Module: 52 | """ Multi-layer perceptron """ 53 | n = len(channels) 54 | layers = [] 55 | for i in range(1, n): 56 | layers.append( 57 | nn.Conv1d(channels[i - 1], channels[i], kernel_size=1, bias=True)) 58 | if i < (n-1): 59 | if do_bn: 60 | layers.append(nn.BatchNorm1d(channels[i])) 61 | layers.append(nn.ReLU()) 62 | return nn.Sequential(*layers) 63 | 64 | 65 | def normalize_keypoints(kpts, image_shape): 66 | """ Normalize keypoints locations based on image image_shape""" 67 | _, _, height, width = image_shape 68 | one = kpts.new_tensor(1) 69 | size = torch.stack([one*width, one*height])[None] 70 | center = size / 2 71 | scaling = size.max(1, keepdim=True).values * 0.7 72 | return (kpts - center[:, None, :]) / scaling[:, None, :] 73 | 74 | 75 | class KeypointEncoder(nn.Module): 76 | """ Joint encoding of visual appearance and location using MLPs""" 77 | def __init__(self, feature_dim: int, layers: List[int]) -> None: 78 | super().__init__() 79 | self.encoder = MLP([3] + layers + [feature_dim]) 80 | nn.init.constant_(self.encoder[-1].bias, 0.0) 81 | 82 | def forward(self, kpts, scores): 83 | inputs = [kpts.transpose(1, 2), scores.unsqueeze(1)] 84 | return self.encoder(torch.cat(inputs, dim=1)) 85 | 86 | 87 | def attention(query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> Tuple[torch.Tensor,torch.Tensor]: 88 | dim = query.shape[1] 89 | scores = torch.einsum('bdhn,bdhm->bhnm', query, key) / dim**.5 90 | prob = torch.nn.functional.softmax(scores, dim=-1) 91 | return torch.einsum('bhnm,bdhm->bdhn', prob, value), prob 92 | 93 | 94 | class MultiHeadedAttention(nn.Module): 95 | """ Multi-head attention to increase model expressivitiy """ 96 | def __init__(self, num_heads: int, d_model: int): 97 | super().__init__() 98 | assert d_model % num_heads == 0 99 | self.dim = d_model // num_heads 100 | self.num_heads = num_heads 101 | self.merge = nn.Conv1d(d_model, d_model, kernel_size=1) 102 | self.proj = nn.ModuleList([deepcopy(self.merge) for _ in range(3)]) 103 | 104 | def forward(self, query: torch.Tensor, key: torch.Tensor, value: torch.Tensor) -> torch.Tensor: 105 | batch_dim = query.size(0) 106 | query, key, value = [l(x).view(batch_dim, self.dim, self.num_heads, -1) 107 | for l, x in zip(self.proj, (query, key, value))] 108 | x, _ = attention(query, key, value) 109 | return self.merge(x.contiguous().view(batch_dim, self.dim*self.num_heads, -1)) 110 | 111 | 112 | class AttentionalPropagation(nn.Module): 113 | def __init__(self, feature_dim: int, num_heads: int): 114 | super().__init__() 115 | self.attn = MultiHeadedAttention(num_heads, feature_dim) 116 | self.mlp = MLP([feature_dim*2, feature_dim*2, feature_dim]) 117 | nn.init.constant_(self.mlp[-1].bias, 0.0) 118 | 119 | def forward(self, x: torch.Tensor, source: torch.Tensor) -> torch.Tensor: 120 | message = self.attn(x, source, source) 121 | return self.mlp(torch.cat([x, message], dim=1)) 122 | 123 | 124 | class AttentionalGNN(nn.Module): 125 | def __init__(self, feature_dim: int, layer_names: List[str]) -> 
None: 126 | super().__init__() 127 | self.layers = nn.ModuleList([ 128 | AttentionalPropagation(feature_dim, 4) 129 | for _ in range(len(layer_names))]) 130 | self.names = layer_names 131 | 132 | def forward(self, desc0: torch.Tensor, desc1: torch.Tensor) -> Tuple[torch.Tensor,torch.Tensor]: 133 | for layer, name in zip(self.layers, self.names): 134 | if name == 'cross': 135 | src0, src1 = desc1, desc0 136 | else: # if name == 'self': 137 | src0, src1 = desc0, desc1 138 | delta0, delta1 = layer(desc0, src0), layer(desc1, src1) 139 | desc0, desc1 = (desc0 + delta0), (desc1 + delta1) 140 | return desc0, desc1 141 | 142 | 143 | def log_sinkhorn_iterations(Z: torch.Tensor, log_mu: torch.Tensor, log_nu: torch.Tensor, iters: int) -> torch.Tensor: 144 | """ Perform Sinkhorn Normalization in Log-space for stability""" 145 | u, v = torch.zeros_like(log_mu), torch.zeros_like(log_nu) 146 | for _ in range(iters): 147 | u = log_mu - torch.logsumexp(Z + v.unsqueeze(1), dim=2) 148 | v = log_nu - torch.logsumexp(Z + u.unsqueeze(2), dim=1) 149 | return Z + u.unsqueeze(2) + v.unsqueeze(1) 150 | 151 | 152 | def log_optimal_transport(scores: torch.Tensor, alpha: torch.Tensor, iters: int) -> torch.Tensor: 153 | """ Perform Differentiable Optimal Transport in Log-space for stability""" 154 | b, m, n = scores.shape 155 | one = scores.new_tensor(1) 156 | ms, ns = (m*one).to(scores), (n*one).to(scores) 157 | 158 | bins0 = alpha.expand(b, m, 1) 159 | bins1 = alpha.expand(b, 1, n) 160 | alpha = alpha.expand(b, 1, 1) 161 | 162 | couplings = torch.cat([torch.cat([scores, bins0], -1), 163 | torch.cat([bins1, alpha], -1)], 1) 164 | 165 | norm = - (ms + ns).log() 166 | log_mu = torch.cat([norm.expand(m), ns.log()[None] + norm]) 167 | log_nu = torch.cat([norm.expand(n), ms.log()[None] + norm]) 168 | log_mu, log_nu = log_mu[None].expand(b, -1), log_nu[None].expand(b, -1) 169 | 170 | Z = log_sinkhorn_iterations(couplings, log_mu, log_nu, iters) 171 | Z = Z - norm # multiply probabilities by M+N 172 | return Z 173 | 174 | 175 | def arange_like(x, dim: int): 176 | return x.new_ones(x.shape[dim]).cumsum(0) - 1 # traceable in 1.1 177 | 178 | 179 | class SuperGlue(nn.Module): 180 | """SuperGlue feature matching middle-end 181 | 182 | Given two sets of keypoints and locations, we determine the 183 | correspondences by: 184 | 1. Keypoint Encoding (normalization + visual feature and location fusion) 185 | 2. Graph Neural Network with multiple self and cross-attention layers 186 | 3. Final projection layer 187 | 4. Optimal Transport Layer (a differentiable Hungarian matching algorithm) 188 | 5. Thresholding matrix based on mutual exclusivity and a match_threshold 189 | 190 | The correspondence ids use -1 to indicate non-matching points. 191 | 192 | Paul-Edouard Sarlin, Daniel DeTone, Tomasz Malisiewicz, and Andrew 193 | Rabinovich. SuperGlue: Learning Feature Matching with Graph Neural 194 | Networks. In CVPR, 2020. 
https://arxiv.org/abs/1911.11763 195 | 196 | """ 197 | default_config = { 198 | 'descriptor_dim': 256, 199 | 'weights': 'indoor', 200 | 'keypoint_encoder': [32, 64, 128, 256], 201 | 'GNN_layers': ['self', 'cross'] * 9, 202 | 'sinkhorn_iterations': 100, 203 | 'match_threshold': 0.2, 204 | } 205 | 206 | def __init__(self, config): 207 | super().__init__() 208 | self.config = {**self.default_config, **config} 209 | 210 | self.kenc = KeypointEncoder( 211 | self.config['descriptor_dim'], self.config['keypoint_encoder']) 212 | 213 | self.gnn = AttentionalGNN( 214 | feature_dim=self.config['descriptor_dim'], layer_names=self.config['GNN_layers']) 215 | 216 | self.final_proj = nn.Conv1d( 217 | self.config['descriptor_dim'], self.config['descriptor_dim'], 218 | kernel_size=1, bias=True) 219 | 220 | bin_score = torch.nn.Parameter(torch.tensor(1.)) 221 | self.register_parameter('bin_score', bin_score) 222 | 223 | assert self.config['weights'] in ['indoor', 'outdoor'] 224 | path = Path(__file__).parent 225 | path = path / 'weights/superglue_{}.pth'.format(self.config['weights']) 226 | self.load_state_dict(torch.load(str(path))) 227 | # print('Loaded SuperGlue model (\"{}\" weights)'.format( 228 | # self.config['weights'])) 229 | 230 | def forward(self, data): 231 | """Run SuperGlue on a pair of keypoints and descriptors""" 232 | desc0, desc1 = data['descriptors0'], data['descriptors1'] 233 | kpts0, kpts1 = data['keypoints0'], data['keypoints1'] 234 | 235 | if kpts0.shape[1] == 0 or kpts1.shape[1] == 0: # no keypoints 236 | shape0, shape1 = kpts0.shape[:-1], kpts1.shape[:-1] 237 | return { 238 | 'matches0': kpts0.new_full(shape0, -1, dtype=torch.int), 239 | 'matches1': kpts1.new_full(shape1, -1, dtype=torch.int), 240 | 'matching_scores0': kpts0.new_zeros(shape0), 241 | 'matching_scores1': kpts1.new_zeros(shape1), 242 | } 243 | 244 | # Keypoint normalization. 245 | kpts0 = normalize_keypoints(kpts0, data['image0'].shape) 246 | kpts1 = normalize_keypoints(kpts1, data['image1'].shape) 247 | 248 | # Keypoint MLP encoder. 249 | desc0 = desc0 + self.kenc(kpts0, data['scores0']) 250 | desc1 = desc1 + self.kenc(kpts1, data['scores1']) 251 | 252 | # Multi-layer Transformer network. 253 | desc0, desc1 = self.gnn(desc0, desc1) 254 | 255 | # Final MLP projection. 256 | mdesc0, mdesc1 = self.final_proj(desc0), self.final_proj(desc1) 257 | 258 | # Compute matching descriptor distance. 259 | scores = torch.einsum('bdn,bdm->bnm', mdesc0, mdesc1) 260 | scores = scores / self.config['descriptor_dim']**.5 261 | 262 | # Run the optimal transport. 263 | scores = log_optimal_transport( 264 | scores, self.bin_score, 265 | iters=self.config['sinkhorn_iterations']) 266 | 267 | # Get the matches with score above "match_threshold". 
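# `scores` holds log-assignment probabilities from the Sinkhorn iterations; the extra last
# row/column is the "dustbin" for unmatched keypoints and is dropped below. The max over dim 2
# proposes, for each image0 keypoint, its best image1 candidate, and the max over dim 1 does
# the reverse. Only mutual nearest neighbours are kept, their confidence is recovered with
# exp() (scores are in log-space), and matches whose confidence does not exceed
# match_threshold are marked invalid with -1.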
268 | max0, max1 = scores[:, :-1, :-1].max(2), scores[:, :-1, :-1].max(1) 269 | indices0, indices1 = max0.indices, max1.indices 270 | mutual0 = arange_like(indices0, 1)[None] == indices1.gather(1, indices0) 271 | mutual1 = arange_like(indices1, 1)[None] == indices0.gather(1, indices1) 272 | zero = scores.new_tensor(0) 273 | mscores0 = torch.where(mutual0, max0.values.exp(), zero) 274 | mscores1 = torch.where(mutual1, mscores0.gather(1, indices1), zero) 275 | valid0 = mutual0 & (mscores0 > self.config['match_threshold']) 276 | valid1 = mutual1 & valid0.gather(1, indices1) 277 | indices0 = torch.where(valid0, indices0, indices0.new_tensor(-1)) 278 | indices1 = torch.where(valid1, indices1, indices1.new_tensor(-1)) 279 | 280 | return { 281 | 'matches0': indices0, # use -1 for invalid match 282 | 'matches1': indices1, # use -1 for invalid match 283 | 'matching_scores0': mscores0, 284 | 'matching_scores1': mscores1, 285 | } 286 | -------------------------------------------------------------------------------- /yolov7/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | # YOLOR PyTorch utils 2 | 3 | import datetime 4 | import logging 5 | import math 6 | import os 7 | import platform 8 | import subprocess 9 | import time 10 | from contextlib import contextmanager 11 | from copy import deepcopy 12 | from pathlib import Path 13 | 14 | import torch 15 | import torch.backends.cudnn as cudnn 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | import torchvision 19 | 20 | try: 21 | import thop # for FLOPS computation 22 | except ImportError: 23 | thop = None 24 | logger = logging.getLogger(__name__) 25 | 26 | 27 | @contextmanager 28 | def torch_distributed_zero_first(local_rank: int): 29 | """ 30 | Decorator to make all processes in distributed training wait for each local_master to do something. 31 | """ 32 | if local_rank not in [-1, 0]: 33 | torch.distributed.barrier() 34 | yield 35 | if local_rank == 0: 36 | torch.distributed.barrier() 37 | 38 | 39 | def init_torch_seeds(seed=0): 40 | # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html 41 | torch.manual_seed(seed) 42 | if seed == 0: # slower, more reproducible 43 | cudnn.benchmark, cudnn.deterministic = False, True 44 | else: # faster, less reproducible 45 | cudnn.benchmark, cudnn.deterministic = True, False 46 | 47 | 48 | def date_modified(path=__file__): 49 | # return human-readable file modification date, i.e. '2021-3-26' 50 | t = datetime.datetime.fromtimestamp(Path(path).stat().st_mtime) 51 | return f'{t.year}-{t.month}-{t.day}' 52 | 53 | 54 | def git_describe(path=Path(__file__).parent): # path must be a directory 55 | # return human-readable git description, i.e. 
v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe 56 | s = f'git -C {path} describe --tags --long --always' 57 | try: 58 | return subprocess.check_output(s, shell=True, stderr=subprocess.STDOUT).decode()[:-1] 59 | except subprocess.CalledProcessError as e: 60 | return '' # not a git repository 61 | 62 | 63 | def select_device(device='', batch_size=None): 64 | # device = 'cpu' or '0' or '0,1,2,3' 65 | s = f'YOLOR 🚀 {git_describe() or date_modified()} torch {torch.__version__} ' # string 66 | cpu = device.lower() == 'cpu' 67 | if cpu: 68 | os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False 69 | elif device: # non-cpu device requested 70 | os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable 71 | assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability 72 | 73 | cuda = not cpu and torch.cuda.is_available() 74 | if cuda: 75 | n = torch.cuda.device_count() 76 | if n > 1 and batch_size: # check that batch_size is compatible with device_count 77 | assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}' 78 | space = ' ' * len(s) 79 | for i, d in enumerate(device.split(',') if device else range(n)): 80 | p = torch.cuda.get_device_properties(i) 81 | s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB 82 | else: 83 | s += 'CPU\n' 84 | 85 | logger.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s) # emoji-safe 86 | return torch.device('cuda:0' if cuda else 'cpu') 87 | 88 | 89 | def time_synchronized(): 90 | # pytorch-accurate time 91 | if torch.cuda.is_available(): 92 | torch.cuda.synchronize() 93 | return time.time() 94 | 95 | 96 | def profile(x, ops, n=100, device=None): 97 | # profile a pytorch module or list of modules. Example usage: 98 | # x = torch.randn(16, 3, 640, 640) # input 99 | # m1 = lambda x: x * torch.sigmoid(x) 100 | # m2 = nn.SiLU() 101 | # profile(x, [m1, m2], n=100) # profile speed over 100 iterations 102 | 103 | device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') 104 | x = x.to(device) 105 | x.requires_grad = True 106 | print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '') 107 | print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}") 108 | for m in ops if isinstance(ops, list) else [ops]: 109 | m = m.to(device) if hasattr(m, 'to') else m # device 110 | m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type 111 | dtf, dtb, t = 0., 0., [0., 0., 0.] 
# dt forward, backward 112 | try: 113 | flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPS 114 | except: 115 | flops = 0 116 | 117 | for _ in range(n): 118 | t[0] = time_synchronized() 119 | y = m(x) 120 | t[1] = time_synchronized() 121 | try: 122 | _ = y.sum().backward() 123 | t[2] = time_synchronized() 124 | except: # no backward method 125 | t[2] = float('nan') 126 | dtf += (t[1] - t[0]) * 1000 / n # ms per op forward 127 | dtb += (t[2] - t[1]) * 1000 / n # ms per op backward 128 | 129 | s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list' 130 | s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list' 131 | p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters 132 | print(f'{p:12}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}') 133 | 134 | 135 | def is_parallel(model): 136 | return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) 137 | 138 | 139 | def intersect_dicts(da, db, exclude=()): 140 | # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values 141 | return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape} 142 | 143 | 144 | def initialize_weights(model): 145 | for m in model.modules(): 146 | t = type(m) 147 | if t is nn.Conv2d: 148 | pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 149 | elif t is nn.BatchNorm2d: 150 | m.eps = 1e-3 151 | m.momentum = 0.03 152 | elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]: 153 | m.inplace = True 154 | 155 | 156 | def find_modules(model, mclass=nn.Conv2d): 157 | # Finds layer indices matching module class 'mclass' 158 | return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)] 159 | 160 | 161 | def sparsity(model): 162 | # Return global model sparsity 163 | a, b = 0., 0. 164 | for p in model.parameters(): 165 | a += p.numel() 166 | b += (p == 0).sum() 167 | return b / a 168 | 169 | 170 | def prune(model, amount=0.3): 171 | # Prune model to requested global sparsity 172 | import torch.nn.utils.prune as prune 173 | print('Pruning model... 
', end='') 174 | for name, m in model.named_modules(): 175 | if isinstance(m, nn.Conv2d): 176 | prune.l1_unstructured(m, name='weight', amount=amount) # prune 177 | prune.remove(m, 'weight') # make permanent 178 | print(' %.3g global sparsity' % sparsity(model)) 179 | 180 | 181 | def fuse_conv_and_bn(conv, bn): 182 | # Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/ 183 | fusedconv = nn.Conv2d(conv.in_channels, 184 | conv.out_channels, 185 | kernel_size=conv.kernel_size, 186 | stride=conv.stride, 187 | padding=conv.padding, 188 | groups=conv.groups, 189 | bias=True).requires_grad_(False).to(conv.weight.device) 190 | 191 | # prepare filters 192 | w_conv = conv.weight.clone().view(conv.out_channels, -1) 193 | w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var))) 194 | fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape)) 195 | 196 | # prepare spatial bias 197 | b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias 198 | b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps)) 199 | fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn) 200 | 201 | return fusedconv 202 | 203 | 204 | def model_info(model, verbose=False, img_size=640): 205 | # Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320] 206 | n_p = sum(x.numel() for x in model.parameters()) # number parameters 207 | n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients 208 | if verbose: 209 | print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')) 210 | for i, (name, p) in enumerate(model.named_parameters()): 211 | name = name.replace('module_list.', '') 212 | print('%5g %40s %9s %12g %20s %10.3g %10.3g' % 213 | (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std())) 214 | 215 | try: # FLOPS 216 | from thop import profile 217 | stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32 218 | img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input 219 | flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPS 220 | img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float 221 | fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPS 222 | except (ImportError, Exception): 223 | fs = '' 224 | 225 | logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") 226 | 227 | 228 | def load_classifier(name='resnet101', n=2): 229 | # Loads a pretrained model reshaped to n-class output 230 | model = torchvision.models.__dict__[name](pretrained=True) 231 | 232 | # ResNet model properties 233 | # input_size = [3, 224, 224] 234 | # input_space = 'RGB' 235 | # input_range = [0, 1] 236 | # mean = [0.485, 0.456, 0.406] 237 | # std = [0.229, 0.224, 0.225] 238 | 239 | # Reshape output to n classes 240 | filters = model.fc.weight.shape[1] 241 | model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) 242 | model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) 243 | model.fc.out_features = n 244 | return model 245 | 246 | 247 | def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) 248 | # scales img(bs,3,y,x) by ratio constrained to gs-multiple 249 | if 
ratio == 1.0: 250 | return img 251 | else: 252 | h, w = img.shape[2:] 253 | s = (int(h * ratio), int(w * ratio)) # new size 254 | img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize 255 | if not same_shape: # pad/crop img 256 | h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)] 257 | return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean 258 | 259 | 260 | def copy_attr(a, b, include=(), exclude=()): 261 | # Copy attributes from b to a, options to only include [...] and to exclude [...] 262 | for k, v in b.__dict__.items(): 263 | if (len(include) and k not in include) or k.startswith('_') or k in exclude: 264 | continue 265 | else: 266 | setattr(a, k, v) 267 | 268 | 269 | class ModelEMA: 270 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models 271 | Keep a moving average of everything in the model state_dict (parameters and buffers). 272 | This is intended to allow functionality like 273 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage 274 | A smoothed version of the weights is necessary for some training schemes to perform well. 275 | This class is sensitive where it is initialized in the sequence of model init, 276 | GPU assignment and distributed training wrappers. 277 | """ 278 | 279 | def __init__(self, model, decay=0.9999, updates=0): 280 | # Create EMA 281 | self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA 282 | # if next(model.parameters()).device.type != 'cpu': 283 | # self.ema.half() # FP16 EMA 284 | self.updates = updates # number of EMA updates 285 | self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs) 286 | for p in self.ema.parameters(): 287 | p.requires_grad_(False) 288 | 289 | def update(self, model): 290 | # Update EMA parameters 291 | with torch.no_grad(): 292 | self.updates += 1 293 | d = self.decay(self.updates) 294 | 295 | msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict 296 | for k, v in self.ema.state_dict().items(): 297 | if v.dtype.is_floating_point: 298 | v *= d 299 | v += (1. - d) * msd[k].detach() 300 | 301 | def update_attr(self, model, include=(), exclude=('process_group', 'reducer')): 302 | # Update EMA attributes 303 | copy_attr(self.ema, model, include, exclude) 304 | 305 | 306 | class BatchNormXd(torch.nn.modules.batchnorm._BatchNorm): 307 | def _check_input_dim(self, input): 308 | # The only difference between BatchNorm1d, BatchNorm2d, BatchNorm3d, etc 309 | # is this method that is overwritten by the sub-class 310 | # This original goal of this method was for tensor sanity checks 311 | # If you're ok bypassing those sanity checks (eg. 
if you trust your inference 312 | # to provide the right dimensional inputs), then you can just use this method 313 | # for easy conversion from SyncBatchNorm 314 | # (unfortunately, SyncBatchNorm does not store the original class - if it did 315 | # we could return the one that was originally created) 316 | return 317 | 318 | def revert_sync_batchnorm(module): 319 | # this is very similar to the function that it is trying to revert: 320 | # https://github.com/pytorch/pytorch/blob/c8b3686a3e4ba63dc59e5dcfe5db3430df256833/torch/nn/modules/batchnorm.py#L679 321 | module_output = module 322 | if isinstance(module, torch.nn.modules.batchnorm.SyncBatchNorm): 323 | new_cls = BatchNormXd 324 | module_output = BatchNormXd(module.num_features, 325 | module.eps, module.momentum, 326 | module.affine, 327 | module.track_running_stats) 328 | if module.affine: 329 | with torch.no_grad(): 330 | module_output.weight = module.weight 331 | module_output.bias = module.bias 332 | module_output.running_mean = module.running_mean 333 | module_output.running_var = module.running_var 334 | module_output.num_batches_tracked = module.num_batches_tracked 335 | if hasattr(module, "qconfig"): 336 | module_output.qconfig = module.qconfig 337 | for name, child in module.named_children(): 338 | module_output.add_module(name, revert_sync_batchnorm(child)) 339 | del module 340 | return module_output 341 | 342 | 343 | class TracedModel(nn.Module): 344 | 345 | def __init__(self, model=None, device=None, img_size=(640,640)): 346 | super(TracedModel, self).__init__() 347 | 348 | print(" Convert model to Traced-model... ") 349 | self.stride = model.stride 350 | self.names = model.names 351 | self.model = model 352 | 353 | self.model = revert_sync_batchnorm(self.model) 354 | self.model.to('cpu') 355 | self.model.eval() 356 | 357 | self.detect_layer = self.model.model[-1] 358 | self.model.traced = True 359 | 360 | rand_example = torch.rand(1, 3, img_size, img_size) 361 | 362 | traced_script_module = torch.jit.trace(self.model, rand_example, strict=False) 363 | #traced_script_module = torch.jit.script(self.model) 364 | traced_script_module.save("traced_model.pt") 365 | print(" traced_script_module saved! ") 366 | self.model = traced_script_module 367 | self.model.to(device) 368 | self.detect_layer.to(device) 369 | print(" model is traced! 
\n") 370 | 371 | def forward(self, x, augment=False, profile=False): 372 | out = self.model(x) 373 | out = self.detect_layer(out) 374 | return out -------------------------------------------------------------------------------- /yolov7/models/common.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | def autopad(k, p=None): # kernel, padding 6 | # Pad to 'same' 7 | if p is None: 8 | p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad 9 | return p 10 | 11 | class MP(nn.Module): 12 | def __init__(self, k=2): 13 | super(MP, self).__init__() 14 | self.m = nn.MaxPool2d(kernel_size=k, stride=k) 15 | 16 | def forward(self, x): 17 | return self.m(x) 18 | 19 | class Concat(nn.Module): 20 | def __init__(self, dimension=1): 21 | super(Concat, self).__init__() 22 | self.d = dimension 23 | 24 | def forward(self, x): 25 | return torch.cat(x, self.d) 26 | 27 | class Conv(nn.Module): 28 | # Standard convolution 29 | def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups 30 | super(Conv, self).__init__() 31 | self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) 32 | self.bn = nn.BatchNorm2d(c2) 33 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 34 | 35 | def forward(self, x): 36 | return self.act(self.bn(self.conv(x))) 37 | 38 | def fuseforward(self, x): 39 | return self.act(self.conv(x)) 40 | 41 | class SPPCSPC(nn.Module): 42 | # CSP https://github.com/WongKinYiu/CrossStagePartialNetworks 43 | def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5, k=(5, 9, 13)): 44 | super(SPPCSPC, self).__init__() 45 | c_ = int(2 * c2 * e) # hidden channels 46 | self.cv1 = Conv(c1, c_, 1, 1) 47 | self.cv2 = Conv(c1, c_, 1, 1) 48 | self.cv3 = Conv(c_, c_, 3, 1) 49 | self.cv4 = Conv(c_, c_, 1, 1) 50 | self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k]) 51 | self.cv5 = Conv(4 * c_, c_, 1, 1) 52 | self.cv6 = Conv(c_, c_, 3, 1) 53 | self.cv7 = Conv(2 * c_, c2, 1, 1) 54 | 55 | def forward(self, x): 56 | x1 = self.cv4(self.cv3(self.cv1(x))) 57 | y1 = self.cv6(self.cv5(torch.cat([x1] + [m(x1) for m in self.m], 1))) 58 | y2 = self.cv2(x) 59 | return self.cv7(torch.cat((y1, y2), dim=1)) 60 | 61 | class ImplicitA(nn.Module): 62 | def __init__(self, channel, mean=0., std=.02): 63 | super(ImplicitA, self).__init__() 64 | self.channel = channel 65 | self.mean = mean 66 | self.std = std 67 | self.implicit = nn.Parameter(torch.zeros(1, channel, 1, 1)) 68 | nn.init.normal_(self.implicit, mean=self.mean, std=self.std) 69 | 70 | def forward(self, x): 71 | return self.implicit + x 72 | 73 | 74 | class ImplicitM(nn.Module): 75 | def __init__(self, channel, mean=1., std=.02): 76 | super(ImplicitM, self).__init__() 77 | self.channel = channel 78 | self.mean = mean 79 | self.std = std 80 | self.implicit = nn.Parameter(torch.ones(1, channel, 1, 1)) 81 | # self.implicit = nn.Parameter(torch.ones(1, 3, 1, 1, channel // 3)) 82 | nn.init.normal_(self.implicit, mean=self.mean, std=self.std) 83 | 84 | def forward(self, x): 85 | # print(x.shape) 86 | # print(self.implicit.weight) 87 | # import pdb; pdb.set_trace() 88 | return self.implicit * x 89 | 90 | class RepConv(nn.Module): 91 | # Represented convolution 92 | # https://arxiv.org/abs/2101.03697 93 | 94 | def __init__(self, c1, c2, k=3, s=1, p=None, g=1, act=True, deploy=False): 95 | super(RepConv, 
self).__init__() 96 | 97 | self.deploy = deploy 98 | self.groups = g 99 | self.in_channels = c1 100 | self.out_channels = c2 101 | 102 | assert k == 3 103 | assert autopad(k, p) == 1 104 | 105 | padding_11 = autopad(k, p) - k // 2 106 | 107 | self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) 108 | 109 | if deploy: 110 | self.rbr_reparam = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=True) 111 | 112 | else: 113 | self.rbr_identity = (nn.BatchNorm2d(num_features=c1) if c2 == c1 and s == 1 else None) 114 | 115 | self.rbr_dense = nn.Sequential( 116 | nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False), 117 | nn.BatchNorm2d(num_features=c2), 118 | ) 119 | 120 | self.rbr_1x1 = nn.Sequential( 121 | nn.Conv2d( c1, c2, 1, s, padding_11, groups=g, bias=False), 122 | nn.BatchNorm2d(num_features=c2), 123 | ) 124 | 125 | def forward(self, inputs): 126 | if hasattr(self, "rbr_reparam"): 127 | return self.act(self.rbr_reparam(inputs)) 128 | 129 | if self.rbr_identity is None: 130 | id_out = 0 131 | else: 132 | id_out = self.rbr_identity(inputs) 133 | 134 | return self.act(self.rbr_dense(inputs) + self.rbr_1x1(inputs) + id_out) 135 | 136 | def get_equivalent_kernel_bias(self): 137 | kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) 138 | kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) 139 | kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) 140 | return ( 141 | kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, 142 | bias3x3 + bias1x1 + biasid, 143 | ) 144 | 145 | def _pad_1x1_to_3x3_tensor(self, kernel1x1): 146 | if kernel1x1 is None: 147 | return 0 148 | else: 149 | return nn.functional.pad(kernel1x1, [1, 1, 1, 1]) 150 | 151 | def _fuse_bn_tensor(self, branch): 152 | if branch is None: 153 | return 0, 0 154 | if isinstance(branch, nn.Sequential): 155 | kernel = branch[0].weight 156 | running_mean = branch[1].running_mean 157 | running_var = branch[1].running_var 158 | gamma = branch[1].weight 159 | beta = branch[1].bias 160 | eps = branch[1].eps 161 | else: 162 | assert isinstance(branch, nn.BatchNorm2d) 163 | if not hasattr(self, "id_tensor"): 164 | input_dim = self.in_channels // self.groups 165 | kernel_value = np.zeros( 166 | (self.in_channels, input_dim, 3, 3), dtype=np.float32 167 | ) 168 | for i in range(self.in_channels): 169 | kernel_value[i, i % input_dim, 1, 1] = 1 170 | self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) 171 | kernel = self.id_tensor 172 | running_mean = branch.running_mean 173 | running_var = branch.running_var 174 | gamma = branch.weight 175 | beta = branch.bias 176 | eps = branch.eps 177 | std = (running_var + eps).sqrt() 178 | t = (gamma / std).reshape(-1, 1, 1, 1) 179 | return kernel * t, beta - running_mean * gamma / std 180 | 181 | def repvgg_convert(self): 182 | kernel, bias = self.get_equivalent_kernel_bias() 183 | return ( 184 | kernel.detach().cpu().numpy(), 185 | bias.detach().cpu().numpy(), 186 | ) 187 | 188 | def fuse_conv_bn(self, conv, bn): 189 | 190 | std = (bn.running_var + bn.eps).sqrt() 191 | bias = bn.bias - bn.running_mean * bn.weight / std 192 | 193 | t = (bn.weight / std).reshape(-1, 1, 1, 1) 194 | weights = conv.weight * t 195 | 196 | bn = nn.Identity() 197 | conv = nn.Conv2d(in_channels = conv.in_channels, 198 | out_channels = conv.out_channels, 199 | kernel_size = conv.kernel_size, 200 | stride=conv.stride, 201 | padding = conv.padding, 202 | dilation = conv.dilation, 203 | groups = conv.groups, 204 | bias = True, 205 | 
padding_mode = conv.padding_mode) 206 | 207 | conv.weight = torch.nn.Parameter(weights) 208 | conv.bias = torch.nn.Parameter(bias) 209 | return conv 210 | 211 | def fuse_repvgg_block(self): 212 | if self.deploy: 213 | return 214 | print(f"RepConv.fuse_repvgg_block") 215 | 216 | self.rbr_dense = self.fuse_conv_bn(self.rbr_dense[0], self.rbr_dense[1]) 217 | 218 | self.rbr_1x1 = self.fuse_conv_bn(self.rbr_1x1[0], self.rbr_1x1[1]) 219 | rbr_1x1_bias = self.rbr_1x1.bias 220 | weight_1x1_expanded = torch.nn.functional.pad(self.rbr_1x1.weight, [1, 1, 1, 1]) 221 | 222 | # Fuse self.rbr_identity 223 | if (isinstance(self.rbr_identity, nn.BatchNorm2d) or isinstance(self.rbr_identity, nn.modules.batchnorm.SyncBatchNorm)): 224 | # print(f"fuse: rbr_identity == BatchNorm2d or SyncBatchNorm") 225 | identity_conv_1x1 = nn.Conv2d( 226 | in_channels=self.in_channels, 227 | out_channels=self.out_channels, 228 | kernel_size=1, 229 | stride=1, 230 | padding=0, 231 | groups=self.groups, 232 | bias=False) 233 | identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.to(self.rbr_1x1.weight.data.device) 234 | identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.squeeze().squeeze() 235 | # print(f" identity_conv_1x1.weight = {identity_conv_1x1.weight.shape}") 236 | identity_conv_1x1.weight.data.fill_(0.0) 237 | identity_conv_1x1.weight.data.fill_diagonal_(1.0) 238 | identity_conv_1x1.weight.data = identity_conv_1x1.weight.data.unsqueeze(2).unsqueeze(3) 239 | # print(f" identity_conv_1x1.weight = {identity_conv_1x1.weight.shape}") 240 | 241 | identity_conv_1x1 = self.fuse_conv_bn(identity_conv_1x1, self.rbr_identity) 242 | bias_identity_expanded = identity_conv_1x1.bias 243 | weight_identity_expanded = torch.nn.functional.pad(identity_conv_1x1.weight, [1, 1, 1, 1]) 244 | else: 245 | # print(f"fuse: rbr_identity != BatchNorm2d, rbr_identity = {self.rbr_identity}") 246 | bias_identity_expanded = torch.nn.Parameter( torch.zeros_like(rbr_1x1_bias) ) 247 | weight_identity_expanded = torch.nn.Parameter( torch.zeros_like(weight_1x1_expanded) ) 248 | 249 | 250 | #print(f"self.rbr_1x1.weight = {self.rbr_1x1.weight.shape}, ") 251 | #print(f"weight_1x1_expanded = {weight_1x1_expanded.shape}, ") 252 | #print(f"self.rbr_dense.weight = {self.rbr_dense.weight.shape}, ") 253 | 254 | self.rbr_dense.weight = torch.nn.Parameter(self.rbr_dense.weight + weight_1x1_expanded + weight_identity_expanded) 255 | self.rbr_dense.bias = torch.nn.Parameter(self.rbr_dense.bias + rbr_1x1_bias + bias_identity_expanded) 256 | 257 | self.rbr_reparam = self.rbr_dense 258 | self.deploy = True 259 | 260 | if self.rbr_identity is not None: 261 | del self.rbr_identity 262 | self.rbr_identity = None 263 | 264 | if self.rbr_1x1 is not None: 265 | del self.rbr_1x1 266 | self.rbr_1x1 = None 267 | 268 | if self.rbr_dense is not None: 269 | del self.rbr_dense 270 | self.rbr_dense = None 271 | 272 | class RepConv_OREPA(nn.Module): 273 | 274 | def __init__(self, c1, c2, k=3, s=1, padding=1, dilation=1, groups=1, padding_mode='zeros', deploy=False, use_se=False, nonlinear=nn.SiLU()): 275 | super(RepConv_OREPA, self).__init__() 276 | self.deploy = deploy 277 | self.groups = groups 278 | self.in_channels = c1 279 | self.out_channels = c2 280 | 281 | self.padding = padding 282 | self.dilation = dilation 283 | self.groups = groups 284 | 285 | assert k == 3 286 | assert padding == 1 287 | 288 | padding_11 = padding - k // 2 289 | 290 | if nonlinear is None: 291 | self.nonlinearity = nn.Identity() 292 | else: 293 | self.nonlinearity = nonlinear 294 | 295 
| if use_se: 296 | self.se = SEBlock(self.out_channels, internal_neurons=self.out_channels // 16) 297 | else: 298 | self.se = nn.Identity() 299 | 300 | if deploy: 301 | self.rbr_reparam = nn.Conv2d(in_channels=self.in_channels, out_channels=self.out_channels, kernel_size=k, stride=s, 302 | padding=padding, dilation=dilation, groups=groups, bias=True, padding_mode=padding_mode) 303 | 304 | else: 305 | self.rbr_identity = nn.BatchNorm2d(num_features=self.in_channels) if self.out_channels == self.in_channels and s == 1 else None 306 | self.rbr_dense = OREPA_3x3_RepConv(in_channels=self.in_channels, out_channels=self.out_channels, kernel_size=k, stride=s, padding=padding, groups=groups, dilation=1) 307 | self.rbr_1x1 = ConvBN(in_channels=self.in_channels, out_channels=self.out_channels, kernel_size=1, stride=s, padding=padding_11, groups=groups, dilation=1) 308 | print('RepVGG Block, identity = ', self.rbr_identity) 309 | 310 | 311 | def forward(self, inputs): 312 | if hasattr(self, 'rbr_reparam'): 313 | return self.nonlinearity(self.se(self.rbr_reparam(inputs))) 314 | 315 | if self.rbr_identity is None: 316 | id_out = 0 317 | else: 318 | id_out = self.rbr_identity(inputs) 319 | 320 | out1 = self.rbr_dense(inputs) 321 | out2 = self.rbr_1x1(inputs) 322 | out3 = id_out 323 | out = out1 + out2 + out3 324 | 325 | return self.nonlinearity(self.se(out)) 326 | 327 | 328 | # Optional. This improves the accuracy and facilitates quantization. 329 | # 1. Cancel the original weight decay on rbr_dense.conv.weight and rbr_1x1.conv.weight. 330 | # 2. Use like this. 331 | # loss = criterion(....) 332 | # for every RepVGGBlock blk: 333 | # loss += weight_decay_coefficient * 0.5 * blk.get_cust_L2() 334 | # optimizer.zero_grad() 335 | # loss.backward() 336 | 337 | # Not used for OREPA 338 | def get_custom_L2(self): 339 | K3 = self.rbr_dense.weight_gen() 340 | K1 = self.rbr_1x1.conv.weight 341 | t3 = (self.rbr_dense.bn.weight / ((self.rbr_dense.bn.running_var + self.rbr_dense.bn.eps).sqrt())).reshape(-1, 1, 1, 1).detach() 342 | t1 = (self.rbr_1x1.bn.weight / ((self.rbr_1x1.bn.running_var + self.rbr_1x1.bn.eps).sqrt())).reshape(-1, 1, 1, 1).detach() 343 | 344 | l2_loss_circle = (K3 ** 2).sum() - (K3[:, :, 1:2, 1:2] ** 2).sum() # The L2 loss of the "circle" of weights in 3x3 kernel. Use regular L2 on them. 345 | eq_kernel = K3[:, :, 1:2, 1:2] * t3 + K1 * t1 # The equivalent resultant central point of 3x3 kernel. 346 | l2_loss_eq_kernel = (eq_kernel ** 2 / (t3 ** 2 + t1 ** 2)).sum() # Normalize for an L2 coefficient comparable to regular L2. 
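# A hedged sketch of how this penalty could enter a training step, following the
# comment block above (note the method is named get_custom_L2, not get_cust_L2).
# `model`, `criterion`, `optimizer`, `imgs`, `targets` and `weight_decay_coefficient`
# are placeholders, not names defined in this repository:
#
#     loss = criterion(model(imgs), targets)
#     for blk in model.modules():
#         if isinstance(blk, RepConv_OREPA) and not hasattr(blk, 'rbr_reparam'):
#             loss = loss + weight_decay_coefficient * 0.5 * blk.get_custom_L2()
#     optimizer.zero_grad(); loss.backward(); optimizer.step()
#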
347 | return l2_loss_eq_kernel + l2_loss_circle 348 | 349 | def get_equivalent_kernel_bias(self): 350 | kernel3x3, bias3x3 = self._fuse_bn_tensor(self.rbr_dense) 351 | kernel1x1, bias1x1 = self._fuse_bn_tensor(self.rbr_1x1) 352 | kernelid, biasid = self._fuse_bn_tensor(self.rbr_identity) 353 | return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid 354 | 355 | def _pad_1x1_to_3x3_tensor(self, kernel1x1): 356 | if kernel1x1 is None: 357 | return 0 358 | else: 359 | return torch.nn.functional.pad(kernel1x1, [1,1,1,1]) 360 | 361 | def _fuse_bn_tensor(self, branch): 362 | if branch is None: 363 | return 0, 0 364 | if not isinstance(branch, nn.BatchNorm2d): 365 | if isinstance(branch, OREPA_3x3_RepConv): 366 | kernel = branch.weight_gen() 367 | elif isinstance(branch, ConvBN): 368 | kernel = branch.conv.weight 369 | else: 370 | raise NotImplementedError 371 | running_mean = branch.bn.running_mean 372 | running_var = branch.bn.running_var 373 | gamma = branch.bn.weight 374 | beta = branch.bn.bias 375 | eps = branch.bn.eps 376 | else: 377 | if not hasattr(self, 'id_tensor'): 378 | input_dim = self.in_channels // self.groups 379 | kernel_value = np.zeros((self.in_channels, input_dim, 3, 3), dtype=np.float32) 380 | for i in range(self.in_channels): 381 | kernel_value[i, i % input_dim, 1, 1] = 1 382 | self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device) 383 | kernel = self.id_tensor 384 | running_mean = branch.running_mean 385 | running_var = branch.running_var 386 | gamma = branch.weight 387 | beta = branch.bias 388 | eps = branch.eps 389 | std = (running_var + eps).sqrt() 390 | t = (gamma / std).reshape(-1, 1, 1, 1) 391 | return kernel * t, beta - running_mean * gamma / std 392 | 393 | def switch_to_deploy(self): 394 | if hasattr(self, 'rbr_reparam'): 395 | return 396 | print(f"RepConv_OREPA.switch_to_deploy") 397 | kernel, bias = self.get_equivalent_kernel_bias() 398 | self.rbr_reparam = nn.Conv2d(in_channels=self.rbr_dense.in_channels, out_channels=self.rbr_dense.out_channels, 399 | kernel_size=self.rbr_dense.kernel_size, stride=self.rbr_dense.stride, 400 | padding=self.rbr_dense.padding, dilation=self.rbr_dense.dilation, groups=self.rbr_dense.groups, bias=True) 401 | self.rbr_reparam.weight.data = kernel 402 | self.rbr_reparam.bias.data = bias 403 | for para in self.parameters(): 404 | para.detach_() 405 | self.__delattr__('rbr_dense') 406 | self.__delattr__('rbr_1x1') 407 | if hasattr(self, 'rbr_identity'): 408 | self.__delattr__('rbr_identity') -------------------------------------------------------------------------------- /task1.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import glob 3 | import apriltag 4 | import numpy as np 5 | import matplotlib.cm as cm 6 | from numpy import random 7 | import torch 8 | 9 | from .utils.json_utils import json_preprocess, json_postprocess 10 | from .utils.args_utils import parse_args 11 | from .utils.model_utils import matching, read_image, make_matching_plot, plot_one_box 12 | from .superglue.superpoint import SuperPoint 13 | from .superglue.superglue import SuperGlue 14 | 15 | from .yolov7.models.experimental import attempt_load 16 | from .yolov7.utils.datasets import letterbox, LoadImages 17 | from .yolov7.utils.general import check_img_size, non_max_suppression, scale_coords, cv2 18 | from .yolov7.utils.torch_utils import TracedModel 19 | 20 | 21 | CLASS_MAP = {'person': 0, 'monitor': 1, 'cabinet': 2, 'basket': 3, 'box': 4, 'trash 
bin': 5, 'computer': 6, 'laptop': 7, 'bookshelf': 8, 'chair': 9, 'printer': 10, 'desk': 11, 22 | 'whiteboard': 12, 'banner': 13, 'mirror': 14, 'stairs': 15, 'toy': 16, 'fire extinguisher': 17, 'poster': 18, 'sink': 19, 'exercise tool': 20, 'speaker': 21, 23 | 'up_occluded': 22, 'up_red': 23, 'up_orange': 24, 'up_yellow': 25, 'up_green': 26, 'up_blue': 27, 'up_purple': 28, 'up_white': 29, 'up_gray': 30, 'up_black': 31, 24 | 'low_occluded': 32, 'low_red': 33, 'low_orange': 34, 'low_yellow': 35, 'low_green': 36, 'low_blue': 37, 'low_purple': 38, 'low_white': 39, 'low_gray': 40, 'low_black': 41, 25 | 'person_man': 42, 'person_woman': 43, 'person_child': 44, 'others_lifeguard': 45, 'others_medic': 46} 26 | 27 | class Task1: 28 | def __init__(self, args): 29 | self.clue_path = args.clue_path 30 | self.json_output_path = args.json_output_path 31 | self.task1_debug = args.task1_debug 32 | self.debug_output_path = args.debug_output_path 33 | self.img_conf_th = args.img_conf_th 34 | self.img_kp_th = args.img_kp_th 35 | self.txt_th = args.txt_th 36 | self.od_th = args.od_th 37 | self.total_th = args.total_th 38 | self.show_video = args.show_vid 39 | self.cnt = 0 40 | self.state = 0 41 | self.room_id = None 42 | self.json = {'answer_sheet': { 43 | 'room_id': None, 44 | 'mission': "1", 45 | 'answer': { 46 | 'person_id': {} 47 | } 48 | } 49 | } 50 | self.json_list = [] 51 | self.obj_cls = set() 52 | self.ppl_cls = set() 53 | 54 | # ----------------------------------------- 55 | # image matching model & preprocessing 56 | # ----------------------------------------- 57 | self.img_config = { 58 | 'superpoint': { 59 | 'nms_radius': 4, 60 | 'keypoint_threshold': 0.005, 61 | 'max_keypoints': 1024 62 | }, 63 | 'superglue': { 64 | 'weights': 'indoor', 65 | 'sinkhorn_iterations': 20, 66 | 'match_threshold': 0.2, 67 | } 68 | } 69 | self.superpoint = SuperPoint(self.img_config.get('superpoint', {})).eval().to('cuda') 70 | self.superglue = SuperGlue(self.img_config.get('superglue', {})).eval().to('cuda') 71 | self.match_batch_size = 1 72 | 73 | # ----------------------------------------- 74 | # YOLO model & preprocessing 75 | # ----------------------------------------- 76 | self.imgsz = (640, 640) 77 | self.half = True 78 | self.conf_th = 0.25 79 | self.iou_th = 0.45 80 | self.classes = None 81 | self.cls_agnostic_nms = False 82 | self.yolo_path = args.yolo_path 83 | yolo = attempt_load(self.yolo_path, map_location='cuda').eval() 84 | self.stride = int(yolo.stride.max()) 85 | self.img_size = check_img_size(self.imgsz[0], s=self.stride) 86 | self.names = yolo.names 87 | self.color_list = ['OCC','RED','ORG','YLW','GRN','BLU','PRP','WHT','GRY','BLK'] 88 | self.colors = [[random.randint(0, 255) for _ in range(3)] for _ in self.names] 89 | # if self.half: 90 | # self.yolo = TracedModel(yolo, 'cuda', self.img_size).half() 91 | # else: 92 | # self.yolo = TracedModel(yolo, 'cuda', self.img_size) 93 | self.yolo = yolo.half() 94 | # self.true=1 # NOTE: dummy code for debugging 95 | 96 | self.clue_json_read = False 97 | self.clue_img_read = False 98 | 99 | self.clue_txts = None 100 | self.clue_txt_list = None 101 | 102 | self.clue_img_list = None 103 | self.clue_imgs = None 104 | self.clue_imgs_p = None 105 | self.clue_imgs_scale = None 106 | 107 | def __call__(self, img: np.ndarray, state, frame_for_vis=None): 108 | try: 109 | clue_info = [] 110 | if (state == 0 or state == -1): # NOTE: 복도에서 json, room_id 초기화 111 | self.json = {'answer_sheet': { 112 | 'room_id': None, 113 | 'mission': "1", 114 | 'answer': { 115 | 
'person_id': {} 116 | } 117 | } 118 | } 119 | self.json_list = [] 120 | self.room_id = None 121 | self.obj_cls = set() 122 | self.ppl_cls = set() 123 | 124 | if self.clue_json_read is False: 125 | # ----------------------------------------- 126 | # text clue preprocessing 127 | # ----------------------------------------- 128 | self.clue_txts = glob.glob(self.clue_path+'/*.json', recursive=True) 129 | self.clue_txt_list = ([]) 130 | if len(self.clue_txts) > 0: 131 | self.clue_txts.sort() 132 | for clue_txt_ in self.clue_txts: 133 | clue_txt_key = [] 134 | clue_txt_dict = json_preprocess(clue_txt_) 135 | self.clue_txts_ = list(clue_txt_dict.values())[0] 136 | for i in range(0, len(self.clue_txts_)): 137 | clue_txt_key.append(CLASS_MAP[self.clue_txts_[i]]) 138 | self.clue_txt_list.append(clue_txt_key) 139 | 140 | self.clue_json_read = True 141 | 142 | if self.clue_img_read is False: 143 | # ----------------------------------------- 144 | # image clue preprocessing 145 | # ----------------------------------------- 146 | self.clue_img_list = glob.glob(self.clue_path+'/*.jpg', recursive=True) 147 | self.clue_imgs = [] 148 | self.clue_imgs_p = [] 149 | self.clue_imgs_scales = [] 150 | if len(self.clue_img_list) > 0: 151 | self.clue_img_list.sort() 152 | for clue_img_ in self.clue_img_list: 153 | clue_img_ = cv2.imread(clue_img_, cv2.IMREAD_GRAYSCALE) 154 | image1, inp1, scales1 = read_image(clue_img_, [640, 480], 'cuda') # NOTE: clue image 155 | self.clue_imgs.append(image1) 156 | self.clue_imgs_p.append(inp1) 157 | self.clue_imgs_scales.append(scales1) 158 | 159 | self.clue_img_read = True 160 | 161 | # ----------------------------------------- 162 | # Superglue inference 163 | # ----------------------------------------- 164 | if self.task1_debug: 165 | input_img = cv2.imread(img, cv2.IMREAD_GRAYSCALE) 166 | else: 167 | input_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) # NOTE: copy된 frame image 사용 168 | image0, inp0, scales0 = read_image(input_img, [640, 480], 'cuda') # NOTE: video frame image 169 | 170 | if len(self.clue_img_list) > 0: 171 | score_img = [] 172 | for i in range(0, len(self.clue_img_list)): # NOTE: 각 이미지 단서마다 kpts, mean confidence 저장 173 | cv2.putText(frame_for_vis, str(self.clue_txt_list), (80, 300), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 0), 2) 174 | pred, matches, conf = matching({'image0': inp0, 'image1': self.clue_imgs_p[i]}, self.superpoint, self.superglue) 175 | kpts0, kpts1 = pred['keypoints0'], pred['keypoints1'] 176 | valid = matches > -1 177 | mkpts0 = kpts0[valid] 178 | mkpts1 = kpts1[matches[valid]] 179 | mconf = conf[valid] # NOTE: superpoint 개수 180 | score_img.append((mkpts0.shape[0], mconf.mean())) 181 | 182 | if (score_img[i][0] > self.img_kp_th and score_img[i][1] > self.img_conf_th): 183 | im_detections = [] 184 | im_detector = apriltag.Detector() 185 | im_detections.append(im_detector.detect(input_img)) 186 | im_tag_id = [] 187 | 188 | for j in range(0, len(im_detections[0])): 189 | im_tag_id.append(im_detections[0][j].tag_id) 190 | im_json_output = json_postprocess(self.clue_img_list[i][-6:-4], im_tag_id) 191 | self.json_list.append(im_json_output) 192 | 193 | if self.debug_output_path != None: # NOTE: for debugging (superpoint > 50 & confidence > 0.5 일 때만 이미지 저장) 194 | color = cm.jet(mconf) 195 | label = ['SuperGlue', 196 | 'Keypoints: {}:{}'.format(len(kpts0), len(kpts1)), 197 | 'Matches: {}'.format(len(mkpts0)),] 198 | k_thresh = self.img_config['superpoint']['keypoint_threshold'] 199 | m_thresh = self.img_config['superglue']['match_threshold'] 200 | 
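# The two thresholds printed on this debug plot come from self.img_config in
# __init__: 'keypoint_threshold' is SuperPoint's detection-confidence cutoff
# (0.005) and 'match_threshold' is SuperGlue's match-confidence cutoff (0.2).
# They are separate from the clue-acceptance test earlier in this loop, which
# treats clue image i as matched only when both conditions hold:
#
#     score_img[i][0] > self.img_kp_th      # number of matched keypoints (mkpts0)
#     score_img[i][1] > self.img_conf_th    # mean SuperGlue match confidence
#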
small_text = ['Keypoint Threshold: {:.4f}'.format(k_thresh), 201 | 'Match Threshold: {:.2f}'.format(m_thresh),] 202 | make_matching_plot(image0, self.clue_imgs[i], mkpts0, mkpts1, color, label, 203 | self.debug_output_path+'frame'+str(self.cnt)+'_clue'+str(i), small_text) 204 | 205 | clue_info.append(self.clue_img_list[i][-6:-4]) 206 | 207 | # ----------------------------------------- 208 | # YOLO inference 209 | # ----------------------------------------- 210 | if len(self.clue_txt_list) > 0: 211 | score_txt = 0.0 212 | score_bbox = 0.0 213 | for i in range(0, len(self.clue_txt_list)): 214 | self.yolo(torch.zeros(1, 3, self.img_size, self.img_size).to('cuda').type_as(next(self.yolo.parameters()))) 215 | if self.task1_debug: 216 | load_img = LoadImages(img, img_size=self.imgsz, stride=self.stride) 217 | _, yolo_img, im0s, _ = next(iter(load_img)) 218 | else: 219 | im0s = img 220 | # im0s = frame_for_vis # NOTE: video frame image 사용 221 | yolo_img = letterbox(im0s, self.img_size, stride=self.stride)[0] 222 | yolo_img = yolo_img[:, :, ::-1].transpose(2, 0, 1) 223 | yolo_img = np.ascontiguousarray(yolo_img) 224 | yolo_img = torch.from_numpy(yolo_img).to('cuda') 225 | yolo_img = yolo_img.half() if self.half else yolo_img.float() 226 | yolo_img /= 255.0 227 | if len(yolo_img.shape) == 3: 228 | yolo_img = yolo_img.unsqueeze(0) 229 | 230 | pred = self.yolo(yolo_img) 231 | pred = non_max_suppression(pred[0], self.conf_th, self.iou_th, self.classes, self.cls_agnostic_nms, multi_label=False, return_attributes=True)[0] 232 | pred[:, :4] = scale_coords(yolo_img.shape[2:], pred[:, :4], im0s.shape).round() 233 | 234 | if len(pred) > 0: 235 | # NOTE: poster 사람 제거 236 | person_pred = pred[0][pred[0][5] == 0] 237 | not_person_pred = pred[0][pred[0][5] != 0] 238 | poster_pred = pred[0][pred[0][5] == 18] 239 | if len(person_pred) != 0 : 240 | new_person_pred = [] 241 | for pep in person_pred : 242 | flag = False 243 | for pop in poster_pred : 244 | person_loc = pep[:4] 245 | poster_loc = pop[:4] 246 | person_left = person_loc[0] - person_loc[2]/2 247 | person_right = person_loc[0] + person_loc[2]/2 248 | person_top = person_loc[1] - person_loc[3]/2 249 | person_bottom = person_loc[1] + person_loc[3]/2 250 | poster_left = poster_loc[0] - poster_loc[2]/2 251 | poster_right = poster_loc[0] + poster_loc[2]/2 252 | poster_top = poster_loc[1] - poster_loc[3]/2 253 | poster_bottom = poster_loc[1] + poster_loc[3]/2 254 | if (poster_left < person_left) and (poster_top < person_top) and \ 255 | (poster_right > person_right) and (poster_bottom > person_bottom): 256 | # person is in poster 257 | flag = True 258 | break 259 | else : 260 | flag = False 261 | if not flag : 262 | new_person_pred.append(pep) 263 | person_pred = torch.stack(new_person_pred) 264 | pred = [torch.cat([person_pred, not_person_pred])][0] 265 | 266 | # NOTE: pred[0] = [X, Y, W, H, cls_conf, cls, upper_conf, upper_cls, lower_conf, lower_cls, ppl_conf, ppl_cls, oth_conf, oth_cls] 267 | # NOTE: other confidence and other class not used in task1 268 | cls_match_num = 0.0 269 | for j in range(0, len(self.clue_txt_list[i])): 270 | for k in range(0, pred.shape[0]): # NOTE: bbox 여러개 쳐진 경우 271 | if (pred[k][5] == 0 and pred[k][4] >= 0.7): # NOTE: 사람인경우 272 | if pred[k][11] == 0: 273 | name = 42 274 | elif pred[k][11] == 1: 275 | name = 43 276 | else: 277 | name = 44 278 | if name == self.clue_txt_list[i][j]: 279 | score_bbox = score_bbox+pred[k][10] 280 | self.ppl_cls.add(name) 281 | 282 | elif (pred[k][5] == self.clue_txt_list[i][j] and pred[k][4] >= 
self.od_th): # NOTE: 원하는 class (attribute 제외)가 th이상으로 detecting될 때 283 | score_bbox = score_bbox+pred[k][4] # NOTE: bbox마다 score 계산 284 | self.obj_cls.add(pred[k][5]) 285 | 286 | cls_match_num = len(set(self.clue_txt_list[i]).intersection(self.ppl_cls.union(self.obj_cls))) 287 | 288 | cv2.putText(frame_for_vis, str(self.ppl_cls.union(self.obj_cls)), (50, 400), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 0), 2) 289 | 290 | if cls_match_num > self.txt_th: 291 | od_detections = [] 292 | od_detector = apriltag.Detector() 293 | od_detections.append(od_detector.detect(input_img)) 294 | od_tag_id = [] 295 | 296 | for j in range(0, len(od_detections[0])): 297 | od_tag_id.append(od_detections[0][j].tag_id) 298 | od_json_output = json_postprocess(self.clue_txts[i][-7:-5], od_tag_id) 299 | self.json_list.append(od_json_output) 300 | cv2.putText(frame_for_vis, 'TEXT CLUE DETECTED', (50, 450), cv2.FONT_HERSHEY_DUPLEX, 1, (0, 255, 0), 2) 301 | 302 | if self.show_video: 303 | for j in range(0, pred.shape[0]): 304 | bboxes = pred[j][0:4] 305 | confs = pred[j][4] 306 | clss = pred[j][5] 307 | upper_clss = pred[j][7] 308 | lower_clss = pred[j][9] 309 | ppl_clss = pred[j][11] 310 | 311 | if clss == 0: # NOTE: person 312 | if ppl_clss == 0: 313 | name = 'man' 314 | elif ppl_clss == 1: 315 | name = 'woman' 316 | else: 317 | name = 'child' 318 | 319 | upper_color = self.color_list[int(upper_clss.item())] 320 | lower_color = self.color_list[int(lower_clss.item())] 321 | 322 | label = f'{name} {float(confs):.2f} {upper_color} {lower_color}' 323 | else: # NOTE: object 324 | label = f'{self.names[int(clss)]} {float(confs):.2f}' 325 | 326 | plot_one_box(bboxes, frame_for_vis, label=label, color=self.colors[int(clss)], line_thickness=2) 327 | # cv2.imwrite(self.debug_output_path+'frame'+str(self.cnt)+'_text_clue.jpg', frame_for_vis) 328 | 329 | clue_info.append(self.clue_txts[i][-7:-5]) 330 | 331 | # ----------------------------------------- 332 | # Apriltag detection for room id 333 | # ----------------------------------------- 334 | room_detections = [] 335 | room_detector = apriltag.Detector() 336 | room_detections.append(room_detector.detect(input_img)) 337 | room_tag_id = [] 338 | for i in range(0, len(room_detections[0])): 339 | room_tag_id.append(room_detections[0][i].tag_id) 340 | 341 | for i in range(0, len(room_tag_id)): 342 | if room_tag_id[i] >= 500: 343 | self.room_id = room_tag_id[i] 344 | 345 | # ----------------------------------------- 346 | # json update and export 347 | # ----------------------------------------- 348 | # NOTE: json dump에서 정답 json 만들기 (+중복 value 제거) 349 | for i in range(0, len(clue_info)): 350 | ans_list = [] 351 | for j in range(0, len(self.json_list)): 352 | k = list(self.json_list[j]['answer_sheet']['answer']['person_id'].keys()) 353 | for m in range(0, len(k)): 354 | if clue_info[i] == k[m]: 355 | v = self.json_list[j]['answer_sheet']['answer']['person_id'][k[m]] 356 | for n in range(0, len(v)): 357 | ans_list.append(v[n]) 358 | self.json['answer_sheet']['answer']['person_id'][clue_info[i]] = list(set(ans_list)) 359 | 360 | if self.room_id != None: # NOTE: room id 저장 361 | self.json['answer_sheet']['room_id'] = str(self.room_id) 362 | 363 | self.cnt = self.cnt+1 364 | self.state = state 365 | 366 | ans_pair = self.json['answer_sheet']['answer']['person_id'] 367 | ans_keys = list(ans_pair.keys()) 368 | empty_cnt = 0 369 | for i in range(0, len(ans_keys)): 370 | if len(ans_pair[ans_keys[i]]) == 0: 371 | empty_cnt = empty_cnt+1 372 | 
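# Clues whose AprilTag search produced nothing are answered with ["NONE"]; if every
# clue in clue_info came back empty, the block below replaces them all with
# ["UNCLEAR"]. A hypothetical final answer_sheet (clue ids and tag ids below are
# made-up examples, and the value format follows whatever json_postprocess stored):
#
#     {"answer_sheet": {"room_id": "501", "mission": "1",
#                       "answer": {"person_id": {"01": [<tag ids>], "02": ["NONE"]}}}}
#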
self.json['answer_sheet']['answer']['person_id'][ans_keys[i]] = ["NONE"] 373 | if empty_cnt == len(clue_info): # NOTE: value 전부 비어있으면 UNCLEAR로 채움 374 | for i in range(0, len(ans_keys)): 375 | self.json['answer_sheet']['answer']['person_id'][ans_keys[i]] = ["UNCLEAR"] 376 | 377 | # print(self.cnt, self.state) 378 | return self.json 379 | 380 | except: 381 | self.json = {'answer_sheet': { 382 | 'room_id': None, 383 | 'mission': "1", 384 | 'answer': { 385 | 'person_id': 'UNCLEAR' 386 | } 387 | } 388 | } 389 | # print('exception!') 390 | return self.json 391 | 392 | if __name__ == "__main__": 393 | args = parse_args() 394 | task1 = Task1(args) 395 | 396 | if args.task1_debug == None: 397 | frames = None 398 | else: 399 | frames = args.debug_input_path # NOTE: superglue 테스트이미지 (이미지 한장) 400 | 401 | task1(frames) 402 | -------------------------------------------------------------------------------- /superglue/utils.py: -------------------------------------------------------------------------------- 1 | # %BANNER_BEGIN% 2 | # --------------------------------------------------------------------- 3 | # %COPYRIGHT_BEGIN% 4 | # 5 | # Magic Leap, Inc. ("COMPANY") CONFIDENTIAL 6 | # 7 | # Unpublished Copyright (c) 2020 8 | # Magic Leap, Inc., All Rights Reserved. 9 | # 10 | # NOTICE: All information contained herein is, and remains the property 11 | # of COMPANY. The intellectual and technical concepts contained herein 12 | # are proprietary to COMPANY and may be covered by U.S. and Foreign 13 | # Patents, patents in process, and are protected by trade secret or 14 | # copyright law. Dissemination of this information or reproduction of 15 | # this material is strictly forbidden unless prior written permission is 16 | # obtained from COMPANY. Access to the source code contained herein is 17 | # hereby forbidden to anyone except current COMPANY employees, managers 18 | # or contractors who have executed Confidentiality and Non-disclosure 19 | # agreements explicitly covering such access. 20 | # 21 | # The copyright notice above does not evidence any actual or intended 22 | # publication or disclosure of this source code, which includes 23 | # information that is confidential and/or proprietary, and is a trade 24 | # secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION, 25 | # PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS 26 | # SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS 27 | # STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND 28 | # INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE 29 | # CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS 30 | # TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE, 31 | # USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART. 32 | # 33 | # %COPYRIGHT_END% 34 | # ---------------------------------------------------------------------- 35 | # %AUTHORS_BEGIN% 36 | # 37 | # Originating Authors: Paul-Edouard Sarlin 38 | # Daniel DeTone 39 | # Tomasz Malisiewicz 40 | # 41 | # %AUTHORS_END% 42 | # --------------------------------------------------------------------*/ 43 | # %BANNER_END% 44 | 45 | from pathlib import Path 46 | import time 47 | from collections import OrderedDict 48 | from threading import Thread 49 | import numpy as np 50 | import cv2 51 | import torch 52 | import matplotlib.pyplot as plt 53 | import matplotlib 54 | matplotlib.use('Agg') 55 | 56 | class AverageTimer: 57 | """ Class to help manage printing simple timing of code execution. 
""" 58 | 59 | def __init__(self, smoothing=0.3, newline=False): 60 | self.smoothing = smoothing 61 | self.newline = newline 62 | self.times = OrderedDict() 63 | self.will_print = OrderedDict() 64 | self.reset() 65 | 66 | def reset(self): 67 | now = time.time() 68 | self.start = now 69 | self.last_time = now 70 | for name in self.will_print: 71 | self.will_print[name] = False 72 | 73 | def update(self, name='default'): 74 | now = time.time() 75 | dt = now - self.last_time 76 | if name in self.times: 77 | dt = self.smoothing * dt + (1 - self.smoothing) * self.times[name] 78 | self.times[name] = dt 79 | self.will_print[name] = True 80 | self.last_time = now 81 | 82 | def print(self, text='Timer'): 83 | total = 0. 84 | print('[{}]'.format(text), end=' ') 85 | for key in self.times: 86 | val = self.times[key] 87 | if self.will_print[key]: 88 | print('%s=%.3f' % (key, val), end=' ') 89 | total += val 90 | print('total=%.3f sec {%.1f FPS}' % (total, 1./total), end=' ') 91 | if self.newline: 92 | print(flush=True) 93 | else: 94 | print(end='\r', flush=True) 95 | self.reset() 96 | 97 | 98 | class VideoStreamer: 99 | """ Class to help process image streams. Four types of possible inputs:" 100 | 1.) USB Webcam. 101 | 2.) An IP camera 102 | 3.) A directory of images (files in directory matching 'image_glob'). 103 | 4.) A video file, such as an .mp4 or .avi file. 104 | """ 105 | def __init__(self, basedir, resize, skip, image_glob, max_length=1000000): 106 | self._ip_grabbed = False 107 | self._ip_running = False 108 | self._ip_camera = False 109 | self._ip_image = None 110 | self._ip_index = 0 111 | self.cap = [] 112 | self.camera = True 113 | self.video_file = False 114 | self.listing = [] 115 | self.resize = resize 116 | self.interp = cv2.INTER_AREA 117 | self.i = 0 118 | self.skip = skip 119 | self.max_length = max_length 120 | if isinstance(basedir, int) or basedir.isdigit(): 121 | print('==> Processing USB webcam input: {}'.format(basedir)) 122 | self.cap = cv2.VideoCapture(int(basedir)) 123 | self.listing = range(0, self.max_length) 124 | elif basedir.startswith(('http', 'rtsp')): 125 | print('==> Processing IP camera input: {}'.format(basedir)) 126 | self.cap = cv2.VideoCapture(basedir) 127 | self.start_ip_camera_thread() 128 | self._ip_camera = True 129 | self.listing = range(0, self.max_length) 130 | elif Path(basedir).is_dir(): 131 | print('==> Processing image directory input: {}'.format(basedir)) 132 | self.listing = list(Path(basedir).glob(image_glob[0])) 133 | for j in range(1, len(image_glob)): 134 | image_path = list(Path(basedir).glob(image_glob[j])) 135 | self.listing = self.listing + image_path 136 | self.listing.sort() 137 | self.listing = self.listing[::self.skip] 138 | self.max_length = np.min([self.max_length, len(self.listing)]) 139 | if self.max_length == 0: 140 | raise IOError('No images found (maybe bad \'image_glob\' ?)') 141 | self.listing = self.listing[:self.max_length] 142 | self.camera = False 143 | elif Path(basedir).exists(): 144 | print('==> Processing video input: {}'.format(basedir)) 145 | self.cap = cv2.VideoCapture(basedir) 146 | self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1) 147 | num_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT)) 148 | self.listing = range(0, num_frames) 149 | self.listing = self.listing[::self.skip] 150 | self.video_file = True 151 | self.max_length = np.min([self.max_length, len(self.listing)]) 152 | self.listing = self.listing[:self.max_length] 153 | else: 154 | raise ValueError('VideoStreamer input \"{}\" not recognized.'.format(basedir)) 
155 | if self.camera and not self.cap.isOpened(): 156 | raise IOError('Could not read camera') 157 | 158 | def load_image(self, impath): 159 | """ Read image as grayscale and resize to img_size. 160 | Inputs 161 | impath: Path to input image. 162 | Returns 163 | grayim: uint8 numpy array sized H x W. 164 | """ 165 | grayim = cv2.imread(impath, 0) 166 | if grayim is None: 167 | raise Exception('Error reading image %s' % impath) 168 | w, h = grayim.shape[1], grayim.shape[0] 169 | w_new, h_new = process_resize(w, h, self.resize) 170 | grayim = cv2.resize( 171 | grayim, (w_new, h_new), interpolation=self.interp) 172 | return grayim 173 | 174 | def next_frame(self): 175 | """ Return the next frame, and increment internal counter. 176 | Returns 177 | image: Next H x W image. 178 | status: True or False depending whether image was loaded. 179 | """ 180 | 181 | if self.i == self.max_length: 182 | return (None, False) 183 | if self.camera: 184 | 185 | if self._ip_camera: 186 | #Wait for first image, making sure we haven't exited 187 | while self._ip_grabbed is False and self._ip_exited is False: 188 | time.sleep(.001) 189 | 190 | ret, image = self._ip_grabbed, self._ip_image.copy() 191 | if ret is False: 192 | self._ip_running = False 193 | else: 194 | ret, image = self.cap.read() 195 | if ret is False: 196 | print('VideoStreamer: Cannot get image from camera') 197 | return (None, False) 198 | w, h = image.shape[1], image.shape[0] 199 | if self.video_file: 200 | self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.listing[self.i]) 201 | 202 | w_new, h_new = process_resize(w, h, self.resize) 203 | image = cv2.resize(image, (w_new, h_new), 204 | interpolation=self.interp) 205 | image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) 206 | else: 207 | image_file = str(self.listing[self.i]) 208 | image = self.load_image(image_file) 209 | self.i = self.i + 1 210 | return (image, True) 211 | 212 | def start_ip_camera_thread(self): 213 | self._ip_thread = Thread(target=self.update_ip_camera, args=()) 214 | self._ip_running = True 215 | self._ip_thread.start() 216 | self._ip_exited = False 217 | return self 218 | 219 | def update_ip_camera(self): 220 | while self._ip_running: 221 | ret, img = self.cap.read() 222 | if ret is False: 223 | self._ip_running = False 224 | self._ip_exited = True 225 | self._ip_grabbed = False 226 | return 227 | 228 | self._ip_image = img 229 | self._ip_grabbed = ret 230 | self._ip_index += 1 231 | #print('IPCAMERA THREAD got frame {}'.format(self._ip_index)) 232 | 233 | 234 | def cleanup(self): 235 | self._ip_running = False 236 | 237 | # --- PREPROCESSING --- 238 | 239 | def process_resize(w, h, resize): 240 | assert(len(resize) > 0 and len(resize) <= 2) 241 | if len(resize) == 1 and resize[0] > -1: 242 | scale = resize[0] / max(h, w) 243 | w_new, h_new = int(round(w*scale)), int(round(h*scale)) 244 | elif len(resize) == 1 and resize[0] == -1: 245 | w_new, h_new = w, h 246 | else: # len(resize) == 2: 247 | w_new, h_new = resize[0], resize[1] 248 | 249 | # Issue warning if resolution is too small or too large. 
250 | if max(w_new, h_new) < 160: 251 | print('Warning: input resolution is very small, results may vary') 252 | elif max(w_new, h_new) > 2000: 253 | print('Warning: input resolution is very large, results may vary') 254 | 255 | return w_new, h_new 256 | 257 | 258 | def frame2tensor(frame, device): 259 | return torch.from_numpy(frame/255.).float()[None, None].to(device) 260 | 261 | 262 | def read_image(path, device, resize, rotation, resize_float): 263 | image = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE) 264 | if image is None: 265 | return None, None, None 266 | w, h = image.shape[1], image.shape[0] 267 | w_new, h_new = process_resize(w, h, resize) 268 | scales = (float(w) / float(w_new), float(h) / float(h_new)) 269 | 270 | if resize_float: 271 | image = cv2.resize(image.astype('float32'), (w_new, h_new)) 272 | else: 273 | image = cv2.resize(image, (w_new, h_new)).astype('float32') 274 | 275 | if rotation != 0: 276 | image = np.rot90(image, k=rotation) 277 | if rotation % 2: 278 | scales = scales[::-1] 279 | 280 | inp = frame2tensor(image, device) 281 | return image, inp, scales 282 | 283 | 284 | # --- GEOMETRY --- 285 | 286 | 287 | def estimate_pose(kpts0, kpts1, K0, K1, thresh, conf=0.99999): 288 | if len(kpts0) < 5: 289 | return None 290 | 291 | f_mean = np.mean([K0[0, 0], K1[1, 1], K0[0, 0], K1[1, 1]]) 292 | norm_thresh = thresh / f_mean 293 | 294 | kpts0 = (kpts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None] 295 | kpts1 = (kpts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None] 296 | 297 | E, mask = cv2.findEssentialMat( 298 | kpts0, kpts1, np.eye(3), threshold=norm_thresh, prob=conf, 299 | method=cv2.RANSAC) 300 | 301 | assert E is not None 302 | 303 | best_num_inliers = 0 304 | ret = None 305 | for _E in np.split(E, len(E) / 3): 306 | n, R, t, _ = cv2.recoverPose( 307 | _E, kpts0, kpts1, np.eye(3), 1e9, mask=mask) 308 | if n > best_num_inliers: 309 | best_num_inliers = n 310 | ret = (R, t[:, 0], mask.ravel() > 0) 311 | return ret 312 | 313 | 314 | def rotate_intrinsics(K, image_shape, rot): 315 | """image_shape is the shape of the image after rotation""" 316 | assert rot <= 3 317 | h, w = image_shape[:2][::-1 if (rot % 2) else 1] 318 | fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] 319 | rot = rot % 4 320 | if rot == 1: 321 | return np.array([[fy, 0., cy], 322 | [0., fx, w-1-cx], 323 | [0., 0., 1.]], dtype=K.dtype) 324 | elif rot == 2: 325 | return np.array([[fx, 0., w-1-cx], 326 | [0., fy, h-1-cy], 327 | [0., 0., 1.]], dtype=K.dtype) 328 | else: # if rot == 3: 329 | return np.array([[fy, 0., h-1-cy], 330 | [0., fx, cx], 331 | [0., 0., 1.]], dtype=K.dtype) 332 | 333 | 334 | def rotate_pose_inplane(i_T_w, rot): 335 | rotation_matrices = [ 336 | np.array([[np.cos(r), -np.sin(r), 0., 0.], 337 | [np.sin(r), np.cos(r), 0., 0.], 338 | [0., 0., 1., 0.], 339 | [0., 0., 0., 1.]], dtype=np.float32) 340 | for r in [np.deg2rad(d) for d in (0, 270, 180, 90)] 341 | ] 342 | return np.dot(rotation_matrices[rot], i_T_w) 343 | 344 | 345 | def scale_intrinsics(K, scales): 346 | scales = np.diag([1./scales[0], 1./scales[1], 1.]) 347 | return np.dot(scales, K) 348 | 349 | 350 | def to_homogeneous(points): 351 | return np.concatenate([points, np.ones_like(points[:, :1])], axis=-1) 352 | 353 | 354 | def compute_epipolar_error(kpts0, kpts1, T_0to1, K0, K1): 355 | kpts0 = (kpts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None] 356 | kpts1 = (kpts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None] 357 | kpts0 = to_homogeneous(kpts0) 358 | kpts1 = to_homogeneous(kpts1) 359 | 
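# The lines below evaluate the squared symmetric epipolar distance of each
# normalized, homogeneous correspondence (p0, p1) under the relative pose
# T_0to1 = [R | t]: with the essential matrix E = [t]_x R,
#
#     d_i = (p1_i^T E p0_i)^2 * ( 1 / ||(E p0_i)_xy||^2 + 1 / ||(E^T p1_i)_xy||^2 )
#
# where [t]_x is the skew-symmetric cross-product matrix assembled from t below.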
360 | t0, t1, t2 = T_0to1[:3, 3] 361 | t_skew = np.array([ 362 | [0, -t2, t1], 363 | [t2, 0, -t0], 364 | [-t1, t0, 0] 365 | ]) 366 | E = t_skew @ T_0to1[:3, :3] 367 | 368 | Ep0 = kpts0 @ E.T # N x 3 369 | p1Ep0 = np.sum(kpts1 * Ep0, -1) # N 370 | Etp1 = kpts1 @ E # N x 3 371 | d = p1Ep0**2 * (1.0 / (Ep0[:, 0]**2 + Ep0[:, 1]**2) 372 | + 1.0 / (Etp1[:, 0]**2 + Etp1[:, 1]**2)) 373 | return d 374 | 375 | 376 | def angle_error_mat(R1, R2): 377 | cos = (np.trace(np.dot(R1.T, R2)) - 1) / 2 378 | cos = np.clip(cos, -1., 1.) # numercial errors can make it out of bounds 379 | return np.rad2deg(np.abs(np.arccos(cos))) 380 | 381 | 382 | def angle_error_vec(v1, v2): 383 | n = np.linalg.norm(v1) * np.linalg.norm(v2) 384 | return np.rad2deg(np.arccos(np.clip(np.dot(v1, v2) / n, -1.0, 1.0))) 385 | 386 | 387 | def compute_pose_error(T_0to1, R, t): 388 | R_gt = T_0to1[:3, :3] 389 | t_gt = T_0to1[:3, 3] 390 | error_t = angle_error_vec(t, t_gt) 391 | error_t = np.minimum(error_t, 180 - error_t) # ambiguity of E estimation 392 | error_R = angle_error_mat(R, R_gt) 393 | return error_t, error_R 394 | 395 | 396 | def pose_auc(errors, thresholds): 397 | sort_idx = np.argsort(errors) 398 | errors = np.array(errors.copy())[sort_idx] 399 | recall = (np.arange(len(errors)) + 1) / len(errors) 400 | errors = np.r_[0., errors] 401 | recall = np.r_[0., recall] 402 | aucs = [] 403 | for t in thresholds: 404 | last_index = np.searchsorted(errors, t) 405 | r = np.r_[recall[:last_index], recall[last_index-1]] 406 | e = np.r_[errors[:last_index], t] 407 | aucs.append(np.trapz(r, x=e)/t) 408 | return aucs 409 | 410 | 411 | # --- VISUALIZATION --- 412 | 413 | 414 | def plot_image_pair(imgs, dpi=100, size=6, pad=.5): 415 | n = len(imgs) 416 | assert n == 2, 'number of images must be two' 417 | figsize = (size*n, size*3/4) if size is not None else None 418 | _, ax = plt.subplots(1, n, figsize=figsize, dpi=dpi) 419 | for i in range(n): 420 | ax[i].imshow(imgs[i], cmap=plt.get_cmap('gray'), vmin=0, vmax=255) 421 | ax[i].get_yaxis().set_ticks([]) 422 | ax[i].get_xaxis().set_ticks([]) 423 | for spine in ax[i].spines.values(): # remove frame 424 | spine.set_visible(False) 425 | plt.tight_layout(pad=pad) 426 | 427 | 428 | def plot_keypoints(kpts0, kpts1, color='w', ps=2): 429 | ax = plt.gcf().axes 430 | ax[0].scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps) 431 | ax[1].scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps) 432 | 433 | 434 | def plot_matches(kpts0, kpts1, color, lw=1.5, ps=4): 435 | fig = plt.gcf() 436 | ax = fig.axes 437 | fig.canvas.draw() 438 | 439 | transFigure = fig.transFigure.inverted() 440 | fkpts0 = transFigure.transform(ax[0].transData.transform(kpts0)) 441 | fkpts1 = transFigure.transform(ax[1].transData.transform(kpts1)) 442 | 443 | fig.lines = [matplotlib.lines.Line2D( 444 | (fkpts0[i, 0], fkpts1[i, 0]), (fkpts0[i, 1], fkpts1[i, 1]), zorder=1, 445 | transform=fig.transFigure, c=color[i], linewidth=lw) 446 | for i in range(len(kpts0))] 447 | ax[0].scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps) 448 | ax[1].scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps) 449 | 450 | 451 | def make_matching_plot(image0, image1, kpts0, kpts1, mkpts0, mkpts1, 452 | color, text, path, show_keypoints=False, 453 | fast_viz=False, opencv_display=False, 454 | opencv_title='matches', small_text=[]): 455 | 456 | if fast_viz: 457 | make_matching_plot_fast(image0, image1, kpts0, kpts1, mkpts0, mkpts1, 458 | color, text, path, show_keypoints, 10, 459 | opencv_display, opencv_title, small_text) 460 | return 461 | 462 | plot_image_pair([image0, 
image1]) 463 | if show_keypoints: 464 | plot_keypoints(kpts0, kpts1, color='k', ps=4) 465 | plot_keypoints(kpts0, kpts1, color='w', ps=2) 466 | plot_matches(mkpts0, mkpts1, color) 467 | 468 | fig = plt.gcf() 469 | txt_color = 'k' if image0[:100, :150].mean() > 200 else 'w' 470 | fig.text( 471 | 0.01, 0.99, '\n'.join(text), transform=fig.axes[0].transAxes, 472 | fontsize=15, va='top', ha='left', color=txt_color) 473 | 474 | txt_color = 'k' if image0[-100:, :150].mean() > 200 else 'w' 475 | fig.text( 476 | 0.01, 0.01, '\n'.join(small_text), transform=fig.axes[0].transAxes, 477 | fontsize=5, va='bottom', ha='left', color=txt_color) 478 | 479 | plt.savefig(str(path), bbox_inches='tight', pad_inches=0) 480 | plt.close() 481 | 482 | 483 | def make_matching_plot_fast(image0, image1, kpts0, kpts1, mkpts0, 484 | mkpts1, color, text, path=None, 485 | show_keypoints=False, margin=10, 486 | opencv_display=False, opencv_title='', 487 | small_text=[]): 488 | H0, W0 = image0.shape 489 | H1, W1 = image1.shape 490 | H, W = max(H0, H1), W0 + W1 + margin 491 | 492 | out = 255*np.ones((H, W), np.uint8) 493 | out[:H0, :W0] = image0 494 | out[:H1, W0+margin:] = image1 495 | out = np.stack([out]*3, -1) 496 | 497 | if show_keypoints: 498 | kpts0, kpts1 = np.round(kpts0).astype(int), np.round(kpts1).astype(int) 499 | white = (255, 255, 255) 500 | black = (0, 0, 0) 501 | for x, y in kpts0: 502 | cv2.circle(out, (x, y), 2, black, -1, lineType=cv2.LINE_AA) 503 | cv2.circle(out, (x, y), 1, white, -1, lineType=cv2.LINE_AA) 504 | for x, y in kpts1: 505 | cv2.circle(out, (x + margin + W0, y), 2, black, -1, 506 | lineType=cv2.LINE_AA) 507 | cv2.circle(out, (x + margin + W0, y), 1, white, -1, 508 | lineType=cv2.LINE_AA) 509 | 510 | mkpts0, mkpts1 = np.round(mkpts0).astype(int), np.round(mkpts1).astype(int) 511 | color = (np.array(color[:, :3])*255).astype(int)[:, ::-1] 512 | for (x0, y0), (x1, y1), c in zip(mkpts0, mkpts1, color): 513 | c = c.tolist() 514 | cv2.line(out, (x0, y0), (x1 + margin + W0, y1), 515 | color=c, thickness=1, lineType=cv2.LINE_AA) 516 | # display line end-points as circles 517 | cv2.circle(out, (x0, y0), 2, c, -1, lineType=cv2.LINE_AA) 518 | cv2.circle(out, (x1 + margin + W0, y1), 2, c, -1, 519 | lineType=cv2.LINE_AA) 520 | 521 | # Scale factor for consistent visualization across scales. 522 | sc = min(H / 640., 2.0) 523 | 524 | # Big text. 525 | Ht = int(30 * sc) # text height 526 | txt_color_fg = (255, 255, 255) 527 | txt_color_bg = (0, 0, 0) 528 | for i, t in enumerate(text): 529 | cv2.putText(out, t, (int(8*sc), Ht*(i+1)), cv2.FONT_HERSHEY_DUPLEX, 530 | 1.0*sc, txt_color_bg, 2, cv2.LINE_AA) 531 | cv2.putText(out, t, (int(8*sc), Ht*(i+1)), cv2.FONT_HERSHEY_DUPLEX, 532 | 1.0*sc, txt_color_fg, 1, cv2.LINE_AA) 533 | 534 | # Small text. 
535 | Ht = int(18 * sc) # text height 536 | for i, t in enumerate(reversed(small_text)): 537 | cv2.putText(out, t, (int(8*sc), int(H-Ht*(i+.6))), cv2.FONT_HERSHEY_DUPLEX, 538 | 0.5*sc, txt_color_bg, 2, cv2.LINE_AA) 539 | cv2.putText(out, t, (int(8*sc), int(H-Ht*(i+.6))), cv2.FONT_HERSHEY_DUPLEX, 540 | 0.5*sc, txt_color_fg, 1, cv2.LINE_AA) 541 | 542 | if path is not None: 543 | cv2.imwrite(str(path), out) 544 | 545 | if opencv_display: 546 | cv2.imshow(opencv_title, out) 547 | cv2.waitKey(1) 548 | 549 | return out 550 | 551 | 552 | def error_colormap(x): 553 | return np.clip( 554 | np.stack([2-x*2, x*2, np.zeros_like(x), np.ones_like(x)], -1), 0, 1) 555 | -------------------------------------------------------------------------------- /yolov7/utils/general.py: -------------------------------------------------------------------------------- 1 | # YOLOR general utils 2 | 3 | import glob 4 | import logging 5 | import math 6 | import os 7 | import platform 8 | import random 9 | import re 10 | import subprocess 11 | import time 12 | from pathlib import Path 13 | 14 | import cv2 15 | import numpy as np 16 | import pandas as pd 17 | import torch 18 | import torchvision 19 | 20 | # Settings 21 | torch.set_printoptions(linewidth=320, precision=5, profile='long') 22 | np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5 23 | pd.options.display.max_columns = 10 24 | cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader) 25 | os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(), 8)) # NumExpr max threads 26 | 27 | 28 | def set_logging(rank=-1): 29 | logging.basicConfig( 30 | format="%(message)s", 31 | level=logging.INFO if rank in [-1, 0] else logging.WARN) 32 | 33 | 34 | 35 | def get_latest_run(search_dir='.'): 36 | # Return path to most recent 'last.pt' in /runs (i.e. to --resume from) 37 | last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True) 38 | return max(last_list, key=os.path.getctime) if last_list else '' 39 | 40 | 41 | def isdocker(): 42 | # Is environment a Docker container 43 | return Path('/workspace').exists() # or Path('/.dockerenv').exists() 44 | 45 | 46 | def emojis(str=''): 47 | # Return platform-dependent emoji-safe version of string 48 | return str.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else str 49 | 50 | 51 | def check_online(): 52 | # Check internet connectivity 53 | import socket 54 | try: 55 | socket.create_connection(("1.1.1.1", 443), 5) # check host accesability 56 | return True 57 | except OSError: 58 | return False 59 | 60 | 61 | def check_git_status(): 62 | # Recommend 'git pull' if code is out of date 63 | print(colorstr('github: '), end='') 64 | try: 65 | assert Path('.git').exists(), 'skipping check (not a git repository)' 66 | assert not isdocker(), 'skipping check (Docker image)' 67 | assert check_online(), 'skipping check (offline)' 68 | 69 | cmd = 'git fetch && git config --get remote.origin.url' 70 | url = subprocess.check_output(cmd, shell=True).decode().strip().rstrip('.git') # github repo url 71 | branch = subprocess.check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip() # checked out 72 | n = int(subprocess.check_output(f'git rev-list {branch}..origin/master --count', shell=True)) # commits behind 73 | if n > 0: 74 | s = f"⚠️ WARNING: code is out of date by {n} commit{'s' * (n > 1)}. " \ 75 | f"Use 'git pull' to update or 'git clone {url}' to download latest." 
76 | else: 77 | s = f'up to date with {url} ✅' 78 | print(emojis(s)) # emoji-safe 79 | except Exception as e: 80 | print(e) 81 | 82 | 83 | def check_requirements(requirements='requirements.txt', exclude=()): 84 | # Check installed dependencies meet requirements (pass *.txt file or list of packages) 85 | import pkg_resources as pkg 86 | prefix = colorstr('red', 'bold', 'requirements:') 87 | if isinstance(requirements, (str, Path)): # requirements.txt file 88 | file = Path(requirements) 89 | if not file.exists(): 90 | print(f"{prefix} {file.resolve()} not found, check failed.") 91 | return 92 | requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(file.open()) if x.name not in exclude] 93 | else: # list or tuple of packages 94 | requirements = [x for x in requirements if x not in exclude] 95 | 96 | n = 0 # number of packages updates 97 | for r in requirements: 98 | try: 99 | pkg.require(r) 100 | except Exception as e: # DistributionNotFound or VersionConflict if requirements not met 101 | n += 1 102 | print(f"{prefix} {e.req} not found and is required by YOLOR, attempting auto-update...") 103 | print(subprocess.check_output(f"pip install '{e.req}'", shell=True).decode()) 104 | 105 | if n: # if packages updated 106 | source = file.resolve() if 'file' in locals() else requirements 107 | s = f"{prefix} {n} package{'s' * (n > 1)} updated per {source}\n" \ 108 | f"{prefix} ⚠️ {colorstr('bold', 'Restart runtime or rerun command for updates to take effect')}\n" 109 | print(emojis(s)) # emoji-safe 110 | 111 | 112 | def check_img_size(img_size, s=32): 113 | # Verify img_size is a multiple of stride s 114 | new_size = make_divisible(img_size, int(s)) # ceil gs-multiple 115 | if new_size != img_size: 116 | print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size)) 117 | return new_size 118 | 119 | 120 | def check_imshow(): 121 | # Check if environment supports image displays 122 | try: 123 | assert not isdocker(), 'cv2.imshow() is disabled in Docker environments' 124 | cv2.imshow('test', np.zeros((1, 1, 3))) 125 | cv2.waitKey(1) 126 | cv2.destroyAllWindows() 127 | cv2.waitKey(1) 128 | return True 129 | except Exception as e: 130 | print(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}') 131 | return False 132 | 133 | 134 | def check_file(file): 135 | # Search for file if not found 136 | if Path(file).is_file() or file == '': 137 | return file 138 | else: 139 | files = glob.glob('./**/' + file, recursive=True) # find file 140 | assert len(files), f'File Not Found: {file}' # assert file was found 141 | assert len(files) == 1, f"Multiple files match '{file}', specify exact path: {files}" # assert unique 142 | return files[0] # return file 143 | 144 | 145 | def check_dataset(dict): 146 | # Download dataset if not found locally 147 | val, s = dict.get('val'), dict.get('download') 148 | if val and len(val): 149 | 150 | val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])] # val path 151 | #import pdb; pdb.set_trace() 152 | if not all(x.exists() for x in val): 153 | print('\nWARNING: Dataset not found, nonexistent paths: %s' % [str(x) for x in val if not x.exists()]) 154 | if s and len(s): # download script 155 | print('Downloading %s ...' 
% s) 156 | if s.startswith('http') and s.endswith('.zip'): # URL 157 | f = Path(s).name # filename 158 | torch.hub.download_url_to_file(s, f) 159 | r = os.system('unzip -q %s -d ../ && rm %s' % (f, f)) # unzip 160 | else: # bash script 161 | r = os.system(s) 162 | print('Dataset autodownload %s\n' % ('success' if r == 0 else 'failure')) # analyze return value 163 | else: 164 | raise Exception('Dataset not found.') 165 | 166 | 167 | def make_divisible(x, divisor): 168 | # Returns x evenly divisible by divisor 169 | return math.ceil(x / divisor) * divisor 170 | 171 | 172 | def clean_str(s): 173 | # Cleans a string by replacing special characters with underscore _ 174 | return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s) 175 | 176 | 177 | def one_cycle(y1=0.0, y2=1.0, steps=100): 178 | # lambda function for sinusoidal ramp from y1 to y2 179 | return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1 180 | 181 | 182 | def colorstr(*input): 183 | # Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world') 184 | *args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string 185 | colors = {'black': '\033[30m', # basic colors 186 | 'red': '\033[31m', 187 | 'green': '\033[32m', 188 | 'yellow': '\033[33m', 189 | 'blue': '\033[34m', 190 | 'magenta': '\033[35m', 191 | 'cyan': '\033[36m', 192 | 'white': '\033[37m', 193 | 'bright_black': '\033[90m', # bright colors 194 | 'bright_red': '\033[91m', 195 | 'bright_green': '\033[92m', 196 | 'bright_yellow': '\033[93m', 197 | 'bright_blue': '\033[94m', 198 | 'bright_magenta': '\033[95m', 199 | 'bright_cyan': '\033[96m', 200 | 'bright_white': '\033[97m', 201 | 'end': '\033[0m', # misc 202 | 'bold': '\033[1m', 203 | 'underline': '\033[4m'} 204 | return ''.join(colors[x] for x in args) + f'{string}' + colors['end'] 205 | 206 | 207 | def labels_to_class_weights(labels, nc=80): 208 | # Get class weights (inverse frequency) from training labels 209 | if labels[0] is None: # no labels loaded 210 | return torch.Tensor() 211 | 212 | labels = np.concatenate(labels, 0) # labels.shape = (866643, 5) for COCO 213 | classes = labels[:, 0].astype(np.int) # labels = [class xywh] 214 | weights = np.bincount(classes, minlength=nc) # occurrences per class 215 | 216 | # Prepend gridpoint count (for uCE training) 217 | # gpi = ((320 / 32 * np.array([1, 2, 4])) ** 2 * 3).sum() # gridpoints per image 218 | # weights = np.hstack([gpi * len(labels) - weights.sum() * 9, weights * 9]) ** 0.5 # prepend gridpoints to start 219 | 220 | weights[weights == 0] = 1 # replace empty bins with 1 221 | weights = 1 / weights # number of targets per class 222 | weights /= weights.sum() # normalize 223 | return torch.from_numpy(weights) 224 | 225 | 226 | def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)): 227 | # Produces image weights based on class_weights and image contents 228 | class_counts = np.array([np.bincount(x[:, 0].astype(np.int), minlength=nc) for x in labels]) 229 | image_weights = (class_weights.reshape(1, nc) * class_counts).sum(1) 230 | # index = random.choices(range(n), weights=image_weights, k=1) # weight image sample 231 | return image_weights 232 | 233 | 234 | def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper) 235 | # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/ 236 | # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n') 237 | # b = 
np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n') 238 | # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)] # darknet to coco 239 | # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)] # coco to darknet 240 | x = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 241 | 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 242 | 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] 243 | return x 244 | 245 | 246 | def xyxy2xywh(x): 247 | # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right 248 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 249 | y[:, 0] = (x[:, 0] + x[:, 2]) / 2 # x center 250 | y[:, 1] = (x[:, 1] + x[:, 3]) / 2 # y center 251 | y[:, 2] = x[:, 2] - x[:, 0] # width 252 | y[:, 3] = x[:, 3] - x[:, 1] # height 253 | return y 254 | 255 | 256 | def xywh2xyxy(x): 257 | # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 258 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 259 | y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x 260 | y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y 261 | y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x 262 | y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y 263 | return y 264 | 265 | 266 | def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0): 267 | # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right 268 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 269 | y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x 270 | y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh # top left y 271 | y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw # bottom right x 272 | y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh # bottom right y 273 | return y 274 | 275 | 276 | def xyn2xy(x, w=640, h=640, padw=0, padh=0): 277 | # Convert normalized segments into pixel segments, shape (n,2) 278 | y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) 279 | y[:, 0] = w * x[:, 0] + padw # top left x 280 | y[:, 1] = h * x[:, 1] + padh # top left y 281 | return y 282 | 283 | 284 | def segment2box(segment, width=640, height=640): 285 | # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy) 286 | x, y = segment.T # segment xy 287 | inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height) 288 | x, y, = x[inside], y[inside] 289 | return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy 290 | 291 | 292 | def segments2boxes(segments): 293 | # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) 
to (cls, xywh) 294 | boxes = [] 295 | for s in segments: 296 | x, y = s.T # segment xy 297 | boxes.append([x.min(), y.min(), x.max(), y.max()]) # cls, xyxy 298 | return xyxy2xywh(np.array(boxes)) # cls, xywh 299 | 300 | 301 | def resample_segments(segments, n=1000): 302 | # Up-sample an (n,2) segment 303 | for i, s in enumerate(segments): 304 | s = np.concatenate((s, s[0:1, :]), axis=0) 305 | x = np.linspace(0, len(s) - 1, n) 306 | xp = np.arange(len(s)) 307 | segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T # segment xy 308 | return segments 309 | 310 | 311 | def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None): 312 | # Rescale coords (xyxy) from img1_shape to img0_shape 313 | if ratio_pad is None: # calculate from img0_shape 314 | gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new 315 | pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding 316 | else: 317 | gain = ratio_pad[0][0] 318 | pad = ratio_pad[1] 319 | 320 | coords[:, [0, 2]] -= pad[0] # x padding 321 | coords[:, [1, 3]] -= pad[1] # y padding 322 | coords[:, :4] /= gain 323 | clip_coords(coords, img0_shape) 324 | return coords 325 | 326 | 327 | def clip_coords(boxes, img_shape): 328 | # Clip bounding xyxy bounding boxes to image shape (height, width) 329 | boxes[:, 0].clamp_(0, img_shape[1]) # x1 330 | boxes[:, 1].clamp_(0, img_shape[0]) # y1 331 | boxes[:, 2].clamp_(0, img_shape[1]) # x2 332 | boxes[:, 3].clamp_(0, img_shape[0]) # y2 333 | 334 | 335 | def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): 336 | # Returns the IoU of box1 to box2. box1 is 4, box2 is nx4 337 | box2 = box2.T 338 | 339 | # Get the coordinates of bounding boxes 340 | if x1y1x2y2: # x1, y1, x2, y2 = box1 341 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] 342 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] 343 | else: # transform from xywh to xyxy 344 | b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 345 | b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 346 | b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 347 | b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 348 | 349 | # Intersection area 350 | inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ 351 | (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) 352 | 353 | # Union Area 354 | w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps 355 | w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps 356 | union = w1 * h1 + w2 * h2 - inter + eps 357 | 358 | iou = inter / union 359 | 360 | if GIoU or DIoU or CIoU: 361 | cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width 362 | ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height 363 | if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 364 | c2 = cw ** 2 + ch ** 2 + eps # convex diagonal squared 365 | rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + 366 | (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 # center distance squared 367 | if DIoU: 368 | return iou - rho2 / c2 # DIoU 369 | elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 370 | v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / (h2 + eps)) - torch.atan(w1 / (h1 + eps)), 2) 371 | with torch.no_grad(): 372 | alpha = v / (v - iou + (1 + eps)) 373 | return iou - (rho2 / c2 + v * alpha) # CIoU 374 | else: # 
GIoU https://arxiv.org/pdf/1902.09630.pdf 375 | c_area = cw * ch + eps # convex area 376 | return iou - (c_area - union) / c_area # GIoU 377 | else: 378 | return iou # IoU 379 | 380 | 381 | 382 | 383 | def bbox_alpha_iou(box1, box2, x1y1x2y2=False, GIoU=False, DIoU=False, CIoU=False, alpha=2, eps=1e-9): 384 | # Returns tsqrt_he IoU of box1 to box2. box1 is 4, box2 is nx4 385 | box2 = box2.T 386 | 387 | # Get the coordinates of bounding boxes 388 | if x1y1x2y2: # x1, y1, x2, y2 = box1 389 | b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] 390 | b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] 391 | else: # transform from xywh to xyxy 392 | b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 393 | b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 394 | b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 395 | b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 396 | 397 | # Intersection area 398 | inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ 399 | (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) 400 | 401 | # Union Area 402 | w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps 403 | w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps 404 | union = w1 * h1 + w2 * h2 - inter + eps 405 | 406 | # change iou into pow(iou+eps) 407 | # iou = inter / union 408 | iou = torch.pow(inter/union + eps, alpha) 409 | # beta = 2 * alpha 410 | if GIoU or DIoU or CIoU: 411 | cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) # convex (smallest enclosing box) width 412 | ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) # convex height 413 | if CIoU or DIoU: # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1 414 | c2 = (cw ** 2 + ch ** 2) ** alpha + eps # convex diagonal 415 | rho_x = torch.abs(b2_x1 + b2_x2 - b1_x1 - b1_x2) 416 | rho_y = torch.abs(b2_y1 + b2_y2 - b1_y1 - b1_y2) 417 | rho2 = ((rho_x ** 2 + rho_y ** 2) / 4) ** alpha # center distance 418 | if DIoU: 419 | return iou - rho2 / c2 # DIoU 420 | elif CIoU: # https://github.com/Zzh-tju/DIoU-SSD-pytorch/blob/master/utils/box/box_utils.py#L47 421 | v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) 422 | with torch.no_grad(): 423 | alpha_ciou = v / ((1 + eps) - inter / union + v) 424 | # return iou - (rho2 / c2 + v * alpha_ciou) # CIoU 425 | return iou - (rho2 / c2 + torch.pow(v * alpha_ciou + eps, alpha)) # CIoU 426 | else: # GIoU https://arxiv.org/pdf/1902.09630.pdf 427 | # c_area = cw * ch + eps # convex area 428 | # return iou - (c_area - union) / c_area # GIoU 429 | c_area = torch.max(cw * ch + eps, union) # convex area 430 | return iou - torch.pow((c_area - union) / c_area + eps, alpha) # GIoU 431 | else: 432 | return iou # torch.log(iou+eps) or iou 433 | 434 | 435 | def box_iou(box1, box2): 436 | # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py 437 | """ 438 | Return intersection-over-union (Jaccard index) of boxes. 439 | Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
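IoU = |A ∩ B| / |A ∪ B|, computed pairwise for all N x M box combinations.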
440 | Arguments: 441 | box1 (Tensor[N, 4]) 442 | box2 (Tensor[M, 4]) 443 | Returns: 444 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 445 | IoU values for every element in boxes1 and boxes2 446 | """ 447 | 448 | def box_area(box): 449 | # box = 4xn 450 | return (box[2] - box[0]) * (box[3] - box[1]) 451 | 452 | area1 = box_area(box1.T) 453 | area2 = box_area(box2.T) 454 | 455 | # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) 456 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 457 | return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter) 458 | 459 | 460 | def wh_iou(wh1, wh2): 461 | # Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2 462 | wh1 = wh1[:, None] # [N,1,2] 463 | wh2 = wh2[None] # [1,M,2] 464 | inter = torch.min(wh1, wh2).prod(2) # [N,M] 465 | return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter) 466 | 467 | 468 | def box_giou(box1, box2): 469 | """ 470 | Return generalized intersection-over-union (Jaccard index) between two sets of boxes. 471 | Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with 472 | ``0 <= x1 < x2`` and ``0 <= y1 < y2``. 473 | Args: 474 | boxes1 (Tensor[N, 4]): first set of boxes 475 | boxes2 (Tensor[M, 4]): second set of boxes 476 | Returns: 477 | Tensor[N, M]: the NxM matrix containing the pairwise generalized IoU values 478 | for every element in boxes1 and boxes2 479 | """ 480 | 481 | def box_area(box): 482 | # box = 4xn 483 | return (box[2] - box[0]) * (box[3] - box[1]) 484 | 485 | area1 = box_area(box1.T) 486 | area2 = box_area(box2.T) 487 | 488 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 489 | union = (area1[:, None] + area2 - inter) 490 | 491 | iou = inter / union 492 | 493 | lti = torch.min(box1[:, None, :2], box2[:, :2]) 494 | rbi = torch.max(box1[:, None, 2:], box2[:, 2:]) 495 | 496 | whi = (rbi - lti).clamp(min=0) # [N,M,2] 497 | areai = whi[:, :, 0] * whi[:, :, 1] 498 | 499 | return iou - (areai - union) / areai 500 | 501 | 502 | def box_ciou(box1, box2, eps: float = 1e-7): 503 | """ 504 | Return complete intersection-over-union (Jaccard index) between two sets of boxes. 505 | Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with 506 | ``0 <= x1 < x2`` and ``0 <= y1 < y2``. 507 | Args: 508 | boxes1 (Tensor[N, 4]): first set of boxes 509 | boxes2 (Tensor[M, 4]): second set of boxes 510 | eps (float, optional): small number to prevent division by zero. 
Default: 1e-7 511 | Returns: 512 | Tensor[N, M]: the NxM matrix containing the pairwise complete IoU values 513 | for every element in boxes1 and boxes2 514 | """ 515 | 516 | def box_area(box): 517 | # box = 4xn 518 | return (box[2] - box[0]) * (box[3] - box[1]) 519 | 520 | area1 = box_area(box1.T) 521 | area2 = box_area(box2.T) 522 | 523 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 524 | union = (area1[:, None] + area2 - inter) 525 | 526 | iou = inter / union 527 | 528 | lti = torch.min(box1[:, None, :2], box2[:, :2]) 529 | rbi = torch.max(box1[:, None, 2:], box2[:, 2:]) 530 | 531 | whi = (rbi - lti).clamp(min=0) # [N,M,2] 532 | diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps 533 | 534 | # centers of boxes 535 | x_p = (box1[:, None, 0] + box1[:, None, 2]) / 2 536 | y_p = (box1[:, None, 1] + box1[:, None, 3]) / 2 537 | x_g = (box2[:, 0] + box2[:, 2]) / 2 538 | y_g = (box2[:, 1] + box2[:, 3]) / 2 539 | # The distance between boxes' centers squared. 540 | centers_distance_squared = (x_p - x_g) ** 2 + (y_p - y_g) ** 2 541 | 542 | w_pred = box1[:, None, 2] - box1[:, None, 0] 543 | h_pred = box1[:, None, 3] - box1[:, None, 1] 544 | 545 | w_gt = box2[:, 2] - box2[:, 0] 546 | h_gt = box2[:, 3] - box2[:, 1] 547 | 548 | v = (4 / (torch.pi ** 2)) * torch.pow((torch.atan(w_gt / h_gt) - torch.atan(w_pred / h_pred)), 2) 549 | with torch.no_grad(): 550 | alpha = v / (1 - iou + v + eps) 551 | return iou - (centers_distance_squared / diagonal_distance_squared) - alpha * v 552 | 553 | 554 | def box_diou(box1, box2, eps: float = 1e-7): 555 | """ 556 | Return distance intersection-over-union (Jaccard index) between two sets of boxes. 557 | Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with 558 | ``0 <= x1 < x2`` and ``0 <= y1 < y2``. 559 | Args: 560 | boxes1 (Tensor[N, 4]): first set of boxes 561 | boxes2 (Tensor[M, 4]): second set of boxes 562 | eps (float, optional): small number to prevent division by zero. Default: 1e-7 563 | Returns: 564 | Tensor[N, M]: the NxM matrix containing the pairwise distance IoU values 565 | for every element in boxes1 and boxes2 566 | """ 567 | 568 | def box_area(box): 569 | # box = 4xn 570 | return (box[2] - box[0]) * (box[3] - box[1]) 571 | 572 | area1 = box_area(box1.T) 573 | area2 = box_area(box2.T) 574 | 575 | inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2) 576 | union = (area1[:, None] + area2 - inter) 577 | 578 | iou = inter / union 579 | 580 | lti = torch.min(box1[:, None, :2], box2[:, :2]) 581 | rbi = torch.max(box1[:, None, 2:], box2[:, 2:]) 582 | 583 | whi = (rbi - lti).clamp(min=0) # [N,M,2] 584 | diagonal_distance_squared = (whi[:, :, 0] ** 2) + (whi[:, :, 1] ** 2) + eps 585 | 586 | # centers of boxes 587 | x_p = (box1[:, None, 0] + box1[:, None, 2]) / 2 588 | y_p = (box1[:, None, 1] + box1[:, None, 3]) / 2 589 | x_g = (box2[:, 0] + box2[:, 2]) / 2 590 | y_g = (box2[:, 1] + box2[:, 3]) / 2 591 | # The distance between boxes' centers squared. 592 | centers_distance_squared = (x_p - x_g) ** 2 + (y_p - y_g) ** 2 593 | 594 | # The distance IoU is the IoU penalized by a normalized 595 | # distance between boxes' centers squared. 
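# DIoU = IoU - d^2 / c^2, where d is the distance between the box centers and c is the diagonal of the smallest box enclosing both. For example, the touching but non-overlapping boxes [0, 0, 2, 2] and [2, 0, 4, 2] give IoU = 0, d^2 = 4 and c^2 = 20 (+ eps), so box_diou returns roughly -0.2.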
596 | return iou - (centers_distance_squared / diagonal_distance_squared) 597 | 598 | 599 | def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, 600 | labels=(), return_attributes=False): 601 | """Runs Non-Maximum Suppression (NMS) on inference results 602 | 603 | Returns: 604 | list of detections, on (n,6) tensor per image [xyxy, conf, cls] 605 | """ 606 | # nc = prediction.shape[2] - 5 # number of classes 607 | ######################################## DC 608 | nc = prediction.shape[2] - 30 609 | ######################################## DC 610 | 611 | xc = prediction[..., 4] > conf_thres # candidates 612 | 613 | # Settings 614 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height 615 | max_det = 300 # maximum number of detections per image 616 | max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() 617 | time_limit = 10.0 # seconds to quit after 618 | redundant = True # require redundant detections 619 | multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) 620 | merge = False # use merge-NMS 621 | 622 | t = time.time() 623 | if return_attributes : 624 | # also return upper color, lower color, people type, other type 625 | output = [torch.zeros((0, 10), device=prediction.device)] * prediction.shape[0] 626 | else: 627 | output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] 628 | for xi, x in enumerate(prediction): # image index, image inference 629 | # Apply constraints 630 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 631 | x = x[xc[xi]] # confidence 632 | # Cat apriori labels if autolabelling 633 | if labels and len(labels[xi]): 634 | l = labels[xi] 635 | v = torch.zeros((len(l), nc + 5), device=x.device) 636 | v[:, :4] = l[:, 1:5] # box 637 | v[:, 4] = 1.0 # conf 638 | v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls 639 | x = torch.cat((x, v), 0) 640 | 641 | # If none remain process next image 642 | if not x.shape[0]: 643 | continue 644 | 645 | # Compute conf 646 | if nc == 1: 647 | x[:, 5:nc+5] = x[:, 4:5] # for models with one class, cls_loss is 0 and cls_conf is always 0.5, 648 | # so there is no need to multiplicate. 
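# For multi-class models the per-class score is obj_conf * cls_conf (else branch below); the extra attribute columns of this custom head (upper/lower colour, person type, other type) are thresholded separately further down.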
649 | else: 650 | x[:, 5:nc+5] *= x[:, 4:5] # conf = obj_conf * cls_conf 651 | 652 | # Box (center x, center y, width, height) to (x1, y1, x2, y2) 653 | box = xywh2xyxy(x[:, :4]) 654 | 655 | # Detections matrix nx6 (xyxy, conf, cls) 656 | ######################################## DC 657 | if multi_label: 658 | i, j = (x[:, 5:nc+5] > conf_thres).nonzero(as_tuple=False).T 659 | if return_attributes: 660 | upcol_i, upcol_j = (x[:, 5:nc+15] > conf_thres).nonzero(as_tuple=False).T 661 | lowcol_i, lowcol_j = (x[:, nc+15:nc+25] > conf_thres).nonzero(as_tuple=False).T 662 | ppl_i, ppl_j = (x[:, nc+25:nc+28] > conf_thres).nonzero(as_tuple=False).T 663 | oth_i, oth_j = (x[:, nc+28:nc+30] > conf_thres).nonzero(as_tuple=False).T 664 | to_cat = [box[i], x[i, j + 5, None], j[:, None].float(), 665 | x[upcol_i, upcol_j + 5, None], upcol_j[:, None].float(), 666 | x[lowcol_i, lowcol_j + 5, None], lowcol_j[:, None].float(), 667 | x[ppl_i, ppl_j + 5, None], ppl_j[:, None].float(), 668 | x[oth_i, oth_j + 5, None], oth_j[:, None].float()] 669 | else : 670 | to_cat = [box[i], x[i, j + 5, None], j[:, None].float()] 671 | x = torch.cat(to_cat, 1) 672 | else: # best class only 673 | conf, j = x[:, 5:nc+5].max(1, keepdim=True) 674 | if return_attributes: 675 | upcol_conf, upcol_j = x[:, nc+5:nc+15].max(1, keepdim=True) 676 | lowcol_conf, lowcol_j = x[:, nc+15:nc+25].max(1, keepdim=True) 677 | ppl_conf, ppl_j = x[:, nc+25:nc+28].max(1, keepdim=True) 678 | oth_conf, oth_j = x[:, nc+28:nc+30].max(1, keepdim=True) 679 | to_cat = [box, conf, j.float(), upcol_conf, upcol_j.float(), 680 | lowcol_conf, lowcol_j.float(), ppl_conf, ppl_j.float(), 681 | oth_conf, oth_j.float()] 682 | else : 683 | to_cat = [box, conf, j.float()] 684 | x = torch.cat(to_cat, 1)[conf.view(-1) > conf_thres] 685 | # x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] 686 | ######################################## DC 687 | 688 | # Filter by class 689 | if classes is not None: 690 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] 691 | 692 | # Apply finite constraint 693 | # if not torch.isfinite(x).all(): 694 | # x = x[torch.isfinite(x).all(1)] 695 | 696 | # Check shape 697 | n = x.shape[0] # number of boxes 698 | if not n: # no boxes 699 | continue 700 | elif n > max_nms: # excess boxes 701 | x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence 702 | 703 | # Batched NMS 704 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes 705 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores 706 | i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS 707 | if i.shape[0] > max_det: # limit detections 708 | i = i[:max_det] 709 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 710 | # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 711 | iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix 712 | weights = iou * scores[None] # box weights 713 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes 714 | if redundant: 715 | i = i[iou.sum(1) > 1] # require redundancy 716 | output[xi] = x[i] 717 | if (time.time() - t) > time_limit: 718 | print(f'WARNING: NMS time limit {time_limit}s exceeded') 719 | break # time limit exceeded 720 | 721 | return output 722 | 723 | 724 | def non_max_suppression_kpt(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, 725 | labels=(), kpt_label=False, nc=None, nkpt=None): 726 | """Runs Non-Maximum Suppression (NMS) on inference results 727 | 
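When kpt_label is True, each kept detection also carries the raw keypoint values taken from column 6 onward (x, y, confidence per keypoint), so the output rows are wider than (n, 6).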
728 | Returns: 729 | list of detections, on (n,6) tensor per image [xyxy, conf, cls] 730 | """ 731 | if nc is None: 732 | nc = prediction.shape[2] - 5 if not kpt_label else prediction.shape[2] - 56 # number of classes 733 | xc = prediction[..., 4] > conf_thres # candidates 734 | 735 | # Settings 736 | min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height 737 | max_det = 300 # maximum number of detections per image 738 | max_nms = 30000 # maximum number of boxes into torchvision.ops.nms() 739 | time_limit = 10.0 # seconds to quit after 740 | redundant = True # require redundant detections 741 | multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img) 742 | merge = False # use merge-NMS 743 | 744 | t = time.time() 745 | output = [torch.zeros((0,6), device=prediction.device)] * prediction.shape[0] 746 | for xi, x in enumerate(prediction): # image index, image inference 747 | # Apply constraints 748 | # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0 # width-height 749 | x = x[xc[xi]] # confidence 750 | 751 | # Cat apriori labels if autolabelling 752 | if labels and len(labels[xi]): 753 | l = labels[xi] 754 | v = torch.zeros((len(l), nc + 5), device=x.device) 755 | v[:, :4] = l[:, 1:5] # box 756 | v[:, 4] = 1.0 # conf 757 | v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls 758 | x = torch.cat((x, v), 0) 759 | 760 | # If none remain process next image 761 | if not x.shape[0]: 762 | continue 763 | 764 | # Compute conf 765 | x[:, 5:5+nc] *= x[:, 4:5] # conf = obj_conf * cls_conf 766 | 767 | # Box (center x, center y, width, height) to (x1, y1, x2, y2) 768 | box = xywh2xyxy(x[:, :4]) 769 | 770 | # Detections matrix nx6 (xyxy, conf, cls) 771 | if multi_label: 772 | i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T 773 | x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) 774 | else: # best class only 775 | if not kpt_label: 776 | conf, j = x[:, 5:].max(1, keepdim=True) 777 | x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] 778 | else: 779 | kpts = x[:, 6:] 780 | conf, j = x[:, 5:6].max(1, keepdim=True) 781 | x = torch.cat((box, conf, j.float(), kpts), 1)[conf.view(-1) > conf_thres] 782 | 783 | 784 | # Filter by class 785 | if classes is not None: 786 | x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] 787 | 788 | # Apply finite constraint 789 | # if not torch.isfinite(x).all(): 790 | # x = x[torch.isfinite(x).all(1)] 791 | 792 | # Check shape 793 | n = x.shape[0] # number of boxes 794 | if not n: # no boxes 795 | continue 796 | elif n > max_nms: # excess boxes 797 | x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence 798 | 799 | # Batched NMS 800 | c = x[:, 5:6] * (0 if agnostic else max_wh) # classes 801 | boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores 802 | i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS 803 | if i.shape[0] > max_det: # limit detections 804 | i = i[:max_det] 805 | if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean) 806 | # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4) 807 | iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix 808 | weights = iou * scores[None] # box weights 809 | x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes 810 | if redundant: 811 | i = i[iou.sum(1) > 1] # require redundancy 812 | 813 | output[xi] = x[i] 814 | if (time.time() - t) > time_limit: 815 | print(f'WARNING: NMS time limit {time_limit}s exceeded') 816 | break # time 
limit exceeded 817 | 818 | return output 819 | 820 | 821 | def strip_optimizer(f='best.pt', s=''): # from utils.general import *; strip_optimizer() 822 | # Strip optimizer from 'f' to finalize training, optionally save as 's' 823 | x = torch.load(f, map_location=torch.device('cpu')) 824 | if x.get('ema'): 825 | x['model'] = x['ema'] # replace model with ema 826 | for k in 'optimizer', 'training_results', 'wandb_id', 'ema', 'updates': # keys 827 | x[k] = None 828 | x['epoch'] = -1 829 | x['model'].half() # to FP16 830 | for p in x['model'].parameters(): 831 | p.requires_grad = False 832 | torch.save(x, s or f) 833 | mb = os.path.getsize(s or f) / 1E6 # filesize 834 | print(f"Optimizer stripped from {f},{(' saved as %s,' % s) if s else ''} {mb:.1f}MB") 835 | 836 | 837 | def apply_classifier(x, model, img, im0): 838 | # applies a second stage classifier to yolo outputs 839 | im0 = [im0] if isinstance(im0, np.ndarray) else im0 840 | for i, d in enumerate(x): # per image 841 | if d is not None and len(d): 842 | d = d.clone() 843 | 844 | # Reshape and pad cutouts 845 | b = xyxy2xywh(d[:, :4]) # boxes 846 | b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1) # rectangle to square 847 | b[:, 2:] = b[:, 2:] * 1.3 + 30 # pad 848 | d[:, :4] = xywh2xyxy(b).long() 849 | 850 | # Rescale boxes from img_size to im0 size 851 | scale_coords(img.shape[2:], d[:, :4], im0[i].shape) 852 | 853 | # Classes 854 | pred_cls1 = d[:, 5].long() 855 | ims = [] 856 | for j, a in enumerate(d): # per item 857 | cutout = im0[i][int(a[1]):int(a[3]), int(a[0]):int(a[2])] 858 | im = cv2.resize(cutout, (224, 224)) # BGR 859 | # cv2.imwrite('test%i.jpg' % j, cutout) 860 | 861 | im = im[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416 862 | im = np.ascontiguousarray(im, dtype=np.float32) # uint8 to float32 863 | im /= 255.0 # 0 - 255 to 0.0 - 1.0 864 | ims.append(im) 865 | 866 | pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1) # classifier prediction 867 | x[i] = x[i][pred_cls1 == pred_cls2] # retain matching class detections 868 | 869 | return x 870 | 871 | 872 | def increment_path(path, exist_ok=True, sep=''): 873 | # Increment path, i.e. runs/exp --> runs/exp{sep}0, runs/exp{sep}1 etc. 
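# If the path does not yet exist (or exist_ok is set) it is returned unchanged; otherwise the index after the highest existing numbered copy is appended (2 when no numbered copies exist yet).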
874 | path = Path(path) # os-agnostic 875 | if (path.exists() and exist_ok) or (not path.exists()): 876 | return str(path) 877 | else: 878 | dirs = glob.glob(f"{path}{sep}*") # similar paths 879 | matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs] 880 | i = [int(m.groups()[0]) for m in matches if m] # indices 881 | n = max(i) + 1 if i else 2 # increment number 882 | return f"{path}{sep}{n}" # update path 883 | -------------------------------------------------------------------------------- /yolov7/models/yolo.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os.path 4 | import sys 5 | from copy import deepcopy 6 | 7 | # sys.path.append('./') # to run '$ python *.py' files in subdirectories 8 | sys.path.append(os.path.abspath(os.path.dirname(__file__))) 9 | logger = logging.getLogger(__name__) 10 | import torch 11 | 12 | from models.common import * 13 | from models.experimental import * 14 | #sys.path.append('/home/eulrang/workspace/git/AGC2022_round3_task1/yolov7/utils') 15 | sys.path.append(os.path.join(os.path.abspath(os.path.dirname(os.path.abspath(os.path.dirname(__file__)))), 'utils')) 16 | from autoanchor import check_anchor_order 17 | from general import make_divisible, check_file, set_logging 18 | from torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \ 19 | select_device, copy_attr 20 | from loss import SigmoidBin 21 | 22 | 23 | class Detect(nn.Module): 24 | stride = None # strides computed during build 25 | export = False # onnx export 26 | end2end = False 27 | include_nms = False 28 | concat = False 29 | 30 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 31 | super(Detect, self).__init__() 32 | self.nc = nc # number of classes 33 | self.no = nc + 5 # number of outputs per anchor 34 | self.nl = len(anchors) # number of detection layers 35 | self.na = len(anchors[0]) // 2 # number of anchors 36 | self.grid = [torch.zeros(1)] * self.nl # init grid 37 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 38 | self.register_buffer('anchors', a) # shape(nl,na,2) 39 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 40 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 41 | 42 | def forward(self, x): 43 | # x = x.copy() # for profiling 44 | z = [] # inference output 45 | self.training |= self.export 46 | for i in range(self.nl): 47 | x[i] = self.m[i](x[i]) # conv 48 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 49 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 50 | 51 | if not self.training: # inference 52 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 53 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 54 | y = x[i].sigmoid() 55 | if not torch.onnx.is_in_onnx_export(): 56 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 57 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 58 | else: 59 | xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 60 | xy = xy * (2. 
* self.stride[i]) + (self.stride[i] * (self.grid[i] - 0.5)) # new xy 61 | wh = wh ** 2 * (4 * self.anchor_grid[i].data) # new wh 62 | y = torch.cat((xy, wh, conf), 4) 63 | z.append(y.view(bs, -1, self.no)) 64 | 65 | if self.training: 66 | out = x 67 | elif self.end2end: 68 | out = torch.cat(z, 1) 69 | elif self.include_nms: 70 | z = self.convert(z) 71 | out = (z, ) 72 | elif self.concat: 73 | out = torch.cat(z, 1) 74 | else: 75 | out = (torch.cat(z, 1), x) 76 | 77 | return out 78 | 79 | @staticmethod 80 | def _make_grid(nx=20, ny=20): 81 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 82 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 83 | 84 | def convert(self, z): 85 | z = torch.cat(z, 1) 86 | box = z[:, :, :4] 87 | conf = z[:, :, 4:5] 88 | score = z[:, :, 5:] 89 | score *= conf 90 | convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], 91 | dtype=torch.float32, 92 | device=z.device) 93 | box @= convert_matrix 94 | return (box, score) 95 | 96 | 97 | class IDetect(nn.Module): 98 | stride = None # strides computed during build 99 | export = False # onnx export 100 | end2end = False 101 | include_nms = False 102 | concat = False 103 | 104 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 105 | super(IDetect, self).__init__() 106 | self.nc = nc # number of classes 107 | self.no = nc + 5 # number of outputs per anchor 108 | self.nl = len(anchors) # number of detection layers 109 | self.na = len(anchors[0]) // 2 # number of anchors 110 | self.grid = [torch.zeros(1)] * self.nl # init grid 111 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 112 | self.register_buffer('anchors', a) # shape(nl,na,2) 113 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 114 | # self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 22 x 3 115 | self.m = nn.ModuleList(nn.Conv2d(x, 85 * 3, 1) for x in ch) 116 | # self.m_loc = nn.ModuleList(nn.Conv2d(x, 5 * self.na, 1) for x in ch) 117 | self.m_cls = nn.ModuleList(nn.Conv2d(x, (self.no - 5) * self.na, 1) for x in ch) 118 | 119 | self.ia = nn.ModuleList(ImplicitA(x) for x in ch) 120 | self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch) 121 | 122 | def forward(self, x): 123 | # x = x.copy() # for profiling 124 | z = [] # inference output 125 | self.training |= self.export 126 | for i in range(self.nl): 127 | # x[i] = self.m[i](self.ia[i](x[i])) # conv 128 | x[i] = torch.cat([self.m[i](self.ia[i](x[i]))[:,:5*self.na], self.m_cls[i](self.ia[i](x[i]))], 1) 129 | x[i] = self.im[i](x[i]) 130 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 131 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 132 | 133 | if not self.training: # inference 134 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 135 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 136 | 137 | y = x[i].sigmoid() 138 | y[..., 0:2] = (y[..., 0:2] * 2. 
- 0.5 + self.grid[i]) * self.stride[i] # xy 139 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 140 | z.append(y.view(bs, -1, self.no)) 141 | 142 | return x if self.training else (torch.cat(z, 1), x) 143 | 144 | def fuseforward(self, x): 145 | # x = x.copy() # for profiling 146 | z = [] # inference output 147 | self.training |= self.export 148 | for i in range(self.nl): 149 | x[i] = self.m[i](x[i]) # conv 150 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 151 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 152 | 153 | if not self.training: # inference 154 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 155 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 156 | 157 | y = x[i].sigmoid() 158 | if not torch.onnx.is_in_onnx_export(): 159 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 160 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 161 | else: 162 | xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 163 | xy = xy * (2. * self.stride[i]) + (self.stride[i] * (self.grid[i] - 0.5)) # new xy 164 | wh = wh ** 2 * (4 * self.anchor_grid[i].data) # new wh 165 | y = torch.cat((xy, wh, conf), 4) 166 | z.append(y.view(bs, -1, self.no)) 167 | 168 | if self.training: 169 | out = x 170 | elif self.end2end: 171 | out = torch.cat(z, 1) 172 | elif self.include_nms: 173 | z = self.convert(z) 174 | out = (z, ) 175 | elif self.concat: 176 | out = torch.cat(z, 1) 177 | else: 178 | out = (torch.cat(z, 1), x) 179 | 180 | return out 181 | 182 | def fuse(self): 183 | print("IDetect.fuse") 184 | # fuse ImplicitA and Convolution 185 | for i in range(len(self.m)): 186 | c1,c2,_,_ = self.m[i].weight.shape 187 | c1_,c2_, _,_ = self.ia[i].implicit.shape 188 | self.m[i].bias += torch.matmul(self.m[i].weight.reshape(c1,c2),self.ia[i].implicit.reshape(c2_,c1_)).squeeze(1) 189 | 190 | # fuse ImplicitM and Convolution 191 | for i in range(len(self.m)): 192 | c1,c2, _,_ = self.im[i].implicit.shape 193 | self.m[i].bias *= self.im[i].implicit.reshape(c2) 194 | self.m[i].weight *= self.im[i].implicit.transpose(0,1) 195 | 196 | @staticmethod 197 | def _make_grid(nx=20, ny=20): 198 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 199 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 200 | 201 | def convert(self, z): 202 | z = torch.cat(z, 1) 203 | box = z[:, :, :4] 204 | conf = z[:, :, 4:5] 205 | score = z[:, :, 5:] 206 | score *= conf 207 | convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], 208 | dtype=torch.float32, 209 | device=z.device) 210 | box @= convert_matrix 211 | return (box, score) 212 | 213 | 214 | class IKeypoint(nn.Module): 215 | stride = None # strides computed during build 216 | export = False # onnx export 217 | 218 | def __init__(self, nc=80, anchors=(), nkpt=17, ch=(), inplace=True, dw_conv_kpt=False): # detection layer 219 | super(IKeypoint, self).__init__() 220 | self.nc = nc # number of classes 221 | self.nkpt = nkpt 222 | self.dw_conv_kpt = dw_conv_kpt 223 | self.no_det=(nc + 5) # number of outputs per anchor for box and class 224 | self.no_kpt = 3*self.nkpt ## number of outputs per anchor for keypoints 225 | self.no = self.no_det+self.no_kpt 226 | self.nl = len(anchors) # number of detection layers 227 | self.na = len(anchors[0]) // 2 # number of anchors 228 | self.grid = [torch.zeros(1)] * self.nl # init grid 229 | self.flip_test = False 230 | a = 
torch.tensor(anchors).float().view(self.nl, -1, 2) 231 | self.register_buffer('anchors', a) # shape(nl,na,2) 232 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 233 | self.m = nn.ModuleList(nn.Conv2d(x, self.no_det * self.na, 1) for x in ch) # output conv 234 | 235 | self.ia = nn.ModuleList(ImplicitA(x) for x in ch) 236 | self.im = nn.ModuleList(ImplicitM(self.no_det * self.na) for _ in ch) 237 | 238 | if self.nkpt is not None: 239 | if self.dw_conv_kpt: #keypoint head is slightly more complex 240 | self.m_kpt = nn.ModuleList( 241 | nn.Sequential(DWConv(x, x, k=3), Conv(x,x), 242 | DWConv(x, x, k=3), Conv(x, x), 243 | DWConv(x, x, k=3), Conv(x,x), 244 | DWConv(x, x, k=3), Conv(x, x), 245 | DWConv(x, x, k=3), Conv(x, x), 246 | DWConv(x, x, k=3), nn.Conv2d(x, self.no_kpt * self.na, 1)) for x in ch) 247 | else: #keypoint head is a single convolution 248 | self.m_kpt = nn.ModuleList(nn.Conv2d(x, self.no_kpt * self.na, 1) for x in ch) 249 | 250 | self.inplace = inplace # use in-place ops (e.g. slice assignment) 251 | 252 | def forward(self, x): 253 | # x = x.copy() # for profiling 254 | z = [] # inference output 255 | self.training |= self.export 256 | for i in range(self.nl): 257 | if self.nkpt is None or self.nkpt==0: 258 | x[i] = self.im[i](self.m[i](self.ia[i](x[i]))) # conv 259 | else : 260 | x[i] = torch.cat((self.im[i](self.m[i](self.ia[i](x[i]))), self.m_kpt[i](x[i])), axis=1) 261 | 262 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 263 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 264 | x_det = x[i][..., :6] 265 | x_kpt = x[i][..., 6:] 266 | 267 | if not self.training: # inference 268 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 269 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 270 | kpt_grid_x = self.grid[i][..., 0:1] 271 | kpt_grid_y = self.grid[i][..., 1:2] 272 | 273 | if self.nkpt == 0: 274 | y = x[i].sigmoid() 275 | else: 276 | y = x_det.sigmoid() 277 | 278 | if self.inplace: 279 | xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 280 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].view(1, self.na, 1, 1, 2) # wh 281 | if self.nkpt != 0: 282 | x_kpt[..., 0::3] = (x_kpt[..., ::3] * 2. - 0.5 + kpt_grid_x.repeat(1,1,1,1,17)) * self.stride[i] # xy 283 | x_kpt[..., 1::3] = (x_kpt[..., 1::3] * 2. - 0.5 + kpt_grid_y.repeat(1,1,1,1,17)) * self.stride[i] # xy 284 | #x_kpt[..., 0::3] = (x_kpt[..., ::3] + kpt_grid_x.repeat(1,1,1,1,17)) * self.stride[i] # xy 285 | #x_kpt[..., 1::3] = (x_kpt[..., 1::3] + kpt_grid_y.repeat(1,1,1,1,17)) * self.stride[i] # xy 286 | #print('=============') 287 | #print(self.anchor_grid[i].shape) 288 | #print(self.anchor_grid[i][...,0].unsqueeze(4).shape) 289 | #print(x_kpt[..., 0::3].shape) 290 | #x_kpt[..., 0::3] = ((x_kpt[..., 0::3].tanh() * 2.) ** 3 * self.anchor_grid[i][...,0].unsqueeze(4).repeat(1,1,1,1,self.nkpt)) + kpt_grid_x.repeat(1,1,1,1,17) * self.stride[i] # xy 291 | #x_kpt[..., 1::3] = ((x_kpt[..., 1::3].tanh() * 2.) ** 3 * self.anchor_grid[i][...,1].unsqueeze(4).repeat(1,1,1,1,self.nkpt)) + kpt_grid_y.repeat(1,1,1,1,17) * self.stride[i] # xy 292 | #x_kpt[..., 0::3] = (((x_kpt[..., 0::3].sigmoid() * 4.) ** 2 - 8.) * self.anchor_grid[i][...,0].unsqueeze(4).repeat(1,1,1,1,self.nkpt)) + kpt_grid_x.repeat(1,1,1,1,17) * self.stride[i] # xy 293 | #x_kpt[..., 1::3] = (((x_kpt[..., 1::3].sigmoid() * 4.) ** 2 - 8.) 
* self.anchor_grid[i][...,1].unsqueeze(4).repeat(1,1,1,1,self.nkpt)) + kpt_grid_y.repeat(1,1,1,1,17) * self.stride[i] # xy 294 | x_kpt[..., 2::3] = x_kpt[..., 2::3].sigmoid() 295 | 296 | y = torch.cat((xy, wh, y[..., 4:], x_kpt), dim = -1) 297 | 298 | else: # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953 299 | xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 300 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 301 | if self.nkpt != 0: 302 | y[..., 6:] = (y[..., 6:] * 2. - 0.5 + self.grid[i].repeat((1,1,1,1,self.nkpt))) * self.stride[i] # xy 303 | y = torch.cat((xy, wh, y[..., 4:]), -1) 304 | 305 | z.append(y.view(bs, -1, self.no)) 306 | 307 | return x if self.training else (torch.cat(z, 1), x) 308 | 309 | @staticmethod 310 | def _make_grid(nx=20, ny=20): 311 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 312 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 313 | 314 | 315 | class IAuxDetect(nn.Module): 316 | stride = None # strides computed during build 317 | export = False # onnx export 318 | end2end = False 319 | include_nms = False 320 | concat = False 321 | 322 | def __init__(self, nc=80, anchors=(), ch=()): # detection layer 323 | super(IAuxDetect, self).__init__() 324 | self.nc = nc # number of classes 325 | self.no = nc + 5 # number of outputs per anchor 326 | self.nl = len(anchors) # number of detection layers 327 | self.na = len(anchors[0]) // 2 # number of anchors 328 | self.grid = [torch.zeros(1)] * self.nl # init grid 329 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 330 | self.register_buffer('anchors', a) # shape(nl,na,2) 331 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 332 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch[:self.nl]) # output conv 333 | self.m2 = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch[self.nl:]) # output conv 334 | 335 | self.ia = nn.ModuleList(ImplicitA(x) for x in ch[:self.nl]) 336 | self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch[:self.nl]) 337 | 338 | def forward(self, x): 339 | # x = x.copy() # for profiling 340 | z = [] # inference output 341 | self.training |= self.export 342 | for i in range(self.nl): 343 | x[i] = self.m[i](self.ia[i](x[i])) # conv 344 | x[i] = self.im[i](x[i]) 345 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 346 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 347 | 348 | x[i+self.nl] = self.m2[i](x[i+self.nl]) 349 | x[i+self.nl] = x[i+self.nl].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 350 | 351 | if not self.training: # inference 352 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 353 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 354 | 355 | y = x[i].sigmoid() 356 | if not torch.onnx.is_in_onnx_export(): 357 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 358 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 359 | else: 360 | xy, wh, conf = y.split((2, 2, self.nc + 1), 4) # y.tensor_split((2, 4, 5), 4) # torch 1.8.0 361 | xy = xy * (2. 
* self.stride[i]) + (self.stride[i] * (self.grid[i] - 0.5)) # new xy 362 | wh = wh ** 2 * (4 * self.anchor_grid[i].data) # new wh 363 | y = torch.cat((xy, wh, conf), 4) 364 | z.append(y.view(bs, -1, self.no)) 365 | 366 | return x if self.training else (torch.cat(z, 1), x[:self.nl]) 367 | 368 | def fuseforward(self, x): 369 | # x = x.copy() # for profiling 370 | z = [] # inference output 371 | self.training |= self.export 372 | for i in range(self.nl): 373 | x[i] = self.m[i](x[i]) # conv 374 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 375 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 376 | 377 | if not self.training: # inference 378 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 379 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 380 | 381 | y = x[i].sigmoid() 382 | if not torch.onnx.is_in_onnx_export(): 383 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 384 | y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 385 | else: 386 | xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 387 | wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i].data # wh 388 | y = torch.cat((xy, wh, y[..., 4:]), -1) 389 | z.append(y.view(bs, -1, self.no)) 390 | 391 | if self.training: 392 | out = x 393 | elif self.end2end: 394 | out = torch.cat(z, 1) 395 | elif self.include_nms: 396 | z = self.convert(z) 397 | out = (z, ) 398 | elif self.concat: 399 | out = torch.cat(z, 1) 400 | else: 401 | out = (torch.cat(z, 1), x) 402 | 403 | return out 404 | 405 | def fuse(self): 406 | print("IAuxDetect.fuse") 407 | # fuse ImplicitA and Convolution 408 | for i in range(len(self.m)): 409 | c1,c2,_,_ = self.m[i].weight.shape 410 | c1_,c2_, _,_ = self.ia[i].implicit.shape 411 | self.m[i].bias += torch.matmul(self.m[i].weight.reshape(c1,c2),self.ia[i].implicit.reshape(c2_,c1_)).squeeze(1) 412 | 413 | # fuse ImplicitM and Convolution 414 | for i in range(len(self.m)): 415 | c1,c2, _,_ = self.im[i].implicit.shape 416 | self.m[i].bias *= self.im[i].implicit.reshape(c2) 417 | self.m[i].weight *= self.im[i].implicit.transpose(0,1) 418 | 419 | @staticmethod 420 | def _make_grid(nx=20, ny=20): 421 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 422 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 423 | 424 | def convert(self, z): 425 | z = torch.cat(z, 1) 426 | box = z[:, :, :4] 427 | conf = z[:, :, 4:5] 428 | score = z[:, :, 5:] 429 | score *= conf 430 | convert_matrix = torch.tensor([[1, 0, 1, 0], [0, 1, 0, 1], [-0.5, 0, 0.5, 0], [0, -0.5, 0, 0.5]], 431 | dtype=torch.float32, 432 | device=z.device) 433 | box @= convert_matrix 434 | return (box, score) 435 | 436 | 437 | class IBin(nn.Module): 438 | stride = None # strides computed during build 439 | export = False # onnx export 440 | 441 | def __init__(self, nc=80, anchors=(), ch=(), bin_count=21): # detection layer 442 | super(IBin, self).__init__() 443 | self.nc = nc # number of classes 444 | self.bin_count = bin_count 445 | 446 | self.w_bin_sigmoid = SigmoidBin(bin_count=self.bin_count, min=0.0, max=4.0) 447 | self.h_bin_sigmoid = SigmoidBin(bin_count=self.bin_count, min=0.0, max=4.0) 448 | # classes, x,y,obj 449 | self.no = nc + 3 + \ 450 | self.w_bin_sigmoid.get_length() + self.h_bin_sigmoid.get_length() # w-bce, h-bce 451 | # + self.x_bin_sigmoid.get_length() + self.y_bin_sigmoid.get_length() 452 | 453 | self.nl = len(anchors) # number of detection layers 454 | self.na = len(anchors[0]) // 2 # number of anchors 455 | 
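# One lazily built grid per detection layer; the placeholders below are replaced with real coordinate grids the first time forward() runs at a given feature-map size.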
self.grid = [torch.zeros(1)] * self.nl # init grid 456 | a = torch.tensor(anchors).float().view(self.nl, -1, 2) 457 | self.register_buffer('anchors', a) # shape(nl,na,2) 458 | self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) # shape(nl,1,na,1,1,2) 459 | self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv 460 | 461 | self.ia = nn.ModuleList(ImplicitA(x) for x in ch) 462 | self.im = nn.ModuleList(ImplicitM(self.no * self.na) for _ in ch) 463 | 464 | def forward(self, x): 465 | 466 | #self.x_bin_sigmoid.use_fw_regression = True 467 | #self.y_bin_sigmoid.use_fw_regression = True 468 | self.w_bin_sigmoid.use_fw_regression = True 469 | self.h_bin_sigmoid.use_fw_regression = True 470 | 471 | # x = x.copy() # for profiling 472 | z = [] # inference output 473 | self.training |= self.export 474 | for i in range(self.nl): 475 | x[i] = self.m[i](self.ia[i](x[i])) # conv 476 | x[i] = self.im[i](x[i]) 477 | bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) 478 | x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() 479 | 480 | if not self.training: # inference 481 | if self.grid[i].shape[2:4] != x[i].shape[2:4]: 482 | self.grid[i] = self._make_grid(nx, ny).to(x[i].device) 483 | 484 | y = x[i].sigmoid() 485 | y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy 486 | #y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh 487 | 488 | 489 | #px = (self.x_bin_sigmoid.forward(y[..., 0:12]) + self.grid[i][..., 0]) * self.stride[i] 490 | #py = (self.y_bin_sigmoid.forward(y[..., 12:24]) + self.grid[i][..., 1]) * self.stride[i] 491 | 492 | pw = self.w_bin_sigmoid.forward(y[..., 2:24]) * self.anchor_grid[i][..., 0] 493 | ph = self.h_bin_sigmoid.forward(y[..., 24:46]) * self.anchor_grid[i][..., 1] 494 | 495 | #y[..., 0] = px 496 | #y[..., 1] = py 497 | y[..., 2] = pw 498 | y[..., 3] = ph 499 | 500 | y = torch.cat((y[..., 0:4], y[..., 46:]), dim=-1) 501 | 502 | z.append(y.view(bs, -1, y.shape[-1])) 503 | 504 | return x if self.training else (torch.cat(z, 1), x) 505 | 506 | @staticmethod 507 | def _make_grid(nx=20, ny=20): 508 | yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) 509 | return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() 510 | 511 | 512 | class Model(nn.Module): 513 | def __init__(self, cfg='yolor-csp-c.yaml', ch=3, nc=None, anchors=None): # model, input channels, number of classes 514 | super(Model, self).__init__() 515 | self.traced = False 516 | if isinstance(cfg, dict): 517 | self.yaml = cfg # model dict 518 | else: # is *.yaml 519 | import yaml # for torch hub 520 | self.yaml_file = Path(cfg).name 521 | with open(cfg) as f: 522 | self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict 523 | 524 | # Define model 525 | ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels 526 | if nc and nc != self.yaml['nc']: 527 | logger.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") 528 | self.yaml['nc'] = nc # override yaml value 529 | if anchors: 530 | logger.info(f'Overriding model.yaml anchors with anchors={anchors}') 531 | self.yaml['anchors'] = round(anchors) # override yaml value 532 | self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist 533 | self.names = [str(i) for i in range(self.yaml['nc'])] # default names 534 | # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))]) 535 | 536 | # Build strides, anchors 537 | m = self.model[-1] # Detect() 538 | if isinstance(m, 
Detect): 539 | s = 256 # 2x min stride 540 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 541 | check_anchor_order(m) 542 | m.anchors /= m.stride.view(-1, 1, 1) 543 | self.stride = m.stride 544 | self._initialize_biases() # only run once 545 | # print('Strides: %s' % m.stride.tolist()) 546 | if isinstance(m, IDetect): 547 | s = 256 # 2x min stride 548 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 549 | check_anchor_order(m) 550 | m.anchors /= m.stride.view(-1, 1, 1) 551 | self.stride = m.stride 552 | self._initialize_biases() # only run once 553 | # print('Strides: %s' % m.stride.tolist()) 554 | if isinstance(m, IAuxDetect): 555 | s = 256 # 2x min stride 556 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))[:4]]) # forward 557 | #print(m.stride) 558 | check_anchor_order(m) 559 | m.anchors /= m.stride.view(-1, 1, 1) 560 | self.stride = m.stride 561 | self._initialize_aux_biases() # only run once 562 | # print('Strides: %s' % m.stride.tolist()) 563 | if isinstance(m, IBin): 564 | s = 256 # 2x min stride 565 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 566 | check_anchor_order(m) 567 | m.anchors /= m.stride.view(-1, 1, 1) 568 | self.stride = m.stride 569 | self._initialize_biases_bin() # only run once 570 | # print('Strides: %s' % m.stride.tolist()) 571 | if isinstance(m, IKeypoint): 572 | s = 256 # 2x min stride 573 | m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward 574 | check_anchor_order(m) 575 | m.anchors /= m.stride.view(-1, 1, 1) 576 | self.stride = m.stride 577 | self._initialize_biases_kpt() # only run once 578 | # print('Strides: %s' % m.stride.tolist()) 579 | 580 | # Init weights, biases 581 | initialize_weights(self) 582 | self.info() 583 | logger.info('') 584 | 585 | def forward(self, x, augment=False, profile=False): 586 | if augment: 587 | img_size = x.shape[-2:] # height, width 588 | s = [1, 0.83, 0.67] # scales 589 | f = [None, 3, None] # flips (2-ud, 3-lr) 590 | y = [] # outputs 591 | for si, fi in zip(s, f): 592 | xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) 593 | yi = self.forward_once(xi)[0] # forward 594 | # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save 595 | yi[..., :4] /= si # de-scale 596 | if fi == 2: 597 | yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud 598 | elif fi == 3: 599 | yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr 600 | y.append(yi) 601 | return torch.cat(y, 1), None # augmented inference, train 602 | else: 603 | return self.forward_once(x, profile) # single-scale inference, train 604 | 605 | def forward_once(self, x, profile=False): 606 | y, dt = [], [] # outputs 607 | for m in self.model: 608 | if m.f != -1: # if not from previous layer 609 | x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers 610 | 611 | if not hasattr(self, 'traced'): 612 | self.traced=False 613 | 614 | if self.traced: 615 | if isinstance(m, Detect) or isinstance(m, IDetect) or isinstance(m, IAuxDetect) or isinstance(m, IKeypoint): 616 | break 617 | 618 | if profile: 619 | c = isinstance(m, (Detect, IDetect, IAuxDetect, IBin)) 620 | o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1E9 * 2 if thop else 0 # FLOPS 621 | for _ in range(10): 622 | m(x.copy() if c else x) 623 | t 
= time_synchronized() 624 | for _ in range(10): 625 | m(x.copy() if c else x) 626 | dt.append((time_synchronized() - t) * 100) 627 | print('%10.1f%10.0f%10.1fms %-40s' % (o, m.np, dt[-1], m.type)) 628 | 629 | x = m(x) # run 630 | 631 | y.append(x if m.i in self.save else None) # save output 632 | 633 | if profile: 634 | print('%.1fms total' % sum(dt)) 635 | return x 636 | 637 | def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 638 | # https://arxiv.org/abs/1708.02002 section 3.3 639 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 640 | m = self.model[-1] # Detect() module 641 | for mi, s in zip(m.m, m.stride): # from 642 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 643 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 644 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 645 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 646 | 647 | def _initialize_aux_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency 648 | # https://arxiv.org/abs/1708.02002 section 3.3 649 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 650 | m = self.model[-1] # Detect() module 651 | for mi, mi2, s in zip(m.m, m.m2, m.stride): # from 652 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 653 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 654 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 655 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 656 | b2 = mi2.bias.view(m.na, -1) # conv.bias(255) to (3,85) 657 | b2.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 658 | b2.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 659 | mi2.bias = torch.nn.Parameter(b2.view(-1), requires_grad=True) 660 | 661 | def _initialize_biases_bin(self, cf=None): # initialize biases into Detect(), cf is class frequency 662 | # https://arxiv.org/abs/1708.02002 section 3.3 663 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 664 | m = self.model[-1] # Bin() module 665 | bc = m.bin_count 666 | for mi, s in zip(m.m, m.stride): # from 667 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 668 | old = b[:, (0,1,2,bc+3)].data 669 | obj_idx = 2*bc+4 670 | b[:, :obj_idx].data += math.log(0.6 / (bc + 1 - 0.99)) 671 | b[:, obj_idx].data += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 672 | b[:, (obj_idx+1):].data += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 673 | b[:, (0,1,2,bc+3)].data = old 674 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 675 | 676 | def _initialize_biases_kpt(self, cf=None): # initialize biases into Detect(), cf is class frequency 677 | # https://arxiv.org/abs/1708.02002 section 3.3 678 | # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. 
679 | m = self.model[-1] # Detect() module 680 | for mi, s in zip(m.m, m.stride): # from 681 | b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) 682 | b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) 683 | b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum()) # cls 684 | mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) 685 | 686 | def _print_biases(self): 687 | m = self.model[-1] # Detect() module 688 | for mi in m.m: # from 689 | b = mi.bias.detach().view(m.na, -1).T # conv.bias(255) to (3,85) 690 | print(('%6g Conv2d.bias:' + '%10.3g' * 6) % (mi.weight.shape[1], *b[:5].mean(1).tolist(), b[5:].mean())) 691 | 692 | # def _print_weights(self): 693 | # for m in self.model.modules(): 694 | # if type(m) is Bottleneck: 695 | # print('%10.3g' % (m.w.detach().sigmoid() * 2)) # shortcut weights 696 | 697 | def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers 698 | print('Fusing layers... ') 699 | for m in self.model.modules(): 700 | if isinstance(m, RepConv): 701 | #print(f" fuse_repvgg_block") 702 | m.fuse_repvgg_block() 703 | elif isinstance(m, RepConv_OREPA): 704 | #print(f" switch_to_deploy") 705 | m.switch_to_deploy() 706 | elif type(m) is Conv and hasattr(m, 'bn'): 707 | m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv 708 | delattr(m, 'bn') # remove batchnorm 709 | m.forward = m.fuseforward # update forward 710 | elif isinstance(m, (IDetect, IAuxDetect)): 711 | m.fuse() 712 | m.forward = m.fuseforward 713 | self.info() 714 | return self 715 | 716 | def nms(self, mode=True): # add or remove NMS module 717 | present = type(self.model[-1]) is NMS # last layer is NMS 718 | if mode and not present: 719 | print('Adding NMS... ') 720 | m = NMS() # module 721 | m.f = -1 # from 722 | m.i = self.model[-1].i + 1 # index 723 | self.model.add_module(name='%s' % m.i, module=m) # add 724 | self.eval() 725 | elif not mode and present: 726 | print('Removing NMS... ') 727 | self.model = self.model[:-1] # remove 728 | return self 729 | 730 | def autoshape(self): # add autoShape module 731 | print('Adding autoShape... 
') 732 | m = autoShape(self) # wrap model 733 | copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes 734 | return m 735 | 736 | def info(self, verbose=False, img_size=640): # print model information 737 | model_info(self, verbose, img_size) 738 | 739 | 740 | def parse_model(d, ch): # model_dict, input_channels(3) 741 | logger.info('\n%3s%18s%3s%10s %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments')) 742 | anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple'] 743 | na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors 744 | no = na * (nc + 5) # number of outputs = anchors * (classes + 5) 745 | 746 | layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out 747 | for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']): # from, number, module, args 748 | m = eval(m) if isinstance(m, str) else m # eval strings 749 | for j, a in enumerate(args): 750 | try: 751 | args[j] = eval(a) if isinstance(a, str) else a # eval strings 752 | except: 753 | pass 754 | 755 | n = max(round(n * gd), 1) if n > 1 else n # depth gain 756 | if m in [nn.Conv2d, Conv, RobustConv, RobustConv2, DWConv, GhostConv, RepConv, RepConv_OREPA, DownC, 757 | SPP, SPPF, SPPCSPC, GhostSPPCSPC, MixConv2d, Focus, Stem, GhostStem, CrossConv, 758 | Bottleneck, BottleneckCSPA, BottleneckCSPB, BottleneckCSPC, 759 | RepBottleneck, RepBottleneckCSPA, RepBottleneckCSPB, RepBottleneckCSPC, 760 | Res, ResCSPA, ResCSPB, ResCSPC, 761 | RepRes, RepResCSPA, RepResCSPB, RepResCSPC, 762 | ResX, ResXCSPA, ResXCSPB, ResXCSPC, 763 | RepResX, RepResXCSPA, RepResXCSPB, RepResXCSPC, 764 | Ghost, GhostCSPA, GhostCSPB, GhostCSPC, 765 | SwinTransformerBlock, STCSPA, STCSPB, STCSPC, 766 | SwinTransformer2Block, ST2CSPA, ST2CSPB, ST2CSPC]: 767 | c1, c2 = ch[f], args[0] 768 | if c2 != no: # if not output 769 | c2 = make_divisible(c2 * gw, 8) 770 | 771 | args = [c1, c2, *args[1:]] 772 | if m in [DownC, SPPCSPC, GhostSPPCSPC, 773 | BottleneckCSPA, BottleneckCSPB, BottleneckCSPC, 774 | RepBottleneckCSPA, RepBottleneckCSPB, RepBottleneckCSPC, 775 | ResCSPA, ResCSPB, ResCSPC, 776 | RepResCSPA, RepResCSPB, RepResCSPC, 777 | ResXCSPA, ResXCSPB, ResXCSPC, 778 | RepResXCSPA, RepResXCSPB, RepResXCSPC, 779 | GhostCSPA, GhostCSPB, GhostCSPC, 780 | STCSPA, STCSPB, STCSPC, 781 | ST2CSPA, ST2CSPB, ST2CSPC]: 782 | args.insert(2, n) # number of repeats 783 | n = 1 784 | elif m is nn.BatchNorm2d: 785 | args = [ch[f]] 786 | elif m is Concat: 787 | c2 = sum([ch[x] for x in f]) 788 | elif m is Chuncat: 789 | c2 = sum([ch[x] for x in f]) 790 | elif m is Shortcut: 791 | c2 = ch[f[0]] 792 | elif m is Foldcut: 793 | c2 = ch[f] // 2 794 | elif m in [Detect, IDetect, IAuxDetect, IBin, IKeypoint]: 795 | args.append([ch[x] for x in f]) 796 | if isinstance(args[1], int): # number of anchors 797 | args[1] = [list(range(args[1] * 2))] * len(f) 798 | elif m is ReOrg: 799 | c2 = ch[f] * 4 800 | elif m is Contract: 801 | c2 = ch[f] * args[0] ** 2 802 | elif m is Expand: 803 | c2 = ch[f] // args[0] ** 2 804 | else: 805 | c2 = ch[f] 806 | 807 | m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module 808 | t = str(m)[8:-2].replace('__main__.', '') # module type 809 | np = sum([x.numel() for x in m_.parameters()]) # number params 810 | m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params 811 | logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print 812 | save.extend(x % i for x 
in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist 813 | layers.append(m_) 814 | if i == 0: 815 | ch = [] 816 | ch.append(c2) 817 | return nn.Sequential(*layers), sorted(save) 818 | 819 | 820 | if __name__ == '__main__': 821 | parser = argparse.ArgumentParser() 822 | parser.add_argument('--cfg', type=str, default='yolor-csp-c.yaml', help='model.yaml') 823 | parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') 824 | parser.add_argument('--profile', action='store_true', help='profile model speed') 825 | opt = parser.parse_args() 826 | opt.cfg = check_file(opt.cfg) # check file 827 | set_logging() 828 | device = select_device(opt.device) 829 | 830 | # Create model 831 | model = Model(opt.cfg).to(device) 832 | model.train() 833 | 834 | if opt.profile: 835 | img = torch.rand(1, 3, 640, 640).to(device) 836 | y = model(img, profile=True) 837 | 838 | # Profile 839 | # img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device) 840 | # y = model(img, profile=True) 841 | 842 | # Tensorboard 843 | # from torch.utils.tensorboard import SummaryWriter 844 | # tb_writer = SummaryWriter() 845 | # print("Run 'tensorboard --logdir=models/runs' to view tensorboard at http://localhost:6006/") 846 | # tb_writer.add_graph(model.model, img) # add model to tensorboard 847 | # tb_writer.add_image('test', img[0], dataformats='CWH') # add model to tensorboard --------------------------------------------------------------------------------
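
The `Model` class above builds the network from a YAML config via `parse_model`, derives the strides and normalised anchors from a dummy 256×256 forward pass, and initialises the detection-head biases. A minimal usage sketch, following the pattern of the file's own `__main__` block; the import path and the presence of the default `yolor-csp-c.yaml` config on disk are assumptions, and the shapes in the comments are illustrative only:
```
# Illustrative usage only -- config path and input size are placeholders.
import torch
from yolov7.models.yolo import Model   # assumes the repository root is on PYTHONPATH

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

model = Model('yolor-csp-c.yaml').to(device)   # same default cfg as the __main__ block above
model.eval()                                   # inference path: the head returns (predictions, raw features)

img = torch.zeros(1, 3, 640, 640, device=device)  # dummy image; H and W should be multiples of the max stride
with torch.no_grad():
    pred = model(img)[0]   # (batch, total anchors over all scales, 5 + nc)
print(pred.shape)
```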
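
The detection heads above all decode boxes the same way: the sigmoid-activated xy channels become a centre offset in (-0.5, 1.5) grid cells that is added to the cell index and scaled by the stride, while the wh channels are doubled, squared, and scaled by the anchor size. A standalone restatement of that decoding for illustration only; the function name is not part of the repository:
```
# Illustrative only: the box decoding used by the heads in yolo.py, factored out.
import torch

def decode_xywh(y, grid, anchor_grid, stride):
    """y: sigmoid-activated head output (..., >= 4); returns xy and wh in input-image pixels."""
    xy = (y[..., 0:2] * 2. - 0.5 + grid) * stride   # centre offset in (-0.5, 1.5) cells, plus cell index, times stride
    wh = (y[..., 2:4] * 2) ** 2 * anchor_grid       # (0, 4) times the anchor width/height
    return xy, wh
```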